当前位置: 首页 > news >正文

sklearn pipeline

示例代码

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import numpy as np
import scipy.linalg
from sklearn.preprocessing import LabelEncoder, StandardScaler
import optuna
import scipy.linalg
from sklearn.linear_model import BayesianRidge
import pandas as pd
from sklearn.model_selection import LeaveOneOut, cross_val_scoreclass EmscScaler(object):def __init__(self, order=1):self.order = orderself._mx = Nonedef mlr(self, x, y):"""Multiple linear regression fit of the columns of matrix x(dependent variables) to constituent vector y (independent variables)order -     order of a smoothing polynomial, which can be includedin the set of independent variables. If order isnot specified, no background will be included.b -         fit coeffsf -         fit result (m x 1 column vector)r -         residual   (m x 1 column vector)"""if self.order > 0:s = np.ones((len(y), 1))for j in range(self.order):s = np.concatenate((s, (np.arange(0, 1 + (1.0 / (len(y) - 1)), 1.0 / (len(y) - 1)) ** j).reshape(-1,1)[0:len(y)]),1)X = np.concatenate((x.reshape(-1,1), s), 1)else:X = x# calc fit b=fit coefficientsb = np.dot(np.dot(scipy.linalg.pinv(np.dot(X.T, X)), X.T), y)f = np.dot(X, b)r = y - freturn b, f, rdef fit(self, X, y=None):"""fit to X (get average spectrum), y is a passthrough for pipeline compatibility"""self._mx = np.mean(X, axis=0)def transform(self, X, y=None, copy=None):if type(self._mx) == type(None):print("EMSC not fit yet. run .fit method on reference spectra")else:# do fittingcorr = np.zeros(X.shape)for i in range(len(X)):b, f, r = self.mlr(self._mx, X[i, :])corr[i, :] = np.reshape((r / b[0]) + self._mx, (corr.shape[1],))return corrdef fit_transform(self, X, y=None):self.fit(X)return self.transform(X)from sklearn.base import BaseEstimator, TransformerMixin
class SpectraPreprocessor(BaseEstimator, TransformerMixin):def __init__(self, emsc_order=3,X_ref=None):self.emsc_order = emsc_orderself.emsc_scalers = [EmscScaler(order=emsc_order) for _ in range(4)]self.X_ref = X_refdef fit(self, X, y=None):X_ref = self.X_refif X_ref is None:X_ref = X.copy()# Define the column ranges for each segmentranges = [(0, 251), (281, 482), (482, 683), (683, 854)]# Fit EmscScaler for each segmentfor i, (start, end) in enumerate(ranges):self.emsc_scalers[i].fit(X_ref[:, start:end])return selfdef transform(self, X, y=None):# Define the column ranges for each segmentranges = [(0, 251), (281, 482), (482, 683), (683, 854)]# Transform each segmenttransformed_segments = []for i, (start, end) in enumerate(ranges):segment = X[:, start:end]transformed_segment = self.emsc_scalers[i].transform(segment)transformed_segments.append(transformed_segment)# Concatenate all transformed segmentsreturn np.concatenate(transformed_segments, axis=1)def fit_transform(self, X, y=None):self.fit(X)return self.transform(X)def bayesian_ridge_optuna_for_emsc_data(x_train, y_train, pipeline_):def objective(trial):try:alpha_1 = trial.suggest_float('alpha_1', 0.001, 1, log=True)alpha_2 = trial.suggest_float('alpha_2', 0.001, 1, log=True)lambda_1 = trial.suggest_float('lambda_1', 0.001, 1, log=True)lambda_2 = trial.suggest_float('lambda_2', 0.001, 1, log=True)model = pipeline_.set_params(bayesian_ridge__alpha_1=alpha_1,bayesian_ridge__alpha_2=alpha_2,bayesian_ridge__lambda_1=lambda_1,bayesian_ridge__lambda_2=lambda_2)model.fit(x_train, y_train)score = cross_val_score(model, x_train, y_train, cv=10, n_jobs=-1, scoring='r2')return np.mean(score)except ValueError as e:return -np.infoptuna.logging.set_verbosity(optuna.logging.WARNING)pruner = optuna.pruners.MedianPruner()study = optuna.create_study(direction="maximize", pruner=pruner)study.optimize(objective, n_trials=500, show_progress_bar=True, n_jobs=1)return study.best_paramsdef getdata(filenamex, filenamey):x = pd.read_csv(filenamex, header=None)y = pd.read_csv(filenamey)data = pd.concat([x, y], axis=1)return dataname = 'test'
x, y = np.random.rand(100,884), np.random.rand(100)
x_ref =  np.random.rand(30,884)
pipeline = Pipeline([('preprocessor', SpectraPreprocessor(emsc_order=3, X_ref=None)),('scaler', StandardScaler()),('bayesian_ridge', BayesianRidge())
])pipeline.set_params(preprocessor__X_ref=x_ref)############################################################################################################################################################
best_params = bayesian_ridge_optuna_for_emsc_data(x, y, pipeline)
############################################################################################################################################################pipeline.set_params(bayesian_ridge__alpha_1=best_params['alpha_1'],bayesian_ridge__alpha_2=best_params['alpha_2'],bayesian_ridge__lambda_1=best_params['lambda_1'],bayesian_ridge__lambda_2=best_params['lambda_2']
)
pipeline.fit(x, y)
y_pred = pipeline.predict(x)
print(y_pred)
http://www.lryc.cn/news/458720.html

相关文章:

  • springboot实现服务注册与发现
  • 美格智能亮相2024中国移动全球合作伙伴大会,共赢AI+时代
  • 【LeetCode】动态规划—309. 买卖股票的最佳时机含冷冻期(附完整Python/C++代码)
  • IDE启动失败
  • 【Kubernetes】常见面试题汇总(六十)
  • maven dependency中scope的取值类型
  • 线性代数在大一计算机课程中的重要性
  • 笔记本电脑按住电源键强行关机,对电脑有伤害吗?
  • 如何将 cryptopp库移植到UE5内
  • SpringBoot 集成GPT实战,超简单详细
  • 基于Langchain框架下Prompt工程调教大模型(LLM)[输入输出接口、提示词模板与例子选择器的协同应用
  • Vue基于vue-office实现docx、xlsx、pdf文件的在线预览
  • 哪个软件可以在线编辑ppt? 一口气推荐5个做ppt的得力助手!
  • Django学习笔记九:Django中间件Middleware
  • 原来自媒体高手都是这样选话题的,活该人家赚大钱,真后悔知道晚了
  • 胤娲科技:AI绘梦师——一键复刻梵高《星空》
  • 第18课-C++继承:探索面向对象编程的复用之道
  • 麒麟V10系统下的调试工具(网络和串口调试助手)
  • ssh封装上传下载
  • 018_FEA_Structure_Static_in_Matlab结构静力学分析
  • 网页打不开、找不到服务器IP地址
  • RUM性能优化之图片加载
  • 【Java】—— 泛型:泛型的理解及其在集合(List,Set)、比较器(Comparator)中的使用
  • 【Python】selenium遇到“InvalidArgumentException”的解决方法
  • RT-DETR改进策略:BackBone改进|CAFormer在RT-DETR中的创新应用,显著提升目标检测性能
  • 【YOLOv11】ultralytics最新作品yolov11 AND 模型的训练、推理、验证、导出 以及 使用
  • 动态规划——多状态动态规划问题
  • leetcode-10/9【堆相关】
  • 自然语言处理问答系统:技术进展、应用与挑战
  • 向量数据库!AI 时代的变革者还是泡沫?