
LSTM with the PyTorch Framework (Part 2): Multi-Feature Single-Step Prediction

1. Project Overview

Use the Close and Low features: each sample is a window of time_steps steps of both features, and the model predicts the next day's value of the single Close feature.

With batch_first=True, the LSTM input has shape (batch_size, time_steps, input_size), where:

batch_size = len(data) - time_steps
time_steps = the sliding-window length, called lookback in this project
input_size = 2 (because the Close and Low features are selected)
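As a minimal, self-contained sketch of this shape convention (the sizes below are made up purely for illustration and are not the project's real values, apart from input_size = 2):

import torch
import torch.nn as nn

# Illustrative sizes only
batch_size, time_steps, input_size, hidden_size = 8, 4, 2, 32

lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=2, batch_first=True)
x = torch.randn(batch_size, time_steps, input_size)  # (batch, window length, features per step)

out, (hn, cn) = lstm(x)
print(out.shape)  # torch.Size([8, 4, 32]): one hidden vector per time step
print(hn.shape)   # torch.Size([2, 8, 32]): final hidden state of each layer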

2. Dataset

Reference: the dataset used in https://blog.csdn.net/qq_38633279/article/details/134245512?spm=1001.2014.3001.5501

3. Data Preprocessing

3.1 Reading the Data

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import math, time
from sklearn.metrics import mean_squared_error

filepath = './data/rlData.csv'
data = pd.read_csv(filepath)
data = data.sort_values('Date')
data.head()
data.shape

sns.set_style("darkgrid")
plt.figure(figsize = (15,9))
plt.plot(data[['Close']])
plt.xticks(range(0,data.shape[0],20), data['Date'].loc[::20], rotation=45)
plt.title("****** Stock Price",fontsize=18, fontweight='bold')
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close Price (USD)',fontsize=18)
plt.show()

3.2 Selecting the Close and Low Features

price = data[['Close', 'Low']].copy()  # copy to avoid pandas SettingWithCopyWarning when assigning the scaled columns

3.3 Data Normalization

# Use one scaler per column: re-fitting a single scaler on Low would overwrite the
# Close statistics and break the inverse_transform of the Close predictions later.
scaler_close = MinMaxScaler(feature_range=(-1, 1))
scaler_low = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler_close.fit_transform(price['Close'].values.reshape(-1, 1))
price['Low'] = scaler_low.fit_transform(price['Low'].values.reshape(-1, 1))
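As a quick sanity check of the scaling step (a minimal sketch built only on the variables above), the per-column scaler can be inverted to recover the original prices, which is exactly what the inverse_transform calls in section 6 rely on:

# The scaled Close column should span roughly [-1, 1], and inverting it
# with scaler_close should reproduce the original Close prices.
close_scaled = price['Close'].values.reshape(-1, 1)
print(close_scaled.min(), close_scaled.max())
print(np.allclose(scaler_close.inverse_transform(close_scaled),
                  data['Close'].values.reshape(-1, 1)))  # expected: True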

3.4 Building the Dataset: [batch_size, time_steps, input_size]

Two features are selected as input, so input_size = 2.
x_train.shape = [batch_size,time_steps,input_size]
y_train.shape = [batch_size,1]

1. The input uses the Close and Low columns as multi-dimensional features, i.e. the first and second columns of the windowed data become x_train (hence input_size = 2).
2. The output to predict is the Close column, so the first column of the windowed data becomes y_train.

# 2. Build the dataset
def split_data(stock, lookback):
    data_raw = stock.to_numpy()
    data = []
    for index in range(len(data_raw) - lookback):
        data.append(data_raw[index: index + lookback])
    data = np.array(data)

    test_set_size = int(np.round(0.2 * data.shape[0]))
    train_set_size = data.shape[0] - test_set_size

    x_train = data[:train_set_size, :-1, :]   # x_train.shape = (198, 4, 2)
    y_train = data[:train_set_size, -1, 0:1]  # y_train.shape = (198, 1)
    x_test = data[train_set_size:, :-1, :]    # x_test.shape  = (49, 4, 2)
    y_test = data[train_set_size:, -1, 0:1]   # y_test.shape  = (49, 1)

    return [torch.Tensor(x_train), torch.Tensor(y_train), torch.Tensor(x_test), torch.Tensor(y_test)]

lookback = 5
x_train, y_train, x_test, y_test = split_data(price, lookback)
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ',y_train.shape)
print('x_test.shape = ',x_test.shape)
print('y_test.shape = ',y_test.shape)
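To make the windowing logic easier to follow, here is an illustrative run of split_data on a tiny made-up frame (the values are arbitrary and only show which slices become inputs and targets):

# Toy data: 7 days, 2 features, lookback = 3
toy = pd.DataFrame({'Close': [1., 2., 3., 4., 5., 6., 7.],
                    'Low':   [0., 1., 2., 3., 4., 5., 6.]})
x_tr, y_tr, x_te, y_te = split_data(toy, lookback=3)
print(x_tr.shape, y_tr.shape)  # torch.Size([3, 2, 2]) torch.Size([3, 1])
print(x_tr[0])  # Close and Low of the first two days of the first window
print(y_tr[0])  # Close of the third day: the value to be predicted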

4. The LSTM Model

The LSTM model here is exactly the same as the one used for single-feature single-step prediction; the only difference is the input it receives, which is determined when the dataset is built.

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])
        return out
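As a quick shape check (a sketch using the x_train built above and the hyper-parameter values from the next section), the fully connected layer maps the last time step's hidden state to a single Close prediction per window:

m = LSTM(input_dim=2, hidden_dim=32, num_layers=2, output_dim=1)
with torch.no_grad():
    print(m(x_train).shape)  # torch.Size([198, 1]): one predicted (scaled) Close per training window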

5. Model Training

input_dim = 2
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 100

model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

hist = np.zeros(num_epochs)
lstm = []
start_time = time.time()
for t in range(num_epochs):
    y_train_pred = model(x_train)
    loss = criterion(y_train_pred, y_train)
    hist[t] = loss.item()
    # print("Epoch ", t, "MSE: ", loss.item())
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
training_time = time.time() - start_time  # used later when collecting the scores

6. Plotting Predicted vs. Actual Values and the Training Loss

predict = pd.DataFrame(scaler_close.inverse_transform(y_train_pred.detach().numpy()))
original = pd.DataFrame(scaler_close.inverse_transform(y_train.detach().numpy()))

sns.set_style("darkgrid")
fig = plt.figure()
fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.subplot(1, 2, 1)
ax = sns.lineplot(x=original.index, y=original[0], label="Data", color='royalblue')
ax = sns.lineplot(x=predict.index, y=predict[0], label="Training Prediction (LSTM)", color='tomato')
ax.set_title('Stock price', size=14, fontweight='bold')
ax.set_xlabel("Days", size=14)
ax.set_ylabel("Cost (USD)", size=14)
ax.set_xticklabels('', size=10)

plt.subplot(1, 2, 2)
ax = sns.lineplot(data=hist, color='royalblue')
ax.set_xlabel("Epoch", size=14)
ax.set_ylabel("Loss", size=14)
ax.set_title("Training Loss", size=14, fontweight='bold')
fig.set_figheight(6)
fig.set_figwidth(16)

# make predictions
y_test_pred = model(x_test)

# invert predictions
y_train_pred = scaler_close.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler_close.inverse_transform(y_train.detach().numpy())
y_test_pred = scaler_close.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler_close.inverse_transform(y_test.detach().numpy())

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:,0], y_train_pred[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:,0], y_test_pred[:,0]))
print('Test Score: %.2f RMSE' % (testScore))
lstm.append(trainScore)
lstm.append(testScore)
lstm.append(training_time)
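The stated goal is to forecast Close one day beyond the available data, and the listing above stops at the test-set RMSE. The snippet below is a minimal sketch (not part of the original code) of that last step: feed the most recent lookback-1 rows of the scaled features to the trained model and map the output back to the price scale.

# Predict the next day's Close from the most recent window of scaled features
last_window = torch.Tensor(price[['Close', 'Low']].values[-(lookback - 1):]).unsqueeze(0)  # (1, 4, 2)
with torch.no_grad():
    next_close_scaled = model(last_window)  # (1, 1), still in the scaled range
next_close = scaler_close.inverse_transform(next_close_scaled.numpy())
print('Predicted next-day Close: %.2f USD' % next_close[0, 0])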

Complete Code

Problem statement:
Use the Close and Low features: each sample is a window of time_steps steps of both features, and the model predicts the next day's value of the single Close feature.
With batch_first=True, the LSTM input has shape (batch_size, time_steps, input_size), where:
batch_size = len(data) - time_steps
time_steps = the sliding-window length, called lookback in this project
input_size = 2 (because the Close and Low features are selected)
#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import math, time
from sklearn.metrics import mean_squared_error

filepath = './data/rlData.csv'
data = pd.read_csv(filepath)
data = data.sort_values('Date')
data.head()
data.shape

sns.set_style("darkgrid")
plt.figure(figsize=(15, 9))
plt.plot(data[['Close']])
plt.xticks(range(0, data.shape[0], 20), data['Date'].loc[::20], rotation=45)
plt.title("****** Stock Price", fontsize=18, fontweight='bold')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price (USD)', fontsize=18)
plt.show()

# 1. Select the two features
price = data[['Close', 'Low']].copy()

scaler_close = MinMaxScaler(feature_range=(-1, 1))
scaler_low = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler_close.fit_transform(price['Close'].values.reshape(-1, 1))
price['Low'] = scaler_low.fit_transform(price['Low'].values.reshape(-1, 1))

# 2. Build the dataset
def split_data(stock, lookback):
    data_raw = stock.to_numpy()
    data = []
    for index in range(len(data_raw) - lookback):
        data.append(data_raw[index: index + lookback])
    data = np.array(data)

    test_set_size = int(np.round(0.2 * data.shape[0]))
    train_set_size = data.shape[0] - test_set_size

    x_train = data[:train_set_size, :-1, :]   # x_train.shape = (198, 4, 2)
    y_train = data[:train_set_size, -1, 0:1]  # y_train.shape = (198, 1)
    x_test = data[train_set_size:, :-1, :]    # x_test.shape  = (49, 4, 2)
    y_test = data[train_set_size:, -1, 0:1]   # y_test.shape  = (49, 1)

    return [torch.Tensor(x_train), torch.Tensor(y_train), torch.Tensor(x_test), torch.Tensor(y_test)]

lookback = 5
x_train, y_train, x_test, y_test = split_data(price, lookback)
print('x_train.shape = ', x_train.shape)
print('y_train.shape = ', y_train.shape)
print('x_test.shape = ', x_test.shape)
print('y_test.shape = ', y_test.shape)

class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim).requires_grad_()
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        out = self.fc(out[:, -1, :])
        return out

input_dim = 2
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 100

model = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = torch.nn.MSELoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)

hist = np.zeros(num_epochs)
lstm = []
start_time = time.time()
for t in range(num_epochs):
    y_train_pred = model(x_train)
    loss = criterion(y_train_pred, y_train)
    hist[t] = loss.item()
    # print("Epoch ", t, "MSE: ", loss.item())
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
training_time = time.time() - start_time

predict = pd.DataFrame(scaler_close.inverse_transform(y_train_pred.detach().numpy()))
original = pd.DataFrame(scaler_close.inverse_transform(y_train.detach().numpy()))

sns.set_style("darkgrid")
fig = plt.figure()
fig.subplots_adjust(hspace=0.2, wspace=0.2)

plt.subplot(1, 2, 1)
ax = sns.lineplot(x=original.index, y=original[0], label="Data", color='royalblue')
ax = sns.lineplot(x=predict.index, y=predict[0], label="Training Prediction (LSTM)", color='tomato')
ax.set_title('Stock price', size=14, fontweight='bold')
ax.set_xlabel("Days", size=14)
ax.set_ylabel("Cost (USD)", size=14)
ax.set_xticklabels('', size=10)

plt.subplot(1, 2, 2)
ax = sns.lineplot(data=hist, color='royalblue')
ax.set_xlabel("Epoch", size=14)
ax.set_ylabel("Loss", size=14)
ax.set_title("Training Loss", size=14, fontweight='bold')
fig.set_figheight(6)
fig.set_figwidth(16)

# make predictions
y_test_pred = model(x_test)

# invert predictions
y_train_pred = scaler_close.inverse_transform(y_train_pred.detach().numpy())
y_train = scaler_close.inverse_transform(y_train.detach().numpy())
y_test_pred = scaler_close.inverse_transform(y_test_pred.detach().numpy())
y_test = scaler_close.inverse_transform(y_test.detach().numpy())

# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(y_train[:, 0], y_train_pred[:, 0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test[:, 0], y_test_pred[:, 0]))
print('Test Score: %.2f RMSE' % (testScore))
lstm.append(trainScore)
lstm.append(testScore)
lstm.append(training_time)

Reference: https://gitee.com/qiangchen_sh/stock-prediction/blob/master/%E4%BB%A3%E7%A0%81/LSTM%E4%BB%8E%E7%90%86%E8%AE%BA%E5%9F%BA%E7%A1%80%E5%88%B0%E4%BB%A3%E7%A0%81%E5%AE%9E%E6%88%98%204%20%E5%A4%9A%E7%BB%B4%E7%89%B9%E5%BE%81%E8%82%A1%E7%A5%A8%E4%BB%B7%E6%A0%BC%E9%A2%84%E6%B5%8B_Pytorch.ipynb
