当前位置: 首页 > news >正文

mindspore的MLP模型(多层感知机)

导入模块

import hashlib
import os
import tarfile
import zipfile
import requests
import numpy as np
import pandas as pd
import mindspore
import mindspore.dataset as ds
from mindspore import nn
import mindspore.ops as ops
import mindspore.numpy as mnp
from mindspore import Tensor
from IPython import display
from matplotlib import pyplot as plt

数据预处理

数据下载:https://www.kaggle.com/datasets/ahsan81/hotel-reservations-classification-dataset

train_data = pd.read_csv("Hotel Reservations_train.csv")
test_data = pd.read_csv("Hotel Reservations_test.csv")print(train_data.shape)
print(test_data.shape)
(30000, 20)
(6275, 20)
# 可去掉第0列与第1列的信息
print(train_data.iloc[0:4, [0, 1, 2, -3, -2, -1]])
   Unnamed: 0 Booking_ID  no_of_adults  avg_price_per_room  \
0           0   INN00001             2               65.00   
1           1   INN00002             2              106.68   
2           2   INN00003             1               60.00   
3           3   INN00004             2              100.00   no_of_special_requests booking_status  
0                       0   Not_Canceled  
1                       1   Not_Canceled  
2                       0       Canceled  
3                       0       Canceled  
# 将train_data和test_data合并,后面做数据预处理方便
all_features = pd.concat((train_data.iloc[:, 2:-1], test_data.iloc[:, 2:-1]))all_features
no_of_adultsno_of_childrenno_of_weekend_nightsno_of_week_nightstype_of_meal_planrequired_car_parking_spaceroom_type_reservedlead_timearrival_yeararrival_montharrival_datemarket_segment_typerepeated_guestno_of_previous_cancellationsno_of_previous_bookings_not_canceledavg_price_per_roomno_of_special_requests
02012Meal Plan 10Room_Type 12242017102Offline00065.000
12023Not Selected0Room_Type 152018116Online000106.681
21021Meal Plan 10Room_Type 112018228Online00060.000
32002Meal Plan 10Room_Type 12112018520Online000100.000
42011Not Selected0Room_Type 1482018411Online00094.500
......................................................
62703026Meal Plan 10Room_Type 485201883Online000167.801
62712013Meal Plan 10Room_Type 122820181017Online00090.952
62722026Meal Plan 10Room_Type 1148201871Online00098.392
62732003Not Selected0Room_Type 1632018421Online00094.500
62742012Meal Plan 10Room_Type 120720181230Offline000161.670

36275 rows × 17 columns

# 将所有缺失的值替换为相应特征的平均值。 通过将特征重新缩放到零均值和单位方差来标准化数据# 先将为数字类型的列取出来,dtypes[all_features.dtypes != 'object'].index 返回类型是数字的列的索引
numeric_features = all_features.dtypes[all_features.dtypes != 'object'].index
# 之后对其应用apply方法 apply中对每列进行了标准化(Z-score标准化方法)
all_features[numeric_features] = all_features[numeric_features].apply(lambda x: (x - x.mean()) / (x.std()))
# 在标准化数据之后,所有均值消失,因此我们可以将缺失值设置为0
all_features[numeric_features] = all_features[numeric_features].fillna(0)
# 处理离散值。我们用独热编码替换它们
# 独热编码:例如,“MSZoning”包含值“RL”和“Rm”。 我们将创建两个新的指示器特征“MSZoning_RL”和“MSZoning_RM”,其值为0或1。print(all_features.shape)# “Dummy_na=True”将“na”(缺失值)视为有效的特征值,并为其创建指示符特征
all_features = pd.get_dummies(all_features, dummy_na=True)print(all_features.shape)
(36275, 17)
(36275, 33)
all_labels = pd.concat((train_data.iloc[:,-1], test_data.iloc[:, -1]))change = {'Not_Canceled':1,'Canceled':0}
all_labels = all_labels.map(change)
all_labels
0       1
1       1
2       0
3       0
4       0..
6270    1
6271    0
6272    1
6273    0
6274    1
Name: booking_status, Length: 36275, dtype: int64
n_train = train_data.shape[0]         # 提取训练样本数
train_features = all_features[:n_train].values.astype(np.float32)      # 注意要统一数据的类型:np.float32
test_features = all_features[n_train:].values.astype(np.float32)
train_labels = all_labels.iloc[:n_train].values.astype(np.int64)
test_labels = all_labels.iloc[n_train:].values.astype(np.int64)
class SyntheticData():  def __init__(self,features,labels):self.features, self.labels = features , labelsdef __getitem__(self, index):   # __getitem__(self, index) 一般用来迭代序列(常见序列如:列表、元组、字符串)return self.features[index], self.labels[index]def __len__(self):return len(self.labels)
# 数据集
train_dataset= ds.GeneratorDataset(source=SyntheticData(train_features, train_labels), column_names=['features', 'label'],python_multiprocessing=False)test_dataset= ds.GeneratorDataset(source=SyntheticData(test_features, test_labels ), column_names=['features', 'label'],python_multiprocessing=False)

构建模型

class Accumulator:  """累加器"""def __init__(self, n):self.data = [0.0] * ndef add(self, *args):self.data = [a + float(b) for a, b in zip(self.data, args)]def reset(self):self.data = [0.0] * len(self.data)def __getitem__(self, idx):return self.data[idx]
def accuracy(y_hat, y):  """计算预测正确的数量"""if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:   # 判断y_hat是不是矩阵y_hat = y_hat.argmax(axis=1)                  # 得到每样本预测概率最大所属分类的下标cmp = y_hat.asnumpy() == y.asnumpy()              # y_hat.asnumpy() == y.asnumpy()返回的是一个布尔数组return float(cmp.sum())def evaluate_accuracy(net, data_iter):  """计算在指定数据集上模型的精度"""metric = Accumulator(2)         # 累加器,metric[0]记录正确预测数,metric[1]记录预测总数for X, y in data_iter:metric.add(accuracy(net(X), y), y.size)return metric[0] / metric[1]    # 正确预测数 / 预测总数
def train_epoch( train_iter, learning_rate, weight_decay, batch_size):  """训练模型一个迭代周期"""net = nn.SequentialCell([nn.Dense(all_features.shape[1], 32),nn.ReLU(),nn.Dense(32, 16),nn.ReLU(),nn.Dense(16, 2)]) loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')#optim = nn.SGD(net.trainable_params(), learning_rate = learning_rate, weight_decay = weight_decay)optim = nn.Adam(net.trainable_params(), learning_rate = learning_rate, weight_decay = weight_decay) net_with_loss = nn.WithLossCell(net, loss)                net_train = nn.TrainOneStepCell(net_with_loss, optim)     metric = Accumulator(3)for X, y in train_iter:l = net_train(X, y)y_hat = net(X)metric.add(float(l.sum().asnumpy()), accuracy(y_hat, y), y.size)return metric[0] / metric[2], metric[1] / metric[2] ,net      # 误差 / 预测总数 ,正确预测数 / 预测总数
def trainer( train_iter, test_iter, num_epochs, learning_rate, weight_decay, batch_size, train_acc_plot, test_acc_plot):  """训练模型"""train_iter = train_iter.batch(batch_size = batch_size, num_parallel_workers=1)test_iter = test_iter.batch(batch_size = batch_size, num_parallel_workers=1)for epoch in range(num_epochs):train_metrics = train_epoch(train_iter, learning_rate, weight_decay, batch_size)train_loss, train_acc, net = train_metricstest_acc = evaluate_accuracy(net, test_iter)train_acc_plot.append(float(train_acc))test_acc_plot.append(float(test_acc))print('最终训练集精度:', train_acc, '最终测试集精度:',test_acc )# 检测assert train_loss < 0.6, train_lossassert train_acc <= 1 and train_acc > 0.7, train_accassert test_acc <= 1 and test_acc > 0.7, test_acc

训练

num_epochs,  weight_decay, batch_size  =20, 0, 64# 动态学习率
learning_rate = 0.1
end_learning_rate = 0.05
decay_steps = 6
power = 0.5
learning_rate  = nn.PolynomialDecayLR(learning_rate, end_learning_rate, decay_steps, power)train_acc_plot=[]
test_acc_plot=[]
trainer( train_dataset, test_dataset, num_epochs, learning_rate, weight_decay, batch_size, train_acc_plot, test_acc_plot)
最终训练集精度: 0.8078666666666666 最终测试集精度: 0.8124302788844622
# 构建loss-step曲线可了解loss随epoch的变化情况plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=Falsex=np.linspace(0, num_epochs-1,num_epochs)plt.figure(figsize=(4,3)) 
plt.xlabel(u"epoch")
plt.ylabel(u"精度")
plt.plot(x, train_acc_plot, label='train acc')
plt.plot(x, test_acc_plot, label='test acc')
plt.legend(loc="best")
plt.tight_layout(rect = [0,0,1,1]) 

在这里插入图片描述

http://www.lryc.cn/news/20857.html

相关文章:

  • 【论文极速读】VQ-VAE:一种稀疏表征学习方法
  • Flask-Blueprint
  • png图片转eps格式
  • English Learning - L2 语音作业打卡 Day2 2023.2.23 周四
  • 低频量化之 可转债 配债 策略数据 - 全网独家
  • 论文阅读_DALLE-2的unCLIP模型
  • 软件测试5年,历经3轮面试成功拿下华为Offer,24K/16薪不过分吧
  • 【软件工程】课程作业(三道题目:需求分析、概要设计、详细设计、软件测试)
  • 05 DC-AC逆变器(DCAC Converter / Inverter)简介
  • 带你深层了解c语言指针
  • 2-MATLAB APP Design-下拉菜单栏的使用
  • 七、HTTPTomcatServlet
  • LeetCode 热题 C++ 198. 打家劫舍
  • C语言学习笔记——程序环境和预处理
  • 「JVM 高效并发」Java 内存模型
  • C语言刷题(2)——“C”
  • 第一个 Spring MVC 注解式开发案例(初学必看)
  • openresty学习笔记
  • 微信小程序DAY3
  • 【CAN】手把手教你学习CAN总线(一)
  • JUC 体系的基石——AQS
  • Qt中信号与槽的使用
  • 力扣-销售员
  • HTML综合案例练习
  • MySQL运维
  • 【网络原理10】构造HTTP请求、HTTPS加密
  • Allegro如何锁定报表界面操作指导
  • 基于STM32的微型电子琴设计
  • Shell输入输出重定向
  • 华为OD机试-运维日志排序