当前位置: 首页 > article >正文

Python打卡训练营学习记录Day43

作业:

kaggle找到一个图像数据集,用cnn网络进行训练并且用grad-cam做可视化

进阶:将代码拆分成多个文件

数据集:从谷歌图片中收集的 10 种不同类别的动物图片

数据预处理

import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split

# Per-channel mean / std handed to transforms.Normalize (the widely used
# ImageNet statistics).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def load_data(data_dir, batch_size, num_workers=4, train_ratio=0.8):
    """Build train/validation dataloaders from an ImageFolder-style directory.

    The directory is read with ``torchvision.datasets.ImageFolder`` and
    randomly split into a training part and a validation part.  The training
    part is augmented (random resized crop + horizontal flip); the validation
    part is only resized and center-cropped so that evaluation does not
    depend on random augmentation.

    Args:
        data_dir: Root directory passed to ``ImageFolder``.
        batch_size: Batch size used by both dataloaders.
        num_workers: Worker process count used by both dataloaders.
        train_ratio: Fraction of the samples assigned to the training split;
            must lie strictly between 0 and 1.

    Returns:
        A tuple ``(dataloaders, dataset_sizes, class_names)`` where
        ``dataloaders`` and ``dataset_sizes`` are dicts keyed by ``'train'``
        and ``'val'`` and ``class_names`` is ``ImageFolder.classes``.

    Raises:
        ValueError: If ``train_ratio`` is not strictly between 0 and 1.
    """
    if not 0.0 < train_ratio < 1.0:
        raise ValueError(f'train_ratio must be in (0, 1), got {train_ratio!r}')

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])
    # Deterministic preprocessing for validation: same output size and
    # normalization as training, but no random cropping or flipping.
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ])

    # Two views over the same directory that differ only in their transform.
    # NOTE(review): this relies on ImageFolder listing files in the same
    # (sorted) order on both scans so that indices refer to the same images.
    augmented_view = datasets.ImageFolder(data_dir, train_transform)
    plain_view = datasets.ImageFolder(data_dir, eval_transform)

    train_size = int(train_ratio * len(augmented_view))
    val_size = len(augmented_view) - train_size
    train_dataset, val_split = random_split(augmented_view, [train_size, val_size])
    # Reuse the randomly drawn validation indices on the non-augmented view.
    val_dataset = Subset(plain_view, val_split.indices)

    dataloaders = {
        'train': DataLoader(train_dataset, batch_size=batch_size,
                            shuffle=True, num_workers=num_workers),
        'val': DataLoader(val_dataset, batch_size=batch_size,
                          shuffle=False, num_workers=num_workers),
    }
    dataset_sizes = {'train': train_size, 'val': val_size}
    class_names = augmented_view.classes
    return dataloaders, dataset_sizes, class_names

构建并训练 CNN 模型

import torch.nn as nn


class SimpleCNN(nn.Module):
    """Three-stage convolutional classifier.

    ``features`` stacks three Conv2d(3x3, padding=1) -> ReLU -> MaxPool(2)
    stages with 16, 32 and 64 output channels.  Each pooling step halves the
    spatial size, so a 224x224 input yields a 64 x 28 x 28 feature map, which
    is exactly what the first linear layer of ``classifier`` expects.

    Args:
        num_classes: Number of output logits.
    """

    # Flattened feature size the classifier is built for (224x224 inputs).
    _FLAT_FEATURES = 64 * 28 * 28

    def __init__(self, num_classes):
        super().__init__()
        # Feature extractor.
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Classification head.
        self.classifier = nn.Sequential(
            nn.Linear(self._FLAT_FEATURES, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        The feature map is flattened per sample (batch dimension kept), so an
        input whose spatial size does not produce 64*28*28 features makes the
        first linear layer raise a shape error instead of being silently
        reinterpreted as a different batch size.
        """
        x = self.features(x)
        x = x.flatten(start_dim=1)
        return self.classifier(x)

模型训练模块

import torch
import torch.nn as nn
import torch.optim as optim


def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, num_epochs=25):
    """Train ``model`` and evaluate it after every epoch.

    Each epoch runs a ``'train'`` phase (gradients enabled, optimizer
    stepped) followed by a ``'val'`` phase (gradients disabled), printing the
    mean loss and accuracy of each phase.

    Args:
        model: Module to train; it is moved to CUDA device 0 when available,
            otherwise to the CPU.
        dataloaders: Dict with ``'train'`` and ``'val'`` iterables yielding
            ``(inputs, labels)`` batches.
        dataset_sizes: Dict with the sample count of each phase, used as the
            denominator of the reported loss and accuracy.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object that was passed in.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        # Report a 1-based epoch index against the real total.
        print(f'第 {epoch + 1} 个 epoch,共 {num_epochs} 个 epochs')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Only track autograd history while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # criterion returns a per-batch value; weight it by the batch
                # size so the division by the dataset size below yields a
                # per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += int((preds == labels).sum().item())

            # An empty phase reports NaN instead of raising ZeroDivisionError.
            size = dataset_sizes[phase]
            epoch_loss = running_loss / size if size else float('nan')
            epoch_acc = running_corrects / size if size else float('nan')

            print(f'{phase} 阶段:损失值: {epoch_loss:.4f} 准确率: {epoch_acc:.4f}')

    return model

Grad-CAM可视化模块 

import torch
import torch.nn.functional as F
import numpy as np
import cv2


class GradCAM:
    """Grad-CAM heat-map generator for one layer of a model.

    A forward hook on ``target_layer`` stores the layer's output and attaches
    a tensor hook to it, so the gradient flowing into that output is stored
    during the next backward pass.

    Args:
        model: Model to explain; it is switched to eval mode on every
            forward pass made through this object.
        target_layer: Sub-module of ``model`` whose output is used.
            Assumes the output is a 4-D ``(batch, channels, H, W)`` tensor;
            confirm for other layer types.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None
        # A tensor hook on the layer output (registered inside the forward
        # hook) replaces the deprecated Module.register_backward_hook.
        self._handles = [target_layer.register_forward_hook(self._capture_activations)]

    def _capture_activations(self, module, inputs, output):
        """Forward hook: keep the layer output and watch its gradient."""
        self.activations = output
        if output.requires_grad:
            output.register_hook(self._capture_gradients)

    def _capture_gradients(self, grad):
        """Tensor hook: keep the gradient that reaches the layer output."""
        # Returns None so the gradient itself is left unchanged.
        self.gradients = grad.detach()

    def remove_hooks(self):
        """Detach the forward hook from the target layer."""
        for handle in self._handles:
            handle.remove()
        self._handles = []

    def forward(self, input_tensor):
        """Put the model in eval mode and return ``model(input_tensor)``."""
        self.model.eval()
        return self.model(input_tensor)

    def __call__(self, input_tensor, target_class=None):
        """Shorthand for :meth:`generate_cam`."""
        return self.generate_cam(input_tensor, target_class)

    def generate_cam(self, input_tensor, target_class=None):
        """Compute the Grad-CAM map of the first sample in ``input_tensor``.

        Args:
            input_tensor: Model input; its dimensions 2 and 3 are used as the
                height and width of the returned map.
            target_class: Index of the output column to explain.  When
                ``None``, the arg-max of the first sample's output is used.

        Returns:
            A 2-D ``numpy`` array of shape ``(H, W)`` with values scaled to
            ``[0, 1]``.  If the map is constant it is returned as all zeros.

        Raises:
            RuntimeError: If the hooks captured no activations or gradients
                (for example because ``target_layer`` was not executed).
        """
        self.gradients = None
        self.activations = None

        # Gradients are required even if the caller is inside torch.no_grad().
        with torch.enable_grad():
            output = self.forward(input_tensor)
            if target_class is None:
                target_class = int(torch.argmax(output[0]))
            self.model.zero_grad()
            # Back-propagate the selected score of the first sample only.
            output[0, target_class].backward()

        if self.gradients is None or self.activations is None:
            raise RuntimeError(
                'GradCAM hooks captured no data; make sure target_layer is '
                'part of the forward pass and its output requires grad.'
            )

        gradients = self.gradients[0]
        # Work on a detached tensor so the captured activations are not
        # modified in place.
        activations = self.activations[0].detach()

        # Channel weights: spatial mean of the gradients.
        channel_weights = gradients.mean(dim=(1, 2))
        weighted_sum = (channel_weights[:, None, None] * activations).sum(dim=0)

        cam = weighted_sum.float().cpu().numpy()
        # Keep only positive evidence.
        cam = np.maximum(cam, 0)
        # cv2.resize takes (width, height).
        cam = cv2.resize(cam, (input_tensor.shape[3], input_tensor.shape[2]))

        cam = cam - np.min(cam)
        peak = np.max(cam)
        # Skip the division for a constant map to avoid producing NaNs.
        if peak > 0:
            cam = cam / peak
        return cam

 主程序

from data_loader import load_data
from model import SimpleCNN
from train import train_model
from grad_cam import GradCAM
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from PIL import Image
import numpy as np
import cv2
import sys
import os
# 将当前目录添加到 Python 模块搜索路径中
sys.path.append(os.path.dirname(os.path.abspath(__file__)))


def main():
    """Train SimpleCNN on the 'raw-img' folder and save a Grad-CAM overlay.

    Steps: load the data, train for 5 epochs with SGD, compute a Grad-CAM map
    for one image and write the overlay to 'grad_cam_result.jpg'.
    """
    # The plotting calls below need pyplot, which the file-level imports
    # do not provide.
    import matplotlib.pyplot as plt

    # Batch size; adjust as needed.
    batch_size = 32
    dataloaders, dataset_sizes, class_names = load_data('raw-img', batch_size)

    num_classes = len(class_names)
    model = SimpleCNN(num_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    trained_model = train_model(model, dataloaders, dataset_sizes, criterion,
                                optimizer, num_epochs=5)

    # features[0] is the first convolution of SimpleCNN.
    # NOTE(review): Grad-CAM is usually computed on the last convolution
    # (features[6] here) - consider switching if coarser, class-specific
    # maps are wanted.
    grad_cam = GradCAM(trained_model, target_layer=trained_model.features[0])

    img_path = 'path/to/your/image.jpg'
    img = Image.open(img_path).convert('RGB')

    # Resize once and reuse the result for both the model input and the
    # displayed picture, so the heat map and the image have the same size.
    resized_img = transforms.Resize((224, 224))(img)
    to_model_input = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    # The model may have been moved to the GPU during training; the input
    # has to live on the same device.
    device = next(trained_model.parameters()).device
    input_tensor = to_model_input(resized_img).unsqueeze(0).to(device)

    cam = grad_cam.generate_cam(input_tensor)

    plt.imshow(resized_img)
    plt.imshow(cam, alpha=0.5, cmap='jet')
    plt.axis('off')
    plt.savefig('grad_cam_result.jpg')
    plt.show()


if __name__ == '__main__':
    main()

 @浙大疏锦行

http://www.lryc.cn/news/2403081.html

相关文章:

  • 【Android基础回顾】二:handler消息机制
  • 每日Prompt:每天上班的状态
  • .net ORM框架dapper批量插入
  • C++11 右值引用:从入门到精通
  • .net 使用MQTT订阅消息
  • Python实现快速排序的三种经典写法及算法解析
  • 【递归、搜索与回溯】综合练习(四)
  • 强化学习入门:Gym实现CartPole随机智能体
  • STM32:CAN总线精髓:特性、电路、帧格式与波形分析详解
  • 贝叶斯深度学习!华科大《Nat. Commun.》发表BNN重大突破!
  • 【大模型LLM学习】Flash-Attention的学习记录
  • 三、元器件的选型
  • 精益数据分析(95/126):Socialight的定价转型启示——B2B商业模式的价格策略与利润优化
  • stm32_DMA
  • 物联网数据归档之数据存储方案选择分析
  • 【自动驾驶避障开发】如何让障碍物在 RViz 中‘显形’?呈现感知数据转 Polygon 全流程
  • 【C语言】C语言经典小游戏:贪吃蛇(上)
  • usbutils工具的使用帮助
  • vue2中使用jspdf插件实现页面自定义块pdf下载
  • 如何防止服务器被用于僵尸网络(Botnet)攻击 ?
  • 基于cornerstone3D的dicom影像浏览器 第二十九章 自定义菜单组件
  • 【Block总结】DBlock,结合膨胀空间注意模块(Di-SpAM)和频域模块Gated-FFN|即插即用|CVPR2025
  • 【学习笔记】单例类模板
  • 字符串加密(华为OD)
  • 口罩佩戴检测算法AI智能分析网关V4工厂/工业等多场景守护公共卫生安全
  • Double/Debiased Machine Learning
  • HarmonyOS Next 弹窗系列教程(4)
  • 【C】-递归
  • 飞马LiDAR500雷达数据预处理
  • Kerberos面试内容整理-在 Linux/Windows 中的 Kerberos 实践