当前位置：首页 > news >正文

深度学习篇---简单果实分类网络

news 2025/7/7 6:08:50

下面我将提供一个使用 Python 从零实现果实分类模型的完整流程，包括数据准备、模型构建、训练和部署，不依赖任何深度学习框架，仅使用 NumPy 进行数值计算。

1. 数据准备与预处理

首先需要准备果实图像数据集，将其分为好果和坏果两类，并进行预处理：

import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler# 数据加载与预处理
def load_fruit_images(data_dir, img_size=(100, 100)):
    """Load the fruit images under ``data_dir`` as flattened RGB vectors.

    ``data_dir`` is expected to contain a ``good`` and/or a ``bad``
    sub-directory; a class directory that does not exist is skipped.

    Args:
        data_dir: Root directory of the dataset.
        img_size: Size passed to ``Image.resize`` for every image.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: one flattened image per row of
        ``X`` and the matching labels in ``y`` (1 = bad fruit, 0 = good
        fruit). Both arrays are empty when no image file is found.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    X = []
    y = []

    # Walk the two class directories: good fruit and bad fruit
    for class_name in ['good', 'bad']:
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.exists(class_dir):
            continue
        label = 1 if class_name == 'bad' else 0

        # os.listdir order is arbitrary; sorting keeps the sample order (and
        # therefore any seeded train/test split) reproducible across machines.
        for img_file in sorted(os.listdir(class_dir)):
            # Case-insensitive match so files such as "IMG_01.JPG" are kept.
            if not img_file.lower().endswith(image_suffixes):
                continue
            img_path = os.path.join(class_dir, img_file)
            # The context manager closes the underlying file handle; the
            # converted/resized copy stays valid after the source is closed.
            with Image.open(img_path) as raw:
                img = raw.convert('RGB').resize(img_size)
            # Flatten the image into a single feature vector
            X.append(np.array(img).flatten())
            y.append(label)

    return np.array(X), np.array(y)


# Split into training and test sets
def prepare_data(data_dir, test_size=0.2, random_state=42):
    """Load the dataset, split it and standardize the features.

    Args:
        data_dir: Dataset root passed to ``load_fruit_images``.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.

    Returns:
        ``(X_train, X_test, y_train, y_test, scaler)`` where ``scaler`` is
        the ``StandardScaler`` fitted on the training features.
    """
    features, labels = load_fruit_images(data_dir)

    # Stratified split on the labels
    split = train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=random_state,
        stratify=labels,
    )
    X_train, X_test, y_train, y_test = split

    # The scaler is fitted on the training features only and then applied
    # to both subsets.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test, scaler


# Example usage
data_dir = "path/to/your/fruit_dataset"  # Placeholder: replace with the actual dataset path
# Build the standardized train/test arrays and keep the fitted scaler
X_train, X_test, y_train, y_test, scaler = prepare_data(data_dir)

2. 神经网络模型构建

接下来实现一个简单的两层神经网络，包含输入层、隐藏层和输出层：

class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Trained with full-batch gradient descent on the binary cross-entropy
    loss. The forward pass caches ``Z1``, ``A1``, ``Z2`` and ``A2`` on the
    instance; ``backward_propagation`` reads those cached values.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Initialize weights with small random values and biases with zeros."""
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    @staticmethod
    def _as_column(Y):
        """Return 1-D labels as a column so they cannot broadcast to (m, m)."""
        Y = np.asarray(Y)
        return Y.reshape(-1, 1) if Y.ndim == 1 else Y

    def sigmoid(self, Z):
        """Return the element-wise logistic function of ``Z``.

        Only ``exp(-|Z|)`` is evaluated, so large-magnitude inputs cannot
        overflow the way ``exp(-Z)`` does for very negative ``Z``.
        """
        Z = np.asarray(Z, dtype=float)
        decay = np.exp(-np.abs(Z))
        return np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def sigmoid_derivative(self, A):
        """Return the sigmoid derivative expressed through its output ``A``."""
        return A * (1 - A)

    def forward_propagation(self, X):
        """Run the forward pass on ``X`` and return the output activations."""
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.sigmoid(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)
        return self.A2

    def compute_cost(self, A2, Y):
        """Return the mean binary cross-entropy between ``A2`` and ``Y``.

        Predictions are clipped away from exactly 0 and 1 so that saturated
        outputs yield a finite cost instead of ``inf``/``nan``.
        """
        Y = self._as_column(Y)
        eps = 1e-15
        A2 = np.clip(A2, eps, 1 - eps)
        cost = -np.mean(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))
        return cost

    def backward_propagation(self, X, Y):
        """Return ``(dW1, db1, dW2, db2)`` for the most recent forward pass."""
        Y = self._as_column(Y)
        m = X.shape[0]

        # Output-layer gradients
        dZ2 = self.A2 - Y
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        # Hidden-layer gradients
        dZ1 = np.dot(dZ2, self.W2.T) * self.sigmoid_derivative(self.A1)
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        return dW1, db1, dW2, db2

    def update_parameters(self, dW1, db1, dW2, db2, learning_rate):
        """Apply one gradient-descent step in place."""
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

    def train(self, X, Y, num_iterations, learning_rate, print_cost=False):
        """Train with full-batch gradient descent.

        Args:
            X: Feature matrix, one sample per row.
            Y: Labels; a 1-D vector is treated as a column vector.
            num_iterations: Number of gradient-descent steps.
            learning_rate: Step size.
            print_cost: When true, print the cost every 100 iterations.

        Returns:
            The list of costs recorded every 100 iterations.
        """
        costs = []
        for i in range(num_iterations):
            A2 = self.forward_propagation(X)
            cost = self.compute_cost(A2, Y)
            dW1, db1, dW2, db2 = self.backward_propagation(X, Y)
            self.update_parameters(dW1, db1, dW2, db2, learning_rate)

            # Record (and optionally report) the cost every 100 iterations
            if i % 100 == 0:
                costs.append(cost)
                if print_cost:
                    print(f"迭代 {i}: 损失 = {cost}")
        return costs

    def predict(self, X):
        """Return 0/1 predictions by thresholding the output at 0.5."""
        A2 = self.forward_propagation(X)
        predictions = (A2 > 0.5).astype(int)
        return predictions

3. 模型训练与评估

使用准备好的数据训练模型并评估其性能：

# Model training
input_size = X_train.shape[1]  # number of input features
hidden_size = 128  # number of hidden-layer neurons
output_size = 1  # number of output neurons (binary classification)
# This statement had been swallowed by the comment on the previous line,
# which left `model` undefined for everything below.
model = NeuralNetwork(input_size, hidden_size, output_size)
learning_rate = 0.01
num_iterations = 1000

# Train the model (labels are passed as a column vector)
costs = model.train(X_train, y_train.reshape(-1, 1), num_iterations, learning_rate, print_cost=True)

# Model evaluation
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

# Accuracy: fraction of predictions equal to the column-shaped labels
train_accuracy = np.mean(train_predictions == y_train.reshape(-1, 1))
test_accuracy = np.mean(test_predictions == y_test.reshape(-1, 1))
print(f"训练集准确率: {train_accuracy * 100:.2f}%")
print(f"测试集准确率: {test_accuracy * 100:.2f}%")

4. 模型保存与部署

将训练好的模型保存为文件，并实现一个简单的部署接口：

import pickle# 保存模型和标准化器
def save_model(model, scaler, model_path="fruit_classifier.pkl", scaler_path="scaler.pkl"):
    """Pickle the network parameters and the scaler into two files.

    Args:
        model: Object exposing the ``W1``, ``b1``, ``W2`` and ``b2`` attributes.
        scaler: Scaler object; it is pickled as-is.
        model_path: Destination file for the parameter dictionary.
        scaler_path: Destination file for the scaler.
    """
    # Only the parameters are stored, keyed by attribute name
    parameter_names = ('W1', 'b1', 'W2', 'b2')
    model_params = {name: getattr(model, name) for name in parameter_names}

    with open(model_path, 'wb') as params_file:
        pickle.dump(model_params, params_file)

    with open(scaler_path, 'wb') as scaler_file:
        pickle.dump(scaler, scaler_file)


# Load the model and the scaler
def load_model(model_path="fruit_classifier.pkl", scaler_path="scaler.pkl"):
    """Rebuild a ``NeuralNetwork`` and its scaler from two pickle files.

    Args:
        model_path: Pickle file holding the ``W1``/``b1``/``W2``/``b2`` dict.
        scaler_path: Pickle file holding the scaler.

    Returns:
        A ``(model, scaler)`` tuple.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(model_path, 'rb') as params_file:
        params = pickle.load(params_file)

    # Layer sizes are recovered from the shapes of the stored weights
    n_inputs, n_hidden = params['W1'].shape
    n_outputs = params['W2'].shape[1]

    # Build a network of the right size, then overwrite its random
    # initialization with the stored parameters.
    model = NeuralNetwork(n_inputs, n_hidden, n_outputs)
    for name in ('W1', 'b1', 'W2', 'b2'):
        setattr(model, name, params[name])

    with open(scaler_path, 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)

    return model, scaler


# Deployment example: classify a single image
def predict_fruit(image_path, model, scaler, img_size=(100, 100)):
    """Classify a single image file as good or bad fruit.

    Args:
        image_path: Path of the image to classify.
        model: Network whose ``forward_propagation`` output at ``[0][0]`` is
            thresholded at 0.5 (the same threshold ``predict`` uses), with
            values above it reported as bad fruit.
        scaler: Scaler exposing ``transform``.
        img_size: Size passed to ``Image.resize``.

    Returns:
        A dict with ``'result'`` (the label string) and ``'confidence'``
        (confidence in the reported label, as a percentage).
    """
    # The context manager closes the file handle; the converted/resized copy
    # remains usable afterwards.
    with Image.open(image_path) as raw:
        img = raw.convert('RGB').resize(img_size)
    img_array = np.array(img).flatten().reshape(1, -1)

    # Standardize with the supplied scaler
    img_scaled = scaler.transform(img_array)

    # A single forward pass gives both the probability and the class; the
    # previous version ran the network twice for the same input.
    bad_probability = float(model.forward_propagation(img_scaled)[0][0])
    is_bad = bad_probability > 0.5

    result = "坏果" if is_bad else "好果"
    # Report the confidence in the label that was actually chosen
    confidence = bad_probability if is_bad else (1 - bad_probability)
    return {'result': result, 'confidence': float(confidence) * 100}


# Example usage
# Write the trained model and scaler to disk, then read them back
save_model(model, scaler)
loaded_model, loaded_scaler = load_model()
# Classify a new image (placeholder path) with the reloaded model
prediction = predict_fruit("path/to/new/fruit_image.jpg", loaded_model, loaded_scaler)
print(f"预测结果: {prediction['result']}, 置信度: {prediction['confidence']:.2f}%")

5. 完整流程总结

这个实现包含了果实分类模型的全流程：

数据准备：加载图像数据，进行预处理和划分
模型构建：使用 NumPy 实现了一个简单的两层神经网络
模型训练：通过前向传播、反向传播和参数更新训练模型
模型评估：计算训练集和测试集上的准确率
模型保存与部署：将模型保存为文件，并实现预测接口

这个实现没有使用任何深度学习框架，完全基于 NumPy，便于理解神经网络的基本原理。在实际应用中，可以通过增加更多层、使用更复杂的激活函数或优化算法来提高模型性能。

根据之前实现的果实分类模型，部署时需要安装以下 Python 包：

基础依赖：
- numpy：用于数值计算和矩阵运算
- Pillow：用于图像处理和加载
- scikit-learn：用于数据标准化（StandardScaler）
摄像头捕获（可选）：
- opencv-python：用于打开摄像头并实时捕获视频流

你可以使用以下命令安装这些依赖：

pip install numpy pillow scikit-learn opencv-python

打开摄像头实时监测的实例程序

下面是一个使用 OpenCV 打开摄像头并实时进行果实分类的示例程序：

import cv2
import numpy as np
from PIL import Image
from sklearn.preprocessing import StandardScaler
import pickle
import time


class NeuralNetwork:
    """Inference-side copy of the network: one hidden layer, sigmoid units.

    The forward pass caches ``Z1``, ``A1``, ``Z2`` and ``A2`` on the instance.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Initialize weights with small random values and biases with zeros."""
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, Z):
        """Return the element-wise logistic function of ``Z``.

        Only ``exp(-|Z|)`` is evaluated, so large-magnitude inputs cannot
        overflow the way ``exp(-Z)`` does for very negative ``Z``.
        """
        Z = np.asarray(Z, dtype=float)
        decay = np.exp(-np.abs(Z))
        return np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def sigmoid_derivative(self, A):
        """Return the sigmoid derivative expressed through its output ``A``."""
        return A * (1 - A)

    def forward_propagation(self, X):
        """Run the forward pass on ``X`` and return the output activations."""
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.sigmoid(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)
        return self.A2

    def predict(self, X):
        """Return 0/1 predictions by thresholding the output at 0.5."""
        A2 = self.forward_propagation(X)
        predictions = (A2 > 0.5).astype(int)
        return predictions


# Load the model and the scaler
def load_model(model_path="fruit_classifier.pkl", scaler_path="scaler.pkl"):
    """Load the pickled network parameters and scaler.

    Args:
        model_path: Pickle file holding the ``W1``/``b1``/``W2``/``b2`` dict.
        scaler_path: Pickle file holding the scaler.

    Returns:
        A ``(model, scaler)`` tuple.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(model_path, 'rb') as fh:
        stored = pickle.load(fh)

    w1_shape = stored['W1'].shape
    w2_shape = stored['W2'].shape
    # W1 gives the input and hidden sizes, W2 the output size
    model = NeuralNetwork(w1_shape[0], w1_shape[1], w2_shape[1])

    # Replace the random initialization with the stored parameters
    model.W1, model.b1 = stored['W1'], stored['b1']
    model.W2, model.b2 = stored['W2'], stored['b2']

    with open(scaler_path, 'rb') as fh:
        scaler = pickle.load(fh)

    return model, scaler


# Preprocess a camera frame
def preprocess_frame(frame, img_size=(100, 100)):
    """Turn a camera frame into a single-row feature array.

    Args:
        frame: Frame passed to ``cv2.cvtColor`` with ``COLOR_BGR2RGB``.
        img_size: Size passed to ``Image.resize``.

    Returns:
        The resized frame flattened and reshaped to ``(1, -1)``.
    """
    # Convert BGR to RGB, then wrap the result in a PIL image
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    resized = Image.fromarray(rgb_frame).resize(img_size)

    # Flatten to one row of pixel values
    flat_pixels = np.array(resized).flatten()
    return flat_pixels.reshape(1, -1)


# Main routine: open the camera and classify frames in real time
def real_time_detection(model, scaler, img_size=(100, 100), confidence_threshold=0.7):
    """Classify frames from the default camera until 'q' is pressed.

    Args:
        model: Network whose ``forward_propagation`` output at ``[0][0]`` is
            thresholded at 0.5 (the same threshold ``predict`` uses), with
            values above it shown as bad fruit.
        scaler: Scaler exposing ``transform``.
        img_size: Size each frame is resized to before classification.
        confidence_threshold: Kept for interface compatibility; this
            function does not currently use it.
    """
    # Open the default camera
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("无法打开摄像头")
        cap.release()
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("无法获取帧")
                break

            # Draw on a copy so the frame fed to the model stays untouched
            display_frame = frame.copy()

            img_array = preprocess_frame(frame, img_size)
            img_scaled = scaler.transform(img_array)

            # A single forward pass gives both the probability and the class;
            # the previous version ran the network twice per frame.
            bad_probability = float(model.forward_propagation(img_scaled)[0][0])
            is_bad = bad_probability > 0.5
            confidence = bad_probability if is_bad else (1 - bad_probability)

            # cv2.putText only draws the built-in Hershey fonts, which have no
            # CJK glyphs (they come out as '?'), so the overlay uses ASCII.
            result_text = "Bad" if is_bad else "Good"
            confidence_text = f"Confidence: {confidence * 100:.2f}%"

            # BGR colors: red for bad fruit, green for good fruit
            text_color = (0, 0, 255) if is_bad else (0, 255, 0)

            cv2.putText(display_frame, f"Class: {result_text}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)
            cv2.putText(display_frame, confidence_text, (10, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)

            cv2.imshow('果实分类实时监测', display_frame)

            # Press 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and windows even if a frame fails to process
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    # Load the model and the scaler
    model, scaler = load_model("fruit_classifier.pkl", "scaler.pkl")

    # Start real-time detection
    print("启动摄像头实时监测... 按'q'键退出")
    real_time_detection(model, scaler)

程序说明

这个实时监测程序实现了以下功能：

模型加载：从保存的文件中加载训练好的神经网络模型和标准化器
摄像头捕获：使用 OpenCV 打开默认摄像头并持续捕获视频帧
预处理：将每一帧图像调整为模型训练时的尺寸，并转换为合适的格式
实时预测：对每一帧进行果实分类预测，并计算置信度
结果可视化：在图像上显示分类结果和置信度，用不同颜色区分好果和坏果

使用方法很简单，运行程序后摄像头会自动打开，将果实对准摄像头即可看到实时分类结果。按键盘上的 'q' 键可以退出程序。

注意，为了获得更好的实时性能，你可能需要调整图像尺寸或简化模型结构。

减少依赖的果实分类模型部署方案

可以通过以下方式进一步减少 Python 包的使用：

替代 OpenCV：使用 pygame 库处理摄像头捕获（pygame 同样是需要单独安装的第三方库，并非纯 Python 实现，但可以替代 OpenCV）
简化标准化：将 StandardScaler 的功能集成到模型中，避免依赖 scikit-learn
移除 PIL 依赖：使用 NumPy 直接处理图像数据

以下是一个更精简的实现方案，代码中仅导入 numpy、pygame 和 pickle（注意：如果 scaler.pkl 中保存的仍是 scikit-learn 的 StandardScaler 对象，反序列化时环境中仍需安装 scikit-learn）：

import numpy as np
import pygame
import pygame.camera
import pickle
import time
import sys


class NeuralNetwork:
    """Minimal inference-only network: one hidden layer, sigmoid units.

    The forward pass caches ``Z1``, ``A1``, ``Z2`` and ``A2`` on the instance.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Initialize weights with small random values and biases with zeros."""
        self.W1 = np.random.randn(input_size, hidden_size) * 0.01
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.01
        self.b2 = np.zeros((1, output_size))

    def sigmoid(self, Z):
        """Return the element-wise logistic function of ``Z``.

        Only ``exp(-|Z|)`` is evaluated, so large-magnitude inputs cannot
        overflow the way ``exp(-Z)`` does for very negative ``Z``.
        """
        Z = np.asarray(Z, dtype=float)
        decay = np.exp(-np.abs(Z))
        return np.where(Z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def forward_propagation(self, X):
        """Run the forward pass on ``X`` and return the output activations."""
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.sigmoid(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)
        return self.A2

    def predict(self, X):
        """Return 0/1 predictions by thresholding the output at 0.5."""
        A2 = self.forward_propagation(X)
        return (A2 > 0.5).astype(int)


# Minimal scaler class that replaces StandardScaler
class SimpleScaler:
    """Small stand-in for a standardizing scaler: ``(X - mean) / std``."""

    def __init__(self, mean=None, std=None):
        """Store the per-feature mean and standard deviation."""
        self.mean, self.std = mean, std

    def transform(self, X):
        """Return ``X`` centered by ``mean`` and divided by ``std + 1e-8``."""
        centered = X - self.mean
        # The small constant guards against division by zero
        denominator = self.std + 1e-8
        return centered / denominator


# Load the model and the scaler
def load_model(model_path="fruit_classifier.pkl", scaler_path="scaler.pkl"):
    """Load the network parameters and wrap the scaler in a ``SimpleScaler``.

    Args:
        model_path: Pickle file holding the ``W1``/``b1``/``W2``/``b2`` dict.
        scaler_path: Pickle file holding an object with ``mean_`` and
            ``scale_`` attributes.

    Returns:
        A ``(model, scaler)`` tuple where ``scaler`` is a ``SimpleScaler``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(model_path, 'rb') as params_file:
        weights = pickle.load(params_file)

    # Layer sizes come from the shapes of the stored weight matrices
    n_inputs, n_hidden = weights['W1'].shape
    n_outputs = weights['W2'].shape[1]

    model = NeuralNetwork(n_inputs, n_hidden, n_outputs)
    for attr in ('W1', 'b1', 'W2', 'b2'):
        setattr(model, attr, weights[attr])

    # NOTE(review): if this file holds a pickled scikit-learn StandardScaler,
    # unpickling it presumably still needs scikit-learn importable — confirm.
    with open(scaler_path, 'rb') as scaler_file:
        fitted = pickle.load(scaler_file)

    # Keep only the statistics that SimpleScaler needs
    scaler = SimpleScaler(mean=fitted.mean_, std=fitted.scale_)
    return model, scaler


# Preprocess a camera frame
def preprocess_frame(surface, img_size=(100, 100)):
    """Turn a pygame surface into a single-row feature array.

    Args:
        surface: Surface to scale and read pixels from.
        img_size: Target size passed to ``pygame.transform.scale``.

    Returns:
        The pixel data with its first two axes swapped, reshaped to
        ``(1, -1)``.
    """
    scaled = pygame.transform.scale(surface, img_size)

    # Pixel data of the scaled surface as a NumPy array
    pixels = pygame.surfarray.pixels3d(scaled)

    # Swap the first two axes, then flatten to one row
    # (presumably converts pygame's x-first layout to rows-first — confirm)
    reordered = np.transpose(pixels, (1, 0, 2))
    return reordered.reshape(1, -1)


# Main routine: open the camera and classify frames in real time
def real_time_detection(model, scaler, img_size=(100, 100), confidence_threshold=0.7):
    """Classify frames from the first camera until Q or window close.

    Args:
        model: Network whose ``forward_propagation`` output at ``[0][0]`` is
            thresholded at 0.5 (the same threshold ``predict`` uses), with
            values above it shown as bad fruit.
        scaler: Scaler exposing ``transform``.
        img_size: Size each frame is scaled to before classification.
        confidence_threshold: Kept for interface compatibility; this
            function does not currently use it.
    """
    pygame.init()
    pygame.camera.init()

    started_cam = None
    try:
        cameras = pygame.camera.list_cameras()
        if not cameras:
            print("未检测到摄像头")
            return

        # Open the first camera reported by pygame
        cam = pygame.camera.Camera(cameras[0])
        cam.start()
        started_cam = cam  # only a started camera is stopped in `finally`

        display_size = (640, 480)
        screen = pygame.display.set_mode(display_size)
        pygame.display.set_caption("果实分类实时监测")

        # SysFont takes a comma-separated list of candidates; the extra names
        # are CJK-capable fallbacks for systems without SimHei, where the
        # Chinese labels would otherwise fall back to a font lacking them.
        font = pygame.font.SysFont(
            "SimHei,Microsoft YaHei,PingFang SC,Noto Sans CJK SC,WenQuanYi Micro Hei",
            24,
        )

        running = True
        clock = pygame.time.Clock()

        while running:
            # Quit on window close or on the Q key
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False
                elif event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_q:
                        running = False

            frame = cam.get_image()
            # Scaled copy used for display only
            display_frame = pygame.transform.scale(frame, display_size)

            img_array = preprocess_frame(frame, img_size)
            img_scaled = scaler.transform(img_array)

            # A single forward pass gives both the probability and the class;
            # the previous version ran the network twice per frame.
            bad_probability = float(model.forward_propagation(img_scaled)[0][0])
            is_bad = bad_probability > 0.5
            confidence = bad_probability if is_bad else (1 - bad_probability)

            result_text = "坏果" if is_bad else "好果"
            confidence_text = f"置信度: {confidence * 100:.2f}%"

            # RGB colors: red for bad fruit, green for good fruit
            text_color = (255, 0, 0) if is_bad else (0, 255, 0)
            result_surface = font.render(f"分类: {result_text}", True, text_color)
            confidence_surface = font.render(confidence_text, True, text_color)

            screen.blit(display_frame, (0, 0))
            screen.blit(result_surface, (10, 10))
            screen.blit(confidence_surface, (10, 40))
            pygame.display.flip()

            # Cap the loop at 30 frames per second
            clock.tick(30)
    finally:
        # Release the camera and pygame even if a frame fails to process
        if started_cam is not None:
            started_cam.stop()
        pygame.quit()


if __name__ == "__main__":
    # Load the model and the scaler
    try:
        model, scaler = load_model("fruit_classifier.pkl", "scaler.pkl")
    except FileNotFoundError:
        print("找不到模型文件，请确保模型文件在正确的路径下")
        sys.exit(1)

    # Start real-time detection
    print("启动摄像头实时监测... 按'q'键或窗口关闭按钮退出")
    real_time_detection(model, scaler)