
"""
核心实现思路:
- 滑动窗口策略:在图像上滑动固定大小的窗口,对每个窗口进行分类
- 多维特征提取:结合统计特征、纹理特征、边缘特征、形状特征等
- 随机森林分类:训练二分类器判断窗口是否包含目标
- 后处理优化:使用非极大值抑制减少重复检测

特征工程的重要性:
- LBP纹理特征:捕捉局部纹理模式
- 灰度共生矩阵:描述纹理的统计特性
- 边缘密度:反映目标边界信息
- 形状描述符:圆形度、面积比等几何特征

实际应用建议:
- 数据收集:收集大量正负样本进行训练
- 特征优化:根据具体目标调整特征提取策略
- 参数调优:调整窗口大小、步长、置信度阈值等
- 多尺度检测:使用不同尺寸的窗口检测不同大小的目标

适用场景:
- 计算资源受限的嵌入式设备
- 目标具有明显纹理或形状特征的场景
- 需要快速部署和调试的原型系统
- 传统图像处理流程的补充
"""
import os
from typing import List, Tuple

import cv2
import matplotlib.pyplot as plt
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from skimage.measure import regionprops
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
class RandomForestObjectDetector:
    """Sliding-window object detector built on a random-forest classifier.

    Every window is described by a hand-crafted feature vector (intensity
    statistics, an LBP histogram, GLCM texture properties, edge/gradient
    measures and two shape descriptors) and classified as target (label 1)
    or background (label 0).
    """

    # LBP neighbourhood radius; the number of sampling points is 8 * radius.
    _LBP_RADIUS = 3
    # GLCM directions. skimage expects radians, so 0/45/90/135 degrees are
    # written as multiples of pi/4.
    _GLCM_ANGLES = (0.0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)
    _GLCM_PROPERTIES = (
        "contrast",
        "dissimilarity",
        "homogeneity",
        "energy",
        "correlation",
    )

    def __init__(self, window_size=(64, 64), step_size=16, n_estimators=100):
        """Create an untrained detector.

        Args:
            window_size: Sliding-window size as ``(width, height)`` in pixels.
            step_size: Stride, in pixels, between consecutive windows.
            n_estimators: Number of trees in the random forest.
        """
        self.window_size = window_size
        self.step_size = step_size
        self.rf_classifier = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=42,
            max_depth=10,
            min_samples_split=5,
        )
        self.is_trained = False

    def extract_features(self, image_patch: np.ndarray) -> np.ndarray:
        """Compute the feature vector for a single image patch.

        The vector is the concatenation, in this order, of: 6 intensity
        statistics, a 26-bin uniform-LBP histogram, 5 GLCM properties, the
        Canny edge density, mean/std of the Sobel gradient magnitude, and
        the relative area and circularity of the largest Otsu contour.

        Args:
            image_patch: A 2-D grayscale patch or a 3-D BGR patch.

        Returns:
            A 1-D float array of features.
        """
        if image_patch.ndim == 3:
            gray = cv2.cvtColor(image_patch, cv2.COLOR_BGR2GRAY)
        else:
            gray = image_patch.copy()

        features = []
        features.extend(self._intensity_features(gray))
        features.extend(self._lbp_features(gray))
        features.extend(self._glcm_features(gray))
        features.extend(self._edge_features(gray))
        features.extend(self._shape_features(gray))
        return np.array(features, dtype=np.float64)

    @staticmethod
    def _intensity_features(gray: np.ndarray) -> list:
        """Return mean, std, median, min, max and variance of the patch."""
        return [
            np.mean(gray),
            np.std(gray),
            np.median(gray),
            np.min(gray),
            np.max(gray),
            np.var(gray),
        ]

    @classmethod
    def _lbp_features(cls, gray: np.ndarray) -> np.ndarray:
        """Return the normalised histogram of uniform LBP codes."""
        radius = cls._LBP_RADIUS
        n_points = 8 * radius
        lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
        # Uniform LBP produces n_points + 2 distinct codes.
        histogram, _ = np.histogram(
            lbp.ravel(),
            bins=n_points + 2,
            range=(0, n_points + 2),
            density=True,
        )
        return histogram

    @classmethod
    def _glcm_features(cls, gray: np.ndarray) -> list:
        """Return GLCM properties averaged over directions, or zeros on failure."""
        try:
            glcm = graycomatrix(
                gray,
                distances=[1],
                angles=list(cls._GLCM_ANGLES),
                levels=256,
                symmetric=True,
                normed=True,
            )
            return [
                graycoprops(glcm, prop).mean() for prop in cls._GLCM_PROPERTIES
            ]
        except (ValueError, TypeError):
            # Best-effort: input the GLCM routines reject (e.g. an
            # unsupported dtype) falls back to neutral zeros instead of
            # aborting the whole feature vector.
            return [0] * len(cls._GLCM_PROPERTIES)

    @staticmethod
    def _edge_features(gray: np.ndarray) -> list:
        """Return Canny edge density and mean/std of Sobel gradient magnitude."""
        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size

        grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        grad_magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)
        return [edge_density, np.mean(grad_magnitude), np.std(grad_magnitude)]

    @staticmethod
    def _shape_features(gray: np.ndarray) -> list:
        """Return relative area and circularity of the largest Otsu contour."""
        _, binary = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )
        # findContours returns (contours, hierarchy) or, on OpenCV 3.x,
        # (image, contours, hierarchy); [-2] selects the contours in both.
        contours = cv2.findContours(
            binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )[-2]
        if len(contours) == 0:
            return [0, 0]

        largest_contour = max(contours, key=cv2.contourArea)
        area = cv2.contourArea(largest_contour)
        perimeter = cv2.arcLength(largest_contour, True)
        circularity = 4 * np.pi * area / (perimeter ** 2) if perimeter > 0 else 0
        return [area / (gray.shape[0] * gray.shape[1]), circularity]

    def sliding_window(self, image: np.ndarray) -> List[Tuple]:
        """Enumerate every full-size window of the image.

        Args:
            image: Input image; only its first two dimensions are used.

        Returns:
            A list of ``((x, y), patch)`` pairs in row-major order, where
            ``(x, y)`` is the window's top-left corner. Patches are slices
            of ``image``, not copies.

        Raises:
            ValueError: If ``step_size`` is not positive.
        """
        if self.step_size <= 0:
            raise ValueError("step_size must be a positive integer")

        # window_size is (width, height) while array shapes are (rows, cols).
        win_w, win_h = self.window_size
        img_h, img_w = image.shape[:2]
        # The range bounds already guarantee that each slice is a complete
        # win_h x win_w window, so no per-window shape check is required.
        return [
            ((x, y), image[y:y + win_h, x:x + win_w])
            for y in range(0, img_h - win_h + 1, self.step_size)
            for x in range(0, img_w - win_w + 1, self.step_size)
        ]

    def prepare_training_data(self, positive_samples: List[np.ndarray], negative_samples: List[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Turn sample patches into a feature matrix and a label vector.

        Args:
            positive_samples: Patches containing the target (label 1).
            negative_samples: Background patches (label 0).

        Returns:
            ``(features, labels)`` with positives first, then negatives.
        """
        print("提取正样本特征...")
        features = [self.extract_features(sample) for sample in positive_samples]
        print("提取负样本特征...")
        features.extend(
            self.extract_features(sample) for sample in negative_samples
        )
        labels = [1] * len(positive_samples) + [0] * len(negative_samples)
        return np.array(features), np.array(labels, dtype=int)

    def train(self, positive_samples: List[np.ndarray], negative_samples: List[np.ndarray]):
        """Fit the random forest and print a validation report.

        The data is split 80/20 (stratified by label); the classifier is
        fitted on the larger part and evaluated on the held-out part.

        Args:
            positive_samples: Patches containing the target.
            negative_samples: Background patches.

        Raises:
            ValueError: If either sample list is empty.
        """
        # A single-class model has no probability column for the positive
        # class, so detection could not work afterwards.
        if len(positive_samples) == 0 or len(negative_samples) == 0:
            raise ValueError("正样本和负样本都不能为空")

        print("准备训练数据...")
        X, y = self.prepare_training_data(positive_samples, negative_samples)
        print(f"训练数据形状: {X.shape}, 标签分布: {np.bincount(y)}")

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        print("训练随机森林分类器...")
        self.rf_classifier.fit(X_train, y_train)

        val_pred = self.rf_classifier.predict(X_val)
        print("\n验证集性能:")
        print(classification_report(y_val, val_pred))

        self.is_trained = True
        print("训练完成!")

    def detect(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Tuple]:
        """Classify every sliding window and return the positive ones.

        Args:
            image: Image to scan.
            confidence_threshold: Minimum positive-class probability for a
                window to be reported.

        Returns:
            A list of ``(x, y, w, h, confidence)`` tuples.

        Raises:
            ValueError: If the detector has not been trained.
        """
        if not self.is_trained:
            raise ValueError("模型尚未训练,请先调用train()方法")

        windows = self.sliding_window(image)
        print(f"处理 {len(windows)} 个窗口...")
        if not windows:
            # Image smaller than the window: nothing to classify.
            return []

        classes = self.rf_classifier.classes_
        positive_columns = np.flatnonzero(classes == 1)
        if positive_columns.size == 0:
            # The fitted model never saw label 1, so it cannot predict it.
            return []

        # One batched call replaces a predict + predict_proba pair per window.
        feature_matrix = np.vstack(
            [self.extract_features(patch) for _, patch in windows]
        )
        probabilities = self.rf_classifier.predict_proba(feature_matrix)
        # Same rule predict() applies: the class with the highest probability.
        predicted = classes[np.argmax(probabilities, axis=1)]
        confidences = probabilities[:, positive_columns[0]]

        win_w, win_h = self.window_size[0], self.window_size[1]
        return [
            (x, y, win_w, win_h, float(confidence))
            for ((x, y), _), label, confidence in zip(windows, predicted, confidences)
            if label == 1 and confidence >= confidence_threshold
        ]

    def non_max_suppression(self, detections: List[Tuple], overlap_threshold: float = 0.3) -> List[Tuple]:
        """Greedy non-maximum suppression.

        Detections are visited from highest to lowest confidence; one is
        kept only if its IoU with every already-kept detection is below
        ``overlap_threshold``.

        Args:
            detections: ``(x, y, w, h, confidence)`` tuples.
            overlap_threshold: IoU at or above which a detection is dropped.

        Returns:
            The kept detections, ordered by descending confidence.
        """
        if not detections:
            return []

        keep = []
        for candidate in sorted(detections, key=lambda d: d[4], reverse=True):
            if all(
                self.calculate_iou(kept, candidate) < overlap_threshold
                for kept in keep
            ):
                keep.append(candidate)
        return keep

    @staticmethod
    def calculate_iou(box1: Tuple, box2: Tuple) -> float:
        """Return the intersection-over-union of two boxes.

        Args:
            box1: ``(x, y, w, h)``, optionally followed by extra fields
                such as a confidence, which are ignored.
            box2: Same layout as ``box1``.

        Returns:
            IoU in ``[0, 1]``; ``0.0`` when the boxes do not overlap.
        """
        x1, y1, w1, h1 = box1[:4]
        x2, y2, w2, h2 = box2[:4]

        left = max(x1, x2)
        top = max(y1, y2)
        right = min(x1 + w1, x2 + w2)
        bottom = min(y1 + h1, y2 + h2)
        if right <= left or bottom <= top:
            return 0.0

        intersection = (right - left) * (bottom - top)
        union = w1 * h1 + w2 * h2 - intersection
        return intersection / union if union > 0 else 0.0

    def visualize_detections(self, image: np.ndarray, detections: List[Tuple], title: str = "检测结果"):
        """Draw detections on a copy of the image and show it with matplotlib.

        Args:
            image: Original image, BGR or single-channel grayscale.
            detections: ``(x, y, w, h, confidence)`` tuples.
            title: Prefix of the figure title.
        """
        if image.ndim == 2:
            # Promote grayscale so coloured boxes and BGR->RGB conversion work.
            img_vis = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        else:
            img_vis = image.copy()

        for x, y, w, h, confidence in detections:
            x, y, w, h = int(x), int(y), int(w), int(h)
            cv2.rectangle(img_vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{confidence:.2f}"
            # Keep the label inside the canvas for boxes touching the top edge.
            label_y = y - 10 if y - 10 >= 10 else y + 15
            cv2.putText(
                img_vis,
                label,
                (x, label_y),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 255, 0),
                1,
            )

        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
        plt.title(f"{title} - 检测到 {len(detections)} 个目标")
        plt.axis('off')
        plt.show()
def create_sample_data(n_positive=100, n_negative=200, seed=None):
    """Generate synthetic 64x64 grayscale patches for a demo training run.

    Positive patches have a mid-gray background (values 50-99) with a
    bright filled rectangle; negative patches are dark (values 0-49).
    Gaussian noise is added to both and the result is clipped to uint8.

    Args:
        n_positive: Number of positive patches to generate.
        n_negative: Number of negative patches to generate.
        seed: Optional seed for reproducible output. When ``None`` the
            global NumPy random state is used.

    Returns:
        ``(positive_samples, negative_samples)``, two lists of
        ``(64, 64)`` uint8 arrays.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    patch_shape = (64, 64)

    positive_samples = []
    for _ in range(n_positive):
        sample = rng.randint(50, 100, patch_shape, dtype=np.uint8)
        # Filled rectangle with corners (x=10, y=20) and (x=50, y=40),
        # both inclusive, matching what a filled cv2.rectangle draws.
        sample[20:41, 10:51] = 150
        noise = rng.normal(0, 10, sample.shape)
        positive_samples.append(np.clip(sample + noise, 0, 255).astype(np.uint8))

    negative_samples = []
    for _ in range(n_negative):
        sample = rng.randint(0, 50, patch_shape, dtype=np.uint8)
        noise = rng.normal(0, 15, sample.shape)
        negative_samples.append(np.clip(sample + noise, 0, 255).astype(np.uint8))

    return positive_samples, negative_samples
def main():
    """Run the demo: train on synthetic patches, then detect in a synthetic scene."""
    detector = RandomForestObjectDetector(window_size=(64, 64), step_size=32)

    print("创建示例数据...")
    positives, negatives = create_sample_data()
    detector.train(positives, negatives)

    # Dark random background with two bright filled squares as targets.
    scene = np.random.randint(0, 50, (300, 400), dtype=np.uint8)
    target_corners = (
        ((50, 50), (114, 114)),
        ((200, 150), (264, 214)),
    )
    for top_left, bottom_right in target_corners:
        cv2.rectangle(scene, top_left, bottom_right, 150, -1)

    print("进行目标检测...")
    raw_detections = detector.detect(scene, confidence_threshold=0.6)
    kept_detections = detector.non_max_suppression(
        raw_detections, overlap_threshold=0.3
    )
    print(f"原始检测数量: {len(raw_detections)}")
    print(f"NMS后检测数量: {len(kept_detections)}")

    if len(kept_detections) == 0:
        print("未检测到目标")
        return

    # The visualiser is handed a BGR image, so promote the grayscale scene.
    detector.visualize_detections(
        cv2.cvtColor(scene, cv2.COLOR_GRAY2BGR), kept_detections
    )


if __name__ == "__main__":
    main()