
核心实现思路
- 滑动窗口策略:在图像上滑动固定大小的窗口,对每个窗口进行分类
- 多维特征提取:结合统计特征、纹理特征、边缘特征、形状特征等
- 随机森林分类:训练二分类器判断窗口是否包含目标
- 后处理优化:使用非极大值抑制减少重复检测
特征工程的重要性
- LBP纹理特征:捕捉局部纹理模式
- 灰度共生矩阵:描述纹理的统计特性
- 边缘密度:反映目标边界信息
- 形状描述符:圆形度、面积比等几何特征
实际应用建议
- 数据收集:收集大量正负样本进行训练
- 特征优化:根据具体目标调整特征提取策略
- 参数调优:调整窗口大小、步长、置信度阈值等
- 多尺度检测:使用不同尺寸的窗口检测不同大小的目标
适用场景
- 计算资源受限的嵌入式设备
- 目标具有明显纹理或形状特征的场景
- 需要快速部署和调试的原型系统
- 传统图像处理流程的补充
import cv2
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from skimage.measure import regionprops
import os
from typing import List, Tuple
import matplotlib.pyplot as plt
class RandomForestObjectDetector:
    """Sliding-window object detector backed by a random-forest classifier.

    Every window is converted into a hand-crafted feature vector (intensity
    statistics, LBP histogram, GLCM properties, edge density, gradient
    statistics and simple shape descriptors) and classified as
    target (label 1) or background (label 0).
    """

    def __init__(self, window_size=(64, 64), step_size=16, n_estimators=100):
        """Initialise the detector.

        Args:
            window_size: Sliding-window size as ``(width, height)`` in pixels.
            step_size: Stride of the sliding window in pixels.
            n_estimators: Number of trees in the random forest.
        """
        self.window_size = window_size
        self.step_size = step_size
        self.rf_classifier = RandomForestClassifier(
            n_estimators=n_estimators,
            random_state=42,
            max_depth=10,
            min_samples_split=5
        )
        self.is_trained = False

    def extract_features(self, image_patch: np.ndarray) -> np.ndarray:
        """Extract a fixed-length feature vector from an image patch.

        The vector is laid out as: 6 intensity statistics, a 26-bin uniform
        LBP histogram, 5 GLCM properties, 1 edge density, 2 gradient
        statistics and 2 shape descriptors.

        Args:
            image_patch: Image patch; a 3-dimensional array is converted
                from BGR to grayscale, anything else is used as-is.

        Returns:
            1-D feature vector.
        """
        features = []
        if len(image_patch.shape) == 3:
            gray = cv2.cvtColor(image_patch, cv2.COLOR_BGR2GRAY)
        else:
            gray = image_patch.copy()

        # Global intensity statistics.
        features.extend([
            np.mean(gray),
            np.std(gray),
            np.median(gray),
            np.min(gray),
            np.max(gray),
            np.var(gray)
        ])

        # Uniform LBP produces codes in [0, n_points + 1], hence
        # n_points + 2 histogram bins.
        radius = 3
        n_points = 8 * radius
        lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
        lbp_hist, _ = np.histogram(lbp.ravel(), bins=n_points + 2,
                                   range=(0, n_points + 2), density=True)
        features.extend(lbp_hist)

        # GLCM texture properties averaged over four directions.
        # graycomatrix expects angles in radians, not degrees.
        glcm_angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4]
        glcm_props = ('contrast', 'dissimilarity', 'homogeneity',
                      'energy', 'correlation')
        try:
            glcm = graycomatrix(gray, distances=[1], angles=glcm_angles,
                                levels=256, symmetric=True, normed=True)
            glcm_features = [graycoprops(glcm, prop).mean()
                             for prop in glcm_props]
        except Exception:
            # Best-effort fallback (e.g. unsupported dtype): keep the vector
            # length stable without swallowing KeyboardInterrupt/SystemExit.
            glcm_features = [0] * len(glcm_props)
        features.extend(glcm_features)

        # Fraction of pixels marked as edges by Canny.
        edges = cv2.Canny(gray, 50, 150)
        edge_density = np.sum(edges > 0) / edges.size
        features.append(edge_density)

        # Sobel gradient magnitude statistics.
        grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        grad_magnitude = np.sqrt(grad_x**2 + grad_y**2)
        features.extend([
            np.mean(grad_magnitude),
            np.std(grad_magnitude)
        ])

        # Shape descriptors of the largest external contour after Otsu
        # thresholding: relative area and circularity.
        _, binary = cv2.threshold(gray, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            largest_contour = max(contours, key=cv2.contourArea)
            area = cv2.contourArea(largest_contour)
            perimeter = cv2.arcLength(largest_contour, True)
            if perimeter > 0:
                circularity = 4 * np.pi * area / (perimeter ** 2)
            else:
                circularity = 0
            features.extend([area / (gray.shape[0] * gray.shape[1]),
                             circularity])
        else:
            features.extend([0, 0])

        return np.array(features)

    def sliding_window(self, image: np.ndarray) -> List[Tuple]:
        """Enumerate all full-size windows over an image.

        Args:
            image: Input image (first two dimensions are height, width).

        Returns:
            List of ``((x, y), patch)`` pairs in row-major order, where
            ``(x, y)`` is the top-left corner of the patch.
        """
        windows = []
        img_h, img_w = image.shape[:2]
        # window_size is (width, height) while array shapes are
        # (height, width); unpack explicitly so non-square windows and
        # list-typed sizes work.
        win_w, win_h = self.window_size[0], self.window_size[1]
        # The range bounds guarantee every slice is a full-size window.
        for y in range(0, img_h - win_h + 1, self.step_size):
            for x in range(0, img_w - win_w + 1, self.step_size):
                windows.append(((x, y), image[y:y + win_h, x:x + win_w]))
        return windows

    def prepare_training_data(self, positive_samples: List[np.ndarray],
                              negative_samples: List[np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
        """Build the feature matrix and label vector for training.

        Args:
            positive_samples: Patches containing the target (label 1).
            negative_samples: Background patches (label 0).

        Returns:
            ``(features, labels)`` with positives first, then negatives.
        """
        print("提取正样本特征...")
        features = [self.extract_features(sample) for sample in positive_samples]
        labels = [1] * len(features)

        print("提取负样本特征...")
        negative_features = [self.extract_features(sample)
                             for sample in negative_samples]
        features.extend(negative_features)
        labels.extend([0] * len(negative_features))

        return np.array(features), np.array(labels)

    def train(self, positive_samples: List[np.ndarray],
              negative_samples: List[np.ndarray]):
        """Train the random-forest classifier.

        Holds out a stratified 20% of the samples, fits the forest on the
        rest and prints a classification report for the held-out part.

        Args:
            positive_samples: Patches containing the target.
            negative_samples: Background patches.
        """
        print("准备训练数据...")
        X, y = self.prepare_training_data(positive_samples, negative_samples)
        print(f"训练数据形状: {X.shape}, 标签分布: {np.bincount(y)}")

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        print("训练随机森林分类器...")
        self.rf_classifier.fit(X_train, y_train)

        val_pred = self.rf_classifier.predict(X_val)
        print("\n验证集性能:")
        print(classification_report(y_val, val_pred))

        self.is_trained = True
        print("训练完成!")

    def detect(self, image: np.ndarray, confidence_threshold: float = 0.7) -> List[Tuple]:
        """Detect targets in an image.

        Args:
            image: Input image.
            confidence_threshold: Minimum positive-class probability for a
                window to be reported.

        Returns:
            List of ``(x, y, w, h, confidence)`` tuples, one per window that
            is predicted as class 1 with sufficient confidence.

        Raises:
            ValueError: If the model has not been trained yet.
        """
        if not self.is_trained:
            raise ValueError("模型尚未训练,请先调用train()方法")

        windows = self.sliding_window(image)
        print(f"处理 {len(windows)} 个窗口...")
        if not windows:
            # Image smaller than the window: nothing to classify.
            return []

        # Look up the probability column of label 1 instead of assuming it
        # is column 1 (it is absent if training data had a single class).
        classes = self.rf_classifier.classes_
        positive_cols = np.flatnonzero(classes == 1)
        if positive_cols.size == 0:
            return []

        # Classify all windows in one batch rather than two model calls
        # per window.
        feature_matrix = np.array([self.extract_features(patch)
                                   for _, patch in windows])
        probabilities = self.rf_classifier.predict_proba(feature_matrix)
        predicted = classes[np.argmax(probabilities, axis=1)]
        confidences = probabilities[:, positive_cols[0]]

        win_w, win_h = self.window_size[0], self.window_size[1]
        return [
            (x, y, win_w, win_h, float(conf))
            for ((x, y), _), label, conf in zip(windows, predicted, confidences)
            if label == 1 and conf >= confidence_threshold
        ]

    def non_max_suppression(self, detections: List[Tuple],
                            overlap_threshold: float = 0.3) -> List[Tuple]:
        """Greedy non-maximum suppression.

        Args:
            detections: ``(x, y, w, h, confidence)`` tuples.
            overlap_threshold: A box is dropped when its IoU with an already
                kept, higher-confidence box is at or above this value.

        Returns:
            Kept detections, ordered by descending confidence.
        """
        if not detections:
            return []

        keep = []
        # Visiting boxes by descending confidence and keeping each one that
        # does not overlap any kept box is the standard greedy NMS.
        for candidate in sorted(detections, key=lambda d: d[4], reverse=True):
            if all(self.calculate_iou(kept, candidate) < overlap_threshold
                   for kept in keep):
                keep.append(candidate)
        return keep

    @staticmethod
    def calculate_iou(box1: Tuple, box2: Tuple) -> float:
        """Return the intersection-over-union of two ``(x, y, w, h, _)`` boxes."""
        x1, y1, w1, h1, _ = box1
        x2, y2, w2, h2, _ = box2

        xi1 = max(x1, x2)
        yi1 = max(y1, y2)
        xi2 = min(x1 + w1, x2 + w2)
        yi2 = min(y1 + h1, y2 + h2)

        # Boxes that only touch or do not overlap have zero IoU.
        if xi2 <= xi1 or yi2 <= yi1:
            return 0.0

        intersection = (xi2 - xi1) * (yi2 - yi1)
        union = w1 * h1 + w2 * h2 - intersection
        return intersection / union if union > 0 else 0.0

    def visualize_detections(self, image: np.ndarray, detections: List[Tuple],
                             title: str = "检测结果"):
        """Draw detections on a copy of the image and show it with matplotlib.

        Args:
            image: Original image (BGR, or single-channel grayscale).
            detections: ``(x, y, w, h, confidence)`` tuples.
            title: Figure title prefix.
        """
        img_vis = image.copy()
        if img_vis.ndim == 2:
            # Promote grayscale so coloured boxes and BGR->RGB conversion work.
            img_vis = cv2.cvtColor(img_vis, cv2.COLOR_GRAY2BGR)

        for x, y, w, h, confidence in detections:
            x, y, w, h = int(x), int(y), int(w), int(h)
            cv2.rectangle(img_vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
            label = f"{confidence:.2f}"
            # Put the label inside the box when there is no room above it.
            label_y = y - 10 if y - 10 >= 12 else y + 15
            cv2.putText(img_vis, label, (x, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        plt.figure(figsize=(12, 8))
        plt.imshow(cv2.cvtColor(img_vis, cv2.COLOR_BGR2RGB))
        plt.title(f"{title} - 检测到 {len(detections)} 个目标")
        plt.axis('off')
        plt.show()
def create_sample_data(n_positive=100, n_negative=200, seed=None):
    """Create synthetic 64x64 grayscale training patches.

    Positive patches have a mid-gray background (values 50-99) with a
    bright filled rectangle; negative patches are dark background only
    (values 0-49). Gaussian noise is added to both.

    Args:
        n_positive: Number of positive patches to generate.
        n_negative: Number of negative patches to generate.
        seed: Optional seed for reproducible output. When ``None`` the
            global NumPy random state is used.

    Returns:
        ``(positive_samples, negative_samples)`` as two lists of
        ``uint8`` arrays of shape ``(64, 64)``.
    """
    # A private RandomState exposes the same randint/normal API as the
    # np.random module, so both paths draw in the same order.
    rng = np.random if seed is None else np.random.RandomState(seed)

    positive_samples = []
    for _ in range(n_positive):
        sample = rng.randint(50, 100, (64, 64), dtype=np.uint8)
        # Filled rectangle with corners (10, 20) and (50, 40), both inclusive.
        sample[20:41, 10:51] = 150
        noise = rng.normal(0, 10, sample.shape)
        sample = np.clip(sample + noise, 0, 255).astype(np.uint8)
        positive_samples.append(sample)

    negative_samples = []
    for _ in range(n_negative):
        sample = rng.randint(0, 50, (64, 64), dtype=np.uint8)
        noise = rng.normal(0, 15, sample.shape)
        sample = np.clip(sample + noise, 0, 255).astype(np.uint8)
        negative_samples.append(sample)

    return positive_samples, negative_samples
if __name__ == "__main__":
    # End-to-end demo: train on synthetic patches, then run detection on a
    # synthetic scene containing two bright squares.
    demo_detector = RandomForestObjectDetector(window_size=(64, 64), step_size=32)

    print("创建示例数据...")
    pos_patches, neg_patches = create_sample_data()
    demo_detector.train(pos_patches, neg_patches)

    # Dark noisy canvas with two filled 150-valued squares as targets.
    canvas = np.random.randint(0, 50, (300, 400), dtype=np.uint8)
    target_corners = (
        ((50, 50), (114, 114)),
        ((200, 150), (264, 214)),
    )
    for top_left, bottom_right in target_corners:
        cv2.rectangle(canvas, top_left, bottom_right, 150, -1)

    print("进行目标检测...")
    raw_hits = demo_detector.detect(canvas, confidence_threshold=0.6)
    kept_hits = demo_detector.non_max_suppression(raw_hits, overlap_threshold=0.3)

    print(f"原始检测数量: {len(raw_hits)}")
    print(f"NMS后检测数量: {len(kept_hits)}")

    if not kept_hits:
        print("未检测到目标")
    else:
        # The visualiser expects a BGR image, so promote the grayscale canvas.
        canvas_bgr = cv2.cvtColor(canvas, cv2.COLOR_GRAY2BGR)
        demo_detector.visualize_detections(canvas_bgr, kept_hits)