目标检测探索指南 🔍
目标检测就像是一位细心的侦探!我们需要在图像中寻找并定位特定的目标,就像侦探在现场搜寻线索一样。让我们一起来探索这个充满挑战的图像处理领域吧!
目录
1. 什么是目标检测?
想象一下,你是一位图像侦探,正在搜寻图像中的"线索"。目标检测就是这样的过程,它可以帮助我们:
- 🔍 定位目标位置(找到"线索"的位置)
- 📏 确定目标大小(测量"线索"的范围)
- 🎯 识别目标类别(判断"线索"的类型)
- 🔄 跟踪目标运动(追踪"线索"的变化)
2. 滑动窗口检测
2.1 基本原理
滑动窗口就像是侦探用放大镜一格一格地检查现场!通过在图像上滑动不同大小的窗口来寻找目标。
关键步骤:
- 多尺度金字塔
- 窗口滑动
- 特征提取
- 分类判断
2.2 实现示例
// 滑动窗口检测实现
vector<DetectionResult> sliding_window_detect(
const Mat& src,
const Size& window_size,
int stride,
float threshold) {
vector<DetectionResult> results;
HOGExtractor hog(window_size);
// 加载预训练的SVM模型
Ptr<ml::SVM> svm = ml::SVM::load("pedestrian_svm.xml");
#pragma omp parallel for
for (int y = 0; y <= src.rows - window_size.height; y += stride) {
for (int x = 0; x <= src.cols - window_size.width; x += stride) {
// 提取窗口
Mat window = src(Rect(x, y, window_size.width, window_size.height));
// 计算HOG特征
vector<float> features = hog.compute(window);
// SVM预测
Mat feature_mat(1, static_cast<int>(features.size()), CV_32F);
memcpy(feature_mat.data, features.data(), features.size() * sizeof(float));
float score = svm->predict(feature_mat, noArray(), ml::StatModel::RAW_OUTPUT);
if (score > threshold) {
DetectionResult det;
det.bbox = Rect(x, y, window_size.width, window_size.height);
det.confidence = score;
det.class_id = 1; // 行人类别
det.label = "pedestrian";
#pragma omp critical
results.push_back(det);
}
}
}
return results;
}
def sliding_window_detection(img_path, window_size=(64, 64), stride=32):
"""
滑动窗口检测实现
参数:
img_path: 输入图像路径
window_size: 窗口大小,默认(64, 64)
stride: 步长,默认32
返回:
检测结果可视化
"""
# 读取图像
img = cv2.imread(img_path)
if img is None:
raise ValueError(f"无法读取图像: {img_path}")
# 复制原图用于绘制结果
result = img.copy()
# 转换为灰度图
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 定义简单的目标检测函数(这里使用边缘检测作为示例)
def detect_object(window):
edges = cv2.Canny(window, 100, 200)
return np.sum(edges) > 1000 # 简单的阈值判断
# 滑动窗口检测
for y in range(0, img.shape[0] - window_size[1], stride):
for x in range(0, img.shape[1] - window_size[0], stride):
# 提取窗口
window = gray[y:y+window_size[1], x:x+window_size[0]]
# 检测目标
if detect_object(window):
# 绘制检测框
cv2.rectangle(result, (x, y),
(x + window_size[0], y + window_size[1]),
(0, 255, 0), 2)
return result
3. HOG+SVM检测
3.1 算法原理
HOG(方向梯度直方图)特征就像是侦探提取的"纹理线索",SVM(支持向量机)就像是经验丰富的"判断专家"。
HOG特征计算:
g x = I ( x + 1 , y ) − I ( x − 1 , y ) g y = I ( x , y + 1 ) − I ( x , y − 1 ) g = g x 2 + g y 2 θ = arctan ( g y g x ) \begin{aligned} g_x &= I(x+1,y) - I(x-1,y) \\ g_y &= I(x,y+1) - I(x,y-1) \\ g &= \sqrt{g_x^2 + g_y^2} \\ \theta &= \arctan(\frac{g_y}{g_x}) \end{aligned} gxgygθ=I(x+1,y)−I(x−1,y)=I(x,y+1)−I(x,y−1)=gx2+gy2=arctan(gxgy)
3.2 实现示例
// HOG特征提取器
class HOGExtractor {
public:
HOGExtractor(Size win_size = Size(64, 128),
Size block_size = Size(16, 16),
Size block_stride = Size(8, 8),
Size cell_size = Size(8, 8),
int nbins = 9)
: win_size_(win_size),
block_size_(block_size),
block_stride_(block_stride),
cell_size_(cell_size),
nbins_(nbins) {
// 计算HOG特征维度
Size n_cells(win_size.width / cell_size.width,
win_size.height / cell_size.height);
Size n_blocks((n_cells.width - block_size.width / cell_size.width) /
(block_stride.width / cell_size.width) + 1,
(n_cells.height - block_size.height / cell_size.height) /
(block_stride.height / cell_size.height) + 1);
feature_dim_ = n_blocks.width * n_blocks.height *
block_size.width * block_size.height *
nbins / (cell_size.width * cell_size.height);
}
// 计算图像梯度
void computeGradients(const Mat& img, Mat& magnitude, Mat& angle) const {
magnitude = Mat::zeros(img.size(), CV_32F);
angle = Mat::zeros(img.size(), CV_32F);
#pragma omp parallel for
for (int y = 1; y < img.rows - 1; y++) {
for (int x = 1; x < img.cols - 1; x++) {
// 计算x方向梯度
float gx = static_cast<float>(img.at<uchar>(y, x + 1) - img.at<uchar>(y, x - 1));
// 计算y方向梯度
float gy = static_cast<float>(img.at<uchar>(y + 1, x) - img.at<uchar>(y - 1, x));
// 计算梯度幅值
magnitude.at<float>(y, x) = std::sqrt(gx * gx + gy * gy);
// 计算梯度方向(角度)
angle.at<float>(y, x) = static_cast<float>(std::atan2(gy, gx) * 180.0 / CV_PI);
if (angle.at<float>(y, x) < 0) {
angle.at<float>(y, x) += 180.0f;
}
}
}
}
// 计算cell直方图
void computeCellHistogram(const Mat& magnitude, const Mat& angle,
vector<vector<vector<float>>>& cell_hists) const {
Size n_cells(win_size_.width / cell_size_.width,
win_size_.height / cell_size_.height);
#pragma omp parallel for collapse(2)
for (int y = 0; y < magnitude.rows; y++) {
for (int x = 0; x < magnitude.cols; x++) {
float mag = magnitude.at<float>(y, x);
float ang = angle.at<float>(y, x);
// 计算bin索引
float bin_width = 180.0f / nbins_;
int bin = static_cast<int>(ang / bin_width);
int next_bin = (bin + 1) % nbins_;
float alpha = (ang - bin * bin_width) / bin_width;
// 计算cell索引
int cell_x = x / cell_size_.width;
int cell_y = y / cell_size_.height;
if (cell_x < n_cells.width && cell_y < n_cells.height) {
#pragma omp atomic
cell_hists[cell_y][cell_x][bin] += mag * (1 - alpha);
#pragma omp atomic
cell_hists[cell_y][cell_x][next_bin] += mag * alpha;
}
}
}
}
// 计算block特征并归一化
void computeBlockFeatures(const vector<vector<vector<float>>>& cell_hists,
vector<float>& features) const {
Size n_cells(win_size_.width / cell_size_.width,
win_size_.height / cell_size_.height);
Size n_blocks((n_cells.width - block_size_.width / cell_size_.width) /
(block_stride_.width / cell_size_.width) + 1,
(n_cells.height - block_size_.height / cell_size_.height) /
(block_stride_.height / cell_size_.height) + 1);
for (int by = 0; by <= n_cells.height - block_size_.height / cell_size_.height;
by += block_stride_.height / cell_size_.height) {
for (int bx = 0; bx <= n_cells.width - block_size_.width / cell_size_.width;
bx += block_stride_.width / cell_size_.width) {
vector<float> block_features;
float norm = 0;
// 收集block中的所有cell直方图
for (int cy = by; cy < by + block_size_.height / cell_size_.height; cy++) {
for (int cx = bx; cx < bx + block_size_.width / cell_size_.width; cx++) {
block_features.insert(block_features.end(),
cell_hists[cy][cx].begin(),
cell_hists[cy][cx].end());
for (float val : cell_hists[cy][cx]) {
norm += val * val;
}
}
}
// L2-Norm归一化
norm = std::sqrt(norm + 1e-5);
for (float& val : block_features) {
val /= norm;
}
features.insert(features.end(),
block_features.begin(),
block_features.end());
}
}
}
vector<float> compute(const Mat& img) const {
// 调整图像大小
Mat resized;
resize(img, resized, win_size_);
// 计算梯度
Mat magnitude, angle;
computeGradients(resized, magnitude, angle);
// 计算cell直方图
Size n_cells(win_size_.width / cell_size_.width,
win_size_.height / cell_size_.height);
vector<vector<vector<float>>> cell_hists(n_cells.height,
vector<vector<float>>(n_cells.width, vector<float>(nbins_, 0)));
computeCellHistogram(magnitude, angle, cell_hists);
// 计算block特征并归一化
vector<float> features;
features.reserve(feature_dim_);
computeBlockFeatures(cell_hists, features);
return features;
}
private:
Size win_size_;
Size block_size_;
Size block_stride_;
Size cell_size_;
int nbins_;
int feature_dim_;
};
class HOGExtractor:
"""HOG特征提取器"""
def __init__(self, win_size=(64, 128), block_size=(16, 16),
block_stride=(8, 8), cell_size=(8, 8), nbins=9):
self.win_size = win_size
self.block_size = block_size
self.block_stride = block_stride
self.cell_size = cell_size
self.nbins = nbins
# 计算特征维度
n_cells = (win_size[0] // cell_size[0], win_size[1] // cell_size[1])
n_blocks = ((n_cells[0] - block_size[0] // cell_size[0]) // (block_stride[0] // cell_size[0]) + 1,
(n_cells[1] - block_size[1] // cell_size[1]) // (block_stride[1] // cell_size[1]) + 1)
self.feature_dim = n_blocks[0] * n_blocks[1] * block_size[0] * block_size[1] * nbins // (cell_size[0] * cell_size[1])
def compute_gradients(self, img):
"""计算图像梯度"""
# 确保图像是灰度图
if len(img.shape) > 2:
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 计算x和y方向的梯度
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize=1)
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize=1)
# 计算梯度幅值和方向
magnitude = np.sqrt(gx**2 + gy**2)
angle = np.arctan2(gy, gx) * 180 / np.pi
angle[angle < 0] += 180
return magnitude, angle
def compute_cell_histogram(self, magnitude, angle):
"""计算cell直方图"""
n_cells = (self.win_size[0] // self.cell_size[0],
self.win_size[1] // self.cell_size[1])
cell_hists = np.zeros((n_cells[1], n_cells[0], self.nbins))
# 计算每个像素的贡献
for y in range(magnitude.shape[0]):
for x in range(magnitude.shape[1]):
mag = magnitude[y, x]
ang = angle[y, x]
# 计算bin索引
bin_width = 180.0 / self.nbins
bin_idx = int(ang / bin_width)
next_bin = (bin_idx + 1) % self.nbins
alpha = (ang - bin_idx * bin_width) / bin_width
# 计算cell索引
cell_x = x // self.cell_size[0]
cell_y = y // self.cell_size[1]
if cell_x < n_cells[0] and cell_y < n_cells[1]:
cell_hists[cell_y, cell_x, bin_idx] += mag * (1 - alpha)
cell_hists[cell_y, cell_x, next_bin] += mag * alpha
return cell_hists
def compute_block_features(self, cell_hists):
"""计算block特征并归一化"""
n_cells = (self.win_size[0] // self.cell_size[0],
self.win_size[1] // self.cell_size[1])
n_blocks = ((n_cells[0] - self.block_size[0] // self.cell_size[0]) //
(self.block_stride[0] // self.cell_size[0]) + 1,
(n_cells[1] - self.block_size[1] // self.cell_size[1]) //
(self.block_stride[1] // self.cell_size[1]) + 1)
features = []
for by in range(0, n_cells[1] - self.block_size[1] // self.cell_size[1] + 1,
self.block_stride[1] // self.cell_size[1]):
for bx in range(0, n_cells[0] - self.block_size[0] // self.cell_size[0] + 1,
self.block_stride[0] // self.cell_size[0]):
# 提取block中的cell直方图
block_features = cell_hists[by:by + self.block_size[1] // self.cell_size[1],
bx:bx + self.block_size[0] // self.cell_size[0]].flatten()
# L2-Norm归一化
norm = np.sqrt(np.sum(block_features**2) + 1e-5)
block_features = block_features / norm
features.extend(block_features)
return np.array(features)
def compute(self, img):
"""计算HOG特征"""
# 调整图像大小
img = cv2.resize(img, self.win_size)
# 计算梯度
magnitude, angle = self.compute_gradients(img)
# 计算cell直方图
cell_hists = self.compute_cell_histogram(magnitude, angle)
# 计算block特征并归一化
features = self.compute_block_features(cell_hists)
return features
4. Haar+AdaBoost检测
4.1 算法原理
Haar特征就像是侦探寻找的"明暗对比线索",AdaBoost就像是多位专家组成的"决策委员会"。
Haar特征计算:
f = ∑ i ∈ w h i t e I ( i ) − ∑ i ∈ b l a c k I ( i ) f = \sum_{i \in white} I(i) - \sum_{i \in black} I(i) f=i∈white∑I(i)−i∈black∑I(i)
4.2 实现示例
// Haar特征提取器
class HaarExtractor {
public:
HaarExtractor() {
// 加载预训练的Haar级联分类器
face_cascade_.load("haarcascade_frontalface_alt.xml");
}
vector<Rect> detect(const Mat& img, double scale_factor = 1.1, int min_neighbors = 3) {
vector<Rect> faces;
face_cascade_.detectMultiScale(img, faces, scale_factor, min_neighbors);
return faces;
}
private:
CascadeClassifier face_cascade_;
};
// 使用示例
Mat detectFaces(const Mat& img) {
Mat result = img.clone();
Mat gray;
cvtColor(img, gray, COLOR_BGR2GRAY);
HaarExtractor haar;
vector<Rect> faces = haar.detect(gray);
for (const Rect& face : faces) {
rectangle(result, face, Scalar(0, 255, 0), 2);
}
return result;
}
def haar_adaboost_detection(img_path):
"""
Haar+AdaBoost检测实现
参数:
img_path: 输入图像路径
返回:
检测结果可视化
"""
# 读取图像
img = cv2.imread(img_path)
result = img.copy()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# 加载预训练的人脸检测器
face_cascade = cv2.CascadeClassifier(
cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
)
# 进行人脸检测
faces = face_cascade.detectMultiScale(gray, 1.1, 4)
# 绘制检测结果
for (x, y, w, h) in faces:
cv2.rectangle(result, (x, y), (x+w, y+h), (0, 255, 0), 2)
return result
5. 非极大值抑制
5.1 算法原理
非极大值抑制就像是侦探整理重复的线索,只保留最显著的发现。
NMS算法步骤:
- 按置信度排序
- 计算重叠度
- 抑制重叠框
5.2 实现示例
// 非极大值抑制实现
vector<int> nms(const vector<Rect>& boxes, const vector<float>& scores,
float iou_threshold) {
vector<int> indices(boxes.size());
std::iota(indices.begin(), indices.end(), 0); // 填充为 0, 1, 2, ...
// 按分数排序
sort(indices.begin(), indices.end(),
[&scores](int a, int b) { return scores[a] > scores[b]; });
vector<int> keep;
while (!indices.empty()) {
int curr = indices[0];
keep.push_back(curr);
vector<int> tmp;
for (size_t i = 1; i < indices.size(); i++) {
float iou = compute_iou(boxes[curr], boxes[indices[i]]);
if (iou <= iou_threshold) {
tmp.push_back(indices[i]);
}
}
indices = tmp;
}
return keep;
}
// 计算两个矩形的IoU
float compute_iou(const Rect& a, const Rect& b) {
int x1 = max(a.x, b.x);
int y1 = max(a.y, b.y);
int x2 = min(a.x + a.width, b.x + b.width);
int y2 = min(a.y + a.height, b.y + b.height);
if (x1 >= x2 || y1 >= y2) return 0.0f;
float intersection_area = static_cast<float>((x2 - x1) * (y2 - y1));
float union_area = static_cast<float>(a.width * a.height + b.width * b.height - intersection_area);
return intersection_area / union_area;
}
def compute_nms_manual(boxes, scores, iou_threshold=0.5):
"""
手动实现非极大值抑制算法
参数:
boxes: 边界框坐标列表,每个框为[x1, y1, x2, y2]格式
scores: 每个边界框对应的置信度分数
iou_threshold: IoU阈值,默认0.5
返回:
keep: 保留的边界框索引列表
"""
# 计算所有框的面积
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1) * (y2 - y1)
# 根据分数排序
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
if order.size == 1:
break
# 计算IoU
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
# 保留IoU小于阈值的框
inds = np.where(ovr <= iou_threshold)[0]
order = order[inds + 1]
return keep
6. 目标跟踪
6.1 基本原理
目标跟踪就像是侦探持续追踪重要线索,需要考虑:
- 运动预测
- 特征匹配
- 轨迹平滑
- 遮挡处理
6.2 实现示例
// 简单的目标跟踪器实现
vector<DetectionResult> track_objects(
const Mat& src,
const Mat& prev,
const vector<DetectionResult>& prev_boxes) {
vector<DetectionResult> curr_boxes;
// 计算光流
vector<Point2f> prev_points, curr_points;
for (const auto& det : prev_boxes) {
prev_points.push_back(Point2f(static_cast<float>(det.bbox.x + det.bbox.width/2),
static_cast<float>(det.bbox.y + det.bbox.height/2)));
}
vector<uchar> status;
vector<float> err;
calcOpticalFlowPyrLK(prev, src, prev_points, curr_points, status, err);
// 更新检测框位置
for (size_t i = 0; i < prev_boxes.size(); i++) {
if (status[i]) {
DetectionResult det = prev_boxes[i];
float dx = curr_points[i].x - prev_points[i].x;
float dy = curr_points[i].y - prev_points[i].y;
det.bbox.x += static_cast<int>(dx);
det.bbox.y += static_cast<int>(dy);
curr_boxes.push_back(det);
}
}
return curr_boxes;
}
// 可视化检测结果
Mat draw_detections(const Mat& src,
const vector<DetectionResult>& detections) {
Mat img_display = src.clone();
if (img_display.channels() == 1) {
cvtColor(img_display, img_display, COLOR_GRAY2BGR);
}
for (const auto& det : detections) {
Scalar color;
if (det.class_id == 1) { // 行人
color = Scalar(0, 255, 0);
} else if (det.class_id == 2) { // 人脸
color = Scalar(0, 0, 255);
} else {
color = Scalar(255, 0, 0);
}
// 绘制边界框
rectangle(img_display, det.bbox, color, 2);
// 绘制标签和置信度
string label = format("%s %.2f", det.label.c_str(), det.confidence);
int baseline = 0;
Size text_size = getTextSize(label, FONT_HERSHEY_SIMPLEX,
0.5, 1, &baseline);
rectangle(img_display,
Point(det.bbox.x, det.bbox.y - text_size.height - 5),
Point(det.bbox.x + text_size.width, det.bbox.y),
color, -1);
putText(img_display, label,
Point(det.bbox.x, det.bbox.y - 5),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255, 255, 255), 1);
}
return img_display;
}
def object_tracking(img_path, video_path=None):
"""
简单的目标跟踪实现
参数:
img_path: 输入图像路径
video_path: 视频路径(可选)
返回:
跟踪结果可视化
"""
# 读取图像
img = cv2.imread(img_path)
result = img.copy()
# 转换为HSV颜色空间
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# 定义目标颜色范围(以红色为例)
lower_red = np.array([0, 50, 50])
upper_red = np.array([10, 255, 255])
# 创建掩码
mask = cv2.inRange(hsv, lower_red, upper_red)
# 寻找轮廓
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
if contours:
# 找到最大的轮廓
c = max(contours, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(c)
cv2.rectangle(result, (x, y), (x+w, y+h), (0, 255, 0), 2)
return result
7. 代码实现与优化
7.1 性能优化技巧
- 使用积分图像加速特征计算
- GPU加速大规模计算
- 多线程并行处理
- 特征金字塔缓存
7.2 优化示例
// 使用积分图像优化特征计算
class IntegralFeature {
private:
Mat integralImg;
Mat integralSqImg;
public:
void compute(const Mat& img) {
integral(img, integralImg, integralSqImg);
}
float getSum(const Rect& r) const {
return integralImg.at<double>(r.y+r.height,r.x+r.width)
+ integralImg.at<double>(r.y,r.x)
- integralImg.at<double>(r.y,r.x+r.width)
- integralImg.at<double>(r.y+r.height,r.x);
}
float getVariance(const Rect& r) const {
float area = r.width * r.height;
float sum = getSum(r);
float sqSum = getSqSum(r);
return (sqSum - sum*sum/area) / area;
}
};
8. 应用场景与实践
8.1 典型应用
- 👤 人脸检测
- 🚗 车辆检测
- 🚶 行人检测
- 📝 文字检测
- 🎯 缺陷检测
8.2 实践建议
数据准备
- 充分的训练数据
- 数据增强
- 标注质量控制
算法选择
- 根据场景选择合适的算法
- 考虑实时性要求
- 权衡精度和速度
部署优化
- 模型压缩
- 计算加速
- 内存优化
总结
目标检测就像是在图像中玩"找茬游戏",我们需要在复杂的场景中找到特定的目标!通过滑动窗口、HOG+SVM、Haar+AdaBoost等方法,我们可以有效地定位和识别这些目标。在实际应用中,需要根据具体场景选择合适的方法,就像选择不同的"放大镜"来观察不同的目标。
记住:好的目标检测系统就像是一个经验丰富的"图像侦探",能够从复杂的场景中发现重要的目标!🔍
参考资料
- Viola P, Jones M. Rapid object detection using a boosted cascade of simple features[C]. CVPR, 2001
- Dalal N, Triggs B. Histograms of oriented gradients for human detection[C]. CVPR, 2005
- OpenCV官方文档: https://docs.opencv.org/
- 更多资源: IP101项目主页