YOLOv5 ONNX Inference in Practice: CUDA Acceleration + Visualization

Published: 2025-06-21

This post documents how to run inference on an exported YOLOv5 ONNX model using onnxruntime-gpu and OpenCV, with support for:

  • ✅ CUDA acceleration
  • ✅ Automatic letterbox resizing and padding
  • ✅ Non-maximum suppression (NMS)
  • ✅ Automatic mapping of output coordinates back to the original image size
  • ✅ Real-time visualization

📦 Installing Dependencies

pip install onnxruntime-gpu opencv-python numpy
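
If you are unsure whether the GPU build is actually in use, a quick sanity check (a small sketch, not part of the original script) is to print the providers ONNX Runtime reports. 'CUDAExecutionProvider' should be listed when onnxruntime-gpu and a matching CUDA/cuDNN are installed; otherwise the session will run on CPU or, depending on the version, raise an error when only the CUDA provider is requested:

import onnxruntime as ort
# 'CUDAExecutionProvider' should appear in this list if the GPU build is usable
print(ort.get_available_providers())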

🧠 Inference Code (YOLOv5 ONNX)

import onnxruntime as ort
import numpy as np
import cv2
import time

class YOLOv5ONNX:
    def __init__(self, model_path, input_size=(640, 640), providers=['CUDAExecutionProvider']):
        self.input_size = input_size  # (w, h)
        self.session = ort.InferenceSession(model_path, providers=providers)
        self.input_name = self.session.get_inputs()[0].name

    def letterbox(self, image, new_shape=(640, 640), color=(114, 114, 114)):
        shape = image.shape[:2]  # current shape [height, width]
        r = min(new_shape[1] / shape[0], new_shape[0] / shape[1])
        new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
        dw = new_shape[0] - new_unpad[0]
        dh = new_shape[1] - new_unpad[1]
        dw /= 2  # divide padding into 2 sides
        dh /= 2
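        # Worked example (assumed 1280x720 input with new_shape=(640, 640)):
        # r = 0.5, new_unpad = (640, 360), dw = 0, dh = 140 -> 140 px of gray
        # padding is added to the top and bottom below.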

        resized = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        padded = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

        return padded, r, dw, dh

    def preprocess(self, img_bgr):
        img, r, dw, dh = self.letterbox(img_bgr, self.input_size)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_rgb = img_rgb.astype(np.float32) / 255.0
        img_rgb = np.transpose(img_rgb, (2, 0, 1))  # HWC to CHW
        img_rgb = np.expand_dims(img_rgb, axis=0)  # Add batch dimension
        img_rgb = np.ascontiguousarray(img_rgb)  # transpose yields a non-contiguous view; make it contiguous for ONNX Runtime
        return img_rgb, r, dw, dh

    def scale_coords(self, box, r, dw, dh, orig_shape):
        x1 = (box[0] - dw) / r
        y1 = (box[1] - dh) / r
        x2 = (box[2] - dw) / r
        y2 = (box[3] - dh) / r

        x1 = np.clip(x1, 0, orig_shape[1])
        y1 = np.clip(y1, 0, orig_shape[0])
        x2 = np.clip(x2, 0, orig_shape[1])
        y2 = np.clip(y2, 0, orig_shape[0])

        return x1, y1, x2, y2

    def nms(self, boxes, scores, iou_threshold):
        x1, y1, x2, y2 = boxes.T
        areas = (x2 - x1) * (y2 - y1)
        order = scores.argsort()[::-1]
        keep = []

        while order.size > 0:
            i = order[0]
            keep.append(i)
            if order.size == 1:
                break
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            iou = inter / (areas[i] + areas[order[1:]] - inter)
            order = order[1:][iou < iou_threshold]

        return keep

    def postprocess(self, outputs, orig_shape, r, dw, dh, conf_thres=0.25, iou_thres=0.45):
        pred = outputs[0]  # shape: (1, 25200, 85)
        pred = np.squeeze(pred, axis=0)  # shape: (25200, 85)

        conf = pred[:, 4]
        pred = pred[conf > conf_thres]
        if len(pred) == 0:
            return []

        scores = pred[:, 4]
        class_scores = pred[:, 5:]
        class_ids = np.argmax(class_scores, axis=1)
        class_conf = class_scores[np.arange(len(pred)), class_ids]
        final_scores = scores * class_conf

        boxes = pred[:, :4]
        boxes_xyxy = np.zeros_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # x1
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # y1
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2  # x2
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2  # y2

        keep = self.nms(boxes_xyxy, final_scores, iou_thres)
        boxes_xyxy = boxes_xyxy[keep]
        final_scores = final_scores[keep]
        class_ids = class_ids[keep]

        results = []
        for box, score, cls in zip(boxes_xyxy, final_scores, class_ids):
            x1, y1, x2, y2 = self.scale_coords(box, r, dw, dh, orig_shape)
            results.append((int(x1), int(y1), int(x2), int(y2), float(score), int(cls)))
        return results

    def infer(self, img_bgr):
        input_tensor, r, dw, dh = self.preprocess(img_bgr)
        outputs = self.session.run(None, {self.input_name: input_tensor})
        return self.postprocess(outputs, img_bgr.shape[:2], r, dw, dh)



🔍 Inference and Visualization Example

if __name__ == "__main__":
    model_path = "weights/best.onnx"        # replace with the path to your ONNX model
    image_path = "data/images/bus.jpg"      # replace with your test image

    detector = YOLOv5ONNX(model_path)
    img = cv2.imread(image_path)

    t0 = time.time()
    results = detector.infer(img)
    print(f"Inference time: {time.time() - t0:.3f}s")

    for (x1, y1, x2, y2, score, cls) in results:
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, f"{cls}:{score:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)

    cv2.imshow("YOLOv5 ONNX", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
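
The single-image demo above can be turned into the real-time visualization mentioned earlier by feeding frames from a camera or video file. Below is a minimal sketch, assuming camera index 0 and reusing the YOLOv5ONNX detector created above:

cap = cv2.VideoCapture(0)  # or a video file path
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    # run detection and draw boxes on each frame
    for (x1, y1, x2, y2, score, cls) in detector.infer(frame):
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(frame, f"{cls}:{score:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)
    cv2.imshow("YOLOv5 ONNX", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to quit
        break
cap.release()
cv2.destroyAllWindows()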

📝 Summary

This script loads a YOLOv5 ONNX model and automatically handles:

  • Letterbox image preprocessing
  • Model inference (GPU-accelerated; see the CPU-fallback sketch after this list)
  • Mapping output coordinates back to the original image
  • Confidence filtering + NMS post-processing
  • Drawing the visualized results
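
On a machine without a usable GPU, the same class can fall back to the CPU provider. This is an assumed usage pattern built on the providers argument of the class above, not something shown in the original script:

# same model path as in the example above; CPU is used if CUDA is unavailable
detector = YOLOv5ONNX("weights/best.onnx",
                      providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])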
