YOLOv10 TensorRT 推理代码(C++)

发布于:2025-06-27 ⋅ 阅读:(14) ⋅ 点赞:(0)

最近实现了 YOLOv10 的 TensorRT 推理代码。除了后处理部分只适用于 YOLOv10 之外,其余部分基本可以在 YOLO 系列中通用。在此做个学习记录。

#include <algorithm>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include "preprocess.hpp"
#include "postprocess.hpp"
#include "packageimage.hpp"
using namespace cv;
using namespace std;
using namespace nvinfer1;

// Network input resolution; used for the host/device buffer sizes and the
// input binding dimensions in main().
constexpr int INPUT_H = 640;
constexpr int INPUT_W = 640;

// Not referenced by the code shown in this file.
constexpr int NUM_CLASSES = 5;

// Detections scoring below this value are dropped by postprocess().
constexpr float CONF_THRESH = 0.5f;

// Not referenced by the code shown in this file.
constexpr float IOU_THRESH = 0.5f;

// Forwarded to package() as its third and fourth arguments.
// NOTE(review): presumably the size of each patch cut from the source image - confirm in packageimage.hpp.
constexpr int Inputsize_w = 640;
constexpr int Inputsize_h = 200;

// Number of patches sent through the engine in one inference call.
constexpr int batch_size = 7;

// Replace with your own class names; main() indexes this by Detection::class_id.
std::vector<std::string> class_names{};

// Logger implementation required by TensorRT. Messages of severity kINFO or
// more severe are written to stdout with a "[TRT] " prefix; anything more
// verbose is discarded.
class Logger : public ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        const bool tooVerbose = severity > Severity::kINFO;
        if (tooVerbose) {
            return;
        }
        std::cout << "[TRT] " << msg << std::endl;
    }
} gLogger;

// 安全 CUDA 分配
void* safeCudaMalloc(size_t size) {
    void* ptr = nullptr;
    cudaMalloc(&ptr, size);
    return ptr;
}

int main() {
    std::string engine_path = "";
    std::string image_path = "";
    std::string imagesave_path = "";

    // 读取 engine 文件
    std::ifstream file(engine_path, std::ios::binary);
    if (!file) {
        std::cerr << "Failed to open engine file.\n";
        return -1;
    }
    file.seekg(0, file.end);
    size_t engine_size = file.tellg();
    file.seekg(0, file.beg);
    std::vector<char> engine_data(engine_size);
    file.read(engine_data.data(), engine_size);
    file.close();

    // 创建 runtime 和 engine
    IRuntime* runtime = createInferRuntime(gLogger);
    ICudaEngine* engine = runtime->deserializeCudaEngine(engine_data.data(), engine_size);
    IExecutionContext* context = engine->createExecutionContext();

    // 获取输入输出索引
    int inputIndex = engine->getBindingIndex("images");
    //int outputIndex = engine->getBindingIndex(engine->getBindingName(1)); // 假第二个绑定是输出
    int outputIndex = engine->getBindingIndex("output0");

    std::vector<Patches> batches;
    package(batches, image_path, Inputsize_w, Inputsize_h, INPUT_H, INPUT_W);

    // 图像预处理
    /*cv::Mat img = cv::imread(image_path);
    cv::Mat blob;
    float ratio;
    int pad_w, pad_h;
    preprocess(img, blob, ratio, pad_w, pad_h, INPUT_W, INPUT_H);*/

  

    // 分配 Host 和 Device 内存
    size_t input_size = batch_size * 3 * INPUT_H * INPUT_W * sizeof(float);
    size_t output_size = batch_size * 300 * 6 * sizeof(float); // 假设最多 300 个目标,每个6个值

    float* input_host = new float[batch_size * 3 * INPUT_H * INPUT_W];
    float* output_host = new float[batch_size * 300 * 6];

    for (int i = 0; i < batch_size; i++)
    {
        memcpy(input_host + i * 3 * INPUT_H * INPUT_W, batches[i].patch.ptr<float>(), 3 * INPUT_H * INPUT_W * sizeof(float));
    }

    float* input_device = (float*)safeCudaMalloc(input_size);
    float* output_device = (float*)safeCudaMalloc(output_size);

    void* bindings[2];
    bindings[inputIndex] = input_device;
    bindings[outputIndex] = output_device;

    cudaMemcpy(input_device, input_host, input_size, cudaMemcpyHostToDevice);
    context->setBindingDimensions(inputIndex, Dims4(batch_size, 3, INPUT_H, INPUT_W));
    context->enqueueV2(bindings, 0, nullptr);
    cudaMemcpy(output_host, output_device, output_size, cudaMemcpyDeviceToHost);

    // 原图用于绘制结果
    cv::Mat img = cv::imread(image_path);

    std::cout << "batch_size = " << batch_size << ", batches.size() = " << batches.size() << std::endl;

    for (int i = 0; i < batch_size; i++) {
        std::vector<Detection> results;
        float* cur_output = output_host + i * 300 * 6;

        postprocess(cur_output, 300, CONF_THRESH, results,
            batches[i].pad_w, batches[i].pad_h, batches[i].ratio);

        std::cout << "Patch " << i << " detected " << results.size() << " objects." << std::endl;

        for (const auto& det : results) {
            int x = det.bbox.x + batches[i].x_offset;
            int y = det.bbox.y + batches[i].y_offset;
            int w = det.bbox.width;
            int h = det.bbox.height;

            cv::rectangle(img, cv::Point(x, y), cv::Point(x + w, y + h), cv::Scalar(0, 255, 0), 2);
            std::string label = class_names[det.class_id] + " " + std::to_string(det.conf);
            cv::putText(img, label, cv::Point(x, y - 5), 0, 0.5, cv::Scalar(255, 255, 255), 1);
        }
    }


    cv::imwrite(imagesave_path, img);
    std::cout << "Result saved as: " << imagesave_path << std::endl;

    // 清理资源
    delete[] input_host;
    delete[] output_host;
    cudaFree(input_device);
    cudaFree(output_device);
    context->destroy();
    engine->destroy();
    runtime->destroy();

    return 0;
}

预处理:

//头文件:
// preprocess.hpp
#pragma once
#include <opencv2/opencv.hpp>

/**
 * @brief Letterboxes an image and converts it into the planar float layout fed to the TensorRT model.
 *
 * The image is resized with its aspect ratio preserved, centred on a grey
 * (114, 114, 114) canvas of the requested size, scaled to [0, 1] and split
 * into channel planes. No colour conversion is performed, so the channel
 * order of `img` is kept.
 *
 * @param img      Input image (original image).
 * @param output   Output: CV_32F matrix with 3 rows of new_h * new_w values, one row per channel plane (CHW).
 * @param scale    Output: resize factor applied to the image.
 * @param pad_w    Output: padding width on the left side, in pixels.
 * @param pad_h    Output: padding height at the top, in pixels.
 * @param new_w    Model input width (e.g. 640).
 * @param new_h    Model input height (e.g. 640).
 *
 * NOTE(review): the definition must take the two size parameters in this same
 * order (width, then height) - verify when the input is not square.
 */
void preprocess(const cv::Mat& img, cv::Mat& output, float& scale, int& pad_w, int& pad_h, int new_w, int new_h);

//cpp:
#include "preprocess.hpp"
#include <opencv2/opencv.hpp>


/**
 * Letterboxes `img` to new_w x new_h and writes it as planar (CHW) float data.
 *
 * Steps: aspect-preserving resize, centring on a grey (114, 114, 114) canvas,
 * scaling to [0, 1], then one output row per channel plane.
 *
 * The size parameters are taken as (new_w, new_h), the order used by the
 * declaration in preprocess.hpp; naming them the other way round transposed
 * the target size for non-square inputs.
 *
 * @param img    Non-empty CV_8UC3 image; anything else raises cv::Exception via CV_Assert.
 * @param output Receives a 3 x (new_h * new_w) CV_32F matrix.
 * @param scale  Receives the resize factor.
 * @param pad_w  Receives the left padding in pixels.
 * @param pad_h  Receives the top padding in pixels.
 * @param new_w  Target width, must be > 0.
 * @param new_h  Target height, must be > 0.
 */
void preprocess(const cv::Mat& img, cv::Mat& output, float& scale, int& pad_w, int& pad_h, int new_w, int new_h){
	// copyTo() into the CV_8UC3 canvas silently does nothing useful for other
	// types, and an empty image would divide by zero below: fail loudly instead.
	CV_Assert(!img.empty() && img.type() == CV_8UC3);
	CV_Assert(new_w > 0 && new_h > 0);

	const int orig_w = img.cols;
	const int orig_h = img.rows;

	scale = std::min((float)new_w / orig_w, (float)new_h / orig_h);
	// Truncate as before, but keep extremely thin images from collapsing to
	// 0 pixels and never exceed the canvas.
	const int resized_w = std::min(new_w, std::max(1, int(scale * orig_w)));
	const int resized_h = std::min(new_h, std::max(1, int(scale * orig_h)));

	// Resize the image
	cv::Mat resized;
	cv::resize(img, resized, cv::Size(resized_w, resized_h));

	// Centre the resized image on the grey canvas
	cv::Mat padded(new_h, new_w, CV_8UC3, cv::Scalar(114, 114, 114));
	pad_w = (new_w - resized_w) / 2;
	pad_h = (new_h - resized_h) / 2;
	resized.copyTo(padded(cv::Rect(pad_w, pad_h, resized_w, resized_h)));

	cv::Mat float_img;
	padded.convertTo(float_img, CV_32FC3, 1.0 / 255.0);

	// HWC -> CHW
	std::vector<cv::Mat> channels(3);
	cv::split(float_img, channels);
	output = cv::Mat(3, new_h * new_w, CV_32F);

	// Flatten each channel plane into its own output row
	const size_t plane_bytes = static_cast<size_t>(new_h) * new_w * sizeof(float);
	for (int i = 0; i < 3; ++i) {
		memcpy(output.ptr<float>(i), channels[i].data, plane_bytes);
	}

}

后处理:

//头文件:
#pragma once
#include <vector>
#include <opencv2/opencv.hpp>

// One detection produced by postprocess().
struct Detection {
    cv::Rect bbox;    // bounding box
    int class_id;     // class index
    float conf;       // confidence score
};

/**
 * @brief Post-processes the model output: confidence filtering and mapping boxes back through the letterbox transform.
 *
 * Each prediction is read as 6 consecutive floats: x1, y1, x2, y2, confidence,
 * class id. Predictions whose confidence is below `conf_thresh` are skipped.
 * No NMS is performed here.
 *
 * @param output      TensorRT output buffer, read as [num_preds, 6].
 * @param num_preds   Number of predictions in `output` (main() passes 300).
 * @param conf_thresh Confidence threshold.
 * @param out_dets    Output: kept detections are appended to this vector (it is not cleared first).
 * @param pad_w       Horizontal padding subtracted from the x coordinates.
 * @param pad_h       Vertical padding subtracted from the y coordinates.
 * @param ratio       Scale factor the padded coordinates are divided by.
 */
void postprocess(const float* output, int num_preds, float conf_thresh, std::vector<Detection>& out_dets,int pad_w, int pad_h, float ratio);

//cpp:
#include "postprocess.hpp"
#include <opencv2/opencv.hpp>
void postprocess(const float* output, int num_preds, float conf_thresh, std::vector<Detection>& out_dets, int pad_w, int pad_h, float ratio) {

	std::vector<Detection> raw_dets;

	for (int i = 0; i < num_preds; i++) {

		const float* det = output + i * 6;
		float x1 = det[0], y1 = det[1], x2 = det[2], y2 = det[3];
		float conf = det[4];
		int class_id = static_cast<int>(det[5]);

		if (conf < conf_thresh) {
			continue;
		}

		x1 = (x1 - pad_w) / ratio;
		y1 = (y1 - pad_h) / ratio;
		x2 = (x2 - pad_w) / ratio;
		y2 = (y2 - pad_h) / ratio;

		int left = static_cast<int>(x1);
		int top = static_cast<int>(y1);
		int right = static_cast<int>(x2);
		int bottom = static_cast<int>(y2);

		int width = right - left;
		int height = bottom - top;

		cv::Rect bbox(
			static_cast<int>(left),
			static_cast<int>(top),
			static_cast<int>(width),
			static_cast<int>(height)
		);
		std::cout << "x1 = " << x1 << ", y1 = " << y1 << ", x2 = " << x2 << ", y2 = " << y2 << std::endl;
		out_dets.push_back(Detection{ bbox, class_id, conf });
	}
}

网站公告

今日签到

点亮在社区的每一天
去签到