U版RTDETR C++使用ONNXRuntime推理

发布于:2025-08-07 ⋅ 阅读:(31) ⋅ 点赞:(0)

本文使用ONNX版本为1.14.1,OpenCv版本为4.8.0(同时也支持4.5.0,4.7.0),RTDETR不需要后处理所以不需要进行后处理,与YOLO不同的点还有YOLO需要特征金字塔所以输出的tenser尺寸是动态变动的,RTDETR默认输入300个框,所以Tenser在输出上默认为[1,300,class+4]。

#include <iostream>
#include <vector>
#include <string>
#include <memory>
#include <random>
#include <algorithm>

#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>

class RTDETR {
private:
	std::string model_path_;
	std::string img_path_;
	float conf_thres_;

	std::unique_ptr<Ort::Session> session_;
	std::unique_ptr<Ort::Env> env_;
	Ort::MemoryInfo memory_info_;

	int input_width_;
	int input_height_;
	std::vector<std::string> classes_;
	std::vector<cv::Scalar> color_palette_;

	cv::Mat img_;
	int img_height_;
	int img_width_;
    //更换你需要的类别
	void initializeClasses() {
		classes_ = {
			"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
			"boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
			"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
			"giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
			"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
			"skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
			"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
			"broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
			"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
			"remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
			"refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
			"toothbrush"
		};
	}

	void generateColorPalette() {
		std::random_device rd;
		std::mt19937 gen(rd());
		std::uniform_int_distribution<> dis(0, 255);

		color_palette_.clear();
		for (size_t i = 0; i < classes_.size(); ++i) {
			color_palette_.push_back(cv::Scalar(dis(gen), dis(gen), dis(gen)));
		}
	}

public:
	RTDETR(const std::string& model_path,
		const std::string& img_path,
		float conf_thres = 0.5f)
		: model_path_(model_path), img_path_(img_path),
		conf_thres_(conf_thres),
		memory_info_(Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault)) {

		// Initialize ONNX Runtime
		env_ = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "RTDETR");

		Ort::SessionOptions session_options;
		session_options.SetIntraOpNumThreads(1);
		session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

		// Create session
#ifdef _WIN32
		std::wstring wide_model_path(model_path.begin(), model_path.end());
		session_ = std::make_unique<Ort::Session>(*env_, wide_model_path.c_str(), session_options);
#else
		session_ = std::make_unique<Ort::Session>(*env_, model_path.c_str(), session_options);
#endif

		// Get input dimensions
		auto input_type_info = session_->GetInputTypeInfo(0);
		auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
		auto input_dims = input_tensor_info.GetShape();

		if (input_dims.size() >= 4) {
			input_height_ = static_cast<int>(input_dims[2]);
			input_width_ = static_cast<int>(input_dims[3]);
		}
		else {
			throw std::runtime_error("Invalid model input dimensions");
		}

		std::cout << "Model input size: " << input_width_ << "x" << input_height_ << std::endl;

		// Initialize classes and color palette
		initializeClasses();
		generateColorPalette();

		std::cout << "Loaded " << classes_.size() << " classes" << std::endl;
	}

	void drawDetections(const std::vector<float>& box, float score, int class_id) {
		if (class_id >= static_cast<int>(color_palette_.size()) || class_id < 0) return;

		int x1 = static_cast<int>(box[0]);
		int y1 = static_cast<int>(box[1]);
		int x2 = static_cast<int>(box[2]);
		int y2 = static_cast<int>(box[3]);

		cv::Scalar color = color_palette_[class_id];

		// Draw bounding box
		cv::rectangle(img_, cv::Point(x1, y1), cv::Point(x2, y2), color, 2);


		// Create label
		std::string label = classes_[class_id] + ": " + std::to_string(score).substr(0, 4);

		// Get text size
		int baseline = 0;
		cv::Size text_size = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseline);

		// Calculate label position
		int label_x = x1;
		int label_y = (y1 - 10 > text_size.height) ? y1 - 10 : y1 + 10;

		// Draw label background
		cv::rectangle(img_,
			cv::Point(label_x, label_y - text_size.height),
			cv::Point(label_x + text_size.width, label_y + text_size.height),
			color, cv::FILLED);

		// Draw label text
		cv::putText(img_, label, cv::Point(label_x, label_y),
			cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0), 1, cv::LINE_AA);
	}

	std::vector<float> preprocess() {
		// Read image
		img_ = cv::imread(img_path_);
		if (img_.empty()) {
			throw std::runtime_error("Could not load image: " + img_path_);
		}

		img_height_ = img_.rows;
		img_width_ = img_.cols;

		std::cout << "Original image size: " << img_width_ << "x" << img_height_ << std::endl;

		// Convert BGR to RGB
		cv::Mat rgb_img;
		cv::cvtColor(img_, rgb_img, cv::COLOR_BGR2RGB);

		// Resize image
		cv::Mat resized_img;
		cv::resize(rgb_img, resized_img, cv::Size(input_width_, input_height_));

		// Normalize and convert to float
		resized_img.convertTo(resized_img, CV_32F, 1.0 / 255.0);

		// Convert to CHW format and flatten
		std::vector<float> input_data;
		input_data.reserve(3 * input_height_ * input_width_);

		// Channel-first format (CHW)
		for (int c = 0; c < 3; ++c) {
			for (int h = 0; h < input_height_; ++h) {
				for (int w = 0; w < input_width_; ++w) {
					input_data.push_back(resized_img.at<cv::Vec3f>(h, w)[c]);
				}
			}
		}

		return input_data;
	}

	std::vector<std::vector<float>> bboxCxcywhToXyxy(const std::vector<std::vector<float>>& boxes) {
		std::vector<std::vector<float>> result;

		for (const auto& box : boxes) {
			if (box.size() < 4) continue;

			float cx = box[0];
			float cy = box[1];
			float w = box[2];
			float h = box[3];

			float half_w = w / 2.0f;
			float half_h = h / 2.0f;

			float x1 = cx - half_w;
			float y1 = cy - half_h;
			float x2 = cx + half_w;
			float y2 = cy + half_h;

			result.push_back({ x1, y1, x2, y2 });
		}

		return result;
	}

	cv::Mat postprocess(const std::vector<float>& model_output) {
		size_t num_classes = classes_.size();
		size_t elements_per_detection = 4 + num_classes; // 4 for bbox + num_classes
		size_t num_detections = model_output.size() / elements_per_detection;

		std::cout << "Output shape: (" << num_detections << ", " << elements_per_detection << ")" << std::endl;

		std::vector<std::vector<float>> boxes;
		std::vector<float> scores;
		std::vector<int> labels;

		for (size_t i = 0; i < num_detections; ++i) {
			size_t offset = i * elements_per_detection;

			// 获取BoX (center_x, center_y, width, height)
			std::vector<float> box = {
				model_output[offset],     // cx
				model_output[offset + 1], // cy
				model_output[offset + 2], // w
				model_output[offset + 3]  // h
			};

			// 获取类别
			std::vector<float> class_scores(model_output.begin() + offset + 4,
				model_output.begin() + offset + 4 + num_classes);

			// 获取最大类别
			auto max_it = std::max_element(class_scores.begin(), class_scores.end());
			float max_score = *max_it;
			int class_id = static_cast<int>(std::distance(class_scores.begin(), max_it));

			if (max_score > conf_thres_) {
				boxes.push_back(box);
				scores.push_back(max_score);
				labels.push_back(class_id);
			}
		}

		// 坐标转换
		auto xyxy_boxes = bboxCxcywhToXyxy(boxes);

		std::cout << "Found " << xyxy_boxes.size() << " detections above threshold" << std::endl;

		// Draw detections
		for (size_t i = 0; i < xyxy_boxes.size(); ++i) {
			// Scale bounding boxes to original image dimensions
			std::vector<float> scaled_box = {
				xyxy_boxes[i][0] * img_width_,  // x1
				xyxy_boxes[i][1] * img_height_, // y1
				xyxy_boxes[i][2] * img_width_,  // x2
				xyxy_boxes[i][3] * img_height_  // y2
			};

			drawDetections(scaled_box, scores[i], labels[i]);
		}

		return img_;
	}

	cv::Mat main() {
		try {
			// Preprocess
			std::vector<float> input_data = preprocess();

			// Prepare input tensor
			std::vector<int64_t> input_shape = { 1, 3, input_height_, input_width_ };
			Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
				memory_info_, input_data.data(), input_data.size(),
				input_shape.data(), input_shape.size());

			// Get input/output names
			Ort::AllocatorWithDefaultOptions allocator;
			auto input_name = session_->GetInputNameAllocated(0, allocator);
			auto output_name = session_->GetOutputNameAllocated(0, allocator);

			const char* input_names[] = { input_name.get() };
			const char* output_names[] = { output_name.get() };

			// Run inference
			std::cout << "Running inference..." << std::endl;
			auto output_tensors = session_->Run(Ort::RunOptions{ nullptr },
				input_names, &input_tensor, 1,
				output_names, 1);

			// Process output
			float* output_data = output_tensors[0].GetTensorMutableData<float>();
			auto output_shape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();

			size_t output_size = 1;
			for (auto dim : output_shape) {
				output_size *= dim;
			}

			std::cout << "Output tensor size: " << output_size << std::endl;

			std::vector<float> model_output(output_data, output_data + output_size);

			return postprocess(model_output);

		}
		catch (const std::exception& e) {
			std::cerr << "Error in main pipeline: " << e.what() << std::endl;
			return cv::Mat();
		}
	}
};

int main() {
	try {
		// 创建检测器实例,使用与Python代码相同的参数
		RTDETR detector("model.onnx",
			"bus.jpg",
			0.5f);

		// 执行检测并获取输出图像
		cv::Mat output_image = detector.main();

		if (output_image.empty()) {
			std::cerr << "Error: Failed to process image" << std::endl;
			return -1;
		}

		// 显示带注释的输出图像
		cv::namedWindow("Output", cv::WINDOW_NORMAL);
		cv::imshow("Output", output_image);
		cv::waitKey(0);
		cv::destroyAllWindows();

		std::cout << "Detection completed successfully!" << std::endl;

	}
	catch (const std::exception& e) {
		std::cerr << "Error: " << e.what() << std::endl;
		return -1;
	}

	return 0;
}

运行结果:


网站公告

今日签到

点亮在社区的每一天
去签到