Yolo-world+Python-OpenCV之摄像头视频实时目标检测

发布于:2024-04-20 ⋅ 阅读:(30) ⋅ 点赞:(0)

上一次介绍了如何使用最基本的 Yolo-world 来做检测,现在我们再加上 OpenCV 来做一个实时检测的例子。

基本思路

1、读取摄像头视频流
2、将视频帧给yolo识别
3、根据识别结果 对视频进行绘制边框、加文字之类的

完整代码如下:

import datetime

from ultralytics import YOLO
import cv2
from loguru import logger as log

# Load the YOLO-World model from the weights file under model/.
model = YOLO("model/yolov8s-world.pt")

# Frame size (width, height) in pixels that frames are resized to before
# inference and drawing.
resize_width, resize_height = 1920, 1080


def predict(chosen_model, img, classes=None, conf=0.5):
    """Resize a frame to the configured size and run the detector on it.

    Args:
        chosen_model: Object exposing ``predict(img, ...)``.
        img: Image accepted by ``cv2.resize``.
        classes: Optional class filter forwarded as the model's ``classes``
            keyword. ``None`` or an empty collection means no filter: the
            keyword is then omitted entirely.
        conf: Confidence threshold forwarded as the model's ``conf`` keyword.

    Returns:
        Whatever ``chosen_model.predict`` returns. The model is run on the
        resized copy, not on the caller's original array.
    """
    img = cv2.resize(img, (resize_width, resize_height))

    # Build the keyword arguments once instead of duplicating the call; the
    # ``classes`` keyword is only sent when a non-empty filter was given.
    predict_kwargs = {"conf": conf, "save_txt": False}
    if classes:
        predict_kwargs["classes"] = classes

    return chosen_model.predict(img, **predict_kwargs)


def predict_and_detect(chosen_model, img, classes=None, conf=0.5):
    """Run detection on a frame and draw the timestamp, boxes and labels.

    Args:
        chosen_model: Model object handed through to ``predict``.
        img: Image accepted by ``cv2.resize``.
        classes: Optional class filter handed through to ``predict``;
            ``None`` or an empty collection means no filter.
        conf: Confidence threshold handed through to ``predict``.

    Returns:
        A ``(annotated_img, results)`` tuple: the resized frame with the
        overlays drawn on it, and the raw value returned by ``predict``.
    """
    green = (0, 255, 0)
    red = (0, 0, 255)

    img = cv2.resize(img, (resize_width, resize_height))

    # Capture the time now, but draw it only after inference so the detector
    # sees the clean frame rather than the overlay text.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    results = predict(chosen_model, img, classes, conf=conf)

    cv2.putText(img, timestamp, (10, 20),
                cv2.FONT_HERSHEY_PLAIN, 1, red, 1)

    person = 0
    for result in results:
        for box in result.boxes:
            label = result.names[int(box.cls[0])]
            corners = box.xyxy[0]
            x1, y1, x2, y2 = (int(corners[i]) for i in range(4))

            # People are drawn in green and counted; everything else is red.
            if label == "person":
                person += 1
                color = green
            else:
                color = red

            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            # Label text ("<class name> <confidence>") sits just above the box.
            cv2.putText(img, f"{label} {box.conf[0]:.2f}",
                        (x1, y1 - 10),
                        cv2.FONT_HERSHEY_PLAIN, 1, color, 1)

    if person > 0:
        log.error(f"当前发现有{person}个人")

    return img, results


def main():
    """Read frames from capture device 0, annotate them and show them live.

    Only one frame out of every ``skip_frames`` is run through the detector
    and displayed. The loop ends when a frame cannot be read or when the
    user presses ``q`` in the preview window.
    """
    # Process one frame out of every `skip_frames` captured frames
    # (2 -> every second frame is processed, the other one is dropped).
    skip_frames = 2
    frame_count = 0
    cap = cv2.VideoCapture(0)

    # try/finally guarantees the capture device and the window are released
    # even if detection or drawing raises in the middle of the loop.
    try:
        if not cap.isOpened():
            log.error("无法打开摄像头")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            if frame_count % skip_frames != 0:
                continue

            result_frame, _ = predict_and_detect(model, frame)

            cv2.imshow("video", result_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


# Start the capture loop only when this file is executed as a script.
if __name__ == "__main__":
    main()

运行效果:

在这里插入图片描述