X-AnyLabeling标注保存的json转yolo obb 5参数表示的txt文件
X-AnyLabeling标注后生成的是它自己定义的json文件。图片上同时有旋转框和水平矩形框,而软件自带的导出功能一次只能导出水平标签或旋转标签中的一种,所以我需要把全部标注统一转换为yolo obb 5参数表示的标签格式(水平框的角度按矩形处理)。
json文件
{
"version": "3.2.1",
"flags": {},
"shapes": [
{
"label": "cow",
"score": null,
"points": [
[
547.560975609756,
374.1463414634146
],
[
685.3270740146849,
332.2175289053928
],
[
779.0,
640.0
],
[
641.2339015950713,
681.9288125580218
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rotation",
"flags": {},
"attributes": {},
"kie_linking": [],
"direction": 5.987744470035866
},
{
"label": "cow",
"score": null,
"points": [
[
1008.0,
484.0
],
[
1179.0,
417.0
],
[
1311.0,
753.0
],
[
1140.0,
820.0
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rotation",
"flags": {},
"attributes": {},
"kie_linking": [],
"direction": 5.909756668300514
},
{
"label": "cow",
"score": null,
"points": [
[
1359.0,
423.0
],
[
1494.0,
442.0
],
[
1446.0,
779.0
],
[
1311.0,
760.0
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rotation",
"flags": {},
"attributes": {},
"kie_linking": [],
"direction": 0.1398223687841933
},
{
"label": "cow",
"score": null,
"points": [
[
1492.0,
450.0
],
[
1628.0,
468.0
],
[
1590.0,
755.0
],
[
1454.0,
738.0
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rotation",
"flags": {},
"attributes": {},
"kie_linking": [],
"direction": 0.13158814145792064
},
{
"label": "cow",
"score": null,
"points": [
[
1610.0,
468.0
],
[
1750.0,
453.0
],
[
1779.0,
735.0
],
[
1639.0,
749.0
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rotation",
"flags": {},
"attributes": {},
"kie_linking": [],
"direction": 6.17644963453045
},
{
"label": "head",
"score": null,
"points": [
[
641.4634146341463,
465.60975609756093
],
[
653.6585365853658,
465.60975609756093
],
[
653.6585365853658,
477.80487804878044
],
[
641.4634146341463,
477.80487804878044
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rectangle",
"flags": {},
"attributes": {},
"kie_linking": []
},
{
"label": "head",
"score": null,
"points": [
[
1169.5121951219512,
641.2195121951219
],
[
1184.1463414634145,
641.2195121951219
],
[
1184.1463414634145,
647.3170731707316
],
[
1169.5121951219512,
647.3170731707316
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rectangle",
"flags": {},
"attributes": {},
"kie_linking": []
},
{
"label": "head",
"score": null,
"points": [
[
1392.6829268292681,
625.3658536585365
],
[
1398.780487804878,
625.3658536585365
],
[
1398.780487804878,
638.780487804878
],
[
1392.6829268292681,
638.780487804878
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rectangle",
"flags": {},
"attributes": {},
"kie_linking": []
},
{
"label": "head",
"score": null,
"points": [
[
1529.2682926829268,
633.9024390243902
],
[
1535.3658536585365,
633.9024390243902
],
[
1535.3658536585365,
643.6585365853658
],
[
1529.2682926829268,
643.6585365853658
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rectangle",
"flags": {},
"attributes": {},
"kie_linking": []
},
{
"label": "head",
"score": null,
"points": [
[
1699.9999999999998,
643.6585365853658
],
[
1709.7560975609754,
643.6585365853658
],
[
1709.7560975609754,
647.3170731707316
],
[
1699.9999999999998,
647.3170731707316
]
],
"group_id": null,
"description": "",
"difficult": false,
"shape_type": "rectangle",
"flags": {},
"attributes": {},
"kie_linking": []
}
],
"imagePath": "..\\0000001.png",
"imageData": null,
"imageHeight": 1080,
"imageWidth": 1920
}
转换脚本json_convert_txt
import os
import json
import math
import cv2
import numpy as np
def get_label_map(label_map_path):
    """
    Load the class list file and build a class-name -> class-id dictionary.

    Each line of the file holds one class name; the id of a class is the
    zero-based index of the line it appears on.

    Args:
        label_map_path: Path to a UTF-8 text file with one class name per line.

    Returns:
        dict mapping class name (str) to class id (int).
    """
    label_map = {}
    # 'utf-8-sig' drops a leading byte-order mark if one is present (some
    # Windows editors write it). With plain 'utf-8' the first class name would
    # keep the BOM character and never match a label from the annotations.
    with open(label_map_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            name = line.strip()
            # Blank lines (typically a trailing newline) are not classes.
            # The index still counts every line, so ids of real classes are
            # the same as before.
            if name:
                label_map[name] = i
    return label_map
def convert_point_to_yolo(points, img_w, img_h):
    """
    Convert annotated polygon points (normally the 4 corners of a box) into
    the 5-parameter oriented-box representation.

    The box is the minimum-area rotated rectangle around ``points`` as
    returned by ``cv2.minAreaRect``.

    Args:
        points: Sequence of [x, y] pixel coordinates.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        [x_center, y_center, width, height, angle] where
        - x_center and width are divided by img_w,
        - y_center and height are divided by img_h (this is done for rotated
          boxes as well, so readers of the file must multiply back the same
          way),
        - width is the longer side of the rectangle,
        - angle is in radians, wrapped into [0, pi), and equals the OpenCV
          angle of the long side plus 90 degrees.
    """
    # minAreaRect needs a float32 array of shape (N, 1, 2); a plain Python
    # list is not accepted.
    points_np = np.array(points, dtype=np.float32).reshape(-1, 1, 2)
    (center_x, center_y), (w, h), angle = cv2.minAreaRect(points_np)

    # Make width the longer side. Swapping the two sides is the same as
    # turning the rectangle by 90 degrees, so the angle is adjusted too.
    if w < h:
        w, h = h, w
        angle += 90

    # Apply the +90 degree offset of this label format, then wrap into
    # [0, 180). A rectangle is unchanged by a 180 degree turn, so wrapping
    # does not alter the box; it only guarantees the documented range.
    # NOTE(review): the raw angle range of minAreaRect appears to differ
    # between OpenCV releases; without the wrap the result could exceed pi.
    angle_deg = (angle + 90) % 180.0
    angle_rad = angle_deg * math.pi / 180.0

    # Normalise by the image size.
    return [
        center_x / img_w,
        center_y / img_h,
        w / img_w,
        h / img_h,
        angle_rad,
    ]
def process_json_to_yolo_txt(json_dir, output_dir, label_map):
    """
    Convert every ``*.json`` annotation file in a folder into a YOLO OBB
    5-parameter ``*.txt`` file with the same base name.

    Args:
        json_dir: Folder containing the JSON annotation files.
        output_dir: Folder that receives the TXT files (created if missing).
        label_map: dict mapping class name to class id.

    Each output line is ``class_id x_center y_center width height angle``.
    Shapes whose label is not in ``label_map`` and shapes with too few points
    to span a rectangle are skipped with a warning.
    """
    os.makedirs(output_dir, exist_ok=True)

    json_files = [name for name in os.listdir(json_dir) if name.endswith('.json')]
    for json_file in json_files:
        json_path = os.path.join(json_dir, json_file)
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        img_w = data['imageWidth']
        img_h = data['imageHeight']

        lines = []
        for shape in data['shapes']:
            label = shape['label']
            points = shape['points']

            if label not in label_map:
                print(f"警告: 类别 '{label}' 未在 label_map.txt 中找到,跳过此标注。")
                continue

            # Some tools store a rectangle as two diagonal corners only;
            # expand it to four corners so it does not collapse to a line.
            if shape.get('shape_type') == 'rectangle' and len(points) == 2:
                (x1, y1), (x2, y2) = points
                points = [[x1, y1], [x2, y1], [x2, y2], [x1, y2]]

            # Fewer than three points (point, line, ...) cannot enclose an
            # area, so a box derived from them would be degenerate.
            if len(points) < 3:
                print(f"警告: {json_file} 中类别 '{label}' 的标注点数不足,无法构成矩形,跳过此标注。")
                continue

            class_id = label_map[label]
            # Rectangles and rotated boxes go through the same conversion.
            yolo_params = convert_point_to_yolo(points, img_w, img_h)
            lines.append(f"{class_id} {' '.join(map(str, yolo_params))}\n")

        # Write after all shapes were converted, so a failure during
        # conversion does not leave a half-written label file behind.
        txt_filename = os.path.splitext(json_file)[0] + '.txt'
        txt_path = os.path.join(output_dir, txt_filename)
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
if __name__ == "__main__":
    # Requires the opencv-python package:
    #   pip install opencv-python

    # Input / output locations.
    classes_path = r'E:\deeplearning\torch\cow_data\classes.txt'  # class list file
    annotation_dir = r'E:\deeplearning\torch\cow_data\json'  # folder with the JSON files
    labels_dir = r'E:\deeplearning\torch\cow_data\labels'  # folder for the YOLO TXT files

    # Build the class-name -> id mapping and show it.
    label_map = get_label_map(classes_path)
    print("加载的类别映射:", label_map)

    # Convert every JSON file, then report where the results are.
    process_json_to_yolo_txt(annotation_dir, labels_dir, label_map)
    print(f"所有JSON文件已转换完成,保存在 '{labels_dir}' 文件夹中。")
输出yolo obb 5参数表示
<class_id> <x_center> <y_center> <width> <height> <angle>
1 0.3359375 0.4287037037037037 0.003125 0.003703703703703704 0.0
1 0.609375 0.5796296296296296 0.004166666666666667 0.005555555555555556 0.0
1 0.7307291666666667 0.5574074074074075 0.0020833333333333333 0.003703703703703704 0.0
1 0.7958333333333333 0.5944444444444444 0.004166666666666667 0.003703703703703704 0.0
1 0.88671875 0.5888888888888889 0.0015625 0.003703703703703704 0.0
0 0.34510838541666666 0.4643143518518518 0.07709296874999999 0.2721395370370371 2.863569
0 0.6000128124999999 0.5552342592592593 0.0955703125 0.29858675925925926 2.743585
0 0.7328688020833333 0.5300550925925926 0.06547239583333334 0.25863305555555555 0.288572
0 0.8018531249999999 0.5706671296296296 0.07168947916666667 0.3166819444444445 0.336216
0 0.8861483333333333 0.5624073148148148 0.070671875 0.26672990740740743 0.045002
VOC的xml文件转yolo obb的5参数表示
这是用rolabelimg工具标注后保存的xml文件,我们需要yolo obb的5参数txt文件
<annotation verified="no">
<folder>camera</folder>
<filename>0000001</filename>
<path>E:\deeplearning\torch\cow_data\0000001.png</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>1920</width>
<height>1080</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<type>bndbox</type>
<name>head</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>642</xmin>
<ymin>461</ymin>
<xmax>648</xmax>
<ymax>465</ymax>
</bndbox>
</object>
<object>
<type>bndbox</type>
<name>head</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1166</xmin>
<ymin>623</ymin>
<xmax>1174</xmax>
<ymax>629</ymax>
</bndbox>
</object>
<object>
<type>bndbox</type>
<name>head</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1401</xmin>
<ymin>600</ymin>
<xmax>1405</xmax>
<ymax>604</ymax>
</bndbox>
</object>
<object>
<type>bndbox</type>
<name>head</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1524</xmin>
<ymin>640</ymin>
<xmax>1532</xmax>
<ymax>644</ymax>
</bndbox>
</object>
<object>
<type>bndbox</type>
<name>head</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1701</xmin>
<ymin>634</ymin>
<xmax>1704</xmax>
<ymax>638</ymax>
</bndbox>
</object>
<object>
<type>robndbox</type>
<name>cow</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<robndbox>
<cx>662.6081</cx>
<cy>501.4595</cy>
<w>148.0185</w>
<h>293.9107</h>
<angle>2.863569</angle>
</robndbox>
</object>
<object>
<type>robndbox</type>
<name>cow</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<robndbox>
<cx>1152.0246</cx>
<cy>599.653</cy>
<w>183.495</w>
<h>322.4737</h>
<angle>2.743585</angle>
</robndbox>
</object>
<object>
<type>robndbox</type>
<name>cow</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<robndbox>
<cx>1407.1081</cx>
<cy>572.4595</cy>
<w>125.707</w>
<h>279.3237</h>
<angle>0.288572</angle>
</robndbox>
</object>
<object>
<type>robndbox</type>
<name>cow</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<robndbox>
<cx>1539.558</cx>
<cy>616.3205</cy>
<w>137.6438</w>
<h>342.0165</h>
<angle>0.336216</angle>
</robndbox>
</object>
<object>
<type>robndbox</type>
<name>cow</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<robndbox>
<cx>1701.4048</cx>
<cy>607.3999</cy>
<w>135.69</w>
<h>288.0683</h>
<angle>0.045002</angle>
</robndbox>
</object>
</annotation>
转换脚本xml_convert_txt
import os
import xml.etree.ElementTree as ET
import math
# --- Path configuration ---
XML_FOLDER = r'E:\deeplearning\torch\cow_data\labels' # folder containing the input XML files
OUTPUT_FOLDER = r'E:\deeplearning\torch\cow_data\labels' # folder that receives the YOLO TXT files
CLASSES_FILE = r'E:\deeplearning\torch\cow_data\classes.txt' # class-name file, one name per line
# --- 函数定义 ---
def get_label_map(file_path):
    """Load class names and return a class-name -> class-id dictionary.

    The id of a class is the zero-based index of the line it appears on in
    ``file_path`` (one class name per line, UTF-8 text).
    """
    label_map = {}
    # 'utf-8-sig' removes a leading byte-order mark when present; otherwise
    # the first class name would carry it and never match an XML <name>.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            name = line.strip()
            # Skip blank lines (e.g. a trailing newline) instead of
            # registering an empty class name; line indices are kept as-is.
            if name:
                label_map[name] = i
    return label_map
def convert_to_yolo_obb(bbox, img_w, img_h, is_rotated=False):
    """
    Turn one bounding box into the 5-parameter YOLO OBB form.

    Args:
        bbox (dict): Box values, as numbers or numeric strings. A rotated box
            provides 'cx', 'cy', 'w', 'h', 'angle'; a regular box provides
            'xmin', 'ymin', 'xmax', 'ymax'.
        img_w (int): Image width.
        img_h (int): Image height.
        is_rotated (bool): True when ``bbox`` describes a rotated box.

    Returns:
        list: [x_center, y_center, width, height, angle]. The centre x and the
        width are divided by ``img_w``, the centre y and the height by
        ``img_h``. The angle is passed through unchanged for rotated boxes and
        is 0.0 for regular boxes.
    """
    if not is_rotated:
        # Regular box (<bndbox>): derive centre and size from the corners.
        left, top, right, bottom = (
            float(bbox[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        center_x = (left + right) / 2
        center_y = (top + bottom) / 2
        box_w = right - left
        box_h = bottom - top
        theta = 0.0  # an axis-aligned box has no rotation
    else:
        # Rotated box (<robndbox>): centre, size and angle are given directly;
        # the angle value from the XML is used as-is.
        center_x, center_y, box_w, box_h, theta = (
            float(bbox[key]) for key in ('cx', 'cy', 'w', 'h', 'angle')
        )

    # Normalise by the image size.
    return [
        center_x / img_w,
        center_y / img_h,
        box_w / img_w,
        box_h / img_h,
        theta,
    ]
def process_xml_to_yolo_obb_txt(xml_dir, output_dir, label_map):
    """
    Convert every ``*.xml`` annotation file in a folder into a YOLO OBB
    5-parameter ``*.txt`` file with the same base name.

    Args:
        xml_dir: Folder containing the XML annotation files.
        output_dir: Folder that receives the TXT files (created if missing).
        label_map: dict mapping class name to class id.

    Each output line is ``class_id x_center y_center width height angle``.
    Files that cannot be parsed or lack a <size> element are skipped, as are
    objects with an unknown class or without box data; each case prints a
    message.
    """
    os.makedirs(output_dir, exist_ok=True)

    xml_files = [name for name in os.listdir(xml_dir) if name.endswith('.xml')]
    for xml_file in xml_files:
        xml_path = os.path.join(xml_dir, xml_file)
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError:
            print(f"跳过格式不正确的XML文件: {xml_file}")
            continue

        # Image size is needed for normalisation.
        size = root.find('size')
        if size is None:
            print(f"跳过缺少图像尺寸信息的XML文件: {xml_file}")
            continue
        img_w = int(size.find('width').text)
        img_h = int(size.find('height').text)

        txt_filename = os.path.splitext(xml_file)[0] + '.txt'
        txt_path = os.path.join(output_dir, txt_filename)
        with open(txt_path, 'w', encoding='utf-8') as f:
            for obj in root.findall('object'):
                obj_name = obj.find('name').text
                if obj_name not in label_map:
                    print(f"警告: 类别 '{obj_name}' 未在 {CLASSES_FILE} 中找到,跳过此标注。")
                    continue
                class_id = label_map[obj_name]

                robndbox = obj.find('robndbox')
                bndbox = obj.find('bndbox')
                # The <type> element decides the box kind when present. Files
                # without it would otherwise raise on ``.text`` of None, so
                # fall back to which box element exists.
                bbox_type = obj.findtext('type')
                if bbox_type is None:
                    is_rotated = robndbox is not None
                else:
                    is_rotated = (bbox_type == 'robndbox')

                box = robndbox if is_rotated else bndbox
                if box is None:
                    print(f"警告: {xml_file} 中类别 '{obj_name}' 的标注缺少边界框数据,跳过此标注。")
                    continue

                if is_rotated:
                    tags = ('cx', 'cy', 'w', 'h', 'angle')
                else:
                    tags = ('xmin', 'ymin', 'xmax', 'ymax')
                bbox_data = {tag: box.find(tag).text for tag in tags}

                yolo_params = convert_to_yolo_obb(bbox_data, img_w, img_h, is_rotated)
                f.write(f"{class_id} {' '.join(map(str, yolo_params))}\n")

    print(f"转换完成。文件已保存到 '{output_dir}' 文件夹。")
# --- 主程序入口 ---
if __name__ == "__main__":
    # Run the conversion only when both configured inputs exist; otherwise
    # report the first one that is missing (the XML folder is checked first).
    if os.path.exists(XML_FOLDER) and os.path.exists(CLASSES_FILE):
        label_map = get_label_map(CLASSES_FILE)
        print("已加载类别映射:", label_map)
        process_xml_to_yolo_obb_txt(XML_FOLDER, OUTPUT_FOLDER, label_map)
    elif not os.path.exists(XML_FOLDER):
        print(f"错误: 输入文件夹未找到: {XML_FOLDER}")
    else:
        print(f"错误: 类别文件未找到: {CLASSES_FILE}")
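输出yolo obb 5参数表示,每行格式为:
<class_id> <x_center> <y_center> <width> <height> <angle>
注意:下面贴出的示例数据每行是 <class_id> <x1> <y1> <x2> <y2> <x3> <y3> <x4> <y4>(4个归一化角点坐标),属于yolo obb的8参数(四角点)表示,并不是本脚本输出的5参数格式。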
0 0.28518800813008127 0.3464317976513098 0.3569411843826484 0.3076088230605489 0.40572916666666664 0.5925925925925926 0.33397599041409964 0.6314155671833536
0 0.525 0.44814814814814813 0.6140625 0.3861111111111111 0.6828125 0.6972222222222222 0.59375 0.7592592592592593
0 0.7078125 0.39166666666666666 0.778125 0.40925925925925927 0.753125 0.7212962962962963 0.6828125 0.7037037037037037
0 0.7770833333333333 0.4166666666666667 0.8479166666666667 0.43333333333333335 0.828125 0.6990740740740741 0.7572916666666667 0.6833333333333333
0 0.8385416666666666 0.43333333333333335 0.9114583333333334 0.41944444444444445 0.9265625 0.6805555555555556 0.8536458333333333 0.6935185185185185
1 0.3340955284552845 0.43112014453477865 0.3404471544715447 0.43112014453477865 0.3404471544715447 0.44241192411924113 0.3340955284552845 0.44241192411924113
1 0.6091209349593496 0.5937217705510388 0.6167428861788617 0.5937217705510388 0.6167428861788617 0.59936766034327 0.6091209349593496 0.59936766034327
1 0.7253556910569104 0.5790424570912375 0.7285315040650406 0.5790424570912375 0.7285315040650406 0.5914634146341463 0.7253556910569104 0.5914634146341463
1 0.7964939024390244 0.5869467028003613 0.7996697154471544 0.5869467028003613 0.7996697154471544 0.5959801264679313 0.7964939024390244 0.5959801264679313
1 0.8854166666666665 0.5959801264679313 0.8904979674796747 0.5959801264679313 0.8904979674796747 0.59936766034327 0.8854166666666665 0.59936766034327
验证工具check_yolo
导入图片和标注的yolo obb 5参数 txt文件,验证标注结果是否正确
import os
import cv2
import numpy as np
import math
# --- Path configuration ---
IMAGE_DIR = r'E:\deeplearning\torch\cow_data' # folder containing the images
LABEL_DIR = r'E:\deeplearning\torch\cow_data\labels' # folder containing the YOLO TXT files
LABEL_MAP_FILE = r'E:\deeplearning\torch\cow_data\classes.txt' # class-name file, one name per line
OUTPUT_DIR = r'E:\deeplearning\torch\cow_data\labels' # folder where the visualised images are saved
def get_label_map(label_map_path):
    """Load the class list file and return a class-id -> class-name dictionary.

    The id of a class is the zero-based index of the line it appears on in
    ``label_map_path`` (one class name per line, UTF-8 text).
    """
    label_map = {}
    # 'utf-8-sig' removes a leading byte-order mark when present, so the
    # first class name is not drawn with a stray BOM character in front.
    with open(label_map_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            name = line.strip()
            # Blank lines (e.g. a trailing newline) are not classes; line
            # indices are kept so the ids of real classes do not change.
            if name:
                label_map[i] = name
    return label_map
def draw_rotated_bbox(img, bbox, class_name, color):
    """
    Draw one rotated bounding box and its class name onto ``img`` in place.

    Args:
        img: Image array; its first two dimensions are read as height, width.
        bbox: [x_center, y_center, width, height, angle]. The centre x and the
            width are multiplied by the image width, the centre y and the
            height by the image height; the angle is in radians.
        class_name: Text drawn next to the box.
        color: Colour passed to the OpenCV drawing calls.
    """
    img_h, img_w = img.shape[:2]

    # Scale back to pixels but keep the fractional part: truncating centre and
    # size to integers here shifts and shrinks the box by up to a pixel, which
    # is very visible on boxes that are only a few pixels wide.
    x_c = bbox[0] * img_w
    y_c = bbox[1] * img_h
    w = bbox[2] * img_w
    h = bbox[3] * img_h
    angle_rad = bbox[4]

    # Radians -> degrees, minus the 90 degree offset of this label format.
    # NOTE(review): this assumes the label file stores the angle with a +90
    # degree offset; files that store the raw box angle would be drawn turned
    # by 90 degrees - confirm against the tool that produced the labels.
    angle_deg = angle_rad * 180 / math.pi - 90

    # Corner points of the rotated rectangle, rounded to the nearest pixel
    # only at the very end.
    rect = ((x_c, y_c), (w, h), angle_deg)
    box = np.round(cv2.boxPoints(rect)).astype(np.intp)

    cv2.drawContours(img, [box], 0, color, 2)

    # Put the class name just above the box; if that would leave the image at
    # the top, put it just below the top edge of the box instead. The x
    # position is clamped so the text does not start left of the image.
    label_size, _ = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    text_x = max(int(np.min(box[:, 0])), 0)
    text_y = int(np.min(box[:, 1])) - 5
    if text_y < 0:
        text_y = int(np.min(box[:, 1])) + label_size[1] + 5
    cv2.putText(img, class_name, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA)
def main():
    """
    Draw the boxes of every YOLO OBB label file onto its image and save the
    result, so the labels can be checked visually.

    For each ``*.txt`` file in LABEL_DIR the ``.png`` image with the same base
    name is read from IMAGE_DIR, all valid label lines are drawn, and the
    image is written to OUTPUT_DIR. Missing or unreadable images are skipped
    with a warning; blank lines are ignored and lines that do not have exactly
    six fields are skipped with a warning.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    label_map = get_label_map(LABEL_MAP_FILE)
    print("加载的类别映射:", label_map)

    # One random colour per class; the fixed seed keeps colours identical
    # between runs.
    np.random.seed(0)
    colors = {class_id: [int(c) for c in np.random.randint(0, 256, 3)] for class_id in label_map}

    label_files = [name for name in os.listdir(LABEL_DIR) if name.endswith('.txt')]
    for label_file in label_files:
        # Locate the image that belongs to this label file.
        img_name = os.path.splitext(label_file)[0] + '.png'
        img_path = os.path.join(IMAGE_DIR, img_name)
        if not os.path.exists(img_path):
            print(f"警告: 未找到图片文件 {img_path},跳过。")
            continue

        img = cv2.imread(img_path)
        if img is None:
            print(f"警告: 无法读取图片 {img_path},跳过。")
            continue

        label_path = os.path.join(LABEL_DIR, label_file)
        with open(label_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        for line_no, line in enumerate(lines, start=1):
            parts = line.split()
            # A blank line (e.g. at the end of the file) carries no box and
            # would otherwise fail on parts[0].
            if not parts:
                continue
            # Only "class_id + 5 parameters" can be drawn here; anything else
            # (for example 8-value corner-point lines) would be misread.
            if len(parts) != 6:
                print(f"警告: {label_file} 第 {line_no} 行不是5参数OBB格式(应为6个字段,实际 {len(parts)} 个),跳过。")
                continue

            class_id = int(parts[0])
            bbox = [float(p) for p in parts[1:]]

            class_name = label_map.get(class_id, 'unknown')
            color = colors.get(class_id, [255, 255, 255])
            draw_rotated_bbox(img, bbox, class_name, color)

        output_path = os.path.join(OUTPUT_DIR, img_name)
        cv2.imwrite(output_path, img)
        print(f"已处理并保存: {output_path}")


# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()