当前位置: 首页 > article >正文

split_conversion 先将 LabelMe 标注的 json 转成 YOLO 训练用的 txt,再直接按照 8:1:1 的比例划分成训练集、验证集、测试集

第一章  使用说明

类别映射在代码中修改(--class-map 的默认值),其余四个参数通过命令行传入:

--json-folder:json文件夹路径

--txt-folder:转换成功后txt的存放路径

--images-dir:图片文件夹路径

--save-dir:划分完成后数据集的保存路径(其下会生成 images 和 labels 两个文件夹,各自包含 train、val、test)

终端命令行:python split_conversion.py --json-folder "" --txt-folder "" --images-dir "" --save-dir ""

""处是四个路径

第二章 导包

import json
import os
from tqdm import tqdm
import shutil
import random
import argparse

第三章 转换格式函数

def labelme_json_to_yolo(txt_save_path, json_path, class_map):
    """Convert one LabelMe JSON annotation file into a YOLO TXT label file.

    Each shape whose label appears in ``class_map`` is reduced to the
    axis-aligned bounding box of its points, clipped to the image, and written
    as one ``class_id x_center y_center width height`` line with all four
    values normalised by the image size. The TXT file is named after the
    image referenced by the JSON ``imagePath`` field.

    :param txt_save_path: directory the TXT file is written to
    :param json_path: path of the LabelMe JSON file
    :param class_map: mapping from label name to class id (e.g. {"person": 0})
    :return: path of the TXT file that was written
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    img_height = data["imageHeight"]
    img_width = data["imageWidth"]

    # LabelMe on Windows stores imagePath with backslashes, which
    # os.path.basename does not treat as separators on POSIX systems.
    img_name = os.path.basename(data["imagePath"].replace("\\", "/"))
    txt_name = os.path.splitext(img_name)[0] + ".txt"
    txt_path = os.path.join(txt_save_path, txt_name)

    lines = []
    for shape in data["shapes"]:
        label = shape["label"]
        if label not in class_map:
            continue  # skip classes that are not in the mapping
        cls_id = class_map[label]

        points = shape["points"]
        if not points:
            continue  # nothing to build a box from

        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]

        # Clip the box to the image so the normalised values never leave
        # [0, 1]; annotations dragged past the border would otherwise yield
        # widths/heights greater than 1.
        xmin = min(max(min(x_coords), 0), img_width)
        xmax = min(max(max(x_coords), 0), img_width)
        ymin = min(max(min(y_coords), 0), img_height)
        ymax = min(max(max(y_coords), 0), img_height)

        # YOLO format: normalised centre coordinates plus width and height.
        x_center = (xmin + xmax) / (2 * img_width)
        y_center = (ymin + ymax) / (2 * img_height)
        w = (xmax - xmin) / img_width
        h = (ymax - ymin) / img_height

        # Drop degenerate boxes (including boxes lying fully outside the image,
        # which collapse to zero size after clipping).
        if w > 0 and h > 0:
            lines.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
    return txt_path

第四章 批量转换函数

def batch_convert_labelme_to_yolo(json_folder, txt_folder, class_map):
    """Convert every LabelMe JSON file in a folder into YOLO TXT labels.

    The output folder is created when missing. Files are selected by a
    case-insensitive ``.json`` suffix and converted one by one with
    ``labelme_json_to_yolo`` while a progress bar is shown.

    :param json_folder: folder containing the LabelMe JSON files
    :param txt_folder: folder the TXT files are written to
    :param class_map: mapping from label name to class id (e.g. {"person": 0})
    """
    os.makedirs(txt_folder, exist_ok=True)

    # Collect the annotation files first so the progress bar knows the total.
    json_files = []
    for entry in os.listdir(json_folder):
        if entry.lower().endswith('.json'):
            json_files.append(entry)

    for json_file in tqdm(json_files, desc="转换中"):
        labelme_json_to_yolo(
            txt_folder,
            os.path.join(json_folder, json_file),
            class_map,
        )

    print(f"批量转换完成!共处理{len(json_files)}个JSON文件,保存到:{txt_folder}")

第五章 确认文件夹是否存在

def mkdir(path):
    """Create directory ``path`` (with any missing parents) if it is absent.

    ``exist_ok=True`` replaces the separate existence check, so there is no
    window between checking and creating in which another process could
    create the directory and make the call fail.

    :param path: directory to create
    :raises FileExistsError: if ``path`` exists but is not a directory
    """
    os.makedirs(path, exist_ok=True)

第六章 数据集划分函数(main)

def main(image_dir, txt_dir, save_dir, train_percent=0.8, val_percent=0.1):
    """Randomly split image/label pairs into train, val and test sets.

    Creates ``images/{train,val,test}`` and ``labels/{train,val,test}`` under
    ``save_dir`` and copies every ``.txt`` label from ``txt_dir`` together with
    the image of the same base name from ``image_dir`` into one of the splits.
    Whatever is not assigned to train or val goes to test.

    :param image_dir: folder containing the images
    :param txt_dir: folder containing the YOLO TXT labels
    :param save_dir: root folder of the split dataset
    :param train_percent: fraction of samples used for training
    :param val_percent: fraction of samples used for validation
    :return: dict with the number of samples per split
    :raises ValueError: if the fractions are negative or sum to more than 1
    :raises FileNotFoundError: if a label has no matching image
    """
    if train_percent < 0 or val_percent < 0 or train_percent + val_percent > 1:
        raise ValueError("train_percent and val_percent must be >= 0 and sum to at most 1")

    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')

    # Only real label files take part in the split; sorting makes the index
    # to file mapping independent of the directory listing order.
    label_files = sorted(
        f for f in os.listdir(txt_dir) if f.lower().endswith('.txt')
    )

    # Map base name -> image file name so images are not limited to '.jpg'.
    # An exact '.jpg' match wins when several images share a base name.
    images_by_stem = {}
    for file_name in sorted(os.listdir(image_dir)):
        stem, ext = os.path.splitext(file_name)
        if ext.lower() in image_exts and (stem not in images_by_stem or ext == '.jpg'):
            images_by_stem[stem] = file_name

    # Fail before copying anything instead of leaving a half-written dataset.
    missing = [f for f in label_files if os.path.splitext(f)[0] not in images_by_stem]
    if missing:
        raise FileNotFoundError(
            "No image found in {!r} for label file(s): {}".format(
                image_dir, ", ".join(missing[:10])
            )
        )

    split_dirs = {}
    for split in ('train', 'val', 'test'):
        img_out = os.path.join(save_dir, 'images', split)
        label_out = os.path.join(save_dir, 'labels', split)
        os.makedirs(img_out, exist_ok=True)
        os.makedirs(label_out, exist_ok=True)
        split_dirs[split] = (img_out, label_out)

    num_txt = len(label_files)
    indices = range(num_txt)
    num_train = int(num_txt * train_percent)
    num_val = int(num_txt * val_percent)

    # Sets give O(1) membership tests in the copy loop below.
    train = set(random.sample(indices, num_train))
    val_test = [i for i in indices if i not in train]
    val = set(random.sample(val_test, min(num_val, len(val_test))))

    print("训练集数目:{}, 验证集数目:{}, 测试集数目:{}".format(len(train), len(val), len(val_test) - len(val)))

    for i, label_file in enumerate(label_files):
        if i in train:
            split = 'train'
        elif i in val:
            split = 'val'
        else:
            split = 'test'
        img_out, label_out = split_dirs[split]

        stem = os.path.splitext(label_file)[0]
        image_file = images_by_stem[stem]
        shutil.copyfile(os.path.join(image_dir, image_file),
                        os.path.join(img_out, image_file))
        shutil.copyfile(os.path.join(txt_dir, label_file),
                        os.path.join(label_out, stem + '.txt'))

    return {
        'train': len(train),
        'val': len(val),
        'test': len(val_test) - len(val),
    }

 第七章 主函数调用

if __name__ == "__main__":
    # Command-line entry point: convert the LabelMe JSON files to YOLO TXT
    # labels, then split images and labels into train/val/test sets.
    parser = argparse.ArgumentParser(description='Convert LabelMe JSON to YOLO TXT and split datasets')
    parser.add_argument('--json-folder', type=str, default=r'',
                        help='LabelMe JSON folder path')
    parser.add_argument('--txt-folder', type=str, default=r'',
                        help='YOLO TXT save path')
    # argparse passes the raw command-line string to ``type``; ``dict`` cannot
    # parse a string, so the mapping is read as a JSON object instead.
    parser.add_argument('--class-map', default={"自己的类别": 0}, type=json.loads,
                        help='Class name mapping as a JSON object (e.g. {"person": 0})')
    parser.add_argument('--images-dir', type=str, default=r'', help='images path dir')
    parser.add_argument('--save-dir', default=r'', type=str, help='save dir')
    args = parser.parse_args()

    if not isinstance(args.class_map, dict):
        parser.error('--class-map must be a JSON object such as {"person": 0}')

    # The path defaults are empty placeholders; stop with a clear message
    # rather than failing later on an empty path.
    for option in ('json_folder', 'txt_folder', 'images_dir', 'save_dir'):
        if not getattr(args, option):
            parser.error('--{} is required'.format(option.replace('_', '-')))

    json_folder = args.json_folder
    txt_folder = args.txt_folder
    class_map = args.class_map
    image_dir = args.images_dir
    save_dir = args.save_dir

    # Convert LabelMe JSON to YOLO TXT.
    batch_convert_labelme_to_yolo(json_folder, txt_folder, class_map)
    # Split the dataset.
    main(image_dir, txt_folder, save_dir)

全部代码如下: 

import json
import os
import shutil
import random
import argparse

try:
    from tqdm import tqdm  # optional progress-bar library
except ImportError:
    def tqdm(iterable, **kwargs):
        """Fallback used when tqdm is not installed: iterate without a bar."""
        return iterable


def labelme_json_to_yolo(txt_save_path, json_path, class_map):
    """Convert one LabelMe JSON annotation file into a YOLO TXT label file.

    Each shape whose label appears in ``class_map`` is reduced to the
    axis-aligned bounding box of its points, clipped to the image, and written
    as one ``class_id x_center y_center width height`` line with all four
    values normalised by the image size. The TXT file is named after the
    image referenced by the JSON ``imagePath`` field.

    :param txt_save_path: directory the TXT file is written to
    :param json_path: path of the LabelMe JSON file
    :param class_map: mapping from label name to class id (e.g. {"person": 0})
    :return: path of the TXT file that was written
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    img_height = data["imageHeight"]
    img_width = data["imageWidth"]

    # LabelMe on Windows stores imagePath with backslashes, which
    # os.path.basename does not treat as separators on POSIX systems.
    img_name = os.path.basename(data["imagePath"].replace("\\", "/"))
    txt_name = os.path.splitext(img_name)[0] + ".txt"
    txt_path = os.path.join(txt_save_path, txt_name)

    lines = []
    for shape in data["shapes"]:
        label = shape["label"]
        if label not in class_map:
            continue  # skip classes that are not in the mapping
        cls_id = class_map[label]

        points = shape["points"]
        if not points:
            continue  # nothing to build a box from

        x_coords = [p[0] for p in points]
        y_coords = [p[1] for p in points]

        # Clip the box to the image so the normalised values never leave
        # [0, 1]; annotations dragged past the border would otherwise yield
        # widths/heights greater than 1.
        xmin = min(max(min(x_coords), 0), img_width)
        xmax = min(max(max(x_coords), 0), img_width)
        ymin = min(max(min(y_coords), 0), img_height)
        ymax = min(max(max(y_coords), 0), img_height)

        # YOLO format: normalised centre coordinates plus width and height.
        x_center = (xmin + xmax) / (2 * img_width)
        y_center = (ymin + ymax) / (2 * img_height)
        w = (xmax - xmin) / img_width
        h = (ymax - ymin) / img_height

        # Drop degenerate boxes (including boxes lying fully outside the image,
        # which collapse to zero size after clipping).
        if w > 0 and h > 0:
            lines.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
    return txt_path


def batch_convert_labelme_to_yolo(json_folder, txt_folder, class_map):
    """Convert every LabelMe JSON file in a folder into YOLO TXT labels.

    :param json_folder: folder containing the LabelMe JSON files
    :param txt_folder: folder the TXT files are written to (created if missing)
    :param class_map: mapping from label name to class id (e.g. {"person": 0})
    """
    os.makedirs(txt_folder, exist_ok=True)
    json_files = [f for f in os.listdir(json_folder) if f.lower().endswith('.json')]
    for json_file in tqdm(json_files, desc="转换中"):
        json_path = os.path.join(json_folder, json_file)
        labelme_json_to_yolo(txt_folder, json_path, class_map)
    print(f"批量转换完成!共处理{len(json_files)}个JSON文件,保存到:{txt_folder}")


def mkdir(path):
    """Create directory ``path`` (with any missing parents) if it is absent.

    :param path: directory to create
    :raises FileExistsError: if ``path`` exists but is not a directory
    """
    os.makedirs(path, exist_ok=True)


def main(image_dir, txt_dir, save_dir, train_percent=0.8, val_percent=0.1):
    """Randomly split image/label pairs into train, val and test sets.

    Creates ``images/{train,val,test}`` and ``labels/{train,val,test}`` under
    ``save_dir`` and copies every ``.txt`` label from ``txt_dir`` together with
    the image of the same base name from ``image_dir`` into one of the splits.
    Whatever is not assigned to train or val goes to test.

    :param image_dir: folder containing the images
    :param txt_dir: folder containing the YOLO TXT labels
    :param save_dir: root folder of the split dataset
    :param train_percent: fraction of samples used for training
    :param val_percent: fraction of samples used for validation
    :return: dict with the number of samples per split
    :raises ValueError: if the fractions are negative or sum to more than 1
    :raises FileNotFoundError: if a label has no matching image
    """
    if train_percent < 0 or val_percent < 0 or train_percent + val_percent > 1:
        raise ValueError("train_percent and val_percent must be >= 0 and sum to at most 1")

    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')

    # Only real label files take part in the split; sorting makes the index
    # to file mapping independent of the directory listing order.
    label_files = sorted(
        f for f in os.listdir(txt_dir) if f.lower().endswith('.txt')
    )

    # Map base name -> image file name so images are not limited to '.jpg'.
    # An exact '.jpg' match wins when several images share a base name.
    images_by_stem = {}
    for file_name in sorted(os.listdir(image_dir)):
        stem, ext = os.path.splitext(file_name)
        if ext.lower() in image_exts and (stem not in images_by_stem or ext == '.jpg'):
            images_by_stem[stem] = file_name

    # Fail before copying anything instead of leaving a half-written dataset.
    missing = [f for f in label_files if os.path.splitext(f)[0] not in images_by_stem]
    if missing:
        raise FileNotFoundError(
            "No image found in {!r} for label file(s): {}".format(
                image_dir, ", ".join(missing[:10])
            )
        )

    split_dirs = {}
    for split in ('train', 'val', 'test'):
        img_out = os.path.join(save_dir, 'images', split)
        label_out = os.path.join(save_dir, 'labels', split)
        mkdir(img_out)
        mkdir(label_out)
        split_dirs[split] = (img_out, label_out)

    num_txt = len(label_files)
    indices = range(num_txt)
    num_train = int(num_txt * train_percent)
    num_val = int(num_txt * val_percent)

    # Sets give O(1) membership tests in the copy loop below.
    train = set(random.sample(indices, num_train))
    val_test = [i for i in indices if i not in train]
    val = set(random.sample(val_test, min(num_val, len(val_test))))

    print("训练集数目:{}, 验证集数目:{}, 测试集数目:{}".format(len(train), len(val), len(val_test) - len(val)))

    for i, label_file in enumerate(label_files):
        if i in train:
            split = 'train'
        elif i in val:
            split = 'val'
        else:
            split = 'test'
        img_out, label_out = split_dirs[split]

        stem = os.path.splitext(label_file)[0]
        image_file = images_by_stem[stem]
        shutil.copyfile(os.path.join(image_dir, image_file),
                        os.path.join(img_out, image_file))
        shutil.copyfile(os.path.join(txt_dir, label_file),
                        os.path.join(label_out, stem + '.txt'))

    return {
        'train': len(train),
        'val': len(val),
        'test': len(val_test) - len(val),
    }


if __name__ == "__main__":
    # Command-line entry point: convert the LabelMe JSON files to YOLO TXT
    # labels, then split images and labels into train/val/test sets.
    parser = argparse.ArgumentParser(description='Convert LabelMe JSON to YOLO TXT and split datasets')
    parser.add_argument('--json-folder', type=str, default=r'',
                        help='LabelMe JSON folder path')
    parser.add_argument('--txt-folder', type=str, default=r'',
                        help='YOLO TXT save path')
    # argparse passes the raw command-line string to ``type``; ``dict`` cannot
    # parse a string, so the mapping is read as a JSON object instead.
    parser.add_argument('--class-map', default={"自己的类别": 0}, type=json.loads,
                        help='Class name mapping as a JSON object (e.g. {"person": 0})')
    parser.add_argument('--images-dir', type=str, default=r'', help='images path dir')
    parser.add_argument('--save-dir', default=r'', type=str, help='save dir')
    args = parser.parse_args()

    if not isinstance(args.class_map, dict):
        parser.error('--class-map must be a JSON object such as {"person": 0}')

    # The path defaults are empty placeholders; stop with a clear message
    # rather than failing later on an empty path.
    for option in ('json_folder', 'txt_folder', 'images_dir', 'save_dir'):
        if not getattr(args, option):
            parser.error('--{} is required'.format(option.replace('_', '-')))

    json_folder = args.json_folder
    txt_folder = args.txt_folder
    class_map = args.class_map
    image_dir = args.images_dir
    save_dir = args.save_dir

    # Convert LabelMe JSON to YOLO TXT.
    batch_convert_labelme_to_yolo(json_folder, txt_folder, class_map)
    # Split the dataset.
    main(image_dir, txt_folder, save_dir)

http://www.lryc.cn/news/2393321.html

相关文章:

  • 响应式系统与Spring Boot响应式应用开发
  • 【第1章 基础知识】1.8 在 Canvas 中使用 HTML 元素
  • c++流之sstream/堆or优先队列的应用[1]
  • SAR ADC 比较器噪声分析(二)
  • c#与java的相同点和不同点
  • phpmyadmin
  • 机器学习Day5-模型诊断
  • 如何将 WSL 的 Ubuntu-24.04 迁移到其他电脑
  • 金融欺诈有哪些检测手段
  • HTML5 全面知识点总结
  • vscode一直连接不上虚拟机或者虚拟机容器怎么办?
  • 初学c语言21(文件操作)
  • 数学复习笔记 21
  • 华为OD机试真题——数据分类(2025B卷:100分)Java/python/JavaScript/C++/C语言/GO六种最佳实现
  • JavaWeb开发基础Servlet生命周期与工作原理
  • 三防平板科普:有什么特殊功能?应用在什么场景?
  • 百度外链生态的优劣解构与优化策略深度研究
  • 笔记: 在WPF中ContentElement 和 UIElement 的主要区别
  • 项目中使用到了多个UI组件库,也使用了Tailwindcss,如何确保新开发的组件样式隔离?
  • AI提示工程(Prompt Engineering)高级技巧详解
  • 【速写】PPOTrainer样例与错误思考(少量DAPO)
  • 5.26 面经整理 360共有云 golang
  • 中国移动咪咕助力第五届全国人工智能大赛“AI+数智创新”专项赛道开展
  • 模具制造业数字化转型:精密模塑,以数字之力铸就制造基石
  • PECVD 生成 SiO₂ 的反应方程式
  • React与Vue核心区别对比
  • 2024 CKA模拟系统制作 | Step-By-Step | 17、题目搭建-排查故障节点
  • 如何将图像插入 PDF:最佳工具比较
  • Linux 文件 IO 性能监控与分析指南
  • ABP VNext + Apache Flink 实时流计算:打造高可用“交易风控”系统