
A complete YOLOv8/11-segment instance segmentation walkthrough: environment setup, data annotation, training, validation/prediction, and ONNX deployment (C++/Python)

Because the environment setup, training, and inference/prediction commands are nearly identical across YOLO's detection/segmentation/pose/OBB/classification models, they are not repeated here; see the **[YOLOv8/11-detect object detection tutorial]** linked below. This article focuses on the parts that actually differ: data annotation, format conversion, and model deployment.

[YOLOv8/11-detect tutorial] A complete YOLOv8/11-detect object detection walkthrough: environment setup, data annotation, training, validation/prediction, and ONNX deployment (C++/Python)
[Environment setup] Setting up a deep-learning environment and common libraries on Ubuntu/Debian from scratch (GPU driver, CUDA, cuDNN, PyTorch, OpenCV, PCL, CMake ...) [continuously maintained]
[Ultralytics GitHub] https://github.com/ultralytics/ultralytics
[YOLO documentation] https://docs.ultralytics.com/zh/

Contents

  • 1. Data preparation (annotation and conversion)
    • 1.1 Annotation with the SAM foundation model
    • 1.2 Auto-annotation with a pretrained ONNX model
  • 2. Model deployment
    • C++ version
    • Python version

1. Data preparation (annotation and conversion)

1.1 Annotation with the SAM foundation model

Segmentation annotation is much more tedious than detection: every object has to be traced as a polygon. X-AnyLabeling is recommended because it can load the SAM (Segment Anything) foundation model, which speeds things up considerably. Downloading the corresponding ONNX models requires access to GitHub; if you pick the model from the UI it is downloaded and configured automatically, but if you download the ONNX files manually you must write a model YAML config and load that file when selecting the model. The main thing to edit in this .yaml is the model paths. A typical file looks like this:
sam_vit_b_01ec64 ONNX model: [Baidu Netdisk] [CSDN free download]

type: segment_anything
name: segment_anything_vit_b_quant-r20230520
display_name: Segment Anything (ViT-Base Quant)
# encoder_model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.2.0/sam_vit_b_01ec64.encoder.quant.onnx
# decoder_model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.2.0/sam_vit_b_01ec64.decoder.quant.onnx
encoder_model_path: D:\CvHub_YoLo_obb\sam_vit_h_4b8939.encoder.quant.onnx
decoder_model_path: D:\CvHub_YoLo_obb\sam_vit_h_4b8939.decoder.quant.onnx
input_size: 1024
max_width: 1024
max_height: 682


When running SAM, make sure the machine has enough memory, otherwise inference will be very slow. If the segmentation is not precise enough, add more foreground (prior) points and background points (to trim off unwanted edges); the background points often matter a lot. Click Finish Object to end the SAM segmentation; if the result is still unsatisfactory, the polygon can be fine-tuned manually afterwards.

1.2 Auto-annotation with a pretrained ONNX model

This assumes you already have a model pretrained on your target classes and exported to ONNX; loading it in X-AnyLabeling lets you annotate an entire dataset in one pass. A practical workflow is to label a small subset by hand, train a first model, auto-label everything with it, and then only fine-tune the results. It is also the best option when you want to add data and retrain; the export command sketch below shows how to get the ONNX file.
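If the trained weights are still a .pt checkpoint, export them to ONNX first. A minimal command sketch (the run directory below is a placeholder; use your own training output path):

#export the trained weights to onnx (placeholder path)
yolo export model=runs/segment/train_x/weights/best.pt format=onnx opset=17 simplify=True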
1. First, write the config file yolov8n_seg.yaml:

type: yolov8_seg
name: yolov8n-seg-r20230620
display_name: YOLOv8n-Seg-My-Model Ultralytics  # name shown in the UI
#model_path: https://github.com/CVHub520/X-AnyLabeling/releases/download/v0.1.0/yolov8n-seg.onnx
model_path: /home/xxx/yolov11/yolo_base/model/yolo11n-seg.onnx  # the key field: path to your own onnx model
nms_threshold: 0.45
confidence_threshold: 0.25
classes:
  - person
  - bicycle
  - car
  - motorcycle
  - airplane
  - bus
  - train
  - truck
  - boat

2. Load the model, i.e. load this yaml file, and then run the one-shot annotation over all images.
After annotation is finished, the JSON labels must be converted to YOLO format. Run the script json2txt_seg.py below, adjusting the paths and the label names.

# json2txt_yolo11_seg.py
import cv2
import os
import json
import glob
import numpy as np

class_names = ["0"]
# class_names = ["0", "1", "2"]


def convert_json_label_to_yolov_seg_label():
    json_path = "./seg0613"  # folder holding the X-AnyLabeling json files
    json_files = glob.glob(json_path + "/*.json")
    # print(json_files)
    output_folder = "./seg_txt"  # folder for the converted txt labels
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    for json_file in json_files:
        # print(json_file)
        with open(json_file, 'r') as f:
            json_info = json.load(f)
        img = cv2.imread(os.path.join(json_path, json_info["imagePath"]))
        height, width, _ = img.shape
        np_w_h = np.array([[width, height]], np.int32)
        txt_file = os.path.join(output_folder, os.path.basename(json_file).replace(".json", ".txt"))
        with open(txt_file, "w") as f:
            for point_json in json_info["shapes"]:
                txt_content = ""
                np_points = np.array(point_json["points"], np.int32)
                label = point_json["label"]
                index = class_names.index(label)
                # print(type(label))
                # normalize the polygon points by image width/height
                norm_points = np_points / np_w_h
                norm_points_list = norm_points.tolist()
                txt_content += str(index) + " " + " ".join(
                    [" ".join([str(cell[0]), str(cell[1])]) for cell in norm_points_list]) + "\n"
                f.write(txt_content)


convert_json_label_to_yolov_seg_label()
print("end convert!!!")

After the conversion, run txt_split_yolo11.py to split the data into training, validation, and test sets.

# txt_split_yolo11.py
# Split images and labels into training / validation / test sets by ratio
import shutil
import random
import os

# original data paths
image_original_path = "./seg0613/"
label_original_path = "./seg_txt/"

cur_path = os.getcwd()  # current working directory
# cur_path = './chatou_seg'

# training set paths
train_image_path = os.path.join(cur_path, "data/images/train/")
train_label_path = os.path.join(cur_path, "data/labels/train/")
# validation set paths
val_image_path = os.path.join(cur_path, "data/images/val/")
val_label_path = os.path.join(cur_path, "data/labels/val/")
# test set paths
test_image_path = os.path.join(cur_path, "data/images/test/")
test_label_path = os.path.join(cur_path, "data/labels/test/")

# image list files
list_train = os.path.join(cur_path, "data/train.txt")
list_val = os.path.join(cur_path, "data/val.txt")
list_test = os.path.join(cur_path, "data/test.txt")

train_percent = 0.9
val_percent = 0.1
test_percent = 0.0


def del_file(path):
    # remove every file in the directory
    for i in os.listdir(path):
        os.remove(os.path.join(path, i))


def mkdir():
    # create (or empty) all output directories
    for p in (train_image_path, train_label_path,
              val_image_path, val_label_path,
              test_image_path, test_label_path):
        if not os.path.exists(p):
            os.makedirs(p)
        else:
            del_file(p)


def clearfile():
    for f in (list_train, list_val, list_test):
        if os.path.exists(f):
            os.remove(f)


def main():
    mkdir()
    clearfile()
    file_train = open(list_train, 'w')
    file_val = open(list_val, 'w')
    file_test = open(list_test, 'w')

    total_txt = os.listdir(label_original_path)
    num_txt = len(total_txt)
    list_all_txt = range(num_txt)

    num_train = int(num_txt * train_percent)
    num_val = int(num_txt * val_percent)
    num_test = num_txt - num_train - num_val

    # randomly pick num_train indices for the training set
    train = random.sample(list_all_txt, num_train)
    # indices not picked for training go to val/test
    val_test = [i for i in list_all_txt if i not in train]
    # pick num_val indices from the remainder; whatever is left becomes the test set
    val = random.sample(val_test, num_val)
    print("train: {}, val: {}, test: {}".format(len(train), len(val), len(val_test) - len(val)))

    for i in list_all_txt:
        name = total_txt[i][:-4]
        # images are assumed to be .bmp; change the extension if your dataset differs
        srcImage = image_original_path + name + '.bmp'
        srcLabel = label_original_path + name + ".txt"
        if i in train:
            dst_train_Image = train_image_path + name + '.bmp'
            dst_train_Label = train_label_path + name + '.txt'
            shutil.copyfile(srcImage, dst_train_Image)
            shutil.copyfile(srcLabel, dst_train_Label)
            file_train.write(dst_train_Image + '\n')
        elif i in val:
            dst_val_Image = val_image_path + name + '.bmp'
            dst_val_Label = val_label_path + name + '.txt'
            shutil.copyfile(srcImage, dst_val_Image)
            shutil.copyfile(srcLabel, dst_val_Label)
            file_val.write(dst_val_Image + '\n')
        else:
            dst_test_Image = test_image_path + name + '.bmp'
            dst_test_Label = test_label_path + name + '.txt'
            shutil.copyfile(srcImage, dst_test_Image)
            shutil.copyfile(srcLabel, dst_test_Label)
            file_test.write(dst_test_Image + '\n')

    file_train.close()
    file_val.close()
    file_test.close()


if __name__ == "__main__":
    main()

Training, prediction, and ONNX export commands for segmentation

#train (segmentation needs a -seg checkpoint)
yolo segment train data=dataset.yaml model=yolo11n-seg.pt epochs=300 imgsz=1920 amp=False batch=2 lr0=0.001 mosaic=0.05 patience=200
#predict
yolo segment predict model=runs/segment/train4/weights/best.pt source=/xxx/images/test save=True conf=0.4 iou=0.5
#export
yolo export model=/xxx/yolov11/runs/segment/train4/weights/best.pt format=onnx opset=17 simplify=True
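The commands above reference data=dataset.yaml, which is not shown in the original post. A minimal sketch matching the data/ layout produced by txt_split_yolo11.py (the class names are placeholders for your own labels):

# dataset.yaml (example; adjust the root path and class names)
path: ./data
train: images/train
val: images/val
test: images/test   # optional
names:
  0: class0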

2. Model deployment

C++ version

This part is mainly based on the open-source repo https://github.com/UNeedCryDear/yolov8-opencv-onnxruntime-cpp
Deployment is very similar to detection. The files yolov8_utils.h and yolov8_utils.cpp are not printed here because they are identical to the object-detection versions; refer to the repo above or to this article: A complete YOLOv8/11-detect object detection walkthrough: environment setup, data annotation, training, validation/prediction, and ONNX deployment (C++/Python).

Five files are involved: main.cpp, yolov8_utils.h, yolov8_seg_onnx.h, yolov8_utils.cpp, and yolov8_seg_onnx.cpp. As noted, yolov8_utils.h and yolov8_utils.cpp are the same as in the yolo8/11-detect project, so their code is omitted.

yolov8_seg_onnx.h — the segmentation header

#pragma once
#include <iostream>
#include <memory>
#include <numeric>
#include <opencv2/opencv.hpp>
#include "yolov8_utils.h"
#include <onnxruntime_cxx_api.h>
//#include <tensorrt_provider_factory.h>  // if using OrtTensorRTProviderOptionsV2
//#include <onnxruntime_c_api.h>

class Yolov8SegOnnx {
public:
    Yolov8SegOnnx() :_OrtMemoryInfo(Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtDeviceAllocator, OrtMemType::OrtMemTypeCPUOutput)) {};
    ~Yolov8SegOnnx() {
        if (_OrtSession != nullptr) delete _OrtSession;
    };  // delete _OrtMemoryInfo;

public:
    /** \brief Read onnx-model
    * \param[in] modelPath: onnx-model path
    * \param[in] isCuda: if true, use Ort-GPU, else run it on cpu.
    * \param[in] cudaID: if isCuda==true, run Ort-GPU on cudaID.
    * \param[in] warmUp: if isCuda==true, warm up GPU-model.
    */
    bool ReadModel(const std::string& modelPath, bool isCuda = false, int cudaID = 0, bool warmUp = true);

    /** \brief detect.
    * \param[in] srcImg: a 3-channel image.
    * \param[out] output: detection results of input image.
    */
    bool OnnxDetect(cv::Mat& srcImg, std::vector<OutputParams>& output);

    /** \brief detect, batch size = _batchSize
    * \param[in] srcImg: a batch of images.
    * \param[out] output: detection results of input images.
    */
    bool OnnxBatchDetect(std::vector<cv::Mat>& srcImg, std::vector<std::vector<OutputParams>>& output);

private:
    template <typename T>
    T VectorProduct(const std::vector<T>& v)
    {
        return std::accumulate(v.begin(), v.end(), 1, std::multiplies<T>());
    };
    int PreProcessing(const std::vector<cv::Mat>& srcImgs, std::vector<cv::Mat>& outSrcImgs, std::vector<cv::Vec4d>& params);

    const int _netWidth = 1920;   // ONNX net input width
    const int _netHeight = 1920;  // ONNX net input height
    int _batchSize = 1;           // set this for multi-batch inference
    bool _isDynamicShape = false; // whether the onnx model has a dynamic input shape
    float _classThreshold = 0.25;
    float _nmsThreshold = 0.45;
    float _maskThreshold = 0.4;

    // ONNXRUNTIME
    Ort::Env _OrtEnv = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR, "Yolov8");
    Ort::SessionOptions _OrtSessionOptions = Ort::SessionOptions();
    Ort::Session* _OrtSession = nullptr;
    Ort::MemoryInfo _OrtMemoryInfo;
#if ORT_API_VERSION < ORT_OLD_VISON
    char* _inputName, * _output_name0, * _output_name1;
#else
    std::shared_ptr<char> _inputName, _output_name0, _output_name1;
#endif
    std::vector<char*> _inputNodeNames;   // input node names
    std::vector<char*> _outputNodeNames;  // output node names
    size_t _inputNodesNum = 0;            // number of input nodes
    size_t _outputNodesNum = 0;           // number of output nodes
    ONNXTensorElementDataType _inputNodeDataType;  // input data type
    ONNXTensorElementDataType _outputNodeDataType;
    std::vector<int64_t> _inputTensorShape;  // input tensor shape
    std::vector<int64_t> _outputTensorShape;
    std::vector<int64_t> _outputMaskTensorShape;

public:
    std::vector<std::string> _className = {
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"
    };
};

yolov8_seg_onnx.cpp

// yolov8_seg_onnx.cpp
#include "yolov8_seg_onnx.h"
//using namespace std;
//using namespace cv;
//using namespace cv::dnn;
using namespace Ort;

bool Yolov8SegOnnx::ReadModel(const std::string& modelPath, bool isCuda, int cudaID, bool warmUp) {
    if (_batchSize < 1) _batchSize = 1;
    try {
        if (!CheckModelPath(modelPath))
            return false;
        std::vector<std::string> available_providers = GetAvailableProviders();
        auto cuda_available = std::find(available_providers.begin(), available_providers.end(), "CUDAExecutionProvider");

        if (isCuda && (cuda_available == available_providers.end())) {
            std::cout << "Your ORT build has no GPU support. Falling back to CPU." << std::endl;
            std::cout << "************* Infer model on CPU! *************" << std::endl;
        }
        else if (isCuda && (cuda_available != available_providers.end())) {
            std::cout << "************* Infer model on GPU! *************" << std::endl;
#if ORT_API_VERSION < ORT_OLD_VISON
            OrtCUDAProviderOptions cudaOption;
            cudaOption.device_id = cudaID;
            _OrtSessionOptions.AppendExecutionProvider_CUDA(cudaOption);
#else
            OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(_OrtSessionOptions, cudaID);
#endif
        }
        else {
            std::cout << "************* Infer model on CPU! *************" << std::endl;
        }
        //_OrtSessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

#ifdef _WIN32
        std::wstring model_path(modelPath.begin(), modelPath.end());
        _OrtSession = new Ort::Session(_OrtEnv, model_path.c_str(), _OrtSessionOptions);
#else
        _OrtSession = new Ort::Session(_OrtEnv, modelPath.c_str(), _OrtSessionOptions);
#endif

        Ort::AllocatorWithDefaultOptions allocator;
        // init input
        _inputNodesNum = _OrtSession->GetInputCount();
#if ORT_API_VERSION < ORT_OLD_VISON
        _inputName = _OrtSession->GetInputName(0, allocator);
        _inputNodeNames.push_back(_inputName);
#else
        _inputName = std::move(_OrtSession->GetInputNameAllocated(0, allocator));
        _inputNodeNames.push_back(_inputName.get());
#endif
        Ort::TypeInfo inputTypeInfo = _OrtSession->GetInputTypeInfo(0);
        auto input_tensor_info = inputTypeInfo.GetTensorTypeAndShapeInfo();
        _inputNodeDataType = input_tensor_info.GetElementType();
        _inputTensorShape = input_tensor_info.GetShape();

        if (_inputTensorShape[0] == -1) {
            _isDynamicShape = true;
            _inputTensorShape[0] = _batchSize;
        }
        if (_inputTensorShape[2] == -1 || _inputTensorShape[3] == -1) {
            _isDynamicShape = true;
            _inputTensorShape[2] = _netHeight;
            _inputTensorShape[3] = _netWidth;
        }
        // init output
        _outputNodesNum = _OrtSession->GetOutputCount();
        if (_outputNodesNum != 2) {
            std::cout << "This model has " << _outputNodesNum << " output(s), so it is not a segmentation model. Please check your model name or path!" << std::endl;
            return false;
        }
#if ORT_API_VERSION < ORT_OLD_VISON
        _output_name0 = _OrtSession->GetOutputName(0, allocator);
        _output_name1 = _OrtSession->GetOutputName(1, allocator);
#else
        _output_name0 = std::move(_OrtSession->GetOutputNameAllocated(0, allocator));
        _output_name1 = std::move(_OrtSession->GetOutputNameAllocated(1, allocator));
#endif
        Ort::TypeInfo type_info_output0(nullptr);
        Ort::TypeInfo type_info_output1(nullptr);
        bool flag = false;
#if ORT_API_VERSION < ORT_OLD_VISON
        flag = strcmp(_output_name0, _output_name1) < 0;
#else
        flag = strcmp(_output_name0.get(), _output_name1.get()) < 0;
#endif
        if (flag) {  // make sure "output0" is in front of "output1"
            type_info_output0 = _OrtSession->GetOutputTypeInfo(0);  // output0
            type_info_output1 = _OrtSession->GetOutputTypeInfo(1);  // output1
#if ORT_API_VERSION < ORT_OLD_VISON
            _outputNodeNames.push_back(_output_name0);
            _outputNodeNames.push_back(_output_name1);
#else
            _outputNodeNames.push_back(_output_name0.get());
            _outputNodeNames.push_back(_output_name1.get());
#endif
        }
        else {
            type_info_output0 = _OrtSession->GetOutputTypeInfo(1);  // output0
            type_info_output1 = _OrtSession->GetOutputTypeInfo(0);  // output1
#if ORT_API_VERSION < ORT_OLD_VISON
            _outputNodeNames.push_back(_output_name1);
            _outputNodeNames.push_back(_output_name0);
#else
            _outputNodeNames.push_back(_output_name1.get());
            _outputNodeNames.push_back(_output_name0.get());
#endif
        }
        auto tensor_info_output0 = type_info_output0.GetTensorTypeAndShapeInfo();
        _outputNodeDataType = tensor_info_output0.GetElementType();
        _outputTensorShape = tensor_info_output0.GetShape();
        auto tensor_info_output1 = type_info_output1.GetTensorTypeAndShapeInfo();
        //_outputMaskNodeDataType = tensor_info_output1.GetElementType();  // the same as output0
        //_outputMaskTensorShape = tensor_info_output1.GetShape();
        //if (_outputTensorShape[0] == -1)
        //{
        //	_outputTensorShape[0] = _batchSize;
        //	_outputMaskTensorShape[0] = _batchSize;
        //}
        //if (_outputMaskTensorShape[2] == -1) {
        //	//size_t ouput_rows = 0;
        //	//for (int i = 0; i < _strideSize; ++i) {
        //	//	ouput_rows += 3 * (_netWidth / _netStride[i]) * _netHeight / _netStride[i];
        //	//}
        //	//_outputTensorShape[1] = ouput_rows;
        //	_outputMaskTensorShape[2] = _segHeight;
        //	_outputMaskTensorShape[3] = _segWidth;
        //}

        // warm up
        if (isCuda && warmUp) {
            // dry run
            std::cout << "Start warming up" << std::endl;
            size_t input_tensor_length = VectorProduct(_inputTensorShape);
            float* temp = new float[input_tensor_length];
            std::vector<Ort::Value> input_tensors;
            std::vector<Ort::Value> output_tensors;
            input_tensors.push_back(Ort::Value::CreateTensor<float>(
                _OrtMemoryInfo, temp, input_tensor_length, _inputTensorShape.data(), _inputTensorShape.size()));
            for (int i = 0; i < 3; ++i) {
                output_tensors = _OrtSession->Run(Ort::RunOptions{ nullptr },
                    _inputNodeNames.data(), input_tensors.data(), _inputNodeNames.size(),
                    _outputNodeNames.data(), _outputNodeNames.size());
            }
            delete[] temp;
        }
    }
    catch (const std::exception&) {
        return false;
    }
    return true;
}

int Yolov8SegOnnx::PreProcessing(const std::vector<cv::Mat>& srcImgs, std::vector<cv::Mat>& outSrcImgs, std::vector<cv::Vec4d>& params) {
    outSrcImgs.clear();
    cv::Size input_size = cv::Size(_netWidth, _netHeight);
    for (int i = 0; i < srcImgs.size(); ++i) {
        cv::Mat temp_img = srcImgs[i];
        cv::Vec4d temp_param = { 1,1,0,0 };
        if (temp_img.size() != input_size) {
            cv::Mat borderImg;
            LetterBox(temp_img, borderImg, temp_param, input_size, false, false, true, 32);
            //std::cout << borderImg.size() << std::endl;
            outSrcImgs.push_back(borderImg);
            params.push_back(temp_param);
        }
        else {
            outSrcImgs.push_back(temp_img);
            params.push_back(temp_param);
        }
    }
    int lack_num = srcImgs.size() % _batchSize;
    if (lack_num != 0) {
        for (int i = 0; i < lack_num; ++i) {
            cv::Mat temp_img = cv::Mat::zeros(input_size, CV_8UC3);
            cv::Vec4d temp_param = { 1,1,0,0 };
            outSrcImgs.push_back(temp_img);
            params.push_back(temp_param);
        }
    }
    return 0;
}

bool Yolov8SegOnnx::OnnxDetect(cv::Mat& srcImg, std::vector<OutputParams>& output) {
    std::vector<cv::Mat> input_data = { srcImg };
    std::vector<std::vector<OutputParams>> tenp_output;
    if (OnnxBatchDetect(input_data, tenp_output)) {
        output = tenp_output[0];
        return true;
    }
    else return false;
}

bool Yolov8SegOnnx::OnnxBatchDetect(std::vector<cv::Mat>& srcImgs, std::vector<std::vector<OutputParams>>& output) {
    std::vector<cv::Vec4d> params;
    std::vector<cv::Mat> input_images;
    cv::Size input_size(_netWidth, _netHeight);
    // preprocessing (letterbox)
    PreProcessing(srcImgs, input_images, params);
    cv::Mat blob = cv::dnn::blobFromImages(input_images, 1 / 255.0, input_size, cv::Scalar(0, 0, 0), true, false);

    int64_t input_tensor_length = VectorProduct(_inputTensorShape);
    std::vector<Ort::Value> input_tensors;
    std::vector<Ort::Value> output_tensors;
    input_tensors.push_back(Ort::Value::CreateTensor<float>(
        _OrtMemoryInfo, (float*)blob.data, input_tensor_length, _inputTensorShape.data(), _inputTensorShape.size()));

    output_tensors = _OrtSession->Run(Ort::RunOptions{ nullptr },
        _inputNodeNames.data(), input_tensors.data(), _inputNodeNames.size(),
        _outputNodeNames.data(), _outputNodeNames.size());

    // post-process
    float* all_data = output_tensors[0].GetTensorMutableData<float>();
    _outputTensorShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();
    _outputMaskTensorShape = output_tensors[1].GetTensorTypeAndShapeInfo().GetShape();
    std::vector<int> mask_protos_shape = { 1,(int)_outputMaskTensorShape[1],(int)_outputMaskTensorShape[2],(int)_outputMaskTensorShape[3] };
    int mask_protos_length = VectorProduct(mask_protos_shape);
    int64_t one_output_length = VectorProduct(_outputTensorShape) / _outputTensorShape[0];
    int net_width = (int)_outputTensorShape[1];
    int socre_array_length = net_width - 4 - _outputMaskTensorShape[1];
    for (int img_index = 0; img_index < srcImgs.size(); ++img_index) {
        cv::Mat output0 = cv::Mat(cv::Size((int)_outputTensorShape[2], (int)_outputTensorShape[1]), CV_32F, all_data).t();  // [bs,116,8400] => [bs,8400,116]
        all_data += one_output_length;
        float* pdata = (float*)output0.data;
        int rows = output0.rows;
        std::vector<int> class_ids;                        // class ids
        std::vector<float> confidences;                    // confidence of each detection
        std::vector<cv::Rect> boxes;                       // bounding boxes
        std::vector<std::vector<float>> picked_proposals;  // output0[:,:, 5 + _className.size():net_width] ===> for mask
        for (int r = 0; r < rows; ++r) {    //stride
            cv::Mat scores(1, socre_array_length, CV_32F, pdata + 4);
            cv::Point classIdPoint;
            double max_class_socre;
            minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);
            max_class_socre = (float)max_class_socre;
            if (max_class_socre >= _classThreshold) {
                std::vector<float> temp_proto(pdata + 4 + socre_array_length, pdata + net_width);
                picked_proposals.push_back(temp_proto);
                // rect [x,y,w,h]
                float x = (pdata[0] - params[img_index][2]) / params[img_index][0];  // x
                float y = (pdata[1] - params[img_index][3]) / params[img_index][1];  // y
                float w = pdata[2] / params[img_index][0];  // w
                float h = pdata[3] / params[img_index][1];  // h
                int left = MAX(int(x - 0.5 * w + 0.5), 0);
                int top = MAX(int(y - 0.5 * h + 0.5), 0);
                class_ids.push_back(classIdPoint.x);
                confidences.push_back(max_class_socre);
                boxes.push_back(cv::Rect(left, top, int(w + 0.5), int(h + 0.5)));
            }
            pdata += net_width;  // next row
        }

        std::vector<int> nms_result;
        cv::dnn::NMSBoxes(boxes, confidences, _classThreshold, _nmsThreshold, nms_result);
        std::vector<std::vector<float>> temp_mask_proposals;
        cv::Rect holeImgRect(0, 0, srcImgs[img_index].cols, srcImgs[img_index].rows);
        std::vector<OutputParams> temp_output;
        for (int i = 0; i < nms_result.size(); ++i) {
            int idx = nms_result[i];
            OutputParams result;
            result.id = class_ids[idx];
            result.confidence = confidences[idx];
            result.box = boxes[idx] & holeImgRect;
            temp_mask_proposals.push_back(picked_proposals[idx]);
            temp_output.push_back(result);
        }

        MaskParams mask_params;
        mask_params.params = params[img_index];
        mask_params.srcImgShape = srcImgs[img_index].size();
        mask_params.netHeight = _netHeight;
        mask_params.netWidth = _netWidth;
        mask_params.maskThreshold = _maskThreshold;
        cv::Mat mask_protos = cv::Mat(mask_protos_shape, CV_32F, output_tensors[1].GetTensorMutableData<float>() + img_index * mask_protos_length);
        for (int i = 0; i < temp_mask_proposals.size(); ++i) {
            GetMask2(cv::Mat(temp_mask_proposals[i]).t(), mask_protos, temp_output[i], mask_params);
        }

        //************************************************************
        // If GetMask2() still reports errors, it is recommended to fall back to the older GetMask() below.
        //cv::Mat mask_proposals;
        //for (int i = 0; i < temp_mask_proposals.size(); ++i) {
        //	mask_proposals.push_back(cv::Mat(temp_mask_proposals[i]).t());
        //}
        //GetMask(mask_proposals, mask_protos, temp_output, mask_params);
        //************************************************************
        output.push_back(temp_output);
    }
    if (output.size())
        return true;
    else
        return false;
}

main.cpp — the yolov8_onnx function additionally merges the predicted instance masks into a single combined mask

#include <iostream>
#include <chrono>
#include <opencv2/opencv.hpp>
#include <math.h>
#include "yolov8_seg_onnx.h"
#include <time.h>
//#define  VIDEO_OPENCV  // if defined, use opencv for video.

using namespace std;
using namespace cv;
using namespace dnn;

template<typename _Tp>
std::vector<OutputParams> yolov8_onnx(_Tp& task, cv::Mat& img, std::string& model_path)
{
    // if (task.ReadModel(model_path, false, 0, true)) {
    // 	std::cout << "read net ok!" << std::endl;
    // }

    // generate random colors, one per class
    std::vector<cv::Scalar> color;
    srand(time(0));
    for (int i = 0; i < 80; i++) {
        int b = rand() % 256;
        int g = rand() % 256;
        int r = rand() % 256;
        color.push_back(cv::Scalar(b, g, r));
    }
    std::vector<OutputParams> result;
    if (task.OnnxDetect(img, result)) {
        //std::cout << "111" << std::endl;
        DrawPred(img, result, task._className, color, false);
        // iterate over all detections and merge their masks
        cv::Mat combinedMask = cv::Mat::zeros(img.size(), CV_8UC1);
        for (const auto& output : result) {
            // ROI of the current detection box
            cv::Mat roi = combinedMask(output.box);
            cv::Mat boxMaskBinary;
            output.boxMask.convertTo(boxMaskBinary, CV_8UC1);
            // merge the current mask into the combined mask;
            // OR is used here, change the merge policy if needed
            cv::bitwise_or(roi, boxMaskBinary, roi);
        }
        cv::imwrite("combinedMask.png", combinedMask);
    }
    else {
        std::cout << "Detect Failed!" << std::endl;
    }
    //system("pause");
    return result;
}

int main() {
    std::string img_path = "./images/_20250609_144103.bmp";
    //std::string img_path = "../rgb/2025-05-27_08-37-46_undistort_bright.bmp";
    std::string model_path_detect = "./model/0613.onnx";
    cv::Mat src = imread(img_path);
    cv::Mat img = src.clone();
    //Yolov8Onnx		task_detect_ort;
    Yolov8SegOnnx		task_segment_ort;
    if (task_segment_ort.ReadModel(model_path_detect, false, 0, true)) {
        std::cout << "read net ok!" << std::endl;
    }
    std::vector<OutputParams> results_detect;
    long long startTime = std::chrono::system_clock::now().time_since_epoch().count();  // ns
    results_detect = yolov8_onnx(task_segment_ort, img, model_path_detect);  // yolov8 onnxruntime
    long long timeNow = std::chrono::system_clock::now().time_since_epoch().count();
    double timeuse = (timeNow - startTime) * 0.000001;
    //std::cout << "end detect" << endl;
    std::cout << (timeNow - startTime) * 0.000001 << "ms\n";
    std::cout << "num: " << results_detect.size() << endl;
    OutputParams out_result;
    // for (int i = 0; i < results_detect.size(); i++) {
    // 	cout << results_detect[i].id << " " << task_detect_ort._Name[results_detect[i].id] << " conf: " << results_detect[i].confidence << " rect: " << results_detect[i].box << endl;
    // }
    cv::waitKey(0);
    return 0;
}
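The original post does not include build files. Below is a minimal CMakeLists.txt sketch, assuming OpenCV is installed system-wide and the ONNX Runtime release package is unpacked to a local directory (ORT_DIR is a placeholder you must change):

# CMakeLists.txt (sketch, not from the original repo)
cmake_minimum_required(VERSION 3.13)
project(yolov8_seg_demo)
set(CMAKE_CXX_STANDARD 17)
set(ORT_DIR /path/to/onnxruntime)   # assumed onnxruntime install dir
find_package(OpenCV REQUIRED)
add_executable(yolov8_seg_demo main.cpp yolov8_utils.cpp yolov8_seg_onnx.cpp)
target_include_directories(yolov8_seg_demo PRIVATE ${OpenCV_INCLUDE_DIRS} ${ORT_DIR}/include)
target_link_directories(yolov8_seg_demo PRIVATE ${ORT_DIR}/lib)
target_link_libraries(yolov8_seg_demo ${OpenCV_LIBS} onnxruntime)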


Python version

Python deployment is comparatively simple: change the model and image paths below and run the script directly.
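The script only depends on OpenCV, NumPy, and ONNX Runtime; assuming a fresh environment, something like the following installs them (swap in onnxruntime-gpu if you want CUDA inference):

pip install onnxruntime opencv-python numpy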

import cv2
import numpy as np
import onnxruntime as ort
import timeclasses = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck',8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear',22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase',29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat',35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple',48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut',55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet',62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave',69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase',76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}class Colors:"""This class provides methods to work with the Ultralytics color palette, including converting hex color codes toRGB values.Attributes:palette (list of tuple): List of RGB color values.n (int): The number of colors in the palette.pose_palette (np.array): A specific color palette array with dtype np.uint8."""def __init__(self):"""Initialize colors as hex = matplotlib.colors.TABLEAU_COLORS.values()."""hexs = ('FF3838', 'FF9D97', 'FF701F', 'FFB21D', 'CFD231', '48F90A', '92CC17', '3DDB86', '1A9334', '00D4BB','2C99A8', '00C2FF', '344593', '6473FF', '0018EC', '8438FF', '520085', 'CB38FF', 'FF95C8', 'FF37C7')self.palette = [self.hex2rgb(f'#{c}') for c in hexs]self.n = len(self.palette)self.pose_palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102], [230, 230, 0], [255, 153, 255],[153, 204, 255], [255, 102, 255], [255, 51, 255], [102, 178, 255], [51, 153, 255],[255, 153, 153], [255, 102, 102], [255, 51, 51], [153, 255, 153], [102, 255, 102],[51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0], [255, 255, 255]],dtype=np.uint8)def __call__(self, i, bgr=False):"""Converts hex color codes to RGB values."""c = self.palette[int(i) % self.n]return (c[2], c[1], c[0]) if bgr else c@staticmethoddef hex2rgb(h):"""Converts hex color codes to RGB values (i.e. 
default PIL order)."""return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))class YOLOv8Seg:"""YOLOv8 segmentation model."""def __init__(self, onnx_model):"""Initialization.Args:onnx_model (str): Path to the ONNX model."""# Build Ort sessionself.session = ort.InferenceSession(onnx_model,providers=['CUDAExecutionProvider', 'CPUExecutionProvider']if ort.get_device() == 'GPU' else ['CPUExecutionProvider'])# Numpy dtype: support both FP32 and FP16 onnx modelself.ndtype = np.half if self.session.get_inputs()[0].type == 'tensor(float16)' else np.single# Get model width and height(YOLOv8-seg only has one input)self.model_height, self.model_width = [x.shape for x in self.session.get_inputs()][0][-2:]# Load COCO class namesself.classes = classes# Create color paletteself.color_palette = Colors()def __call__(self, im0, conf_threshold=0.4, iou_threshold=0.45, nm=32):"""The whole pipeline: pre-process -> inference -> post-process.Args:im0 (Numpy.ndarray): original input image.conf_threshold (float): confidence threshold for filtering predictions.iou_threshold (float): iou threshold for NMS.nm (int): the number of masks.Returns:boxes (List): list of bounding boxes.segments (List): list of segments.masks (np.ndarray): [N, H, W], output masks."""# Pre-processim, ratio, (pad_w, pad_h) = self.preprocess(im0)# Ort inferencepreds = self.session.run(None, {self.session.get_inputs()[0].name: im})# Post-processboxes, segments, masks = self.postprocess(preds,im0=im0,ratio=ratio,pad_w=pad_w,pad_h=pad_h,conf_threshold=conf_threshold,iou_threshold=iou_threshold,nm=nm)return boxes, segments, masksdef preprocess(self, img):"""Pre-processes the input image.Args:img (Numpy.ndarray): image about to be processed.Returns:img_process (Numpy.ndarray): image preprocessed for inference.ratio (tuple): width, height ratios in letterbox.pad_w (float): width padding in letterbox.pad_h (float): height padding in letterbox."""# Resize and pad input image using letterbox() (Borrowed from Ultralytics)shape = img.shape[:2]  # original image shapenew_shape = (self.model_height, self.model_width)r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])ratio = r, rnew_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2  # wh paddingif shape[::-1] != new_unpad:  # resizeimg = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114))# Transforms: HWC to CHW -> BGR to RGB -> div(255) -> contiguous -> add axis(optional)img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0img_process = img[None] if len(img.shape) == 3 else imgreturn img_process, ratio, (pad_w, pad_h)def postprocess(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32):"""Post-process the prediction.Args:preds (Numpy.ndarray): predictions come from ort.session.run().im0 (Numpy.ndarray): [h, w, c] original input image.ratio (tuple): width, height ratios in letterbox.pad_w (float): width padding in letterbox.pad_h (float): height padding in letterbox.conf_threshold (float): conf threshold.iou_threshold (float): iou threshold.nm (int): the number of masks.Returns:boxes (List): list of bounding boxes.segments (List): list of segments.masks (np.ndarray): [N, H, W], output masks."""x, protos = preds[0], preds[1]  
# Two outputs: predictions and protos# Transpose the first output: (Batch_size, xywh_conf_cls_nm, Num_anchors) -> (Batch_size, Num_anchors, xywh_conf_cls_nm)x = np.einsum('bcn->bnc', x)# Predictions filtering by conf-thresholdx = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold]# Create a new matrix which merge these(box, score, cls, nm) into one# For more details about `numpy.c_()`: https://numpy.org/doc/1.26/reference/generated/numpy.c_.htmlx = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1), np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]]# NMS filteringx = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], conf_threshold, iou_threshold)]# Decode and returnif len(x) > 0:# Bounding boxes format change: cxcywh -> xyxyx[..., [0, 1]] -= x[..., [2, 3]] / 2x[..., [2, 3]] += x[..., [0, 1]]# Rescales bounding boxes from model shape(model_height, model_width) to the shape of original imagex[..., :4] -= [pad_w, pad_h, pad_w, pad_h]x[..., :4] /= min(ratio)# Bounding boxes boundary clampx[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])# Process masksmasks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape)# Masks -> Segments(contours)segments = self.masks2segments(masks)return x[..., :6], segments, masks  # boxes, segments, maskselse:return [], [], []@staticmethoddef masks2segments(masks):"""It takes a list of masks(n,h,w) and returns a list of segments(n,xy)Args:masks (numpy.ndarray): the output of the model, which is a tensor of shape (batch_size, 160, 160).Returns:segments (List): list of segment masks."""segments = []for x in masks.astype('uint8'):c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0]  # CHAIN_APPROX_SIMPLEif c:c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)else:c = np.zeros((0, 2))  # no segments foundsegments.append(c.astype('float32'))return segments@staticmethoddef crop_mask(masks, boxes):"""It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.Args:masks (Numpy.ndarray): [n, h, w] tensor of masks.boxes (Numpy.ndarray): [n, 4] tensor of bbox coordinates in relative point form.Returns:(Numpy.ndarray): The masks are being cropped to the bounding box."""n, h, w = masks.shapex1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1)r = np.arange(w, dtype=x1.dtype)[None, None, :]c = np.arange(h, dtype=x1.dtype)[None, :, None]return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))def process_mask(self, protos, masks_in, bboxes, im0_shape):"""Takes the output of the mask head, and applies the mask to the bounding boxes. 
This produces masks of higher qualitybut is slower.Args:protos (numpy.ndarray): [mask_dim, mask_h, mask_w].masks_in (numpy.ndarray): [n, mask_dim], n is number of masks after nms.bboxes (numpy.ndarray): bboxes re-scaled to original image shape.im0_shape (tuple): the size of the input image (h,w,c).Returns:(numpy.ndarray): The upsampled masks."""c, mh, mw = protos.shapemasks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0)  # HWNmasks = np.ascontiguousarray(masks)masks = self.scale_mask(masks, im0_shape)  # re-scale mask from P3 shape to original input image shapemasks = np.einsum('HWN -> NHW', masks)  # HWN -> NHWmasks = self.crop_mask(masks, bboxes)return np.greater(masks, 0.5)@staticmethoddef scale_mask(masks, im0_shape, ratio_pad=None):"""Takes a mask, and resizes it to the original image size.Args:masks (np.ndarray): resized and padded masks/images, [h, w, num]/[h, w, 3].im0_shape (tuple): the original image shape.ratio_pad (tuple): the ratio of the padding to the original image.Returns:masks (np.ndarray): The masks that are being returned."""im1_shape = masks.shape[:2]if ratio_pad is None:  # calculate from im0_shapegain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain  = old / newpad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh paddingelse:pad = ratio_pad[1]# Calculate tlbr of masktop, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))  # y, xbottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1))if len(masks.shape) < 2:raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')masks = masks[top:bottom, left:right]masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]),interpolation=cv2.INTER_LINEAR)  # INTER_CUBIC would be betterif len(masks.shape) == 2:masks = masks[:, :, None]return masksdef draw_and_visualize(self, im, bboxes, segments, vis=True, save=False):"""Draw and visualize results.Args:im (np.ndarray): original image, shape [h, w, c].bboxes (numpy.ndarray): [n, 4], n is number of bboxes.segments (List): list of segment masks.vis (bool): imshow using OpenCV.save (bool): save image annotated.Returns:None"""# Draw rectangles and polygonsim_canvas = im.copy()for (*box, conf, cls_), segment in zip(bboxes, segments):# draw contour and fill maskcv2.polylines(im, np.int32([segment]), True, (255, 255, 255), 2)  # white borderlinecv2.fillPoly(im_canvas, np.int32([segment]), self.color_palette(int(cls_), bgr=True))# draw bbox rectanglecv2.rectangle(im, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),self.color_palette(int(cls_), bgr=True), 1, cv2.LINE_AA)cv2.putText(im, f'{self.classes[cls_]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),cv2.FONT_HERSHEY_SIMPLEX, 0.7, self.color_palette(int(cls_), bgr=True), 2, cv2.LINE_AA)# Mix imageim = cv2.addWeighted(im_canvas, 0.3, im, 0.7, 0)return imif __name__ == '__main__':# 模型路径model_path = "yolov8n-seg.onnx"# 实例化模型model = YOLOv8Seg(model_path)conf = 0.35iou = 0.45# 三种模式 1为图片预测,并显示结果图片;2为摄像头检测,并实时显示FPSmode = 1# opencv 读取图片img = cv2.imread('street.jpg')# 推理boxes, segments, _ = model(img, conf_threshold=conf, iou_threshold=iou)# 画图if len(boxes) > 0:output_image = model.draw_and_visualize(img, boxes, segments, vis=False, save=True)else:output_image = imgprint("图片完成检测")cv2.imshow("seg", output_image)cv2.imwrite('image_seg.jpg', output_image)cv2.waitKey(0)cv2.destroyAllWindows()