Deploying YOLO11 Segmentation Models with RKNN2 Quantization
1. Export ONNX
Do not export with the stock yolo11 (Ultralytics) library; that export cannot be quantized properly. Export with this repository instead:
https://github.com/airockchip/ultralytics_yolo11
After downloading the repository:
cd ultralytics_yolo11
Open ultralytics/cfg/default.yaml and set model to yolo11n-seg.pt (or the weights from your own training), then run the exporter from the repository root:
python ./ultralytics/engine/exporter.py
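Before converting, it can be worth sanity-checking the exported ONNX model. The sketch below is an illustrative check, not part of the original workflow (the file name yolo11n-seg.onnx is an assumption). It loads the model with onnxruntime and prints the output shapes; the RK-friendly seg export should produce the multi-branch head layout consumed by postprocess_for_rknn2 in section 3 (13 outputs) rather than a single fused tensor:

import onnxruntime as ort
import numpy as np

# Load the exported model on CPU and inspect its I/O
sess = ort.InferenceSession("yolo11n-seg.onnx", providers=['CPUExecutionProvider'])
inp = sess.get_inputs()[0]
print("input:", inp.name, inp.shape)

# Run a dummy 640x640 image through the network
dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)
outputs = sess.run(None, {inp.name: dummy})
for i, out in enumerate(outputs):
    print(f"output[{i}]: shape={out.shape}")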
2. Export rknn2
Run the following on the RK board:
python3 convert_seg.py ./cup_seg2_rk.onnx rk3588 i8 ./cup_seg2_rk.rknn2
convert_seg.py
import sys
from rknn.api import RKNN
import os
DATASET_PATH = '/home/cat/sch/dataset/image_paths.txt'
DEFAULT_RKNN_PATH = '../model/yolov5_seg.rknn'
DEFAULT_QUANT = True

def parse_arg():
    if len(sys.argv) < 3:
        print("Usage: python3 {} onnx_model_path [platform] [dtype(optional)] [output_rknn_path(optional)]".format(sys.argv[0]))
        print("       platform choose from [rk3562, rk3566, rk3568, rk3576, rk3588, rv1126b, rv1109, rv1126, rk1808]")
        print("       dtype choose from [i8, fp] for [rk3562, rk3566, rk3568, rk3576, rk3588, rv1126b]")
        print("       dtype choose from [u8, fp] for [rv1109, rv1126, rk1808]")
        exit(1)

    model_path = sys.argv[1]
    platform = sys.argv[2]

    do_quant = DEFAULT_QUANT
    if len(sys.argv) > 3:
        model_type = sys.argv[3]
        if model_type not in ['i8', 'u8', 'fp']:
            print("ERROR: Invalid model type: {}".format(model_type))
            exit(1)
        elif model_type in ['i8', 'u8']:
            do_quant = True
        else:
            do_quant = False

    if len(sys.argv) > 4:
        output_path = sys.argv[4]
    else:
        output_path = DEFAULT_RKNN_PATH

    return model_path, platform, do_quant, output_path

if __name__ == '__main__':
    model_path, platform, do_quant, output_path = parse_arg()

    # Create RKNN object
    rknn = RKNN(verbose=False)

    # Pre-process config
    print('--> Config model')
    # rknn.config(target_platform=platform)
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform=platform)
    print('done')

    # Load model
    print('--> Loading model')
    ret = rknn.load_onnx(model=model_path)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=do_quant, dataset=DATASET_PATH, auto_hybrid=False)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export rknn model
    print('--> Export rknn model')
    ret = rknn.export_rknn(output_path)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Release
    rknn.release()
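The dataset argument passed to rknn.build (DATASET_PATH above) is a plain-text file listing the images used for quantization calibration, one path per line. A minimal sketch of image_paths.txt, with illustrative paths:

/home/cat/sch/dataset/img_0001.jpg
/home/cat/sch/dataset/img_0002.jpg
/home/cat/sch/dataset/img_0003.jpg

Use images that are representative of the deployment scene; calibration quality directly affects i8 accuracy.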
3. rknn2 Inference
import time

import cv2
import numpy as np
import torch
from rknn.api import RKNN
# import onnxruntime as ort  # only needed for the commented-out ONNX comparison path

class YOLO11_SEG:
    def __init__(self, rknn_path, confidence_thres=0.45, iou_thres=0.45, class_name: dict = None):
        self.confidence_thres = confidence_thres
        self.iou_thres = iou_thres
        if class_name is None:
            self.classes_name = {0: "spoon_in_bowl", 1: "spoon_in_cup", 2: "spoon_hard", 3: "spoon_in_plate"}
        else:
            self.classes_name = class_name
        self.colors = np.random.uniform(0, 255, size=(len(self.classes_name), 3))
        # self.onnxsession = ort.InferenceSession("/home/cat/sch/models/cup_bowl_seg_m_rk.onnx",
        #                                         providers=['CPUExecutionProvider'])
        self.rknn = RKNN(verbose=False)
        self.rknn.load_rknn(path=rknn_path)
        self.rknn.init_runtime(target='rk3588')
        self.input_width = 640
        self.input_height = 640

    def run_inference(self, bgr_img):
        self.img_height, self.img_width = bgr_img.shape[:2]
        img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
        img, self.ratio, (self.dw, self.dh) = self.letterbox(img, new_shape=(self.input_width, self.input_height))
        inputs = [img.astype("uint8")]
        t1 = time.time()
        outputs = self.rknn.inference(inputs=inputs, data_format='nhwc')
        print("yolo: ", time.time() - t1)
        # if False:
        #     image_data = np.array(img) / 255.0
        #     image_data = np.transpose(image_data, (2, 0, 1))  # channel-first
        #     image_data = np.expand_dims(image_data, axis=0).astype(np.float32)
        #     image_data = image_data.copy()
        #     model_inputs = self.onnxsession.get_inputs()
        #     outputs = self.onnxsession.run(None, {model_inputs[0].name: image_data})
        if len(outputs) > 2:
            detections = self.postprocess_for_rknn2(outputs)
        else:
            detections = self.postprocess(outputs)
        return detections

    def release(self):
        self.rknn.release()

    def postprocess(self, outputs, nm=32):
        # Parse the model outputs
        predictions = outputs[0]  # detections [batch, 40, 8400], num_detections = 8400
        proto = outputs[1]        # mask prototypes [batch, 32, mask_h, mask_w]
        x = np.einsum("bcn->bnc", predictions)
        x = x[np.amax(x[..., 4:-nm], axis=-1) > self.confidence_thres]
        # Merge boxes, confidences, classes and mask coefficients
        x = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1), np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]]
        # NMS filtering
        x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], self.confidence_thres, self.iou_thres)]
        if len(x) > 0:
            # Box format conversion: cxcywh -> xyxy
            x[..., [0, 1]] -= x[..., [2, 3]] / 2
            x[..., [2, 3]] += x[..., [0, 1]]
            # Rescale boxes to match the original image size
            x[..., :4] -= [self.dw, self.dh, self.dw, self.dh]
            x[..., :4] /= self.ratio[0]
            # Clip boxes to the image boundaries
            x[..., [0, 2]] = x[:, [0, 2]].clip(0, self.img_width)
            x[..., [1, 3]] = x[:, [1, 3]].clip(0, self.img_height)
            masks = self.generate_masks(proto[0], x[:, 6:], x[:, :4])
            boxes = x[..., :4]
            scores = x[..., 4]
            classes = x[..., 5]
            return {"boxes": boxes, "scores": scores, "classes": classes.astype(int), "masks": masks}
        return {}

    def postprocess_for_rknn2(self, input_data):
        # input_data[0], input_data[4], and input_data[8] are detection box information
        # input_data[1], input_data[5], and input_data[9] are category score information
        # input_data[2], input_data[6], and input_data[10] are confidence score information
        # input_data[3], input_data[7], and input_data[11] are segmentation information
        # input_data[12] is the proto information
        proto = input_data[-1]
        boxes, scores, classes_conf, seg_part = [], [], [], []
        default_branch = 3
        pair_per_branch = len(input_data) // default_branch
        for i in range(default_branch):
            boxes.append(self.box_process(input_data[pair_per_branch * i]))
            classes_conf.append(input_data[pair_per_branch * i + 1])
            # the confidence branch is replaced by ones here
            scores.append(np.ones_like(input_data[pair_per_branch * i + 1][:, :1, :, :], dtype=np.float32))
            seg_part.append(input_data[pair_per_branch * i + 3])

        def sp_flatten(_in):
            ch = _in.shape[1]
            _in = _in.transpose(0, 2, 3, 1)
            return _in.reshape(-1, ch)

        boxes = [sp_flatten(_v) for _v in boxes]
        classes_conf = [sp_flatten(_v) for _v in classes_conf]
        scores = [sp_flatten(_v) for _v in scores]
        seg_part = [sp_flatten(_v) for _v in seg_part]

        boxes = np.concatenate(boxes)                # 8400*4, boxes in xyxy
        classes_conf = np.concatenate(classes_conf)  # 8400*4, per-class scores
        scores = np.concatenate(scores)              # 8400*1
        seg_part = np.concatenate(seg_part)          # 8400*32, mask coefficients

        boxes, classes, scores, seg_part = self.filter_boxes(boxes, scores, classes_conf, seg_part)
        # Merge boxes, confidences, classes and mask coefficients
        x = np.concatenate([boxes, scores.reshape(-1, 1), classes.reshape(-1, 1), seg_part], axis=1)
        # NMS filtering
        x = x[cv2.dnn.NMSBoxes(x[:, :4], x[:, 4], self.confidence_thres, self.iou_thres)]
        if len(x) > 0:
            # Boxes from box_process are already xyxy, so no cxcywh -> xyxy conversion here
            # x[..., [0, 1]] -= x[..., [2, 3]] / 2
            # x[..., [2, 3]] += x[..., [0, 1]]
            # Rescale boxes to match the original image size
            x[..., :4] -= [self.dw, self.dh, self.dw, self.dh]
            x[..., :4] /= self.ratio[0]
            # Clip boxes to the image boundaries
            x[..., [0, 2]] = x[:, [0, 2]].clip(0, self.img_width)
            x[..., [1, 3]] = x[:, [1, 3]].clip(0, self.img_height)
            masks = self.generate_masks(proto[0], x[:, 6:], x[:, :4])
            boxes = x[..., :4]
            scores = x[..., 4]
            classes = x[..., 5]
            return {"boxes": boxes, "scores": scores, "classes": classes.astype(int), "masks": masks}
        return {}

    def box_process(self, position):
        grid_h, grid_w = position.shape[2:4]
        col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
        col = col.reshape(1, 1, grid_h, grid_w)
        row = row.reshape(1, 1, grid_h, grid_w)
        grid = np.concatenate((col, row), axis=1)
        stride = np.array([self.input_height // grid_h, self.input_width // grid_w]).reshape(1, 2, 1, 1)
        position = self.dfl(position)
        box_xy = grid + 0.5 - position[:, 0:2, :, :]
        box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
        xyxy = np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)
        return xyxy

    def dfl(self, position):
        # Distribution Focal Loss (DFL)
        x = torch.tensor(position)
        n, c, h, w = x.shape
        p_num = 4
        mc = c // p_num
        y = x.reshape(n, p_num, mc, h, w)
        y = y.softmax(2)
        acc_matrix = torch.tensor(range(mc)).float().reshape(1, 1, mc, 1, 1)
        y = (y * acc_matrix).sum(2)
        return y.numpy()

    def generate_masks(self, proto: np.ndarray, mask_coeffs: np.ndarray, boxes: np.ndarray) -> np.ndarray:
        c, mh, mw = proto.shape
        masks = np.matmul(mask_coeffs, proto.reshape((c, -1))).reshape((-1, mh, mw)).transpose(1, 2, 0)  # HWN
        masks = np.ascontiguousarray(masks)
        # Scale masks from the proto (P3) size to the original input size, e.g. 160x160 -> 1280x720
        masks = self.scale_mask(masks, im0_shape=(self.img_height, self.img_width))
        masks = np.einsum("HWN -> NHW", masks)  # HWN -> NHW
        masks = self.crop_mask(masks, boxes)  # crop masks to their boxes
        masks = np.greater(masks, 0.5).astype(np.uint8)
        # cv2.imshow("mask", cv2.normalize(masks[0] * 255, None, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_8U))
        # cv2.waitKey(0)
        return masks  # binarized masks

    def filter_boxes(self, boxes, box_confidences, box_class_probs, seg_part):
        """Filter boxes with object threshold."""
        box_confidences = box_confidences.reshape(-1)
        candidate, class_num = box_class_probs.shape
        class_max_score = np.max(box_class_probs, axis=-1)
        classes = np.argmax(box_class_probs, axis=-1)
        _class_pos = np.where(class_max_score * box_confidences >= self.confidence_thres)
        scores = (class_max_score * box_confidences)[_class_pos]
        # print(np.max(class_max_score))
        boxes = boxes[_class_pos]
        classes = classes[_class_pos]
        seg_part = (seg_part * box_confidences.reshape(-1, 1))[_class_pos]
        return boxes, classes, scores, seg_part

    @staticmethod
    def scale_mask(masks, im0_shape, ratio_pad=None):
        """Scale masks back to the original image size.

        Args:
            masks (np.ndarray): resized and padded masks
            im0_shape (tuple): original image size
            ratio_pad (tuple): ratio of the padding to the original image

        Returns:
            masks (np.ndarray): rescaled masks
        """
        im1_shape = masks.shape[:2]
        if ratio_pad is None:  # compute the ratio
            gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain
            pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # padding
        else:
            pad = ratio_pad[1]
        # Compute the mask boundaries
        top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))  # y, x
        bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1))
        if len(masks.shape) < 2:
            raise ValueError(f'len of masks shape should be 2 or 3, but got {len(masks.shape)}')
        masks = masks[top:bottom, left:right]
        masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]), interpolation=cv2.INTER_LINEAR)  # resize with INTER_LINEAR
        if len(masks.shape) == 2:
            masks = masks[:, :, None]
        return masks

    @staticmethod
    def crop_mask(masks, boxes):
        """Crop masks so they align with their bounding boxes.

        Args:
            masks (np.ndarray): [n, h, w] mask array
            boxes (np.ndarray): [n, 4] bounding boxes

        Returns:
            (np.ndarray): cropped masks
        """
        n, h, w = masks.shape
        x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1)
        r = np.arange(w, dtype=x1.dtype)[None, None, :]
        c = np.arange(h, dtype=x1.dtype)[None, :, None]
        return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))

    @staticmethod
    def masks2segments(masks):
        """Convert masks to segment contours.

        Args:
            masks (np.ndarray): model output masks, shape (n, h, w)

        Returns:
            segments (List): list of segment contours
        """
        segments = []
        for x in masks.astype("uint8"):
            c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0]  # find contours
            if c:
                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
            else:
                c = np.zeros((0, 2))  # return an empty array when no segment is found
            segments.append(c.astype("float32"))
        return segments

    def letterbox(self, img, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleFill=False, scaleup=True):
        """Letterbox the image: keep the aspect ratio, pad, and resize to the target size."""
        shape = img.shape[:2]  # current image height and width
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        # Compute the scale ratio
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])  # take the smaller of the two ratios
        if not scaleup:  # only scale down, never up
            r = min(r, 1.0)
        # Unpadded size after scaling
        new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
        # Compute the required padding
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
        dw /= 2  # split the padding between both sides
        dh /= 2
        # Resize the image
        if shape[::-1] != new_unpad:  # resize only if the current size differs from new_unpad
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # Add borders to reach the target size
        top, bottom = int(round(dh)), int(round(dh))
        left, right = int(round(dw)), int(round(dw))
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        return img, (r, r), (dw, dh)

    def visualize(self, image: np.ndarray, results, alpha: float = 0.5) -> np.ndarray:
        """
        Args:
            image: original image in BGR format
            results: inference result dict
            alpha: mask transparency

        Returns:
            annotated image
        """
        img = image.copy()
        # Draw masks
        if 'masks' in results and len(results['masks']) > 0:
            for i, mask in enumerate(results['masks']):
                if mask.sum() == 0:  # skip empty masks
                    continue
                class_id = results['classes'][i]
                color = self.colors[class_id].tolist()
                colored_mask = np.zeros_like(img)
                colored_mask[mask > 0] = color
                img = cv2.addWeighted(img, 1, colored_mask, alpha, 0)
        # Draw boxes and labels
        if 'boxes' in results and len(results['boxes']) > 0:
            for box, class_id, score in zip(results['boxes'], results['classes'], results['scores']):
                x1, y1, x2, y2 = map(int, box)
                class_name = self.classes_name[class_id]
                color = self.colors[class_id].tolist()
                cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
                # Draw the label background
                label = f"{class_name}: {score:.2f}"
                (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv2.rectangle(img, (x1, y1 - label_height - 5), (x1 + label_width, y1), color, -1)
                # Draw the label text
                cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        return img

    @staticmethod
    def extract_bbox_regions(masks: np.ndarray, boxes: np.ndarray) -> list:
        """Extract the mask region inside each bounding box (without the zero values outside it).

        Returns:
            list where each element is the mask region inside the matching box, [h_box, w_box]
        """
        regions = []
        for i in range(len(boxes)):
            x1, y1, x2, y2 = map(int, boxes[i])
            region = masks[i, y1:y2, x1:x2].copy()
            regions.append(region)
        return regions

class_name = {0: "cup", 1: "cup_hard"}
# Load the model
model = YOLO11_SEG(rknn_path="cup_seg2_rki8.rknn2", class_name=class_name)
image = cv2.imread(path)  # replace path with your own image path
results = model.run_inference(image)
visualized = model.visualize(image, results)
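To complete the example, a short continuation is sketched below; the output file name and print format are illustrative, while run_inference, visualize, and release are the methods defined above:

cv2.imwrite("result.jpg", visualized)  # save the annotated image
if results:  # run_inference returns an empty dict when nothing is detected
    for box, cls, score in zip(results["boxes"], results["classes"], results["scores"]):
        print(f"{model.classes_name[cls]}: {score:.2f} at {box.astype(int).tolist()}")
model.release()  # free the RKNN runtime when done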