当前位置：首页 > news >正文

【深度学习】MNN ImageProcess处理图像顺序，逻辑，均值，方差

news 2025/8/25 16:32:34

文章目录

介绍
Opencv numpy
等效的MNN处理

介绍

MNN ImageProcess处理图像是先resize还是后resize，均值方差怎么处理，是什么通道顺序？这篇文章告诉你答案。

Opencv numpy

这段代码是一个图像预处理函数，用于对输入的图像进行一系列处理，以便将其用于某些机器学习模型的输入。

cv2.imdecode(np.fromfile(imgpath, dtype=np.uint8), 1)：这行代码先用 np.fromfile 把文件内容读成字节数组，再用OpenCV库中的imdecode函数将其解码为图像矩阵。参数1即 cv2.IMREAD_COLOR，表示始终解码为 3 通道的 BGR 彩色图像（按原样解码对应的是 cv2.IMREAD_UNCHANGED，即 -1）。
cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR)：接下来，将图像调整大小为 (224, 224)，这是因为一些深度学习模型（如AlexNet、VGG等）需要固定大小的输入图像。
img = img.astype(np.float32)：将图像数据类型转换为 32 位浮点数，通常这是深度学习模型期望的输入类型。
img = img[..., ::-1]：颜色通道顺序调整，将图像从 BGR 格式转换为 RGB 格式。
img_norm_cfg：定义了图像的归一化参数，包括均值和标准差。这些参数用于将图像像素值标准化到一个较小的范围，以便模型更好地处理图像数据。
img -= img_norm_cfg['mean']：对图像进行均值归一化。
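img *= img_norm_cfg['std']：对图像进行标准差归一化。注意这里的 'std' 存放的是标准差的倒数（例如 0.01712 ≈ 1/58.395），所以用乘法而不是除法，对应 MNN 中的 normal：dst = (img - mean) * normal。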
img = img.transpose((2, 0, 1))：调整图像的维度顺序，将通道维度置于第一个位置。
img = np.expand_dims(img, axis=0)：在图像的第一个维度（批处理维度）上添加一个维度，使其成为形状为 (1, C, H, W) 的批量图像数据，其中 C 是通道数，H 和 W 是图像的高度和宽度。

最终，函数返回预处理后的图像数据，可以直接用于输入深度学习模型进行训练或推断。

    def preprocess(self, imgpath: str):
        """Load an image file and build a normalized (1, 3, 224, 224) float32 array.

        Pipeline: decode as 3-channel BGR, resize to 224x224 with bilinear
        interpolation, cast to float32, reverse the channel axis (BGR -> RGB),
        subtract the per-channel mean, multiply by the per-channel scale, then
        reorder HWC -> CHW and prepend a batch axis.

        Args:
            imgpath: Path of the image file to read.

        Returns:
            The preprocessed image with shape (1, 3, 224, 224), dtype float32.

        Raises:
            Exception: If the file content cannot be decoded into an image.
        """
        # Read the raw bytes ourselves and let OpenCV decode them from memory.
        encoded = np.fromfile(imgpath, dtype=np.uint8)
        img = cv2.imdecode(encoded, cv2.IMREAD_COLOR)  # img is a matrix (ndarray)
        if img is None:
            raise Exception("image is None:" + imgpath)

        img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR)
        # Cast to float32 first, then flip the last axis: BGR -> RGB.
        img = img.astype(np.float32)[..., ::-1]

        # 'std' holds multiplicative factors (they are applied with *=), i.e.
        # the result is (img - mean) * std.
        # NOTE(review): the mean values look like the common ImageNet means in
        # BGR order, yet they are applied after the RGB flip - confirm against
        # the training pipeline before changing anything.
        img_norm_cfg = dict(
            mean=[103.53, 116.28, 123.675],
            std=[0.01712, 0.01750, 0.01742],
        )
        img -= img_norm_cfg['mean']
        img *= img_norm_cfg['std']

        # HWC -> CHW, then add the leading batch axis: (1, C, H, W).
        chw = np.transpose(img, (2, 0, 1))
        return chw[np.newaxis, ...]

等效的MNN处理

下面是一个等效的MNN处理：

// Load the model and create the session.
//
// Builds an MNN interpreter from model_file_name, creates one session with
// MNN_FORWARD_AUTO, looks up the session's default input/output tensors and
// configures an ImageProcess that converts BGR bytes to RGB floats using
//     dst = (img - mean) * normal
//
// @param model_file_name  Path of the MNN model file.
// @return A ModelData holding the interpreter, session, input/output tensors
//         and the image processor. If the model cannot be loaded, or the
//         session or its tensors cannot be obtained, the returned ModelData is
//         left value-initialized; callers should check `interpreter` before
//         using it.
ModelData GetDetModel(const char* model_file_name) {
    // Value-initialize so the members start out in a defined state on the
    // early-return paths below (null for plain pointer / smart pointer members).
    ModelData modelData{};

    // MNN
    std::shared_ptr<MNN::Interpreter> interpreter(MNN::Interpreter::createFromFile(model_file_name));
    if (!interpreter) {
        // createFromFile yields a null pointer when the file cannot be loaded;
        // calling createSession on it would dereference null.
        return modelData;
    }

    MNN::ScheduleConfig config_s;
    config_s.type = MNN_FORWARD_AUTO;
    MNN::Session* mSession = interpreter->createSession(config_s);
    if (mSession == nullptr) {
        return modelData;
    }

    // A null name selects the session's default input / output tensor.
    MNN::Tensor* mInputTensor = interpreter->getSessionInput(mSession, nullptr);
    MNN::Tensor* mOutputTensor = interpreter->getSessionOutput(mSession, nullptr);
    if (mInputTensor == nullptr || mOutputTensor == nullptr) {
        return modelData;
    }

    // Input processing: this object later fills an MNN tensor from raw pixels.
    // dst = (img - mean) * normal
    MNN::CV::ImageProcess::Config config;
    config.sourceFormat = MNN::CV::ImageFormat::BGR;
    config.destFormat = MNN::CV::ImageFormat::RGB;

    // Per-channel offset and multiplicative scale; the 4th slot is unused
    // for 3-channel output and is kept at zero.
    const float mean_[4] = {103.53f, 116.28f, 123.675f, 0.0f};
    memcpy(config.mean, mean_, 4 * sizeof(float));
    const float normal_[4] = {0.01712f, 0.01750f, 0.01742f, 0.0f};
    memcpy(config.normal, normal_, 4 * sizeof(float));

    config.filterType = MNN::CV::NEAREST;
    config.wrap = MNN::CV::ZERO;
    std::shared_ptr<MNN::CV::ImageProcess> image_process(MNN::CV::ImageProcess::create(config));
    // No transform matrix is installed here; the original left this disabled:
    //    MNN::CV::Matrix transform;
    //    image_process->setMatrix(transform);

    modelData.interpreter = interpreter;
    modelData.session = mSession;
    modelData.mInputTensor = mInputTensor;
    modelData.mOutputTensor = mOutputTensor;
    modelData.image_process = image_process;
    return modelData;
}
// Release resources
void ReleaseDetModel(ModelData& modelData) {using namespace MNN;auto interpreter = modelData.interpreter;auto mSession = modelData.session;auto mInputTensor = modelData.mInputTensor;auto mOutputTensor = modelData.mOutputTensor;auto image_process = modelData.image_process;interpreter->releaseModel();interpreter->releaseSession(mSession);
}std::vector<float> RunDetModel(ModelData& modelData,  // 模型和会话cv::Mat& img_bgr)      // 图片 opencv mat
{using namespace MNN;auto interpreter = modelData.interpreter;auto mSession = modelData.session;auto mInputTensor = modelData.mInputTensor;auto mOutputTensor = modelData.mOutputTensor;auto image_process = modelData.image_process;cv::Mat srcimgx;srcimgx = img_bgr.clone();cv::resize(srcimgx, srcimgx, cv::Size(224, 224), 0, 0, cv::INTER_LINEAR);int img_resize_height = srcimgx.rows;int img_resize_width = srcimgx.cols;// resizeSession//    interpreter->resizeTensor(mInputTensor, {1, 3, img_resize_height, img_resize_width});//    interpreter->resizeSession(mSession);// 输入处理，形成一个mnn张量std::vector<int> shape = {1, 3, img_resize_height, img_resize_width};std::shared_ptr<MNN::Tensor> input_tensor(MNN::Tensor::create<float>(shape, nullptr, MNN::Tensor::CAFFE));image_process->convert(srcimgx.data, img_resize_width, img_resize_height, 0, input_tensor.get());// 给入mInputTensormInputTensor->copyFromHostTensor(input_tensor.get());// Run mSessioninterpreter->runSession(mSession);// Get outputauto nchwTensorOt = new Tensor(mOutputTensor, Tensor::CAFFE);// 拷贝出去mOutputTensor->copyToHostTensor(nchwTensorOt);// 使用auto type = nchwTensorOt->getType();auto size = nchwTensorOt->elementSize();std::vector<int> shape_out = nchwTensorOt->shape();// values 输出形状是 img_fp_height, img_fp_width，直接给到cv::Matauto values = nchwTensorOt->host<float>();// log values sizestd::vector<float> outimg(values, values + size);delete nchwTensorOt;return outimg;
}