// ===== Part 1: build a TensorRT engine and serialize it to model/mlp.engine =====
#include<iostream>
#include<NvInfer.h>
#include <fstream>
#include <assert.h>
class TRTLogger : public nvinfer1::ILogger {void log(Severity severity, const char *msg) noexcept override {if (severity != Severity::kINFO) {std::cout << msg << std::endl;}}
}gLogger;int main() {TRTLogger logger;nvinfer1::IBuilder *builder = nvinfer1::createInferBuilder(logger);auto explicitBatch = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);nvinfer1::INetworkDefinition *network = builder->createNetworkV2(explicitBatch);nvinfer1::IBuilderConfig *config = builder->createBuilderConfig();config->setMaxWorkspaceSize(1 << 20);nvinfer1::ICudaEngine *engine = builder->buildEngineWithConfig(*network, *config);if (!engine) {std::cout << "创建失败" << std::endl;return -1;}nvinfer1::IHostMemory *serialized_engine = engine->serialize();std::ofstream outfile("model/mlp.engine", std::ios::binary);assert(outfile.is_open() && "打开失败");outfile.write((char *)serialized_engine->data(), serialized_engine->size());outfile.close();}
// ===== Part 2: runtime inference (deserialize the engine and run it) =====
#include <iostream>
#include <vector>
#include <fstream>
#include <cassert>
#include "cuda_runtime.h"
#include"NvInfer.h"class TRTLogger : public nvinfer1::ILogger {void log(Severity severity, const char *msg) noexcept override {if (severity != Severity::kINFO) {std::cout << msg << std::endl;}}
}gLogger;
std::vector<unsigned char>loadEngineModel(const std::string &filename) {std::ifstream file(filename, std::ios::binary); assert(file.is_open && "打开文件失败");file.seekg(0, std::ios::end);size_t size = file.tellg(); std::vector<unsigned char> data(size); file.seekg(0, std::ios::beg); file.read((char *)data.data(), size); file.close();return data;}int main() {TRTLogger logger;nvinfer1::IRuntime *runtime = nvinfer1::createInferRuntime(logger);auto engineModel = loadEngineModel("/mlp.engine");nvinfer1::ICudaEngine *engine = runtime->deserializeCudaEngine(engineModel.data(),engineModel.size(),nullptr);if (!engine) {std::cout << "反序列化失败" << std::endl;return -1;}nvinfer1::IExecutionContext *context = engine->createExecutionContext();float *host_input_data = new float[3]{ 2,4,8 }; int input_data_size = 3 * sizeof(float); float *device_input_data = nullptr; float *host_output_data = new float[2]; int output_data_size = 2 * sizeof(float); float *device_output_data = nullptr; cudaMalloc((void **)&device_input_data, input_data_size);cudaMalloc((void **)&device_output_data, output_data_size);cudaStream_t stream = nullptr;cudaStreamCreate(&stream);cudaMemcpyAsync(device_input_data, host_input_data, input_data_size, cudaMemcpyHostToDevice,stream);float * bindings[] = { device_input_data,device_output_data };bool success = context->enqueueV2((void **)bindings, stream, nullptr);cudaMemcpyAsync(host_output_data, device_output_data, output_data_size, cudaMemcpyDeviceToHost, stream);cudaStreamSynchronize(stream);std::cout << host_output_data << std::endl;
}