当前位置: 首页 > news >正文

llama.cpp学习笔记:后端加载

单例
// Handle types: pointers to a backend registration and to a backend device.
typedef struct ggml_backend_reg * ggml_backend_reg_t;
typedef struct ggml_backend_device * ggml_backend_dev_t;

// A device: its interface table, the backend registration it belongs to,
// and a context pointer.
struct ggml_backend_device {
    struct ggml_backend_device_i iface;
    ggml_backend_reg_t reg;
    void * context;
};

// One registry entry: a backend registration paired with a handle.
// NOTE(review): dl_handle_ptr is presumably a dynamic-library handle - confirm
// against its declaration, which is not shown here.
struct ggml_backend_reg_entry {
    ggml_backend_reg_t reg;
    dl_handle_ptr handle;
};

// Registry holding the registered backends and their devices (the surrounding
// notes describe it as a singleton). Declared last so that every type it
// refers to is already visible.
struct ggml_backend_registry {
    std::vector<ggml_backend_reg_entry> backends;
    std::vector<ggml_backend_dev_t> devices;
    // ...
};

每种后端一个单例:cuda、cpu等

后端结构体 ggml_backend_reg:包含 API 版本号、接口和上下文信息

    struct ggml_backend_reg {int api_version; // initialize to GGML_BACKEND_API_VERSIONstruct ggml_backend_reg_i iface;void * context;};

后端对应的设备:接口、所属后端和上下文信息

// Handle type: pointer to a backend device.
typedef struct ggml_backend_device * ggml_backend_dev_t;

// A device exposed by a backend.
struct ggml_backend_device {
    struct ggml_backend_device_i iface;   // device interface table
    ggml_backend_reg_t           reg;     // backend registration this device belongs to
    void *                       context; // context pointer; concrete type is not shown here
};

cuda后端

cuda后端iface

// Interface table for the CUDA backend registration. Each slot is filled, in
// declaration order, with the correspondingly named CUDA entry.
static const ggml_backend_reg_i ggml_backend_cuda_reg_interface = {
    /* .get_name         = */ ggml_backend_cuda_reg_get_name,
    /* .get_device_count = */ ggml_backend_cuda_reg_get_device_count,
    /* .get_device       = */ ggml_backend_cuda_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cuda_reg_get_proc_address,
};

cuda后端context,存储多个cuda设备(GPU)的接口、所属后端和上下文等信息

// Context of the CUDA backend registration: the list of its device handles.
struct ggml_backend_cuda_reg_context {
    std::vector<ggml_backend_dev_t> devices;
};

cuda设备(GPU)的接口iface

// Interface table for a CUDA device. Slots are positional, so the order below
// must be kept. The buffer_from_host_ptr slot is NULL in this table.
static const ggml_backend_device_i ggml_backend_cuda_device_interface = {
    /* .get_name             = */ ggml_backend_cuda_device_get_name,
    /* .get_description      = */ ggml_backend_cuda_device_get_description,
    /* .get_memory           = */ ggml_backend_cuda_device_get_memory,
    /* .get_type             = */ ggml_backend_cuda_device_get_type,
    /* .get_props            = */ ggml_backend_cuda_device_get_props,
    /* .init_backend         = */ ggml_backend_cuda_device_init_backend,
    /* .get_buffer_type      = */ ggml_backend_cuda_device_get_buffer_type,
    /* .get_host_buffer_type = */ ggml_backend_cuda_device_get_host_buffer_type,
    /* .buffer_from_host_ptr = */ NULL,
    /* .supports_op          = */ ggml_backend_cuda_device_supports_op,
    /* .supports_buft        = */ ggml_backend_cuda_device_supports_buft,
    /* .offload_op           = */ ggml_backend_cuda_device_offload_op,
    /* .event_new            = */ ggml_backend_cuda_device_event_new,
    /* .event_free           = */ ggml_backend_cuda_device_event_free,
    /* .event_synchronize    = */ ggml_backend_cuda_device_event_synchronize,
};

cuda设备(GPU)的context

// Per-device context for a CUDA device.
struct ggml_backend_cuda_device_context {
    int         device;      // device number (exact meaning not shown here)
    std::string name;        // device name
    std::string description; // device description
};

 

CPU后端

CPU后端iface

// Interface table for the CPU backend registration. Each slot is filled, in
// declaration order, with the correspondingly named CPU entry.
static const struct ggml_backend_reg_i ggml_backend_cpu_reg_i = {
    /* .get_name         = */ ggml_backend_cpu_reg_get_name,
    /* .get_device_count = */ ggml_backend_cpu_reg_get_device_count,
    /* .get_device       = */ ggml_backend_cpu_reg_get_device,
    /* .get_proc_address = */ ggml_backend_cpu_get_proc_address,
};

CPU后端无context

CPU设备接口iface

static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {/* .get_name             = */ ggml_backend_cpu_device_get_name,/* .get_description      = */ ggml_backend_cpu_device_get_description,/* .get_memory           = */ ggml_backend_cpu_device_get_memory,/* .get_type             = */ ggml_backend_cpu_device_get_type,/* .get_props            = */ ggml_backend_cpu_device_get_props,/* .init_backend         = */ ggml_backend_cpu_device_init_backend,/* .get_buffer_type      = */ ggml_backend_cpu_device_get_buffer_type,/* .get_host_buffer_type = */ NULL,/* .buffer_from_host_ptr = */ ggml_backend_cpu_device_buffer_from_host_ptr,/* .supports_op          = */ ggml_backend_cpu_device_supports_op,/* .supports_buft        = */ ggml_backend_cpu_device_supports_buft,/* .offload_op           = */ NULL,/* .event_new            = */ NULL,/* .event_free           = */ NULL,/* .event_synchronize    = */ NULL,
};

CPU设备的context

// Context for the CPU device. The description defaults to "CPU"; the remaining
// members are elided in these notes.
struct ggml_backend_cpu_device_context {
    std::string description = "CPU";
    // ...
};

http://www.lryc.cn/news/576728.html

相关文章:

  • M1芯片最终oracle成功版本拉取方法及配置
  • 【Linux庖丁解牛】— 文件系统!
  • JDK21 基于 Spring-AI 集成大模型实现聊天机器人
  • 【智能协同云图库】智能协同云图库第三弹:基于腾讯云 COS 对象存储—开发图片模块
  • Leetcode 3598. Longest Common Prefix Between Adjacent Strings After Removals
  • [database] Closure computation | e-r diagram | SQL
  • 【LeetCode 热题 100】560. 和为 K 的子数组——(解法二)前缀和+哈希表
  • swift-22-面向协议编程、响应式编程
  • SpringSecurity6-oauth2-三方gitee授权-授权码模式
  • 加密货币:USDC和比特币有什么区别?
  • web3区块链-ETH以太坊
  • 代理模式 - Flutter中的智能替身,掌控对象访问的每一道关卡!
  • aws(学习笔记第四十八课) appsync-graphql-dynamodb
  • Docker错误问题解决方法
  • Keil MDK 的 STM32 开发问题:重定向 printf 函数效果不生效(Keil MDK 中标准库未正确链接)
  • 基于springboot+vue的数字科技风险报告管理系统
  • 现代 JavaScript (ES6+) 入门到实战(一):告别 var!拥抱 let 与 const,彻底搞懂作用域
  • 领域驱动设计(DDD)【23】之泛化:从概念到实践
  • 网络缓冲区
  • DOP数据开放平台(真实线上项目)
  • 马斯克的 Neuralink:当意念突破肉体的边界,未来已来
  • Word之电子章制作——1
  • 【编译原理】期末
  • 华为云Flexus+DeepSeek征文|利用华为云一键部署的Dify平台构建高效智能电商客服系统实战
  • Youtube双塔模型
  • C++共享型智能指针std::shared_ptr使用介绍
  • cocos creator 3.8 - 精品源码 - 挪车超人(挪车消消乐)
  • Neo4j无法建立到 localhost:7474 服务器的连接出现404错误
  • Linux基本命令篇 —— less命令
  • springboot+Vue驾校管理系统