当前位置: 首页 > news >正文

Cuda By Example - 11 (Texture Memory 2-D)

跟1D一样,2D的代码也没有运行过。旧的方法看看就好。

声明二维Texture

texture<float, 2> texConstSrc;
texture<float, 2> texIn;
texture<float, 2> texOut;

访问二维Texture

使用2D的Texture的便利性体现在blend_kernel函数里。不再需要通过x,y去计算一维索引。二维texture使用tex2D()去读取数据。

__global__ void blend_kernel( float *dst,bool dstOut ) {// map from threadIdx/BlockIdx to pixel positionint x = threadIdx.x + blockIdx.x * blockDim.x;int y = threadIdx.y + blockIdx.y * blockDim.y;int offset = x + y * blockDim.x * gridDim.x;float   t, l, c, r, b;if (dstOut) {t = tex2D(texIn,x,y-1);l = tex2D(texIn,x-1,y);c = tex2D(texIn,x,y);r = tex2D(texIn,x+1,y);b = tex2D(texIn,x,y+1);} else {t = tex2D(texOut,x,y-1);l = tex2D(texOut,x-1,y);c = tex2D(texOut,x,y);r = tex2D(texOut,x+1,y);b = tex2D(texOut,x,y+1);}dst[offset] = c + SPEED * (t + b + r + l - 4 * c);
}

然后拷贝热源数据

__global__ void copy_const_kernel( float *iptr ) {// map from threadIdx/BlockIdx to pixel positionint x = threadIdx.x + blockIdx.x * blockDim.x;int y = threadIdx.y + blockIdx.y * blockDim.y;int offset = x + y * blockDim.x * gridDim.x;float c = tex2D(texConstSrc,x,y);if (c != 0)iptr[offset] = c;
}

二维Texture绑定

使用2维Texture去绑定一维数组,稍微复杂一些:

cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
HANDLE_ERROR( cudaBindTexture2D( NULL, texConstSrc, data.dev_constSrc,desc, DIM, DIM, sizeof(float)*DIM));
HANDLE_ERROR( cudaBindTexture2D( NULL, texIn, data.dev_inSrc,desc, DIM, DIM, sizeof(float)*DIM));
HANDLE_ERROR( cudaBindTexture2D( NULL, texOut, data.dev_outSrc,desc, DIM, DIM, sizeof(float)*DIM));

解除绑定

解除绑定的方式跟1D相同

cudaUnbindTexture(texIn);
cudaUnbindTexture(texOut);
cudaUnbindTexture(texConstSrc);

完整代码

#include "../common/book.h"
#include "../common/cpu_anim.h"#define DIM 1024
#define PI 3.1415926535897932f
#define MAX_TEMP 1.0f
#define MIN_TEMP 0.0001f
#define SPEED   0.25f// these exist on the GPU side
texture<float,2>  texConstSrc;
texture<float,2>  texIn;
texture<float,2>  texOut;__global__ void blend_kernel( float *dst,bool dstOut ) {// map from threadIdx/BlockIdx to pixel positionint x = threadIdx.x + blockIdx.x * blockDim.x;int y = threadIdx.y + blockIdx.y * blockDim.y;int offset = x + y * blockDim.x * gridDim.x;float   t, l, c, r, b;if (dstOut) {t = tex2D(texIn,x,y-1);l = tex2D(texIn,x-1,y);c = tex2D(texIn,x,y);r = tex2D(texIn,x+1,y);b = tex2D(texIn,x,y+1);} else {t = tex2D(texOut,x,y-1);l = tex2D(texOut,x-1,y);c = tex2D(texOut,x,y);r = tex2D(texOut,x+1,y);b = tex2D(texOut,x,y+1);}dst[offset] = c + SPEED * (t + b + r + l - 4 * c);
}__global__ void copy_const_kernel( float *iptr ) {// map from threadIdx/BlockIdx to pixel positionint x = threadIdx.x + blockIdx.x * blockDim.x;int y = threadIdx.y + blockIdx.y * blockDim.y;int offset = x + y * blockDim.x * gridDim.x;float c = tex2D(texConstSrc,x,y);if (c != 0)iptr[offset] = c;
}// globals needed by the update routine
struct DataBlock {unsigned char   *output_bitmap;float           *dev_inSrc;float           *dev_outSrc;float           *dev_constSrc;CPUAnimBitmap  *bitmap;cudaEvent_t     start, stop;float           totalTime;float           frames;
};void anim_gpu( DataBlock *d, int ticks ) {HANDLE_ERROR( cudaEventRecord( d->start, 0 ) );dim3    blocks(DIM/16,DIM/16);dim3    threads(16,16);CPUAnimBitmap  *bitmap = d->bitmap;// since tex is global and bound, we have to use a flag to// select which is in/out per iterationvolatile bool dstOut = true;for (int i=0; i<90; i++) {float   *in, *out;if (dstOut) {in  = d->dev_inSrc;out = d->dev_outSrc;} else {out = d->dev_inSrc;in  = d->dev_outSrc;}copy_const_kernel<<<blocks,threads>>>( in );blend_kernel<<<blocks,threads>>>( out, dstOut );dstOut = !dstOut;}float_to_color<<<blocks,threads>>>( d->output_bitmap,d->dev_inSrc );HANDLE_ERROR( cudaMemcpy( bitmap->get_ptr(),d->output_bitmap,bitmap->image_size(),cudaMemcpyDeviceToHost ) );HANDLE_ERROR( cudaEventRecord( d->stop, 0 ) );HANDLE_ERROR( cudaEventSynchronize( d->stop ) );float   elapsedTime;HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime,d->start, d->stop ) );d->totalTime += elapsedTime;++d->frames;printf( "Average Time per frame:  %3.1f ms\n",d->totalTime/d->frames  );
}// clean up memory allocated on the GPU
void anim_exit( DataBlock *d ) {cudaUnbindTexture( texIn );cudaUnbindTexture( texOut );cudaUnbindTexture( texConstSrc );HANDLE_ERROR( cudaFree( d->dev_inSrc ) );HANDLE_ERROR( cudaFree( d->dev_outSrc ) );HANDLE_ERROR( cudaFree( d->dev_constSrc ) );HANDLE_ERROR( cudaEventDestroy( d->start ) );HANDLE_ERROR( cudaEventDestroy( d->stop ) );
}int main( void ) {DataBlock   data;CPUAnimBitmap bitmap( DIM, DIM, &data );data.bitmap = &bitmap;data.totalTime = 0;data.frames = 0;HANDLE_ERROR( cudaEventCreate( &data.start ) );HANDLE_ERROR( cudaEventCreate( &data.stop ) );int imageSize = bitmap.image_size();HANDLE_ERROR( cudaMalloc( (void**)&data.output_bitmap,imageSize ) );// assume float == 4 chars in size (ie rgba)HANDLE_ERROR( cudaMalloc( (void**)&data.dev_inSrc,imageSize ) );HANDLE_ERROR( cudaMalloc( (void**)&data.dev_outSrc,imageSize ) );HANDLE_ERROR( cudaMalloc( (void**)&data.dev_constSrc,imageSize ) );cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();HANDLE_ERROR( cudaBindTexture2D( NULL, texConstSrc,data.dev_constSrc,desc, DIM, DIM,sizeof(float) * DIM ) );HANDLE_ERROR( cudaBindTexture2D( NULL, texIn,data.dev_inSrc,desc, DIM, DIM,sizeof(float) * DIM ) );HANDLE_ERROR( cudaBindTexture2D( NULL, texOut,data.dev_outSrc,desc, DIM, DIM,sizeof(float) * DIM ) );// initialize the constant datafloat *temp = (float*)malloc( imageSize );for (int i=0; i<DIM*DIM; i++) {temp[i] = 0;int x = i % DIM;int y = i / DIM;if ((x>300) && (x<600) && (y>310) && (y<601))temp[i] = MAX_TEMP;}temp[DIM*100+100] = (MAX_TEMP + MIN_TEMP)/2;temp[DIM*700+100] = MIN_TEMP;temp[DIM*300+300] = MIN_TEMP;temp[DIM*200+700] = MIN_TEMP;for (int y=800; y<900; y++) {for (int x=400; x<500; x++) {temp[x+y*DIM] = MIN_TEMP;}}HANDLE_ERROR( cudaMemcpy( data.dev_constSrc, temp,imageSize,cudaMemcpyHostToDevice ) );    // initialize the input datafor (int y=800; y<DIM; y++) {for (int x=0; x<200; x++) {temp[x+y*DIM] = MAX_TEMP;}}HANDLE_ERROR( cudaMemcpy( data.dev_inSrc, temp,imageSize,cudaMemcpyHostToDevice ) );free( temp );bitmap.anim_and_exit( (void (*)(void*,int))anim_gpu,(void (*)(void*))anim_exit );
}

http://www.lryc.cn/news/473933.html

相关文章:

  • Go匿名结构体使用场景
  • Vue 发布十年了!你知道我这十年是怎么过的吗?
  • Unity 6 来袭
  • SpringMVC课时1
  • 【小白学机器学习30】样本统计的核心参数:均值/期望,方差,标准差,标准值。
  • flink1.17.2安装和使用
  • C向C++入门-- C语言填坑
  • 扫雷游戏(C语言详解)
  • 信刻全自动光盘摆渡系统
  • 计算机网络的数据链路层
  • 从0开始搭建一个生产级SpringBoot2.0.X项目(三)SpringBoot接口统一返回和全局异常处理
  • Mybatis-plus-扩展功能
  • 【AI辅助】AWS Toolkit+AmazonQ
  • 云手机简述(概况,使用场景,自己部署云手机)
  • Java已死,大模型才是未来?
  • NCCL安装(Ubuntu等)
  • 加载视频显示 - python 实现
  • 数据结构模拟题[五]
  • IDEA切换窗口快捷键失效
  • QT中使用图表之QChart绘制X轴为日期时间轴的折线图
  • 【传知代码】短期电力负荷(论文复现)
  • ubuntu20.04 加固方案-设置重复登录失败后锁定时间限制
  • 【综合算法学习】(第十三篇)
  • Web3 Key Talking #4|Sui有何不同?及其发展路线图
  • Axios 请求超时设置无效的问题及解决方案
  • 数据结构+算法
  • 利用ExcelJS封装一个excel表格的导出
  • AI 原生时代,更要上云:百度智能云云原生创新实践
  • C语言程序编译运行
  • 视频点播系统扩展示例