CUDA 二维纹理的创建和使用测试

CUDA 复杂问题 + 细节问题解答见 CUDA复杂问题 + 细节问题解答

首先把程序贴上：

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda_runtime_api.h>
#include <cuda.h>
#include <cstdlib>
#include <iostream>
#define width 10
#define height 11

using std::cout;
using std::endl;

texture<float, 2, cudaReadModeElementType> tex;

/**
 * Copies the 2D texture bound to `tex` into a dense, row-major float buffer.
 *
 * Thread mapping: the global thread index along x selects the row and the
 * global thread index along y selects the column. Threads that fall outside
 * the height x width extent return without touching memory, so any 2D launch
 * that covers the extent is valid.
 *
 * @param arr_cpy  Device buffer; element (row, col) is stored at
 *                 arr_cpy[row * width + col], so the largest index written
 *                 is width * height - 1.
 */
__global__ void kernel(float *arr_cpy)
{
	// Keep the indices integral: they are array subscripts, and a float
	// index only stays exact for small values.
	const int row = static_cast<int>(threadIdx.x + blockIdx.x * blockDim.x);
	const int col = static_cast<int>(threadIdx.y + blockIdx.y * blockDim.y);

	// Guard the tail so an over-sized grid cannot write past the buffer.
	if (row >= height || col >= width) {
		return;
	}

	// tex2D takes (x, y), i.e. (column, row) - hence col first, row second.
	// The +0.5f offset targets the centre of the texel.
	arr_cpy[row * width + col] = tex2D(tex, col + 0.5f, row + 0.5f);
}

// Host-side source image: height rows of width floats, stored row-major.
float *arr;

/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param status  Return code of the CUDA call.
 * @param what    Short label of the call, used in the error message.
 * @return true when status is cudaSuccess, false otherwise.
 */
static bool cudaOk(cudaError_t status, const char *what)
{
	if (status == cudaSuccess) {
		return true;
	}
	std::cerr << what << " failed: " << cudaGetErrorString(status) << endl;
	return false;
}

/**
 * Demo: uploads a height x width float image into pitched device memory,
 * binds it to the 2D texture reference `tex`, reads every texel back through
 * `kernel`, and prints the resulting width * height values one per line.
 *
 * NOTE(review): texture references (texture<>, cudaBindTexture2D) are a
 * legacy API; check that the targeted CUDA toolkit still supports them.
 *
 * @return EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise.
 */
int main(void)
{
	const size_t count = static_cast<size_t>(width) * height;
	const size_t rowBytes = width * sizeof(float);

	float *arr_d = 0;    // pitched device copy of the image
	float *arr_cpy = 0;  // dense device output written by the kernel
	float *hos_c = 0;    // host copy of the kernel output
	size_t pitch = 0, tex_ofs = 0;
	bool bound = false;  // true once the texture has been bound

	arr = (float*)malloc(count * sizeof(float));
	hos_c = (float*)malloc(count * sizeof(float));
	bool ok = (arr != 0 && hos_c != 0);
	if (!ok) {
		std::cerr << "host allocation failed" << endl;
	}

	if (ok) {
		// Rows run over height and columns over width, matching the layout
		// handed to cudaMemcpy2D below; every element gets initialised and
		// the subscript stays in range for any width/height combination.
		for (int row = 0; row < height; row++) {
			for (int col = 0; col < width; col++) {
				arr[row * width + col] = static_cast<float>(row * width + col);
			}
		}
	}

	ok = ok && cudaOk(cudaMallocPitch((void**)&arr_d, &pitch, rowBytes, height),
		"cudaMallocPitch");
	ok = ok && cudaOk(cudaMemcpy2D(arr_d, pitch, arr, rowBytes,
		rowBytes, height, cudaMemcpyHostToDevice), "cudaMemcpy2D");

	if (ok) {
		// Unnormalized coordinates: texels are addressed in [0, width) x [0, height).
		tex.normalized = false;
		ok = cudaOk(cudaBindTexture2D(&tex_ofs, &tex, arr_d, &tex.channelDesc,
			width, height, pitch), "cudaBindTexture2D");
		bound = ok;
	}

	ok = ok && cudaOk(cudaMalloc((void**)&arr_cpy, count * sizeof(float)),
		"cudaMalloc");

	if (ok) {
		// The kernel maps x -> row and y -> column, so launch exactly
		// height x width threads: every output element is produced and no
		// thread falls outside the buffer.
		dim3 blocks(1, 1);
		dim3 threads(height, width);
		kernel<<<blocks, threads>>>(arr_cpy);

		// Launch-configuration errors surface through cudaGetLastError;
		// faults during execution surface at the synchronisation point.
		ok = cudaOk(cudaGetLastError(), "kernel launch")
			&& cudaOk(cudaDeviceSynchronize(), "kernel execution");
	}

	ok = ok && cudaOk(cudaMemcpy(hos_c, arr_cpy, count * sizeof(float),
		cudaMemcpyDeviceToHost), "cudaMemcpy");

	if (ok) {
		for (size_t k = 0; k < count; k++) {
			cout << hos_c[k] << endl;
		}
	}

	// Best-effort cleanup; return codes are deliberately ignored here.
	if (bound) {
		cudaUnbindTexture(tex);
	}
	cudaFree(arr_cpy);
	cudaFree(arr_d);
	free(hos_c);
	free(arr);
	arr = 0;

	system("pause");
	return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}

注意几个问题：

第一，一维纹理不管是Linear Memory还是使用cudaMallocPitch，都是可以使用tex1Dfetch和tex1D这两个函数进行采样的。而对于二维纹理，不管是cudaArray还是cudaMallocPitch都是使用tex2D。

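第二，本例中 #define width 10、#define height 11；按原程序的写法，height 不能小于 width，否则可能报错。需要注意，这并不是 CUDA 纹理本身的限制：若初始化循环以 i < width、j < height 的方式访问 arr[i*width + j]，当 height < width 时下标会超出 width*height 个元素的范围，造成主机端数组越界；改为按行（0 到 height-1）、按列（0 到 width-1）遍历，即可支持任意的宽高组合。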

第三，采样的时候长宽是颠倒的：

	float i = threadIdx.x + blockIdx.x*blockDim.x;
	float j = threadIdx.y + blockIdx.y*blockDim.y;
	arr_cpy[(int)(i*width+j)] = tex2D(tex,j + 0.5f ,  i + 0.5f);

前面是 j ，后面是 i 。原因是 tex2D 的坐标顺序为 (x, y)，即先列后行；而这里 i 是行号、j 是列号（输出下标为 i*width+j），所以要先传 j 、再传 i 。

CUDA 二维纹理的创建和使用测试

猜你喜欢