CUDA 复杂问题与细节问题的解答,请参见《CUDA 复杂问题 + 细节问题 解答》。
先直接贴上总代码
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda.h"
#include <cstdlib>
#include <iostream>
using std::cout;
using std::endl;
// Number of float elements used throughout the demo: the size of the host
// source array, the width of the CUDA array, the number of threads launched
// and the length of the output buffers.
#define arrayNum 100
// File-scope 1D texture reference over float texels; fetches return the
// (filtered) element value itself (cudaReadModeElementType). It is configured
// and bound in Bind1DTex() and sampled in cudaTest().
// NOTE(review): texture references are a legacy API (deprecated, and removed
// in CUDA 12 in favour of texture objects) — confirm the toolkit version this
// file is meant to build with.
texture<float, cudaTextureType1D, cudaReadModeElementType> tex1DTest;
// CUDA array that backs tex1DTest. Stays NULL until the first Bind1DTex()
// call allocates it.
cudaArray* gputexArray = NULL;
// Reports a failed CUDA runtime call made while preparing the texture and
// terminates the program. Does nothing when `status` is cudaSuccess.
//   status - return code of the runtime call that was just made
//   step   - name of that call, used in the diagnostic message
static void abortOnBindError(cudaError_t status, const char *step)
{
    if (status == cudaSuccess)
        return;
    std::cerr << "Bind1DTex: " << step << " failed: "
              << cudaGetErrorString(status) << std::endl;
    exit(EXIT_FAILURE);
}

// Fills a CUDA array with the values 0, 1, ..., arrayNum-1 and binds it to the
// texture reference tex1DTest.
//
// The texture is set up for normalized coordinates, linear filtering and clamp
// addressing. The backing array (gputexArray) is allocated only on the first
// call; later calls reuse it and just refresh its contents and the binding.
// Any failing CUDA runtime call is reported on stderr and ends the program,
// because the function has no way to return an error to its caller.
void Bind1DTex()
{
    tex1DTest.normalized = true;                     // coordinates are given in [0, 1)
    tex1DTest.filterMode = cudaFilterModeLinear;     // interpolate between neighbouring texels
    tex1DTest.addressMode[0] = cudaAddressModeClamp; // out-of-range coordinates clamp to the edge

    // Host-side source data: element i holds the value i.
    float testArray[arrayNum];
    for (int i = 0; i < arrayNum; i++)
        testArray[i] = static_cast<float>(i);

    cudaChannelFormatDesc ChannelDesc = cudaCreateChannelDesc<float>();

    // NOTE(review): the runtime documentation says a height of 0 requests a
    // true 1D array; the original height of 1 is kept here to preserve the
    // existing behaviour — confirm which is intended.
    if (gputexArray == NULL)
        abortOnBindError(cudaMallocArray(&gputexArray, &ChannelDesc, arrayNum, 1),
                         "cudaMallocArray");

    // cudaMemcpyToArray is deprecated; a single-row 2D copy is the documented
    // replacement and transfers exactly the same bytes.
    const size_t rowBytes = arrayNum * sizeof(float);
    abortOnBindError(cudaMemcpy2DToArray(gputexArray, 0, 0, testArray,
                                         rowBytes, rowBytes, 1,
                                         cudaMemcpyHostToDevice),
                     "cudaMemcpy2DToArray");

    abortOnBindError(cudaBindTextureToArray(tex1DTest, gputexArray, ChannelDesc),
                     "cudaBindTextureToArray");
}
// Samples the texture tex1DTest once per thread and stores the result.
//
//   c - device buffer with room for at least arrayNum floats; element i
//       receives the texture value fetched at the centre of texel i.
//
// Launch layout: any 1D grid/block configuration that provides at least
// arrayNum threads in total (the demo uses <<<1, arrayNum>>>). Threads whose
// global index is arrayNum or larger do nothing. No shared memory is used.
__global__ void cudaTest(float *c)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= arrayNum)
        return; // surplus threads must not write past the end of c

    // With normalized coordinates the centre of texel i lies at (i + 0.5) / N.
    // The float literal keeps the whole computation in single precision.
    c[i] = tex1D(tex1DTest, (i + 0.5f) / (float)arrayNum);
}
// Demo entry point: binds the 1D texture, samples every texel with cudaTest,
// copies the results back and prints one value per line.
//
// Returns 0 when every step succeeded and 1 otherwise. All resources (host
// buffer, device buffer, texture binding and the CUDA array behind it) are
// released before returning, on both the success and the failure path.
int main()
{
    const size_t bytes = arrayNum * sizeof(float);
    int exitCode = 1;
    bool textureBound = false;
    float *dev_c = NULL;
    float *c = (float*)malloc(bytes);

    if (c == NULL) {
        std::cerr << "main: host allocation failed" << endl;
    } else {
        cudaError_t status = cudaMalloc((void**)&dev_c, bytes);

        if (status == cudaSuccess) {
            Bind1DTex();
            textureBound = true;

            cudaTest<<<1, arrayNum>>>(dev_c);
            // A kernel launch returns nothing itself; configuration errors
            // only show up through cudaGetLastError().
            status = cudaGetLastError();
        }

        // The blocking copy also waits for the kernel, so an execution error
        // inside the kernel is reported by this call.
        if (status == cudaSuccess)
            status = cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost);

        if (status == cudaSuccess) {
            for (int i = 0; i < arrayNum; i++) {
                cout << c[i] << endl;
            }
            exitCode = 0;
        } else {
            std::cerr << "main: CUDA error: " << cudaGetErrorString(status) << endl;
        }
    }

    // Release everything that was acquired, regardless of how far we got.
    if (textureBound)
        cudaUnbindTexture(tex1DTest);
    if (gputexArray != NULL) {
        cudaFreeArray(gputexArray);
        gputexArray = NULL; // lets a later Bind1DTex() allocate a fresh array
    }
    cudaFree(dev_c);
    free(c);

    system("pause");
    return exitCode;
}
有个细节需要注意一下:
c[i] = tex1D(tex1DTest, (float)(i+0.5)/(float)arrayNum);
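采样纹理的时候需要加个0.5的偏置。这是因为在线性滤波(cudaFilterModeLinear)下,第 i 个纹素的中心位于归一化坐标 (i+0.5)/arrayNum 处;如果直接用 i/arrayNum 采样,采样点落在相邻两个纹素的正中间,得到的是二者的插值,结果就是:0 0.5 1.5 2.5 ...
而加上0.5的偏置以后,采样点正好落在纹素中心,采样结果就是:0 1 2 3 4 ...了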