前言
由于最近忙着秋招，本系列博客暂时只记录自己学习 CUDA 时写的示例代码，不附详细注释；后期有空时会补上基础知识和代码详解。
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Fill values for the two host input vectors: main() writes `a` into every
// element of the first vector and `b` into every element of the second.
constexpr double a = 1.23;
constexpr double b = 2.34;
/*
 * Element-wise vector addition kernel: z[i] = x[i] + y[i] for i in [0, N).
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. The launch
 * must supply at least N threads in total; threads whose index is >= N are
 * masked off by the bounds check and do nothing.
 *
 * x, y : input arrays, read only, N elements each (dereferenced on the device)
 * z    : output array, N elements (written on the device)
 * N    : number of elements to process
 *
 * The prototype must match the definition exactly: declaring `z` as
 * `const double *` would introduce a separate, never-defined overload
 * instead of forward-declaring this kernel.
 */
__global__ void add(const double *x, const double *y, double *z, const int N);

__global__ void add(const double *x, const double *y, double *z, const int N)
{
    // Flat global thread index for a 1D launch.
    const int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so the tail block may contain
    // threads past the end of the arrays.
    if (tid < N)
    {
        z[tid] = x[tid] + y[tid];
    }
}
/*
 * Evaluate a CUDA runtime call and terminate the program with a file/line
 * diagnostic if it did not return cudaSuccess.
 */
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        const cudaError_t err_ = (call);                                    \
        if (err_ != cudaSuccess) {                                          \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(err_));                    \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

/*
 * Host driver: fills two N-element vectors with the constants `a` and `b`,
 * adds them on the GPU with the `add` kernel, copies the result back and
 * checks every element against a + b.
 *
 * Returns EXIT_SUCCESS when all elements match, EXIT_FAILURE on a host
 * allocation failure or a result mismatch. Any failing CUDA call terminates
 * the process through CUDA_CHECK.
 */
int main()
{
    const int N = 1000;
    // Byte counts belong in size_t, not int.
    const size_t M = sizeof(double) * N;

    // Host buffers: two inputs and one output.
    double *ha = (double *)malloc(M);
    double *hb = (double *)malloc(M);
    double *hc = (double *)malloc(M);
    if (ha == NULL || hb == NULL || hc == NULL)
    {
        fprintf(stderr, "host allocation of %zu bytes failed\n", M);
        free(ha); // free(NULL) is a no-op, so this is safe for any subset
        free(hb);
        free(hc);
        return EXIT_FAILURE;
    }

    // Initialise the inputs.
    for (int i = 0; i < N; ++i)
    {
        ha[i] = a;
        hb[i] = b;
    }

    // Device buffers mirroring the host ones.
    double *da = NULL;
    double *db = NULL;
    double *dc = NULL;
    CUDA_CHECK(cudaMalloc((void **)&da, M));
    CUDA_CHECK(cudaMalloc((void **)&db, M));
    CUDA_CHECK(cudaMalloc((void **)&dc, M));
    CUDA_CHECK(cudaMemcpy(da, ha, M, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(db, hb, M, cudaMemcpyHostToDevice));

    // Launch one thread per element; the grid size is rounded up so that the
    // last, partially filled block still covers the tail of the arrays.
    const int block_size = 128;
    const int grid_size = (N + block_size - 1) / block_size;
    add<<<grid_size, block_size>>>(da, db, dc, N);
    // A launch does not return a status; bad launch configurations are only
    // reported through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy: also waits for the kernel to finish and reports any
    // error raised while it was running.
    CUDA_CHECK(cudaMemcpy(hc, dc, M, cudaMemcpyDeviceToHost));

    // Verify the result against the host-side reference value.
    const double expected = a + b;
    const double tolerance = 1.0e-12;
    int mismatches = 0;
    for (int i = 0; i < N; ++i)
    {
        if (fabs(hc[i] - expected) > tolerance)
        {
            ++mismatches;
        }
    }
    if (mismatches != 0)
    {
        fprintf(stderr, "%d of %d elements differ from the expected value %f\n",
                mismatches, N, expected);
    }

    free(ha);
    free(hb);
    free(hc);
    CUDA_CHECK(cudaFree(da));
    CUDA_CHECK(cudaFree(db));
    CUDA_CHECK(cudaFree(dc));

    return mismatches == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
编译指令
nvcc -arch=sm_75 add.cu -o add