#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cpu_bitmap.h"
#include <stdio.h>
#include "cuda.h"
#include <math.h>
// Single-precision approximation of pi, used by the kernel's sine pattern.
#define PI 3.1415926f
// Side length passed (twice) to CPUBitmap in main() and used to size the launch grid.
const int DIM = 1024;
// Writes a green sine-product pattern into a 4-byte-per-pixel image. Each
// thread computes one intensity, stores it in a per-block shared tile, and then
// emits the value computed by the diagonally opposite thread of its block, so
// every 16x16 tile of the pattern appears rotated by 180 degrees.
//
// Launch requirements:
//   - blockDim must be exactly (16, 16): the shared tile and the mirrored
//     index are hard-coded to that size.
//   - ptr must point to device memory with 4 bytes for every thread in the
//     grid; there is no bounds guard, so the grid must match the image exactly.
//   - Uses 16*16*sizeof(float) = 1 KB of static shared memory per block.
__global__ void kernel(unsigned char* ptr)
{
    // Edge length of the thread block and of the shared tile.
    const int kTile = 16;

    // Map this thread to a pixel position and to a linear pixel index.
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x;

    // Per-block scratch tile holding one intensity per thread.
    __shared__ float shared[kTile][kTile];
    const float period = 128.0f;

    // Indexed [y][x] so that consecutive threadIdx.x values touch consecutive
    // shared-memory words. The mirrored read below uses the same layout, so the
    // resulting image is identical to a [x][y] layout.
    shared[threadIdx.y][threadIdx.x] =
        255 * (sinf(x * 2.0f * PI / period) + 1.0f) *
        (sinf(y * 2.0f * PI / period) + 1.0f) / 4.0f;

    // Block-wide barrier: every thread must have written its tile entry before
    // any thread reads an entry written by another thread. (The host API
    // cudaThreadSynchronize() is not a device-side barrier and must not be
    // used here.)
    __syncthreads();

    // Emit the pixel: only the green channel carries the pattern; alpha is opaque.
    ptr[offset * 4 + 0] = 0;
    ptr[offset * 4 + 1] =
        (unsigned char)shared[kTile - 1 - threadIdx.y][kTile - 1 - threadIdx.x];
    ptr[offset * 4 + 2] = 0;
    ptr[offset * 4 + 3] = 255;
}
// Prints a diagnostic to stderr when `status` is a CUDA error.
// Returns true on error, false on cudaSuccess.
static bool reportCudaError(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return false;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return true;
}

// Renders the kernel's pattern into a DIM x DIM bitmap on the device, copies it
// to the host bitmap, and displays it. Returns 1 if any CUDA call fails.
int main()
{
    CPUBitmap bitmap(DIM, DIM);
    unsigned char* dev_bitmap = NULL;

    if (reportCudaError(cudaMalloc((void**)&dev_bitmap, bitmap.image_size()),
                        "cudaMalloc")) {
        return 1;
    }

    // One 16x16 block per image tile; DIM is a multiple of 16, so the grid
    // covers the image exactly, which the kernel relies on (it has no bounds
    // guard).
    dim3 grids(DIM / 16, DIM / 16);
    dim3 threads(16, 16);
    kernel<<<grids, threads>>>(dev_bitmap);

    // Launch-configuration errors are only visible through cudaGetLastError();
    // execution errors surface at the blocking cudaMemcpy that follows.
    if (reportCudaError(cudaGetLastError(), "kernel launch") ||
        reportCudaError(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
                                   bitmap.image_size(), cudaMemcpyDeviceToHost),
                        "cudaMemcpy")) {
        cudaFree(dev_bitmap);
        return 1;
    }

    // The pixels are on the host now, so release the device buffer before
    // showing the window. NOTE(review): display_and_exit() presumably does not
    // return (as its name suggests), which would have made a later cudaFree
    // unreachable -- confirm against cpu_bitmap.h.
    reportCudaError(cudaFree(dev_bitmap), "cudaFree");

    bitmap.display_and_exit();
    return 0;
}
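// CUDA (4) -- Image processing
// Reposted from blog.csdn.net/u014413083/article/details/53228215
// (Navigation text left over from the original web page: "You may also like",
// "Today's recommendations", "Weekly ranking".)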