"__kernel __attribute__((reqd_work_group_size(%d, %d, 1)))\n" // opencl_local_work[0], opencl_local_work[1]
"void %s(uint i0_width, uint i0_height, __global uchar * i0_buf, uint i0_stride, uint i0_offset,\n"
" uint i1_width, uint i1_height, __global uchar * i1_buf, uint i1_stride, uint i1_offset,\n"
" uint o0_width, uint o0_height, __global uchar * o0_buf, uint o0_stride, uint o0_offset)\n"
"{\n"
" int gx = get_global_id(0);\n"
" int gy = get_global_id(1);\n"
" if ((gx < %d) && (gy < %d)) {\n" // work_items[0], work_items[1]
" uint3 i0 = *(__global uint3 *) (i0_buf + i0_offset + (gy * i0_stride) + (gx * 12));\n"
" uint4 i1 = *(__global uint4 *) (i1_buf + i1_offset + (gy * i1_stride) + (gx * 16));\n"
" uint3 o0;\n"
" float4 f; float alpha0, alpha1, alpha_normalizer = 0.0039215686274509803921568627451f;\n"
" alpha1 = amd_unpack3(i1.s0)*alpha_normalizer; alpha0 = 1.0f - alpha1;\n"
" f.s0 = mad(amd_unpack0(i0.s0), alpha0, amd_unpack0(i1.s0)*alpha1);\n"
" f.s1 = mad(amd_unpack1(i0.s0), alpha0, amd_unpack1(i1.s0)*alpha1);\n"
" f.s2 = mad(amd_unpack2(i0.s0), alpha0, amd_unpack2(i1.s0)*alpha1);\n"
" alpha1 = amd_unpack3(i1.s1)*alpha_normalizer; alpha0 = 1.0f - alpha1;\n"
" f.s3 = mad(amd_unpack3(i0.s0), alpha0, amd_unpack0(i1.s1)*alpha1);\n"
" o0.s0 = amd_pack(f);\n"
" f.s0 = mad(amd_unpack0(i0.s1), alpha0, amd_unpack1(i1.s1)*alpha1);\n"
" f.s1 = mad(amd_unpack1(i0.s1), alpha0, amd_unpack2(i1.s1)*alpha1);\n"
" alpha1 = amd_unpack3(i1.s2)*alpha_normalizer; alpha0 = 1.0f - alpha1;\n"
" f.s2 = mad(amd_unpack2(i0.s1), alpha0, amd_unpack0(i1.s2)*alpha1);\n"
" f.s3 = mad(amd_unpack3(i0.s1), alpha0, amd_unpack1(i1.s2)*alpha1);\n"
" o0.s1 = amd_pack(f);\n"
" f.s0 = mad(amd_unpack0(i0.s2), alpha0, amd_unpack2(i1.s2)*alpha1);\n"
" alpha1 = amd_unpack3(i1.s3)*alpha_normalizer; alpha0 = 1.0f - alpha1;\n"
" f.s1 = mad(amd_unpack1(i0.s2), alpha0, amd_unpack0(i1.s3)*alpha1);\n"
" f.s2 = mad(amd_unpack2(i0.s2), alpha0, amd_unpack1(i1.s3)*alpha1);\n"
" f.s3 = mad(amd_unpack3(i0.s2), alpha0, amd_unpack2(i1.s3)*alpha1);\n "
" o0.s2 = amd_pack(f);\n"
" *(__global uint3 *) (o0_buf + o0_offset + (gy * o0_stride) + (gx * 12)) = o0;\n"
" }\n"
"}\n"
GLM_FUNC_QUALIFIER vec4 unpackUnorm4x8(uint p)
{
union
{
uint in;
u8 out[4];
} u;
u.in = p;
return vec4(u.out[0], u.out[1], u.out[2], u.out[3]) * 0.0039215686274509803921568627451f;
}
i0是uint3 3个像素
i1是uint4 3个像素加一个alpha值