Implement gpu_bzero

This commit is contained in:
Jukka Ojanen
2021-07-17 19:00:10 +03:00
parent 8066a47ac5
commit a2a1d04bcf
3 changed files with 168 additions and 57 deletions

View File

@@ -117,10 +117,7 @@ KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_
#if defined IS_NATIVE
r = value;
#elif defined IS_OPENCL
r.s0 = value;
r.s1 = value;
r.s2 = value;
r.s3 = value;
r = (uint4) (value);
#elif defined IS_CUDA
r.x = value;
r.y = value;
@@ -136,6 +133,33 @@ KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_
buf[gid] = r;
}
// Zero-fill kernel: every work-item whose global id is below gid_max
// stores one all-zero uint4 into buf at the position of its global id.
//
//   buf     - destination buffer, indexed in uint4 elements
//   gid_max - exclusive upper bound on the global ids that may write
KERNEL_FQ void gpu_bzero(GLOBAL_AS uint4* buf, const u64 gid_max)
{
  const u64 idx = get_global_id (0);

  // Work-items at or beyond gid_max do nothing.
  if (idx < gid_max)
  {
    uint4 zero;

    // Each backend spells an all-zero uint4 differently.
    #if defined IS_NATIVE
    zero = 0;
    #elif defined IS_OPENCL
    zero = (uint4) (0);
    #elif defined IS_CUDA || defined IS_HIP
    // CUDA and HIP both assign the four components one by one.
    zero.x = 0;
    zero.y = 0;
    zero.z = 0;
    zero.w = 0;
    #endif

    buf[idx] = zero;
  }
}
KERNEL_FQ void gpu_atinit (GLOBAL_AS pw_t *buf, const u64 gid_max)
{
const u64 gid = get_global_id (0);