Apply previous blowfish optimization for -m 3200 also on -m 9000 and -m 18600

This commit is contained in:
jsteube
2019-03-18 12:42:47 +01:00
parent a172ab7d8a
commit 5ef67a8ab7
10 changed files with 370 additions and 125 deletions

View File

@@ -80,11 +80,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
// based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value
// which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result.
// therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1.
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
// on my development system. no clue where the 4 bytes are spent.
// I did some research on this and it seems to be related with the datatype.
// For example, if i used u8 instead, there's only 1 byte wasted.
u32 fixed_local_size = 0;
@@ -94,7 +89,20 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
}
else
{
fixed_local_size = (device_param->device_local_mem_size - 4) / 4096;
u32 overhead = 0;
if (device_param->device_vendor_id == VENDOR_ID_NV)
{
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
// on my development system. no clue where the 4 bytes are spent.
// I did some research on this and it seems to be related with the datatype.
// For example, if i used u8 instead, there's only 1 byte wasted.
overhead = 4;
}
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);

View File

@@ -70,18 +70,31 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u32 kernel_threads_min = 8; // Blowfish
char *jit_build_options = NULL;
return kernel_threads_min;
}
u32 fixed_local_size = 0;
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_max = 8; // Blowfish
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
{
fixed_local_size = 1;
}
else
{
u32 overhead = 0;
return kernel_threads_max;
if (device_param->device_vendor_id == VENDOR_ID_NV)
{
overhead = 4;
}
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
return jit_build_options;
}
bool module_outfile_check_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@@ -183,14 +196,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;

View File

@@ -62,18 +62,31 @@ typedef struct odf11
static const char *SIGNATURE_ODF = "$odf$";
u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u32 kernel_threads_max = 8; // Blowfish enforced
char *jit_build_options = NULL;
return kernel_threads_max;
}
u32 fixed_local_size = 0;
u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_threads_min = 8; // Blowfish enforced
if (device_param->device_type & CL_DEVICE_TYPE_CPU)
{
fixed_local_size = 1;
}
else
{
u32 overhead = 0;
return kernel_threads_min;
if (device_param->device_vendor_id == VENDOR_ID_NV)
{
overhead = 4;
}
fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096;
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size);
return jit_build_options;
}
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
@@ -335,14 +348,14 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_threads_max = module_kernel_threads_max;
module_ctx->module_kernel_threads_min = module_kernel_threads_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
module_ctx->module_opti_type = module_opti_type;