From df8a9ab5e5e680eb0bc37df3b73b718396cccdd2 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sat, 29 Oct 2016 14:02:29 +0200 Subject: [PATCH] Support mixed kernel thread count for mixed kernels in the same source file Get rid of one global kernel_threads variable Recognize reqd_work_group_size() values from OpenCL kernels and use them in host if possible Fix some white spaces Remove unused weak* kernels Rename hashconfig_enforce_kernel_threads() to hashconfig_get_kernel_threads() - we do not enforce anymore Rename hashconfig_enforce_kernel_loops() to hashconfig_get_kernel_loops() - we do not enforce anymore Add some missing checks for --quiet --- include/interface.h | 28 +++--- include/types.h | 20 +++- src/autotune.c | 12 ++- src/interface.c | 102 ++++++++++---------- src/opencl.c | 221 ++++++++++++++++++++++++++++++-------------- 5 files changed, 240 insertions(+), 143 deletions(-) diff --git a/include/interface.h b/include/interface.h index 6300c2872..531c7b2b1 100644 --- a/include/interface.h +++ b/include/interface.h @@ -37,23 +37,23 @@ typedef struct pdf int enc_md; - u32 id_buf[8]; - u32 u_buf[32]; - u32 o_buf[32]; + u32 id_buf[8]; + u32 u_buf[32]; + u32 o_buf[32]; int id_len; int o_len; int u_len; - u32 rc4key[2]; - u32 rc4data[2]; + u32 rc4key[2]; + u32 rc4data[2]; } pdf_t; typedef struct wpa { - u32 pke[25]; - u32 eapol[64]; + u32 pke[25]; + u32 eapol[64]; int eapol_size; int keyver; u8 orig_mac1[6]; @@ -1509,12 +1509,12 @@ void to_hccap_t (hashcat_ctx_t *hashcat_ctx, hccap_t *hccap, const u32 salt_pos, int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const u32 salt_pos, const u32 digest_pos); -int hashconfig_init (hashcat_ctx_t *hashcat_ctx); -void hashconfig_destroy (hashcat_ctx_t *hashcat_ctx); -u32 hashconfig_enforce_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param); -u32 hashconfig_enforce_kernel_loops (hashcat_ctx_t *hashcat_ctx); -int hashconfig_general_defaults (hashcat_ctx_t *hashcat_ctx); -void hashconfig_benchmark_defaults (hashcat_ctx_t *hashcat_ctx, salt_t *salt, void *esalt); -char *hashconfig_benchmark_mask (hashcat_ctx_t *hashcat_ctx); +int hashconfig_init (hashcat_ctx_t *hashcat_ctx); +void hashconfig_destroy (hashcat_ctx_t *hashcat_ctx); +u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param); +u32 hashconfig_get_kernel_loops (hashcat_ctx_t *hashcat_ctx); +int hashconfig_general_defaults (hashcat_ctx_t *hashcat_ctx); +void hashconfig_benchmark_defaults (hashcat_ctx_t *hashcat_ctx, salt_t *salt, void *esalt); +char *hashconfig_benchmark_mask (hashcat_ctx_t *hashcat_ctx); #endif // _INTERFACE_H diff --git a/include/types.h b/include/types.h index 2db6597dd..65ac45f92 100644 --- a/include/types.h +++ b/include/types.h @@ -663,8 +663,8 @@ typedef struct wordr typedef struct hc_device_param { - cl_device_id device; - cl_device_type device_type; + cl_device_id device; + cl_device_type device_type; u32 device_id; u32 platform_devices_id; // for mapping with hms devices @@ -683,7 +683,20 @@ typedef struct hc_device_param u32 vector_width; - u32 kernel_threads; + u32 kernel_threads_by_user; + + u32 kernel_threads_by_wgs_kernel1; + u32 kernel_threads_by_wgs_kernel12; + u32 kernel_threads_by_wgs_kernel2; + u32 kernel_threads_by_wgs_kernel23; + u32 kernel_threads_by_wgs_kernel3; + u32 kernel_threads_by_wgs_kernel_mp; + u32 kernel_threads_by_wgs_kernel_mp_l; + u32 kernel_threads_by_wgs_kernel_mp_r; + u32 kernel_threads_by_wgs_kernel_amp; + u32 kernel_threads_by_wgs_kernel_tm; + u32 kernel_threads_by_wgs_kernel_memset; + u32 kernel_loops; u32 kernel_accel; u32 kernel_loops_min; @@ -770,7 +783,6 @@ typedef struct hc_device_param cl_kernel kernel_mp_r; cl_kernel kernel_amp; cl_kernel kernel_tm; - cl_kernel kernel_weak; cl_kernel kernel_memset; cl_context context; diff --git a/src/autotune.c b/src/autotune.c index 782952f96..d9edf456d 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -15,18 +15,20 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par { hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - const u32 kernel_power_try = device_param->device_processors * device_param->kernel_threads * kernel_accel; - device_param->kernel_params_buf32[28] = 0; device_param->kernel_params_buf32[29] = kernel_loops; // not a bug, both need to be set device_param->kernel_params_buf32[30] = kernel_loops; // because there's two variables for inner iters for slow and fast hashes if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) { + const u32 kernel_power_try = device_param->device_processors * device_param->kernel_threads_by_wgs_kernel1 * kernel_accel; + run_kernel (hashcat_ctx, device_param, KERN_RUN_1, kernel_power_try, true, 0); } else { + const u32 kernel_power_try = device_param->device_processors * device_param->kernel_threads_by_wgs_kernel2 * kernel_accel; + run_kernel (hashcat_ctx, device_param, KERN_RUN_2, kernel_power_try, true, 0); } @@ -79,7 +81,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param device_param->kernel_accel = kernel_accel; device_param->kernel_loops = kernel_loops; - const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel; + const u32 kernel_power = device_param->device_processors * device_param->kernel_threads_by_user * device_param->kernel_accel; device_param->kernel_power = kernel_power; @@ -89,7 +91,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param // from here it's clear we are allowed to autotune // so let's init some fake words - const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads * kernel_accel_max; + const u32 kernel_power_max = device_param->device_processors * device_param->kernel_threads_by_user * kernel_accel_max; int CL_rc; @@ -283,7 +285,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param device_param->kernel_accel = kernel_accel; device_param->kernel_loops = kernel_loops; - const u32 kernel_power = device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel; + const u32 kernel_power = device_param->device_processors * device_param->kernel_threads_by_user * device_param->kernel_accel; device_param->kernel_power = kernel_power; diff --git a/src/interface.c b/src/interface.c index 97e1effd4..8d941fc0e 100644 --- a/src/interface.c +++ b/src/interface.c @@ -13120,8 +13120,8 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const u32 salt_pos, } u32 isSalted = ((hashconfig->salt_type == SALT_TYPE_INTERN) - | (hashconfig->salt_type == SALT_TYPE_EXTERN) - | (hashconfig->salt_type == SALT_TYPE_EMBEDDED)); + | (hashconfig->salt_type == SALT_TYPE_EXTERN) + | (hashconfig->salt_type == SALT_TYPE_EMBEDDED)); salt_t salt; @@ -13200,7 +13200,7 @@ int ascii_digest (hashcat_ctx_t *hashcat_ctx, char *out_buf, const u32 salt_pos, // u32 out_buf_plain[256] = { 0 }; - u32 out_buf_salt[256] = { 0 }; + u32 out_buf_salt[256] = { 0 }; char tmp_buf[1024] = { 0 }; @@ -19908,9 +19908,9 @@ void hashconfig_destroy (hashcat_ctx_t *hashcat_ctx) memset (hashconfig, 0, sizeof (hashconfig_t)); } -u32 hashconfig_enforce_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param) +u32 hashconfig_get_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_device_param_t *device_param) { - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; u32 kernel_threads = MIN (KERNEL_THREADS_MAX, device_param->device_maxworkgroup_size); @@ -19943,10 +19943,10 @@ u32 hashconfig_enforce_kernel_threads (hashcat_ctx_t *hashcat_ctx, const hc_devi return kernel_threads; } -u32 hashconfig_enforce_kernel_loops (hashcat_ctx_t *hashcat_ctx) +u32 hashconfig_get_kernel_loops (hashcat_ctx_t *hashcat_ctx) { - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - user_options_t *user_options = hashcat_ctx->user_options; + const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + const user_options_t *user_options = hashcat_ctx->user_options; u32 kernel_loops_fixed = 0; @@ -19990,9 +19990,9 @@ u32 hashconfig_enforce_kernel_loops (hashcat_ctx_t *hashcat_ctx) int hashconfig_general_defaults (hashcat_ctx_t *hashcat_ctx) { - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - hashes_t *hashes = hashcat_ctx->hashes; - user_options_t *user_options = hashcat_ctx->user_options; + const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + const hashes_t *hashes = hashcat_ctx->hashes; + const user_options_t *user_options = hashcat_ctx->user_options; salt_t *salts_buf = hashes->salts_buf; void *esalts_buf = hashes->esalts_buf; @@ -20154,61 +20154,61 @@ void hashconfig_benchmark_defaults (hashcat_ctx_t *hashcat_ctx, salt_t *salt, vo switch (hashconfig->hash_mode) { - case 2500: ((wpa_t *) esalt)->eapol_size = 128; + case 2500: ((wpa_t *) esalt)->eapol_size = 128; break; - case 5300: ((ikepsk_t *) esalt)->nr_len = 1; - ((ikepsk_t *) esalt)->msg_len = 1; + case 5300: ((ikepsk_t *) esalt)->nr_len = 1; + ((ikepsk_t *) esalt)->msg_len = 1; break; - case 5400: ((ikepsk_t *) esalt)->nr_len = 1; - ((ikepsk_t *) esalt)->msg_len = 1; + case 5400: ((ikepsk_t *) esalt)->nr_len = 1; + ((ikepsk_t *) esalt)->msg_len = 1; break; - case 5500: ((netntlm_t *) esalt)->user_len = 1; - ((netntlm_t *) esalt)->domain_len = 1; - ((netntlm_t *) esalt)->srvchall_len = 1; - ((netntlm_t *) esalt)->clichall_len = 1; + case 5500: ((netntlm_t *) esalt)->user_len = 1; + ((netntlm_t *) esalt)->domain_len = 1; + ((netntlm_t *) esalt)->srvchall_len = 1; + ((netntlm_t *) esalt)->clichall_len = 1; break; - case 5600: ((netntlm_t *) esalt)->user_len = 1; - ((netntlm_t *) esalt)->domain_len = 1; - ((netntlm_t *) esalt)->srvchall_len = 1; - ((netntlm_t *) esalt)->clichall_len = 1; + case 5600: ((netntlm_t *) esalt)->user_len = 1; + ((netntlm_t *) esalt)->domain_len = 1; + ((netntlm_t *) esalt)->srvchall_len = 1; + ((netntlm_t *) esalt)->clichall_len = 1; break; - case 7300: ((rakp_t *) esalt)->salt_len = 32; + case 7300: ((rakp_t *) esalt)->salt_len = 32; break; - case 10400: ((pdf_t *) esalt)->id_len = 16; - ((pdf_t *) esalt)->o_len = 32; - ((pdf_t *) esalt)->u_len = 32; + case 10400: ((pdf_t *) esalt)->id_len = 16; + ((pdf_t *) esalt)->o_len = 32; + ((pdf_t *) esalt)->u_len = 32; break; - case 10410: ((pdf_t *) esalt)->id_len = 16; - ((pdf_t *) esalt)->o_len = 32; - ((pdf_t *) esalt)->u_len = 32; + case 10410: ((pdf_t *) esalt)->id_len = 16; + ((pdf_t *) esalt)->o_len = 32; + ((pdf_t *) esalt)->u_len = 32; break; - case 10420: ((pdf_t *) esalt)->id_len = 16; - ((pdf_t *) esalt)->o_len = 32; - ((pdf_t *) esalt)->u_len = 32; + case 10420: ((pdf_t *) esalt)->id_len = 16; + ((pdf_t *) esalt)->o_len = 32; + ((pdf_t *) esalt)->u_len = 32; break; - case 10500: ((pdf_t *) esalt)->id_len = 16; - ((pdf_t *) esalt)->o_len = 32; - ((pdf_t *) esalt)->u_len = 32; + case 10500: ((pdf_t *) esalt)->id_len = 16; + ((pdf_t *) esalt)->o_len = 32; + ((pdf_t *) esalt)->u_len = 32; break; - case 10600: ((pdf_t *) esalt)->id_len = 16; - ((pdf_t *) esalt)->o_len = 127; - ((pdf_t *) esalt)->u_len = 127; + case 10600: ((pdf_t *) esalt)->id_len = 16; + ((pdf_t *) esalt)->o_len = 127; + ((pdf_t *) esalt)->u_len = 127; break; - case 10700: ((pdf_t *) esalt)->id_len = 16; - ((pdf_t *) esalt)->o_len = 127; - ((pdf_t *) esalt)->u_len = 127; + case 10700: ((pdf_t *) esalt)->id_len = 16; + ((pdf_t *) esalt)->o_len = 127; + ((pdf_t *) esalt)->u_len = 127; break; - case 11600: ((seven_zip_t *) esalt)->iv_len = 16; - ((seven_zip_t *) esalt)->data_len = 112; - ((seven_zip_t *) esalt)->unpack_size = 112; + case 11600: ((seven_zip_t *) esalt)->iv_len = 16; + ((seven_zip_t *) esalt)->data_len = 112; + ((seven_zip_t *) esalt)->unpack_size = 112; break; - case 13400: ((keepass_t *) esalt)->version = 2; + case 13400: ((keepass_t *) esalt)->version = 2; break; - case 13500: ((pstoken_t *) esalt)->salt_len = 113; + case 13500: ((pstoken_t *) esalt)->salt_len = 113; break; - case 13600: ((zip2_t *) esalt)->salt_len = 16; - ((zip2_t *) esalt)->data_len = 32; - ((zip2_t *) esalt)->mode = 3; + case 13600: ((zip2_t *) esalt)->salt_len = 16; + ((zip2_t *) esalt)->data_len = 32; + ((zip2_t *) esalt)->mode = 3; break; } } diff --git a/src/opencl.c b/src/opencl.c index 3c38f54ed..828d5a328 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -1119,29 +1119,43 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u32 num, const u32 event_update, const u32 iteration) { - hashconfig_t *hashconfig = hashcat_ctx->hashconfig; - status_ctx_t *status_ctx = hashcat_ctx->status_ctx; - user_options_t *user_options = hashcat_ctx->user_options; + const hashconfig_t *hashconfig = hashcat_ctx->hashconfig; + const status_ctx_t *status_ctx = hashcat_ctx->status_ctx; + const user_options_t *user_options = hashcat_ctx->user_options; u32 num_elements = num; device_param->kernel_params_buf32[34] = num; - u32 kernel_threads = device_param->kernel_threads; - - while (num_elements % kernel_threads) num_elements++; - - cl_kernel kernel = NULL; + cl_kernel kernel = NULL; + u32 kernel_threads = 0; switch (kern_run) { - case KERN_RUN_1: kernel = device_param->kernel1; break; - case KERN_RUN_12: kernel = device_param->kernel12; break; - case KERN_RUN_2: kernel = device_param->kernel2; break; - case KERN_RUN_23: kernel = device_param->kernel23; break; - case KERN_RUN_3: kernel = device_param->kernel3; break; + case KERN_RUN_1: + kernel = device_param->kernel1; + kernel_threads = device_param->kernel_threads_by_wgs_kernel1; + break; + case KERN_RUN_12: + kernel = device_param->kernel12; + kernel_threads = device_param->kernel_threads_by_wgs_kernel12; + break; + case KERN_RUN_2: + kernel = device_param->kernel2; + kernel_threads = device_param->kernel_threads_by_wgs_kernel2; + break; + case KERN_RUN_23: + kernel = device_param->kernel23; + kernel_threads = device_param->kernel_threads_by_wgs_kernel23; + break; + case KERN_RUN_3: + kernel = device_param->kernel3; + kernel_threads = device_param->kernel_threads_by_wgs_kernel3; + break; } + while (num_elements % kernel_threads) num_elements++; + int CL_rc; CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 24, sizeof (cl_uint), device_param->kernel_params[24]); if (CL_rc == -1) return -1; @@ -1272,22 +1286,27 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, case KERN_RUN_MP_L: device_param->kernel_params_mp_l_buf32[9] = num; break; } - // causes problems with special threads like in bcrypt - // const u32 kernel_threads = device_param->kernel_threads; - - u32 kernel_threads = device_param->kernel_threads; - - while (num_elements % kernel_threads) num_elements++; - - cl_kernel kernel = NULL; + cl_kernel kernel = NULL; + u32 kernel_threads = 0; switch (kern_run) { - case KERN_RUN_MP: kernel = device_param->kernel_mp; break; - case KERN_RUN_MP_R: kernel = device_param->kernel_mp_r; break; - case KERN_RUN_MP_L: kernel = device_param->kernel_mp_l; break; + case KERN_RUN_MP: + kernel = device_param->kernel_mp; + kernel_threads = device_param->kernel_threads_by_wgs_kernel_mp; + break; + case KERN_RUN_MP_R: + kernel = device_param->kernel_mp_r; + kernel_threads = device_param->kernel_threads_by_wgs_kernel_mp_r; + break; + case KERN_RUN_MP_L: + kernel = device_param->kernel_mp_l; + kernel_threads = device_param->kernel_threads_by_wgs_kernel_mp_l; + break; } + while (num_elements % kernel_threads) num_elements++; + switch (kern_run) { case KERN_RUN_MP: CL_rc = hc_clSetKernelArg (hashcat_ctx, kernel, 3, sizeof (cl_ulong), device_param->kernel_params_mp[3]); if (CL_rc == -1) return -1; @@ -1338,7 +1357,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) const u32 num_elements = 1024; // fixed - u32 kernel_threads = 32; + u32 kernel_threads = device_param->kernel_threads_by_wgs_kernel_tm; cl_kernel kernel = device_param->kernel_tm; @@ -1371,7 +1390,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, // causes problems with special threads like in bcrypt // const u32 kernel_threads = device_param->kernel_threads; - u32 kernel_threads = device_param->kernel_threads; + u32 kernel_threads = device_param->kernel_threads_by_wgs_kernel_amp; while (num_elements % kernel_threads) num_elements++; @@ -1411,7 +1430,7 @@ int run_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par device_param->kernel_params_memset_buf32[1] = value; device_param->kernel_params_memset_buf32[2] = num16d; - u32 kernel_threads = device_param->kernel_threads; + u32 kernel_threads = device_param->kernel_threads_by_wgs_kernel_memset; u32 num_elements = num16d; @@ -2414,7 +2433,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_endian_little == CL_FALSE) { - event_log_warning (hashcat_ctx, "* Device #%u: Not a little endian device", device_id + 1); + event_log_error (hashcat_ctx, "* Device #%u: Not a little endian device", device_id + 1); device_param->skipped = 1; } @@ -2429,7 +2448,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_available == CL_FALSE) { - event_log_warning (hashcat_ctx, "* Device #%u: Device not available", device_id + 1); + event_log_error (hashcat_ctx, "* Device #%u: Device not available", device_id + 1); device_param->skipped = 1; } @@ -2444,7 +2463,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_compiler_available == CL_FALSE) { - event_log_warning (hashcat_ctx, "* Device #%u: No compiler available for device", device_id + 1); + event_log_error (hashcat_ctx, "* Device #%u: No compiler available for device", device_id + 1); device_param->skipped = 1; } @@ -2459,7 +2478,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if ((device_execution_capabilities & CL_EXEC_KERNEL) == 0) { - event_log_warning (hashcat_ctx, "* Device #%u: Device does not support executing kernels", device_id + 1); + event_log_error (hashcat_ctx, "* Device #%u: Device does not support executing kernels", device_id + 1); device_param->skipped = 1; } @@ -2480,14 +2499,14 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (strstr (device_extensions, "base_atomics") == 0) { - event_log_warning (hashcat_ctx, "* Device #%u: Device does not support base atomics", device_id + 1); + event_log_error (hashcat_ctx, "* Device #%u: Device does not support base atomics", device_id + 1); device_param->skipped = 1; } if (strstr (device_extensions, "byte_addressable_store") == 0) { - event_log_warning (hashcat_ctx, "* Device #%u: Device does not support byte addressable store", device_id + 1); + event_log_error (hashcat_ctx, "* Device #%u: Device does not support byte addressable store", device_id + 1); device_param->skipped = 1; } @@ -2504,7 +2523,7 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) if (device_local_mem_size < 32768) { - event_log_warning (hashcat_ctx, "* Device #%u: Device local mem size is too small", device_id + 1); + event_log_error (hashcat_ctx, "* Device #%u: Device local mem size is too small", device_id + 1); device_param->skipped = 1; } @@ -2521,8 +2540,8 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) { if (user_options->force == 0) { - event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime, expect massive speed loss", device_id + 1); - event_log_warning (hashcat_ctx, " You can use --force to override this but do not post error reports if you do so"); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Not a native Intel OpenCL runtime, expect massive speed loss", device_id + 1); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " You can use --force to override this but do not post error reports if you do so"); device_param->skipped = 1; } @@ -2703,8 +2722,8 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) { if (device_param->kernel_exec_timeout != 0) { - event_log_warning (hashcat_ctx, "* Device #%u: Kernel exec timeout is not disabled, it might cause you errors of code 702", device_id + 1); - event_log_warning (hashcat_ctx, " See the wiki on how to disable it: https://hashcat.net/wiki/doku.php?id=timeout_patch"); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel exec timeout is not disabled, it might cause you errors of code 702", device_id + 1); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, " See the wiki on how to disable it: https://hashcat.net/wiki/doku.php?id=timeout_patch"); } } } @@ -2873,6 +2892,39 @@ void opencl_ctx_devices_kernel_loops (hashcat_ctx_t *hashcat_ctx) } } +static int get_kernel_threads (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_kernel kernel, u32 *result) +{ + int CL_rc; + + size_t work_group_size; + + CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (work_group_size), &work_group_size, NULL); + + if (CL_rc == -1) return -1; + + size_t compile_work_group_size[3]; + + CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, kernel, device_param->device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof (compile_work_group_size), &compile_work_group_size, NULL); + + if (CL_rc == -1) return -1; + + u32 kernel_threads = device_param->kernel_threads_by_user; + + if (work_group_size > 0) + { + kernel_threads = MIN (kernel_threads, work_group_size); + } + + if (compile_work_group_size[0] > 0) + { + kernel_threads = MIN (kernel_threads, compile_work_group_size[0]); + } + + *result = kernel_threads; + + return 0; +} + int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) { bitmap_ctx_t *bitmap_ctx = hashcat_ctx->bitmap_ctx; @@ -3050,9 +3102,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) * there needs to be some upper limit, otherwise there's too much overhead */ - u32 kernel_threads = hashconfig_enforce_kernel_threads (hashcat_ctx, device_param); + const u32 kernel_threads = hashconfig_get_kernel_threads (hashcat_ctx, device_param); - device_param->kernel_threads = kernel_threads; + device_param->kernel_threads_by_user = kernel_threads; device_param->hardware_power = device_processors * kernel_threads; @@ -3166,18 +3218,18 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) size_scrypt /= 1u << tmto; - size_scrypt *= device_param->device_processors * device_param->kernel_threads * device_param->kernel_accel_max; + size_scrypt *= device_param->hardware_power * device_param->kernel_accel_max; if ((size_scrypt / 4) > device_param->device_maxmem_alloc) { - event_log_warning (hashcat_ctx, "Not enough single-block device memory allocatable to use --scrypt-tmto %d, increasing...", tmto); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "Not enough single-block device memory allocatable to use --scrypt-tmto %d, increasing...", tmto); continue; } if (size_scrypt > device_param->device_global_mem) { - event_log_warning (hashcat_ctx, "Not enough total device memory allocatable to use --scrypt-tmto %d, increasing...", tmto); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "Not enough total device memory allocatable to use --scrypt-tmto %d, increasing...", tmto); continue; } @@ -3209,7 +3261,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) * some algorithms need a fixed kernel-loops count */ - const u32 kernel_loops_fixed = hashconfig_enforce_kernel_loops (hashcat_ctx); + const u32 kernel_loops_fixed = hashconfig_get_kernel_loops (hashcat_ctx); if (kernel_loops_fixed != 0) { @@ -3228,7 +3280,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) while (kernel_accel_max >= kernel_accel_min) { - const u32 kernel_power_max = device_processors * kernel_threads * kernel_accel_max; + const u32 kernel_power_max = device_param->hardware_power * kernel_accel_max; // size_pws @@ -3403,7 +3455,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) strncpy (build_opts, build_opts_new, sizeof (build_opts)); #if defined (DEBUG) - event_log_warning (hashcat_ctx, "* Device #%u: build_opts '%s'", device_id + 1, build_opts); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: build_opts '%s'", device_id + 1, build_opts); #endif /** @@ -3526,7 +3578,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) else { #if defined (DEBUG) - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s (%ld bytes)", device_id + 1, cached_file, cst.st_size); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s (%ld bytes)", device_id + 1, cached_file, cst.st_size); #endif const int rc_read_kernel = read_kernel_binary (hashcat_ctx, cached_file, 1, kernel_lengths, kernel_sources); @@ -3545,7 +3597,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) else { #if defined (DEBUG) - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s (%ld bytes)", device_id + 1, source_file, sst.st_size); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s (%ld bytes)", device_id + 1, source_file, sst.st_size); #endif const int rc_read_kernel = read_kernel_binary (hashcat_ctx, source_file, 1, kernel_lengths, kernel_sources); @@ -3730,7 +3782,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) else { #if defined (DEBUG) - event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s (%ld bytes)", device_id + 1, cached_file, cst.st_size); + if (user_options->quiet == false) event_log_warning (hashcat_ctx, "* Device #%u: Kernel %s (%ld bytes)", device_id + 1, cached_file, cst.st_size); #endif const int rc_read_kernel = read_kernel_binary (hashcat_ctx, cached_file, 1, kernel_lengths, kernel_sources); @@ -4136,8 +4188,6 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) * kernel name */ - size_t kernel_wgs_tmp; - char kernel_name[64] = { 0 }; if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) @@ -4193,7 +4243,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel_tm, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_tm, &device_param->kernel_threads_by_wgs_kernel_tm); if (CL_rc == -1) return -1; } @@ -4201,24 +4251,32 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) } else { + // kernel1 + snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_init", hashconfig->kern_type); CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel1); if (CL_rc == -1) return -1; + // kernel2 + snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_loop", hashconfig->kern_type); CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel2); if (CL_rc == -1) return -1; + // kernel3 + snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_comp", hashconfig->kern_type); CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program, kernel_name, &device_param->kernel3); if (CL_rc == -1) return -1; + // kernel12 + if (hashconfig->opts_type & OPTS_TYPE_HOOK12) { snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_hook12", hashconfig->kern_type); @@ -4227,11 +4285,13 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel12, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel12, &device_param->kernel_threads_by_wgs_kernel12); if (CL_rc == -1) return -1; } + // kernel23 + if (hashconfig->opts_type & OPTS_TYPE_HOOK23) { snprintf (kernel_name, sizeof (kernel_name) - 1, "m%05d_hook23", hashconfig->kern_type); @@ -4240,15 +4300,29 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel23, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel23, &device_param->kernel_threads_by_wgs_kernel23); if (CL_rc == -1) return -1; } } - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel1, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel2, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel3, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); if (CL_rc == -1) return -1; + // kernel1 + + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel1, &device_param->kernel_threads_by_wgs_kernel1); + + if (CL_rc == -1) return -1; + + // kernel2 + + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel2, &device_param->kernel_threads_by_wgs_kernel2); + + if (CL_rc == -1) return -1; + + // kernel3 + + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel3, &device_param->kernel_threads_by_wgs_kernel3); + + if (CL_rc == -1) return -1; for (u32 i = 0; i <= 23; i++) { @@ -4276,7 +4350,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel_memset, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_memset, &device_param->kernel_threads_by_wgs_kernel_memset); if (CL_rc == -1) return -1; @@ -4288,11 +4362,25 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (user_options->attack_mode == ATTACK_MODE_BF) { - CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_mp, "l_markov", &device_param->kernel_mp_l); if (CL_rc == -1) return -1; + // mp_l + + CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_mp, "l_markov", &device_param->kernel_mp_l); + + if (CL_rc == -1) return -1; + + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_mp_l, &device_param->kernel_threads_by_wgs_kernel_mp_l); + + if (CL_rc == -1) return -1; + + // mp_r + CL_rc = hc_clCreateKernel (hashcat_ctx, device_param->program_mp, "r_markov", &device_param->kernel_mp_r); if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel_mp_l, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel_mp_r, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); if (CL_rc == -1) return -1; + if (CL_rc == -1) return -1; + + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_mp_r, &device_param->kernel_threads_by_wgs_kernel_mp_r); + + if (CL_rc == -1) return -1; if (hashconfig->opts_type & OPTS_TYPE_PT_BITSLICE) { @@ -4306,7 +4394,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel_mp, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_threads_by_wgs_kernel_mp); if (CL_rc == -1) return -1; } @@ -4316,7 +4404,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel_mp, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_mp, &device_param->kernel_threads_by_wgs_kernel_mp); if (CL_rc == -1) return -1; } @@ -4331,7 +4419,7 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) if (CL_rc == -1) return -1; - CL_rc = hc_clGetKernelWorkGroupInfo (hashcat_ctx, device_param->kernel_amp, device_param->device, CL_KERNEL_WORK_GROUP_SIZE, sizeof (size_t), &kernel_wgs_tmp, NULL); kernel_threads = MIN (kernel_threads, kernel_wgs_tmp); + CL_rc = get_kernel_threads (hashcat_ctx, device_param, device_param->kernel_amp, &device_param->kernel_threads_by_wgs_kernel_amp); if (CL_rc == -1) return -1; } @@ -4357,11 +4445,6 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) } } - // maybe this has been updated by clGetKernelWorkGroupInfo() - // value can only be decreased, so we don't need to reallocate buffers - - device_param->kernel_threads = kernel_threads; - // zero some data buffers CL_rc = run_kernel_bzero (hashcat_ctx, device_param, device_param->d_pws_buf, size_pws); if (CL_rc == -1) return -1;