diff --git a/OpenCL/inc_types.cl b/OpenCL/inc_types.cl index 50c789aa5..a18ad242a 100644 --- a/OpenCL/inc_types.cl +++ b/OpenCL/inc_types.cl @@ -39,6 +39,60 @@ typedef VTYPE(uint, VECT_SIZE) u32x; typedef VTYPE(ulong, VECT_SIZE) u64x; #endif +// unions + +typedef union vconv32 +{ + u64 v32; + + struct + { + u16 v16a; + u16 v16b; + }; + + struct + { + u8 v8a; + u8 v8b; + u8 v8c; + u8 v8d; + }; + +} vconv32_t; + +typedef union vconv64 +{ + u64 v64; + + struct + { + u32 v32a; + u32 v32b; + }; + + struct + { + u16 v16a; + u16 v16b; + u16 v16c; + u16 v16d; + }; + + struct + { + u8 v8a; + u8 v8b; + u8 v8c; + u8 v8d; + u8 v8e; + u8 v8f; + u8 v8g; + u8 v8h; + }; + +} vconv64_t; + DECLSPEC u32 l32_from_64_S (u64 a); DECLSPEC u32 l32_from_64_S (u64 a) { @@ -1426,3 +1480,189 @@ typedef struct keyboard_layout_mapping int dst_len; } keyboard_layout_mapping_t; + +// functions + +DECLSPEC u8 v8a_from_v32_S (const u32 v32); +DECLSPEC u8 v8a_from_v32_S (const u32 v32) +{ + vconv32_t v; + + v.v32 = v32; + + return v.v8a; +} + +DECLSPEC u8 v8b_from_v32_S (const u32 v32); +DECLSPEC u8 v8b_from_v32_S (const u32 v32) +{ + vconv32_t v; + + v.v32 = v32; + + return v.v8b; +} + +DECLSPEC u8 v8c_from_v32_S (const u32 v32); +DECLSPEC u8 v8c_from_v32_S (const u32 v32) +{ + vconv32_t v; + + v.v32 = v32; + + return v.v8c; +} + +DECLSPEC u8 v8d_from_v32_S (const u32 v32); +DECLSPEC u8 v8d_from_v32_S (const u32 v32) +{ + vconv32_t v; + + v.v32 = v32; + + return v.v8d; +} + +DECLSPEC u16 v16a_from_v32_S (const u32 v32); +DECLSPEC u16 v16a_from_v32_S (const u32 v32) +{ + vconv32_t v; + + v.v32 = v32; + + return v.v16a; +} + +DECLSPEC u16 v16b_from_v32_S (const u32 v32); +DECLSPEC u16 v16b_from_v32_S (const u32 v32) +{ + vconv32_t v; + + v.v32 = v32; + + return v.v16b; +} + +DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b); +DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b) +{ + vconv32_t v; + + v.v16a = v16a; + v.v16b = v16b; + + return v.v32; +} + +DECLSPEC u32 v32a_from_v64_S (const u64 v64); +DECLSPEC u32 v32a_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v32a; +} + +DECLSPEC u32 v32b_from_v64_S (const u64 v64); +DECLSPEC u32 v32b_from_v64_S (const u64 v64) +{ + vconv64_t v; + + v.v64 = v64; + + return v.v32b; +} + +DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b); +DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b) +{ + vconv64_t v; + + v.v32a = v32a; + v.v32b = v32b; + + return v.v64; +} + +// unpack function are similar, but always return u32 + +DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32); +DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32) +{ + u32 r; + + #if defined IS_NV + asm ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32)); + #elif defined IS_AMD + #ifdef HAS_VBFE + __asm__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32)); + #else + r = (v32 >> 0) & 0xff; + #endif + #else + r = (v32 >> 0) & 0xff; + #endif + + return r; +} + +DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32); +DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32) +{ + u32 r; + + #if defined IS_NV + asm ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32)); + #elif defined IS_AMD + #ifdef HAS_VBFE + __asm__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32)); + #else + r = (v32 >> 8) & 0xff; + #endif + #else + r = (v32 >> 8) & 0xff; + #endif + + return r; +} + +DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32); +DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32) +{ + u32 r; + + #if defined IS_NV + asm ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32)); + #elif defined IS_AMD + #ifdef HAS_VBFE + __asm__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32)); + #else + r = (v32 >> 16) & 0xff; + #endif + #else + r = (v32 >> 16) & 0xff; + #endif + + return r; +} + +DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32); +DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32) +{ + u32 r; + + #if defined IS_NV + asm ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32)); + #elif defined IS_AMD + #ifdef HAS_VBFE + __asm__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32)); + #else + r = (v32 >> 24) & 0xff; + #endif + #else + r = (v32 >> 24) & 0xff; + #endif + + return r; +} diff --git a/OpenCL/m03200-pure.cl b/OpenCL/m03200-pure.cl index b950da688..03782bdd0 100644 --- a/OpenCL/m03200-pure.cl +++ b/OpenCL/m03200-pure.cl @@ -299,21 +299,21 @@ __constant u32a c_sbox3[256] = 0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6 }; -#define BF_ROUND(L,R,N) \ -{ \ - u32 tmp; \ - \ - const u32 r0 = hc_bfe_S ((L), 24, 8); \ - const u32 r1 = hc_bfe_S ((L), 16, 8); \ - const u32 r2 = hc_bfe_S ((L), 8, 8); \ - const u32 r3 = hc_bfe_S ((L), 0, 8); \ - \ - tmp = S0[r0]; \ - tmp += S1[r1]; \ - tmp ^= S2[r2]; \ - tmp += S3[r3]; \ - \ - (R) ^= tmp ^ P[(N)]; \ +#define BF_ROUND(L,R,N) \ +{ \ + u32 tmp; \ + \ + const u32 r0 = unpack_v8d_from_v32_S ((L)); \ + const u32 r1 = unpack_v8c_from_v32_S ((L)); \ + const u32 r2 = unpack_v8b_from_v32_S ((L)); \ + const u32 r3 = unpack_v8a_from_v32_S ((L)); \ + \ + tmp = S0[r0]; \ + tmp += S1[r1]; \ + tmp ^= S2[r2]; \ + tmp += S3[r3]; \ + \ + (R) ^= tmp ^ P[(N)]; \ } #define BF_ENCRYPT(L,R) \ @@ -658,9 +658,6 @@ __kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m032 L0 = 0; R0 = 0; - #ifdef _unroll - #pragma unroll - #endif for (u32 i = 0; i < 9; i++) { BF_ENCRYPT (L0, R0); @@ -723,9 +720,6 @@ __kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m032 L0 = 0; R0 = 0; - #ifdef _unroll - #pragma unroll - #endif for (u32 i = 0; i < 9; i++) { BF_ENCRYPT (L0, R0); diff --git a/OpenCL/m09000-pure.cl b/OpenCL/m09000-pure.cl index bf7fa986c..f4c3b89b1 100644 --- a/OpenCL/m09000-pure.cl +++ b/OpenCL/m09000-pure.cl @@ -308,51 +308,22 @@ __constant u32a c_pbox[18] = 0x9216d5d9, 0x8979fb1b }; -#ifdef IS_AMD -#define BF_ROUND(L,R,N) \ -{ \ - uchar4 c = as_uchar4 ((L)); \ - \ - u32 tmp; \ - \ - tmp = S0[c.s3]; \ - tmp += S1[c.s2]; \ - tmp ^= S2[c.s1]; \ - tmp += S3[c.s0]; \ - \ - (R) ^= tmp ^ P[(N)]; \ +#define BF_ROUND(L,R,N) \ +{ \ + u32 tmp; \ + \ + const u32 r0 = unpack_v8d_from_v32_S ((L)); \ + const u32 r1 = unpack_v8c_from_v32_S ((L)); \ + const u32 r2 = unpack_v8b_from_v32_S ((L)); \ + const u32 r3 = unpack_v8a_from_v32_S ((L)); \ + \ + tmp = S0[r0]; \ + tmp += S1[r1]; \ + tmp ^= S2[r2]; \ + tmp += S3[r3]; \ + \ + (R) ^= tmp ^ P[(N)]; \ } -#endif - -#ifdef IS_NV -#define BF_ROUND(L,R,N) \ -{ \ - u32 tmp; \ - \ - tmp = S0[hc_bfe_S ((L), 24, 8)]; \ - tmp += S1[hc_bfe_S ((L), 16, 8)]; \ - tmp ^= S2[hc_bfe_S ((L), 8, 8)]; \ - tmp += S3[hc_bfe_S ((L), 0, 8)]; \ - \ - (R) ^= tmp ^ P[(N)]; \ -} -#endif - -#ifdef IS_GENERIC -#define BF_ROUND(L,R,N) \ -{ \ - uchar4 c = as_uchar4 ((L)); \ - \ - u32 tmp; \ - \ - tmp = S0[c.s3]; \ - tmp += S1[c.s2]; \ - tmp ^= S2[c.s1]; \ - tmp += S3[c.s0]; \ - \ - (R) ^= tmp ^ P[(N)]; \ -} -#endif #define BF_ENCRYPT(L,R) \ { \ @@ -512,7 +483,7 @@ DECLSPEC void sha1_transform (const u32 *w0, const u32 *w1, const u32 *w2, const digest[4] += E; } -__kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t)) +__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t)) { /** * base @@ -626,10 +597,10 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t)) P[i] = c_pbox[i]; } - __local u32 S0_all[8][256]; - __local u32 S1_all[8][256]; - __local u32 S2_all[8][256]; - __local u32 S3_all[8][256]; + __local u32 S0_all[FIXED_LOCAL_SIZE][256]; + __local u32 S1_all[FIXED_LOCAL_SIZE][256]; + __local u32 S2_all[FIXED_LOCAL_SIZE][256]; + __local u32 S3_all[FIXED_LOCAL_SIZE][256]; __local u32 *S0 = S0_all[lid]; __local u32 *S1 = S1_all[lid]; @@ -731,7 +702,7 @@ __kernel void m09000_init (KERN_ATTR_TMPS (pwsafe2_tmp_t)) } } -__kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t)) +__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t)) { /** * base @@ -752,23 +723,21 @@ __kernel void m09000_loop (KERN_ATTR_TMPS (pwsafe2_tmp_t)) u32 P[18]; - #pragma unroll for (u32 i = 0; i < 18; i++) { P[i] = tmps[gid].P[i]; } - __local u32 S0_all[8][256]; - __local u32 S1_all[8][256]; - __local u32 S2_all[8][256]; - __local u32 S3_all[8][256]; + __local u32 S0_all[FIXED_LOCAL_SIZE][256]; + __local u32 S1_all[FIXED_LOCAL_SIZE][256]; + __local u32 S2_all[FIXED_LOCAL_SIZE][256]; + __local u32 S3_all[FIXED_LOCAL_SIZE][256]; __local u32 *S0 = S0_all[lid]; __local u32 *S1 = S1_all[lid]; __local u32 *S2 = S2_all[lid]; __local u32 *S3 = S3_all[lid]; - #pragma unroll for (u32 i = 0; i < 256; i++) { S0[i] = tmps[gid].S0[i]; diff --git a/OpenCL/m18600-pure.cl b/OpenCL/m18600-pure.cl index f57e6dd7e..15693ba80 100644 --- a/OpenCL/m18600-pure.cl +++ b/OpenCL/m18600-pure.cl @@ -318,18 +318,21 @@ __constant u32a c_pbox[18] = 0x9216d5d9, 0x8979fb1b }; -#define BF_ROUND(L,R,N) \ -{ \ - uchar4 c = as_uchar4 ((L)); \ - \ - u32 tmp; \ - \ - tmp = S0[c.s3]; \ - tmp += S1[c.s2]; \ - tmp ^= S2[c.s1]; \ - tmp += S3[c.s0]; \ - \ - (R) ^= tmp ^ P[(N)]; \ +#define BF_ROUND(L,R,N) \ +{ \ + u32 tmp; \ + \ + const u32 r0 = unpack_v8d_from_v32_S ((L)); \ + const u32 r1 = unpack_v8c_from_v32_S ((L)); \ + const u32 r2 = unpack_v8b_from_v32_S ((L)); \ + const u32 r3 = unpack_v8a_from_v32_S ((L)); \ + \ + tmp = S0[r0]; \ + tmp += S1[r1]; \ + tmp ^= S2[r2]; \ + tmp += S3[r3]; \ + \ + (R) ^= tmp ^ P[(N)]; \ } #define BF_ENCRYPT(L,R) \ @@ -582,7 +585,7 @@ __kernel void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) } } -__kernel void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) +__kernel void __attribute__((reqd_work_group_size(FIXED_LOCAL_SIZE, 1, 1))) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) { const u64 gid = get_global_id (0); const u64 lid = get_local_id (0); @@ -612,10 +615,10 @@ __kernel void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t)) P[i] = c_pbox[i] ^ ukey[i % 4]; } - __local u32 S0_all[8][256]; - __local u32 S1_all[8][256]; - __local u32 S2_all[8][256]; - __local u32 S3_all[8][256]; + __local u32 S0_all[FIXED_LOCAL_SIZE][256]; + __local u32 S1_all[FIXED_LOCAL_SIZE][256]; + __local u32 S2_all[FIXED_LOCAL_SIZE][256]; + __local u32 S3_all[FIXED_LOCAL_SIZE][256]; __local u32 *S0 = S0_all[lid]; __local u32 *S1 = S1_all[lid]; diff --git a/docs/changes.txt b/docs/changes.txt index f4d3760de..afd5e52f4 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -43,7 +43,7 @@ ## Improvements ## -- Cracking bcrypt: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count +- Cracking bcrypt and Password Safe v2: Use a feedback from the OpenCL runtime to dynamically find out optimal thread count - Bitcoin Wallet: Be more user friendly by allowing a larger data range for ckey and public_key - Building: Updated BUILD.md - My Wallet: Added additional plaintext pattern used in newer versions diff --git a/include/types.h b/include/types.h index 0c61a6662..16b7e1ee5 100644 --- a/include/types.h +++ b/include/types.h @@ -1317,6 +1317,7 @@ typedef struct hc_device_param bool has_vperm; bool has_vadd3; + bool has_vbfe; double spin_damp; diff --git a/src/modules/module_03200.c b/src/modules/module_03200.c index a9d4efc0c..3f6c57ced 100644 --- a/src/modules/module_03200.c +++ b/src/modules/module_03200.c @@ -80,11 +80,6 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY // based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value // which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result. // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1. - // note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with: - // Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max) - // on my development system. no clue where the 4 bytes are spent. - // I did some research on this and it seems to be related with the datatype. - // For example, if i used u8 instead, there's only 1 byte wasted. u32 fixed_local_size = 0; @@ -94,7 +89,20 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY } else { - fixed_local_size = (device_param->device_local_mem_size - 4) / 4096; + u32 overhead = 0; + + if (device_param->device_vendor_id == VENDOR_ID_NV) + { + // note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with: + // Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max) + // on my development system. no clue where the 4 bytes are spent. + // I did some research on this and it seems to be related with the datatype. + // For example, if i used u8 instead, there's only 1 byte wasted. + + overhead = 4; + } + + fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; } hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); diff --git a/src/modules/module_09000.c b/src/modules/module_09000.c index 86017b1ba..2ecfae886 100644 --- a/src/modules/module_09000.c +++ b/src/modules/module_09000.c @@ -70,18 +70,31 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c return tmp_size; } -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { - const u32 kernel_threads_min = 8; // Blowfish + char *jit_build_options = NULL; - return kernel_threads_min; -} + u32 fixed_local_size = 0; -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_max = 8; // Blowfish + if (device_param->device_type & CL_DEVICE_TYPE_CPU) + { + fixed_local_size = 1; + } + else + { + u32 overhead = 0; - return kernel_threads_max; + if (device_param->device_vendor_id == VENDOR_ID_NV) + { + overhead = 4; + } + + fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); + + return jit_build_options; } bool module_outfile_check_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -183,14 +196,14 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/modules/module_18600.c b/src/modules/module_18600.c index 14e9d35b1..6614e46e8 100644 --- a/src/modules/module_18600.c +++ b/src/modules/module_18600.c @@ -62,18 +62,31 @@ typedef struct odf11 static const char *SIGNATURE_ODF = "$odf$"; -u32 module_kernel_threads_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) { - const u32 kernel_threads_max = 8; // Blowfish enforced + char *jit_build_options = NULL; - return kernel_threads_max; -} + u32 fixed_local_size = 0; -u32 module_kernel_threads_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 kernel_threads_min = 8; // Blowfish enforced + if (device_param->device_type & CL_DEVICE_TYPE_CPU) + { + fixed_local_size = 1; + } + else + { + u32 overhead = 0; - return kernel_threads_min; + if (device_param->device_vendor_id == VENDOR_ID_NV) + { + overhead = 4; + } + + fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); + + return jit_build_options; } u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) @@ -335,14 +348,14 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_hook23 = MODULE_DEFAULT; module_ctx->module_hook_salt_size = MODULE_DEFAULT; module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; module_ctx->module_jit_cache_disable = MODULE_DEFAULT; module_ctx->module_kernel_accel_max = MODULE_DEFAULT; module_ctx->module_kernel_accel_min = MODULE_DEFAULT; module_ctx->module_kernel_loops_max = MODULE_DEFAULT; module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = module_kernel_threads_max; - module_ctx->module_kernel_threads_min = module_kernel_threads_min; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; module_ctx->module_kern_type = module_kern_type; module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; module_ctx->module_opti_type = module_opti_type; diff --git a/src/opencl.c b/src/opencl.c index c58a2cbfb..76161cfa0 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -3958,6 +3958,10 @@ int opencl_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime) const bool has_vadd3 = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ (\"V_ADD3_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }"); device_param->has_vadd3 = has_vadd3; + + const bool has_vbfe = test_instruction (hashcat_ctx, context, device_param->device, "__kernel void test () { uint r; __asm__ (\"V_BFE_U32 %0, 0, 0, 0;\" : \"=v\"(r)); }"); + + device_param->has_vbfe = has_vbfe; } // device_available_mem @@ -4852,9 +4856,9 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) // we don't have sm_* on vendors not NV but it doesn't matter #if defined (DEBUG) - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); #else - build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); + build_options_len += snprintf (build_options_buf + build_options_len, build_options_sz - build_options_len, "-D LOCAL_MEM_TYPE=%u -D VENDOR_ID=%u -D CUDA_ARCH=%u -D HAS_VPERM=%u -D HAS_VADD3=%u -D HAS_VBFE=%u -D VECT_SIZE=%u -D DEVICE_TYPE=%u -D DGST_R0=%u -D DGST_R1=%u -D DGST_R2=%u -D DGST_R3=%u -D DGST_ELEM=%u -D KERN_TYPE=%u -D _unroll -w ", device_param->device_local_mem_type, device_param->platform_vendor_id, (device_param->sm_major * 100) + device_param->sm_minor, device_param->has_vperm, device_param->has_vadd3, device_param->has_vbfe, device_param->vector_width, (u32) device_param->device_type, hashconfig->dgst_pos0, hashconfig->dgst_pos1, hashconfig->dgst_pos2, hashconfig->dgst_pos3, hashconfig->dgst_size / 4, kern_type); #endif /*