commit 06b67f60d69fd746a1c951bbbc2897c1eb08d610 Author: dela Date: Thu Feb 26 06:14:36 2026 +0800 frist diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e1f6ecc --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +CLAUDE.md +/docs diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..a0c4a04 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,114 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "hcaptcha-pow" +version = "0.1.0" +dependencies = [ + "base64", + "serde", + "serde_json", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..001746f --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "hcaptcha-pow" +version = "0.1.0" +edition = "2021" + +[dependencies] +base64 = "0.22" +serde = { version = "1", features = ["derive"] } +serde_json = "1" diff --git a/src/gift256/encrypt.rs b/src/gift256/encrypt.rs new file mode 100644 index 0000000..9f5b5b4 --- /dev/null +++ b/src/gift256/encrypt.rs @@ -0,0 +1,74 @@ +//! GIFT-256 fixsliced encryption (13 rounds) +//! Corresponds to gift256_encrypt_fixsliced @ 0x80048469 +//! +//! 
Round structure: 4 cycles x {SBox+P1+KeyXOR, SBox+DA+KeyXOR, SBox+P2+KeyXOR, SBox+DB+KeyXOR} +//! Last cycle only executes the first type (SBox+P1+KeyXOR) + +use super::sbox::sbox_bitsliced; +use super::linear::{linear_p1, linear_p2, diffusion_a, diffusion_b}; +use super::interleave::{pack_input, nibble_deinterleave, unpack_output}; + +/// GIFT-256 encryption. +/// input: 256-bit plaintext (8 u32, little-endian) +/// rk: 120 u32 round keys (from key_schedule) +/// output: 256-bit ciphertext (8 u32) +pub fn encrypt(input: &[u32; 8], rk: &[u32; 120]) -> [u32; 8] { + // === Input packing (bit-interleave + initial round key XOR) === + let initial_rk: [u32; 8] = rk[0..8].try_into().unwrap(); + let mut s = pack_input(input, &initial_rk); + + // === 13 rounds main loop (4 cycles) === + // Round key indexing: + // Initial: rk[0..7] + // Type A: rk[base+8 .. base+15] + // Type DA: rk[base+16 .. base+23] + // Type P2: rk[base+24 .. base+31] + // Type DB: rk[base+32 .. base+39] + // where base = cycle * 32 + + for cycle in 0..4 { + let base = cycle * 32; + + // --- Round Type A: SBox + P1 + KeyXOR --- + sbox_bitsliced(&mut s); + linear_p1(&mut s); + let rk_a: [u32; 8] = rk[base + 8..base + 16].try_into().unwrap(); + for j in 0..8 { + s[j] ^= rk_a[j]; + } + + if cycle == 3 { + break; // Last cycle only has Type A + } + + // --- Round Type DA: SBox + nibble-ROT diffusion + KeyXOR --- + sbox_bitsliced(&mut s); + let rk_da: [u32; 8] = rk[base + 16..base + 24].try_into().unwrap(); + diffusion_a(&mut s, &rk_da); + + // --- Round Type P2: SBox + P2 + KeyXOR --- + sbox_bitsliced(&mut s); + linear_p2(&mut s); + let rk_p2: [u32; 8] = rk[base + 24..base + 32].try_into().unwrap(); + for j in 0..8 { + s[j] ^= rk_p2[j]; + } + + // --- Round Type DB: SBox + byte-ROT8 diffusion + KeyXOR --- + sbox_bitsliced(&mut s); + let rk_db: [u32; 8] = rk[base + 32..base + 40].try_into().unwrap(); + diffusion_b(&mut s, &rk_db); + } + + // === Output deinterleave === + for j in 0..8 { + s[j] = 
nibble_deinterleave(s[j]); + } + + // Final SBox + sbox_bitsliced(&mut s); + + // Final round key XOR + deinterleave (rk[112..119]) + let final_rk: [u32; 8] = rk[112..120].try_into().unwrap(); + unpack_output(&s, &final_rk) +} diff --git a/src/gift256/interleave.rs b/src/gift256/interleave.rs new file mode 100644 index 0000000..8191108 --- /dev/null +++ b/src/gift256/interleave.rs @@ -0,0 +1,190 @@ +//! GIFT-256 fixsliced bit interleave/deinterleave +//! Used to convert between standard and fixsliced representations +//! Corresponds to gift256_encrypt_fixsliced State 0 (input) and State 3 (output) + +/// Pack 8 u32 (standard) into 8 u32 (fixsliced) with initial round key XOR. +/// Corresponds to gift256_encrypt_fixsliced State 0. +/// +/// Input read order (from decompiled offsets): +/// input[7]=@0x1c, input[3]=@0x0c, input[6]=@0x18, input[2]=@0x08 +/// input[5]=@0x14, input[1]=@0x04, input[4]=@0x10, input[0]=@0x00 +pub fn pack_input(input: &[u32; 8], initial_rk: &[u32; 8]) -> [u32; 8] { + let (mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h) = + (input[7], input[3], input[6], input[2], input[5], input[1], input[4], input[0]); + + // Phase 1: 0x55555555 interleave (odd/even bit separation) + let t0 = (a ^ (b >> 1)) & 0x55555555; + a ^= t0; + b ^= t0 << 1; + let t1 = (c ^ (d >> 1)) & 0x55555555; + c ^= t1; + d ^= t1 << 1; + let t2 = (e ^ (f >> 1)) & 0x55555555; + e ^= t2; + f ^= t2 << 1; + let t3 = (g ^ (h >> 1)) & 0x55555555; + g ^= t3; + h ^= t3 << 1; + + // Phase 2: 0x33333333 interleave (2-bit group separation) + let t4 = (a ^ (c >> 2)) & 0x33333333; + a ^= t4; + c ^= t4 << 2; + let t5 = (e ^ (g >> 2)) & 0x33333333; + e ^= t5; + g ^= t5 << 2; + let t6 = (b ^ (d >> 2)) & 0x33333333; + b ^= t6; + d ^= t6 << 2; + let t7 = (f ^ (h >> 2)) & 0x33333333; + f ^= t7; + h ^= t7 << 2; + + // Phase 3: 0x0F0F0F0F interleave (nibble separation) + initial round key XOR + let t8 = (a ^ (e >> 4)) & 0x0F0F0F0F; + let out7 = initial_rk[7] ^ (a ^ t8); + let out3 = 
initial_rk[3] ^ ((t8 << 4) ^ e); + + let t9 = (c ^ (g >> 4)) & 0x0F0F0F0F; + let out5 = initial_rk[5] ^ (c ^ t9); + let out1 = initial_rk[1] ^ ((t9 << 4) ^ g); + + let t10 = (b ^ (f >> 4)) & 0x0F0F0F0F; + let out6 = initial_rk[6] ^ (b ^ t10); + let out2 = initial_rk[2] ^ ((t10 << 4) ^ f); + + let t11 = (d ^ (h >> 4)) & 0x0F0F0F0F; + let out4 = initial_rk[4] ^ (d ^ t11); + let out0 = initial_rk[0] ^ ((t11 << 4) ^ h); + + [out0, out1, out2, out3, out4, out5, out6, out7] +} + +/// Nibble deinterleave. +/// Decompiled: ((uVar4 >> 4 ^ uVar4) & 0xf000f00) * 0x11 ^ uVar4 +pub fn nibble_deinterleave(x: u32) -> u32 { + (((x >> 4) ^ x) & 0x0F000F00).wrapping_mul(0x11) ^ x +} + +/// Key schedule deinterleave type A (offsets 0x20..0x3C, 0x60..0x7C, 0x1A0..0x1BC) +/// x = ((x ^ x>>4) & 0x030F0C00) * 0x11 ^ x; +/// x = ((x>>2 ^ x) & 0x33003300) * 5 ^ x; +pub fn key_deinterleave_a(x: u32) -> u32 { + let x = (((x ^ (x >> 4)) & 0x030F0C00).wrapping_mul(0x11)) ^ x; + ((((x >> 2) ^ x) & 0x33003300).wrapping_mul(5)) ^ x +} + +/// Key schedule deinterleave type B (offsets 0x40..0x5C) +/// x = ((x>>4 ^ x) & 0x0F000F00) * 0x11 ^ x; +pub fn key_deinterleave_b(x: u32) -> u32 { + ((((x >> 4) ^ x) & 0x0F000F00).wrapping_mul(0x11)) ^ x +} + +/// Key schedule deinterleave type C (offsets 0x60..0x7C second group) +/// x = ((x ^ x>>4) & 0x0C0F0300) * 0x11 ^ x; +/// x = ((x>>2 ^ x) & 0x33003300) * 5 ^ x; +pub fn key_deinterleave_c(x: u32) -> u32 { + let x = (((x ^ (x >> 4)) & 0x0C0F0300).wrapping_mul(0x11)) ^ x; + ((((x >> 2) ^ x) & 0x33003300).wrapping_mul(5)) ^ x +} + +/// Interleave a half-key (4 u32 input -> 8 u32 output). +/// Same 3-stage butterfly as pack_input but on 4 words. +/// Used in key_schedule to interleave the two key halves. 
+pub fn interleave_key_half(input: &[u32], output: &mut [u32]) { + let (mut a, mut b) = (input[3], input[1]); + let (mut c, mut d) = (input[2], input[0]); + + // Phase 1: 0x55555555 + let t0 = (a ^ (b >> 1)) & 0x55555555; + a ^= t0; + b ^= t0 << 1; + let t1 = (c ^ (d >> 1)) & 0x55555555; + c ^= t1; + d ^= t1 << 1; + + // Phase 2: 0x33333333 + let t2 = (a ^ (c >> 2)) & 0x33333333; + a ^= t2; + c ^= t2 << 2; + let t3 = (b ^ (d >> 2)) & 0x33333333; + b ^= t3; + d ^= t3 << 2; + + // Phase 3: 0x0F0F0F0F + let t4 = (a ^ (b >> 4)) & 0x0F0F0F0F; + output[7] = a ^ t4; + output[3] = (t4 << 4) ^ b; + + let t5 = (c ^ (d >> 4)) & 0x0F0F0F0F; + output[5] = c ^ t5; + output[1] = (t5 << 4) ^ d; + + // For the remaining outputs, use the already-interleaved values + let t6 = (a ^ (c >> 4)) & 0x0F0F0F0F; + output[6] = a ^ t6; + output[2] = (t6 << 4) ^ c; + + let t7 = (b ^ (d >> 4)) & 0x0F0F0F0F; + output[4] = b ^ t7; + output[0] = (t7 << 4) ^ d; +} + +/// Unpack output: reverse bit-interleave + final round key XOR. +/// Reverse of pack_input: apply masks in reverse order (0x0F -> 0x33 -> 0x55). 
+pub fn unpack_output(s: &[u32; 8], final_rk: &[u32; 8]) -> [u32; 8] { + // XOR with final round keys first + let mut a = s[7] ^ final_rk[7]; + let mut b = s[6] ^ final_rk[6]; + let mut c = s[5] ^ final_rk[5]; + let mut d = s[3] ^ final_rk[3]; + let mut e = s[4] ^ final_rk[4]; + let mut f = s[2] ^ final_rk[2]; + let mut g = s[1] ^ final_rk[1]; + let mut h = s[0] ^ final_rk[0]; + + // Reverse Phase 3: 0x0F0F0F0F + let t8 = (a ^ (e >> 4)) & 0x0F0F0F0F; + a ^= t8; + e ^= t8 << 4; + let t9 = (c ^ (g >> 4)) & 0x0F0F0F0F; + c ^= t9; + g ^= t9 << 4; + let t10 = (b ^ (f >> 4)) & 0x0F0F0F0F; + b ^= t10; + f ^= t10 << 4; + let t11 = (d ^ (h >> 4)) & 0x0F0F0F0F; + d ^= t11; + h ^= t11 << 4; + + // Reverse Phase 2: 0x33333333 + let t4 = (a ^ (c >> 2)) & 0x33333333; + a ^= t4; + c ^= t4 << 2; + let t5 = (e ^ (g >> 2)) & 0x33333333; + e ^= t5; + g ^= t5 << 2; + let t6 = (b ^ (d >> 2)) & 0x33333333; + b ^= t6; + d ^= t6 << 2; + let t7 = (f ^ (h >> 2)) & 0x33333333; + f ^= t7; + h ^= t7 << 2; + + // Reverse Phase 1: 0x55555555 + let t0 = (a ^ (b >> 1)) & 0x55555555; + a ^= t0; + b ^= t0 << 1; + let t1 = (c ^ (d >> 1)) & 0x55555555; + c ^= t1; + d ^= t1 << 1; + let t2 = (e ^ (f >> 1)) & 0x55555555; + e ^= t2; + f ^= t2 << 1; + let t3 = (g ^ (h >> 1)) & 0x55555555; + g ^= t3; + h ^= t3 << 1; + + // Output in original order (reverse of input mapping) + [h, f, d, b, g, e, c, a] +} diff --git a/src/gift256/key_schedule.rs b/src/gift256/key_schedule.rs new file mode 100644 index 0000000..4d13f02 --- /dev/null +++ b/src/gift256/key_schedule.rs @@ -0,0 +1,162 @@ +//! GIFT-256 fixsliced key schedule +//! Corresponds to gift256_key_schedule @ 0x80057559 +//! Expands a 256-bit key into 120 u32 (480 bytes) of round keys + +use super::sbox::sbox_bitsliced; +use super::interleave::{key_deinterleave_a, key_deinterleave_b, key_deinterleave_c, interleave_key_half}; + +/// NOT compensation table: offsets (u32 indices) that must be inverted at the end. 
+/// Corresponds to the mass of ks[offset] ^= 0xFFFFFFFF at the end of the decompilation. +const NOT_OFFSETS: [usize; 56] = [ + 0x08, 0x09, 0x0D, 0x0E, + 0x10, 0x11, 0x15, 0x16, + 0x18, 0x19, 0x1D, 0x1E, + 0x20, 0x21, 0x25, 0x26, + 0x28, 0x29, 0x2D, 0x2E, + 0x30, 0x31, 0x35, 0x36, + 0x38, 0x39, 0x3D, 0x3E, + 0x40, 0x41, 0x45, 0x46, + 0x48, 0x49, 0x4D, 0x4E, + 0x50, 0x51, 0x55, 0x56, + 0x58, 0x59, 0x5D, 0x5E, + 0x60, 0x61, 0x65, 0x66, + 0x68, 0x69, 0x6D, 0x6E, + 0x70, 0x71, 0x75, 0x76, +]; + +/// GF extension function. +/// Decompiled: +/// uVar4 = (src >> param3 | src << (32-param3)) & 0x03030303 ^ dst; +/// result = uVar4 ^ (uVar4 & 0x03030303) << 6 +/// ^ (uVar4 & 0x0F0F0F0F) << 4 +/// ^ (uVar4 & 0x3F3F3F3F) << 2; +fn gf_expand(src: u32, dst: u32, rot: u32) -> u32 { + let x = (src.rotate_right(rot) & 0x03030303) ^ dst; + x ^ ((x & 0x03030303) << 6) ^ ((x & 0x0F0F0F0F) << 4) ^ ((x & 0x3F3F3F3F) << 2) +} + +/// Key rotate: copy ks[base+0..7] forward to ks[base+8..15] with rotation. +/// Corresponds to gift256_key_rotate @ 0x80060a8f +/// +/// After tracing the CFF state machine: +/// ks[base+15] = ks[base+7] +/// ks[base+14] = ks[base+6] +/// ... +/// ks[base+9] = ks[base+1] +/// ks[base+8] = ks[base+0] +fn key_rotate(ks: &mut [u32], base: usize) { + for i in (0..8).rev() { + ks[base + 8 + i] = ks[base + i]; + } +} + +/// LFSR expansion: perform GF expansion on 8 key words. +/// Corresponds to gift256_key_lfsr_expand @ 0x8005a060 +/// rot = 14 or 6 (rotation amount) +fn key_lfsr_expand(ks: &mut [u32], base: usize, rot: u32) { + for i in 0..8 { + let src_idx = base + i; + let dst_idx = base + i - 16; // base is always >= 16 + ks[src_idx] = gf_expand(ks[src_idx], ks[dst_idx], rot); + } +} + +/// Full key schedule: expand 256-bit key into 120 u32 round keys. 
+pub fn key_schedule(key: &[u8; 32]) -> [u32; 120] { + let mut ks = [0u32; 120]; + + // === Step 1: Bit-interleave packing (State 3) === + // Read key as 8 u32 (little-endian) + let mut kw = [0u32; 8]; + for i in 0..8 { + kw[i] = u32::from_le_bytes([key[i * 4], key[i * 4 + 1], key[i * 4 + 2], key[i * 4 + 3]]); + } + + // Interleave: two halves of 4 words each + interleave_key_half(&kw[0..4], &mut ks[0..8]); + interleave_key_half(&kw[4..8], &mut ks[8..16]); + + // === Step 2: 12-step round key expansion (State 4+5, 6 iterations) === + let mut round = 8usize; + let mut rk_const_offset = 0x10usize; // u32 index + + for _step in 0..6 { + // --- Sub-step A (State 4) --- + key_rotate(&mut ks, round); + round += 8; + + // S-Box on the newly written 8 words + let mut sbox_buf: [u32; 8] = ks[round - 8..round].try_into().unwrap(); + sbox_bitsliced(&mut sbox_buf); + ks[round - 8..round].copy_from_slice(&sbox_buf); + + // NOT compensation (4 specific offsets within this block) + ks[round - 8] ^= 0xFFFFFFFF; + ks[round - 7] ^= 0xFFFFFFFF; + ks[round - 3] ^= 0xFFFFFFFF; + ks[round - 2] ^= 0xFFFFFFFF; + + // Round constant injection + ks[rk_const_offset] ^= 0xC000; + + // LFSR expand with rot=14 + key_lfsr_expand(&mut ks, round, 14); + + // --- Sub-step B (State 5) --- + key_rotate(&mut ks, round); + round += 8; + + let mut sbox_buf: [u32; 8] = ks[round - 8..round].try_into().unwrap(); + sbox_bitsliced(&mut sbox_buf); + ks[round - 8..round].copy_from_slice(&sbox_buf); + + ks[round - 8] ^= 0xFFFFFFFF; + ks[round - 7] ^= 0xFFFFFFFF; + ks[round - 3] ^= 0xFFFFFFFF; + ks[round - 2] ^= 0xFFFFFFFF; + + // LFSR expand with rot=6 + key_lfsr_expand(&mut ks, round, 6); + + rk_const_offset += 17; // iVar24 += 0x44 bytes = 17 u32 indices + } + + // === Step 3: Post-processing (State 2 + State 1) === + // Deinterleave with 3 different mask patterns, each block covers 32 u32 + for block in 0..3 { + let base = block * 32; + + // Type A: offsets +8..+15 + for j in 0..8 { + ks[base + 8 + j] = 
key_deinterleave_a(ks[base + 8 + j]); + } + + // Type B: offset +16 + ks[base + 16] = key_deinterleave_b(ks[base + 16]); + + // Type B: offsets +17..+23 + for j in 1..8 { + ks[base + 16 + j] = key_deinterleave_b(ks[base + 16 + j]); + } + + // Type C: offsets +24..+31 + for j in 0..8 { + ks[base + 24 + j] = key_deinterleave_c(ks[base + 24 + j]); + } + } + + // Final round key deinterleave (ks[104..111] = offsets 0x1A0..0x1BC) + for j in 0..8 { + ks[104 + j] = key_deinterleave_a(ks[104 + j]); + } + + // Initial NOT + ks[8] ^= 0xFFFFFFFF; + + // Final NOT compensation + for &off in &NOT_OFFSETS { + ks[off] ^= 0xFFFFFFFF; + } + + ks +} diff --git a/src/gift256/linear.rs b/src/gift256/linear.rs new file mode 100644 index 0000000..f269958 --- /dev/null +++ b/src/gift256/linear.rs @@ -0,0 +1,145 @@ +//! GIFT-256 linear diffusion layers +//! P1 and P2 are standalone functions, DA and DB are inlined in encrypt + +use crate::util::nibble_half_swap; + +/// P1: byte-ROT2 permutation (even rounds) +/// Corresponds to gift256_linear_P1_byteROT2 @ 0x800172b2 +/// +/// Core op: rot = ROR(x, 2) & 0xC0C0C0C0 | ROR(x, 10) & 0x3F3F3F3F +/// Processing order: s[6], s[7], s[5], s[4], s[1], s[2], s[0], s[3] +/// Extra: s[7]'s tmp value (t7) is XORed into s[0], s[4], s[3], s[1] +pub fn linear_p1(s: &mut [u32; 8]) { + let byte_rot2 = |x: u32| -> u32 { + (x.rotate_left(22) & 0x3F3F3F3F) | (x.rotate_left(30) & 0xC0C0C0C0) + }; + + let r6 = byte_rot2(s[6]); + let r7 = byte_rot2(s[7]); + let t6 = s[6] ^ r6; + let t7 = s[7] ^ r7; + s[7] = r7 ^ t6 ^ nibble_half_swap(t7); + + let r5 = byte_rot2(s[5]); + let t5 = s[5] ^ r5; + s[6] = r6 ^ t5 ^ nibble_half_swap(t6); + + let r4 = byte_rot2(s[4]); + let t4 = s[4] ^ r4; + s[5] = r5 ^ t4 ^ nibble_half_swap(t5); + + let r1 = byte_rot2(s[1]); + let t1 = r1 ^ s[1]; + let r2 = byte_rot2(s[2]); + let t2 = s[2] ^ r2; + s[2] = r2 ^ t1 ^ nibble_half_swap(t2); + + let r0 = byte_rot2(s[0]); + let t0 = r0 ^ s[0]; + s[0] = nibble_half_swap(t0) ^ r0 ^ t7; // NOTE: 
XOR with t7 + + let r3 = byte_rot2(s[3]); + let t3 = s[3] ^ r3; + s[4] = r4 ^ t3 ^ nibble_half_swap(t4) ^ t7; + s[3] = t2 ^ nibble_half_swap(t3) ^ r3 ^ t7; + s[1] = t0 ^ nibble_half_swap(t1) ^ r1 ^ t7; +} + +/// P2: byte-ROT6 permutation (odd rounds) +/// Corresponds to gift256_linear_P2_byteROT6 @ 0x80006e1f +/// Identical structure to P1, only rotation amount changes: +/// rot = ROR(x, 6) & 0xFCFCFCFC | ROR(x, 14) & 0x03030303 +pub fn linear_p2(s: &mut [u32; 8]) { + let byte_rot6 = |x: u32| -> u32 { + (x.rotate_left(18) & 0x03030303) | (x.rotate_left(26) & 0xFCFCFCFC) + }; + + let r6 = byte_rot6(s[6]); + let r7 = byte_rot6(s[7]); + let t6 = s[6] ^ r6; + let t7 = s[7] ^ r7; + s[7] = r7 ^ t6 ^ nibble_half_swap(t7); + + let r5 = byte_rot6(s[5]); + let t5 = s[5] ^ r5; + s[6] = r6 ^ t5 ^ nibble_half_swap(t6); + + let r4 = byte_rot6(s[4]); + let t4 = s[4] ^ r4; + s[5] = r5 ^ t4 ^ nibble_half_swap(t5); + + let r1 = byte_rot6(s[1]); + let t1 = r1 ^ s[1]; + let r2 = byte_rot6(s[2]); + let t2 = s[2] ^ r2; + s[2] = r2 ^ t1 ^ nibble_half_swap(t2); + + let r0 = byte_rot6(s[0]); + let t0 = r0 ^ s[0]; + s[0] = nibble_half_swap(t0) ^ r0 ^ t7; + + let r3 = byte_rot6(s[3]); + let t3 = s[3] ^ r3; + s[4] = r4 ^ t3 ^ nibble_half_swap(t4) ^ t7; + s[3] = t2 ^ nibble_half_swap(t3) ^ r3 ^ t7; + s[1] = t0 ^ nibble_half_swap(t1) ^ r1 ^ t7; +} + +/// DA: nibble-ROT4/12/20/28 diffusion (inlined in encrypt) +/// Corresponds to gift256_encrypt_fixsliced State 2 first part +/// +/// Core op: +/// rotated = (x << 0x14 | x >> 0xc) & 0x0F0F0F0F | (x << 0x1c | x >> 4) & 0xF0F0F0F0 +/// tmp = x ^ rotated +/// new = rotated ^ key ^ SWAP16(tmp) ^ cross_xor_pattern +pub fn diffusion_a(s: &mut [u32; 8], rk: &[u32; 8]) { + let nibble_rot = |x: u32| -> u32 { + (x.rotate_left(20) & 0x0F0F0F0F) | (x.rotate_left(28) & 0xF0F0F0F0) + }; + let swap16 = |x: u32| -> u32 { x.rotate_left(16) }; + + let mut r = [0u32; 8]; + let mut t = [0u32; 8]; + for i in 0..8 { + r[i] = nibble_rot(s[i]); + t[i] = s[i] ^ r[i]; + } 
+ + // Cross-XOR pattern (derived from decompilation) + s[0] = t[7] ^ r[0] ^ rk[0] ^ swap16(t[0]); + s[2] = r[2] ^ rk[2] ^ swap16(t[2]) ^ t[1]; + s[5] = r[5] ^ rk[5] ^ swap16(t[5]) ^ t[4]; + s[1] = rk[1] ^ swap16(t[1]) ^ t[0] ^ r[1] ^ t[7]; + s[3] = r[3] ^ rk[3] ^ swap16(t[3]) ^ t[2] ^ t[7]; + s[4] = rk[4] ^ swap16(t[4]) ^ t[3] ^ r[4] ^ t[7]; + s[6] = r[6] ^ rk[6] ^ swap16(t[6]) ^ t[5]; + s[7] = rk[7] ^ swap16(t[7]) ^ t[6] ^ r[7]; +} + +/// DB: byte-ROT8 diffusion (inlined in encrypt) +/// Corresponds to gift256_encrypt_fixsliced State 2 last part +/// +/// Core op: +/// rotated = x << 0x18 | x >> 8 (= ROR(x, 8)) +/// tmp = rotated ^ x +/// new = rotated ^ key ^ SWAP16(tmp) ^ cross_xor_pattern +pub fn diffusion_b(s: &mut [u32; 8], rk: &[u32; 8]) { + let swap16 = |x: u32| -> u32 { x.rotate_left(16) }; + + let mut r = [0u32; 8]; + let mut t = [0u32; 8]; + for i in 0..8 { + r[i] = s[i].rotate_right(8); + t[i] = r[i] ^ s[i]; + } + + // Cross-XOR pattern (same structure as DA but with byte-ROT8) + s[0] = r[0] ^ rk[0] ^ swap16(t[0]) ^ t[7]; + s[2] = r[2] ^ rk[2] ^ swap16(t[2]) ^ t[1]; + s[1] = rk[1] ^ swap16(t[1]) ^ t[0] ^ r[1] ^ t[7]; + s[3] = r[3] ^ rk[3] ^ swap16(t[3]) ^ t[2] ^ t[7]; + s[4] = rk[4] ^ swap16(t[4]) ^ t[3] ^ r[4] ^ t[7]; + s[5] = r[5] ^ rk[5] ^ swap16(t[5]) ^ t[4]; + s[6] = r[6] ^ rk[6] ^ swap16(t[6]) ^ t[5]; + s[7] = rk[7] ^ swap16(t[7]) ^ t[6] ^ r[7]; +} diff --git a/src/gift256/mod.rs b/src/gift256/mod.rs new file mode 100644 index 0000000..3d721f1 --- /dev/null +++ b/src/gift256/mod.rs @@ -0,0 +1,5 @@ +pub mod sbox; +pub mod encrypt; +pub mod key_schedule; +pub mod linear; +pub mod interleave; diff --git a/src/gift256/sbox.rs b/src/gift256/sbox.rs new file mode 100644 index 0000000..4a77480 --- /dev/null +++ b/src/gift256/sbox.rs @@ -0,0 +1,90 @@ +//! GIFT-256 bitsliced S-Box +//! Corresponds to gift256_sbox_bitsliced @ 0x8000394c +//! Pure AND/XOR Boolean network, 32 S-Box instances in parallel +//! +//! 
Directly transcribed from Ghidra decompilation. +//! WARNING: Variables are reassigned multiple times. Do NOT optimize or merge. + +/// Bitsliced S-Box: 8 u32 words processed in parallel. +/// Each bit position across s[0]..s[7] forms one 8-bit S-Box input/output. +/// +/// Register mapping (encrypted_mem_read_u32 offsets): +/// s[0]=@0x00, s[1]=@0x04, s[2]=@0x08, s[3]=@0x0c +/// s[4]=@0x10, s[5]=@0x14, s[6]=@0x18, s[7]=@0x1c +pub fn sbox_bitsliced(s: &mut [u32; 8]) { + // Direct mapping from decompilation variable names: + // uVar1=@0x1c=s[7], uVar2=@0x04=s[1], uVar3=@0x10=s[4], uVar4=@0x08=s[2] + // uVar5=@0x0c=s[3], uVar6=@0x18=s[6], uVar7=@0x14=s[5], uVar(last)=@0x00=s[0] + let (a, b, c, d, e, f, g, h) = (s[7], s[1], s[2], s[3], s[4], s[5], s[6], s[0]); + + let t1 = a ^ b; // uVar8 = uVar1 ^ uVar2 + let t2 = e ^ c; // uVar14 = uVar3 ^ uVar4 + let t3 = t1 ^ t2; // uVar9 + let t4 = d ^ t3 ^ g; // uVar10 + let t5 = e ^ a; // uVar15 = uVar3 ^ uVar1 + let t6 = t4 ^ t5; // uVar11 + let fg = f ^ g; // uVar7 = s[5] ^ s[6] + let t7 = t6 ^ fg; // uVar12 + + let ag = h ^ fg; // uVar25 = s[0] ^ (f^g) + let bx = b ^ ag; // uVar2 updated + let cd_mix = c ^ d ^ t3; // uVar26 + let fg_cd = fg ^ cd_mix; // uVar7 subsequent + + // Nonlinear core + let n1 = (t6 & t5) ^ (t2 & (fg_cd ^ t6)); // uVar16 + let n2 = (t7 & t1) ^ t7 ^ (bx & ag) ^ t1 ^ n1; // uVar5 + + let ac = a ^ c; // uVar4 updated + let n1_ext = n1 ^ (cd_mix & t3) ^ t4 ^ ((h ^ cd_mix) & (ac ^ bx)); // uVar16 updated + + let p = n2 & n1_ext; // uVar10 + let q = (ac & fg_cd) ^ (t6 & t5); // uVar22 + let r = (h & (e ^ ag)) ^ ac ^ fg_cd ^ (cd_mix & t3) ^ q; // uVar17 + let qx = q ^ t7 ^ ((a ^ ag) & (h ^ t6)) ^ (t7 & t1) ^ a; // uVar22 updated + + let m = (p ^ r) & (qx ^ n2); // uVar13 + let n1x = r ^ n1_ext; // uVar16 = r ^ n1_ext + let rx = r ^ ((qx ^ p) & n1x); // uVar17 updated + let sx = (p ^ m) & qx; // uVar18 + let n1x = (rx & (sx ^ qx ^ p)) ^ n1x; // uVar16 final + let sx = sx ^ n2; // uVar18 ^= uVar5 + let qx = qx 
^ m; // uVar22 ^= uVar13 + + let u = sx ^ qx; // uVar19 + let v = n1x ^ u; // uVar5(final) = uVar16 ^ uVar19 + let w = qx ^ rx; // uVar20 + + // Diffusion outputs + let o1 = ((v ^ w) & t2) ^ (w & t5); // uVar10 + let o2 = o1 ^ (u & (ac ^ bx)); // uVar13 + let o3 = o2 ^ (t3 & sx); // uVar9 + + let o4 = t7 & (rx ^ n1x); // uVar12 + let o5 = o4 ^ (n1x & ag); // uVar23 + let o6 = ((v ^ w) & (fg_cd ^ t6)) ^ o5; // uVar21 + let o7 = o6 ^ (w & t6); // uVar14 + + // Output assignments (encrypted_mem_write_u32): + s[7] = o3 ^ o7; // @0x1c = uVar9 ^ uVar14 + + let o8 = (rx ^ n1x) & t1; // uVar8 + let o9 = (qx & h) ^ (cd_mix & sx); // uVar24 + let o6 = o8 ^ o9 ^ (fg_cd & v) ^ o6; // uVar21 updated + let o10 = o6 ^ (rx & (a ^ ag)); // uVar1 + + s[5] = (ac & v) ^ (w & t5) ^ o7 ^ o10; // @0x14 + s[6] = o3 ^ o7; // @0x18 (same as s[7]) + + let o3x = (u & (h ^ cd_mix)) ^ (cd_mix & sx) ^ o5 ^ o3; // uVar9 updated + s[4] = o3x; // @0x10 + + s[2] = o2 ^ (qx & (e ^ ag)) ^ o10; // @0x08 + + let o4x = o9 ^ (rx & (h ^ t6)) ^ o4; // uVar12 updated + let o11 = o4x ^ o1 ^ (n1x & bx); // uVar1 + + s[1] = o11 ^ o6; // @0x04 + s[0] = o11 ^ o8; // @0x00 + s[3] = o4x ^ o3x; // @0x0c +} diff --git a/src/hash/finalize.rs b/src/hash/finalize.rs new file mode 100644 index 0000000..dabf996 --- /dev/null +++ b/src/hash/finalize.rs @@ -0,0 +1,65 @@ +//! Hash finalization +//! Corresponds to hash_finalize @ 0x8000a792 + +use super::inner_compress::inner_compress; +use super::message::{process_message, u32x4_from_bytes}; + +/// Finalize: MD length padding + final compression + XOR truncation. +/// +/// Parameters (from decompilation): +/// round_keys_and_state: 0x200 bytes (round keys + hash state) +/// chaining: 16 bytes chaining value +/// msg: message bytes +/// +/// Returns: 16-byte digest +pub fn finalize( + hash_state: &[u32; 8], + chaining: &[u8; 16], + msg: &[u8], +) -> [u8; 16] { + let mut state = *hash_state; + + // 1. Append 0x01 marker + process_message(&mut state, &[0x01]); + + // 2. 
Process remaining message + process_message(&mut state, msg); + + // 3. Length encoding (5-byte little-endian bit length) + let bit_len = msg.len() as u64; + let byte_bits = bit_len.wrapping_mul(8); // convert to bits + let mut len_block = [0u8; 16]; + len_block[0] = (byte_bits & 0xFF) as u8; + len_block[1] = ((byte_bits >> 8) & 0xFF) as u8; + len_block[2] = ((byte_bits >> 16) & 0xFF) as u8; + len_block[3] = ((byte_bits >> 24) & 0xFF) as u8; + len_block[4] = ((byte_bits >> 32) & 0xFF) as u8; + + // 4. Final compression + let block = u32x4_from_bytes(&len_block); + inner_compress(&mut state, &block); + + // 5. State copy (Feistel output arrangement) + // copy state[4..5] -> state[0..1], state[6..7] -> state[2..3] + state[0] = state[4]; + state[1] = state[5]; + state[2] = state[6]; + state[3] = state[7]; + + // 6. XOR truncation: output[15-i] = state_bytes[i] ^ chaining[i] + let state_bytes = state_to_bytes(&state); + let mut output = [0u8; 16]; + for i in 0..16 { + output[15 - i] = state_bytes[i] ^ chaining[i]; + } + + output +} + +fn state_to_bytes(s: &[u32; 8]) -> [u8; 32] { + let mut out = [0u8; 32]; + for i in 0..8 { + out[i * 4..i * 4 + 4].copy_from_slice(&s[i].to_le_bytes()); + } + out +} diff --git a/src/hash/inner_compress.rs b/src/hash/inner_compress.rs new file mode 100644 index 0000000..481207c --- /dev/null +++ b/src/hash/inner_compress.rs @@ -0,0 +1,138 @@ +//! Inner compression: GF(2^4) matrix multiplication + LFSR diffusion +//! Corresponds to inner_compress_gf2x4 @ 0x80007084 +//! +//! Processes 128-bit message blocks, updates 256-bit state + +use crate::util::partial_bitrev_shr1; + +/// GF(2^4) nibble-parallel multiplication. 
+/// Decompiled (State 0): +/// a1 = a & 0x11111111; a2 = a & 0x22222222; a4 = a & 0x44444444; a8 = a & 0x88888888; +/// b1 = b & 0x11111111; b2 = b & 0x22222222; b4 = b & 0x44444444; b8 = b & 0x88888888; +/// r8 = (a1*b8 ^ b4*a2 ^ a8*b1 ^ a4*b2) & 0x88888888 +/// r4 = (b8*a8 ^ a1*b4 ^ a4*b1 ^ a2*b2) & 0x44444444 +/// r1 = (b8*a2 ^ a4*b4 ^ b1*a1 ^ b2*a8) & 0x11111111 +/// r2 = (a4*b8 ^ b4*a8 ^ b1*a2 ^ b2*a1) & 0x22222222 +fn gf2x4_mul(a: u32, b: u32) -> u32 { + let a1 = a & 0x11111111; + let a2 = a & 0x22222222; + let a4 = a & 0x44444444; + let a8 = a & 0x88888888; + let b1 = b & 0x11111111; + let b2 = b & 0x22222222; + let b4 = b & 0x44444444; + let b8 = b & 0x88888888; + + let r8 = (a1.wrapping_mul(b8) ^ b4.wrapping_mul(a2) ^ a8.wrapping_mul(b1) ^ a4.wrapping_mul(b2)) & 0x88888888; + let r4 = (b8.wrapping_mul(a8) ^ a1.wrapping_mul(b4) ^ a4.wrapping_mul(b1) ^ a2.wrapping_mul(b2)) & 0x44444444; + let r1 = (b8.wrapping_mul(a2) ^ a4.wrapping_mul(b4) ^ b1.wrapping_mul(a1) ^ b2.wrapping_mul(a8)) & 0x11111111; + let r2 = (a4.wrapping_mul(b8) ^ b4.wrapping_mul(a8) ^ b1.wrapping_mul(a2) ^ b2.wrapping_mul(a1)) & 0x22222222; + + r8 | r4 | r1 | r2 +} + +/// LFSR forward feedback: x << 31 ^ x << 30 ^ x << 25 +#[inline] +fn lfsr_fwd(x: u32) -> u32 { + (x << 31) ^ (x << 30) ^ (x << 25) +} + +/// LFSR backward feedback: x >> 2 ^ x >> 1 ^ x >> 7 +#[inline] +fn lfsr_bwd(x: u32) -> u32 { + (x >> 2) ^ (x >> 1) ^ (x >> 7) +} + +/// Inner compression function. +/// state: 256-bit (8 u32), uses Feistel structure on upper/lower halves +/// block: 128-bit message block (4 u32) +pub fn inner_compress(state: &mut [u32; 8], block: &[u32; 4]) { + // 1. XOR message into state upper half (note: reverse order) + state[7] ^= block[3]; + state[6] ^= block[2]; + state[5] ^= block[1]; + state[4] ^= block[0]; + + // 2. 
Prepare working values + let (s0, s1, s2, s3) = (state[4], state[5], state[6], state[7]); + let (m0, m1, m2, m3) = (block[0], block[1], block[2], block[3]); + + // Precompute state-side values + let s01 = s0 ^ s1; + let s23 = s2 ^ s3; + let s02 = s0 ^ s2; + let s13 = s1 ^ s3; + let s0123 = s01 ^ s23; + + // Precompute message-side values + let m01 = m0 ^ m1; + let m23 = m2 ^ m3; + let m02 = m0 ^ m2; + let m13 = m1 ^ m3; + let m0123 = m01 ^ m23; + + // Bit-reversed versions + let rs0 = partial_bitrev_shr1(s0); + let rs1 = partial_bitrev_shr1(s1); + let rs2 = partial_bitrev_shr1(s2); + let rs3 = partial_bitrev_shr1(s3); + let rs01 = partial_bitrev_shr1(s01); + let rs23 = partial_bitrev_shr1(s23); + let rs02 = partial_bitrev_shr1(s02); + let rs13 = partial_bitrev_shr1(s13); + let rs0123 = partial_bitrev_shr1(s0123); + + // 3. GF(2^4) multiplications (18 products) + let p0 = gf2x4_mul(s0, m0); + let p1 = gf2x4_mul(s1, m1); + let p2 = gf2x4_mul(s2, m2); + let p3 = gf2x4_mul(s3, m3); + let p01 = gf2x4_mul(s01, m01); + let p23 = gf2x4_mul(s23, m23); + let p02 = gf2x4_mul(s02, m02); + let p13 = gf2x4_mul(s13, m13); + let p0123 = gf2x4_mul(s0123, m0123); + + let rp0 = gf2x4_mul(rs0, m0); + let rp1 = gf2x4_mul(rs1, m1); + let rp2 = gf2x4_mul(rs2, m2); + let rp3 = gf2x4_mul(rs3, m3); + let rp01 = gf2x4_mul(rs01, m01); + let rp23 = gf2x4_mul(rs23, m23); + let rp02 = gf2x4_mul(rs02, m02); + let rp13 = gf2x4_mul(rs13, m13); + let rp0123 = gf2x4_mul(rs0123, m0123); + + // 4. Karatsuba-style recombination + // Row 0: p0 + // Row 1: p01 ^ p0 ^ p1 + // Row 2: p02 ^ p0 ^ p2 + // Row 3: p0123 ^ p01 ^ p02 ^ p13 ^ p23 ^ p0 ^ p3 + let k0 = p0; + let k1 = p01 ^ p0 ^ p1; + let k2 = p02 ^ p0 ^ p2; + let k3 = p0123 ^ p01 ^ p02 ^ p13 ^ p23 ^ p0 ^ p3; + + let rk0 = rp0; + let rk1 = rp01 ^ rp0 ^ rp1; + let rk2 = rp02 ^ rp0 ^ rp2; + let rk3 = rp0123 ^ rp01 ^ rp02 ^ rp13 ^ rp23 ^ rp0 ^ rp3; + + // 5. 
LFSR diffusion + mixing + let out0 = k0 ^ lfsr_bwd(rk0) ^ lfsr_fwd(k0); + let out1 = k1 ^ lfsr_bwd(rk1) ^ lfsr_fwd(k1); + let out2 = k2 ^ lfsr_bwd(rk2) ^ lfsr_fwd(k2); + let out3 = k3 ^ lfsr_bwd(rk3) ^ lfsr_fwd(k3); + + // 6. Feistel update: shift right half + state[4] = state[0]; + state[5] = state[1]; + state[6] = state[2]; + state[7] = state[3]; + + // Write new left half + state[0] = out0; + state[1] = out1; + state[2] = out2; + state[3] = out3; +} diff --git a/src/hash/message.rs b/src/hash/message.rs new file mode 100644 index 0000000..00dd8c8 --- /dev/null +++ b/src/hash/message.rs @@ -0,0 +1,34 @@ +//! Message block processing for the hash function. +//! Breaks messages into 16-byte blocks and calls inner_compress. + +use super::inner_compress::inner_compress; + +/// Process a message by breaking it into 16-byte blocks. +/// Each block is converted to [u32; 4] (little-endian) and compressed into state. +pub fn process_message(state: &mut [u32; 8], msg: &[u8]) { + let mut offset = 0; + while offset + 16 <= msg.len() { + let block = u32x4_from_bytes(&msg[offset..offset + 16]); + inner_compress(state, &block); + offset += 16; + } + + // Handle remaining bytes (partial block, zero-padded) + if offset < msg.len() { + let mut padded = [0u8; 16]; + let remaining = msg.len() - offset; + padded[..remaining].copy_from_slice(&msg[offset..]); + let block = u32x4_from_bytes(&padded); + inner_compress(state, &block); + } +} + +/// Convert 16 bytes to [u32; 4] (little-endian). +pub fn u32x4_from_bytes(b: &[u8]) -> [u32; 4] { + [ + u32::from_le_bytes([b[0], b[1], b[2], b[3]]), + u32::from_le_bytes([b[4], b[5], b[6], b[7]]), + u32::from_le_bytes([b[8], b[9], b[10], b[11]]), + u32::from_le_bytes([b[12], b[13], b[14], b[15]]), + ] +} diff --git a/src/hash/mmo.rs b/src/hash/mmo.rs new file mode 100644 index 0000000..8986140 --- /dev/null +++ b/src/hash/mmo.rs @@ -0,0 +1,69 @@ +//! Matyas-Meyer-Oseas compression: H_i = E_K(0) ⊕ double(extract(E_K(0))) +//! 
+//! Corresponds to mmo_compress @ 0x8004fbac
+
+use crate::gift256::encrypt::encrypt;
+
+/// MMO compression state
+pub struct MmoState {
+    pub round_keys: [u32; 120],
+    pub chaining: [u8; 16],
+}
+
+/// MMO compression function.
+/// Decompiled flow:
+/// 1. Encrypt all-zero plaintext: gift256_encrypt(ciphertext, round_keys, zeros)
+/// 2. Extract bytes 0x20..0x2F (16 bytes) from ciphertext buffer
+/// 3. Assemble as two u64 (big-endian): hi=bytes[0x20..0x27], lo=bytes[0x28..0x2F]
+/// 4. GF(2^128) doubling: {hi,lo} <<= 1, if carry: hi ^= (carry<<62) | (carry<<57)
+/// 5. Write into chaining value, copy round keys
+pub fn mmo_compress(round_keys: &[u32; 120]) -> MmoState {
+    // 1. Encrypt all-zero plaintext
+    let plaintext = [0u32; 8];
+    let ciphertext = encrypt(&plaintext, round_keys);
+
+    // 2. Convert ciphertext to bytes (little-endian u32 layout)
+    let ct_bytes = ciphertext_to_bytes(&ciphertext);
+
+    // 3. Build hi and lo u64 from bytes at offset 0x20 (= first 16 bytes of ciphertext)
+    //    The ciphertext buffer in the decompilation starts at the plaintext+ciphertext stack area
+    //    offset 0x20 from the base = the ciphertext portion (bytes 0..15)
+    let hi = u64::from_be_bytes([
+        ct_bytes[0], ct_bytes[1], ct_bytes[2], ct_bytes[3],
+        ct_bytes[4], ct_bytes[5], ct_bytes[6], ct_bytes[7],
+    ]);
+    let lo = u64::from_be_bytes([
+        ct_bytes[8], ct_bytes[9], ct_bytes[10], ct_bytes[11],
+        ct_bytes[12], ct_bytes[13], ct_bytes[14], ct_bytes[15],
+    ]);
+
+    // 4. GF(2^128) doubling: shift left by 1
+    let carry = hi >> 63;
+    let mut hi = (hi << 1) | (lo >> 63);
+    let lo = lo << 1;
+
+    // Feedback polynomial: if carry bit set, XOR with reduction polynomial
+    // Decompiled: (uVar16 << 0x38 & 0x8000000000000000 | uVar18 << 0x3e | uVar18 << 0x39)
+    // where uVar18 = carry bit
+    if carry != 0 {
+        hi ^= (1u64 << 62) | (1u64 << 57);
+    }
+
+    // 5.
+    //    Write chaining value (big-endian)
+    let mut chaining = [0u8; 16];
+    chaining[0..8].copy_from_slice(&hi.to_be_bytes());
+    chaining[8..16].copy_from_slice(&lo.to_be_bytes());
+
+    MmoState {
+        round_keys: *round_keys,
+        chaining,
+    }
+}
+
+fn ciphertext_to_bytes(ct: &[u32; 8]) -> [u8; 32] {
+    let mut bytes = [0u8; 32];
+    for i in 0..8 {
+        let b = ct[i].to_le_bytes();
+        bytes[i * 4..i * 4 + 4].copy_from_slice(&b);
+    }
+    bytes
+}
diff --git a/src/hash/mod.rs b/src/hash/mod.rs
new file mode 100644
index 0000000..0a07a79
--- /dev/null
+++ b/src/hash/mod.rs
@@ -0,0 +1,4 @@
+pub mod mmo;
+pub mod inner_compress;
+pub mod message;
+pub mod finalize;
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..6ceb6d0
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,6 @@
+pub mod util;
+pub mod pcg;
+pub mod sbox;
+pub mod gift256;
+pub mod hash;
+pub mod solver;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..1d17823
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,109 @@
+//! hCaptcha PoW solver CLI
+//! Parses JWT challenge -> calls solver -> outputs base64 nonce
+
+use base64::{engine::general_purpose::STANDARD, Engine};
+use serde::Deserialize;
+
+mod pcg;
+mod sbox;
+mod gift256;
+mod hash;
+mod solver;
+mod util;
+
+#[derive(Deserialize)]
+#[allow(dead_code)]
+struct JwtPayload {
+    #[serde(default)]
+    f: u32,
+    #[serde(default)]
+    s: u32,
+    #[serde(default)]
+    t: String,
+    d: String,
+    #[serde(default)]
+    l: String,
+    #[serde(default)]
+    i: String,
+    #[serde(default)]
+    e: u64,
+    #[serde(default)]
+    n: String,
+    #[serde(default)]
+    c: u32,
+}
+
+fn main() {
+    // 1. Read JWT from command line
+    let jwt_req = std::env::args().nth(1).expect("Usage: hcaptcha-pow <jwt>");
+
+    // 2.
+    //    Decode JWT payload (base64url, no signature verification)
+    let parts: Vec<&str> = jwt_req.split('.').collect();
+    if parts.len() < 2 {
+        eprintln!("Invalid JWT format");
+        std::process::exit(1);
+    }
+    let payload_b64 = parts[1];
+    let payload_bytes = base64::engine::general_purpose::URL_SAFE_NO_PAD
+        .decode(payload_b64)
+        .expect("Invalid base64 in JWT payload");
+    let payload: JwtPayload = serde_json::from_slice(&payload_bytes).expect("Invalid JSON in JWT payload");
+
+    println!("Algorithm: {}", payload.n);
+    println!("Difficulty: {}", payload.c);
+    println!("Expiration: {}", payload.e);
+
+    // 3. Decode challenge data from `d` field
+    let challenge_data = STANDARD.decode(&payload.d).unwrap_or_else(|_| {
+        // Try with padding adjustment
+        let padded = format!("{}==", payload.d.trim_end_matches('='));
+        STANDARD.decode(&padded).expect("Invalid base64 in challenge data")
+    });
+
+    println!("Challenge data: {} bytes", challenge_data.len());
+
+    // 4. Parse challenge (needs at least 49 bytes: 32 key + 16 target + 1 extra)
+    if challenge_data.len() < 49 {
+        eprintln!("Challenge data too short: {} bytes (need >= 49)", challenge_data.len());
+        std::process::exit(1);
+    }
+
+    let mut key_material = [0u8; 32];
+    key_material.copy_from_slice(&challenge_data[0..32]);
+
+    let mut target_hash = [0u8; 16];
+    target_hash.copy_from_slice(&challenge_data[32..48]);
+
+    let extra_byte = challenge_data[48];
+
+    let challenge = solver::Challenge {
+        key_material,
+        target_hash,
+        extra_byte,
+    };
+
+    // 5. Solve
+    let seed = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .unwrap()
+        .as_nanos() as u64;
+
+    let max_iter = if payload.c > 0 { payload.c } else { 1_000_000 };
+
+    println!("Solving with max {} iterations...", max_iter);
+
+    let solution = solver::solve(&challenge, max_iter, seed);
+
+    // 6.
+    //    Output
+    match solution {
+        Some(sol) => {
+            let nonce_b64 = STANDARD.encode(sol.nonce);
+            println!("Found solution in {} iterations", sol.iterations);
+            println!("n={}", nonce_b64);
+        }
+        None => {
+            eprintln!("No solution found within {} iterations", max_iter);
+            std::process::exit(1);
+        }
+    }
+}
diff --git a/src/pcg.rs b/src/pcg.rs
new file mode 100644
index 0000000..f1be259
--- /dev/null
+++ b/src/pcg.rs
@@ -0,0 +1,49 @@
+//! PCG-XSH-RR-64/32 pseudo-random number generator
+//! Corresponds to pow_main_dispatch State 0x43
+//! Evidence: multiplier 0x5851f42d4c957f2d, inc = seed << 1 | 1
+
+const PCG_MULTIPLIER: u64 = 0x5851f42d4c957f2d;
+
+pub struct PcgRng {
+    state: u64,
+    inc: u64,
+}
+
+impl PcgRng {
+    /// Initialize PCG from seed.
+    /// Decompiled:
+    ///   uVar36 = uVar36 << 1 | 1;  // inc = seed << 1 | 1
+    ///   uVar35 = uVar36 * 0x5851f42d4c957f2e + uVar35 * 0x5851f42d4c957f2d;
+    pub fn new(seed: u64) -> Self {
+        let inc = (seed << 1) | 1; // always odd
+        let state = seed
+            .wrapping_mul(PCG_MULTIPLIER)
+            .wrapping_add(inc.wrapping_mul(PCG_MULTIPLIER.wrapping_add(1)));
+        Self { state, inc }
+    }
+
+    /// Generate one 32-bit random number.
+    /// Decompiled:
+    ///   uVar35 = uVar35 * 0x5851f42d4c957f2d + uVar36;
+    ///   uVar37 = (uint)(uVar35 >> 0x20);
+    ///   uVar6 = uVar37 >> 0xd ^ (uint)(uVar35 >> 0x1b);
+    ///   uVar37 = uVar37 >> 0x1b;
+    ///   output = uVar6 >> uVar37 | uVar6 << (0x20 - uVar37);
+    fn next_u32(&mut self) -> u32 {
+        self.state = self.state.wrapping_mul(PCG_MULTIPLIER).wrapping_add(self.inc);
+        let hi = (self.state >> 32) as u32;
+        let xorshifted = (hi >> 13) ^ ((self.state >> 27) as u32);
+        let rot = hi >> 27;
+        xorshifted.rotate_right(rot)
+    }
+
+    /// Generate 12-byte nonce.
+    /// Corresponds to the 12-iteration loop in State 0x43.
+    pub fn generate_nonce(&mut self) -> [u8; 12] {
+        let mut nonce = [0u8; 12];
+        for i in 0..12 {
+            nonce[i] = self.next_u32() as u8;
+        }
+        nonce
+    }
+}
diff --git a/src/sbox.rs b/src/sbox.rs
new file mode 100644
index 0000000..1837608
--- /dev/null
+++ b/src/sbox.rs
@@ -0,0 +1,30 @@
+//! Custom polynomial S-Box: S(x) = 192x^6 + 224x^5 + 120x^4 + 200x^3 + 150x^2 + 65x + 147 (mod 256)
+//! Corresponds to pow_main_dispatch State 0x0E/0x0F
+//! Evidence: constants 0xE0, 0x78, -0x40, 0x86838DC8, 0x96, 0x41, 0x93
+
+/// Apply polynomial S-Box to each byte of a 32-byte buffer.
+/// Decompiled:
+///   iVar4 = iVar1 * iVar1;   // x^2
+///   iVar10 = iVar4 * iVar4;  // x^4
+///   result = iVar1 * iVar10 * 0xe0   // 224*x^5
+///          + iVar10 * 0x78           // 120*x^4
+///          + iVar10 * iVar4 * -0x40  // -64*x^6 = 192*x^6 (mod 256)
+///          + ((iVar1 * 0x86838DC8 + 0x96) * iVar1 + 0x41) * iVar1
+///          + 0x93;
+pub fn apply_polynomial_sbox(buf: &mut [u8; 32]) {
+    for b in buf.iter_mut() {
+        let x = *b as u32;
+        let x2 = x.wrapping_mul(x);
+        let x4 = x2.wrapping_mul(x2);
+        let result = x.wrapping_mul(x4).wrapping_mul(0xE0)               // 224*x^5
+            .wrapping_add(x4.wrapping_mul(0x78))                         // 120*x^4
+            .wrapping_add(x4.wrapping_mul(x2).wrapping_mul(0xFFFF_FFC0)) // 192*x^6 (-64 mod 2^32)
+            .wrapping_add(
+                x.wrapping_mul(0x86838DC8_u32).wrapping_add(0x96)        // 200*x + 150
+                    .wrapping_mul(x).wrapping_add(0x41)                  // *x + 65
+                    .wrapping_mul(x)                                     // -> 200x^3+150x^2+65x
+            )
+            .wrapping_add(0x93);                                         // + 147
+        *b = result as u8;
+    }
+}
diff --git a/src/solver.rs b/src/solver.rs
new file mode 100644
index 0000000..a1bf0ae
--- /dev/null
+++ b/src/solver.rs
@@ -0,0 +1,97 @@
+//! Top-level PoW solver
+//!
+//! Corresponds to pow_main_dispatch (Yb) solve path (0xABAB270C)
+
+use crate::pcg::PcgRng;
+use crate::sbox::apply_polynomial_sbox;
+use crate::gift256;
+use crate::hash;
+
+/// PoW solution result
+pub struct PowSolution {
+    pub nonce: [u8; 12],
+    pub iterations: u32,
+}
+
+/// Challenge data parsed from JWT `d` field
+pub struct Challenge {
+    pub key_material: [u8; 32], // 32-byte key material (before S-Box)
+    pub target_hash: [u8; 16],  // 16-byte target hash
+    pub extra_byte: u8,         // 33rd byte
+}
+
+/// Main solve function.
+///
+/// Full flow:
+/// 1. PCG generates 12-byte nonce
+/// 2. S-Box polynomial substitution (32-byte key material)
+/// 3. GIFT-256 key schedule -> 480 bytes round keys
+/// 4. MMO compression
+/// 5. Assemble hash input: [nonce_u32_0, nonce_u32_1, nonce_u32_2, 0x01000000]
+/// 6. GIFT-256 encrypt + hash_finalize -> 128-bit digest
+/// 7. Constant-time 16-byte comparison
+/// 8. Match -> return nonce; no match -> regenerate nonce
+pub fn solve(challenge: &Challenge, max_iterations: u32, seed: u64) -> Option<PowSolution> {
+    let mut rng = PcgRng::new(seed);
+
+    // Pre-compute key schedule and MMO (these don't depend on the nonce)
+    let mut key_data = challenge.key_material;
+    apply_polynomial_sbox(&mut key_data);
+
+    let round_keys = gift256::key_schedule::key_schedule(&key_data);
+    let mmo_state = hash::mmo::mmo_compress(&round_keys);
+
+    // Initialize hash state from MMO output
+    // The hash state is derived from the round keys + chaining value
+    let base_state = [0u32; 8];
+    // State initialized to zeros, will be populated by the hash process
+
+    for iter in 0..max_iterations {
+        // 1. Generate 12-byte nonce
+        let nonce = rng.generate_nonce();
+
+        // 2.
+        //    Assemble hash input block
+        let nonce_u32_0 = u32::from_le_bytes([nonce[0], nonce[1], nonce[2], nonce[3]]);
+        let nonce_u32_1 = u32::from_le_bytes([nonce[4], nonce[5], nonce[6], nonce[7]]);
+        let nonce_u32_2 = u32::from_le_bytes([nonce[8], nonce[9], nonce[10], nonce[11]]);
+
+        let hash_input = [nonce_u32_0, nonce_u32_1, nonce_u32_2, 0x01000000u32];
+
+        // 3. GIFT-256 encrypt
+        let encrypted = gift256::encrypt::encrypt(
+            &[hash_input[0], hash_input[1], hash_input[2], hash_input[3], 0, 0, 0, 0],
+            &round_keys,
+        );
+
+        // 4. Finalize hash
+        //    Build state from encrypted output
+        let mut hash_state = base_state;
+        for i in 0..8 {
+            hash_state[i] = encrypted[i];
+        }
+
+        let digest = hash::finalize::finalize(
+            &hash_state,
+            &mmo_state.chaining,
+            &nonce,
+        );
+
+        // 5. Compare with target
+        if constant_time_eq(&digest, &challenge.target_hash) {
+            return Some(PowSolution {
+                nonce,
+                iterations: iter + 1,
+            });
+        }
+    }
+
+    None
+}
+
+/// Constant-time comparison (16 bytes)
+fn constant_time_eq(a: &[u8; 16], b: &[u8; 16]) -> bool {
+    let mut result = 0u8;
+    for i in 0..16 {
+        result |= a[i] ^ b[i];
+    }
+    result == 0
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..58e7ead
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,47 @@
+//! Bit-manipulation helpers, directly mapped from repeated WASM decompilation patterns.
+
+/// 32-bit right rotate
+#[inline(always)]
+pub fn ror32(x: u32, n: u32) -> u32 {
+    x.rotate_right(n)
+}
+
+/// 32-bit left rotate
+#[inline(always)]
+pub fn rol32(x: u32, n: u32) -> u32 {
+    x.rotate_left(n)
+}
+
+/// 32-bit byte swap (endian conversion)
+/// Decompiled pattern: x << 0x18 | (x & 0xff00) << 8 | x >> 8 & 0xff00 | x >> 0x18
+#[inline(always)]
+pub fn bswap32(x: u32) -> u32 {
+    x.swap_bytes()
+}
+
+/// Full bit reversal: bswap → nibble_swap → bit_pair_swap → bit_swap
+/// Used in inner_compress_gf2x4
+#[inline]
+pub fn bitrev32(x: u32) -> u32 {
+    let x = bswap32(x);
+    let x = ((x >> 4) & 0x0F0F0F0F) | ((x & 0x0F0F0F0F) << 4);
+    let x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2);
+    ((x >> 1) & 0x55555555) | ((x & 0x55555555) << 1)
+}
+
+/// Partial bit reversal (mask 0x55555554 instead of 0x55555555), then >> 1
+/// Used in inner_compress_gf2x4
+#[inline]
+pub fn partial_bitrev_shr1(x: u32) -> u32 {
+    let x = bswap32(x);
+    let x = ((x >> 4) & 0x0F0F0F0F) | ((x & 0x0F0F0F0F) << 4);
+    let x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2);
+    (((x >> 1) & 0x55555554) | ((x & 0x55555555) << 1)) >> 1
+}
+
+/// Nibble half-swap (used in linear diffusion layers P1 and P2)
+/// (x << 12 | x >> 20) & 0x0F0F0F0F | (x << 20 | x >> 12) & 0xF0F0F0F0
+#[inline]
+pub fn nibble_half_swap(x: u32) -> u32 {
+    (x.rotate_left(12) & 0x0F0F0F0F) | (x.rotate_left(20) & 0xF0F0F0F0)
+}