// Brute-force search for strings whose SHA-1 digest is close (in Hamming
// distance) to the SHA-1 digest of `target`.
//
// `target` and `s` come from target.h.  The last five characters of `s` are
// replaced by every combination of the first ALPHABETUSED characters of
// ALPHABET, and the letter case of the remaining prefix is varied.  Four
// candidates are hashed at once, one per 32-bit lane of an SSE2 register.

#include "target.h"
// NOTE(review): the original file had six system includes whose header names
// were lost; the ones below are the headers the code visibly requires
// (__m128i/_mm_*, random/srandom, memset, setw/setfill, cout/ostream).
#include <emmintrin.h>
#include <stdlib.h>
#include <string.h>
#include <iomanip>
#include <iostream>
using namespace std;
#include "cpucycles.h"

// Cycles spent inside the two innermost search loops, accumulated by main().
long long innerloopcycles;

// Four independent 32-bit unsigned integers held in one SSE2 register.
// All arithmetic is lane-wise; lane 0 is the lowest 32 bits.
class uint32x4 {
  __m128i x;
public:
  // Leaves the value uninitialized (callers assign before use).
  inline uint32x4() { }
  // Broadcasts u into all four lanes.  Intentionally implicit: the code
  // assigns plain integer constants to uint32x4 variables.
  inline uint32x4(unsigned int u) { x = _mm_set_epi32(u,u,u,u); }
  inline uint32x4(__m128i u) { x = u; }

  // Lane extraction: the shuffle moves lane k into lane 0, which is then read.
  unsigned int uint0() const { return _mm_cvtsi128_si32(x); }
  unsigned int uint1() const { return _mm_cvtsi128_si32(_mm_shuffle_epi32(x,0x39)); }
  unsigned int uint2() const { return _mm_cvtsi128_si32(_mm_shuffle_epi32(x,0x4e)); }
  unsigned int uint3() const { return _mm_cvtsi128_si32(_mm_shuffle_epi32(x,0x93)); }

  friend inline uint32x4 operator+(uint32x4 a,uint32x4 b) { return _mm_add_epi32(a.x,b.x); }
  friend inline uint32x4 operator|(uint32x4 a,uint32x4 b) { return _mm_or_si128(a.x,b.x); }
  friend inline uint32x4 operator&(uint32x4 a,uint32x4 b) { return _mm_and_si128(a.x,b.x); }
  friend inline uint32x4 operator^(uint32x4 a,uint32x4 b) { return _mm_xor_si128(a.x,b.x); }
  friend inline uint32x4 operator>>(uint32x4 a,int b) { return _mm_srli_epi32(a.x,b); }

  // a & ~b  (note the swapped operand order of the intrinsic).
  friend inline uint32x4 andnot(uint32x4 a,uint32x4 b) { return _mm_andnot_si128(b.x,a.x); }

  // Lane-wise left rotations.  The two shifted halves never overlap, so XOR
  // combines them exactly like OR would.
  friend inline uint32x4 rotate1(uint32x4 a) { return _mm_xor_si128(_mm_slli_epi32(a.x,1),_mm_srli_epi32(a.x,31)); }
  friend inline uint32x4 rotate5(uint32x4 a) { return _mm_xor_si128(_mm_slli_epi32(a.x,5),_mm_srli_epi32(a.x,27)); }
  friend inline uint32x4 rotate30(uint32x4 a) { return _mm_xor_si128(_mm_slli_epi32(a.x,30),_mm_srli_epi32(a.x,2)); }

  // Prints lane 0 only, as 8 hex digits, most significant byte first.
  // Leaves the stream in hex mode with fill character '0'.
  friend ostream& operator<<(ostream& o,const uint32x4& u)
  {
    unsigned int r = u.uint0();
    o << hex << setw(2) << setfill('0') << ((r >> 24) & 255);
    o << hex << setw(2) << setfill('0') << ((r >> 16) & 255);
    o << hex << setw(2) << setfill('0') << ((r >> 8) & 255);
    o << hex << setw(2) << setfill('0') << (r & 255);
    return o;
  }
} ;

// Four SHA-1 chaining states processed in parallel (one per lane).
class hash {
  uint32x4 state[5];
public:
  // Leaves the state uninitialized; call init() before update()/update1().
  hash() { }

  // Loads the SHA-1 initial chaining value into every lane.
  void init()
  {
    state[0] = 0x67452301;
    state[1] = 0xefcdab89;
    state[2] = 0x98badcfe;
    state[3] = 0x10325476;
    state[4] = 0xc3d2e1f0;
  }

  // Returns, per lane, the number of bits in which this 160-bit state
  // differs from b's state (0..160).
  uint32x4 hammingdistance(const hash &b) const
  {
    uint32x4 x0 = state[0] ^ b.state[0];
    uint32x4 x1 = state[1] ^ b.state[1];
    uint32x4 x2 = state[2] ^ b.state[2];
    uint32x4 x3 = state[3] ^ b.state[3];
    uint32x4 x4 = state[4] ^ b.state[4];
    uint32x4 mask;

    // Parallel bit count: each step adds adjacent chunks pairwise.
    // 32 1-bit chunks
    mask = 0x55555555;
    x0 = (x0 & mask) + ((x0 >> 1) & mask);
    x1 = (x1 & mask) + ((x1 >> 1) & mask);
    x2 = (x2 & mask) + ((x2 >> 1) & mask);
    x3 = (x3 & mask) + ((x3 >> 1) & mask);
    x4 = (x4 & mask) + ((x4 >> 1) & mask);
    // 16 2-bit chunks, each holding 0..2
    mask = 0x33333333;
    x0 = (x0 & mask) + ((x0 >> 2) & mask);
    x1 = (x1 & mask) + ((x1 >> 2) & mask);
    x2 = (x2 & mask) + ((x2 >> 2) & mask);
    x3 = (x3 & mask) + ((x3 >> 2) & mask);
    x4 = (x4 & mask) + ((x4 >> 2) & mask);
    // 8 4-bit chunks, each holding 0..4
    mask = 0x0f0f0f0f;
    x0 = (x0 & mask) + ((x0 >> 4) & mask);
    x1 = (x1 & mask) + ((x1 >> 4) & mask);
    x2 = (x2 & mask) + ((x2 >> 4) & mask);
    x3 = (x3 & mask) + ((x3 >> 4) & mask);
    x4 = (x4 & mask) + ((x4 >> 4) & mask);
    // 4 8-bit chunks, each holding 0..8
    mask = 0x00ff00ff;
    x0 = (x0 & mask) + ((x0 >> 8) & mask);
    x1 = (x1 & mask) + ((x1 >> 8) & mask);
    x2 = (x2 & mask) + ((x2 >> 8) & mask);
    x3 = (x3 & mask) + ((x3 >> 8) & mask);
    x4 = (x4 & mask) + ((x4 >> 8) & mask);
    // 2 16-bit chunks, each holding 0..16
    mask = 0x0000ffff;
    x0 = (x0 & mask) + ((x0 >> 16) & mask);
    x1 = (x1 & mask) + ((x1 >> 16) & mask);
    x2 = (x2 & mask) + ((x2 >> 16) & mask);
    x3 = (x3 & mask) + ((x3 >> 16) & mask);
    x4 = (x4 & mask) + ((x4 >> 16) & mask);

    return x0 + x1 + x2 + x3 + x4;
  }

  // Prints lane 0 of the five state words as 40 hex digits.
  friend ostream& operator<<(ostream& o,const hash& h)
  {
    o << h.state[0];
    o << h.state[1];
    o << h.state[2];
    o << h.state[3];
    o << h.state[4];
    return o;
  }

// Message schedule step: x0 = rotate1(x0 ^ x2 ^ x8 ^ x13), where the
// arguments are the words 16, 14, 8 and 3 positions back in the schedule.
#define X(x0,x2,x8,x13) \
  x0 = x0 ^ x8; \
  x0 = x0 ^ x13; \
  x0 = x0 ^ x2; \
  x0 = rotate1(x0);

// Round with the "choose" function (b & c) | (~b & d).  The two terms are
// bitwise disjoint, so adding them separately equals adding their OR.
#define R1(x0,a,b,c,d,e) \
  e = e + delta; \
  e = e + (c & b); \
  e = e + andnot(d,b); \
  e = e + x0; \
  b = rotate30(b); \
  e = e + rotate5(a);

// Round with the parity function b ^ c ^ d.
#define R2(x0,a,b,c,d,e) \
  e = e + delta; \
  e = e + (b ^ c ^ d); \
  e = e + x0; \
  b = rotate30(b); \
  e = e + rotate5(a);

// Round with the majority function.
#define R3(x0,a,b,c,d,e) \
  e = e + delta; \
  e = e + (((b | d) & c) | (b & d)); \
  e = e + x0; \
  b = rotate30(b); \
  e = e + rotate5(a);

// Same as R1/R2/R3 for a message word that is known to be zero.
#define R10(a,b,c,d,e) \
  e = e + delta; \
  e = e + (c & b); \
  e = e + andnot(d,b); \
  b = rotate30(b); \
  e = e + rotate5(a);

#define R20(a,b,c,d,e) \
  e = e + delta; \
  e = e + (b ^ c ^ d); \
  b = rotate30(b); \
  e = e + rotate5(a);

#define R30(a,b,c,d,e) \
  e = e + delta; \
  e = e + (((b | d) & c) | (b & d)); \
  b = rotate30(b); \
  e = e + rotate5(a);

  // Absorbs `inblocks` consecutive 16-word blocks starting at `in` into the
  // state.  The 80 rounds are fully unrolled; instead of moving values
  // between a..e, each round is invoked with the names rotated by one.
  void update(const uint32x4 *in,unsigned long long inblocks)
  {
    uint32x4 a = state[0];
    uint32x4 b = state[1];
    uint32x4 c = state[2];
    uint32x4 d = state[3];
    uint32x4 e = state[4];
    uint32x4 delta;
    uint32x4 x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;

    while (inblocks > 0) {
      delta = 0x5a827999;
      x0 = in[0]; R1(x0,a,b,c,d,e)
      x1 = in[1]; R1(x1,e,a,b,c,d)
      x2 = in[2]; R1(x2,d,e,a,b,c)
      x3 = in[3]; R1(x3,c,d,e,a,b)
      x4 = in[4]; R1(x4,b,c,d,e,a)
      x5 = in[5]; R1(x5,a,b,c,d,e)
      x6 = in[6]; R1(x6,e,a,b,c,d)
      x7 = in[7]; R1(x7,d,e,a,b,c)
      x8 = in[8]; R1(x8,c,d,e,a,b)
      x9 = in[9]; R1(x9,b,c,d,e,a)
      x10 = in[10]; R1(x10,a,b,c,d,e)
      x11 = in[11]; R1(x11,e,a,b,c,d)
      x12 = in[12]; R1(x12,d,e,a,b,c)
      x13 = in[13]; R1(x13,c,d,e,a,b)
      x14 = in[14]; R1(x14,b,c,d,e,a)
      x15 = in[15]; R1(x15,a,b,c,d,e)
      X(x0,x2,x8,x13) R1(x0,e,a,b,c,d)
      X(x1,x3,x9,x14) R1(x1,d,e,a,b,c)
      X(x2,x4,x10,x15) R1(x2,c,d,e,a,b)
      X(x3,x5,x11,x0) R1(x3,b,c,d,e,a)

      delta = 0x6ed9eba1;
      X(x4,x6,x12,x1) R2(x4,a,b,c,d,e)
      X(x5,x7,x13,x2) R2(x5,e,a,b,c,d)
      X(x6,x8,x14,x3) R2(x6,d,e,a,b,c)
      X(x7,x9,x15,x4) R2(x7,c,d,e,a,b)
      X(x8,x10,x0,x5) R2(x8,b,c,d,e,a)
      X(x9,x11,x1,x6) R2(x9,a,b,c,d,e)
      X(x10,x12,x2,x7) R2(x10,e,a,b,c,d)
      X(x11,x13,x3,x8) R2(x11,d,e,a,b,c)
      X(x12,x14,x4,x9) R2(x12,c,d,e,a,b)
      X(x13,x15,x5,x10) R2(x13,b,c,d,e,a)
      X(x14,x0,x6,x11) R2(x14,a,b,c,d,e)
      X(x15,x1,x7,x12) R2(x15,e,a,b,c,d)
      X(x0,x2,x8,x13) R2(x0,d,e,a,b,c)
      X(x1,x3,x9,x14) R2(x1,c,d,e,a,b)
      X(x2,x4,x10,x15) R2(x2,b,c,d,e,a)
      X(x3,x5,x11,x0) R2(x3,a,b,c,d,e)
      X(x4,x6,x12,x1) R2(x4,e,a,b,c,d)
      X(x5,x7,x13,x2) R2(x5,d,e,a,b,c)
      X(x6,x8,x14,x3) R2(x6,c,d,e,a,b)
      X(x7,x9,x15,x4) R2(x7,b,c,d,e,a)

      delta = 0x8f1bbcdc;
      X(x8,x10,x0,x5) R3(x8,a,b,c,d,e)
      X(x9,x11,x1,x6) R3(x9,e,a,b,c,d)
      X(x10,x12,x2,x7) R3(x10,d,e,a,b,c)
      X(x11,x13,x3,x8) R3(x11,c,d,e,a,b)
      X(x12,x14,x4,x9) R3(x12,b,c,d,e,a)
      X(x13,x15,x5,x10) R3(x13,a,b,c,d,e)
      X(x14,x0,x6,x11) R3(x14,e,a,b,c,d)
      X(x15,x1,x7,x12) R3(x15,d,e,a,b,c)
      X(x0,x2,x8,x13) R3(x0,c,d,e,a,b)
      X(x1,x3,x9,x14) R3(x1,b,c,d,e,a)
      X(x2,x4,x10,x15) R3(x2,a,b,c,d,e)
      X(x3,x5,x11,x0) R3(x3,e,a,b,c,d)
      X(x4,x6,x12,x1) R3(x4,d,e,a,b,c)
      X(x5,x7,x13,x2) R3(x5,c,d,e,a,b)
      X(x6,x8,x14,x3) R3(x6,b,c,d,e,a)
      X(x7,x9,x15,x4) R3(x7,a,b,c,d,e)
      X(x8,x10,x0,x5) R3(x8,e,a,b,c,d)
      X(x9,x11,x1,x6) R3(x9,d,e,a,b,c)
      X(x10,x12,x2,x7) R3(x10,c,d,e,a,b)
      X(x11,x13,x3,x8) R3(x11,b,c,d,e,a)

      delta = 0xca62c1d6;
      X(x12,x14,x4,x9) R2(x12,a,b,c,d,e)
      X(x13,x15,x5,x10) R2(x13,e,a,b,c,d)
      X(x14,x0,x6,x11) R2(x14,d,e,a,b,c)
      X(x15,x1,x7,x12) R2(x15,c,d,e,a,b)
      X(x0,x2,x8,x13) R2(x0,b,c,d,e,a)
      X(x1,x3,x9,x14) R2(x1,a,b,c,d,e)
      X(x2,x4,x10,x15) R2(x2,e,a,b,c,d)
      X(x3,x5,x11,x0) R2(x3,d,e,a,b,c)
      X(x4,x6,x12,x1) R2(x4,c,d,e,a,b)
      X(x5,x7,x13,x2) R2(x5,b,c,d,e,a)
      X(x6,x8,x14,x3) R2(x6,a,b,c,d,e)
      X(x7,x9,x15,x4) R2(x7,e,a,b,c,d)
      X(x8,x10,x0,x5) R2(x8,d,e,a,b,c)
      X(x9,x11,x1,x6) R2(x9,c,d,e,a,b)
      X(x10,x12,x2,x7) R2(x10,b,c,d,e,a)
      X(x11,x13,x3,x8) R2(x11,a,b,c,d,e)
      X(x12,x14,x4,x9) R2(x12,e,a,b,c,d)
      X(x13,x15,x5,x10) R2(x13,d,e,a,b,c)
      X(x14,x0,x6,x11) R2(x14,c,d,e,a,b)
      X(x15,x1,x7,x12) R2(x15,b,c,d,e,a)

      --inblocks;
      in += 16;

      // Feed-forward: add the chaining value that entered this block.
      a = a + state[0];
      b = b + state[1];
      c = c + state[2];
      d = d + state[3];
      e = e + state[4];
      state[0] = a;
      state[1] = b;
      state[2] = c;
      state[3] = d;
      state[4] = e;
    }
  }

  // Absorbs exactly one block, reading only in[0], in[1] and in[15].
  // Words 2..14 are treated as zero: their rounds use R10 and the early
  // message-schedule steps omit the XOR terms that would be zero.  The
  // caller must guarantee in[2..14] really are zero (main() enforces this
  // by requiring slen == 69).
  void update1(const uint32x4 *in)
  {
    uint32x4 a = state[0];
    uint32x4 b = state[1];
    uint32x4 c = state[2];
    uint32x4 d = state[3];
    uint32x4 e = state[4];
    uint32x4 delta;
    uint32x4 x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;

    delta = 0x5a827999;
    x0 = in[0]; R1(x0,a,b,c,d,e)
    x1 = in[1]; R1(x1,e,a,b,c,d)
    // Rounds 2..14: message word is zero.
    R10(d,e,a,b,c)
    R10(c,d,e,a,b)
    R10(b,c,d,e,a)
    R10(a,b,c,d,e)
    R10(e,a,b,c,d)
    R10(d,e,a,b,c)
    R10(c,d,e,a,b)
    R10(b,c,d,e,a)
    R10(a,b,c,d,e)
    R10(e,a,b,c,d)
    R10(d,e,a,b,c)
    R10(c,d,e,a,b)
    R10(b,c,d,e,a)
    x15 = in[15]; R1(x15,a,b,c,d,e)

    // Rounds 16..30: schedule words with the known-zero inputs dropped.
    // x2..x14 are first written here; they are never read before that.
    x0 = rotate1(x0); R1(x0,e,a,b,c,d)
    x1 = rotate1(x1); R1(x1,d,e,a,b,c)
    x2 = rotate1(x15); R1(x2,c,d,e,a,b)
    x3 = rotate1(x0); R1(x3,b,c,d,e,a)

    delta = 0x6ed9eba1;
    x4 = rotate1(x1); R2(x4,a,b,c,d,e)
    x5 = rotate1(x2); R2(x5,e,a,b,c,d)
    x6 = rotate1(x3); R2(x6,d,e,a,b,c)
    x7 = x15 ^ x4; x7 = rotate1(x7); R2(x7,c,d,e,a,b)
    x8 = x0 ^ x5; x8 = rotate1(x8); R2(x8,b,c,d,e,a)
    x9 = x1 ^ x6; x9 = rotate1(x9); R2(x9,a,b,c,d,e)
    x10 = x2 ^ x7; x10 = rotate1(x10); R2(x10,e,a,b,c,d)
    x11 = x3 ^ x8; x11 = rotate1(x11); R2(x11,d,e,a,b,c)
    x12 = x4 ^ x9; x12 = rotate1(x12); R2(x12,c,d,e,a,b)
    x13 = x5 ^ x15; x13 = x13 ^ x10; x13 = rotate1(x13); R2(x13,b,c,d,e,a)
    x14 = x6 ^ x0; x14 = x14 ^ x11; x14 = rotate1(x14); R2(x14,a,b,c,d,e)

    // Rounds 31..79: every schedule input is live, same as update().
    X(x15,x1,x7,x12) R2(x15,e,a,b,c,d)
    X(x0,x2,x8,x13) R2(x0,d,e,a,b,c)
    X(x1,x3,x9,x14) R2(x1,c,d,e,a,b)
    X(x2,x4,x10,x15) R2(x2,b,c,d,e,a)
    X(x3,x5,x11,x0) R2(x3,a,b,c,d,e)
    X(x4,x6,x12,x1) R2(x4,e,a,b,c,d)
    X(x5,x7,x13,x2) R2(x5,d,e,a,b,c)
    X(x6,x8,x14,x3) R2(x6,c,d,e,a,b)
    X(x7,x9,x15,x4) R2(x7,b,c,d,e,a)

    delta = 0x8f1bbcdc;
    X(x8,x10,x0,x5) R3(x8,a,b,c,d,e)
    X(x9,x11,x1,x6) R3(x9,e,a,b,c,d)
    X(x10,x12,x2,x7) R3(x10,d,e,a,b,c)
    X(x11,x13,x3,x8) R3(x11,c,d,e,a,b)
    X(x12,x14,x4,x9) R3(x12,b,c,d,e,a)
    X(x13,x15,x5,x10) R3(x13,a,b,c,d,e)
    X(x14,x0,x6,x11) R3(x14,e,a,b,c,d)
    X(x15,x1,x7,x12) R3(x15,d,e,a,b,c)
    X(x0,x2,x8,x13) R3(x0,c,d,e,a,b)
    X(x1,x3,x9,x14) R3(x1,b,c,d,e,a)
    X(x2,x4,x10,x15) R3(x2,a,b,c,d,e)
    X(x3,x5,x11,x0) R3(x3,e,a,b,c,d)
    X(x4,x6,x12,x1) R3(x4,d,e,a,b,c)
    X(x5,x7,x13,x2) R3(x5,c,d,e,a,b)
    X(x6,x8,x14,x3) R3(x6,b,c,d,e,a)
    X(x7,x9,x15,x4) R3(x7,a,b,c,d,e)
    X(x8,x10,x0,x5) R3(x8,e,a,b,c,d)
    X(x9,x11,x1,x6) R3(x9,d,e,a,b,c)
    X(x10,x12,x2,x7) R3(x10,c,d,e,a,b)
    X(x11,x13,x3,x8) R3(x11,b,c,d,e,a)

    delta = 0xca62c1d6;
    X(x12,x14,x4,x9) R2(x12,a,b,c,d,e)
    X(x13,x15,x5,x10) R2(x13,e,a,b,c,d)
    X(x14,x0,x6,x11) R2(x14,d,e,a,b,c)
    X(x15,x1,x7,x12) R2(x15,c,d,e,a,b)
    X(x0,x2,x8,x13) R2(x0,b,c,d,e,a)
    X(x1,x3,x9,x14) R2(x1,a,b,c,d,e)
    X(x2,x4,x10,x15) R2(x2,e,a,b,c,d)
    X(x3,x5,x11,x0) R2(x3,d,e,a,b,c)
    X(x4,x6,x12,x1) R2(x4,c,d,e,a,b)
    X(x5,x7,x13,x2) R2(x5,b,c,d,e,a)
    X(x6,x8,x14,x3) R2(x6,a,b,c,d,e)
    X(x7,x9,x15,x4) R2(x7,e,a,b,c,d)
    X(x8,x10,x0,x5) R2(x8,d,e,a,b,c)
    X(x9,x11,x1,x6) R2(x9,c,d,e,a,b)
    X(x10,x12,x2,x7) R2(x10,b,c,d,e,a)
    X(x11,x13,x3,x8) R2(x11,a,b,c,d,e)
    X(x12,x14,x4,x9) R2(x12,e,a,b,c,d)
    X(x13,x15,x5,x10) R2(x13,d,e,a,b,c)
    X(x14,x0,x6,x11) R2(x14,c,d,e,a,b)
    X(x15,x1,x7,x12) R2(x15,b,c,d,e,a)

    state[0] = state[0] + a;
    state[1] = state[1] + b;
    state[2] = state[2] + c;
    state[3] = state[3] + d;
    state[4] = state[4] + e;
  }
} ;

// Candidate characters for the last five positions of s.
const char ALPHABET[64] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/_";
#define ALPHABETUSED 32 // has to be a multiple of 4 for vectorization

// Number of 32-bit words in the padded-message buffers.
#define WORDS (64 + 16 * ((sizeof s + sizeof target) / 64))

unsigned int words[WORDS];   // scalar padded message
uint32x4 wordsx4[WORDS];     // the same message, one copy per lane

// Address, inside lane 0 of wordsx4, of message byte `index`.  The `^ 3`
// stores each group of four message bytes most-significant-first inside a
// 32-bit word (assumes a little-endian host, as SSE2 targets are).
// Lane k of the same byte lives at offset 4*k from the returned pointer.
static inline unsigned char *lane0byte(long long index)
{
  const long long pos = index ^ 3;
  return (unsigned char *) (wordsx4 + pos / 4) + (3 & pos);
}

// Writes one character per lane at message byte `index`.
static inline void setlanes(long long index,unsigned char c0,unsigned char c1,unsigned char c2,unsigned char c3)
{
  unsigned char *p = lane0byte(index);
  p[0] = c0;
  p[4] = c1;
  p[8] = c2;
  p[12] = c3;
}

// Copies the first slen message bytes of lane 0 back into s.
static inline void readback(long long slen)
{
  for (long long i = 0;i < slen;++i) s[i] = *lane0byte(i);
}

// Prints one hit: puts `last` into lane 0 as the final character, rehashes
// the final block so that lane 0 of the printed digest belongs to that
// string, and writes "<distance> <digest> <string>".
static void report(const hash &prefix,const uint32x4 *block,long long slen,unsigned int distance,char last)
{
  *lane0byte(slen - 1) = last;
  hash h(prefix);
  h.update1(block);
  readback(slen);
  cout << dec << distance << " " << h << " " << s << "\n" << flush;
}

// Returns 100 if s is unusable (its length must be exactly 69), 0 once every
// case-flip pattern of the prefix has been tried.
int main()
{
  const long long startcycles = cpucycles();
  long long hashes = 1; // starts at 1 so the cycles/hash division is never by zero
  unsigned char *wordbytes = (unsigned char *) words;

  // Hash the target: pad (0x80 byte, bit length in the last word), then
  // replicate the scalar words into all four lanes.
  long long targetlen = 0;
  while (target[targetlen]) ++targetlen;
  const long long targetblocks = (targetlen + 72) / 64;
  memset(words,0,sizeof words);
  for (long long i = 0;i < targetlen;++i) wordbytes[i ^ 3] = target[i];
  wordbytes[targetlen ^ 3] = 0x80;
  words[targetblocks * 16 - 1] = targetlen * 8;
  for (size_t i = 0;i < WORDS;++i) wordsx4[i] = words[i];

  hash targethash;
  targethash.init();
  targethash.update(wordsx4,targetblocks);
  cout << 0 << " " << targethash << " " << target << "\n";

  unsigned char flip[sizeof s];
  long long slen = 0;
  while (s[slen]) ++slen;
  if (slen < 5) return 100;
  const long long sblocks = (slen + 72) / 64;
  const long long sblockspre = (slen - 5) / 64; // whole blocks before the varying tail
  // update1() hard-codes the layout of the final block for this length.
  if (slen != 69) return 100;
  for (long long i = 0;i < slen;++i) flip[i] = 0;

  // Pick a starting letter-case pattern (spaces are left alone).
#ifndef NONRANDOM
  srandom(cpucycles()); // XXX: randomize better
#endif
  for (long long i = 0;i < slen;++i)
    if (random() & 1)
      if (s[i] != ' ') s[i] ^= 32;
#ifndef NONRANDOM
  srandom(cpucycles() >> 10);
  for (long long i = 0;i < slen;++i)
    if (random() & 1)
      if (s[i] != ' ') s[i] ^= 32;
#endif

  // Build the padded scalar message for s.
  memset(words,0,sizeof words);
  wordbytes[slen ^ 3] = 0x80;
  words[sblocks * 16 - 1] = slen * 8;
  for (long long i = 0;i < slen;++i) wordbytes[i ^ 3] = s[i];

  const uint32x4 *tail = wordsx4 + sblockspre * 16; // block holding the last 5 characters

  for (;;) {
    for (size_t i = 0;i < WORDS;++i) wordsx4[i] = words[i];

    // The prefix blocks do not change during the inner search; hash them once.
    hash shashpre;
    shashpre.init();
    shashpre.update(wordsx4,sblockspre);

    readback(slen);
    cout << "cycles/hash " << dec << (cpucycles() - startcycles) / hashes << " " << (innerloopcycles) / hashes << " " << s << "\n" << flush;

    for (int c0 = 0;c0 < ALPHABETUSED;++c0) {
      setlanes(slen - 5,ALPHABET[c0],ALPHABET[c0],ALPHABET[c0],ALPHABET[c0]);
      for (int c1 = 0;c1 < ALPHABETUSED;++c1) {
        setlanes(slen - 4,ALPHABET[c1],ALPHABET[c1],ALPHABET[c1],ALPHABET[c1]);
        for (int c2 = 0;c2 < ALPHABETUSED;++c2) {
          setlanes(slen - 3,ALPHABET[c2],ALPHABET[c2],ALPHABET[c2],ALPHABET[c2]);

          innerloopcycles -= cpucycles();
          for (int c3 = 0;c3 < ALPHABETUSED;++c3) {
            setlanes(slen - 2,ALPHABET[c3],ALPHABET[c3],ALPHABET[c3],ALPHABET[c3]);

            int c4;
            for (c4 = 0;c4 + 4 <= ALPHABETUSED;c4 += 4) {
              // Four different final characters, one per lane.
              setlanes(slen - 1,ALPHABET[c4],ALPHABET[c4 + 1],ALPHABET[c4 + 2],ALPHABET[c4 + 3]);

              hash shash(shashpre);
              shash.update1(tail);
              const uint32x4 d = shash.hammingdistance(targethash);

              // report() overwrites lane 0's final character; the next
              // setlanes() call above restores it.
              if (d.uint0() < 40) report(shashpre,tail,slen,d.uint0(),ALPHABET[c4]);
              if (d.uint1() < 40) report(shashpre,tail,slen,d.uint1(),ALPHABET[c4 + 1]);
              if (d.uint2() < 40) report(shashpre,tail,slen,d.uint2(),ALPHABET[c4 + 2]);
              if (d.uint3() < 40) report(shashpre,tail,slen,d.uint3(),ALPHABET[c4 + 3]);
            }
            hashes += c4; // c4 == number of candidates hashed in the loop above
          }
          innerloopcycles += cpucycles();
        }
      }
    }

    // Step the prefix to the next letter-case pattern, binary-counter style:
    // toggle successive non-space characters until one toggles "on".  If all
    // of them wrapped back to "off", every pattern has been visited.
    long long i;
    for (i = 0;i < slen - 5;++i)
      if (s[i] != ' ') {
        wordbytes[i ^ 3] ^= 32;
        flip[i] ^= 32;
        if (flip[i]) break;
      }
    if (i == slen - 5) return 0;
  }
}