Crypto++  8.7
Free C++ class library of cryptographic schemes
lsh256_sse.cpp
1 // lsh256_sse.cpp - written and placed in the public domain by Jeffrey Walton
2 // Based on the specification and source code provided by
3 // Korea Internet & Security Agency (KISA) website. Also
4 // see https://seed.kisa.or.kr/kisa/algorithm/EgovLSHInfo.do
5 // and https://seed.kisa.or.kr/kisa/Board/22/detailView.do.
6 
7 // We are hitting some sort of GCC bug in the LSH AVX2 code path.
8 // Clang is OK on the AVX2 code path. We believe it is GCC Issue
9 // 82735, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82735. It
10 // makes using zeroupper a little tricky.
11 
12 #include "pch.h"
13 #include "config.h"
14 
15 #include "lsh.h"
16 #include "cpu.h"
17 #include "misc.h"
18 
19 // Squash MS LNK4221 and libtool warnings
20 extern const char LSH256_SSE_FNAME[] = __FILE__;
21 
22 #if defined(CRYPTOPP_SSSE3_AVAILABLE) && defined(CRYPTOPP_ENABLE_64BIT_SSE)
23 
24 #if defined(CRYPTOPP_SSSE3_AVAILABLE)
25 # include <emmintrin.h>
26 # include <tmmintrin.h>
27 #endif
28 
29 #if defined(CRYPTOPP_XOP_AVAILABLE)
30 # include <ammintrin.h>
31 #endif
32 
33 // GCC at 4.5. Clang is unknown. Also see https://stackoverflow.com/a/42493893.
34 #if (CRYPTOPP_GCC_VERSION >= 40500)
35 # include <x86intrin.h>
36 #endif
37 
38 ANONYMOUS_NAMESPACE_BEGIN
39 
40 /* LSH Constants */
41 
42 const unsigned int LSH256_MSG_BLK_BYTE_LEN = 128;
43 // const unsigned int LSH256_MSG_BLK_BIT_LEN = 1024;
44 // const unsigned int LSH256_CV_BYTE_LEN = 64;
45 const unsigned int LSH256_HASH_VAL_MAX_BYTE_LEN = 32;
46 
47 // const unsigned int MSG_BLK_WORD_LEN = 32;
48 const unsigned int CV_WORD_LEN = 16;
49 const unsigned int CONST_WORD_LEN = 8;
50 // const unsigned int HASH_VAL_MAX_WORD_LEN = 8;
51 // const unsigned int WORD_BIT_LEN = 32;
52 const unsigned int NUM_STEPS = 26;
53 
54 const unsigned int ROT_EVEN_ALPHA = 29;
55 const unsigned int ROT_EVEN_BETA = 1;
56 const unsigned int ROT_ODD_ALPHA = 5;
57 const unsigned int ROT_ODD_BETA = 17;
58 
59 const unsigned int LSH_TYPE_256_256 = 0x0000020;
60 const unsigned int LSH_TYPE_256_224 = 0x000001C;
61 
62 // const unsigned int LSH_TYPE_224 = LSH_TYPE_256_224;
63 // const unsigned int LSH_TYPE_256 = LSH_TYPE_256_256;
64 
65 /* Error Code */
66 
67 const unsigned int LSH_SUCCESS = 0x0;
68 // const unsigned int LSH_ERR_NULL_PTR = 0x2401;
69 // const unsigned int LSH_ERR_INVALID_ALGTYPE = 0x2402;
70 const unsigned int LSH_ERR_INVALID_DATABITLEN = 0x2403;
71 const unsigned int LSH_ERR_INVALID_STATE = 0x2404;
72 
73 /* Index into our state array */
74 
75 const unsigned int AlgorithmType = 80;
76 const unsigned int RemainingBits = 81;
77 
78 NAMESPACE_END
79 
80 NAMESPACE_BEGIN(CryptoPP)
81 NAMESPACE_BEGIN(LSH)
82 
83 // lsh256.cpp
84 extern const word32 LSH256_IV224[CV_WORD_LEN];
85 extern const word32 LSH256_IV256[CV_WORD_LEN];
86 extern const word32 LSH256_StepConstants[CONST_WORD_LEN * NUM_STEPS];
87 
88 NAMESPACE_END // LSH
89 NAMESPACE_END // Crypto++
90 
91 ANONYMOUS_NAMESPACE_BEGIN
92 
93 using CryptoPP::byte;
94 using CryptoPP::word32;
97 
98 using CryptoPP::GetBlock;
102 
103 typedef byte lsh_u8;
104 typedef word32 lsh_u32;
105 typedef word32 lsh_uint;
106 typedef word32 lsh_err;
107 typedef word32 lsh_type;
108 
109 using CryptoPP::LSH::LSH256_IV224;
110 using CryptoPP::LSH::LSH256_IV256;
111 using CryptoPP::LSH::LSH256_StepConstants;
112 
113 struct LSH256_SSSE3_Context
114 {
115  LSH256_SSSE3_Context(word32* state, word32 algType, word32& remainingBitLength) :
116  cv_l(state+0), cv_r(state+8), sub_msgs(state+16),
117  last_block(reinterpret_cast<byte*>(state+48)),
118  remain_databitlen(remainingBitLength),
119  alg_type(static_cast<lsh_type>(algType)) {}
120 
121  lsh_u32* cv_l; // start of our state block
122  lsh_u32* cv_r;
123  lsh_u32* sub_msgs;
124  lsh_u8* last_block;
125  lsh_u32& remain_databitlen;
126  lsh_type alg_type;
127 };
128 
129 struct LSH256_SSSE3_Internal
130 {
131  LSH256_SSSE3_Internal(word32* state) :
132  submsg_e_l(state+16), submsg_e_r(state+24),
133  submsg_o_l(state+32), submsg_o_r(state+40) { }
134 
135  lsh_u32* submsg_e_l; /* even left sub-message */
136  lsh_u32* submsg_e_r; /* even right sub-message */
137  lsh_u32* submsg_o_l; /* odd left sub-message */
138  lsh_u32* submsg_o_r; /* odd right sub-message */
139 };
140 
141 // const word32 g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
142 
143 /* LSH AlgType Macro */
144 
145 inline bool LSH_IS_LSH512(lsh_uint val) {
146  return (val & 0xf0000) == 0;
147 }
148 
149 inline lsh_uint LSH_GET_SMALL_HASHBIT(lsh_uint val) {
150  return val >> 24;
151 }
152 
153 inline lsh_uint LSH_GET_HASHBYTE(lsh_uint val) {
154  return val & 0xffff;
155 }
156 
157 inline lsh_uint LSH_GET_HASHBIT(lsh_uint val) {
158  return (LSH_GET_HASHBYTE(val) << 3) - LSH_GET_SMALL_HASHBIT(val);
159 }
160 
161 inline lsh_u32 loadLE32(lsh_u32 v) {
163 }
164 
165 lsh_u32 ROTL(lsh_u32 x, lsh_u32 r) {
166  return rotlFixed(x, r);
167 }
168 
169 // Original code relied upon unaligned lsh_u32 buffer
170 inline void load_msg_blk(LSH256_SSSE3_Internal* i_state, const lsh_u8 msgblk[LSH256_MSG_BLK_BYTE_LEN])
171 {
172  CRYPTOPP_ASSERT(i_state != NULLPTR);
173  lsh_u32* submsg_e_l = i_state->submsg_e_l;
174  lsh_u32* submsg_e_r = i_state->submsg_e_r;
175  lsh_u32* submsg_o_l = i_state->submsg_o_l;
176  lsh_u32* submsg_o_r = i_state->submsg_o_r;
177 
178  _mm_storeu_si128(M128_CAST(submsg_e_l+0),
179  _mm_loadu_si128(CONST_M128_CAST(msgblk+0)));
180  _mm_storeu_si128(M128_CAST(submsg_e_l+4),
181  _mm_loadu_si128(CONST_M128_CAST(msgblk+16)));
182  _mm_storeu_si128(M128_CAST(submsg_e_r+0),
183  _mm_loadu_si128(CONST_M128_CAST(msgblk+32)));
184  _mm_storeu_si128(M128_CAST(submsg_e_r+4),
185  _mm_loadu_si128(CONST_M128_CAST(msgblk+48)));
186  _mm_storeu_si128(M128_CAST(submsg_o_l+0),
187  _mm_loadu_si128(CONST_M128_CAST(msgblk+64)));
188  _mm_storeu_si128(M128_CAST(submsg_o_l+4),
189  _mm_loadu_si128(CONST_M128_CAST(msgblk+80)));
190  _mm_storeu_si128(M128_CAST(submsg_o_r+0),
191  _mm_loadu_si128(CONST_M128_CAST(msgblk+96)));
192  _mm_storeu_si128(M128_CAST(submsg_o_r+4),
193  _mm_loadu_si128(CONST_M128_CAST(msgblk+112)));
194 }
195 
196 inline void msg_exp_even(LSH256_SSSE3_Internal* i_state)
197 {
198  CRYPTOPP_ASSERT(i_state != NULLPTR);
199 
200  lsh_u32* submsg_e_l = i_state->submsg_e_l;
201  lsh_u32* submsg_e_r = i_state->submsg_e_r;
202  lsh_u32* submsg_o_l = i_state->submsg_o_l;
203  lsh_u32* submsg_o_r = i_state->submsg_o_r;
204 
205  _mm_storeu_si128(M128_CAST(submsg_e_l+0), _mm_add_epi32(
206  _mm_shuffle_epi32(
207  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(3,2,1,0)),
208  _mm_shuffle_epi32(
209  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(1,0,2,3))));
210 
211  _mm_storeu_si128(M128_CAST(submsg_e_l+4), _mm_add_epi32(
212  _mm_shuffle_epi32(
213  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(3,2,1,0)),
214  _mm_shuffle_epi32(
215  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(2,1,0,3))));
216 
217  _mm_storeu_si128(M128_CAST(submsg_e_r+0), _mm_add_epi32(
218  _mm_shuffle_epi32(
219  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(3,2,1,0)),
220  _mm_shuffle_epi32(
221  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(1,0,2,3))));
222 
223  _mm_storeu_si128(M128_CAST(submsg_e_r+4), _mm_add_epi32(
224  _mm_shuffle_epi32(
225  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(3,2,1,0)),
226  _mm_shuffle_epi32(
227  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(2,1,0,3))));
228 }
229 
230 inline void msg_exp_odd(LSH256_SSSE3_Internal* i_state)
231 {
232  CRYPTOPP_ASSERT(i_state != NULLPTR);
233 
234  lsh_u32* submsg_e_l = i_state->submsg_e_l;
235  lsh_u32* submsg_e_r = i_state->submsg_e_r;
236  lsh_u32* submsg_o_l = i_state->submsg_o_l;
237  lsh_u32* submsg_o_r = i_state->submsg_o_r;
238 
239  _mm_storeu_si128(M128_CAST(submsg_o_l+0), _mm_add_epi32(
240  _mm_shuffle_epi32(
241  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0)), _MM_SHUFFLE(3,2,1,0)),
242  _mm_shuffle_epi32(
243  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+0)), _MM_SHUFFLE(1,0,2,3))));
244 
245  _mm_storeu_si128(M128_CAST(submsg_o_l+4), _mm_add_epi32(
246  _mm_shuffle_epi32(
247  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4)), _MM_SHUFFLE(3,2,1,0)),
248  _mm_shuffle_epi32(
249  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4)), _MM_SHUFFLE(2,1,0,3))));
250 
251  _mm_storeu_si128(M128_CAST(submsg_o_r+0), _mm_add_epi32(
252  _mm_shuffle_epi32(
253  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0)), _MM_SHUFFLE(3,2,1,0)),
254  _mm_shuffle_epi32(
255  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+0)), _MM_SHUFFLE(1,0,2,3))));
256 
257  _mm_storeu_si128(M128_CAST(submsg_o_r+4), _mm_add_epi32(
258  _mm_shuffle_epi32(
259  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4)), _MM_SHUFFLE(3,2,1,0)),
260  _mm_shuffle_epi32(
261  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4)), _MM_SHUFFLE(2,1,0,3))));
262 }
263 
264 inline void load_sc(const lsh_u32** p_const_v, size_t i)
265 {
266  CRYPTOPP_ASSERT(p_const_v != NULLPTR);
267 
268  *p_const_v = &LSH256_StepConstants[i];
269 }
270 
271 inline void msg_add_even(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
272 {
273  CRYPTOPP_ASSERT(i_state != NULLPTR);
274 
275  lsh_u32* submsg_e_l = i_state->submsg_e_l;
276  lsh_u32* submsg_e_r = i_state->submsg_e_r;
277 
278  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_xor_si128(
279  _mm_loadu_si128(CONST_M128_CAST(cv_l+0)),
280  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+0))));
281  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
282  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
283  _mm_loadu_si128(CONST_M128_CAST(submsg_e_l+4))));
284  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_xor_si128(
285  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
286  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+0))));
287  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
288  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
289  _mm_loadu_si128(CONST_M128_CAST(submsg_e_r+4))));
290 }
291 
292 inline void msg_add_odd(lsh_u32 cv_l[8], lsh_u32 cv_r[8], LSH256_SSSE3_Internal* i_state)
293 {
294  CRYPTOPP_ASSERT(i_state != NULLPTR);
295 
296  lsh_u32* submsg_o_l = i_state->submsg_o_l;
297  lsh_u32* submsg_o_r = i_state->submsg_o_r;
298 
299  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
300  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
301  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l))));
302  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
303  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
304  _mm_loadu_si128(CONST_M128_CAST(submsg_o_l+4))));
305  _mm_storeu_si128(M128_CAST(cv_r), _mm_xor_si128(
306  _mm_loadu_si128(CONST_M128_CAST(cv_r)),
307  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r))));
308  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_xor_si128(
309  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
310  _mm_loadu_si128(CONST_M128_CAST(submsg_o_r+4))));
311 }
312 
313 inline void add_blk(lsh_u32 cv_l[8], const lsh_u32 cv_r[8])
314 {
315  _mm_storeu_si128(M128_CAST(cv_l), _mm_add_epi32(
316  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
317  _mm_loadu_si128(CONST_M128_CAST(cv_r))));
318  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_add_epi32(
319  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
320  _mm_loadu_si128(CONST_M128_CAST(cv_r+4))));
321 }
322 
323 template <unsigned int R>
324 inline void rotate_blk(lsh_u32 cv[8])
325 {
326 #if defined(CRYPTOPP_XOP_AVAILABLE)
327  _mm_storeu_si128(M128_CAST(cv),
328  _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R));
329  _mm_storeu_si128(M128_CAST(cv+4),
330  _mm_roti_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R));
331 #else
332  _mm_storeu_si128(M128_CAST(cv), _mm_or_si128(
333  _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), R),
334  _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv)), 32-R)));
335  _mm_storeu_si128(M128_CAST(cv+4), _mm_or_si128(
336  _mm_slli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), R),
337  _mm_srli_epi32(_mm_loadu_si128(CONST_M128_CAST(cv+4)), 32-R)));
338 #endif
339 }
340 
341 inline void xor_with_const(lsh_u32* cv_l, const lsh_u32* const_v)
342 {
343  _mm_storeu_si128(M128_CAST(cv_l), _mm_xor_si128(
344  _mm_loadu_si128(CONST_M128_CAST(cv_l)),
345  _mm_loadu_si128(CONST_M128_CAST(const_v))));
346  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_xor_si128(
347  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)),
348  _mm_loadu_si128(CONST_M128_CAST(const_v+4))));
349 }
350 
351 inline void rotate_msg_gamma(lsh_u32 cv_r[8])
352 {
353  // g_gamma256[8] = { 0, 8, 16, 24, 24, 16, 8, 0 };
354  _mm_storeu_si128(M128_CAST(cv_r+0),
355  _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+0)),
356  _mm_set_epi8(12,15,14,13, 9,8,11,10, 6,5,4,7, 3,2,1,0)));
357  _mm_storeu_si128(M128_CAST(cv_r+4),
358  _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(cv_r+4)),
359  _mm_set_epi8(15,14,13,12, 10,9,8,11, 5,4,7,6, 0,3,2,1)));
360 }
361 
362 inline void word_perm(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
363 {
364  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_shuffle_epi32(
365  _mm_loadu_si128(CONST_M128_CAST(cv_l+0)), _MM_SHUFFLE(3,1,0,2)));
366  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_shuffle_epi32(
367  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)), _MM_SHUFFLE(3,1,0,2)));
368  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_shuffle_epi32(
369  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)), _MM_SHUFFLE(1,2,3,0)));
370  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_shuffle_epi32(
371  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)), _MM_SHUFFLE(1,2,3,0)));
372 
373  __m128i temp = _mm_loadu_si128(CONST_M128_CAST(cv_l+0));
374  _mm_storeu_si128(M128_CAST(cv_l+0),
375  _mm_loadu_si128(CONST_M128_CAST(cv_l+4)));
376  _mm_storeu_si128(M128_CAST(cv_l+4),
377  _mm_loadu_si128(CONST_M128_CAST(cv_r+4)));
378  _mm_storeu_si128(M128_CAST(cv_r+4),
379  _mm_loadu_si128(CONST_M128_CAST(cv_r+0)));
380  _mm_storeu_si128(M128_CAST(cv_r+0), temp);
381 };
382 
383 /* -------------------------------------------------------- *
384 * step function
385 * -------------------------------------------------------- */
386 
387 template <unsigned int Alpha, unsigned int Beta>
388 inline void mix(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 const_v[8])
389 {
390  add_blk(cv_l, cv_r);
391  rotate_blk<Alpha>(cv_l);
392  xor_with_const(cv_l, const_v);
393  add_blk(cv_r, cv_l);
394  rotate_blk<Beta>(cv_r);
395  add_blk(cv_l, cv_r);
396  rotate_msg_gamma(cv_r);
397 }
398 
399 /* -------------------------------------------------------- *
400 * compression function
401 * -------------------------------------------------------- */
402 
403 inline void compress(LSH256_SSSE3_Context* ctx, const lsh_u8 pdMsgBlk[LSH256_MSG_BLK_BYTE_LEN])
404 {
405  CRYPTOPP_ASSERT(ctx != NULLPTR);
406 
407  LSH256_SSSE3_Internal s_state(ctx->cv_l);
408  LSH256_SSSE3_Internal* i_state = &s_state;
409 
410  const lsh_u32* const_v = NULL;
411  lsh_u32* cv_l = ctx->cv_l;
412  lsh_u32* cv_r = ctx->cv_r;
413 
414  load_msg_blk(i_state, pdMsgBlk);
415 
416  msg_add_even(cv_l, cv_r, i_state);
417  load_sc(&const_v, 0);
418  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
419  word_perm(cv_l, cv_r);
420 
421  msg_add_odd(cv_l, cv_r, i_state);
422  load_sc(&const_v, 8);
423  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
424  word_perm(cv_l, cv_r);
425 
426  for (size_t i = 1; i < NUM_STEPS / 2; i++)
427  {
428  msg_exp_even(i_state);
429  msg_add_even(cv_l, cv_r, i_state);
430  load_sc(&const_v, 16 * i);
431  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
432  word_perm(cv_l, cv_r);
433 
434  msg_exp_odd(i_state);
435  msg_add_odd(cv_l, cv_r, i_state);
436  load_sc(&const_v, 16 * i + 8);
437  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
438  word_perm(cv_l, cv_r);
439  }
440 
441  msg_exp_even(i_state);
442  msg_add_even(cv_l, cv_r, i_state);
443 }
444 
445 /* -------------------------------------------------------- */
446 
447 inline void load_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8], const lsh_u32 iv[16])
448 {
449  _mm_storeu_si128(M128_CAST(cv_l+ 0),
450  _mm_load_si128(CONST_M128_CAST(iv+ 0)));
451  _mm_storeu_si128(M128_CAST(cv_l+ 4),
452  _mm_load_si128(CONST_M128_CAST(iv+ 4)));
453  _mm_storeu_si128(M128_CAST(cv_r+ 0),
454  _mm_load_si128(CONST_M128_CAST(iv+ 8)));
455  _mm_storeu_si128(M128_CAST(cv_r+ 4),
456  _mm_load_si128(CONST_M128_CAST(iv+12)));
457 }
458 
459 inline void zero_iv(lsh_u32 cv_l[8], lsh_u32 cv_r[8])
460 {
461  _mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
462  _mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
463  _mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
464  _mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
465 }
466 
467 inline void zero_submsgs(LSH256_SSSE3_Context* ctx)
468 {
469  lsh_u32* sub_msgs = ctx->sub_msgs;
470 
471  _mm_storeu_si128(M128_CAST(sub_msgs+ 0), _mm_setzero_si128());
472  _mm_storeu_si128(M128_CAST(sub_msgs+ 4), _mm_setzero_si128());
473  _mm_storeu_si128(M128_CAST(sub_msgs+ 8), _mm_setzero_si128());
474  _mm_storeu_si128(M128_CAST(sub_msgs+12), _mm_setzero_si128());
475  _mm_storeu_si128(M128_CAST(sub_msgs+16), _mm_setzero_si128());
476  _mm_storeu_si128(M128_CAST(sub_msgs+20), _mm_setzero_si128());
477  _mm_storeu_si128(M128_CAST(sub_msgs+24), _mm_setzero_si128());
478  _mm_storeu_si128(M128_CAST(sub_msgs+28), _mm_setzero_si128());
479 }
480 
481 inline void init224(LSH256_SSSE3_Context* ctx)
482 {
483  CRYPTOPP_ASSERT(ctx != NULLPTR);
484 
485  zero_submsgs(ctx);
486  load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV224);
487 }
488 
489 inline void init256(LSH256_SSSE3_Context* ctx)
490 {
491  CRYPTOPP_ASSERT(ctx != NULLPTR);
492 
493  zero_submsgs(ctx);
494  load_iv(ctx->cv_l, ctx->cv_r, LSH256_IV256);
495 }
496 
497 /* -------------------------------------------------------- */
498 
499 inline void fin(LSH256_SSSE3_Context* ctx)
500 {
501  CRYPTOPP_ASSERT(ctx != NULLPTR);
502 
503  _mm_storeu_si128(M128_CAST(ctx->cv_l+0), _mm_xor_si128(
504  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+0)),
505  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+0))));
506  _mm_storeu_si128(M128_CAST(ctx->cv_l+4), _mm_xor_si128(
507  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_l+4)),
508  _mm_loadu_si128(CONST_M128_CAST(ctx->cv_r+4))));
509 }
510 
511 /* -------------------------------------------------------- */
512 
513 inline void get_hash(LSH256_SSSE3_Context* ctx, lsh_u8* pbHashVal)
514 {
515  CRYPTOPP_ASSERT(ctx != NULLPTR);
516  CRYPTOPP_ASSERT(ctx->alg_type != 0);
517  CRYPTOPP_ASSERT(pbHashVal != NULLPTR);
518 
519  lsh_uint alg_type = ctx->alg_type;
520  lsh_uint hash_val_byte_len = LSH_GET_HASHBYTE(alg_type);
521  lsh_uint hash_val_bit_len = LSH_GET_SMALL_HASHBIT(alg_type);
522 
523  // Multiplying by sizeof(lsh_u8) looks odd...
524  memcpy(pbHashVal, ctx->cv_l, hash_val_byte_len);
525  if (hash_val_bit_len){
526  pbHashVal[hash_val_byte_len-1] &= (((lsh_u8)0xff) << hash_val_bit_len);
527  }
528 }
529 
530 /* -------------------------------------------------------- */
531 
532 lsh_err lsh256_ssse3_init(LSH256_SSSE3_Context* ctx)
533 {
534  CRYPTOPP_ASSERT(ctx != NULLPTR);
535  CRYPTOPP_ASSERT(ctx->alg_type != 0);
536 
537  lsh_u32 alg_type = ctx->alg_type;
538  const lsh_u32* const_v = NULL;
539  ctx->remain_databitlen = 0;
540 
541  switch (alg_type)
542  {
543  case LSH_TYPE_256_256:
544  init256(ctx);
545  return LSH_SUCCESS;
546  case LSH_TYPE_256_224:
547  init224(ctx);
548  return LSH_SUCCESS;
549  default:
550  break;
551  }
552 
553  lsh_u32* cv_l = ctx->cv_l;
554  lsh_u32* cv_r = ctx->cv_r;
555 
556  zero_iv(cv_l, cv_r);
557  cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
558  cv_l[1] = LSH_GET_HASHBIT(alg_type);
559 
560  for (size_t i = 0; i < NUM_STEPS / 2; i++)
561  {
562  //Mix
563  load_sc(&const_v, i * 16);
564  mix<ROT_EVEN_ALPHA, ROT_EVEN_BETA>(cv_l, cv_r, const_v);
565  word_perm(cv_l, cv_r);
566 
567  load_sc(&const_v, i * 16 + 8);
568  mix<ROT_ODD_ALPHA, ROT_ODD_BETA>(cv_l, cv_r, const_v);
569  word_perm(cv_l, cv_r);
570  }
571 
572  return LSH_SUCCESS;
573 }
574 
575 lsh_err lsh256_ssse3_update(LSH256_SSSE3_Context* ctx, const lsh_u8* data, size_t databitlen)
576 {
577  CRYPTOPP_ASSERT(ctx != NULLPTR);
578  CRYPTOPP_ASSERT(data != NULLPTR);
579  CRYPTOPP_ASSERT(databitlen % 8 == 0);
580  CRYPTOPP_ASSERT(ctx->alg_type != 0);
581 
582  if (databitlen == 0){
583  return LSH_SUCCESS;
584  }
585 
586  // We are byte oriented. tail bits will always be 0.
587  size_t databytelen = databitlen >> 3;
588  // lsh_uint pos2 = databitlen & 0x7;
589  const size_t pos2 = 0;
590 
591  size_t remain_msg_byte = ctx->remain_databitlen >> 3;
592  // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
593  const size_t remain_msg_bit = 0;
594 
595  if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
596  return LSH_ERR_INVALID_STATE;
597  }
598  if (remain_msg_bit > 0){
599  return LSH_ERR_INVALID_DATABITLEN;
600  }
601 
602  if (databytelen + remain_msg_byte < LSH256_MSG_BLK_BYTE_LEN)
603  {
604  memcpy(ctx->last_block + remain_msg_byte, data, databytelen);
605  ctx->remain_databitlen += (lsh_uint)databitlen;
606  remain_msg_byte += (lsh_uint)databytelen;
607  if (pos2){
608  ctx->last_block[remain_msg_byte] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
609  }
610  return LSH_SUCCESS;
611  }
612 
613  if (remain_msg_byte > 0){
614  size_t more_byte = LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte;
615  memcpy(ctx->last_block + remain_msg_byte, data, more_byte);
616  compress(ctx, ctx->last_block);
617  data += more_byte;
618  databytelen -= more_byte;
619  remain_msg_byte = 0;
620  ctx->remain_databitlen = 0;
621  }
622 
623  while (databytelen >= LSH256_MSG_BLK_BYTE_LEN)
624  {
625  // This call to compress caused some trouble.
626  // The data pointer can become unaligned in the
627  // previous block.
628  compress(ctx, data);
629  data += LSH256_MSG_BLK_BYTE_LEN;
630  databytelen -= LSH256_MSG_BLK_BYTE_LEN;
631  }
632 
633  if (databytelen > 0){
634  memcpy(ctx->last_block, data, databytelen);
635  ctx->remain_databitlen = (lsh_uint)(databytelen << 3);
636  }
637 
638  if (pos2){
639  ctx->last_block[databytelen] = data[databytelen] & ((0xff >> pos2) ^ 0xff);
640  ctx->remain_databitlen += pos2;
641  }
642 
643  return LSH_SUCCESS;
644 }
645 
646 lsh_err lsh256_ssse3_final(LSH256_SSSE3_Context* ctx, lsh_u8* hashval)
647 {
648  CRYPTOPP_ASSERT(ctx != NULLPTR);
649  CRYPTOPP_ASSERT(hashval != NULLPTR);
650 
651  // We are byte oriented. tail bits will always be 0.
652  size_t remain_msg_byte = ctx->remain_databitlen >> 3;
653  // lsh_uint remain_msg_bit = ctx->remain_databitlen & 7;
654  const size_t remain_msg_bit = 0;
655 
656  if (remain_msg_byte >= LSH256_MSG_BLK_BYTE_LEN){
657  return LSH_ERR_INVALID_STATE;
658  }
659 
660  if (remain_msg_bit){
661  ctx->last_block[remain_msg_byte] |= (0x1 << (7 - remain_msg_bit));
662  }
663  else{
664  ctx->last_block[remain_msg_byte] = 0x80;
665  }
666  memset(ctx->last_block + remain_msg_byte + 1, 0, LSH256_MSG_BLK_BYTE_LEN - remain_msg_byte - 1);
667 
668  compress(ctx, ctx->last_block);
669 
670  fin(ctx);
671  get_hash(ctx, hashval);
672 
673  return LSH_SUCCESS;
674 }
675 
676 ANONYMOUS_NAMESPACE_END // Anonymous
677 
678 NAMESPACE_BEGIN(CryptoPP)
679 
680 extern
681 void LSH256_Base_Restart_SSSE3(word32* state)
682 {
683  state[RemainingBits] = 0;
684  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
685  lsh_err err = lsh256_ssse3_init(&ctx);
686 
687  if (err != LSH_SUCCESS)
688  throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_init failed");
689 }
690 
691 extern
692 void LSH256_Base_Update_SSSE3(word32* state, const byte *input, size_t size)
693 {
694  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
695  lsh_err err = lsh256_ssse3_update(&ctx, input, 8*size);
696 
697  if (err != LSH_SUCCESS)
698  throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_update failed");
699 }
700 
701 extern
702 void LSH256_Base_TruncatedFinal_SSSE3(word32* state, byte *hash, size_t)
703 {
704  LSH256_SSSE3_Context ctx(state, state[AlgorithmType], state[RemainingBits]);
705  lsh_err err = lsh256_ssse3_final(&ctx, hash);
706 
707  if (err != LSH_SUCCESS)
708  throw Exception(Exception::OTHER_ERROR, "LSH256_Base: lsh256_ssse3_final failed");
709 }
710 
711 NAMESPACE_END
712 
713 #endif // CRYPTOPP_SSSE3_AVAILABLE
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Base class for all exceptions thrown by the library.
Definition: cryptlib.h:159
@ OTHER_ERROR
Some other error occurred not belonging to other categories.
Definition: cryptlib.h:177
Library configuration file.
unsigned char byte
8-bit unsigned datatype
Definition: config_int.h:56
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
EnumToType< ByteOrder, LITTLE_ENDIAN_ORDER > LittleEndian
Provides a constant for LittleEndian.
Definition: cryptlib.h:150
Classes for the LSH hash functions.
Utility functions for the Crypto++ library.
T rotlConstant(T x)
Performs a left rotate.
Definition: misc.h:1548
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2208
T rotlFixed(T x, unsigned int y)
Performs a left rotate.
Definition: misc.h:1599
Crypto++ library namespace.
Precompiled header file.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68