master
zone117x@gmail.com 2014-03-30 04:12:48 -04:00
parent 5e9699704b
commit 29010cc5a9
28 changed files with 3914 additions and 7 deletions

View File

@ -0,0 +1,32 @@
cmd_Release/obj.target/multihashing/multihashing.o := g++ '-D_LARGEFILE_SOURCE' '-D_FILE_OFFSET_BITS=64' '-DBUILDING_NODE_EXTENSION' -I/root/.node-gyp/0.10.26/src -I/root/.node-gyp/0.10.26/deps/uv/include -I/root/.node-gyp/0.10.26/deps/v8/include -fPIC -Wall -Wextra -Wno-unused-parameter -pthread -m64 -O2 -fno-strict-aliasing -fno-tree-vrp -fno-omit-frame-pointer -fno-rtti -fno-exceptions -MMD -MF ./Release/.deps/Release/obj.target/multihashing/multihashing.o.d.raw -c -o Release/obj.target/multihashing/multihashing.o ../multihashing.cc
Release/obj.target/multihashing/multihashing.o: ../multihashing.cc \
/root/.node-gyp/0.10.26/src/node.h \
/root/.node-gyp/0.10.26/deps/uv/include/uv.h \
/root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-unix.h \
/root/.node-gyp/0.10.26/deps/uv/include/uv-private/ngx-queue.h \
/root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-linux.h \
/root/.node-gyp/0.10.26/deps/v8/include/v8.h \
/root/.node-gyp/0.10.26/deps/v8/include/v8stdint.h \
/root/.node-gyp/0.10.26/src/node_object_wrap.h \
/root/.node-gyp/0.10.26/src/node.h \
/root/.node-gyp/0.10.26/src/node_buffer.h ../bcrypt.h ../keccak.h \
../quark.h ../scrypt.h ../scryptjane.h ../scryptn.h ../skein.h ../x11.h
../multihashing.cc:
/root/.node-gyp/0.10.26/src/node.h:
/root/.node-gyp/0.10.26/deps/uv/include/uv.h:
/root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-unix.h:
/root/.node-gyp/0.10.26/deps/uv/include/uv-private/ngx-queue.h:
/root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-linux.h:
/root/.node-gyp/0.10.26/deps/v8/include/v8.h:
/root/.node-gyp/0.10.26/deps/v8/include/v8stdint.h:
/root/.node-gyp/0.10.26/src/node_object_wrap.h:
/root/.node-gyp/0.10.26/src/node.h:
/root/.node-gyp/0.10.26/src/node_buffer.h:
../bcrypt.h:
../keccak.h:
../quark.h:
../scrypt.h:
../scryptjane.h:
../scryptn.h:
../skein.h:
../x11.h:

View File

@ -0,0 +1,4 @@
cmd_Release/obj.target/multihashing/scrypt.o := cc '-D_LARGEFILE_SOURCE' '-D_FILE_OFFSET_BITS=64' '-DBUILDING_NODE_EXTENSION' -I/root/.node-gyp/0.10.26/src -I/root/.node-gyp/0.10.26/deps/uv/include -I/root/.node-gyp/0.10.26/deps/v8/include -fPIC -Wall -Wextra -Wno-unused-parameter -pthread -m64 -O2 -fno-strict-aliasing -fno-tree-vrp -fno-omit-frame-pointer -MMD -MF ./Release/.deps/Release/obj.target/multihashing/scrypt.o.d.raw -c -o Release/obj.target/multihashing/scrypt.o ../scrypt.c
Release/obj.target/multihashing/scrypt.o: ../scrypt.c ../scrypt.h
../scrypt.c:
../scrypt.h:

View File

@ -0,0 +1,30 @@
cmd_Release/obj.target/multihashing/scryptjane.o := cc '-D_LARGEFILE_SOURCE' '-D_FILE_OFFSET_BITS=64' '-DBUILDING_NODE_EXTENSION' -I/root/.node-gyp/0.10.26/src -I/root/.node-gyp/0.10.26/deps/uv/include -I/root/.node-gyp/0.10.26/deps/v8/include -fPIC -Wall -Wextra -Wno-unused-parameter -pthread -m64 -O2 -fno-strict-aliasing -fno-tree-vrp -fno-omit-frame-pointer -MMD -MF ./Release/.deps/Release/obj.target/multihashing/scryptjane.o.d.raw -c -o Release/obj.target/multihashing/scryptjane.o ../scryptjane.c
Release/obj.target/multihashing/scryptjane.o: ../scryptjane.c \
../scryptjane.h ../scryptjane/scrypt-jane-portable.h \
../scryptjane/scrypt-jane-portable-x86.h \
../scryptjane/scrypt-jane-hash.h ../scryptjane/scrypt-jane-hash_keccak.h \
../scryptjane/scrypt-jane-pbkdf2.h ../scryptjane/scrypt-jane-romix.h \
../scryptjane/scrypt-jane-chacha.h \
../scryptjane/scrypt-jane-romix-basic.h \
../scryptjane/scrypt-jane-mix_chacha-avx.h \
../scryptjane/scrypt-jane-mix_chacha-ssse3.h \
../scryptjane/scrypt-jane-mix_chacha-sse2.h \
../scryptjane/scrypt-jane-mix_chacha.h \
../scryptjane/scrypt-jane-romix-template.h \
../scryptjane/scrypt-jane-test-vectors.h
../scryptjane.c:
../scryptjane.h:
../scryptjane/scrypt-jane-portable.h:
../scryptjane/scrypt-jane-portable-x86.h:
../scryptjane/scrypt-jane-hash.h:
../scryptjane/scrypt-jane-hash_keccak.h:
../scryptjane/scrypt-jane-pbkdf2.h:
../scryptjane/scrypt-jane-romix.h:
../scryptjane/scrypt-jane-chacha.h:
../scryptjane/scrypt-jane-romix-basic.h:
../scryptjane/scrypt-jane-mix_chacha-avx.h:
../scryptjane/scrypt-jane-mix_chacha-ssse3.h:
../scryptjane/scrypt-jane-mix_chacha-sse2.h:
../scryptjane/scrypt-jane-mix_chacha.h:
../scryptjane/scrypt-jane-romix-template.h:
../scryptjane/scrypt-jane-test-vectors.h:

Binary file not shown.

View File

@ -193,8 +193,8 @@ Handle<Value> scryptjane(const Arguments& args) {
Local<Number> num = args[1]->ToNumber();
int timestamp = num->Value();
Local<Number> num = args[2]->ToNumber();
int nChainStarTime = num->Value();
Local<Number> num2 = args[2]->ToNumber();
int nChainStarTime = num2->Value();
char * input = Buffer::Data(target);

View File

@ -15,7 +15,7 @@
},
"keywords": [
"scrypt",
"scrypt-jane",
"scryptjane",
"script-n",
"x11",
"quark",

View File

@ -0,0 +1,132 @@
#define SCRYPT_MIX_BASE "ChaCha20/8"
typedef uint32_t scrypt_mix_word_t;
#define SCRYPT_WORDTO8_LE U32TO8_LE
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
#define SCRYPT_BLOCK_BYTES 64
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
/* must have these here in case block bytes is ever != 64 */
#include "scrypt-jane-romix-basic.h"
#include "scrypt-jane-mix_chacha-avx.h"
#include "scrypt-jane-mix_chacha-ssse3.h"
#include "scrypt-jane-mix_chacha-sse2.h"
#include "scrypt-jane-mix_chacha.h"
#if defined(SCRYPT_CHACHA_AVX)
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
#define SCRYPT_ROMIX_FN scrypt_ROMix_avx
#define SCRYPT_MIX_FN chacha_core_avx
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
#include "scrypt-jane-romix-template.h"
#endif
#if defined(SCRYPT_CHACHA_SSSE3)
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3
#define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3
#define SCRYPT_MIX_FN chacha_core_ssse3
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
#include "scrypt-jane-romix-template.h"
#endif
#if defined(SCRYPT_CHACHA_SSE2)
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
#define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
#define SCRYPT_MIX_FN chacha_core_sse2
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
#include "scrypt-jane-romix-template.h"
#endif
/* cpu agnostic */
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
#define SCRYPT_MIX_FN chacha_core_basic
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
#include "scrypt-jane-romix-template.h"
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
static scrypt_ROMixfn
scrypt_getROMix() {
size_t cpuflags = detect_cpu();
#if defined(SCRYPT_CHACHA_AVX)
if (cpuflags & cpu_avx)
return scrypt_ROMix_avx;
else
#endif
#if defined(SCRYPT_CHACHA_SSSE3)
if (cpuflags & cpu_ssse3)
return scrypt_ROMix_ssse3;
else
#endif
#if defined(SCRYPT_CHACHA_SSE2)
if (cpuflags & cpu_sse2)
return scrypt_ROMix_sse2;
else
#endif
return scrypt_ROMix_basic;
}
#endif
#if defined(SCRYPT_TEST_SPEED)
static size_t
available_implementations() {
size_t flags = 0;
#if defined(SCRYPT_CHACHA_AVX)
flags |= cpu_avx;
#endif
#if defined(SCRYPT_CHACHA_SSSE3)
flags |= cpu_ssse3;
#endif
#if defined(SCRYPT_CHACHA_SSE2)
flags |= cpu_sse2;
#endif
return flags;
}
#endif
static int
scrypt_test_mix() {
static const uint8_t expected[16] = {
0x48,0x2b,0x2d,0xb8,0xa1,0x33,0x22,0x73,0xcd,0x16,0xc4,0xb4,0xb0,0x7f,0xb1,0x8a,
};
int ret = 1;
size_t cpuflags = detect_cpu();
#if defined(SCRYPT_CHACHA_AVX)
if (cpuflags & cpu_avx)
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, scrypt_romix_nop, scrypt_romix_nop, expected);
#endif
#if defined(SCRYPT_CHACHA_SSSE3)
if (cpuflags & cpu_ssse3)
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, scrypt_romix_nop, scrypt_romix_nop, expected);
#endif
#if defined(SCRYPT_CHACHA_SSE2)
if (cpuflags & cpu_sse2)
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, scrypt_romix_nop, scrypt_romix_nop, expected);
#endif
#if defined(SCRYPT_CHACHA_BASIC)
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
#endif
return ret;
}

View File

@ -0,0 +1,48 @@
#if defined(SCRYPT_BLAKE512)
#include "scrypt-jane-hash_blake512.h"
#elif defined(SCRYPT_BLAKE256)
#include "scrypt-jane-hash_blake256.h"
#elif defined(SCRYPT_SHA512)
#include "scrypt-jane-hash_sha512.h"
#elif defined(SCRYPT_SHA256)
#include "scrypt-jane-hash_sha256.h"
#elif defined(SCRYPT_SKEIN512)
#include "scrypt-jane-hash_skein512.h"
#elif defined(SCRYPT_KECCAK512) || defined(SCRYPT_KECCAK256)
#include "scrypt-jane-hash_keccak.h"
#else
#define SCRYPT_HASH "ERROR"
#define SCRYPT_HASH_BLOCK_SIZE 64
#define SCRYPT_HASH_DIGEST_SIZE 64
typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state;
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
static void scrypt_hash_init(scrypt_hash_state *S) {}
static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {}
static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {}
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0};
#error must define a hash function!
#endif
#include "scrypt-jane-pbkdf2.h"
#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */
static int
scrypt_test_hash() {
scrypt_hash_state st;
scrypt_hash_digest hash, final;
uint8_t msg[SCRYPT_TEST_HASH_LEN];
size_t i;
for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++)
msg[i] = (uint8_t)i;
scrypt_hash_init(&st);
for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) {
scrypt_hash(hash, msg, i);
scrypt_hash_update(&st, hash, sizeof(hash));
}
scrypt_hash_finish(&st, final);
return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE);
}

View File

@ -0,0 +1,168 @@
#if defined(SCRYPT_KECCAK256)
#define SCRYPT_HASH "Keccak-256"
#define SCRYPT_HASH_DIGEST_SIZE 32
#else
#define SCRYPT_HASH "Keccak-512"
#define SCRYPT_HASH_DIGEST_SIZE 64
#endif
#define SCRYPT_KECCAK_F 1600
#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 256=512, 512=1024 */
#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 256=1088, 512=576 */
#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8)
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
typedef struct scrypt_hash_state_t {
uint64_t state[SCRYPT_KECCAK_F / 64];
uint32_t leftover;
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;
static const uint64_t keccak_round_constants[24] = {
0x0000000000000001ull, 0x0000000000008082ull,
0x800000000000808aull, 0x8000000080008000ull,
0x000000000000808bull, 0x0000000080000001ull,
0x8000000080008081ull, 0x8000000000008009ull,
0x000000000000008aull, 0x0000000000000088ull,
0x0000000080008009ull, 0x000000008000000aull,
0x000000008000808bull, 0x800000000000008bull,
0x8000000000008089ull, 0x8000000000008003ull,
0x8000000000008002ull, 0x8000000000000080ull,
0x000000000000800aull, 0x800000008000000aull,
0x8000000080008081ull, 0x8000000000008080ull,
0x0000000080000001ull, 0x8000000080008008ull
};
static void
keccak_block(scrypt_hash_state *S, const uint8_t *in) {
size_t i;
uint64_t *s = S->state, t[5], u[5], v, w;
/* absorb input */
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE / 8; i++, in += 8)
s[i] ^= U8TO64_LE(in);
for (i = 0; i < 24; i++) {
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
u[0] = t[4] ^ ROTL64(t[1], 1);
u[1] = t[0] ^ ROTL64(t[2], 1);
u[2] = t[1] ^ ROTL64(t[3], 1);
u[3] = t[2] ^ ROTL64(t[4], 1);
u[4] = t[3] ^ ROTL64(t[0], 1);
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];
/* rho pi: b[..] = rotl(a[..], ..) */
v = s[ 1];
s[ 1] = ROTL64(s[ 6], 44);
s[ 6] = ROTL64(s[ 9], 20);
s[ 9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[ 2], 62);
s[ 2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[ 4], 27);
s[ 4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[ 8], 55);
s[ 8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[ 5], 36);
s[ 5] = ROTL64(s[ 3], 28);
s[ 3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[ 7], 6);
s[ 7] = ROTL64(s[10], 3);
s[10] = ROTL64( v, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
}
static void
scrypt_hash_init(scrypt_hash_state *S) {
memset(S, 0, sizeof(*S));
}
static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
size_t want;
/* handle the previous data */
if (S->leftover) {
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
want = (want < inlen) ? want : inlen;
memcpy(S->buffer + S->leftover, in, want);
S->leftover += (uint32_t)want;
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
return;
in += want;
inlen -= want;
keccak_block(S, S->buffer);
}
/* handle the current data */
while (inlen >= SCRYPT_HASH_BLOCK_SIZE) {
keccak_block(S, in);
in += SCRYPT_HASH_BLOCK_SIZE;
inlen -= SCRYPT_HASH_BLOCK_SIZE;
}
/* handle leftover data */
S->leftover = (uint32_t)inlen;
if (S->leftover)
memcpy(S->buffer, in, S->leftover);
}
static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
size_t i;
S->buffer[S->leftover] = 0x01;
memset(S->buffer + (S->leftover + 1), 0, SCRYPT_HASH_BLOCK_SIZE - (S->leftover + 1));
S->buffer[SCRYPT_HASH_BLOCK_SIZE - 1] |= 0x80;
keccak_block(S, S->buffer);
for (i = 0; i < SCRYPT_HASH_DIGEST_SIZE; i += 8) {
U64TO8_LE(&hash[i], S->state[i / 8]);
}
}
#if defined(SCRYPT_KECCAK256)
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
0x26,0xb7,0x10,0xb3,0x66,0xb1,0xd1,0xb1,0x25,0xfc,0x3e,0xe3,0x1e,0x33,0x1d,0x19,
0x94,0xaa,0x63,0x7a,0xd5,0x77,0x29,0xb4,0x27,0xe9,0xe0,0xf4,0x19,0xba,0x68,0xea,
};
#else
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
0x17,0xc7,0x8c,0xa0,0xd9,0x08,0x1d,0xba,0x8a,0xc8,0x3e,0x07,0x90,0xda,0x91,0x88,
0x25,0xbd,0xd3,0xf8,0x78,0x4a,0x8d,0x5e,0xe4,0x96,0x9c,0x01,0xf3,0xeb,0xdc,0x12,
0xea,0x35,0x57,0xba,0x94,0xb8,0xe9,0xb9,0x27,0x45,0x0a,0x48,0x5c,0x3d,0x69,0xf0,
0xdb,0x22,0x38,0xb5,0x52,0x22,0x29,0xea,0x7a,0xb2,0xe6,0x07,0xaa,0x37,0x4d,0xe6,
};
#endif

View File

@ -0,0 +1,135 @@
#define SCRYPT_HASH "SHA-2-256"
#define SCRYPT_HASH_BLOCK_SIZE 64
#define SCRYPT_HASH_DIGEST_SIZE 32
typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
typedef struct scrypt_hash_state_t {
uint32_t H[8];
uint64_t T;
uint32_t leftover;
uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;
static const uint32_t sha256_constants[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
#define G0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3))
#define G1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10))
#define W0(in,i) (U8TO32_BE(&in[i * 4]))
#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
#define STEP(i) \
t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha256_constants[i] + w[i]; \
r[7] = r[6]; \
r[6] = r[5]; \
r[5] = r[4]; \
r[4] = r[3] + t0; \
r[3] = r[2]; \
r[2] = r[1]; \
r[1] = r[0]; \
r[0] = t0 + t1;
static void
sha256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
uint32_t r[8], w[64], t0, t1;
size_t i;
for (i = 0; i < 8; i++) r[i] = S->H[i];
while (blocks--) {
for (i = 0; i < 16; i++) { w[i] = W0(in, i); }
for (i = 16; i < 64; i++) { w[i] = W1(i); }
for (i = 0; i < 64; i++) { STEP(i); }
for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; }
S->T += SCRYPT_HASH_BLOCK_SIZE * 8;
in += SCRYPT_HASH_BLOCK_SIZE;
}
}
static void
scrypt_hash_init(scrypt_hash_state *S) {
S->H[0] = 0x6a09e667;
S->H[1] = 0xbb67ae85;
S->H[2] = 0x3c6ef372;
S->H[3] = 0xa54ff53a;
S->H[4] = 0x510e527f;
S->H[5] = 0x9b05688c;
S->H[6] = 0x1f83d9ab;
S->H[7] = 0x5be0cd19;
S->T = 0;
S->leftover = 0;
}
static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
size_t blocks, want;
/* handle the previous data */
if (S->leftover) {
want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
want = (want < inlen) ? want : inlen;
memcpy(S->buffer + S->leftover, in, want);
S->leftover += (uint32_t)want;
if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
return;
in += want;
inlen -= want;
sha256_blocks(S, S->buffer, 1);
}
/* handle the current data */
blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
S->leftover = (uint32_t)(inlen - blocks);
if (blocks) {
sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
in += blocks;
}
/* handle leftover data */
if (S->leftover)
memcpy(S->buffer, in, S->leftover);
}
static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
uint64_t t = S->T + (S->leftover * 8);
S->buffer[S->leftover] = 0x80;
if (S->leftover <= 55) {
memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover);
} else {
memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover);
sha256_blocks(S, S->buffer, 1);
memset(S->buffer, 0, 56);
}
U64TO8_BE(S->buffer + 56, t);
sha256_blocks(S, S->buffer, 1);
U32TO8_BE(&hash[ 0], S->H[0]);
U32TO8_BE(&hash[ 4], S->H[1]);
U32TO8_BE(&hash[ 8], S->H[2]);
U32TO8_BE(&hash[12], S->H[3]);
U32TO8_BE(&hash[16], S->H[4]);
U32TO8_BE(&hash[20], S->H[5]);
U32TO8_BE(&hash[24], S->H[6]);
U32TO8_BE(&hash[28], S->H[7]);
}
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
0xee,0x36,0xae,0xa6,0x65,0xf0,0x28,0x7d,0xc9,0xde,0xd8,0xad,0x48,0x33,0x7d,0xbf,
0xcb,0xc0,0x48,0xfa,0x5f,0x92,0xfd,0x0a,0x95,0x6f,0x34,0x8e,0x8c,0x1e,0x73,0xad,
};

View File

@ -0,0 +1,340 @@
/* x86 */
#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_AVX
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
a1(push ebx)
a1(push edi)
a1(push esi)
a1(push ebp)
a2(mov ebp,esp)
a2(mov edi,[ebp+20])
a2(mov esi,[ebp+24])
a2(mov eax,[ebp+28])
a2(mov ebx,[ebp+32])
a2(sub esp,64)
a2(and esp,~63)
a2(lea edx,[ebx*2])
a2(shl edx,6)
a2(lea ecx,[edx-64])
a2(and eax, eax)
a2(vmovdqa xmm4,[ssse3_rotl16_32bit])
a2(vmovdqa xmm5,[ssse3_rotl8_32bit])
a2(vmovdqa xmm0,[ecx+esi+0])
a2(vmovdqa xmm1,[ecx+esi+16])
a2(vmovdqa xmm2,[ecx+esi+32])
a2(vmovdqa xmm3,[ecx+esi+48])
a1(jz scrypt_ChunkMix_avx_no_xor1)
a3(vpxor xmm0,xmm0,[ecx+eax+0])
a3(vpxor xmm1,xmm1,[ecx+eax+16])
a3(vpxor xmm2,xmm2,[ecx+eax+32])
a3(vpxor xmm3,xmm3,[ecx+eax+48])
a1(scrypt_ChunkMix_avx_no_xor1:)
a2(xor ecx,ecx)
a2(xor ebx,ebx)
a1(scrypt_ChunkMix_avx_loop:)
a2(and eax, eax)
a3(vpxor xmm0,xmm0,[esi+ecx+0])
a3(vpxor xmm1,xmm1,[esi+ecx+16])
a3(vpxor xmm2,xmm2,[esi+ecx+32])
a3(vpxor xmm3,xmm3,[esi+ecx+48])
a1(jz scrypt_ChunkMix_avx_no_xor2)
a3(vpxor xmm0,xmm0,[eax+ecx+0])
a3(vpxor xmm1,xmm1,[eax+ecx+16])
a3(vpxor xmm2,xmm2,[eax+ecx+32])
a3(vpxor xmm3,xmm3,[eax+ecx+48])
a1(scrypt_ChunkMix_avx_no_xor2:)
a2(vmovdqa [esp+0],xmm0)
a2(vmovdqa [esp+16],xmm1)
a2(vmovdqa [esp+32],xmm2)
a2(vmovdqa [esp+48],xmm3)
a2(mov eax,8)
a1(scrypt_chacha_avx_loop: )
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm4)
a3(vpaddd xmm2,xmm2,xmm3)
a3(vpxor xmm1,xmm1,xmm2)
a3(vpsrld xmm6,xmm1,20)
a3(vpslld xmm1,xmm1,12)
a3(vpxor xmm1,xmm1,xmm6)
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm5)
a3(vpshufd xmm0,xmm0,0x93)
a3(vpaddd xmm2,xmm2,xmm3)
a3(vpshufd xmm3,xmm3,0x4e)
a3(vpxor xmm1,xmm1,xmm2)
a3(vpshufd xmm2,xmm2,0x39)
a3(vpsrld xmm6,xmm1,25)
a3(vpslld xmm1,xmm1,7)
a3(vpxor xmm1,xmm1,xmm6)
a2(sub eax,2)
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm4)
a3(vpaddd xmm2,xmm2,xmm3)
a3(vpxor xmm1,xmm1,xmm2)
a3(vpsrld xmm6,xmm1,20)
a3(vpslld xmm1,xmm1,12)
a3(vpxor xmm1,xmm1,xmm6)
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm5)
a3(vpshufd xmm0,xmm0,0x39)
a3(vpaddd xmm2,xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a3(vpxor xmm1,xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x93)
a3(vpsrld xmm6,xmm1,25)
a3(vpslld xmm1,xmm1,7)
a3(vpxor xmm1,xmm1,xmm6)
a1(ja scrypt_chacha_avx_loop)
a3(vpaddd xmm0,xmm0,[esp+0])
a3(vpaddd xmm1,xmm1,[esp+16])
a3(vpaddd xmm2,xmm2,[esp+32])
a3(vpaddd xmm3,xmm3,[esp+48])
a2(lea eax,[ebx+ecx])
a2(xor ebx,edx)
a2(and eax,~0x7f)
a2(add ecx,64)
a2(shr eax,1)
a2(add eax, edi)
a2(cmp ecx,edx)
a2(vmovdqa [eax+0],xmm0)
a2(vmovdqa [eax+16],xmm1)
a2(vmovdqa [eax+32],xmm2)
a2(vmovdqa [eax+48],xmm3)
a2(mov eax,[ebp+28])
a1(jne scrypt_ChunkMix_avx_loop)
a2(mov esp,ebp)
a1(pop ebp)
a1(pop esi)
a1(pop edi)
a1(pop ebx)
a1(ret 16)
asm_naked_fn_end(scrypt_ChunkMix_avx)
#endif
/* x64 */
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_AVX
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
a2(lea rcx,[rcx*2])
a2(shl rcx,6)
a2(lea r9,[rcx-64])
a2(lea rax,[rsi+r9])
a2(lea r9,[rdx+r9])
a2(and rdx, rdx)
a2(vmovdqa xmm4,[ssse3_rotl16_32bit])
a2(vmovdqa xmm5,[ssse3_rotl8_32bit])
a2(vmovdqa xmm0,[rax+0])
a2(vmovdqa xmm1,[rax+16])
a2(vmovdqa xmm2,[rax+32])
a2(vmovdqa xmm3,[rax+48])
a1(jz scrypt_ChunkMix_avx_no_xor1)
a3(vpxor xmm0,xmm0,[r9+0])
a3(vpxor xmm1,xmm1,[r9+16])
a3(vpxor xmm2,xmm2,[r9+32])
a3(vpxor xmm3,xmm3,[r9+48])
a1(scrypt_ChunkMix_avx_no_xor1:)
a2(xor r8,r8)
a2(xor r9,r9)
a1(scrypt_ChunkMix_avx_loop:)
a2(and rdx, rdx)
a3(vpxor xmm0,xmm0,[rsi+r9+0])
a3(vpxor xmm1,xmm1,[rsi+r9+16])
a3(vpxor xmm2,xmm2,[rsi+r9+32])
a3(vpxor xmm3,xmm3,[rsi+r9+48])
a1(jz scrypt_ChunkMix_avx_no_xor2)
a3(vpxor xmm0,xmm0,[rdx+r9+0])
a3(vpxor xmm1,xmm1,[rdx+r9+16])
a3(vpxor xmm2,xmm2,[rdx+r9+32])
a3(vpxor xmm3,xmm3,[rdx+r9+48])
a1(scrypt_ChunkMix_avx_no_xor2:)
a2(vmovdqa xmm8,xmm0)
a2(vmovdqa xmm9,xmm1)
a2(vmovdqa xmm10,xmm2)
a2(vmovdqa xmm11,xmm3)
a2(mov rax,8)
a1(scrypt_chacha_avx_loop: )
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm4)
a3(vpaddd xmm2,xmm2,xmm3)
a3(vpxor xmm1,xmm1,xmm2)
a3(vpsrld xmm12,xmm1,20)
a3(vpslld xmm1,xmm1,12)
a3(vpxor xmm1,xmm1,xmm12)
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm5)
a3(vpshufd xmm0,xmm0,0x93)
a3(vpaddd xmm2,xmm2,xmm3)
a3(vpshufd xmm3,xmm3,0x4e)
a3(vpxor xmm1,xmm1,xmm2)
a3(vpshufd xmm2,xmm2,0x39)
a3(vpsrld xmm12,xmm1,25)
a3(vpslld xmm1,xmm1,7)
a3(vpxor xmm1,xmm1,xmm12)
a2(sub rax,2)
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm4)
a3(vpaddd xmm2,xmm2,xmm3)
a3(vpxor xmm1,xmm1,xmm2)
a3(vpsrld xmm12,xmm1,20)
a3(vpslld xmm1,xmm1,12)
a3(vpxor xmm1,xmm1,xmm12)
a3(vpaddd xmm0,xmm0,xmm1)
a3(vpxor xmm3,xmm3,xmm0)
a3(vpshufb xmm3,xmm3,xmm5)
a3(vpshufd xmm0,xmm0,0x39)
a3(vpaddd xmm2,xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a3(vpxor xmm1,xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x93)
a3(vpsrld xmm12,xmm1,25)
a3(vpslld xmm1,xmm1,7)
a3(vpxor xmm1,xmm1,xmm12)
a1(ja scrypt_chacha_avx_loop)
a3(vpaddd xmm0,xmm0,xmm8)
a3(vpaddd xmm1,xmm1,xmm9)
a3(vpaddd xmm2,xmm2,xmm10)
a3(vpaddd xmm3,xmm3,xmm11)
a2(lea rax,[r8+r9])
a2(xor r8,rcx)
a2(and rax,~0x7f)
a2(add r9,64)
a2(shr rax,1)
a2(add rax, rdi)
a2(cmp r9,rcx)
a2(vmovdqa [rax+0],xmm0)
a2(vmovdqa [rax+16],xmm1)
a2(vmovdqa [rax+32],xmm2)
a2(vmovdqa [rax+48],xmm3)
a1(jne scrypt_ChunkMix_avx_loop)
a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_avx)
#endif
/* intrinsic */
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_AVX
static void NOINLINE
scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
uint32_t i, blocksPerChunk = r * 2, half = 0;
xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3;
const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi *)&ssse3_rotl8_32bit;
size_t rounds;
/* 1: X = B_{2r - 1} */
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
x0 = xmmp[0];
x1 = xmmp[1];
x2 = xmmp[2];
x3 = xmmp[3];
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
/* 3: X = H(X ^ B_i) */
xmmp = (xmmi *)scrypt_block(Bin, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
t0 = x0;
t1 = x1;
t2 = x2;
t3 = x3;
for (rounds = 8; rounds; rounds -= 2) {
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x4);
x2 = _mm_add_epi32(x2, x3);
x1 = _mm_xor_si128(x1, x2);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x5);
x0 = _mm_shuffle_epi32(x0, 0x93);
x2 = _mm_add_epi32(x2, x3);
x3 = _mm_shuffle_epi32(x3, 0x4e);
x1 = _mm_xor_si128(x1, x2);
x2 = _mm_shuffle_epi32(x2, 0x39);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x4);
x2 = _mm_add_epi32(x2, x3);
x1 = _mm_xor_si128(x1, x2);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x5);
x0 = _mm_shuffle_epi32(x0, 0x39);
x2 = _mm_add_epi32(x2, x3);
x3 = _mm_shuffle_epi32(x3, 0x4e);
x1 = _mm_xor_si128(x1, x2);
x2 = _mm_shuffle_epi32(x2, 0x93);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
}
x0 = _mm_add_epi32(x0, t0);
x1 = _mm_add_epi32(x1, t1);
x2 = _mm_add_epi32(x2, t2);
x3 = _mm_add_epi32(x3, t3);
/* 4: Y_i = X */
/* 6: B'[0..r-1] = Y_even */
/* 6: B'[r..2r-1] = Y_odd */
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
xmmp[0] = x0;
xmmp[1] = x1;
xmmp[2] = x2;
xmmp[3] = x3;
}
}
#endif
#if defined(SCRYPT_CHACHA_AVX)
#undef SCRYPT_MIX
#define SCRYPT_MIX "ChaCha/8-AVX"
#undef SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_INCLUDED
#endif

View File

@ -0,0 +1,371 @@
/* x86 */
#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_SSE2
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_sse2)
a1(push ebx)
a1(push edi)
a1(push esi)
a1(push ebp)
a2(mov ebp,esp)
a2(mov edi,[ebp+20])
a2(mov esi,[ebp+24])
a2(mov eax,[ebp+28])
a2(mov ebx,[ebp+32])
a2(sub esp,16)
a2(and esp,~15)
a2(lea edx,[ebx*2])
a2(shl edx,6)
a2(lea ecx,[edx-64])
a2(and eax, eax)
a2(movdqa xmm0,[ecx+esi+0])
a2(movdqa xmm1,[ecx+esi+16])
a2(movdqa xmm2,[ecx+esi+32])
a2(movdqa xmm3,[ecx+esi+48])
a1(jz scrypt_ChunkMix_sse2_no_xor1)
a2(pxor xmm0,[ecx+eax+0])
a2(pxor xmm1,[ecx+eax+16])
a2(pxor xmm2,[ecx+eax+32])
a2(pxor xmm3,[ecx+eax+48])
a1(scrypt_ChunkMix_sse2_no_xor1:)
a2(xor ecx,ecx)
a2(xor ebx,ebx)
a1(scrypt_ChunkMix_sse2_loop:)
a2(and eax, eax)
a2(pxor xmm0,[esi+ecx+0])
a2(pxor xmm1,[esi+ecx+16])
a2(pxor xmm2,[esi+ecx+32])
a2(pxor xmm3,[esi+ecx+48])
a1(jz scrypt_ChunkMix_sse2_no_xor2)
a2(pxor xmm0,[eax+ecx+0])
a2(pxor xmm1,[eax+ecx+16])
a2(pxor xmm2,[eax+ecx+32])
a2(pxor xmm3,[eax+ecx+48])
a1(scrypt_ChunkMix_sse2_no_xor2:)
a2(movdqa [esp+0],xmm0)
a2(movdqa xmm4,xmm1)
a2(movdqa xmm5,xmm2)
a2(movdqa xmm7,xmm3)
a2(mov eax,8)
a1(scrypt_chacha_sse2_loop: )
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,16)
a2(psrld xmm6,16)
a2(pxor xmm3,xmm6)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm6,20)
a2(pxor xmm1,xmm6)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,8)
a2(psrld xmm6,24)
a2(pxor xmm3,xmm6)
a3(pshufd xmm0,xmm0,0x93)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x39)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm6,25)
a2(pxor xmm1,xmm6)
a2(sub eax,2)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,16)
a2(psrld xmm6,16)
a2(pxor xmm3,xmm6)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm6,20)
a2(pxor xmm1,xmm6)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,8)
a2(psrld xmm6,24)
a2(pxor xmm3,xmm6)
a3(pshufd xmm0,xmm0,0x39)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x93)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm6,25)
a2(pxor xmm1,xmm6)
a1(ja scrypt_chacha_sse2_loop)
a2(paddd xmm0,[esp+0])
a2(paddd xmm1,xmm4)
a2(paddd xmm2,xmm5)
a2(paddd xmm3,xmm7)
a2(lea eax,[ebx+ecx])
a2(xor ebx,edx)
a2(and eax,~0x7f)
a2(add ecx,64)
a2(shr eax,1)
a2(add eax, edi)
a2(cmp ecx,edx)
a2(movdqa [eax+0],xmm0)
a2(movdqa [eax+16],xmm1)
a2(movdqa [eax+32],xmm2)
a2(movdqa [eax+48],xmm3)
a2(mov eax,[ebp+28])
a1(jne scrypt_ChunkMix_sse2_loop)
a2(mov esp,ebp)
a1(pop ebp)
a1(pop esi)
a1(pop edi)
a1(pop ebx)
a1(ret 16)
asm_naked_fn_end(scrypt_ChunkMix_sse2)
#endif
/* x64 */
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_SSE2
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_sse2)
a2(lea rcx,[rcx*2])
a2(shl rcx,6)
a2(lea r9,[rcx-64])
a2(lea rax,[rsi+r9])
a2(lea r9,[rdx+r9])
a2(and rdx, rdx)
a2(movdqa xmm0,[rax+0])
a2(movdqa xmm1,[rax+16])
a2(movdqa xmm2,[rax+32])
a2(movdqa xmm3,[rax+48])
a1(jz scrypt_ChunkMix_sse2_no_xor1)
a2(pxor xmm0,[r9+0])
a2(pxor xmm1,[r9+16])
a2(pxor xmm2,[r9+32])
a2(pxor xmm3,[r9+48])
a1(scrypt_ChunkMix_sse2_no_xor1:)
a2(xor r9,r9)
a2(xor r8,r8)
a1(scrypt_ChunkMix_sse2_loop:)
a2(and rdx, rdx)
a2(pxor xmm0,[rsi+r9+0])
a2(pxor xmm1,[rsi+r9+16])
a2(pxor xmm2,[rsi+r9+32])
a2(pxor xmm3,[rsi+r9+48])
a1(jz scrypt_ChunkMix_sse2_no_xor2)
a2(pxor xmm0,[rdx+r9+0])
a2(pxor xmm1,[rdx+r9+16])
a2(pxor xmm2,[rdx+r9+32])
a2(pxor xmm3,[rdx+r9+48])
a1(scrypt_ChunkMix_sse2_no_xor2:)
a2(movdqa xmm8,xmm0)
a2(movdqa xmm9,xmm1)
a2(movdqa xmm10,xmm2)
a2(movdqa xmm11,xmm3)
a2(mov rax,8)
a1(scrypt_chacha_sse2_loop: )
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,16)
a2(psrld xmm6,16)
a2(pxor xmm3,xmm6)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm6,20)
a2(pxor xmm1,xmm6)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,8)
a2(psrld xmm6,24)
a2(pxor xmm3,xmm6)
a3(pshufd xmm0,xmm0,0x93)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x39)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm6,25)
a2(pxor xmm1,xmm6)
a2(sub rax,2)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,16)
a2(psrld xmm6,16)
a2(pxor xmm3,xmm6)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm6,20)
a2(pxor xmm1,xmm6)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(movdqa xmm6,xmm3)
a2(pslld xmm3,8)
a2(psrld xmm6,24)
a2(pxor xmm3,xmm6)
a3(pshufd xmm0,xmm0,0x39)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x93)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm6,25)
a2(pxor xmm1,xmm6)
a1(ja scrypt_chacha_sse2_loop)
a2(paddd xmm0,xmm8)
a2(paddd xmm1,xmm9)
a2(paddd xmm2,xmm10)
a2(paddd xmm3,xmm11)
a2(lea rax,[r8+r9])
a2(xor r8,rcx)
a2(and rax,~0x7f)
a2(add r9,64)
a2(shr rax,1)
a2(add rax, rdi)
a2(cmp r9,rcx)
a2(movdqa [rax+0],xmm0)
a2(movdqa [rax+16],xmm1)
a2(movdqa [rax+32],xmm2)
a2(movdqa [rax+48],xmm3)
a1(jne scrypt_ChunkMix_sse2_loop)
a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_sse2)
#endif
/* intrinsic */
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_SSE2
static void NOINLINE
scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
uint32_t i, blocksPerChunk = r * 2, half = 0;
xmmi *xmmp,x0,x1,x2,x3,x4,t0,t1,t2,t3;
size_t rounds;
/* 1: X = B_{2r - 1} */
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
x0 = xmmp[0];
x1 = xmmp[1];
x2 = xmmp[2];
x3 = xmmp[3];
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
/* 3: X = H(X ^ B_i) */
xmmp = (xmmi *)scrypt_block(Bin, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
t0 = x0;
t1 = x1;
t2 = x2;
t3 = x3;
for (rounds = 8; rounds; rounds -= 2) {
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x4 = x3;
x3 = _mm_or_si128(_mm_slli_epi32(x3, 16), _mm_srli_epi32(x4, 16));
x2 = _mm_add_epi32(x2, x3);
x1 = _mm_xor_si128(x1, x2);
x4 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x4 = x3;
x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24));
x0 = _mm_shuffle_epi32(x0, 0x93);
x2 = _mm_add_epi32(x2, x3);
x3 = _mm_shuffle_epi32(x3, 0x4e);
x1 = _mm_xor_si128(x1, x2);
x2 = _mm_shuffle_epi32(x2, 0x39);
x4 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x4 = x3;
x3 = _mm_or_si128(_mm_slli_epi32(x3, 16), _mm_srli_epi32(x4, 16));
x2 = _mm_add_epi32(x2, x3);
x1 = _mm_xor_si128(x1, x2);
x4 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x4 = x3;
x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24));
x0 = _mm_shuffle_epi32(x0, 0x39);
x2 = _mm_add_epi32(x2, x3);
x3 = _mm_shuffle_epi32(x3, 0x4e);
x1 = _mm_xor_si128(x1, x2);
x2 = _mm_shuffle_epi32(x2, 0x93);
x4 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25));
}
x0 = _mm_add_epi32(x0, t0);
x1 = _mm_add_epi32(x1, t1);
x2 = _mm_add_epi32(x2, t2);
x3 = _mm_add_epi32(x3, t3);
/* 4: Y_i = X */
/* 6: B'[0..r-1] = Y_even */
/* 6: B'[r..2r-1] = Y_odd */
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
xmmp[0] = x0;
xmmp[1] = x1;
xmmp[2] = x2;
xmmp[3] = x3;
}
}
#endif
#if defined(SCRYPT_CHACHA_SSE2)
#undef SCRYPT_MIX
#define SCRYPT_MIX "ChaCha/8-SSE2"
#undef SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_INCLUDED
#endif

View File

@ -0,0 +1,348 @@
/* x86 */
#if defined(X86ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_SSSE3
asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_ssse3)
a1(push ebx)
a1(push edi)
a1(push esi)
a1(push ebp)
a2(mov ebp,esp)
a2(mov edi,[ebp+20])
a2(mov esi,[ebp+24])
a2(mov eax,[ebp+28])
a2(mov ebx,[ebp+32])
a2(sub esp,64)
a2(and esp,~63)
a2(lea edx,[ebx*2])
a2(shl edx,6)
a2(lea ecx,[edx-64])
a2(and eax, eax)
a2(movdqa xmm4,[ssse3_rotl16_32bit])
a2(movdqa xmm5,[ssse3_rotl8_32bit])
a2(movdqa xmm0,[ecx+esi+0])
a2(movdqa xmm1,[ecx+esi+16])
a2(movdqa xmm2,[ecx+esi+32])
a2(movdqa xmm3,[ecx+esi+48])
a1(jz scrypt_ChunkMix_ssse3_no_xor1)
a2(pxor xmm0,[ecx+eax+0])
a2(pxor xmm1,[ecx+eax+16])
a2(pxor xmm2,[ecx+eax+32])
a2(pxor xmm3,[ecx+eax+48])
a1(scrypt_ChunkMix_ssse3_no_xor1:)
a2(xor ecx,ecx)
a2(xor ebx,ebx)
a1(scrypt_ChunkMix_ssse3_loop:)
a2(and eax, eax)
a2(pxor xmm0,[esi+ecx+0])
a2(pxor xmm1,[esi+ecx+16])
a2(pxor xmm2,[esi+ecx+32])
a2(pxor xmm3,[esi+ecx+48])
a1(jz scrypt_ChunkMix_ssse3_no_xor2)
a2(pxor xmm0,[eax+ecx+0])
a2(pxor xmm1,[eax+ecx+16])
a2(pxor xmm2,[eax+ecx+32])
a2(pxor xmm3,[eax+ecx+48])
a1(scrypt_ChunkMix_ssse3_no_xor2:)
a2(movdqa [esp+0],xmm0)
a2(movdqa [esp+16],xmm1)
a2(movdqa [esp+32],xmm2)
a2(movdqa xmm7,xmm3)
a2(mov eax,8)
a1(scrypt_chacha_ssse3_loop: )
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm4)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm6,20)
a2(pxor xmm1,xmm6)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm5)
a3(pshufd xmm0,xmm0,0x93)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x39)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm6,25)
a2(pxor xmm1,xmm6)
a2(sub eax,2)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm4)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm6,20)
a2(pxor xmm1,xmm6)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm5)
a3(pshufd xmm0,xmm0,0x39)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x93)
a2(movdqa xmm6,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm6,25)
a2(pxor xmm1,xmm6)
a1(ja scrypt_chacha_ssse3_loop)
a2(paddd xmm0,[esp+0])
a2(paddd xmm1,[esp+16])
a2(paddd xmm2,[esp+32])
a2(paddd xmm3,xmm7)
a2(lea eax,[ebx+ecx])
a2(xor ebx,edx)
a2(and eax,~0x7f)
a2(add ecx,64)
a2(shr eax,1)
a2(add eax, edi)
a2(cmp ecx,edx)
a2(movdqa [eax+0],xmm0)
a2(movdqa [eax+16],xmm1)
a2(movdqa [eax+32],xmm2)
a2(movdqa [eax+48],xmm3)
a2(mov eax,[ebp+28])
a1(jne scrypt_ChunkMix_ssse3_loop)
a2(mov esp,ebp)
a1(pop ebp)
a1(pop esi)
a1(pop edi)
a1(pop ebx)
a1(ret 16)
asm_naked_fn_end(scrypt_ChunkMix_ssse3)
#endif
/* x64 */
#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_SSSE3
asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_ssse3)
a2(lea rcx,[rcx*2])
a2(shl rcx,6)
a2(lea r9,[rcx-64])
a2(lea rax,[rsi+r9])
a2(lea r9,[rdx+r9])
a2(and rdx, rdx)
a2(movdqa xmm4,[ssse3_rotl16_32bit])
a2(movdqa xmm5,[ssse3_rotl8_32bit])
a2(movdqa xmm0,[rax+0])
a2(movdqa xmm1,[rax+16])
a2(movdqa xmm2,[rax+32])
a2(movdqa xmm3,[rax+48])
a1(jz scrypt_ChunkMix_ssse3_no_xor1)
a2(pxor xmm0,[r9+0])
a2(pxor xmm1,[r9+16])
a2(pxor xmm2,[r9+32])
a2(pxor xmm3,[r9+48])
a1(scrypt_ChunkMix_ssse3_no_xor1:)
a2(xor r8,r8)
a2(xor r9,r9)
a1(scrypt_ChunkMix_ssse3_loop:)
a2(and rdx, rdx)
a2(pxor xmm0,[rsi+r9+0])
a2(pxor xmm1,[rsi+r9+16])
a2(pxor xmm2,[rsi+r9+32])
a2(pxor xmm3,[rsi+r9+48])
a1(jz scrypt_ChunkMix_ssse3_no_xor2)
a2(pxor xmm0,[rdx+r9+0])
a2(pxor xmm1,[rdx+r9+16])
a2(pxor xmm2,[rdx+r9+32])
a2(pxor xmm3,[rdx+r9+48])
a1(scrypt_ChunkMix_ssse3_no_xor2:)
a2(movdqa xmm8,xmm0)
a2(movdqa xmm9,xmm1)
a2(movdqa xmm10,xmm2)
a2(movdqa xmm11,xmm3)
a2(mov rax,8)
a1(scrypt_chacha_ssse3_loop: )
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm4)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm12,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm12,20)
a2(pxor xmm1,xmm12)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm5)
a3(pshufd xmm0,xmm0,0x93)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x39)
a2(movdqa xmm12,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm12,25)
a2(pxor xmm1,xmm12)
a2(sub rax,2)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm4)
a2(paddd xmm2,xmm3)
a2(pxor xmm1,xmm2)
a2(movdqa xmm12,xmm1)
a2(pslld xmm1,12)
a2(psrld xmm12,20)
a2(pxor xmm1,xmm12)
a2(paddd xmm0,xmm1)
a2(pxor xmm3,xmm0)
a2(pshufb xmm3,xmm5)
a3(pshufd xmm0,xmm0,0x39)
a2(paddd xmm2,xmm3)
a3(pshufd xmm3,xmm3,0x4e)
a2(pxor xmm1,xmm2)
a3(pshufd xmm2,xmm2,0x93)
a2(movdqa xmm12,xmm1)
a2(pslld xmm1,7)
a2(psrld xmm12,25)
a2(pxor xmm1,xmm12)
a1(ja scrypt_chacha_ssse3_loop)
a2(paddd xmm0,xmm8)
a2(paddd xmm1,xmm9)
a2(paddd xmm2,xmm10)
a2(paddd xmm3,xmm11)
a2(lea rax,[r8+r9])
a2(xor r8,rcx)
a2(and rax,~0x7f)
a2(add r9,64)
a2(shr rax,1)
a2(add rax, rdi)
a2(cmp r9,rcx)
a2(movdqa [rax+0],xmm0)
a2(movdqa [rax+16],xmm1)
a2(movdqa [rax+32],xmm2)
a2(movdqa [rax+48],xmm3)
a1(jne scrypt_ChunkMix_ssse3_loop)
a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_ssse3)
#endif
/* intrinsic */
#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
#define SCRYPT_CHACHA_SSSE3
static void NOINLINE
scrypt_ChunkMix_ssse3(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
uint32_t i, blocksPerChunk = r * 2, half = 0;
xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3;
const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi *)&ssse3_rotl8_32bit;
size_t rounds;
/* 1: X = B_{2r - 1} */
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
x0 = xmmp[0];
x1 = xmmp[1];
x2 = xmmp[2];
x3 = xmmp[3];
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
/* 3: X = H(X ^ B_i) */
xmmp = (xmmi *)scrypt_block(Bin, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
t0 = x0;
t1 = x1;
t2 = x2;
t3 = x3;
for (rounds = 8; rounds; rounds -= 2) {
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x4);
x2 = _mm_add_epi32(x2, x3);
x1 = _mm_xor_si128(x1, x2);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x5);
x0 = _mm_shuffle_epi32(x0, 0x93);
x2 = _mm_add_epi32(x2, x3);
x3 = _mm_shuffle_epi32(x3, 0x4e);
x1 = _mm_xor_si128(x1, x2);
x2 = _mm_shuffle_epi32(x2, 0x39);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x4);
x2 = _mm_add_epi32(x2, x3);
x1 = _mm_xor_si128(x1, x2);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
x0 = _mm_add_epi32(x0, x1);
x3 = _mm_xor_si128(x3, x0);
x3 = _mm_shuffle_epi8(x3, x5);
x0 = _mm_shuffle_epi32(x0, 0x39);
x2 = _mm_add_epi32(x2, x3);
x3 = _mm_shuffle_epi32(x3, 0x4e);
x1 = _mm_xor_si128(x1, x2);
x2 = _mm_shuffle_epi32(x2, 0x93);
x6 = x1;
x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
}
x0 = _mm_add_epi32(x0, t0);
x1 = _mm_add_epi32(x1, t1);
x2 = _mm_add_epi32(x2, t2);
x3 = _mm_add_epi32(x3, t3);
/* 4: Y_i = X */
/* 6: B'[0..r-1] = Y_even */
/* 6: B'[r..2r-1] = Y_odd */
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
xmmp[0] = x0;
xmmp[1] = x1;
xmmp[2] = x2;
xmmp[3] = x3;
}
}
#endif
#if defined(SCRYPT_CHACHA_SSSE3)
#undef SCRYPT_MIX
#define SCRYPT_MIX "ChaCha/8-SSSE3"
#undef SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_INCLUDED
#endif

View File

@ -0,0 +1,69 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)
#undef SCRYPT_MIX
#define SCRYPT_MIX "ChaCha20/8 Ref"
#undef SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_BASIC
static void
chacha_core_basic(uint32_t state[16]) {
size_t rounds = 8;
uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t;
x0 = state[0];
x1 = state[1];
x2 = state[2];
x3 = state[3];
x4 = state[4];
x5 = state[5];
x6 = state[6];
x7 = state[7];
x8 = state[8];
x9 = state[9];
x10 = state[10];
x11 = state[11];
x12 = state[12];
x13 = state[13];
x14 = state[14];
x15 = state[15];
#define quarter(a,b,c,d) \
a += b; t = d^a; d = ROTL32(t,16); \
c += d; t = b^c; b = ROTL32(t,12); \
a += b; t = d^a; d = ROTL32(t, 8); \
c += d; t = b^c; b = ROTL32(t, 7);
for (; rounds; rounds -= 2) {
quarter( x0, x4, x8,x12)
quarter( x1, x5, x9,x13)
quarter( x2, x6,x10,x14)
quarter( x3, x7,x11,x15)
quarter( x0, x5,x10,x15)
quarter( x1, x6,x11,x12)
quarter( x2, x7, x8,x13)
quarter( x3, x4, x9,x14)
}
state[0] += x0;
state[1] += x1;
state[2] += x2;
state[3] += x3;
state[4] += x4;
state[5] += x5;
state[6] += x6;
state[7] += x7;
state[8] += x8;
state[9] += x9;
state[10] += x10;
state[11] += x11;
state[12] += x12;
state[13] += x13;
state[14] += x14;
state[15] += x15;
#undef quarter
}
#endif

View File

@ -0,0 +1,381 @@
/* x86 */
#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
#define SCRYPT_SALSA_AVX
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
a1(push ebx)
a1(push edi)
a1(push esi)
a1(push ebp)
a2(mov ebp,esp)
a2(mov edi,[ebp+20])
a2(mov esi,[ebp+24])
a2(mov eax,[ebp+28])
a2(mov ebx,[ebp+32])
a2(sub esp,32)
a2(and esp,~63)
a2(lea edx,[ebx*2])
a2(shl edx,6)
a2(lea ecx,[edx-64])
a2(and eax, eax)
a2(movdqa xmm0,[ecx+esi+0])
a2(movdqa xmm1,[ecx+esi+16])
a2(movdqa xmm2,[ecx+esi+32])
a2(movdqa xmm3,[ecx+esi+48])
a1(jz scrypt_ChunkMix_avx_no_xor1)
a3(vpxor xmm0,xmm0,[ecx+eax+0])
a3(vpxor xmm1,xmm1,[ecx+eax+16])
a3(vpxor xmm2,xmm2,[ecx+eax+32])
a3(vpxor xmm3,xmm3,[ecx+eax+48])
a1(scrypt_ChunkMix_avx_no_xor1:)
a2(xor ecx,ecx)
a2(xor ebx,ebx)
a1(scrypt_ChunkMix_avx_loop:)
a2(and eax, eax)
a3(vpxor xmm0,xmm0,[esi+ecx+0])
a3(vpxor xmm1,xmm1,[esi+ecx+16])
a3(vpxor xmm2,xmm2,[esi+ecx+32])
a3(vpxor xmm3,xmm3,[esi+ecx+48])
a1(jz scrypt_ChunkMix_avx_no_xor2)
a3(vpxor xmm0,xmm0,[eax+ecx+0])
a3(vpxor xmm1,xmm1,[eax+ecx+16])
a3(vpxor xmm2,xmm2,[eax+ecx+32])
a3(vpxor xmm3,xmm3,[eax+ecx+48])
a1(scrypt_ChunkMix_avx_no_xor2:)
a2(vmovdqa [esp+0],xmm0)
a2(vmovdqa [esp+16],xmm1)
a2(vmovdqa xmm6,xmm2)
a2(vmovdqa xmm7,xmm3)
a2(mov eax,8)
a1(scrypt_salsa_avx_loop: )
a3(vpaddd xmm4, xmm1, xmm0)
a3(vpsrld xmm5, xmm4, 25)
a3(vpslld xmm4, xmm4, 7)
a3(vpxor xmm3, xmm3, xmm5)
a3(vpxor xmm3, xmm3, xmm4)
a3(vpaddd xmm4, xmm0, xmm3)
a3(vpsrld xmm5, xmm4, 23)
a3(vpslld xmm4, xmm4, 9)
a3(vpxor xmm2, xmm2, xmm5)
a3(vpxor xmm2, xmm2, xmm4)
a3(vpaddd xmm4, xmm3, xmm2)
a3(vpsrld xmm5, xmm4, 19)
a3(vpslld xmm4, xmm4, 13)
a3(vpxor xmm1, xmm1, xmm5)
a3(pshufd xmm3, xmm3, 0x93)
a3(vpxor xmm1, xmm1, xmm4)
a3(vpaddd xmm4, xmm2, xmm1)
a3(vpsrld xmm5, xmm4, 14)
a3(vpslld xmm4, xmm4, 18)
a3(vpxor xmm0, xmm0, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a3(vpxor xmm0, xmm0, xmm4)
a2(sub eax, 2)
a3(vpaddd xmm4, xmm3, xmm0)
a3(pshufd xmm1, xmm1, 0x39)
a3(vpsrld xmm5, xmm4, 25)
a3(vpslld xmm4, xmm4, 7)
a3(vpxor xmm1, xmm1, xmm5)
a3(vpxor xmm1, xmm1, xmm4)
a3(vpaddd xmm4, xmm0, xmm1)
a3(vpsrld xmm5, xmm4, 23)
a3(vpslld xmm4, xmm4, 9)
a3(vpxor xmm2, xmm2, xmm5)
a3(vpxor xmm2, xmm2, xmm4)
a3(vpaddd xmm4, xmm1, xmm2)
a3(vpsrld xmm5, xmm4, 19)
a3(vpslld xmm4, xmm4, 13)
a3(vpxor xmm3, xmm3, xmm5)
a3(pshufd xmm1, xmm1, 0x93)
a3(vpxor xmm3, xmm3, xmm4)
a3(vpaddd xmm4, xmm2, xmm3)
a3(vpsrld xmm5, xmm4, 14)
a3(vpslld xmm4, xmm4, 18)
a3(vpxor xmm0, xmm0, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a3(vpxor xmm0, xmm0, xmm4)
a3(pshufd xmm3, xmm3, 0x39)
a1(ja scrypt_salsa_avx_loop)
a3(vpaddd xmm0,xmm0,[esp+0])
a3(vpaddd xmm1,xmm1,[esp+16])
a3(vpaddd xmm2,xmm2,xmm6)
a3(vpaddd xmm3,xmm3,xmm7)
a2(lea eax,[ebx+ecx])
a2(xor ebx,edx)
a2(and eax,~0x7f)
a2(add ecx,64)
a2(shr eax,1)
a2(add eax, edi)
a2(cmp ecx,edx)
a2(vmovdqa [eax+0],xmm0)
a2(vmovdqa [eax+16],xmm1)
a2(vmovdqa [eax+32],xmm2)
a2(vmovdqa [eax+48],xmm3)
a2(mov eax,[ebp+28])
a1(jne scrypt_ChunkMix_avx_loop)
a2(mov esp,ebp)
a1(pop ebp)
a1(pop esi)
a1(pop edi)
a1(pop ebx)
a1(ret 16)
asm_naked_fn_end(scrypt_ChunkMix_avx)
#endif
/* x64 */
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
#define SCRYPT_SALSA_AVX
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
a2(lea rcx,[rcx*2])
a2(shl rcx,6)
a2(lea r9,[rcx-64])
a2(lea rax,[rsi+r9])
a2(lea r9,[rdx+r9])
a2(and rdx, rdx)
a2(vmovdqa xmm0,[rax+0])
a2(vmovdqa xmm1,[rax+16])
a2(vmovdqa xmm2,[rax+32])
a2(vmovdqa xmm3,[rax+48])
a1(jz scrypt_ChunkMix_avx_no_xor1)
a3(vpxor xmm0,xmm0,[r9+0])
a3(vpxor xmm1,xmm1,[r9+16])
a3(vpxor xmm2,xmm2,[r9+32])
a3(vpxor xmm3,xmm3,[r9+48])
a1(scrypt_ChunkMix_avx_no_xor1:)
a2(xor r9,r9)
a2(xor r8,r8)
a1(scrypt_ChunkMix_avx_loop:)
a2(and rdx, rdx)
a3(vpxor xmm0,xmm0,[rsi+r9+0])
a3(vpxor xmm1,xmm1,[rsi+r9+16])
a3(vpxor xmm2,xmm2,[rsi+r9+32])
a3(vpxor xmm3,xmm3,[rsi+r9+48])
a1(jz scrypt_ChunkMix_avx_no_xor2)
a3(vpxor xmm0,xmm0,[rdx+r9+0])
a3(vpxor xmm1,xmm1,[rdx+r9+16])
a3(vpxor xmm2,xmm2,[rdx+r9+32])
a3(vpxor xmm3,xmm3,[rdx+r9+48])
a1(scrypt_ChunkMix_avx_no_xor2:)
a2(vmovdqa xmm8,xmm0)
a2(vmovdqa xmm9,xmm1)
a2(vmovdqa xmm10,xmm2)
a2(vmovdqa xmm11,xmm3)
a2(mov rax,8)
a1(scrypt_salsa_avx_loop: )
a3(vpaddd xmm4, xmm1, xmm0)
a3(vpsrld xmm5, xmm4, 25)
a3(vpslld xmm4, xmm4, 7)
a3(vpxor xmm3, xmm3, xmm5)
a3(vpxor xmm3, xmm3, xmm4)
a3(vpaddd xmm4, xmm0, xmm3)
a3(vpsrld xmm5, xmm4, 23)
a3(vpslld xmm4, xmm4, 9)
a3(vpxor xmm2, xmm2, xmm5)
a3(vpxor xmm2, xmm2, xmm4)
a3(vpaddd xmm4, xmm3, xmm2)
a3(vpsrld xmm5, xmm4, 19)
a3(vpslld xmm4, xmm4, 13)
a3(vpxor xmm1, xmm1, xmm5)
a3(pshufd xmm3, xmm3, 0x93)
a3(vpxor xmm1, xmm1, xmm4)
a3(vpaddd xmm4, xmm2, xmm1)
a3(vpsrld xmm5, xmm4, 14)
a3(vpslld xmm4, xmm4, 18)
a3(vpxor xmm0, xmm0, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a3(vpxor xmm0, xmm0, xmm4)
a2(sub rax, 2)
a3(vpaddd xmm4, xmm3, xmm0)
a3(pshufd xmm1, xmm1, 0x39)
a3(vpsrld xmm5, xmm4, 25)
a3(vpslld xmm4, xmm4, 7)
a3(vpxor xmm1, xmm1, xmm5)
a3(vpxor xmm1, xmm1, xmm4)
a3(vpaddd xmm4, xmm0, xmm1)
a3(vpsrld xmm5, xmm4, 23)
a3(vpslld xmm4, xmm4, 9)
a3(vpxor xmm2, xmm2, xmm5)
a3(vpxor xmm2, xmm2, xmm4)
a3(vpaddd xmm4, xmm1, xmm2)
a3(vpsrld xmm5, xmm4, 19)
a3(vpslld xmm4, xmm4, 13)
a3(vpxor xmm3, xmm3, xmm5)
a3(pshufd xmm1, xmm1, 0x93)
a3(vpxor xmm3, xmm3, xmm4)
a3(vpaddd xmm4, xmm2, xmm3)
a3(vpsrld xmm5, xmm4, 14)
a3(vpslld xmm4, xmm4, 18)
a3(vpxor xmm0, xmm0, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a3(vpxor xmm0, xmm0, xmm4)
a3(pshufd xmm3, xmm3, 0x39)
a1(ja scrypt_salsa_avx_loop)
a3(vpaddd xmm0,xmm0,xmm8)
a3(vpaddd xmm1,xmm1,xmm9)
a3(vpaddd xmm2,xmm2,xmm10)
a3(vpaddd xmm3,xmm3,xmm11)
a2(lea rax,[r8+r9])
a2(xor r8,rcx)
a2(and rax,~0x7f)
a2(add r9,64)
a2(shr rax,1)
a2(add rax, rdi)
a2(cmp r9,rcx)
a2(vmovdqa [rax+0],xmm0)
a2(vmovdqa [rax+16],xmm1)
a2(vmovdqa [rax+32],xmm2)
a2(vmovdqa [rax+48],xmm3)
a1(jne scrypt_ChunkMix_avx_loop)
a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_avx)
#endif
/* intrinsic */
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
#define SCRYPT_SALSA_AVX
static void NOINLINE
scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
uint32_t i, blocksPerChunk = r * 2, half = 0;
xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3;
size_t rounds;
/* 1: X = B_{2r - 1} */
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
x0 = xmmp[0];
x1 = xmmp[1];
x2 = xmmp[2];
x3 = xmmp[3];
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
/* 3: X = H(X ^ B_i) */
xmmp = (xmmi *)scrypt_block(Bin, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
t0 = x0;
t1 = x1;
t2 = x2;
t3 = x3;
for (rounds = 8; rounds; rounds -= 2) {
x4 = x1;
x4 = _mm_add_epi32(x4, x0);
x5 = x4;
x4 = _mm_slli_epi32(x4, 7);
x5 = _mm_srli_epi32(x5, 25);
x3 = _mm_xor_si128(x3, x4);
x4 = x0;
x3 = _mm_xor_si128(x3, x5);
x4 = _mm_add_epi32(x4, x3);
x5 = x4;
x4 = _mm_slli_epi32(x4, 9);
x5 = _mm_srli_epi32(x5, 23);
x2 = _mm_xor_si128(x2, x4);
x4 = x3;
x2 = _mm_xor_si128(x2, x5);
x3 = _mm_shuffle_epi32(x3, 0x93);
x4 = _mm_add_epi32(x4, x2);
x5 = x4;
x4 = _mm_slli_epi32(x4, 13);
x5 = _mm_srli_epi32(x5, 19);
x1 = _mm_xor_si128(x1, x4);
x4 = x2;
x1 = _mm_xor_si128(x1, x5);
x2 = _mm_shuffle_epi32(x2, 0x4e);
x4 = _mm_add_epi32(x4, x1);
x5 = x4;
x4 = _mm_slli_epi32(x4, 18);
x5 = _mm_srli_epi32(x5, 14);
x0 = _mm_xor_si128(x0, x4);
x4 = x3;
x0 = _mm_xor_si128(x0, x5);
x1 = _mm_shuffle_epi32(x1, 0x39);
x4 = _mm_add_epi32(x4, x0);
x5 = x4;
x4 = _mm_slli_epi32(x4, 7);
x5 = _mm_srli_epi32(x5, 25);
x1 = _mm_xor_si128(x1, x4);
x4 = x0;
x1 = _mm_xor_si128(x1, x5);
x4 = _mm_add_epi32(x4, x1);
x5 = x4;
x4 = _mm_slli_epi32(x4, 9);
x5 = _mm_srli_epi32(x5, 23);
x2 = _mm_xor_si128(x2, x4);
x4 = x1;
x2 = _mm_xor_si128(x2, x5);
x1 = _mm_shuffle_epi32(x1, 0x93);
x4 = _mm_add_epi32(x4, x2);
x5 = x4;
x4 = _mm_slli_epi32(x4, 13);
x5 = _mm_srli_epi32(x5, 19);
x3 = _mm_xor_si128(x3, x4);
x4 = x2;
x3 = _mm_xor_si128(x3, x5);
x2 = _mm_shuffle_epi32(x2, 0x4e);
x4 = _mm_add_epi32(x4, x3);
x5 = x4;
x4 = _mm_slli_epi32(x4, 18);
x5 = _mm_srli_epi32(x5, 14);
x0 = _mm_xor_si128(x0, x4);
x3 = _mm_shuffle_epi32(x3, 0x39);
x0 = _mm_xor_si128(x0, x5);
}
x0 = _mm_add_epi32(x0, t0);
x1 = _mm_add_epi32(x1, t1);
x2 = _mm_add_epi32(x2, t2);
x3 = _mm_add_epi32(x3, t3);
/* 4: Y_i = X */
/* 6: B'[0..r-1] = Y_even */
/* 6: B'[r..2r-1] = Y_odd */
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
xmmp[0] = x0;
xmmp[1] = x1;
xmmp[2] = x2;
xmmp[3] = x3;
}
}
#endif
#if defined(SCRYPT_SALSA_AVX)
/* uses salsa_core_tangle_sse2 */
#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa/8-AVX"
#undef SCRYPT_SALSA_INCLUDED
#define SCRYPT_SALSA_INCLUDED
#endif

View File

@ -0,0 +1,443 @@
/* x86 */
#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
#define SCRYPT_SALSA_SSE2
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_sse2)
a1(push ebx)
a1(push edi)
a1(push esi)
a1(push ebp)
a2(mov ebp,esp)
a2(mov edi,[ebp+20])
a2(mov esi,[ebp+24])
a2(mov eax,[ebp+28])
a2(mov ebx,[ebp+32])
a2(sub esp,32)
a2(and esp,~63)
a2(lea edx,[ebx*2])
a2(shl edx,6)
a2(lea ecx,[edx-64])
a2(and eax, eax)
a2(movdqa xmm0,[ecx+esi+0])
a2(movdqa xmm1,[ecx+esi+16])
a2(movdqa xmm2,[ecx+esi+32])
a2(movdqa xmm3,[ecx+esi+48])
a1(jz scrypt_ChunkMix_sse2_no_xor1)
a2(pxor xmm0,[ecx+eax+0])
a2(pxor xmm1,[ecx+eax+16])
a2(pxor xmm2,[ecx+eax+32])
a2(pxor xmm3,[ecx+eax+48])
a1(scrypt_ChunkMix_sse2_no_xor1:)
a2(xor ecx,ecx)
a2(xor ebx,ebx)
a1(scrypt_ChunkMix_sse2_loop:)
a2(and eax, eax)
a2(pxor xmm0,[esi+ecx+0])
a2(pxor xmm1,[esi+ecx+16])
a2(pxor xmm2,[esi+ecx+32])
a2(pxor xmm3,[esi+ecx+48])
a1(jz scrypt_ChunkMix_sse2_no_xor2)
a2(pxor xmm0,[eax+ecx+0])
a2(pxor xmm1,[eax+ecx+16])
a2(pxor xmm2,[eax+ecx+32])
a2(pxor xmm3,[eax+ecx+48])
a1(scrypt_ChunkMix_sse2_no_xor2:)
a2(movdqa [esp+0],xmm0)
a2(movdqa [esp+16],xmm1)
a2(movdqa xmm6,xmm2)
a2(movdqa xmm7,xmm3)
a2(mov eax,8)
a1(scrypt_salsa_sse2_loop: )
a2(movdqa xmm4, xmm1)
a2(paddd xmm4, xmm0)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 7)
a2(psrld xmm5, 25)
a2(pxor xmm3, xmm4)
a2(movdqa xmm4, xmm0)
a2(pxor xmm3, xmm5)
a2(paddd xmm4, xmm3)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 9)
a2(psrld xmm5, 23)
a2(pxor xmm2, xmm4)
a2(movdqa xmm4, xmm3)
a2(pxor xmm2, xmm5)
a3(pshufd xmm3, xmm3, 0x93)
a2(paddd xmm4, xmm2)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 13)
a2(psrld xmm5, 19)
a2(pxor xmm1, xmm4)
a2(movdqa xmm4, xmm2)
a2(pxor xmm1, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a2(paddd xmm4, xmm1)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 18)
a2(psrld xmm5, 14)
a2(pxor xmm0, xmm4)
a2(movdqa xmm4, xmm3)
a2(pxor xmm0, xmm5)
a3(pshufd xmm1, xmm1, 0x39)
a2(paddd xmm4, xmm0)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 7)
a2(psrld xmm5, 25)
a2(pxor xmm1, xmm4)
a2(movdqa xmm4, xmm0)
a2(pxor xmm1, xmm5)
a2(paddd xmm4, xmm1)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 9)
a2(psrld xmm5, 23)
a2(pxor xmm2, xmm4)
a2(movdqa xmm4, xmm1)
a2(pxor xmm2, xmm5)
a3(pshufd xmm1, xmm1, 0x93)
a2(paddd xmm4, xmm2)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 13)
a2(psrld xmm5, 19)
a2(pxor xmm3, xmm4)
a2(movdqa xmm4, xmm2)
a2(pxor xmm3, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a2(paddd xmm4, xmm3)
a2(sub eax, 2)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 18)
a2(psrld xmm5, 14)
a2(pxor xmm0, xmm4)
a3(pshufd xmm3, xmm3, 0x39)
a2(pxor xmm0, xmm5)
a1(ja scrypt_salsa_sse2_loop)
a2(paddd xmm0,[esp+0])
a2(paddd xmm1,[esp+16])
a2(paddd xmm2,xmm6)
a2(paddd xmm3,xmm7)
a2(lea eax,[ebx+ecx])
a2(xor ebx,edx)
a2(and eax,~0x7f)
a2(add ecx,64)
a2(shr eax,1)
a2(add eax, edi)
a2(cmp ecx,edx)
a2(movdqa [eax+0],xmm0)
a2(movdqa [eax+16],xmm1)
a2(movdqa [eax+32],xmm2)
a2(movdqa [eax+48],xmm3)
a2(mov eax,[ebp+28])
a1(jne scrypt_ChunkMix_sse2_loop)
a2(mov esp,ebp)
a1(pop ebp)
a1(pop esi)
a1(pop edi)
a1(pop ebx)
a1(ret 16)
asm_naked_fn_end(scrypt_ChunkMix_sse2)
#endif
/* x64 */
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
#define SCRYPT_SALSA_SSE2
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_sse2)
a2(lea rcx,[rcx*2])
a2(shl rcx,6)
a2(lea r9,[rcx-64])
a2(lea rax,[rsi+r9])
a2(lea r9,[rdx+r9])
a2(and rdx, rdx)
a2(movdqa xmm0,[rax+0])
a2(movdqa xmm1,[rax+16])
a2(movdqa xmm2,[rax+32])
a2(movdqa xmm3,[rax+48])
a1(jz scrypt_ChunkMix_sse2_no_xor1)
a2(pxor xmm0,[r9+0])
a2(pxor xmm1,[r9+16])
a2(pxor xmm2,[r9+32])
a2(pxor xmm3,[r9+48])
a1(scrypt_ChunkMix_sse2_no_xor1:)
a2(xor r9,r9)
a2(xor r8,r8)
a1(scrypt_ChunkMix_sse2_loop:)
a2(and rdx, rdx)
a2(pxor xmm0,[rsi+r9+0])
a2(pxor xmm1,[rsi+r9+16])
a2(pxor xmm2,[rsi+r9+32])
a2(pxor xmm3,[rsi+r9+48])
a1(jz scrypt_ChunkMix_sse2_no_xor2)
a2(pxor xmm0,[rdx+r9+0])
a2(pxor xmm1,[rdx+r9+16])
a2(pxor xmm2,[rdx+r9+32])
a2(pxor xmm3,[rdx+r9+48])
a1(scrypt_ChunkMix_sse2_no_xor2:)
a2(movdqa xmm8,xmm0)
a2(movdqa xmm9,xmm1)
a2(movdqa xmm10,xmm2)
a2(movdqa xmm11,xmm3)
a2(mov rax,8)
a1(scrypt_salsa_sse2_loop: )
a2(movdqa xmm4, xmm1)
a2(paddd xmm4, xmm0)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 7)
a2(psrld xmm5, 25)
a2(pxor xmm3, xmm4)
a2(movdqa xmm4, xmm0)
a2(pxor xmm3, xmm5)
a2(paddd xmm4, xmm3)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 9)
a2(psrld xmm5, 23)
a2(pxor xmm2, xmm4)
a2(movdqa xmm4, xmm3)
a2(pxor xmm2, xmm5)
a3(pshufd xmm3, xmm3, 0x93)
a2(paddd xmm4, xmm2)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 13)
a2(psrld xmm5, 19)
a2(pxor xmm1, xmm4)
a2(movdqa xmm4, xmm2)
a2(pxor xmm1, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a2(paddd xmm4, xmm1)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 18)
a2(psrld xmm5, 14)
a2(pxor xmm0, xmm4)
a2(movdqa xmm4, xmm3)
a2(pxor xmm0, xmm5)
a3(pshufd xmm1, xmm1, 0x39)
a2(paddd xmm4, xmm0)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 7)
a2(psrld xmm5, 25)
a2(pxor xmm1, xmm4)
a2(movdqa xmm4, xmm0)
a2(pxor xmm1, xmm5)
a2(paddd xmm4, xmm1)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 9)
a2(psrld xmm5, 23)
a2(pxor xmm2, xmm4)
a2(movdqa xmm4, xmm1)
a2(pxor xmm2, xmm5)
a3(pshufd xmm1, xmm1, 0x93)
a2(paddd xmm4, xmm2)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 13)
a2(psrld xmm5, 19)
a2(pxor xmm3, xmm4)
a2(movdqa xmm4, xmm2)
a2(pxor xmm3, xmm5)
a3(pshufd xmm2, xmm2, 0x4e)
a2(paddd xmm4, xmm3)
a2(sub rax, 2)
a2(movdqa xmm5, xmm4)
a2(pslld xmm4, 18)
a2(psrld xmm5, 14)
a2(pxor xmm0, xmm4)
a3(pshufd xmm3, xmm3, 0x39)
a2(pxor xmm0, xmm5)
a1(ja scrypt_salsa_sse2_loop)
a2(paddd xmm0,xmm8)
a2(paddd xmm1,xmm9)
a2(paddd xmm2,xmm10)
a2(paddd xmm3,xmm11)
a2(lea rax,[r8+r9])
a2(xor r8,rcx)
a2(and rax,~0x7f)
a2(add r9,64)
a2(shr rax,1)
a2(add rax, rdi)
a2(cmp r9,rcx)
a2(movdqa [rax+0],xmm0)
a2(movdqa [rax+16],xmm1)
a2(movdqa [rax+32],xmm2)
a2(movdqa [rax+48],xmm3)
a1(jne scrypt_ChunkMix_sse2_loop)
a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_sse2)
#endif
/* intrinsic */
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
#define SCRYPT_SALSA_SSE2
static void NOINLINE
scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
uint32_t i, blocksPerChunk = r * 2, half = 0;
xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3;
size_t rounds;
/* 1: X = B_{2r - 1} */
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
x0 = xmmp[0];
x1 = xmmp[1];
x2 = xmmp[2];
x3 = xmmp[3];
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
/* 3: X = H(X ^ B_i) */
xmmp = (xmmi *)scrypt_block(Bin, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
if (Bxor) {
xmmp = (xmmi *)scrypt_block(Bxor, i);
x0 = _mm_xor_si128(x0, xmmp[0]);
x1 = _mm_xor_si128(x1, xmmp[1]);
x2 = _mm_xor_si128(x2, xmmp[2]);
x3 = _mm_xor_si128(x3, xmmp[3]);
}
t0 = x0;
t1 = x1;
t2 = x2;
t3 = x3;
for (rounds = 8; rounds; rounds -= 2) {
x4 = x1;
x4 = _mm_add_epi32(x4, x0);
x5 = x4;
x4 = _mm_slli_epi32(x4, 7);
x5 = _mm_srli_epi32(x5, 25);
x3 = _mm_xor_si128(x3, x4);
x4 = x0;
x3 = _mm_xor_si128(x3, x5);
x4 = _mm_add_epi32(x4, x3);
x5 = x4;
x4 = _mm_slli_epi32(x4, 9);
x5 = _mm_srli_epi32(x5, 23);
x2 = _mm_xor_si128(x2, x4);
x4 = x3;
x2 = _mm_xor_si128(x2, x5);
x3 = _mm_shuffle_epi32(x3, 0x93);
x4 = _mm_add_epi32(x4, x2);
x5 = x4;
x4 = _mm_slli_epi32(x4, 13);
x5 = _mm_srli_epi32(x5, 19);
x1 = _mm_xor_si128(x1, x4);
x4 = x2;
x1 = _mm_xor_si128(x1, x5);
x2 = _mm_shuffle_epi32(x2, 0x4e);
x4 = _mm_add_epi32(x4, x1);
x5 = x4;
x4 = _mm_slli_epi32(x4, 18);
x5 = _mm_srli_epi32(x5, 14);
x0 = _mm_xor_si128(x0, x4);
x4 = x3;
x0 = _mm_xor_si128(x0, x5);
x1 = _mm_shuffle_epi32(x1, 0x39);
x4 = _mm_add_epi32(x4, x0);
x5 = x4;
x4 = _mm_slli_epi32(x4, 7);
x5 = _mm_srli_epi32(x5, 25);
x1 = _mm_xor_si128(x1, x4);
x4 = x0;
x1 = _mm_xor_si128(x1, x5);
x4 = _mm_add_epi32(x4, x1);
x5 = x4;
x4 = _mm_slli_epi32(x4, 9);
x5 = _mm_srli_epi32(x5, 23);
x2 = _mm_xor_si128(x2, x4);
x4 = x1;
x2 = _mm_xor_si128(x2, x5);
x1 = _mm_shuffle_epi32(x1, 0x93);
x4 = _mm_add_epi32(x4, x2);
x5 = x4;
x4 = _mm_slli_epi32(x4, 13);
x5 = _mm_srli_epi32(x5, 19);
x3 = _mm_xor_si128(x3, x4);
x4 = x2;
x3 = _mm_xor_si128(x3, x5);
x2 = _mm_shuffle_epi32(x2, 0x4e);
x4 = _mm_add_epi32(x4, x3);
x5 = x4;
x4 = _mm_slli_epi32(x4, 18);
x5 = _mm_srli_epi32(x5, 14);
x0 = _mm_xor_si128(x0, x4);
x3 = _mm_shuffle_epi32(x3, 0x39);
x0 = _mm_xor_si128(x0, x5);
}
x0 = _mm_add_epi32(x0, t0);
x1 = _mm_add_epi32(x1, t1);
x2 = _mm_add_epi32(x2, t2);
x3 = _mm_add_epi32(x3, t3);
/* 4: Y_i = X */
/* 6: B'[0..r-1] = Y_even */
/* 6: B'[r..2r-1] = Y_odd */
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
xmmp[0] = x0;
xmmp[1] = x1;
xmmp[2] = x2;
xmmp[3] = x3;
}
}
#endif
#if defined(SCRYPT_SALSA_SSE2)
#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa/8-SSE2"
#undef SCRYPT_SALSA_INCLUDED
#define SCRYPT_SALSA_INCLUDED
#endif
/* used by avx,etc as well */
#if defined(SCRYPT_SALSA_INCLUDED)
/*
Default layout:
0 1 2 3
4 5 6 7
8 9 10 11
12 13 14 15
SSE2 layout:
0 5 10 15
12 1 6 11
8 13 2 7
4 9 14 3
*/
static void STDCALL
salsa_core_tangle_sse2(uint32_t *blocks, size_t count) {
uint32_t t;
while (count--) {
t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t;
t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t;
t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t;
t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t;
t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t;
t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t;
blocks += 16;
}
}
#endif

View File

@ -0,0 +1,70 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)
#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa20/8 Ref"
#undef SCRYPT_SALSA_INCLUDED
#define SCRYPT_SALSA_INCLUDED
#define SCRYPT_SALSA_BASIC
static void
salsa_core_basic(uint32_t state[16]) {
size_t rounds = 8;
uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t;
x0 = state[0];
x1 = state[1];
x2 = state[2];
x3 = state[3];
x4 = state[4];
x5 = state[5];
x6 = state[6];
x7 = state[7];
x8 = state[8];
x9 = state[9];
x10 = state[10];
x11 = state[11];
x12 = state[12];
x13 = state[13];
x14 = state[14];
x15 = state[15];
#define quarter(a,b,c,d) \
t = a+d; t = ROTL32(t, 7); b ^= t; \
t = b+a; t = ROTL32(t, 9); c ^= t; \
t = c+b; t = ROTL32(t, 13); d ^= t; \
t = d+c; t = ROTL32(t, 18); a ^= t; \
for (; rounds; rounds -= 2) {
quarter( x0, x4, x8,x12)
quarter( x5, x9,x13, x1)
quarter(x10,x14, x2, x6)
quarter(x15, x3, x7,x11)
quarter( x0, x1, x2, x3)
quarter( x5, x6, x7, x4)
quarter(x10,x11, x8, x9)
quarter(x15,x12,x13,x14)
}
state[0] += x0;
state[1] += x1;
state[2] += x2;
state[3] += x3;
state[4] += x4;
state[5] += x5;
state[6] += x6;
state[7] += x7;
state[8] += x8;
state[9] += x9;
state[10] += x10;
state[11] += x11;
state[12] += x12;
state[13] += x13;
state[14] += x14;
state[15] += x15;
#undef quarter
}
#endif

View File

@ -0,0 +1,112 @@
typedef struct scrypt_hmac_state_t {
scrypt_hash_state inner, outer;
} scrypt_hmac_state;
static void
scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) {
scrypt_hash_state st;
scrypt_hash_init(&st);
scrypt_hash_update(&st, m, mlen);
scrypt_hash_finish(&st, hash);
}
/* hmac */
static void
scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) {
uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
size_t i;
scrypt_hash_init(&st->inner);
scrypt_hash_init(&st->outer);
if (keylen <= SCRYPT_HASH_BLOCK_SIZE) {
/* use the key directly if it's <= blocksize bytes */
memcpy(pad, key, keylen);
} else {
/* if it's > blocksize bytes, hash it */
scrypt_hash(pad, key, keylen);
}
/* inner = (key ^ 0x36) */
/* h(inner || ...) */
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
pad[i] ^= 0x36;
scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE);
/* outer = (key ^ 0x5c) */
/* h(outer || ...) */
for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
pad[i] ^= (0x5c ^ 0x36);
scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);
scrypt_ensure_zero(pad, sizeof(pad));
}
static void
scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) {
/* h(inner || m...) */
scrypt_hash_update(&st->inner, m, mlen);
}
static void
scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) {
/* h(inner || m) */
scrypt_hash_digest innerhash;
scrypt_hash_finish(&st->inner, innerhash);
/* h(outer || h(inner || m)) */
scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash));
scrypt_hash_finish(&st->outer, mac);
scrypt_ensure_zero(st, sizeof(*st));
}
static void
scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) {
scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
scrypt_hash_digest ti, u;
uint8_t be[4];
uint32_t i, j, blocks;
uint64_t c;
/* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */
/* hmac(password, ...) */
scrypt_hmac_init(&hmac_pw, password, password_len);
/* hmac(password, salt...) */
hmac_pw_salt = hmac_pw;
scrypt_hmac_update(&hmac_pw_salt, salt, salt_len);
blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
for (i = 1; i <= blocks; i++) {
/* U1 = hmac(password, salt || be(i)) */
U32TO8_BE(be, i);
work = hmac_pw_salt;
scrypt_hmac_update(&work, be, 4);
scrypt_hmac_finish(&work, ti);
memcpy(u, ti, sizeof(u));
/* T[i] = U1 ^ U2 ^ U3... */
for (c = 0; c < N - 1; c++) {
/* UX = hmac(password, U{X-1}) */
work = hmac_pw;
scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE);
scrypt_hmac_finish(&work, u);
/* T[i] ^= UX */
for (j = 0; j < sizeof(u); j++)
ti[j] ^= u[j];
}
memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes);
out += SCRYPT_HASH_DIGEST_SIZE;
bytes -= SCRYPT_HASH_DIGEST_SIZE;
}
scrypt_ensure_zero(ti, sizeof(ti));
scrypt_ensure_zero(u, sizeof(u));
scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw));
scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt));
}

View File

@ -0,0 +1,364 @@
#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC))
#define X86ASM
/* gcc 2.95 royally screws up stack alignments on variables */
#if (defined(COMPILER_MSVC6PP_AND_LATER) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000)))
#define X86ASM_SSE
#define X86ASM_SSE2
#endif
#if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= 1400)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102)))
#define X86ASM_SSSE3
#endif
#if ((defined(COMPILER_GCC) && (COMPILER_GCC >= 40400)))
#define X86ASM_AVX
#endif
#endif
#if defined(CPU_X86_64) && defined(COMPILER_GCC)
#define X86_64ASM
#define X86_64ASM_SSE2
#if (COMPILER_GCC >= 40102)
#define X86_64ASM_SSSE3
#endif
#if (COMPILER_GCC >= 40400)
#define X86_64ASM_AVX
#endif
#endif
#if defined(COMPILER_MSVC)
#define X86_INTRINSIC
#if defined(CPU_X86_64) || defined(X86ASM_SSE)
#define X86_INTRINSIC_SSE
#endif
#if defined(CPU_X86_64) || defined(X86ASM_SSE2)
#define X86_INTRINSIC_SSE2
#endif
#if (COMPILER_MSVC >= 1400)
#define X86_INTRINSIC_SSSE3
#endif
#endif
#if defined(COMPILER_MSVC) && defined(CPU_X86_64)
#define X86_64USE_INTRINSIC
#endif
#if defined(COMPILER_MSVC) && defined(CPU_X86_64)
#define X86_64USE_INTRINSIC
#endif
#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS)
#define X86_INTRINSIC
#if defined(__SSE__)
#define X86_INTRINSIC_SSE
#endif
#if defined(__SSE2__)
#define X86_INTRINSIC_SSE2
#endif
#if defined(__SSSE3__)
#define X86_INTRINSIC_SSSE3
#endif
#if defined(__AVX__)
#define X86_INTRINSIC_AVX
#endif
#endif
/* only use simd on windows (or SSE2 on gcc)! */
#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC)
#if defined(X86_INTRINSIC_SSE)
#define X86_INTRINSIC
#include <mmintrin.h>
#include <xmmintrin.h>
typedef __m64 qmm;
typedef __m128 xmm;
typedef __m128d xmmd;
#endif
#if defined(X86_INTRINSIC_SSE2)
#define X86_INTRINSIC_SSE2
#include <emmintrin.h>
typedef __m128i xmmi;
#endif
#if defined(X86_INTRINSIC_SSSE3)
#define X86_INTRINSIC_SSSE3
#include <tmmintrin.h>
#endif
#endif
#if defined(X86_INTRINSIC_SSE2)
typedef union packedelem8_t {
uint8_t u[16];
xmmi v;
} packedelem8;
typedef union packedelem32_t {
uint32_t u[4];
xmmi v;
} packedelem32;
typedef union packedelem64_t {
uint64_t u[2];
xmmi v;
} packedelem64;
#else
typedef union packedelem8_t {
uint8_t u[16];
uint32_t dw[4];
} packedelem8;
typedef union packedelem32_t {
uint32_t u[4];
uint8_t b[16];
} packedelem32;
typedef union packedelem64_t {
uint64_t u[2];
uint8_t b[16];
} packedelem64;
#endif
#if defined(X86_INTRINSIC_SSSE3) || defined(X86ASM_SSSE3) || defined(X86_64ASM_SSSE3)
const packedelem8 MM16 ssse3_rotr16_64bit = {{2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9}};
const packedelem8 MM16 ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}};
const packedelem8 MM16 ssse3_rotl8_32bit = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}};
const packedelem8 MM16 ssse3_endian_swap_64bit = {{7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}};
#endif
/*
x86 inline asm for gcc/msvc. usage:
asm_naked_fn_proto(return_type, name) (type parm1, type parm2..)
asm_naked_fn(name)
a1(..)
a2(.., ..)
a3(.., .., ..)
a1(ret)
asm_naked_fn_end(name)
*/
#if defined(X86ASM) || defined(X86_64ASM)
#if defined(COMPILER_MSVC)
#pragma warning(disable : 4731) /* frame pointer modified by inline assembly */
#define a1(x) __asm {x}
#define a2(x, y) __asm {x, y}
#define a3(x, y, z) __asm {x, y, z}
#define a4(x, y, z, w) __asm {x, y, z, w}
#define al(x) __asm {label##x:}
#define aj(x, y, z) __asm {x label##y}
#define asm_align8 a1(ALIGN 8)
#define asm_align16 a1(ALIGN 16)
#define asm_naked_fn_proto(type, fn) static NAKED type STDCALL fn
#define asm_naked_fn(fn) {
#define asm_naked_fn_end(fn) }
#elif defined(COMPILER_GCC)
#define GNU_AS1(x) #x ";\n"
#define GNU_AS2(x, y) #x ", " #y ";\n"
#define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n"
#define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n"
#define GNU_ASL(x) "\n" #x ":\n"
#define GNU_ASJ(x, y, z) #x " " #y #z ";"
#define a1(x) GNU_AS1(x)
#define a2(x, y) GNU_AS2(x, y)
#define a3(x, y, z) GNU_AS3(x, y, z)
#define a4(x, y, z, w) GNU_AS4(x, y, z, w)
#define al(x) GNU_ASL(x)
#define aj(x, y, z) GNU_ASJ(x, y, z)
#define asm_align8 a1(.align 8)
#define asm_align16 a1(.align 16)
#define asm_naked_fn_proto(type, fn) extern type STDCALL fn
#define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASL(fn)
#define asm_naked_fn_end(fn) ".att_syntax prefix;\n.type " #fn ",@function\n.size " #fn ",.-" #fn "\n" );
#define asm_gcc() __asm__ __volatile__(".intel_syntax noprefix;\n"
#define asm_gcc_parms() ".att_syntax prefix;"
#define asm_gcc_trashed() __asm__ __volatile__("" :::
#define asm_gcc_end() );
#else
need x86 asm
#endif
#endif /* X86ASM || X86_64ASM */
#if defined(CPU_X86) || defined(CPU_X86_64)
typedef enum cpu_flags_x86_t {
cpu_mmx = 1 << 0,
cpu_sse = 1 << 1,
cpu_sse2 = 1 << 2,
cpu_sse3 = 1 << 3,
cpu_ssse3 = 1 << 4,
cpu_sse4_1 = 1 << 5,
cpu_sse4_2 = 1 << 6,
cpu_avx = 1 << 7
} cpu_flags_x86;
typedef enum cpu_vendors_x86_t {
cpu_nobody,
cpu_intel,
cpu_amd
} cpu_vendors_x86;
typedef struct x86_regs_t {
uint32_t eax, ebx, ecx, edx;
} x86_regs;
#if defined(X86ASM)
asm_naked_fn_proto(int, has_cpuid)(void)
asm_naked_fn(has_cpuid)
a1(pushfd)
a1(pop eax)
a2(mov ecx, eax)
a2(xor eax, 0x200000)
a1(push eax)
a1(popfd)
a1(pushfd)
a1(pop eax)
a2(xor eax, ecx)
a2(shr eax, 21)
a2(and eax, 1)
a1(push ecx)
a1(popfd)
a1(ret)
asm_naked_fn_end(has_cpuid)
#endif /* X86ASM */
static void NOINLINE
get_cpuid(x86_regs *regs, uint32_t flags) {
#if defined(COMPILER_MSVC)
__cpuid((int *)regs, (int)flags);
#else
#if defined(CPU_X86_64)
#define cpuid_bx rbx
#else
#define cpuid_bx ebx
#endif
asm_gcc()
a1(push cpuid_bx)
a1(cpuid)
a2(mov [%1 + 0], eax)
a2(mov [%1 + 4], ebx)
a2(mov [%1 + 8], ecx)
a2(mov [%1 + 12], edx)
a1(pop cpuid_bx)
asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc"
asm_gcc_end()
#endif
}
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
static uint64_t NOINLINE
get_xgetbv(uint32_t flags) {
#if defined(COMPILER_MSVC)
return _xgetbv(flags);
#else
uint32_t lo, hi;
asm_gcc()
a1(xgetbv)
asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi)
asm_gcc_end()
return ((uint64_t)lo | ((uint64_t)hi << 32));
#endif
}
#endif // AVX support
#if defined(SCRYPT_TEST_SPEED)
size_t cpu_detect_mask = (size_t)-1;
#endif
static size_t
detect_cpu(void) {
union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
cpu_vendors_x86 vendor = cpu_nobody;
x86_regs regs;
uint32_t max_level;
size_t cpu_flags = 0;
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
uint64_t xgetbv_flags;
#endif
#if defined(CPU_X86)
if (!has_cpuid())
return cpu_flags;
#endif
get_cpuid(&regs, 0);
max_level = regs.eax;
vendor_string.i[0] = regs.ebx;
vendor_string.i[1] = regs.edx;
vendor_string.i[2] = regs.ecx;
if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12))
vendor = cpu_intel;
else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12))
vendor = cpu_amd;
if (max_level & 0x00000500) {
/* "Intel P5 pre-B0" */
cpu_flags |= cpu_mmx;
return cpu_flags;
}
if (max_level < 1)
return cpu_flags;
get_cpuid(&regs, 1);
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
/* xsave/xrestore */
if (regs.ecx & (1 << 27)) {
xgetbv_flags = get_xgetbv(0);
if ((regs.ecx & (1 << 28)) && (xgetbv_flags & 0x6)) cpu_flags |= cpu_avx;
}
#endif
if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2;
if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_2;
if (regs.ecx & (1 << 9)) cpu_flags |= cpu_ssse3;
if (regs.ecx & (1 )) cpu_flags |= cpu_sse3;
if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2;
if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse;
if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx;
#if defined(SCRYPT_TEST_SPEED)
cpu_flags &= cpu_detect_mask;
#endif
return cpu_flags;
}
#if defined(SCRYPT_TEST_SPEED)
static const char *
get_top_cpuflag_desc(size_t flag) {
if (flag & cpu_avx) return "AVX";
else if (flag & cpu_sse4_2) return "SSE4.2";
else if (flag & cpu_sse4_1) return "SSE4.1";
else if (flag & cpu_ssse3) return "SSSE3";
else if (flag & cpu_sse2) return "SSE2";
else if (flag & cpu_sse) return "SSE";
else if (flag & cpu_mmx) return "MMX";
else return "Basic";
}
#endif
/* enable the highest system-wide option */
#if defined(SCRYPT_CHOOSE_COMPILETIME)
#if !defined(__AVX__)
#undef X86_64ASM_AVX
#undef X86ASM_AVX
#undef X86_INTRINSIC_AVX
#endif
#if !defined(__SSSE3__)
#undef X86_64ASM_SSSE3
#undef X86ASM_SSSE3
#undef X86_INTRINSIC_SSSE3
#endif
#if !defined(__SSE2__)
#undef X86_64ASM_SSE2
#undef X86ASM_SSE2
#undef X86_INTRINSIC_SSE2
#endif
#endif
#endif /* defined(CPU_X86) || defined(CPU_X86_64) */

View File

@ -0,0 +1,281 @@
/* determine os */
#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__)
#include <windows.h>
#include <wincrypt.h>
#define OS_WINDOWS
#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__)
#include <sys/mman.h>
#include <sys/time.h>
#include <fcntl.h>
#define OS_SOLARIS
#else
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/param.h> /* need this to define BSD */
#include <unistd.h>
#include <fcntl.h>
#define OS_NIX
#if defined(__linux__)
#include <endian.h>
#define OS_LINUX
#elif defined(BSD)
#define OS_BSD
#if defined(MACOS_X) || (defined(__APPLE__) & defined(__MACH__))
#define OS_OSX
#elif defined(macintosh) || defined(Macintosh)
#define OS_MAC
#elif defined(__OpenBSD__)
#define OS_OPENBSD
#endif
#endif
#endif
/* determine compiler */
#if defined(_MSC_VER)
#define COMPILER_MSVC _MSC_VER
#if ((COMPILER_MSVC > 1200) || defined(_mm_free))
#define COMPILER_MSVC6PP_AND_LATER
#endif
#if (COMPILER_MSVC >= 1500)
#define COMPILER_HAS_TMMINTRIN
#endif
#pragma warning(disable : 4127) /* conditional expression is constant */
#pragma warning(disable : 4100) /* unreferenced formal parameter */
#define _CRT_SECURE_NO_WARNINGS
#include <float.h>
#include <stdlib.h> /* _rotl */
#include <intrin.h>
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed int int32_t;
typedef unsigned __int64 uint64_t;
typedef signed __int64 int64_t;
#define ROTL32(a,b) _rotl(a,b)
#define ROTR32(a,b) _rotr(a,b)
#define ROTL64(a,b) _rotl64(a,b)
#define ROTR64(a,b) _rotr64(a,b)
#undef NOINLINE
#define NOINLINE __declspec(noinline)
#undef INLINE
#define INLINE __forceinline
#undef FASTCALL
#define FASTCALL __fastcall
#undef CDECL
#define CDECL __cdecl
#undef STDCALL
#define STDCALL __stdcall
#undef NAKED
#define NAKED __declspec(naked)
#define MM16 __declspec(align(16))
#endif
#if defined(__ICC)
#define COMPILER_INTEL
#endif
#if defined(__GNUC__)
#if (__GNUC__ >= 3)
#define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__
#else
#define COMPILER_GCC_PATCHLEVEL 0
#endif
#define COMPILER_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + COMPILER_GCC_PATCHLEVEL)
#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
#define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b)))
#define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b)))
#undef NOINLINE
#if (COMPILER_GCC >= 30000)
#define NOINLINE __attribute__((noinline))
#else
#define NOINLINE
#endif
#undef INLINE
#if (COMPILER_GCC >= 30000)
#define INLINE __attribute__((always_inline))
#else
#define INLINE inline
#endif
#undef FASTCALL
#if (COMPILER_GCC >= 30400)
#define FASTCALL __attribute__((fastcall))
#else
#define FASTCALL
#endif
#undef CDECL
#define CDECL __attribute__((cdecl))
#undef STDCALL
#define STDCALL __attribute__((stdcall))
#define MM16 __attribute__((aligned(16)))
#include <stdint.h>
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
#define COMPILER_MINGW
#endif
#if defined(__PATHCC__)
#define COMPILER_PATHCC
#endif
#define OPTIONAL_INLINE
#if defined(OPTIONAL_INLINE)
#undef OPTIONAL_INLINE
#define OPTIONAL_INLINE INLINE
#else
#define OPTIONAL_INLINE
#endif
#define CRYPTO_FN NOINLINE STDCALL
/* determine cpu */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64)
#define CPU_X86_64
#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500))
#define CPU_X86 500
#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400))
#define CPU_X86 400
#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__)
#define CPU_X86 300
#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64)
#define CPU_IA64
#endif
#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9)
#define CPU_SPARC
#if defined(__sparcv9)
#define CPU_SPARC64
#endif
#endif
#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64))
#define CPU_64BITS
#undef FASTCALL
#define FASTCALL
#undef CDECL
#define CDECL
#undef STDCALL
#define STDCALL
#endif
#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC)
#define CPU_PPC
#if defined(_ARCH_PWR7)
#define CPU_POWER7
#elif defined(__64BIT__)
#define CPU_PPC64
#else
#define CPU_PPC32
#endif
#endif
#if defined(__hppa__) || defined(__hppa)
#define CPU_HPPA
#endif
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
#define CPU_ALPHA
#endif
/* endian */
#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \
(defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \
(defined(CPU_X86) || defined(CPU_X86_64)) || \
(defined(vax) || defined(MIPSEL) || defined(_MIPSEL)))
#define CPU_LE
#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \
(defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \
(defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB))
#define CPU_BE
#else
/* unknown endian! */
#endif
#define U8TO32_BE(p) \
(((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) ))
#define U8TO32_LE(p) \
(((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \
((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))
#define U32TO8_BE(p, v) \
(p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
(p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) );
#define U32TO8_LE(p, v) \
(p)[0] = (uint8_t)((v) ); (p)[1] = (uint8_t)((v) >> 8); \
(p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24);
#define U8TO64_BE(p) \
(((uint64_t)U8TO32_BE(p) << 32) | (uint64_t)U8TO32_BE((p) + 4))
#define U8TO64_LE(p) \
(((uint64_t)U8TO32_LE(p)) | ((uint64_t)U8TO32_LE((p) + 4) << 32))
#define U64TO8_BE(p, v) \
U32TO8_BE((p), (uint32_t)((v) >> 32)); \
U32TO8_BE((p) + 4, (uint32_t)((v) ));
#define U64TO8_LE(p, v) \
U32TO8_LE((p), (uint32_t)((v) )); \
U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
#define U32_SWAP(v) { \
(v) = (((v) << 8) & 0xFF00FF00 ) | (((v) >> 8) & 0xFF00FF ); \
(v) = ((v) << 16) | ((v) >> 16); \
}
#define U64_SWAP(v) { \
(v) = (((v) << 8) & 0xFF00FF00FF00FF00ull ) | (((v) >> 8) & 0x00FF00FF00FF00FFull ); \
(v) = (((v) << 16) & 0xFFFF0000FFFF0000ull ) | (((v) >> 16) & 0x0000FFFF0000FFFFull ); \
(v) = ((v) << 32) | ((v) >> 32); \
}
static int
scrypt_verify(const uint8_t *x, const uint8_t *y, size_t len) {
uint32_t differentbits = 0;
while (len--)
differentbits |= (*x++ ^ *y++);
return (1 & ((differentbits - 1) >> 8));
}
void
scrypt_ensure_zero(void *p, size_t len) {
#if ((defined(CPU_X86) || defined(CPU_X86_64)) && defined(COMPILER_MSVC))
__stosb((unsigned char *)p, 0, len);
#elif (defined(CPU_X86) && defined(COMPILER_GCC))
__asm__ __volatile__(
"pushl %%edi;\n"
"pushl %%ecx;\n"
"rep stosb;\n"
"popl %%ecx;\n"
"popl %%edi;\n"
:: "a"(0), "D"(p), "c"(len) : "cc", "memory"
);
#elif (defined(CPU_X86_64) && defined(COMPILER_GCC))
__asm__ __volatile__(
"pushq %%rdi;\n"
"pushq %%rcx;\n"
"rep stosb;\n"
"popq %%rcx;\n"
"popq %%rdi;\n"
:: "a"(0), "D"(p), "c"(len) : "cc", "memory"
);
#else
volatile uint8_t *b = (volatile uint8_t *)p;
size_t i;
for (i = 0; i < len; i++)
b[i] = 0;
#endif
}
#include "scrypt-jane-portable-x86.h"

View File

@ -0,0 +1,67 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
/* function type returned by scrypt_getROMix, used with cpu detection */
typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r);
#endif
/* romix pre/post nop function */
static void STDCALL
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
}
/* romix pre/post endian conversion function */
static void STDCALL
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
#if !defined(CPU_LE)
static const union { uint8_t b[2]; uint16_t w; } endian_test = {{1,0}};
size_t i;
if (endian_test.w == 0x100) {
nblocks *= SCRYPT_BLOCK_WORDS;
for (i = 0; i < nblocks; i++) {
SCRYPT_WORD_ENDIAN_SWAP(blocks[i]);
}
}
#endif
}
/* chunkmix test function */
typedef void (STDCALL *chunkmixfn)(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r);
typedef void (STDCALL *blockfixfn)(scrypt_mix_word_t *blocks, size_t nblocks);
static int
scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn, const uint8_t expected[16]) {
/* r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */
const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS;
scrypt_mix_word_t MM16 chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
uint8_t final[16];
size_t i;
for (i = 0; i < words; i++) {
v = (scrypt_mix_word_t)i;
v = (v << 8) | v;
v = (v << 16) | v;
chunk[0][i] = v;
}
prefn(chunk[0], blocks);
mixfn(chunk[1], chunk[0], NULL, r);
postfn(chunk[1], blocks);
/* grab the last 16 bytes of the final block */
for (i = 0; i < 16; i += sizeof(scrypt_mix_word_t)) {
SCRYPT_WORDTO8_LE(final + i, chunk[1][words - (16 / sizeof(scrypt_mix_word_t)) + (i / sizeof(scrypt_mix_word_t))]);
}
return scrypt_verify(expected, final, 16);
}
/* returns a pointer to item i, where item is len scrypt_mix_word_t's long */
static scrypt_mix_word_t *
scrypt_item(scrypt_mix_word_t *base, scrypt_mix_word_t i, scrypt_mix_word_t len) {
return base + (i * len);
}
/* returns a pointer to block i */
static scrypt_mix_word_t *
scrypt_block(scrypt_mix_word_t *base, scrypt_mix_word_t i) {
return base + (i * SCRYPT_BLOCK_WORDS);
}

View File

@ -0,0 +1,118 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX)
#if defined(SCRYPT_CHOOSE_COMPILETIME)
#undef SCRYPT_ROMIX_FN
#define SCRYPT_ROMIX_FN scrypt_ROMix
#endif
#undef SCRYPT_HAVE_ROMIX
#define SCRYPT_HAVE_ROMIX
#if !defined(SCRYPT_CHUNKMIX_FN)
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_basic
/*
Bout = ChunkMix(Bin)
2*r: number of blocks in the chunk
*/
static void STDCALL
SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) {
scrypt_mix_word_t MM16 X[SCRYPT_BLOCK_WORDS], *block;
uint32_t i, j, blocksPerChunk = r * 2, half = 0;
/* 1: X = B_{2r - 1} */
block = scrypt_block(Bin, blocksPerChunk - 1);
for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
X[i] = block[i];
if (Bxor) {
block = scrypt_block(Bxor, blocksPerChunk - 1);
for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
X[i] ^= block[i];
}
/* 2: for i = 0 to 2r - 1 do */
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
/* 3: X = H(X ^ B_i) */
block = scrypt_block(Bin, i);
for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
X[j] ^= block[j];
if (Bxor) {
block = scrypt_block(Bxor, i);
for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
X[j] ^= block[j];
}
SCRYPT_MIX_FN(X);
/* 4: Y_i = X */
/* 6: B'[0..r-1] = Y_even */
/* 6: B'[r..2r-1] = Y_odd */
block = scrypt_block(Bout, (i / 2) + half);
for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
block[j] = X[j];
}
}
#endif
/*
X = ROMix(X)
X: chunk to mix
Y: scratch chunk
N: number of rounds
V[N]: array of chunks to randomly index in to
2*r: number of blocks in a chunk
*/
static void NOINLINE FASTCALL
SCRYPT_ROMIX_FN(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[N * chunkWords]*/, uint32_t N, uint32_t r) {
uint32_t i, j, chunkWords = SCRYPT_BLOCK_WORDS * r * 2;
scrypt_mix_word_t *block = V;
SCRYPT_ROMIX_TANGLE_FN(X, r * 2);
/* 1: X = B */
/* implicit */
/* 2: for i = 0 to N - 1 do */
memcpy(block, X, chunkWords * sizeof(scrypt_mix_word_t));
for (i = 0; i < N - 1; i++, block += chunkWords) {
/* 3: V_i = X */
/* 4: X = H(X) */
SCRYPT_CHUNKMIX_FN(block + chunkWords, block, NULL, r);
}
SCRYPT_CHUNKMIX_FN(X, block, NULL, r);
/* 6: for i = 0 to N - 1 do */
for (i = 0; i < N; i += 2) {
/* 7: j = Integerify(X) % N */
j = X[chunkWords - SCRYPT_BLOCK_WORDS] & (N - 1);
/* 8: X = H(Y ^ V_j) */
SCRYPT_CHUNKMIX_FN(Y, X, scrypt_item(V, j, chunkWords), r);
/* 7: j = Integerify(Y) % N */
j = Y[chunkWords - SCRYPT_BLOCK_WORDS] & (N - 1);
/* 8: X = H(Y ^ V_j) */
SCRYPT_CHUNKMIX_FN(X, Y, scrypt_item(V, j, chunkWords), r);
}
/* 10: B' = X */
/* implicit */
SCRYPT_ROMIX_UNTANGLE_FN(X, r * 2);
}
#endif /* !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) */
#undef SCRYPT_CHUNKMIX_FN
#undef SCRYPT_ROMIX_FN
#undef SCRYPT_MIX_FN
#undef SCRYPT_ROMIX_TANGLE_FN
#undef SCRYPT_ROMIX_UNTANGLE_FN

View File

@ -0,0 +1,27 @@
#if defined(SCRYPT_CHACHA)
#include "scrypt-jane-chacha.h"
#elif defined(SCRYPT_SALSA)
#include "scrypt-jane-salsa.h"
#elif defined(SCRYPT_SALSA64)
#include "scrypt-jane-salsa64.h"
#else
#define SCRYPT_MIX_BASE "ERROR"
typedef uint32_t scrypt_mix_word_t;
#define SCRYPT_WORDTO8_LE U32TO8_LE
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
#define SCRYPT_BLOCK_BYTES 64
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
static void FASTCALL scrypt_ROMix_error(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r) {}
static scrypt_ROMixfn scrypt_getROMix() { return scrypt_ROMix_error; }
#else
static void FASTCALL scrypt_ROMix(scrypt_mix_word_t *X, scrypt_mix_word_t *Y, scrypt_mix_word_t *V, uint32_t N, uint32_t r) {}
#endif
static int scrypt_test_mix() { return 0; }
#error must define a mix function!
#endif
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
#undef SCRYPT_MIX
#define SCRYPT_MIX SCRYPT_MIX_BASE
#endif

View File

@ -0,0 +1,106 @@
#define SCRYPT_MIX_BASE "Salsa20/8"
typedef uint32_t scrypt_mix_word_t;
#define SCRYPT_WORDTO8_LE U32TO8_LE
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
#define SCRYPT_BLOCK_BYTES 64
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
/* must have these here in case block bytes is ever != 64 */
#include "scrypt-jane-romix-basic.h"
#include "scrypt-jane-mix_salsa-avx.h"
#include "scrypt-jane-mix_salsa-sse2.h"
#include "scrypt-jane-mix_salsa.h"
#if defined(SCRYPT_SALSA_AVX)
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
#define SCRYPT_ROMIX_FN scrypt_ROMix_avx
#define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2
#define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2
#include "scrypt-jane-romix-template.h"
#endif
#if defined(SCRYPT_SALSA_SSE2)
#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
#define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
#define SCRYPT_MIX_FN salsa_core_sse2
#define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2
#define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2
#include "scrypt-jane-romix-template.h"
#endif
/* cpu agnostic */
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
#define SCRYPT_MIX_FN salsa_core_basic
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
#include "scrypt-jane-romix-template.h"
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
static scrypt_ROMixfn
scrypt_getROMix() {
size_t cpuflags = detect_cpu();
#if defined(SCRYPT_SALSA_AVX)
if (cpuflags & cpu_avx)
return scrypt_ROMix_avx;
else
#endif
#if defined(SCRYPT_SALSA_SSE2)
if (cpuflags & cpu_sse2)
return scrypt_ROMix_sse2;
else
#endif
return scrypt_ROMix_basic;
}
#endif
#if defined(SCRYPT_TEST_SPEED)
static size_t
available_implementations() {
size_t flags = 0;
#if defined(SCRYPT_SALSA_AVX)
flags |= cpu_avx;
#endif
#if defined(SCRYPT_SALSA_SSE2)
flags |= cpu_sse2;
#endif
return flags;
}
#endif
static int
scrypt_test_mix() {
static const uint8_t expected[16] = {
0x41,0x1f,0x2e,0xa3,0xab,0xa3,0x1a,0x34,0x87,0x1d,0x8a,0x1c,0x76,0xa0,0x27,0x66,
};
int ret = 1;
size_t cpuflags = detect_cpu();
#if defined(SCRYPT_SALSA_AVX)
if (cpuflags & cpu_avx)
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected);
#endif
#if defined(SCRYPT_SALSA_SSE2)
if (cpuflags & cpu_sse2)
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected);
#endif
#if defined(SCRYPT_SALSA_BASIC)
ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
#endif
return ret;
}

View File

@ -0,0 +1,261 @@
typedef struct scrypt_test_setting_t {
const char *pw, *salt;
uint8_t Nfactor, rfactor, pfactor;
} scrypt_test_setting;
static const scrypt_test_setting post_settings[] = {
{"", "", 3, 0, 0},
{"password", "NaCl", 9, 3, 4},
{0}
};
#if defined(SCRYPT_SHA256)
#if defined(SCRYPT_SALSA)
/* sha256 + salsa20/8, the only 'official' test vectors! */
static const uint8_t post_vectors[][64] = {
{0x77,0xd6,0x57,0x62,0x38,0x65,0x7b,0x20,0x3b,0x19,0xca,0x42,0xc1,0x8a,0x04,0x97,
0xf1,0x6b,0x48,0x44,0xe3,0x07,0x4a,0xe8,0xdf,0xdf,0xfa,0x3f,0xed,0xe2,0x14,0x42,
0xfc,0xd0,0x06,0x9d,0xed,0x09,0x48,0xf8,0x32,0x6a,0x75,0x3a,0x0f,0xc8,0x1f,0x17,
0xe8,0xd3,0xe0,0xfb,0x2e,0x0d,0x36,0x28,0xcf,0x35,0xe2,0x0c,0x38,0xd1,0x89,0x06},
{0xfd,0xba,0xbe,0x1c,0x9d,0x34,0x72,0x00,0x78,0x56,0xe7,0x19,0x0d,0x01,0xe9,0xfe,
0x7c,0x6a,0xd7,0xcb,0xc8,0x23,0x78,0x30,0xe7,0x73,0x76,0x63,0x4b,0x37,0x31,0x62,
0x2e,0xaf,0x30,0xd9,0x2e,0x22,0xa3,0x88,0x6f,0xf1,0x09,0x27,0x9d,0x98,0x30,0xda,
0xc7,0x27,0xaf,0xb9,0x4a,0x83,0xee,0x6d,0x83,0x60,0xcb,0xdf,0xa2,0xcc,0x06,0x40}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
{0xef,0x8f,0x44,0x8f,0xc3,0xef,0x78,0x13,0xb2,0x26,0xa7,0x2a,0x40,0xa1,0x98,0x7f,
0xc8,0x7f,0x0d,0x5f,0x40,0x66,0xa2,0x05,0x07,0x4f,0xc7,0xac,0x3b,0x47,0x07,0x0c,
0xf5,0x20,0x46,0x76,0x20,0x7b,0xee,0x51,0x6d,0x5f,0xfa,0x9c,0x27,0xac,0xa9,0x36,
0x62,0xbd,0xde,0x0b,0xa3,0xc0,0x66,0x84,0xde,0x82,0xd0,0x1a,0xb4,0xd1,0xb5,0xfe},
{0xf1,0x94,0xf7,0x5f,0x15,0x12,0x10,0x4d,0x6e,0xfb,0x04,0x8c,0x35,0xc4,0x51,0xb6,
0x11,0x04,0xa7,0x9b,0xb0,0x46,0xaf,0x7b,0x47,0x39,0xf0,0xac,0xb2,0x8a,0xfa,0x45,
0x09,0x86,0x8f,0x10,0x4b,0xc6,0xee,0x00,0x11,0x38,0x73,0x7a,0x6a,0xd8,0x25,0x67,
0x85,0xa4,0x10,0x4e,0xa9,0x2f,0x15,0xfe,0xcf,0x63,0xe1,0xe8,0xcf,0xab,0xe8,0xbd}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
{0xf4,0x87,0x29,0xf4,0xc3,0x31,0x8c,0xe8,0xdf,0xe5,0xd8,0x73,0xff,0xca,0x32,0xcf,
0xd8,0xac,0xe7,0xf7,0x15,0xda,0x84,0x41,0x60,0x23,0x26,0x4a,0xc8,0x3e,0xee,0xa6,
0xa5,0x6e,0x52,0xd6,0x64,0x55,0x16,0x31,0x3e,0x66,0x7b,0x65,0xd5,0xe2,0xc9,0x95,
0x1b,0xf0,0x81,0x40,0xb7,0x2f,0xff,0xa6,0xe6,0x02,0xcc,0x63,0x08,0x4a,0x74,0x31},
{0x7a,0xd8,0xad,0x02,0x9c,0xa5,0xf4,0x42,0x6a,0x29,0xd2,0xb5,0x53,0xf1,0x6d,0x1d,
0x25,0xc8,0x70,0x48,0x80,0xb9,0xa3,0xf6,0x94,0xf8,0xfa,0xb8,0x52,0x42,0xcd,0x14,
0x26,0x46,0x28,0x06,0xc7,0xf6,0x1f,0xa7,0x89,0x6d,0xc5,0xa0,0x36,0xcc,0xde,0xcb,
0x73,0x0b,0xa4,0xe2,0xd3,0xd1,0x44,0x06,0x35,0x08,0xe0,0x35,0x5b,0xf8,0xd7,0xe7}
};
#endif
#elif defined(SCRYPT_SHA512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
{0xae,0x54,0xe7,0x74,0xe4,0x51,0x6b,0x0f,0xe1,0xe7,0x28,0x03,0x17,0xe4,0x8c,0xfa,
0x2f,0x66,0x55,0x7f,0xdc,0x3b,0x40,0xab,0x47,0x84,0xc9,0x63,0x36,0x07,0x9d,0xe5,
0x86,0x43,0x95,0x89,0xb6,0xc0,0x6c,0x72,0x64,0x00,0xc1,0x2a,0xd7,0x69,0x21,0x92,
0x8e,0xba,0xa4,0x59,0x9f,0x00,0x14,0x3a,0x7c,0x12,0x58,0x91,0x09,0xa0,0x32,0xfe},
{0xc5,0xb3,0xd6,0xea,0x0a,0x4b,0x1e,0xcc,0x40,0x00,0xe5,0x98,0x5c,0xdc,0x06,0x06,
0x78,0x34,0x92,0x16,0xcf,0xe4,0x9f,0x03,0x96,0x2d,0x41,0x35,0x00,0x9b,0xff,0x74,
0x60,0x19,0x6e,0xe6,0xa6,0x46,0xf7,0x37,0xcb,0xfa,0xd0,0x9f,0x80,0x72,0x2e,0x85,
0x13,0x3e,0x1a,0x91,0x90,0x53,0xa1,0x33,0x85,0x51,0xdc,0x62,0x1c,0x0e,0x4d,0x30}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
{0xe2,0x05,0x7c,0x44,0xf9,0x55,0x9f,0x64,0xbe,0xd5,0x7f,0x85,0x69,0xc7,0x8c,0x7f,
0x2b,0x91,0xd6,0x9a,0x6c,0xf8,0x57,0x55,0x61,0x25,0x3d,0xee,0xb8,0xd5,0x8c,0xdc,
0x2d,0xd5,0x53,0x84,0x8c,0x06,0xaa,0x37,0x77,0xa6,0xf0,0xf1,0x35,0xfe,0xb5,0xcb,
0x61,0xd7,0x2c,0x67,0xf3,0x7e,0x8a,0x1b,0x04,0xa3,0xa3,0x43,0xa2,0xb2,0x29,0xf2},
{0x82,0xda,0x29,0xb2,0x08,0x27,0xfc,0x78,0x22,0xc4,0xb8,0x7e,0xbc,0x36,0xcf,0xcd,
0x17,0x4b,0xa1,0x30,0x16,0x4a,0x25,0x70,0xc7,0xcb,0xe0,0x2b,0x56,0xd3,0x16,0x4e,
0x85,0xb6,0x84,0xe7,0x9b,0x7f,0x8b,0xb5,0x94,0x33,0xcf,0x33,0x44,0x65,0xc8,0xa1,
0x46,0xf9,0xf5,0xfc,0x74,0x29,0x7e,0xd5,0x46,0xec,0xbd,0x95,0xc1,0x80,0x24,0xe4}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
{0xa6,0xcb,0x77,0x9a,0x64,0x1f,0x95,0x02,0x53,0xe7,0x5c,0x78,0xdb,0xa3,0x43,0xff,
0xbe,0x10,0x4c,0x7b,0xe4,0xe1,0x91,0xcf,0x67,0x69,0x5a,0x2c,0x12,0xd6,0x99,0x49,
0x92,0xfd,0x5a,0xaa,0x12,0x4c,0x2e,0xf6,0x95,0x46,0x8f,0x5e,0x77,0x62,0x16,0x29,
0xdb,0xe7,0xab,0x02,0x2b,0x9c,0x35,0x03,0xf8,0xd4,0x04,0x7d,0x2d,0x73,0x85,0xf1},
{0x54,0xb7,0xca,0xbb,0xaf,0x0f,0xb0,0x5f,0xb7,0x10,0x63,0x48,0xb3,0x15,0xd8,0xb5,
0x62,0x64,0x89,0x6a,0x59,0xc6,0x0f,0x86,0x96,0x38,0xf0,0xcf,0xd4,0x62,0x90,0x61,
0x7d,0xce,0xd6,0x13,0x85,0x67,0x4a,0xf5,0x32,0x03,0x74,0x30,0x0b,0x5a,0x2f,0x86,
0x82,0x6e,0x0c,0x3e,0x40,0x7a,0xde,0xbe,0x42,0x6e,0x80,0x2b,0xaf,0xdb,0xcc,0x94}
};
#endif
#elif defined(SCRYPT_BLAKE512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
{0x4a,0x48,0xb3,0xfa,0xdc,0xb0,0xb8,0xdb,0x54,0xee,0xf3,0x5c,0x27,0x65,0x6c,0x20,
0xab,0x61,0x9a,0x5b,0xd5,0x1d,0xd9,0x95,0xab,0x88,0x0e,0x4d,0x1e,0x71,0x2f,0x11,
0x43,0x2e,0xef,0x23,0xca,0x8a,0x49,0x3b,0x11,0x38,0xa5,0x28,0x61,0x2f,0xb7,0x89,
0x5d,0xef,0x42,0x4c,0xc1,0x74,0xea,0x8a,0x56,0xbe,0x4a,0x82,0x76,0x15,0x1a,0x87},
{0x96,0x24,0xbf,0x40,0xeb,0x03,0x8e,0xfe,0xc0,0xd5,0xa4,0x81,0x85,0x7b,0x09,0x88,
0x52,0xb5,0xcb,0xc4,0x48,0xe1,0xb9,0x1d,0x3f,0x8b,0x3a,0xc6,0x38,0x32,0xc7,0x55,
0x30,0x28,0x7a,0x42,0xa9,0x5d,0x54,0x33,0x62,0xf3,0xd9,0x3c,0x96,0x40,0xd1,0x80,
0xe4,0x0e,0x7e,0xf0,0x64,0x53,0xfe,0x7b,0xd7,0x15,0xba,0xad,0x16,0x80,0x01,0xb5}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
{0x45,0x42,0x22,0x31,0x26,0x13,0x5f,0x94,0xa4,0x00,0x04,0x47,0xe8,0x50,0x6d,0xd6,
0xdd,0xd5,0x08,0xd4,0x90,0x64,0xe0,0x59,0x70,0x46,0xff,0xfc,0x29,0xb3,0x6a,0xc9,
0x4d,0x45,0x97,0x95,0xa8,0xf0,0x53,0xe7,0xee,0x4b,0x6b,0x5d,0x1e,0xa5,0xb2,0x58,
0x4b,0x93,0xc9,0x89,0x4c,0xa8,0xab,0x03,0x74,0x38,0xbd,0x54,0x97,0x6b,0xab,0x4a},
{0x4b,0x4a,0x63,0x96,0x73,0x34,0x9f,0x39,0x64,0x51,0x0e,0x2e,0x3b,0x07,0xd5,0x1c,
0xd2,0xf7,0xce,0x60,0xab,0xac,0x89,0xa4,0x16,0x0c,0x58,0x82,0xb3,0xd3,0x25,0x5b,
0xd5,0x62,0x32,0xf4,0x86,0x5d,0xb2,0x4b,0xbf,0x8e,0xc6,0xc0,0xac,0x40,0x48,0xb4,
0x69,0x08,0xba,0x40,0x4b,0x07,0x2a,0x13,0x9c,0x98,0x3b,0x8b,0x20,0x0c,0xac,0x9e}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
{0xcb,0x4b,0xc2,0xd1,0xf4,0x77,0x32,0x3c,0x42,0x9d,0xf7,0x7d,0x1f,0x22,0x64,0xa4,
0xe2,0x88,0x30,0x2d,0x54,0x9d,0xb6,0x26,0x89,0x25,0x30,0xc3,0x3d,0xdb,0xba,0x99,
0xe9,0x8e,0x1e,0x5e,0x57,0x66,0x75,0x7c,0x24,0xda,0x00,0x6f,0x79,0xf7,0x47,0xf5,
0xea,0x40,0x70,0x37,0xd2,0x91,0xc7,0x4d,0xdf,0x46,0xb6,0x3e,0x95,0x7d,0xcb,0xc1},
{0x25,0xc2,0xcb,0x7f,0xc8,0x50,0xb7,0x0b,0x11,0x9e,0x1d,0x10,0xb2,0xa8,0x35,0x23,
0x91,0x39,0xfb,0x45,0xf2,0xbf,0xe4,0xd0,0x84,0xec,0x72,0x33,0x6d,0x09,0xed,0x41,
0x9a,0x7e,0x4f,0x10,0x73,0x97,0x22,0x76,0x58,0x93,0x39,0x24,0xdf,0xd2,0xaa,0x2f,
0x6b,0x2b,0x64,0x48,0xa5,0xb7,0xf5,0x56,0x77,0x02,0xa7,0x71,0x46,0xe5,0x0e,0x8d},
};
#endif
#elif defined(SCRYPT_BLAKE256)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
{0xf1,0xf1,0x91,0x1a,0x81,0xe6,0x9f,0xc1,0xce,0x43,0xab,0xb1,0x1a,0x02,0x1e,0x16,
0x08,0xc6,0xf9,0x00,0x50,0x1b,0x6d,0xf1,0x31,0x06,0x95,0x48,0x5d,0xf7,0x6c,0x00,
0xa2,0x4c,0xb1,0x0e,0x52,0x66,0x94,0x7e,0x84,0xfc,0xa5,0x34,0xfd,0xf0,0xe9,0x57,
0x85,0x2d,0x8c,0x05,0x5c,0x0f,0x04,0xd4,0x8d,0x3e,0x13,0x52,0x3d,0x90,0x2d,0x2c},
{0xd5,0x42,0xd2,0x7b,0x06,0xae,0x63,0x90,0x9e,0x30,0x00,0x0e,0xd8,0xa4,0x3a,0x0b,
0xee,0x4a,0xef,0xb2,0xc4,0x95,0x0d,0x72,0x07,0x70,0xcc,0xa3,0xf9,0x1e,0xc2,0x75,
0xcf,0xaf,0xe1,0x44,0x1c,0x8c,0xe2,0x3e,0x0c,0x81,0xf3,0x92,0xe1,0x13,0xe6,0x4f,
0x2d,0x27,0xc3,0x87,0xe5,0xb6,0xf9,0xd7,0x02,0x04,0x37,0x64,0x78,0x36,0x6e,0xb3}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
{0xad,0x1b,0x4b,0xca,0xe3,0x26,0x1a,0xfd,0xb7,0x77,0x8c,0xde,0x8d,0x26,0x14,0xe1,
0x54,0x38,0x42,0xf3,0xb3,0x66,0x29,0xf9,0x90,0x04,0xf1,0x82,0x7c,0x5a,0x6f,0xa8,
0x7d,0xd6,0x08,0x0d,0x8b,0x78,0x04,0xad,0x31,0xea,0xd4,0x87,0x2d,0xf7,0x74,0x9a,
0xe5,0xce,0x97,0xef,0xa3,0xbb,0x90,0x46,0x7c,0xf4,0x51,0x38,0xc7,0x60,0x53,0x21},
{0x39,0xbb,0x56,0x3d,0x0d,0x7b,0x74,0x82,0xfe,0x5a,0x78,0x3d,0x66,0xe8,0x3a,0xdf,
0x51,0x6f,0x3e,0xf4,0x86,0x20,0x8d,0xe1,0x81,0x22,0x02,0xf7,0x0d,0xb5,0x1a,0x0f,
0xfc,0x59,0xb6,0x60,0xc9,0xdb,0x38,0x0b,0x5b,0x95,0xa5,0x94,0xda,0x42,0x2d,0x90,
0x47,0xeb,0x73,0x31,0x9f,0x20,0xf6,0x81,0xc2,0xef,0x33,0x77,0x51,0xd8,0x2c,0xe4}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
{0x9e,0xf2,0x60,0x7c,0xbd,0x7c,0x19,0x5c,0x79,0xc6,0x1b,0x7e,0xb0,0x65,0x1b,0xc3,
0x70,0x0d,0x89,0xfc,0x72,0xb2,0x03,0x72,0x15,0xcb,0x8e,0x8c,0x49,0x50,0x4c,0x27,
0x99,0xda,0x47,0x32,0x5e,0xb4,0xa2,0x07,0x83,0x51,0x6b,0x06,0x37,0x60,0x42,0xc4,
0x59,0x49,0x99,0xdd,0xc0,0xd2,0x08,0x94,0x7f,0xe3,0x9e,0x4e,0x43,0x8e,0x5b,0xba},
{0x86,0x6f,0x3b,0x11,0xb8,0xca,0x4b,0x6e,0xa7,0x6f,0xc2,0xc9,0x33,0xb7,0x8b,0x9f,
0xa3,0xb9,0xf5,0xb5,0x62,0xa6,0x17,0x66,0xe4,0xc3,0x9d,0x9b,0xca,0x51,0xb0,0x2f,
0xda,0x09,0xc1,0x77,0xed,0x8b,0x89,0xc2,0x69,0x5a,0x34,0x05,0x4a,0x1f,0x4d,0x76,
0xcb,0xd5,0xa4,0x78,0xfa,0x1b,0xb9,0x5b,0xbc,0x3d,0xce,0x04,0x63,0x99,0xad,0x54}
};
#endif
#elif defined(SCRYPT_SKEIN512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
{0xe4,0x36,0xa0,0x9a,0xdb,0xf0,0xd1,0x45,0x56,0xda,0x25,0x53,0x00,0xf9,0x2c,0x69,
0xa4,0xc2,0xa5,0x8e,0x1a,0x85,0xfa,0x53,0xbd,0x55,0x3d,0x11,0x2a,0x44,0x13,0x87,
0x8f,0x81,0x88,0x13,0x1e,0x49,0xa8,0xc4,0xc5,0xcd,0x1f,0xe1,0x5f,0xf5,0xcb,0x2f,
0x8b,0xab,0x57,0x38,0x59,0xeb,0x6b,0xac,0x3b,0x73,0x10,0xa6,0xe1,0xfe,0x17,0x3e},
{0x6d,0x61,0xde,0x43,0xa9,0x38,0x53,0x5f,0xd8,0xf2,0x6d,0xf3,0xe4,0xd6,0xd8,0x5e,
0x81,0x89,0xd0,0x0b,0x86,0x16,0xb1,0x91,0x65,0x76,0xd8,0xc1,0xf7,0x3b,0xca,0x8b,
0x35,0x07,0x58,0xba,0x77,0xdf,0x11,0x6c,0xbc,0x58,0xee,0x11,0x59,0xf2,0xfe,0xcb,
0x51,0xdc,0xcd,0x35,0x2e,0x46,0x22,0xa0,0xaa,0x55,0x60,0x7c,0x91,0x15,0xb8,0x00}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
{0xd1,0x12,0x6d,0x64,0x10,0x0e,0x98,0x6c,0xbe,0x70,0x21,0xd9,0xc6,0x04,0x62,0xa4,
0x29,0x13,0x9a,0x3c,0xf8,0xe9,0x1e,0x87,0x9f,0x88,0xf4,0x98,0x01,0x41,0x8e,0xce,
0x60,0xf7,0xbe,0x17,0x0a,0xec,0xd6,0x30,0x80,0xcf,0x6b,0x1e,0xcf,0x95,0xa0,0x4d,
0x37,0xed,0x3a,0x09,0xd1,0xeb,0x0c,0x80,0x82,0x22,0x8e,0xd3,0xb1,0x7f,0xd6,0xa8},
{0x5c,0x5c,0x05,0xe2,0x75,0xa5,0xa4,0xec,0x81,0x97,0x9c,0x5b,0xd7,0x26,0xb3,0x16,
0xb4,0x02,0x8c,0x56,0xe6,0x32,0x57,0x33,0x47,0x19,0x06,0x6c,0xde,0x68,0x41,0x37,
0x5b,0x7d,0xa7,0xb3,0x73,0xeb,0x82,0xca,0x0f,0x86,0x2e,0x6b,0x47,0xa2,0x70,0x39,
0x35,0xfd,0x2d,0x2e,0x7b,0xc3,0x68,0xbb,0x52,0x42,0x19,0x3b,0x78,0x96,0xe7,0xc8}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
{0xd2,0xad,0x32,0x05,0xee,0x80,0xe3,0x44,0x70,0xc6,0x34,0xde,0x05,0xb6,0xcf,0x60,
0x89,0x98,0x70,0xc0,0xb8,0xf5,0x54,0xf1,0xa6,0xb2,0xc8,0x76,0x34,0xec,0xc4,0x59,
0x8e,0x64,0x42,0xd0,0xa9,0xed,0xe7,0x19,0xb2,0x8a,0x11,0xc6,0xa6,0xbf,0xa7,0xa9,
0x4e,0x44,0x32,0x7e,0x12,0x91,0x9d,0xfe,0x52,0x48,0xa8,0x27,0xb3,0xfc,0xb1,0x89},
{0xd6,0x67,0xd2,0x3e,0x30,0x1e,0x9d,0xe2,0x55,0x68,0x17,0x3d,0x2b,0x75,0x5a,0xe5,
0x04,0xfb,0x3d,0x0e,0x86,0xe0,0xaa,0x1d,0xd4,0x72,0xda,0xb0,0x79,0x41,0xb7,0x99,
0x68,0xe5,0xd9,0x55,0x79,0x7d,0xc3,0xd1,0xa6,0x56,0xc1,0xbe,0x0b,0x6c,0x62,0x23,
0x66,0x67,0x91,0x47,0x99,0x13,0x6b,0xe3,0xda,0x59,0x55,0x18,0x67,0x8f,0x2e,0x3b}
};
#endif
#elif defined(SCRYPT_KECCAK512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
{0xc2,0x7b,0xbe,0x1d,0xf1,0x99,0xd8,0xe7,0x1b,0xac,0xe0,0x9d,0xeb,0x5a,0xfe,0x21,
0x71,0xff,0x41,0x51,0x4f,0xbe,0x41,0x01,0x15,0xe2,0xb7,0xb9,0x55,0x15,0x25,0xa1,
0x40,0x4c,0x66,0x29,0x32,0xb7,0xc9,0x62,0x60,0x88,0xe0,0x99,0x39,0xae,0xce,0x25,
0x3c,0x11,0x89,0xdd,0xc6,0x14,0xd7,0x3e,0xa3,0x6d,0x07,0x2e,0x56,0xa0,0xff,0x97},
{0x3c,0x91,0x12,0x4a,0x37,0x7d,0xd6,0x96,0xd2,0x9b,0x5d,0xea,0xb8,0xb9,0x82,0x4e,
0x4f,0x6b,0x60,0x4c,0x59,0x01,0xe5,0x73,0xfd,0xf6,0xb8,0x9a,0x5a,0xd3,0x7c,0x7a,
0xd2,0x4f,0x8e,0x74,0xc1,0x90,0x88,0xa0,0x3f,0x55,0x75,0x79,0x10,0xd0,0x09,0x79,
0x0f,0x6c,0x74,0x0c,0x05,0x08,0x3c,0x8c,0x94,0x7b,0x30,0x56,0xca,0xdf,0xdf,0x34}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
{0x77,0xcb,0x70,0xbf,0xae,0xd4,0x4c,0x5b,0xbc,0xd3,0xec,0x8a,0x82,0x43,0x8d,0xb3,
0x7f,0x1f,0xfb,0x70,0x36,0x32,0x4d,0xa6,0xb7,0x13,0x37,0x77,0x30,0x0c,0x3c,0xfb,
0x2c,0x20,0x8f,0x2a,0xf4,0x47,0x4d,0x69,0x8e,0xae,0x2d,0xad,0xba,0x35,0xe9,0x2f,
0xe6,0x99,0x7a,0xf8,0xcf,0x70,0x78,0xbb,0x0c,0x72,0x64,0x95,0x8b,0x36,0x77,0x3d},
{0xc6,0x43,0x17,0x16,0x87,0x09,0x5f,0x12,0xed,0x21,0xe2,0xb4,0xad,0x55,0xa1,0xa1,
0x49,0x50,0x90,0x70,0xab,0x81,0x83,0x7a,0xcd,0xdf,0x23,0x52,0x19,0xc0,0xa2,0xd8,
0x8e,0x98,0xeb,0xf0,0x37,0xab,0xad,0xfd,0x1c,0x04,0x97,0x18,0x42,0x85,0xf7,0x4b,
0x18,0x2c,0x55,0xd3,0xa9,0xe6,0x89,0xfb,0x58,0x0a,0xb2,0x37,0xb9,0xf8,0xfb,0xc5}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
{0xc7,0x34,0x95,0x02,0x5e,0x31,0x0d,0x1f,0x10,0x38,0x9c,0x3f,0x04,0x53,0xed,0x05,
0x27,0x38,0xc1,0x3f,0x6a,0x0f,0xc5,0xa3,0x9b,0x73,0x8a,0x28,0x7e,0x5d,0x3c,0xdc,
0x9d,0x5a,0x09,0xbf,0x8c,0x0a,0xad,0xe4,0x73,0x52,0xe3,0x6d,0xaa,0xd1,0x8b,0xbf,
0xa3,0xb7,0xf0,0x58,0xad,0x22,0x24,0xc9,0xaa,0x96,0xb7,0x5d,0xfc,0x5f,0xb0,0xcf},
{0x76,0x22,0xfd,0xe8,0xa2,0x79,0x8e,0x9d,0x43,0x8c,0x7a,0xba,0x78,0xb7,0x84,0xf1,
0xc8,0xee,0x3b,0xae,0x31,0x89,0xbf,0x7e,0xd0,0x4b,0xc1,0x2d,0x58,0x5d,0x84,0x6b,
0xec,0x86,0x56,0xe0,0x87,0x94,0x7f,0xbc,0xf9,0x48,0x92,0xef,0x54,0x7f,0x23,0x8d,
0x4f,0x8b,0x0a,0x75,0xa7,0x39,0x0e,0x46,0x6e,0xee,0x58,0xc8,0xfa,0xea,0x90,0x53}
};
#endif
#elif defined(SCRYPT_KECCAK256)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
{0x2e,0x96,0xd8,0x87,0x45,0xcd,0xd6,0xc8,0xf6,0xd2,0x87,0x33,0x50,0xc7,0x04,0xe5,
0x3c,0x4b,0x48,0x44,0x57,0xc1,0x74,0x09,0x76,0x02,0xaa,0xd3,0x7b,0xf3,0xbf,0xed,
0x4b,0x72,0xd7,0x1b,0x49,0x6b,0xe0,0x44,0x83,0xee,0x8f,0xaf,0xa1,0xb5,0x33,0xa9,
0x9e,0x86,0xab,0xe2,0x9f,0xcf,0x68,0x6e,0x7e,0xbd,0xf5,0x7a,0x83,0x4b,0x1c,0x10},
{0x42,0x7e,0xf9,0x4b,0x72,0x61,0xda,0x2d,0xb3,0x27,0x0e,0xe1,0xd9,0xde,0x5f,0x3e,
0x64,0x2f,0xd6,0xda,0x90,0x59,0xce,0xbf,0x02,0x5b,0x32,0xf7,0x6d,0x94,0x51,0x7b,
0xb6,0xa6,0x0d,0x99,0x3e,0x7f,0x39,0xbe,0x1b,0x1d,0x6c,0x97,0x12,0xd8,0xb7,0xfd,
0x5b,0xb5,0xf3,0x73,0x5a,0x89,0xb2,0xdd,0xcc,0x3d,0x74,0x2e,0x3d,0x9e,0x3c,0x22}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
{0x76,0x1d,0x5b,0x8f,0xa9,0xe1,0xa6,0x01,0xcb,0xc5,0x7a,0x5f,0x02,0x23,0xb6,0x82,
0x57,0x79,0x60,0x2f,0x05,0x7f,0xb8,0x0a,0xcb,0x5e,0x54,0x11,0x49,0x2e,0xdd,0x85,
0x83,0x30,0x67,0xb3,0x24,0x5c,0xce,0xfc,0x32,0xcf,0x12,0xc3,0xff,0xe0,0x79,0x36,
0x74,0x17,0xa6,0x3e,0xcd,0xa0,0x7e,0xcb,0x37,0xeb,0xcb,0xb6,0xe1,0xb9,0xf5,0x15},
{0xf5,0x66,0xa7,0x4c,0xe4,0xdc,0x18,0x56,0x2f,0x3e,0x86,0x4d,0x92,0xa5,0x5c,0x5a,
0x8f,0xc3,0x6b,0x32,0xdb,0xe5,0x72,0x50,0x84,0xfc,0x6e,0x5d,0x15,0x77,0x3d,0xca,
0xc5,0x2b,0x20,0x3c,0x78,0x37,0x80,0x78,0x23,0x56,0x91,0xa0,0xce,0xa4,0x06,0x5a,
0x7f,0xe3,0xbf,0xab,0x51,0x57,0x32,0x2c,0x0a,0xf0,0xc5,0x6f,0xf4,0xcb,0xff,0x42}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
{0xb0,0xb7,0x10,0xb5,0x1f,0x2b,0x7f,0xaf,0x9d,0x95,0x5f,0x4c,0x2d,0x98,0x7c,0xc1,
0xbc,0x37,0x2f,0x50,0x8d,0xb2,0x9f,0xfd,0x48,0x0d,0xe0,0x44,0x19,0xdf,0x28,0x6c,
0xab,0xbf,0x1e,0x17,0x26,0xcc,0x57,0x95,0x18,0x17,0x83,0x4c,0x12,0x48,0xd9,0xee,
0x4b,0x00,0x29,0x06,0x31,0x01,0x6b,0x8c,0x26,0x39,0xbf,0xe4,0xe4,0xd4,0x6a,0x26},
{0xa0,0x40,0xb2,0xf2,0x11,0xb6,0x5f,0x3d,0x4c,0x1e,0xef,0x59,0xd4,0x98,0xdb,0x14,
0x01,0xff,0xe3,0x34,0xd7,0x19,0xcd,0xeb,0xde,0x52,0x1c,0xf4,0x86,0x43,0xc9,0xe2,
0xfb,0xf9,0x4f,0x0a,0xbb,0x1f,0x5c,0x6a,0xdf,0xb9,0x28,0xfa,0xac,0xc4,0x48,0xed,
0xcc,0xd2,0x2e,0x25,0x5f,0xf3,0x56,0x1d,0x2d,0x23,0x22,0xc1,0xbc,0xff,0x78,0x80}
};
#endif
#else
static const uint8_t post_vectors[][64] = {{0}};
#endif

View File

@ -1,6 +1,6 @@
#ifndef SCRYPT_H
#define SCRYPT_H
#include <stdint.h>
#ifndef SCRYPTN_H
#define SCRYPTN_H
#ifdef __cplusplus
extern "C" {
#endif
@ -13,4 +13,4 @@ void scrypt_N_1_1_256_sp(const char* input, char* output, char* scratchpad, uint
}
#endif
#endif
#endif