From 29010cc5a94aa8244d5013b4252421e9b7bafb8c Mon Sep 17 00:00:00 2001 From: "zone117x@gmail.com" Date: Sun, 30 Mar 2014 04:12:48 -0400 Subject: [PATCH] more --- .../obj.target/multihashing/multihashing.o.d | 32 ++ .../obj.target/multihashing/scrypt.o.d | 4 + .../obj.target/multihashing/scryptjane.o.d | 30 ++ .../obj.target/multihashing/multihashing.o | Bin 0 -> 12544 bytes .../Release/obj.target/multihashing/scrypt.o | Bin 0 -> 12872 bytes .../obj.target/multihashing/scryptjane.o | Bin 0 -> 13056 bytes multihashing.cc | 4 +- package.json | 2 +- scryptjane/scrypt-jane-chacha.h | 132 ++++++ scryptjane/scrypt-jane-hash.h | 48 ++ scryptjane/scrypt-jane-hash_keccak.h | 168 +++++++ scryptjane/scrypt-jane-hash_sha256.h | 135 ++++++ scryptjane/scrypt-jane-mix_chacha-avx.h | 340 ++++++++++++++ scryptjane/scrypt-jane-mix_chacha-sse2.h | 371 +++++++++++++++ scryptjane/scrypt-jane-mix_chacha-ssse3.h | 348 ++++++++++++++ scryptjane/scrypt-jane-mix_chacha.h | 69 +++ scryptjane/scrypt-jane-mix_salsa-avx.h | 381 +++++++++++++++ scryptjane/scrypt-jane-mix_salsa-sse2.h | 443 ++++++++++++++++++ scryptjane/scrypt-jane-mix_salsa.h | 70 +++ scryptjane/scrypt-jane-pbkdf2.h | 112 +++++ scryptjane/scrypt-jane-portable-x86.h | 364 ++++++++++++++ scryptjane/scrypt-jane-portable.h | 281 +++++++++++ scryptjane/scrypt-jane-romix-basic.h | 67 +++ scryptjane/scrypt-jane-romix-template.h | 118 +++++ scryptjane/scrypt-jane-romix.h | 27 ++ scryptjane/scrypt-jane-salsa.h | 106 +++++ scryptjane/scrypt-jane-test-vectors.h | 261 +++++++++++ scryptn.h | 8 +- 28 files changed, 3914 insertions(+), 7 deletions(-) create mode 100644 build/Release/.deps/Release/obj.target/multihashing/multihashing.o.d create mode 100644 build/Release/.deps/Release/obj.target/multihashing/scrypt.o.d create mode 100644 build/Release/.deps/Release/obj.target/multihashing/scryptjane.o.d create mode 100644 build/Release/obj.target/multihashing/multihashing.o create mode 100644 build/Release/obj.target/multihashing/scrypt.o create 
mode 100644 build/Release/obj.target/multihashing/scryptjane.o create mode 100644 scryptjane/scrypt-jane-chacha.h create mode 100644 scryptjane/scrypt-jane-hash.h create mode 100644 scryptjane/scrypt-jane-hash_keccak.h create mode 100644 scryptjane/scrypt-jane-hash_sha256.h create mode 100644 scryptjane/scrypt-jane-mix_chacha-avx.h create mode 100644 scryptjane/scrypt-jane-mix_chacha-sse2.h create mode 100644 scryptjane/scrypt-jane-mix_chacha-ssse3.h create mode 100644 scryptjane/scrypt-jane-mix_chacha.h create mode 100644 scryptjane/scrypt-jane-mix_salsa-avx.h create mode 100644 scryptjane/scrypt-jane-mix_salsa-sse2.h create mode 100644 scryptjane/scrypt-jane-mix_salsa.h create mode 100644 scryptjane/scrypt-jane-pbkdf2.h create mode 100644 scryptjane/scrypt-jane-portable-x86.h create mode 100644 scryptjane/scrypt-jane-portable.h create mode 100644 scryptjane/scrypt-jane-romix-basic.h create mode 100644 scryptjane/scrypt-jane-romix-template.h create mode 100644 scryptjane/scrypt-jane-romix.h create mode 100644 scryptjane/scrypt-jane-salsa.h create mode 100644 scryptjane/scrypt-jane-test-vectors.h diff --git a/build/Release/.deps/Release/obj.target/multihashing/multihashing.o.d b/build/Release/.deps/Release/obj.target/multihashing/multihashing.o.d new file mode 100644 index 0000000..126cad4 --- /dev/null +++ b/build/Release/.deps/Release/obj.target/multihashing/multihashing.o.d @@ -0,0 +1,32 @@ +cmd_Release/obj.target/multihashing/multihashing.o := g++ '-D_LARGEFILE_SOURCE' '-D_FILE_OFFSET_BITS=64' '-DBUILDING_NODE_EXTENSION' -I/root/.node-gyp/0.10.26/src -I/root/.node-gyp/0.10.26/deps/uv/include -I/root/.node-gyp/0.10.26/deps/v8/include -fPIC -Wall -Wextra -Wno-unused-parameter -pthread -m64 -O2 -fno-strict-aliasing -fno-tree-vrp -fno-omit-frame-pointer -fno-rtti -fno-exceptions -MMD -MF ./Release/.deps/Release/obj.target/multihashing/multihashing.o.d.raw -c -o Release/obj.target/multihashing/multihashing.o ../multihashing.cc 
+Release/obj.target/multihashing/multihashing.o: ../multihashing.cc \ + /root/.node-gyp/0.10.26/src/node.h \ + /root/.node-gyp/0.10.26/deps/uv/include/uv.h \ + /root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-unix.h \ + /root/.node-gyp/0.10.26/deps/uv/include/uv-private/ngx-queue.h \ + /root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-linux.h \ + /root/.node-gyp/0.10.26/deps/v8/include/v8.h \ + /root/.node-gyp/0.10.26/deps/v8/include/v8stdint.h \ + /root/.node-gyp/0.10.26/src/node_object_wrap.h \ + /root/.node-gyp/0.10.26/src/node.h \ + /root/.node-gyp/0.10.26/src/node_buffer.h ../bcrypt.h ../keccak.h \ + ../quark.h ../scrypt.h ../scryptjane.h ../scryptn.h ../skein.h ../x11.h +../multihashing.cc: +/root/.node-gyp/0.10.26/src/node.h: +/root/.node-gyp/0.10.26/deps/uv/include/uv.h: +/root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-unix.h: +/root/.node-gyp/0.10.26/deps/uv/include/uv-private/ngx-queue.h: +/root/.node-gyp/0.10.26/deps/uv/include/uv-private/uv-linux.h: +/root/.node-gyp/0.10.26/deps/v8/include/v8.h: +/root/.node-gyp/0.10.26/deps/v8/include/v8stdint.h: +/root/.node-gyp/0.10.26/src/node_object_wrap.h: +/root/.node-gyp/0.10.26/src/node.h: +/root/.node-gyp/0.10.26/src/node_buffer.h: +../bcrypt.h: +../keccak.h: +../quark.h: +../scrypt.h: +../scryptjane.h: +../scryptn.h: +../skein.h: +../x11.h: diff --git a/build/Release/.deps/Release/obj.target/multihashing/scrypt.o.d b/build/Release/.deps/Release/obj.target/multihashing/scrypt.o.d new file mode 100644 index 0000000..edadb2b --- /dev/null +++ b/build/Release/.deps/Release/obj.target/multihashing/scrypt.o.d @@ -0,0 +1,4 @@ +cmd_Release/obj.target/multihashing/scrypt.o := cc '-D_LARGEFILE_SOURCE' '-D_FILE_OFFSET_BITS=64' '-DBUILDING_NODE_EXTENSION' -I/root/.node-gyp/0.10.26/src -I/root/.node-gyp/0.10.26/deps/uv/include -I/root/.node-gyp/0.10.26/deps/v8/include -fPIC -Wall -Wextra -Wno-unused-parameter -pthread -m64 -O2 -fno-strict-aliasing -fno-tree-vrp -fno-omit-frame-pointer -MMD -MF 
./Release/.deps/Release/obj.target/multihashing/scrypt.o.d.raw -c -o Release/obj.target/multihashing/scrypt.o ../scrypt.c +Release/obj.target/multihashing/scrypt.o: ../scrypt.c ../scrypt.h +../scrypt.c: +../scrypt.h: diff --git a/build/Release/.deps/Release/obj.target/multihashing/scryptjane.o.d b/build/Release/.deps/Release/obj.target/multihashing/scryptjane.o.d new file mode 100644 index 0000000..579a6db --- /dev/null +++ b/build/Release/.deps/Release/obj.target/multihashing/scryptjane.o.d @@ -0,0 +1,30 @@ +cmd_Release/obj.target/multihashing/scryptjane.o := cc '-D_LARGEFILE_SOURCE' '-D_FILE_OFFSET_BITS=64' '-DBUILDING_NODE_EXTENSION' -I/root/.node-gyp/0.10.26/src -I/root/.node-gyp/0.10.26/deps/uv/include -I/root/.node-gyp/0.10.26/deps/v8/include -fPIC -Wall -Wextra -Wno-unused-parameter -pthread -m64 -O2 -fno-strict-aliasing -fno-tree-vrp -fno-omit-frame-pointer -MMD -MF ./Release/.deps/Release/obj.target/multihashing/scryptjane.o.d.raw -c -o Release/obj.target/multihashing/scryptjane.o ../scryptjane.c +Release/obj.target/multihashing/scryptjane.o: ../scryptjane.c \ + ../scryptjane.h ../scryptjane/scrypt-jane-portable.h \ + ../scryptjane/scrypt-jane-portable-x86.h \ + ../scryptjane/scrypt-jane-hash.h ../scryptjane/scrypt-jane-hash_keccak.h \ + ../scryptjane/scrypt-jane-pbkdf2.h ../scryptjane/scrypt-jane-romix.h \ + ../scryptjane/scrypt-jane-chacha.h \ + ../scryptjane/scrypt-jane-romix-basic.h \ + ../scryptjane/scrypt-jane-mix_chacha-avx.h \ + ../scryptjane/scrypt-jane-mix_chacha-ssse3.h \ + ../scryptjane/scrypt-jane-mix_chacha-sse2.h \ + ../scryptjane/scrypt-jane-mix_chacha.h \ + ../scryptjane/scrypt-jane-romix-template.h \ + ../scryptjane/scrypt-jane-test-vectors.h +../scryptjane.c: +../scryptjane.h: +../scryptjane/scrypt-jane-portable.h: +../scryptjane/scrypt-jane-portable-x86.h: +../scryptjane/scrypt-jane-hash.h: +../scryptjane/scrypt-jane-hash_keccak.h: +../scryptjane/scrypt-jane-pbkdf2.h: +../scryptjane/scrypt-jane-romix.h: 
+../scryptjane/scrypt-jane-chacha.h: +../scryptjane/scrypt-jane-romix-basic.h: +../scryptjane/scrypt-jane-mix_chacha-avx.h: +../scryptjane/scrypt-jane-mix_chacha-ssse3.h: +../scryptjane/scrypt-jane-mix_chacha-sse2.h: +../scryptjane/scrypt-jane-mix_chacha.h: +../scryptjane/scrypt-jane-romix-template.h: +../scryptjane/scrypt-jane-test-vectors.h: diff --git a/build/Release/obj.target/multihashing/multihashing.o b/build/Release/obj.target/multihashing/multihashing.o new file mode 100644 index 0000000000000000000000000000000000000000..bb4068cc107d1e9066202251aa3dd1c2e16c142e GIT binary patch literal 12544 zcmd^_eQ*@z9mhAh1XB<$fc1?^mh#es!X*nyNK!zKD?X) z)T!wlQg^~dMmv^KM`=6LR&j<*XX;Sw;6+rx(NXA&NS%t-H;5gPsStsZ{(jFsPx8y= zmeIC<*@xNPXFt#PdHX&ux4Yp!k9SpeR+c0}mb6?Ny%SWDb~{JoD_C@eG+oM<8kJ+p zIo0^U-RN%csm4V$`AMGIy9^Z{<4H9Rnb#C!wZ@AFh9NMf9Pt|Osm431@v^Bg^I?hq z7kRzCbI~H~uh;k}bC+KlYu9OF*A?>-n@y9IBO^8^U(CDHeV2RV#)GXp8~5d~QTo7# zBUgF;z9}juZ#?KM=)uMrLq%tdfumm}zi>GF9sr=68_abUNXCG1IQeqc(V^s!!A3r} zYmG}jHMKDfH59@aFtxiED!D)H?0XW5ukF2ePM#iRt`=TdX1A}AV>WgU3;J&e1ngpYOX(4BJzDK z2IGIl{xv}Vj(v^$=Y{?q`5O06hyI;&_U*-)W-~v3B<3T|&zoV1iTg406Q+#2*Hg1! 
z%7Z1fEHvx&^<4?C@gYoGHjlpggteQmM{VZo?*(6om8-^pdi0cvo6`0qbk3us2lJc- zHMri7s>UnsWI9VddRk3haCH5{6G}e;np=Wt*r3$?2R|>o>%kX3FH}=GDZr9C@NTYJ zG*Fw`miN%-!^7&p$Jy%n_ms3ca5@_%f9(sb(@Q5hw@-o}F6Xo9*8MZ3Umtsj3Q!7bcIn6kGzKRJ}`?7IXl z!L8(dY*FtI$xdQ!5I8z#AT9aB{48hRWu~gmQ&ZhTtQhe#3(47MYn%(tIQvfXlGNm< z`C~LNXeco=V!5w@QjFx7QLiyBtH!|^<2=uQ!ujms1NB4=`_kBrN=zQeHWG!6N;QtN zjmmwu+mAmjD@Gm;PEAKs3t*og-M1j~;D8u!3xA4p(W6uU0`DRJ$o*9DbkWM@!MO5zHlRO91Y+${x1vb# zU~EgL&My&uy(bXSq;@SB475wlreJfRW8w1J_`2qMw4h#CB)MG0;Y5cXY74~MLXl0b zU{Ip>QgJLA)r(z`Xp6RR)0WQS5|>iqT2x*fj|Gbn`kNE2ty;|02F74mt?6~GfuJ6Z z70zubk{8PHXjs#+Xt^sEjch86$jg?=kTlWM-w{p7;Y3`QJ7dw!p%zV!Ml?AP+mr}v z5#1$8ZXT86ZP7$Wi`)#UvRS7bWt{*KgmLBQUD1(ps(6kCvK|U+aXk?3lml@&k_b0z zu?2D<(jrHy+XA78PY=WzKr)(Ywo;N-g<{aiD>R}}O-)8Ie<0)Qb-x!8e|i#S(JM4Y z4+Z65TOc-HMnkKrRx(Gcs%ipypm1|VRh0@2?9i5C^tM1pLR(%mTEe8!=#^uutE;Qz z!p7!AL{AiZLy7J6mJ$v=!37(W=M@}-=(VqoMNxB}A;OB;g*?4BUjgu!pv=ELaI50eqCzhr{ z8A->QiI82Ll%2rq+DWXg9jJkz;E8E(nLMGHRm0MsK@LI2lPK#pGac)C6*+{z;fbYJ zPo7YN?X5}XW`%T*}zn3b++{T!~0Ws9o#WA|yPGWd@bx3P!`Y_$AnMbj`K8 zu5lqOl3+X6X>I=2SRkxHzyB9OhC7L;5T>keM+R|iwM8YfNPG0;cbP?A^$Eg z6>hPfJdSaFV3-F7atGd!@Pwsak;I2j+_L$CW&Wr6aZO>Fi$+Q`BclFh3m5f^>k3HE zlwaZ-Q;`tzazy_HZs7T?{uf@Kvsc7Ot^#3Y2ME%15ATTU5 z8x9(pc>PN#gt%Spq-(ljo-l<07BnyEwD@Hze{~5=8q&{)ZuwssAT=|7Bhw_J0Z- zBEJ|vh`5X)1!})hDyr11q7`JYJ$9a!M{~YH{7CylFZ!G)}=XsV1hwrlE zf9)jc7~{F)eg_acB(a#8h@IYq^E5IK*|Q+9AvmnRk(gl$v;*}g`C?bYXVO1l z!~1RcYc_l;)Q9z^`63nd7ID6tFAc$KIPc+0gFUZ7JDWI{`4V8yX25$yIecRfpmO(OI(ljU{K79^k*CW+gzXKdYS7_+2~<#Gq$tDhWl-Jmkr-( z!w=c;lQ#T17^h5r-ekiIZMfTpCv14ihCgD%pSI!8+3-Kv@If1X*@jPq`HJ(rhi|!J z9?k`x$u|Y~RI=X|@_91m?H|7e_&3y*;(KYi`C3|vZ>65PMVl+i&DYY}I-lY%WACLN zkH^&+i0Q5+kn1aD^C*>mfBkw-L&F{ZRgHDk4Ylj){C>&htuDb6yiRzl7vX6!o=Wjl zhNp5oRpJRgf#BJcGpWL~OAuE;drCu*kUqBE{&Mp(2((-BulBB6>Gt~9ty<;tH253b zE4>~+WGiEzSk|w_oR#8ZL>%-RkSOJ=5|N-Dibfi=aA!wA*A~}lU7q??bw2;7Kcn_Q zIX*ur#^YP;NB_V?f2A+9DH6~VF^#pyrhG+#FD#-`&t~3^a-SZ9Pb`&Cxo=CjIojc= 
zUmFaGM6;KReVXpEX%}mTUs+Ngi*{-;eT!R%@|qL6h8P6ORs{UnyBs>Q~_3qOx*6sQ$VUFt*PP(K_~dZ`9m!6Y#+hOQ*6C z`rjIgXf2>Lr=I9xx%tg>{7f{tDWhLY{o!a!qC>;2^g6zu;&xmB$0hjejzN4ogKb=r z37-RT49I7}A@t(85r59{l}7Lih+{zgx8V?6MF;`;Y&ZnR_hby%9;{37+Yv%Qy$pxo zO%TU`oVI5Zqvku46XHUzbB=o2o^6EF_VjX&?U@UQDCeg{FB5J)kB@K9FS#E5!QVMi zPCw_?_M{1??J=L1O@Hv4j40;~qL&FjPB``F9nP^m`0gOe`Gn}HKbHun?Qvw83Fyyv z;1K0p%{lfNzA{tDsvsYB?G5ne+03BvImKo^wj5GqNhIZw9!9G^wekeADy^A)aTbpIn?J9gj1hCB%Jzu zj&SO;1Kx))U^4Z2D&hDSc;WL*!l}>m38y}n5l(&fac=ebcA}>~w-Y_}xzk3!mFTI@ zyNI6pyq9q5^M1mq&o2{Bef|sK)aSnwPJPbi-&xWB<-}(v;ne3Dgj1jA5l(%s=G^MD zO7zs{JBgn9++?GlhUP-R@~O{DImdocpYI@?`rJY|^*KrS%}|z@hrNWaApF;aQ~h4T zss0ay({X&2a5|1}5l+YPBf{sC_FN!*0pa+!VhrdH9pCAM)A5~2I33?g!rf4o@MkUO zI4;){UQhIN+=E1a1JSqH=(i9~>osinBcyz^M6`1s(bIYII^lGloFJUelMe}}<9Lp6 zI*tx}po1`O-y?h_q>FaWB%F@de8TB?l@U(I%g4EOJ>E|AbX?krp03AE8~s+Ir#|l@ zdg^mO;ne5n2&XO2(f@8gSh@kQaGQM>&(n2_5?r^thVC4y%eCa6hU)pen(saqrf0=8mSt*@sGvp8q=t_xdU%5#it8m? z6BUjjsl{r)2Nn;+)Y!{Qco*S%nYHSndWFSRcJ|oq8PnGDStxA3 zqBmK?Y^IK7wo6ynE3DR{y-Z@s?2N*iEFqezL0D+azrlQciXwefV9cMUuyyrjsD82{ zsr3-MUNt-m6sFdgp_++Oam_*_Yc^!9G278#Z-L>NqYCF`^R?^1Q)h$jz3`&dJJLt})e^nj)JWXmDzw zk?Sus(&w6;Q|5Aw$!6Pivb4Xz$ojf$wxb2toNP>;g9cK^LL+@5I27q!)p+Pum%_@{ zo1yiPaou`FFI!(|%%25OAl+w$M(#I&0%^qLkW7^Ljn4lU7`eC0Ms6O)1rw>qH0VEi zJ_zxySES$3ZGkbJB%LCgdnU`$0abcaHD={P50L%*B4d79p;2{{3j@wwSC0u|uC0)K z8YBn9UL#9@;I~1CbBk-t4)ooj80q6MMa(M%y?L&Og1{MFWO!1cDk-i-en(~?WYkW+ zFfhQ>df>{=0;~;+{xV6Vu#%S;3`Tuc=Ni^@+gR4Ypgw0|*jm|dnYvL2tgL*K!otlg zv{hlne`M+&RJfr8>_8deN?YI8e)=1ARCnTe%BIUMY~;JXh;1eH~f$q-fA}-Y?nXO4iiY_6M*my#pLb9L8GxB zI+cq;#vS=V*0of7@`J7Lrxgtu&)@0Jc>W&vdeoz2JYOJckyej+DARm5Rp`cpbBbV^ zhq8rsw&YxJUOfDIz*v|Ucyi|%v?jR~-KAFmx~NC2Na$KzVLMf3%4T95*@4*xYid`d zoid9HJ**v&k2TvTm`6m=q;}zBw!{;h*A;$2(+jhM`oo!;UYZ)zC(F##%!E&@C8-s5 zC}4izli2~*qq1JjJmxbyQ<%xR%y^n~g7v_V;b-iiQ7>$`<=+<~WGGU=s&N?+Mmvc8*u1<3u)Jn}nLm_9;TEfIbgPJ^q7= zbXYakOFvP*^#gWLX8SR%6tgo;kvgrLDYE}HMckYqS&3%K!U@(2uJLB4Tg4dCN!Dt1 zPC}`$9&e|=Rqcd 
z-4I$84%t+*4RZ6Ly=|)Bgg|2B2areg9#IDe^6y4;QG~;)zZ(^?=>!-GjOKBh4{ST4 zu)Qkl<}+)@%-XFR4B9RWZ?cb7BR!p+G~36UZEmR@wYEvXm6=}Ce^T)tk=dsLZo3ug z|B&Kuql(d-cK^_L(28;IU33%gjn z5M9Ik%=ma%pIeclxIqp2Lahtt!xmBP78g2kp(}40hsM>g1X6tUfGiM~C9(t6--U|UlnEXnFSM*nFIE`_;*Me=kJl5r z1M+bP`Sj5t-$VJ9pzlNlSj?>|d8JZeodiH<8q68c;sNUbTv?ES{N|wT|HjlI3%CsM z0DCV`_xGWM+9qE3!@(LDstpG@x_tvUEWiQBB>^WZ5*xE2isSOkLcEPT{eDGigF>g` zImR0I%apgC!`qteCUAZjiR~q*+1O`&DfU2_umJX5QZM=ys!l$KP~d>Tp?5HVaUep7 zO+JWV+d%r|Y~W(w2eRDPE#%{>)QjLKnvXp97Zn+WI|xBeromkV6$^1V$>7+qQER;8Zp4d3U(%Ian3t#z&Qj8VN$ocIX;A2A%fJOm51LE!6->Cw%O=xX0GZfb6 zz(Lp};!_Ai-+S#KpEZa*$J&?$xh@^<34A-?cT+x@ZOy{F=jZ8gPJnIY6GFaiFow0+ z$fu72d8BX)dMohvLU*~OJHWDL){&bj(Q{aUiUW!r^cP)8& zsymPiY$PmCcM=N>!}0V>;SM|U60UiWHYwTzT0)$g0lijq!d>QDN}Kc6ueLsbY~bKS z*dq(r`1Ck|KWh}&17$nb@0I!h{q=oyXCJfjyV;&bDW0bTO7UV-K8Cm6F1}Ktzx%G+ zH;kOX-Y>n{`ODa?yaeC-VI{q1(f7n_Pmu?YRA()wGkU81L0 zj`UvmlV9!|eR}6e^M%)?UhydR;tg*o*F)70SjfBYTHc=W)l&ZOaNj$W<$1;zn`r$> ziT>f4Gqe7NiN1E+pRhjXi5@SlooPJFk0!mtgz#!7Ha$e3CcoiXcn{`!ILt~*Z4>kS zH|AV@${$Mfl-%jnp0YfkA9t>NR>(^4NbU3@Oz83q56t++49_fIA<<79pSScfiQam* z|ML9K-w0?gXz(Tfi-{g8rF|pH_eE>Rm0h!ZPcdBRr%+GJy16fod-SnOr*~RJHqt(a z6YNKq=&fLWQLfKv+xg@bxv^I67bSGDWD}5< zDmFcudF^t#{T|2H?8xDh%w}xaj<{H-rCCYym8{vVcnzQ_r1NZ^y6RhB8+|svbPlK0 znpnJr@;Ggu@Psagehc-ugFn=e`JBBI{{D7_>-NheajBPb_qLa|=3Zg$_U{u*wq9rd++>PY9*M z9mgrwsMqJ0$=O3oa`fv>b;ocTa;e2Q@Dxk1U&c4$Mztj8{%o(x?R@>$fma$v&eyDi zZ^M`!Zft}iF-pCFhSk1>om8`967Jscp7M7n-ecH(6|lT7d6Txgq&N6le4pZPVYPd} zOBAt~c7JX>wrWYU%32iGLU5ZM*p4~CZ-Dgy)}jh;$@IvLN8|BM#EEQMuQedub$H+K zx9|~n%K|mU$&@Pm*?$TrhH*Yt2gbSKvtOKjMP?tfQ+yaKo25RgV-#$V9#Dlhf!kY( z)Qb3tRXPU5u>!WoS>v1NgP8D$9l#{%Ig%;s>J@#U*os>ne z^M635V@GI0=)<g)I-|jft}pz7_T(-){Bh2R=zweJe2bqB%l8D>X&HxH`*7&^ zAsxI1*>Sl@Gmrm0Kl6%wkgeZ4oplNAW~vt>+d+1;@=;VZ))$!hb^=_kfT2vE5oGTNgo7Gu)o?bK2)s8i5Qv;# z@Yt;(_F0e}%b2Fn_!q#XMULesX7cAToOpe{;D_%A3^gi8-dmtW`lBJEDkWdj*Tpdq z4HMawJ#>-Z?-U(}KcwlNn`ZC_YC?+Y1wOX3 zPW`iaG_|hy5O&!U+2+yAI<>#9_+9-T=%KFoeKV0-ryj-`MgkAsLwQt;99jU*ncb1U 
zeydLX%MZuCkcS>;c5BA1C|#_b4)W>&j%I}Wcck*s-bwr$2tn2_wE;rRThG$9h{=!V za9UBf1e-O9RHZKe0`APCI9bMUOU{9CUW=Sf4R@Vx5zy_9mFO*efBxy1J6w+cp70Z_ zTZ-b`?etF1DeX5^<6>h;1}zfvoZjiK8i%g}A~E+0R(K(DNT?E9$Kqg7O+2K{wbT&4 z+ddU7X~~#YW49DqMc4m4Ok-4S9JDv7jXH`PMPG|A(8yI!^Lbd~MUKJ*W1b^xpneX& zeLfjr=jEcHQMMJwsi!#3Eh~XTUNR27QZ#WVl(WN!@q_FzY46VS&>lM&WFIBwk|K4r zcTi?6rjnJrP;R%&s9naX45wjEdA1X9Vdy-wc*hbv#ywer;J`LV+E+df^qIaZkY#NF536)ViDUpeNvSYh^W z_D8103a3tat^fYv(|QJ{xo|K{Jh6&qwEdg*CC5}g97;YO{G$QC4ft)sZwr1~@!O7H z6u&r*v|_4@WJ1Y>Ldm5<$;Cp+*fVlj=}DPP#+iHRzl4hw6G%Nr$!_eo@|lv2LX!bK+(aIN2%!F%Q>hAf`-DC5fq$ z=~S|*!VRN3*)T?ok3Dj6i0!7Z@rlMR(207Un-pbi2ZYH80~O7v^^twD7LDQan%XE> z5Uy zSp*j!pmrf>kVuul5>KaLo9Uin!ynB{(`7#X$nG*|PwD^vKZH)%?b#S%tCSa|xbz6e$Y{2*#H47PkiUOU)A;=MAXF|rS^g&k8*i?f8-&a%! z8P6|qQ})exzWN+}T5&j4=mBj&xIkZ4;2Vg?wRyeaeGq*sf>peR<~0)^sTg(nZZ~?` zlFtn*6xHxq#Ao|87TP`-ZzR)55dXZd>}CyoPj5I^LhMpc{KcAnz1Nfe%ZyvP~!okFrOlh_1y}6 zO|x#h?z(9}gyEX~V*^H33PjS3+}r4TG7?70Op*tQ90R?R_`_R-pBmL?OVK9}Tdfa| z4GO32?_pX4623*V;t;;e84|A=4Fi!6h453PGCNN;5Ei6m){TT{9!ag!&f`7lXtsF- z8FHXbyFg0r)%4INO%HFCxBG}+U$=#_zz%9WWF$u>!M3e_>H8VDb|!$(3|Q>>i(LB* zauQlnvOF_nb?&y7L45dQVzI9pr*gGr;i5&i`DPS97+zf!z9sm`>SdK{e7QM!InvG2 z>@cU!n`sgF2wCfW!F~jZgU(L#+2i#)#q# zyuVp^w87>(My122(AcUmrl>wei;Q*uVI0^C(s;jdoBs*OZt-j5Qx?{XO2i}j1!C0F zXig_=KAxDIel0mw$oUv?%4zoesV`iA2fH5Xlg~+mw4d>A!A$aMHgD_T_?VX?uUemS zC)BmcZa{KW+x#~S_I>yGlu(jxKNYwx2!m#uS4DkL7nQc8IctAZUbXB&SI*k1N~>Jy z)GEtXEz7AYtFCh8JhZH8nJeePwQF5D%h!~bm#qeE^_r@(oWeVcZ(duqZ29+HIh?(0 zW$B8_W#wfw)U~*L=!C>i8P-|8eINeD#SeI@O(NZhI%SvP*UH%z#ldqNZfTrUn~`T~ z)W2@i$%K=ys6B3eG7)v(uzSLm`W)V#JIF9zy0xJid?ML2Tiijq)1LZIzMS?>c^e)& z^<=(TWJDue!+95jhiGJG$=7VVeRNm@M}H3Q9jJ_?|L@Sma3$-XmJCjNNB`G@_66Nh zyP5ch>prU({69>b5oIUlB{y45GVO`MWnDDZ@bSH%jWquEZn4_ChYdLF)?ymrKk<27 zCkIA&gz-l1MI2EQlJAQeCVO@#^}}N@XcVuBo^MZz$6L_IzKDLprt|m;nrzd3Uqb~K zkEbZfE{H#K8c|&?9yd`wfS-^gXFl%f`W}A7-{3<97muGPEAb<`&rei4R-*hdenijz zN20o1JT6*Q^gcWqSPw)OUv%1BLxt!XGjwCueuXmg3sjfu)l1M@FG2r5&?i97%$uyv 
zcsxe+xJ{pJ#|a*LiDm8b${$r!H%p5um#tpAVohbaLny95NG@~k3XrE)xt7R^OAE=1O6^7#HJK7v z{RoC0?0&9m$--|d1#|2MNl)guN(+OF7s|oX#RUaR)uPfOd0|j3b(NQuFR%EKtF)BI z{LIuR*Xa@9pI|B?TIXR0)BWQ;@8u*J&{|IDLQoB zhZ|hPKM_BNo^9LBaJppM&2ajkrc*CgKgr{|ZTyDw-)-YZbn+c%+#e*-ljZsR67%w* zEnPIPi_@B=XF^)Wr{{@H99shfL O7~Qe=EyL)Jz5hQT>5*Xo literal 0 HcmV?d00001 diff --git a/build/Release/obj.target/multihashing/scryptjane.o b/build/Release/obj.target/multihashing/scryptjane.o new file mode 100644 index 0000000000000000000000000000000000000000..75ad45363e905d842cbcbb0f109417a510012ed9 GIT binary patch literal 13056 zcmbtZ4Rlo1wZ4;tWDLGlCQAX2oE(` z;*)GWrp51yWsRB<8f*{`Hzb|>NbxW zr%rb|j%1CZTpyKB#jH_ID()Xu+qi-0|T(^**Im}DBDHZ{vXD& z?3&ueLOnq(lqh98lg!qyP1#oCSPVNYrnOp3YqfZGu_N4Yc3@z0jYvg~F-yg&SMsBb zpJn{BnT}_9Hh&W95wTli+aO=epNb>6$f@g$}ndzA*`L z?T&Tj!yN1G&1RvkcE`G(eVaItpXjmYb^|+|_&i@!1`-DfayNnVBugA{61g9&{USe= z_`D#upXF|1>P0M+uRICGwx;u2fkp$||MVO`#sPoVDs3)RFee2P~5E>S`@#R7q zUqJ)ve67%uuN9NwVPmH5mCz(#(3cyX!iH7Ca{)5$h9%uWoj1S?1{4j~8k7!DV17u5 zhW!S#8c;i+H7Ehgm8nE?Eh(m}UAlUbm3ZPB--KR5g%V@HlCaRL#85^(z)HeG-;&DJ zOg)R1n085J2P>)U)=MgbdWoTMiLj@x?qelu4FkW48qHkeTUp}Gsk*vJOT06csk=Su z9uI%d!?$aRx2Lki_Nkisfu{bQshf0O*~P%*fnMr69`!Iwc)CV2^?i?e2=k~Q<@xf- zC_MaI7(eb&ztQ=s*#xM^n7R{GVq%FMH+s}Q*tAr%V$`LGB}DTIE%9b1>cp6#^F?tz zx{)TWseQU(44wA_GZt9-M~vpAM`6Q6*Y%ou6pF;fR@;Pl}A0I8y=@eV{OE3`q4&OZcW|m;ai{@39_Rk6)xA*eOltJOBk>0CKb77 zJt(w}*FlNy3CPyp0i*y(E{j&iwZx8VVS2G_eY-HFd{E$<9kA>TdP&1-k9t%uSvwaf zM6(jwgP2!g0RBu(ytQFV}rmhfFBE<;v6#f-G$Xp)vJr>UvDK zTPr-PtJ|O#9Hc-Dxm7QGUn|_Kt60O0rJ{S6*4OPR?8hGPsHZecBO0y%LBm@0fp?c) zBHA8xyH?n#sqZtf%^6r=XuWVdWY4BXN#%OI#4gV{ax8cWE--Q4*|ol%`Zb=ouEO7@ z2*?q(og$a@bplEp4X7v&8K@4&^Plht^VkiQTjYBlZD z-!)6)$JG-vqmOq9)>C}b)}@!fg}Uvv>Bz@z3dL6#7TRQ|YahW!KeO#+QF~-Yf7G7nMR}6S{K$-NsVs=hfEVIu>2*eC zoHlA>BQwr8{=~8)GtN5JmF7gs2UzIa(!gp=9n(EGU!gz34vn7?(@0HXcR1d9=@%Mw z()kfBw1u(I7cA+4nk;l``ohn{`(1nm%{MSYnsd%(#~6iwfF4?GBhAaiduc+hHyCo$ z3a)WmwJCda$8?ew6hXQ8TQke}X2emGxf@4sPX^^dJvUIsRBz=R7(3LuC^fLqFJoh*}Xb_y}v>uYc_dRFVvhBX)@gAUCo 
zG&#cel@<0lUUqNMVxu2RZaW5>N3mfs0Mv@w29tjea@mwc@rAF#wI^F#+lVh&sBnE^1IZyVOLkAY?#7mKip2Q zsK60DwzmDuz`%xUz*vJPO>D&)x|Cz9N+AqZ!&)6M0Q(%%PUzk4-)f(+Qu?wW-;&1C$P9(%3;+G5!%tajXsBZ%X?mXrGm0cvszCIa`R zY9mN*6lajhKQ>l7>fQ@|jdci&gojj^yK=K&z{dYpETP8Vr{@PQVCWEAReW+_U?vu0 zE+lHPUtTMeS#o%Gj*yKQti69=z^WNmctb91$wFrtf~O9Uu`W>Rm21TMye~?}wiLXl)tBi zzAS}uM+HZCJrz9MZ?xh@OTlVFNSD#F8?A1mm2b568m$7O)nl}rMk{Hw#-b&hrYtIS zE{lpuBDNs4N)eRO6kMT%%{^tiBeW0u_>EFng?1YX9R3?gNr4GKxGJqKQPcgF_L=nC z`?1f+^DO&JE|feBaEl;J>4e{r2Ey+;lZ8-M>Li?~GkG_Ny-(3GVJvL#99$$~@YMN& z`%oN&*c0MD6ZhkNZob2Ko4ffI<9Qx+W6>)kQRBQ6$2o<@o*-=BMj=hq?F{dV&dEoc zB2UNCJK;pG;OXx<9`6vf0lVYzhfxcjy29~zB|$O$TANFR8)f`x89%YRr;MN6eUR_t zo&1<<_dyr$OZ-m`cfao9A8L-5PbbH~OGHs9L(Jv$?}CYs`%>-XEs_z{-5s4MDu9i7)eOx(@;Lz``G+h<8X%4Q6i}0>77VN~!p;QavG#OY>7rY(#{C!O~@LJU=$Z(JU1c2$lg$|O$+_C0C8a{;!#)O4lVSv z0>u%o#In1?1A)>FA|GewNLh&J_Y;_{4&**n5-k~MIRV&0bRErafl;fu))hhf6y z5lHCU1ZC}xun&E7-orv&r3B?Gy2B0cO#yZbkbh!p#yPS*bihWfXm}aq(L*(~J?nW$ z%>sJBXlGEH`W!$dh)I2kwkz~$mSd?K{-DOmE__qsfIW8;-FwPuW?DehwWozd%k_iClHYDSo(7qA=HDR`)!^p zKGpi(buIZc2vTny2*P>Jd3J9hx?b0kr?gZzKjLq_)!7tibvD&F8+;8-&5xwe87AIb z)%c(<1vg(9_78>?e^pD%!lvf=%8aUM^-zS8IiA01p$}pjoh`ol8fUAorPWzeRaftu zZ?L8K8tU4dHG#(J);jQtPUD*z$5W^AgF2aVYpYsn|1TX(exv+Hk`0B}4dd58Q*w37 z+m~&5bC! 
zPyOKdbWh!rFO7fkwVQiyI{52H`*--;{yd_2{_^Ogg|}~=cKKzG=Xx*xvhcvGFIGPD zj8@U^e_=`J(YHUo=AJ9!&+dJ)Q}s4bCru7L+l zgG9hg33!moJjsv#q8%j=7!Mj{rYtjLX%tE@PgZ3ApgCSp@|k+r4Y|Rb>|t4%ct+8(_}v9;yf zt{jnLZ^z`X0!}|l-=WMkD*v$zHZr>c3}}wU?>54bf^?S&`duTlJx^smnH3whEIX8w z`CyJhaw#$%J0LWa|BBC=I>z%{N3_)%I*nHUFs?_8Ey9?HsC z4H3nFyNHIgfijb)?8(enmV4Q-vQYNF<$)EgKff&hQ!HXzZlq%^}AU z`j>LK;b#;K$m^^&)wbB|U>m)83G$8H-QinrtNQ zuHg!;55+f>CZ@)RZ8VFo+$XqQSQ;Z0wLH?$S~X88YH4jY%38D5?5nRTYW20XDn;|F zTC0?zc`YqUQF9YDh+QpG2RA31O_bDEFrrpe-&9>ypAI%RnRs2Qi-MaUoG{2C9S_Xf3yGHvcGiG6!%w!SR-I`X*@ueH4NTxZ>G6srSPcfnLuIz8csGiRIti;mSZ?Ei6?+j})?Pp>u zP`S|jgL3{sIUvn{6nGu`ah63>^5Az~X~zr)X_v3doOT)E$z>AMOl6qlI#;|S`BzH*B1!6%c(=qwtOM+{#M^1pA&K|~r2~4?xE1ok z6fq3&t0e9pLEJK4K{*b%E#)=m{O2~wzee(#$Nfx+cS~HvERZu_$jMPwqE4w)pAo$n{ZC2r2& z>}B|+)yoXZrIO#Azsdg{iOXZmgq1N8H~rouTr2SGR6K6t<0LMR6BAa7BtAQZoU3Fi z(s|B7N(uPWV;3T8L*Nxd;13LeR}Xg_bL5m2>$0K z|4PXyyD41~Un6l5M?=nzA>_O}1il~mMey5|@M}t@ZI4NOO&UH7_W<$7)9?a`ccV=8*O-=p_Q;cpYnboyb_!Gl; zMW+XSt=?*Xpl-ghz*k*ewZJQtSN8Z|os_HbQ{PPyfubE(xJ3$d& zue?o-Uiy0FCGkZ!t0k!=IG>CjP@%=w+FIB6V2jau7y~yowIlW%-)FLh~eYi7C;cILOH2b`d`kI>{%jhFS zNmD@*_=j3vrKZ`3MQm-Z_cbal@ZJ)yuW^1|6>PS!%I}>taUS$B@aCr0`pMps34;-J zQaaQO$ml2bUkLk%61Jb>#P37=C@ruXrHPww`{(3+PX)!!_@AywFG-yEue0EBiPOD6 zXS&J1-Gb8_$iy?`)kyp$7W`5fHxNG2g0~NWFOo+&@zej#P5J*Rankeq7W^3tUTDFe zmIq+U4xmx`MB*fW5`L!q1u||R{1ywo%z|6_FLHnhiTEeuXUciVf?MSzk^`Ack{#y%vtA+myi~Jib_}3QPs^@L;I}YjTwD8wi z@Ea|7lLfcRdCG#1x9~5w;8y-mBu?|4Wa0n2<$RcE!EcdvCVs2`ped3KInBX*#~7&b zE~l;Om4H0<6ymqy=DXR%=SqI3EKR&F4W}{bXTIA?CEdi$cUyfLZobc!rQxRjtJ84v z9rjilZob3Bgg`ULHQ!-3lHwNJe1}z};pThGpN5<7t(9rG8E?kZaPz%&I1M-7TUU{S U7JbY(^tLqIj6>_waPz(Ozkq3hF8}}l literal 0 HcmV?d00001 diff --git a/multihashing.cc b/multihashing.cc index fad8023..74f974d 100644 --- a/multihashing.cc +++ b/multihashing.cc @@ -193,8 +193,8 @@ Handle scryptjane(const Arguments& args) { Local num = args[1]->ToNumber(); int timestamp = num->Value(); - Local num = args[2]->ToNumber(); - int nChainStarTime = num->Value(); + Local num2 = 
args[2]->ToNumber(); + int nChainStarTime = num2->Value(); char * input = Buffer::Data(target); diff --git a/package.json b/package.json index 6160b2f..dacd698 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ }, "keywords": [ "scrypt", - "scrypt-jane", + "scryptjane", "script-n", "x11", "quark", diff --git a/scryptjane/scrypt-jane-chacha.h b/scryptjane/scrypt-jane-chacha.h new file mode 100644 index 0000000..41d96e5 --- /dev/null +++ b/scryptjane/scrypt-jane-chacha.h @@ -0,0 +1,132 @@ +#define SCRYPT_MIX_BASE "ChaCha20/8" + +typedef uint32_t scrypt_mix_word_t; + +#define SCRYPT_WORDTO8_LE U32TO8_LE +#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP + +#define SCRYPT_BLOCK_BYTES 64 +#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) + +/* must have these here in case block bytes is ever != 64 */ +#include "scrypt-jane-romix-basic.h" + +#include "scrypt-jane-mix_chacha-avx.h" +#include "scrypt-jane-mix_chacha-ssse3.h" +#include "scrypt-jane-mix_chacha-sse2.h" +#include "scrypt-jane-mix_chacha.h" + +#if defined(SCRYPT_CHACHA_AVX) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx + #define SCRYPT_ROMIX_FN scrypt_ROMix_avx + #define SCRYPT_MIX_FN chacha_core_avx + #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop + #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3 + #define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3 + #define SCRYPT_MIX_FN chacha_core_ssse3 + #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop + #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2 + #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2 + #define SCRYPT_MIX_FN chacha_core_sse2 + #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop + #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop + #include "scrypt-jane-romix-template.h" 
+#endif + +/* cpu agnostic */ +#define SCRYPT_ROMIX_FN scrypt_ROMix_basic +#define SCRYPT_MIX_FN chacha_core_basic +#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian +#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian +#include "scrypt-jane-romix-template.h" + +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +static scrypt_ROMixfn +scrypt_getROMix() { + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_CHACHA_AVX) + if (cpuflags & cpu_avx) + return scrypt_ROMix_avx; + else +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + if (cpuflags & cpu_ssse3) + return scrypt_ROMix_ssse3; + else +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + if (cpuflags & cpu_sse2) + return scrypt_ROMix_sse2; + else +#endif + + return scrypt_ROMix_basic; +} +#endif + + +#if defined(SCRYPT_TEST_SPEED) +static size_t +available_implementations() { + size_t flags = 0; + +#if defined(SCRYPT_CHACHA_AVX) + flags |= cpu_avx; +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + flags |= cpu_ssse3; +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + flags |= cpu_sse2; +#endif + + return flags; +} +#endif + +static int +scrypt_test_mix() { + static const uint8_t expected[16] = { + 0x48,0x2b,0x2d,0xb8,0xa1,0x33,0x22,0x73,0xcd,0x16,0xc4,0xb4,0xb0,0x7f,0xb1,0x8a, + }; + + int ret = 1; + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_CHACHA_AVX) + if (cpuflags & cpu_avx) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, scrypt_romix_nop, scrypt_romix_nop, expected); +#endif + +#if defined(SCRYPT_CHACHA_SSSE3) + if (cpuflags & cpu_ssse3) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, scrypt_romix_nop, scrypt_romix_nop, expected); +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + if (cpuflags & cpu_sse2) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, scrypt_romix_nop, scrypt_romix_nop, expected); +#endif + +#if defined(SCRYPT_CHACHA_BASIC) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected); +#endif + + return ret; 
+} + diff --git a/scryptjane/scrypt-jane-hash.h b/scryptjane/scrypt-jane-hash.h new file mode 100644 index 0000000..db5c1db --- /dev/null +++ b/scryptjane/scrypt-jane-hash.h @@ -0,0 +1,48 @@ +#if defined(SCRYPT_BLAKE512) +#include "scrypt-jane-hash_blake512.h" +#elif defined(SCRYPT_BLAKE256) +#include "scrypt-jane-hash_blake256.h" +#elif defined(SCRYPT_SHA512) +#include "scrypt-jane-hash_sha512.h" +#elif defined(SCRYPT_SHA256) +#include "scrypt-jane-hash_sha256.h" +#elif defined(SCRYPT_SKEIN512) +#include "scrypt-jane-hash_skein512.h" +#elif defined(SCRYPT_KECCAK512) || defined(SCRYPT_KECCAK256) +#include "scrypt-jane-hash_keccak.h" +#else + #define SCRYPT_HASH "ERROR" + #define SCRYPT_HASH_BLOCK_SIZE 64 + #define SCRYPT_HASH_DIGEST_SIZE 64 + typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state; + typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + static void scrypt_hash_init(scrypt_hash_state *S) {} + static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {} + static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {} + static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0}; + #error must define a hash function! 
+#endif + +#include "scrypt-jane-pbkdf2.h" + +#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */ + +static int +scrypt_test_hash() { + scrypt_hash_state st; + scrypt_hash_digest hash, final; + uint8_t msg[SCRYPT_TEST_HASH_LEN]; + size_t i; + + for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++) + msg[i] = (uint8_t)i; + + scrypt_hash_init(&st); + for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) { + scrypt_hash(hash, msg, i); + scrypt_hash_update(&st, hash, sizeof(hash)); + } + scrypt_hash_finish(&st, final); + return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE); +} + diff --git a/scryptjane/scrypt-jane-hash_keccak.h b/scryptjane/scrypt-jane-hash_keccak.h new file mode 100644 index 0000000..7ed5574 --- /dev/null +++ b/scryptjane/scrypt-jane-hash_keccak.h @@ -0,0 +1,168 @@ +#if defined(SCRYPT_KECCAK256) + #define SCRYPT_HASH "Keccak-256" + #define SCRYPT_HASH_DIGEST_SIZE 32 +#else + #define SCRYPT_HASH "Keccak-512" + #define SCRYPT_HASH_DIGEST_SIZE 64 +#endif +#define SCRYPT_KECCAK_F 1600 +#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 256=512, 512=1024 */ +#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 256=1088, 512=576 */ +#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8) + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +typedef struct scrypt_hash_state_t { + uint64_t state[SCRYPT_KECCAK_F / 64]; + uint32_t leftover; + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; +} scrypt_hash_state; + +static const uint64_t keccak_round_constants[24] = { + 0x0000000000000001ull, 0x0000000000008082ull, + 0x800000000000808aull, 0x8000000080008000ull, + 0x000000000000808bull, 0x0000000080000001ull, + 0x8000000080008081ull, 0x8000000000008009ull, + 0x000000000000008aull, 0x0000000000000088ull, + 0x0000000080008009ull, 0x000000008000000aull, + 0x000000008000808bull, 0x800000000000008bull, + 0x8000000000008089ull, 0x8000000000008003ull, + 0x8000000000008002ull, 0x8000000000000080ull, + 0x000000000000800aull, 
0x800000008000000aull, + 0x8000000080008081ull, 0x8000000000008080ull, + 0x0000000080000001ull, 0x8000000080008008ull +}; + +static void +keccak_block(scrypt_hash_state *S, const uint8_t *in) { + size_t i; + uint64_t *s = S->state, t[5], u[5], v, w; + + /* absorb input */ + for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE / 8; i++, in += 8) + s[i] ^= U8TO64_LE(in); + + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ ROTL64(t[1], 1); + u[1] = t[0] ^ ROTL64(t[2], 1); + u[2] = t[1] ^ ROTL64(t[3], 1); + u[3] = t[2] ^ ROTL64(t[4], 1); + u[4] = t[3] ^ ROTL64(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) 
*/ + v = s[ 1]; + s[ 1] = ROTL64(s[ 6], 44); + s[ 6] = ROTL64(s[ 9], 20); + s[ 9] = ROTL64(s[22], 61); + s[22] = ROTL64(s[14], 39); + s[14] = ROTL64(s[20], 18); + s[20] = ROTL64(s[ 2], 62); + s[ 2] = ROTL64(s[12], 43); + s[12] = ROTL64(s[13], 25); + s[13] = ROTL64(s[19], 8); + s[19] = ROTL64(s[23], 56); + s[23] = ROTL64(s[15], 41); + s[15] = ROTL64(s[ 4], 27); + s[ 4] = ROTL64(s[24], 14); + s[24] = ROTL64(s[21], 2); + s[21] = ROTL64(s[ 8], 55); + s[ 8] = ROTL64(s[16], 45); + s[16] = ROTL64(s[ 5], 36); + s[ 5] = ROTL64(s[ 3], 28); + s[ 3] = ROTL64(s[18], 21); + s[18] = ROTL64(s[17], 15); + s[17] = ROTL64(s[11], 10); + s[11] = ROTL64(s[ 7], 6); + s[ 7] = ROTL64(s[10], 3); + s[10] = ROTL64( v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w; + v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= keccak_round_constants[i]; + } +} + +static void +scrypt_hash_init(scrypt_hash_state *S) { + memset(S, 0, sizeof(*S)); +} + +static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t want; + + /* handle the previous data */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? 
want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; + in += want; + inlen -= want; + keccak_block(S, S->buffer); + } + + /* handle the current data */ + while (inlen >= SCRYPT_HASH_BLOCK_SIZE) { + keccak_block(S, in); + in += SCRYPT_HASH_BLOCK_SIZE; + inlen -= SCRYPT_HASH_BLOCK_SIZE; + } + + /* handle leftover data */ + S->leftover = (uint32_t)inlen; + if (S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +static void +scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + size_t i; + + S->buffer[S->leftover] = 0x01; + memset(S->buffer + (S->leftover + 1), 0, SCRYPT_HASH_BLOCK_SIZE - (S->leftover + 1)); + S->buffer[SCRYPT_HASH_BLOCK_SIZE - 1] |= 0x80; + keccak_block(S, S->buffer); + + for (i = 0; i < SCRYPT_HASH_DIGEST_SIZE; i += 8) { + U64TO8_LE(&hash[i], S->state[i / 8]); + } +} + +#if defined(SCRYPT_KECCAK256) +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0x26,0xb7,0x10,0xb3,0x66,0xb1,0xd1,0xb1,0x25,0xfc,0x3e,0xe3,0x1e,0x33,0x1d,0x19, + 0x94,0xaa,0x63,0x7a,0xd5,0x77,0x29,0xb4,0x27,0xe9,0xe0,0xf4,0x19,0xba,0x68,0xea, +}; +#else +static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0x17,0xc7,0x8c,0xa0,0xd9,0x08,0x1d,0xba,0x8a,0xc8,0x3e,0x07,0x90,0xda,0x91,0x88, + 0x25,0xbd,0xd3,0xf8,0x78,0x4a,0x8d,0x5e,0xe4,0x96,0x9c,0x01,0xf3,0xeb,0xdc,0x12, + 0xea,0x35,0x57,0xba,0x94,0xb8,0xe9,0xb9,0x27,0x45,0x0a,0x48,0x5c,0x3d,0x69,0xf0, + 0xdb,0x22,0x38,0xb5,0x52,0x22,0x29,0xea,0x7a,0xb2,0xe6,0x07,0xaa,0x37,0x4d,0xe6, +}; +#endif + diff --git a/scryptjane/scrypt-jane-hash_sha256.h b/scryptjane/scrypt-jane-hash_sha256.h new file mode 100644 index 0000000..d06d3e1 --- /dev/null +++ b/scryptjane/scrypt-jane-hash_sha256.h @@ -0,0 +1,135 @@ +#define SCRYPT_HASH "SHA-2-256" +#define SCRYPT_HASH_BLOCK_SIZE 64 +#define SCRYPT_HASH_DIGEST_SIZE 32 + +typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +typedef 
struct scrypt_hash_state_t { + uint32_t H[8]; /* chaining value, eight 32-bit words */ + uint64_t T; /* message bits compressed so far; advanced per block by sha256_blocks() */ + uint32_t leftover; /* number of bytes currently parked in buffer */ + uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE]; /* holds a partial input block between calls */ +} scrypt_hash_state; + +/* Per-round additive constants: STEP(i) adds sha256_constants[i]. */ static const uint32_t sha256_constants[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +/* Round helpers. Arguments are pasted unparenthesized, so pass only simple operands, as every use in this file does. S0 and S1 are rotate-only mixes applied to r[0] and r[4] in STEP. G0 and G1 are rotate-and-shift mixes used by W1 to expand the message schedule. W0 reads input word i through U8TO32_BE; W1 derives w[i] from four earlier entries. STEP is one round over the working registers r[0..7] and expects r, w, t0 and t1 to be in scope. */ #define Ch(x,y,z) (z ^ (x & (y ^ z))) +#define Maj(x,y,z) (((x | y) & z) | (x & y)) +#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22)) +#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25)) +#define G0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ (x >> 3)) +#define G1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10)) +#define W0(in,i) (U8TO32_BE(&in[i * 4])) +#define W1(i) (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16]) +#define STEP(i) \ + t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \ + t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha256_constants[i] + w[i]; \ + r[7] = r[6]; \ + r[6] = r[5]; \ + r[5] = r[4]; \ + r[4] = r[3] + t0; \ + r[3] = r[2]; \ + r[2] = r[1]; \ + r[1] = r[0]; \ + r[0] = t0 + t1; + +/* Compress the given number of consecutive SCRYPT_HASH_BLOCK_SIZE-byte blocks, starting at in, into S->H, advancing S->T as it goes. */ static void +sha256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) { + uint32_t r[8], w[64], t0, t1; + size_t i; + + /* r is loaded only once: the feed-forward at the end of each loop pass leaves r equal to the updated S->H, ready for the next block */ for (i = 0; i < 8; i++) r[i] = S->H[i]; + + while (blocks--) { + /* w[0..15] are the 16 input words of this block; w[16..63] are expanded from them */ for (i = 0; i < 16; i++) { w[i] = W0(in, i); } + for (i = 16; i < 64; i++) { w[i] = W1(i); } +
for (i = 0; i < 64; i++) { STEP(i); } + /* feed-forward: add the previous chaining value, store the sum, and keep r equal to the new S->H */ for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; } + /* S->T counts bits, hence the factor of 8 */ S->T += SCRYPT_HASH_BLOCK_SIZE * 8; + in += SCRYPT_HASH_BLOCK_SIZE; + } +} + +/* Begin a new hash: load the eight initial hash words (the values FIPS 180-4 gives for SHA-256) and clear the bit count and the buffered-byte count. S->buffer is left as is; it is only read after being written. */ static void +scrypt_hash_init(scrypt_hash_state *S) { + S->H[0] = 0x6a09e667; + S->H[1] = 0xbb67ae85; + S->H[2] = 0x3c6ef372; + S->H[3] = 0xa54ff53a; + S->H[4] = 0x510e527f; + S->H[5] = 0x9b05688c; + S->H[6] = 0x1f83d9ab; + S->H[7] = 0x5be0cd19; + S->T = 0; + S->leftover = 0; +} + +/* Absorb inlen bytes from in. A partially filled S->buffer is topped up first and compressed once full; whole blocks are then compressed straight from in with a single sha256_blocks() call; the remaining tail is parked in S->buffer with its length in S->leftover. */ static void +scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { + size_t blocks, want; + + /* handle the previous data */ + if (S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if (S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; /* buffered block still incomplete: keep it for the next call */ + in += want; + inlen -= want; + sha256_blocks(S, S->buffer, 1); + } + + /* handle the current data */ + /* despite its name, blocks is a byte count here: inlen rounded down to a multiple of the block size */ blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); + S->leftover = (uint32_t)(inlen - blocks); + if (blocks) { + sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE); + in += blocks; + } + + /* handle leftover data */ + if (S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +/* Pad the pending bytes, compress the final block or blocks, and write the 32-byte digest to hash as H[0..7], four bytes each through U32TO8_BE. Padding is a 0x80 byte, zero fill, and the message bit length stored through U64TO8_BE in bytes 56..63 of the last block. */ static void +scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { + /* total message length in bits, captured now because the sha256_blocks() calls below keep advancing S->T */ uint64_t t = S->T + (S->leftover * 8); + + S->buffer[S->leftover] = 0x80; + /* with at most 55 pending bytes the 8-byte length still fits in this block; otherwise finish this block and start one whose first 56 bytes are zero */ if (S->leftover <= 55) { + memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover); + } else { + memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover); + sha256_blocks(S, S->buffer, 1); + memset(S->buffer, 0, 56); + } + + U64TO8_BE(S->buffer + 56, t); + sha256_blocks(S, S->buffer, 1); + + U32TO8_BE(&hash[ 0], S->H[0]); + U32TO8_BE(&hash[ 4], S->H[1]); + U32TO8_BE(&hash[ 8], S->H[2]); + U32TO8_BE(&hash[12], S->H[3]); + U32TO8_BE(&hash[16], S->H[4]); + U32TO8_BE(&hash[20], S->H[5]); + U32TO8_BE(&hash[24], S->H[6]); + U32TO8_BE(&hash[28], S->H[7]); +} + +static const uint8_t
scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = { + 0xee,0x36,0xae,0xa6,0x65,0xf0,0x28,0x7d,0xc9,0xde,0xd8,0xad,0x48,0x33,0x7d,0xbf, + 0xcb,0xc0,0x48,0xfa,0x5f,0x92,0xfd,0x0a,0x95,0x6f,0x34,0x8e,0x8c,0x1e,0x73,0xad, +}; diff --git a/scryptjane/scrypt-jane-mix_chacha-avx.h b/scryptjane/scrypt-jane-mix_chacha-avx.h new file mode 100644 index 0000000..50d6e2d --- /dev/null +++ b/scryptjane/scrypt-jane-mix_chacha-avx.h @@ -0,0 +1,340 @@ +/* x86 */ +#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,64) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(vmovdqa xmm4,[ssse3_rotl16_32bit]) + a2(vmovdqa xmm5,[ssse3_rotl8_32bit]) + a2(vmovdqa xmm0,[ecx+esi+0]) + a2(vmovdqa xmm1,[ecx+esi+16]) + a2(vmovdqa xmm2,[ecx+esi+32]) + a2(vmovdqa xmm3,[ecx+esi+48]) + a1(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[ecx+eax+0]) + a3(vpxor xmm1,xmm1,[ecx+eax+16]) + a3(vpxor xmm2,xmm2,[ecx+eax+32]) + a3(vpxor xmm3,xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_avx_loop:) + a2(and eax, eax) + a3(vpxor xmm0,xmm0,[esi+ecx+0]) + a3(vpxor xmm1,xmm1,[esi+ecx+16]) + a3(vpxor xmm2,xmm2,[esi+ecx+32]) + a3(vpxor xmm3,xmm3,[esi+ecx+48]) + a1(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[eax+ecx+0]) + a3(vpxor xmm1,xmm1,[eax+ecx+16]) + a3(vpxor xmm2,xmm2,[eax+ecx+32]) + a3(vpxor xmm3,xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa [esp+0],xmm0) + a2(vmovdqa [esp+16],xmm1) + a2(vmovdqa [esp+32],xmm2) + 
a2(vmovdqa [esp+48],xmm3) + a2(mov eax,8) + a1(scrypt_chacha_avx_loop: ) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm6,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm6) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + a3(vpshufd xmm0,xmm0,0x93) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm2,xmm2,0x39) + a3(vpsrld xmm6,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm6) + a2(sub eax,2) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm6,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm6) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + a3(vpshufd xmm0,xmm0,0x39) + a3(vpaddd xmm2,xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a3(vpsrld xmm6,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm6) + a1(ja scrypt_chacha_avx_loop) + a3(vpaddd xmm0,xmm0,[esp+0]) + a3(vpaddd xmm1,xmm1,[esp+16]) + a3(vpaddd xmm2,xmm2,[esp+32]) + a3(vpaddd xmm3,xmm3,[esp+48]) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(vmovdqa [eax+0],xmm0) + a2(vmovdqa [eax+16],xmm1) + a2(vmovdqa [eax+32],xmm2) + a2(vmovdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + a1(jne scrypt_ChunkMix_avx_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + a1(ret 16) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t 
*Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a2(lea rcx,[rcx*2]) + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa xmm4,[ssse3_rotl16_32bit]) + a2(vmovdqa xmm5,[ssse3_rotl8_32bit]) + a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + a1(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor r8,r8) + a2(xor r9,r9) + a1(scrypt_ChunkMix_avx_loop:) + a2(and rdx, rdx) + a3(vpxor xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + a1(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa xmm8,xmm0) + a2(vmovdqa xmm9,xmm1) + a2(vmovdqa xmm10,xmm2) + a2(vmovdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_chacha_avx_loop: ) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm12,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + a3(vpshufd xmm0,xmm0,0x93) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpshufd xmm2,xmm2,0x39) + a3(vpsrld xmm12,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm12) + a2(sub rax,2) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm4) + a3(vpaddd xmm2,xmm2,xmm3) + a3(vpxor xmm1,xmm1,xmm2) + a3(vpsrld xmm12,xmm1,20) + a3(vpslld xmm1,xmm1,12) + a3(vpxor xmm1,xmm1,xmm12) + a3(vpaddd xmm0,xmm0,xmm1) + a3(vpxor xmm3,xmm3,xmm0) + a3(vpshufb xmm3,xmm3,xmm5) + 
a3(vpshufd xmm0,xmm0,0x39) + a3(vpaddd xmm2,xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a3(vpxor xmm1,xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a3(vpsrld xmm12,xmm1,25) + a3(vpslld xmm1,xmm1,7) + a3(vpxor xmm1,xmm1,xmm12) + a1(ja scrypt_chacha_avx_loop) + a3(vpaddd xmm0,xmm0,xmm8) + a3(vpaddd xmm1,xmm1,xmm9) + a3(vpaddd xmm2,xmm2,xmm10) + a3(vpaddd xmm3,xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + a1(jne scrypt_ChunkMix_avx_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_AVX + +static void NOINLINE +scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3; + const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi *)&ssse3_rotl8_32bit; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = 
_mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x93); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x39); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x39); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x93); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_CHACHA_AVX) + #undef SCRYPT_MIX + #define SCRYPT_MIX "ChaCha/8-AVX" + #undef SCRYPT_CHACHA_INCLUDED + #define SCRYPT_CHACHA_INCLUDED +#endif diff --git a/scryptjane/scrypt-jane-mix_chacha-sse2.h b/scryptjane/scrypt-jane-mix_chacha-sse2.h new file mode 100644 index 0000000..d2192c8 --- /dev/null +++ 
b/scryptjane/scrypt-jane-mix_chacha-sse2.h @@ -0,0 +1,371 @@ +/* x86 */ +#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,16) + a2(and esp,~15) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[ecx+eax+0]) + a2(pxor xmm1,[ecx+eax+16]) + a2(pxor xmm2,[ecx+eax+32]) + a2(pxor xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and eax, eax) + a2(pxor xmm0,[esi+ecx+0]) + a2(pxor xmm1,[esi+ecx+16]) + a2(pxor xmm2,[esi+ecx+32]) + a2(pxor xmm3,[esi+ecx+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[eax+ecx+0]) + a2(pxor xmm1,[eax+ecx+16]) + a2(pxor xmm2,[eax+ecx+32]) + a2(pxor xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa [esp+0],xmm0) + a2(movdqa xmm4,xmm1) + a2(movdqa xmm5,xmm2) + a2(movdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_chacha_sse2_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,16) + a2(psrld xmm6,16) + a2(pxor xmm3,xmm6) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) + a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor 
xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a2(sub eax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,16) + a2(psrld xmm6,16) + a2(pxor xmm3,xmm6) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) + a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a1(ja scrypt_chacha_sse2_loop) + a2(paddd xmm0,[esp+0]) + a2(paddd xmm1,xmm4) + a2(paddd xmm2,xmm5) + a2(paddd xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(movdqa [eax+0],xmm0) + a2(movdqa [eax+16],xmm1) + a2(movdqa [eax+32],xmm2) + a2(movdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + a1(jne scrypt_ChunkMix_sse2_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + a1(ret 16) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a2(lea rcx,[rcx*2]) + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + 
a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa xmm8,xmm0) + a2(movdqa xmm9,xmm1) + a2(movdqa xmm10,xmm2) + a2(movdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_chacha_sse2_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,16) + a2(psrld xmm6,16) + a2(pxor xmm3,xmm6) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) + a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a2(sub rax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,16) + a2(psrld xmm6,16) + a2(pxor xmm3,xmm6) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(movdqa xmm6,xmm3) + a2(pslld xmm3,8) + a2(psrld xmm6,24) + a2(pxor xmm3,xmm6) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a1(ja scrypt_chacha_sse2_loop) + a2(paddd xmm0,xmm8) + a2(paddd xmm1,xmm9) + a2(paddd xmm2,xmm10) + a2(paddd xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp 
r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + a1(jne scrypt_ChunkMix_sse2_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSE2 + +static void NOINLINE +scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,t0,t1,t2,t3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_or_si128(_mm_slli_epi32(x3, 16), _mm_srli_epi32(x4, 16)); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24)); 
+ x0 = _mm_shuffle_epi32(x0, 0x93); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x39); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_or_si128(_mm_slli_epi32(x3, 16), _mm_srli_epi32(x4, 16)); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x4 = x3; + x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24)); + x0 = _mm_shuffle_epi32(x0, 0x39); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x93); + x4 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25)); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_CHACHA_SSE2) + #undef SCRYPT_MIX + #define SCRYPT_MIX "ChaCha/8-SSE2" + #undef SCRYPT_CHACHA_INCLUDED + #define SCRYPT_CHACHA_INCLUDED +#endif diff --git a/scryptjane/scrypt-jane-mix_chacha-ssse3.h b/scryptjane/scrypt-jane-mix_chacha-ssse3.h new file mode 100644 index 0000000..b25e356 --- /dev/null +++ b/scryptjane/scrypt-jane-mix_chacha-ssse3.h @@ -0,0 +1,348 @@ +/* x86 */ +#if defined(X86ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSSE3 + +asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_ssse3) + a1(push ebx) 
+ a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,64) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm4,[ssse3_rotl16_32bit]) + a2(movdqa xmm5,[ssse3_rotl8_32bit]) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + a1(jz scrypt_ChunkMix_ssse3_no_xor1) + a2(pxor xmm0,[ecx+eax+0]) + a2(pxor xmm1,[ecx+eax+16]) + a2(pxor xmm2,[ecx+eax+32]) + a2(pxor xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_ssse3_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_ssse3_loop:) + a2(and eax, eax) + a2(pxor xmm0,[esi+ecx+0]) + a2(pxor xmm1,[esi+ecx+16]) + a2(pxor xmm2,[esi+ecx+32]) + a2(pxor xmm3,[esi+ecx+48]) + a1(jz scrypt_ChunkMix_ssse3_no_xor2) + a2(pxor xmm0,[eax+ecx+0]) + a2(pxor xmm1,[eax+ecx+16]) + a2(pxor xmm2,[eax+ecx+32]) + a2(pxor xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_ssse3_no_xor2:) + a2(movdqa [esp+0],xmm0) + a2(movdqa [esp+16],xmm1) + a2(movdqa [esp+32],xmm2) + a2(movdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_chacha_ssse3_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a2(sub eax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm6,20) + a2(pxor xmm1,xmm6) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd 
xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm6,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm6,25) + a2(pxor xmm1,xmm6) + a1(ja scrypt_chacha_ssse3_loop) + a2(paddd xmm0,[esp+0]) + a2(paddd xmm1,[esp+16]) + a2(paddd xmm2,[esp+32]) + a2(paddd xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(movdqa [eax+0],xmm0) + a2(movdqa [eax+16],xmm1) + a2(movdqa [eax+32],xmm2) + a2(movdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + a1(jne scrypt_ChunkMix_ssse3_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + a1(ret 16) +asm_naked_fn_end(scrypt_ChunkMix_ssse3) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSSE3 + +asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_ssse3) + a2(lea rcx,[rcx*2]) + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm4,[ssse3_rotl16_32bit]) + a2(movdqa xmm5,[ssse3_rotl8_32bit]) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + a1(jz scrypt_ChunkMix_ssse3_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + a1(scrypt_ChunkMix_ssse3_no_xor1:) + a2(xor r8,r8) + a2(xor r9,r9) + a1(scrypt_ChunkMix_ssse3_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + a1(jz scrypt_ChunkMix_ssse3_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_ssse3_no_xor2:) + a2(movdqa xmm8,xmm0) + a2(movdqa xmm9,xmm1) 
+ a2(movdqa xmm10,xmm2) + a2(movdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_chacha_ssse3_loop: ) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm12,20) + a2(pxor xmm1,xmm12) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x93) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x39) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm12,25) + a2(pxor xmm1,xmm12) + a2(sub rax,2) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm4) + a2(paddd xmm2,xmm3) + a2(pxor xmm1,xmm2) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,12) + a2(psrld xmm12,20) + a2(pxor xmm1,xmm12) + a2(paddd xmm0,xmm1) + a2(pxor xmm3,xmm0) + a2(pshufb xmm3,xmm5) + a3(pshufd xmm0,xmm0,0x39) + a2(paddd xmm2,xmm3) + a3(pshufd xmm3,xmm3,0x4e) + a2(pxor xmm1,xmm2) + a3(pshufd xmm2,xmm2,0x93) + a2(movdqa xmm12,xmm1) + a2(pslld xmm1,7) + a2(psrld xmm12,25) + a2(pxor xmm1,xmm12) + a1(ja scrypt_chacha_ssse3_loop) + a2(paddd xmm0,xmm8) + a2(paddd xmm1,xmm9) + a2(paddd xmm2,xmm10) + a2(paddd xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + a1(jne scrypt_ChunkMix_ssse3_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_ssse3) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) + +#define SCRYPT_CHACHA_SSSE3 + +static void NOINLINE +scrypt_ChunkMix_ssse3(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3; + const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi 
*)&ssse3_rotl8_32bit; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x93); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = _mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x39); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x4); + x2 = _mm_add_epi32(x2, x3); + x1 = _mm_xor_si128(x1, x2); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20)); + x0 = _mm_add_epi32(x0, x1); + x3 = _mm_xor_si128(x3, x0); + x3 = _mm_shuffle_epi8(x3, x5); + x0 = _mm_shuffle_epi32(x0, 0x39); + x2 = _mm_add_epi32(x2, x3); + x3 = _mm_shuffle_epi32(x3, 0x4e); + x1 = 
_mm_xor_si128(x1, x2); + x2 = _mm_shuffle_epi32(x2, 0x93); + x6 = x1; + x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25)); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +/* If one of the SSSE3 variants above was compiled, record its name in SCRYPT_MIX and flag that a ChaCha mixer is present. */ #if defined(SCRYPT_CHACHA_SSSE3) + #undef SCRYPT_MIX + #define SCRYPT_MIX "ChaCha/8-SSSE3" + #undef SCRYPT_CHACHA_INCLUDED + #define SCRYPT_CHACHA_INCLUDED +#endif diff --git a/scryptjane/scrypt-jane-mix_chacha.h b/scryptjane/scrypt-jane-mix_chacha.h new file mode 100644 index 0000000..85ee9c1 --- /dev/null +++ b/scryptjane/scrypt-jane-mix_chacha.h @@ -0,0 +1,69 @@ +/* Reference implementation: skipped only when SCRYPT_CHOOSE_COMPILETIME is defined and SCRYPT_CHACHA_INCLUDED has already been defined by another mixer. */ #if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED) + +#undef SCRYPT_MIX +#define SCRYPT_MIX "ChaCha20/8 Ref" + +#undef SCRYPT_CHACHA_INCLUDED +#define SCRYPT_CHACHA_INCLUDED +#define SCRYPT_CHACHA_BASIC + +/* Portable ChaCha core, applied to state in place. The 16 words are copied into locals, 8 rounds are run as four double rounds (each loop pass does four column quarter-rounds, then four diagonal ones), and the result is added back onto the original words. */ static void +chacha_core_basic(uint32_t state[16]) { + size_t rounds = 8; + uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t; + + x0 = state[0]; + x1 = state[1]; + x2 = state[2]; + x3 = state[3]; + x4 = state[4]; + x5 = state[5]; + x6 = state[6]; + x7 = state[7]; + x8 = state[8]; + x9 = state[9]; + x10 = state[10]; + x11 = state[11]; + x12 = state[12]; + x13 = state[13]; + x14 = state[14]; + x15 = state[15]; + + /* one quarter-round on four words, with left-rotations by 16, 12, 8 and 7; t is scratch. The expansion ends in a semicolon, so call sites carry none. */ #define quarter(a,b,c,d) \ + a += b; t = d^a; d = ROTL32(t,16); \ + c += d; t = b^c; b = ROTL32(t,12); \ + a += b; t = d^a; d = ROTL32(t, 8); \ + c += d; t = b^c; b = ROTL32(t, 7); + + /* rounds drops by 2 per pass: 8, 6, 4, 2 */ for (; rounds; rounds -= 2) { + quarter( x0, x4, x8,x12) + quarter( x1, x5, x9,x13) + quarter( x2, x6,x10,x14) + quarter( x3, x7,x11,x15) + quarter( x0, x5,x10,x15) + quarter( x1, x6,x11,x12) + quarter( x2, x7, x8,x13) + quarter( x3, x4, x9,x14) + } + + /* feed-forward: add the permuted words onto the untouched input words */ state[0] += x0; + state[1] += x1; + state[2]
+= x2; + state[3] += x3; + state[4] += x4; + state[5] += x5; + state[6] += x6; + state[7] += x7; + state[8] += x8; + state[9] += x9; + state[10] += x10; + state[11] += x11; + state[12] += x12; + state[13] += x13; + state[14] += x14; + state[15] += x15; + + #undef quarter +} + +#endif \ No newline at end of file diff --git a/scryptjane/scrypt-jane-mix_salsa-avx.h b/scryptjane/scrypt-jane-mix_salsa-avx.h new file mode 100644 index 0000000..15fb48e --- /dev/null +++ b/scryptjane/scrypt-jane-mix_salsa-avx.h @@ -0,0 +1,381 @@ +/* x86 */ +#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,32) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + a1(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[ecx+eax+0]) + a3(vpxor xmm1,xmm1,[ecx+eax+16]) + a3(vpxor xmm2,xmm2,[ecx+eax+32]) + a3(vpxor xmm3,xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_avx_loop:) + a2(and eax, eax) + a3(vpxor xmm0,xmm0,[esi+ecx+0]) + a3(vpxor xmm1,xmm1,[esi+ecx+16]) + a3(vpxor xmm2,xmm2,[esi+ecx+32]) + a3(vpxor xmm3,xmm3,[esi+ecx+48]) + a1(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[eax+ecx+0]) + a3(vpxor xmm1,xmm1,[eax+ecx+16]) + a3(vpxor xmm2,xmm2,[eax+ecx+32]) + a3(vpxor xmm3,xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa [esp+0],xmm0) + a2(vmovdqa [esp+16],xmm1) + a2(vmovdqa xmm6,xmm2) + 
a2(vmovdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_salsa_avx_loop: ) + a3(vpaddd xmm4, xmm1, xmm0) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm3, xmm3, xmm5) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm0, xmm3) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm3, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm1, xmm1, xmm5) + a3(pshufd xmm3, xmm3, 0x93) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm2, xmm1) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a2(sub eax, 2) + a3(vpaddd xmm4, xmm3, xmm0) + a3(pshufd xmm1, xmm1, 0x39) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm1, xmm1, xmm5) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm0, xmm1) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm1, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm3, xmm3, xmm5) + a3(pshufd xmm1, xmm1, 0x93) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm2, xmm3) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(pshufd xmm3, xmm3, 0x39) + a1(ja scrypt_salsa_avx_loop) + a3(vpaddd xmm0,xmm0,[esp+0]) + a3(vpaddd xmm1,xmm1,[esp+16]) + a3(vpaddd xmm2,xmm2,xmm6) + a3(vpaddd xmm3,xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(vmovdqa [eax+0],xmm0) + a2(vmovdqa [eax+16],xmm1) + a2(vmovdqa [eax+32],xmm2) + a2(vmovdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + a1(jne scrypt_ChunkMix_avx_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + a1(ret 16) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif 
+ + + +/* x64 */ +#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_AVX + +asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_avx) + a2(lea rcx,[rcx*2]) + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(vmovdqa xmm0,[rax+0]) + a2(vmovdqa xmm1,[rax+16]) + a2(vmovdqa xmm2,[rax+32]) + a2(vmovdqa xmm3,[rax+48]) + a1(jz scrypt_ChunkMix_avx_no_xor1) + a3(vpxor xmm0,xmm0,[r9+0]) + a3(vpxor xmm1,xmm1,[r9+16]) + a3(vpxor xmm2,xmm2,[r9+32]) + a3(vpxor xmm3,xmm3,[r9+48]) + a1(scrypt_ChunkMix_avx_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_avx_loop:) + a2(and rdx, rdx) + a3(vpxor xmm0,xmm0,[rsi+r9+0]) + a3(vpxor xmm1,xmm1,[rsi+r9+16]) + a3(vpxor xmm2,xmm2,[rsi+r9+32]) + a3(vpxor xmm3,xmm3,[rsi+r9+48]) + a1(jz scrypt_ChunkMix_avx_no_xor2) + a3(vpxor xmm0,xmm0,[rdx+r9+0]) + a3(vpxor xmm1,xmm1,[rdx+r9+16]) + a3(vpxor xmm2,xmm2,[rdx+r9+32]) + a3(vpxor xmm3,xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_avx_no_xor2:) + a2(vmovdqa xmm8,xmm0) + a2(vmovdqa xmm9,xmm1) + a2(vmovdqa xmm10,xmm2) + a2(vmovdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_salsa_avx_loop: ) + a3(vpaddd xmm4, xmm1, xmm0) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm3, xmm3, xmm5) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm0, xmm3) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm3, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm1, xmm1, xmm5) + a3(pshufd xmm3, xmm3, 0x93) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm2, xmm1) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a2(sub rax, 2) + a3(vpaddd 
xmm4, xmm3, xmm0) + a3(pshufd xmm1, xmm1, 0x39) + a3(vpsrld xmm5, xmm4, 25) + a3(vpslld xmm4, xmm4, 7) + a3(vpxor xmm1, xmm1, xmm5) + a3(vpxor xmm1, xmm1, xmm4) + a3(vpaddd xmm4, xmm0, xmm1) + a3(vpsrld xmm5, xmm4, 23) + a3(vpslld xmm4, xmm4, 9) + a3(vpxor xmm2, xmm2, xmm5) + a3(vpxor xmm2, xmm2, xmm4) + a3(vpaddd xmm4, xmm1, xmm2) + a3(vpsrld xmm5, xmm4, 19) + a3(vpslld xmm4, xmm4, 13) + a3(vpxor xmm3, xmm3, xmm5) + a3(pshufd xmm1, xmm1, 0x93) + a3(vpxor xmm3, xmm3, xmm4) + a3(vpaddd xmm4, xmm2, xmm3) + a3(vpsrld xmm5, xmm4, 14) + a3(vpslld xmm4, xmm4, 18) + a3(vpxor xmm0, xmm0, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a3(vpxor xmm0, xmm0, xmm4) + a3(pshufd xmm3, xmm3, 0x39) + a1(ja scrypt_salsa_avx_loop) + a3(vpaddd xmm0,xmm0,xmm8) + a3(vpaddd xmm1,xmm1,xmm9) + a3(vpaddd xmm2,xmm2,xmm10) + a3(vpaddd xmm3,xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(vmovdqa [rax+0],xmm0) + a2(vmovdqa [rax+16],xmm1) + a2(vmovdqa [rax+32],xmm2) + a2(vmovdqa [rax+48],xmm3) + a1(jne scrypt_ChunkMix_avx_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_avx) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_AVX + +static void NOINLINE +scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3; + size_t rounds; + + /* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < 
blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x4 = x1; + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x3 = _mm_xor_si128(x3, x4); + x4 = x0; + x3 = _mm_xor_si128(x3, x5); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = _mm_slli_epi32(x4, 9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x3; + x2 = _mm_xor_si128(x2, x5); + x3 = _mm_shuffle_epi32(x3, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x1 = _mm_xor_si128(x1, x4); + x4 = x2; + x1 = _mm_xor_si128(x1, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x4 = x3; + x0 = _mm_xor_si128(x0, x5); + x1 = _mm_shuffle_epi32(x1, 0x39); + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x1 = _mm_xor_si128(x1, x4); + x4 = x0; + x1 = _mm_xor_si128(x1, x5); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x1; + x2 = _mm_xor_si128(x2, x5); + x1 = _mm_shuffle_epi32(x1, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x3 = _mm_xor_si128(x3, x4); + x4 = x2; + x3 = _mm_xor_si128(x3, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = 
_mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x3 = _mm_shuffle_epi32(x3, 0x39); + x0 = _mm_xor_si128(x0, x5); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_SALSA_AVX) + /* uses salsa_core_tangle_sse2 */ + + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa/8-AVX" + #undef SCRYPT_SALSA_INCLUDED + #define SCRYPT_SALSA_INCLUDED +#endif diff --git a/scryptjane/scrypt-jane-mix_salsa-sse2.h b/scryptjane/scrypt-jane-mix_salsa-sse2.h new file mode 100644 index 0000000..4898659 --- /dev/null +++ b/scryptjane/scrypt-jane-mix_salsa-sse2.h @@ -0,0 +1,443 @@ +/* x86 */ +#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a1(push ebx) + a1(push edi) + a1(push esi) + a1(push ebp) + a2(mov ebp,esp) + a2(mov edi,[ebp+20]) + a2(mov esi,[ebp+24]) + a2(mov eax,[ebp+28]) + a2(mov ebx,[ebp+32]) + a2(sub esp,32) + a2(and esp,~63) + a2(lea edx,[ebx*2]) + a2(shl edx,6) + a2(lea ecx,[edx-64]) + a2(and eax, eax) + a2(movdqa xmm0,[ecx+esi+0]) + a2(movdqa xmm1,[ecx+esi+16]) + a2(movdqa xmm2,[ecx+esi+32]) + a2(movdqa xmm3,[ecx+esi+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[ecx+eax+0]) + a2(pxor xmm1,[ecx+eax+16]) + a2(pxor xmm2,[ecx+eax+32]) + a2(pxor xmm3,[ecx+eax+48]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor ecx,ecx) + a2(xor ebx,ebx) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and eax, eax) + a2(pxor xmm0,[esi+ecx+0]) + a2(pxor xmm1,[esi+ecx+16]) + a2(pxor 
xmm2,[esi+ecx+32]) + a2(pxor xmm3,[esi+ecx+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[eax+ecx+0]) + a2(pxor xmm1,[eax+ecx+16]) + a2(pxor xmm2,[eax+ecx+32]) + a2(pxor xmm3,[eax+ecx+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa [esp+0],xmm0) + a2(movdqa [esp+16],xmm1) + a2(movdqa xmm6,xmm2) + a2(movdqa xmm7,xmm3) + a2(mov eax,8) + a1(scrypt_salsa_sse2_loop: ) + a2(movdqa xmm4, xmm1) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm3, xmm5) + a2(paddd xmm4, xmm3) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm2, xmm5) + a3(pshufd xmm3, xmm3, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm1, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm0, xmm5) + a3(pshufd xmm1, xmm1, 0x39) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm1, xmm5) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm1) + a2(pxor xmm2, xmm5) + a3(pshufd xmm1, xmm1, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm3, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm3) + a2(sub eax, 2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a3(pshufd xmm3, xmm3, 0x39) + a2(pxor xmm0, xmm5) + a1(ja scrypt_salsa_sse2_loop) + a2(paddd xmm0,[esp+0]) + a2(paddd xmm1,[esp+16]) + a2(paddd xmm2,xmm6) + a2(paddd xmm3,xmm7) + a2(lea eax,[ebx+ecx]) + a2(xor 
ebx,edx) + a2(and eax,~0x7f) + a2(add ecx,64) + a2(shr eax,1) + a2(add eax, edi) + a2(cmp ecx,edx) + a2(movdqa [eax+0],xmm0) + a2(movdqa [eax+16],xmm1) + a2(movdqa [eax+32],xmm2) + a2(movdqa [eax+48],xmm3) + a2(mov eax,[ebp+28]) + a1(jne scrypt_ChunkMix_sse2_loop) + a2(mov esp,ebp) + a1(pop ebp) + a1(pop esi) + a1(pop edi) + a1(pop ebx) + a1(ret 16) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + + +/* x64 */ +#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_SSE2 + +asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) +asm_naked_fn(scrypt_ChunkMix_sse2) + a2(lea rcx,[rcx*2]) + a2(shl rcx,6) + a2(lea r9,[rcx-64]) + a2(lea rax,[rsi+r9]) + a2(lea r9,[rdx+r9]) + a2(and rdx, rdx) + a2(movdqa xmm0,[rax+0]) + a2(movdqa xmm1,[rax+16]) + a2(movdqa xmm2,[rax+32]) + a2(movdqa xmm3,[rax+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor1) + a2(pxor xmm0,[r9+0]) + a2(pxor xmm1,[r9+16]) + a2(pxor xmm2,[r9+32]) + a2(pxor xmm3,[r9+48]) + a1(scrypt_ChunkMix_sse2_no_xor1:) + a2(xor r9,r9) + a2(xor r8,r8) + a1(scrypt_ChunkMix_sse2_loop:) + a2(and rdx, rdx) + a2(pxor xmm0,[rsi+r9+0]) + a2(pxor xmm1,[rsi+r9+16]) + a2(pxor xmm2,[rsi+r9+32]) + a2(pxor xmm3,[rsi+r9+48]) + a1(jz scrypt_ChunkMix_sse2_no_xor2) + a2(pxor xmm0,[rdx+r9+0]) + a2(pxor xmm1,[rdx+r9+16]) + a2(pxor xmm2,[rdx+r9+32]) + a2(pxor xmm3,[rdx+r9+48]) + a1(scrypt_ChunkMix_sse2_no_xor2:) + a2(movdqa xmm8,xmm0) + a2(movdqa xmm9,xmm1) + a2(movdqa xmm10,xmm2) + a2(movdqa xmm11,xmm3) + a2(mov rax,8) + a1(scrypt_salsa_sse2_loop: ) + a2(movdqa xmm4, xmm1) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm3, xmm5) + a2(paddd xmm4, xmm3) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm2, 
xmm5) + a3(pshufd xmm3, xmm3, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm1, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a2(movdqa xmm4, xmm3) + a2(pxor xmm0, xmm5) + a3(pshufd xmm1, xmm1, 0x39) + a2(paddd xmm4, xmm0) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 7) + a2(psrld xmm5, 25) + a2(pxor xmm1, xmm4) + a2(movdqa xmm4, xmm0) + a2(pxor xmm1, xmm5) + a2(paddd xmm4, xmm1) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 9) + a2(psrld xmm5, 23) + a2(pxor xmm2, xmm4) + a2(movdqa xmm4, xmm1) + a2(pxor xmm2, xmm5) + a3(pshufd xmm1, xmm1, 0x93) + a2(paddd xmm4, xmm2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 13) + a2(psrld xmm5, 19) + a2(pxor xmm3, xmm4) + a2(movdqa xmm4, xmm2) + a2(pxor xmm3, xmm5) + a3(pshufd xmm2, xmm2, 0x4e) + a2(paddd xmm4, xmm3) + a2(sub rax, 2) + a2(movdqa xmm5, xmm4) + a2(pslld xmm4, 18) + a2(psrld xmm5, 14) + a2(pxor xmm0, xmm4) + a3(pshufd xmm3, xmm3, 0x39) + a2(pxor xmm0, xmm5) + a1(ja scrypt_salsa_sse2_loop) + a2(paddd xmm0,xmm8) + a2(paddd xmm1,xmm9) + a2(paddd xmm2,xmm10) + a2(paddd xmm3,xmm11) + a2(lea rax,[r8+r9]) + a2(xor r8,rcx) + a2(and rax,~0x7f) + a2(add r9,64) + a2(shr rax,1) + a2(add rax, rdi) + a2(cmp r9,rcx) + a2(movdqa [rax+0],xmm0) + a2(movdqa [rax+16],xmm1) + a2(movdqa [rax+32],xmm2) + a2(movdqa [rax+48],xmm3) + a1(jne scrypt_ChunkMix_sse2_loop) + a1(ret) +asm_naked_fn_end(scrypt_ChunkMix_sse2) + +#endif + + +/* intrinsic */ +#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) + +#define SCRYPT_SALSA_SSE2 + +static void NOINLINE +scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) { + uint32_t i, blocksPerChunk = r * 2, half = 0; + xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3; + size_t rounds; + + 
/* 1: X = B_{2r - 1} */ + xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1); + x0 = xmmp[0]; + x1 = xmmp[1]; + x2 = xmmp[2]; + x3 = xmmp[3]; + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + xmmp = (xmmi *)scrypt_block(Bin, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + + if (Bxor) { + xmmp = (xmmi *)scrypt_block(Bxor, i); + x0 = _mm_xor_si128(x0, xmmp[0]); + x1 = _mm_xor_si128(x1, xmmp[1]); + x2 = _mm_xor_si128(x2, xmmp[2]); + x3 = _mm_xor_si128(x3, xmmp[3]); + } + + t0 = x0; + t1 = x1; + t2 = x2; + t3 = x3; + + for (rounds = 8; rounds; rounds -= 2) { + x4 = x1; + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x3 = _mm_xor_si128(x3, x4); + x4 = x0; + x3 = _mm_xor_si128(x3, x5); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = _mm_slli_epi32(x4, 9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x3; + x2 = _mm_xor_si128(x2, x5); + x3 = _mm_shuffle_epi32(x3, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x1 = _mm_xor_si128(x1, x4); + x4 = x2; + x1 = _mm_xor_si128(x1, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x4 = x3; + x0 = _mm_xor_si128(x0, x5); + x1 = _mm_shuffle_epi32(x1, 0x39); + x4 = _mm_add_epi32(x4, x0); + x5 = x4; + x4 = _mm_slli_epi32(x4, 7); + x5 = _mm_srli_epi32(x5, 25); + x1 = _mm_xor_si128(x1, x4); + x4 = x0; + x1 = _mm_xor_si128(x1, x5); + x4 = _mm_add_epi32(x4, x1); + x5 = x4; + x4 = _mm_slli_epi32(x4, 
9); + x5 = _mm_srli_epi32(x5, 23); + x2 = _mm_xor_si128(x2, x4); + x4 = x1; + x2 = _mm_xor_si128(x2, x5); + x1 = _mm_shuffle_epi32(x1, 0x93); + x4 = _mm_add_epi32(x4, x2); + x5 = x4; + x4 = _mm_slli_epi32(x4, 13); + x5 = _mm_srli_epi32(x5, 19); + x3 = _mm_xor_si128(x3, x4); + x4 = x2; + x3 = _mm_xor_si128(x3, x5); + x2 = _mm_shuffle_epi32(x2, 0x4e); + x4 = _mm_add_epi32(x4, x3); + x5 = x4; + x4 = _mm_slli_epi32(x4, 18); + x5 = _mm_srli_epi32(x5, 14); + x0 = _mm_xor_si128(x0, x4); + x3 = _mm_shuffle_epi32(x3, 0x39); + x0 = _mm_xor_si128(x0, x5); + } + + x0 = _mm_add_epi32(x0, t0); + x1 = _mm_add_epi32(x1, t1); + x2 = _mm_add_epi32(x2, t2); + x3 = _mm_add_epi32(x3, t3); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd */ + xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half); + xmmp[0] = x0; + xmmp[1] = x1; + xmmp[2] = x2; + xmmp[3] = x3; + } +} + +#endif + +#if defined(SCRYPT_SALSA_SSE2) + #undef SCRYPT_MIX + #define SCRYPT_MIX "Salsa/8-SSE2" + #undef SCRYPT_SALSA_INCLUDED + #define SCRYPT_SALSA_INCLUDED +#endif + +/* used by avx,etc as well */ +#if defined(SCRYPT_SALSA_INCLUDED) + /* + Default layout: + 0 1 2 3 + 4 5 6 7 + 8 9 10 11 + 12 13 14 15 + + SSE2 layout: + 0 5 10 15 + 12 1 6 11 + 8 13 2 7 + 4 9 14 3 + */ + + static void STDCALL + salsa_core_tangle_sse2(uint32_t *blocks, size_t count) { + uint32_t t; + while (count--) { + t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t; + t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t; + t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t; + t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t; + t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t; + t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t; + blocks += 16; + } + } +#endif + diff --git a/scryptjane/scrypt-jane-mix_salsa.h b/scryptjane/scrypt-jane-mix_salsa.h new file mode 100644 index 0000000..33f3340 --- /dev/null +++ b/scryptjane/scrypt-jane-mix_salsa.h @@ -0,0 +1,70 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) || 
!defined(SCRYPT_SALSA_INCLUDED) + +#undef SCRYPT_MIX +#define SCRYPT_MIX "Salsa20/8 Ref" + +#undef SCRYPT_SALSA_INCLUDED +#define SCRYPT_SALSA_INCLUDED +#define SCRYPT_SALSA_BASIC + +static void +salsa_core_basic(uint32_t state[16]) { + size_t rounds = 8; + uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t; + + x0 = state[0]; + x1 = state[1]; + x2 = state[2]; + x3 = state[3]; + x4 = state[4]; + x5 = state[5]; + x6 = state[6]; + x7 = state[7]; + x8 = state[8]; + x9 = state[9]; + x10 = state[10]; + x11 = state[11]; + x12 = state[12]; + x13 = state[13]; + x14 = state[14]; + x15 = state[15]; + + #define quarter(a,b,c,d) \ + t = a+d; t = ROTL32(t, 7); b ^= t; \ + t = b+a; t = ROTL32(t, 9); c ^= t; \ + t = c+b; t = ROTL32(t, 13); d ^= t; \ + t = d+c; t = ROTL32(t, 18); a ^= t; \ + + for (; rounds; rounds -= 2) { + quarter( x0, x4, x8,x12) + quarter( x5, x9,x13, x1) + quarter(x10,x14, x2, x6) + quarter(x15, x3, x7,x11) + quarter( x0, x1, x2, x3) + quarter( x5, x6, x7, x4) + quarter(x10,x11, x8, x9) + quarter(x15,x12,x13,x14) + } + + state[0] += x0; + state[1] += x1; + state[2] += x2; + state[3] += x3; + state[4] += x4; + state[5] += x5; + state[6] += x6; + state[7] += x7; + state[8] += x8; + state[9] += x9; + state[10] += x10; + state[11] += x11; + state[12] += x12; + state[13] += x13; + state[14] += x14; + state[15] += x15; + + #undef quarter +} + +#endif + diff --git a/scryptjane/scrypt-jane-pbkdf2.h b/scryptjane/scrypt-jane-pbkdf2.h new file mode 100644 index 0000000..711e3d6 --- /dev/null +++ b/scryptjane/scrypt-jane-pbkdf2.h @@ -0,0 +1,112 @@ +typedef struct scrypt_hmac_state_t { + scrypt_hash_state inner, outer; +} scrypt_hmac_state; + + +static void +scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) { + scrypt_hash_state st; + scrypt_hash_init(&st); + scrypt_hash_update(&st, m, mlen); + scrypt_hash_finish(&st, hash); +} + +/* hmac */ +static void +scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) { + uint8_t 
pad[SCRYPT_HASH_BLOCK_SIZE] = {0}; + size_t i; + + scrypt_hash_init(&st->inner); + scrypt_hash_init(&st->outer); + + if (keylen <= SCRYPT_HASH_BLOCK_SIZE) { + /* use the key directly if it's <= blocksize bytes */ + memcpy(pad, key, keylen); + } else { + /* if it's > blocksize bytes, hash it */ + scrypt_hash(pad, key, keylen); + } + + /* inner = (key ^ 0x36) */ + /* h(inner || ...) */ + for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) + pad[i] ^= 0x36; + scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE); + + /* outer = (key ^ 0x5c) */ + /* h(outer || ...) */ + for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) + pad[i] ^= (0x5c ^ 0x36); + scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE); + + scrypt_ensure_zero(pad, sizeof(pad)); +} + +static void +scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) { + /* h(inner || m...) */ + scrypt_hash_update(&st->inner, m, mlen); +} + +static void +scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) { + /* h(inner || m) */ + scrypt_hash_digest innerhash; + scrypt_hash_finish(&st->inner, innerhash); + + /* h(outer || h(inner || m)) */ + scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash)); + scrypt_hash_finish(&st->outer, mac); + + scrypt_ensure_zero(st, sizeof(*st)); +} + +static void +scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) { + scrypt_hmac_state hmac_pw, hmac_pw_salt, work; + scrypt_hash_digest ti, u; + uint8_t be[4]; + uint32_t i, j, blocks; + uint64_t c; + + /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */ + + /* hmac(password, ...) */ + scrypt_hmac_init(&hmac_pw, password, password_len); + + /* hmac(password, salt...) 
*/ + hmac_pw_salt = hmac_pw; + scrypt_hmac_update(&hmac_pw_salt, salt, salt_len); + + blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE; + for (i = 1; i <= blocks; i++) { + /* U1 = hmac(password, salt || be(i)) */ + U32TO8_BE(be, i); + work = hmac_pw_salt; + scrypt_hmac_update(&work, be, 4); + scrypt_hmac_finish(&work, ti); + memcpy(u, ti, sizeof(u)); + + /* T[i] = U1 ^ U2 ^ U3... */ + for (c = 0; c < N - 1; c++) { + /* UX = hmac(password, U{X-1}) */ + work = hmac_pw; + scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE); + scrypt_hmac_finish(&work, u); + + /* T[i] ^= UX */ + for (j = 0; j < sizeof(u); j++) + ti[j] ^= u[j]; + } + + memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes); + out += SCRYPT_HASH_DIGEST_SIZE; + bytes -= SCRYPT_HASH_DIGEST_SIZE; + } + + scrypt_ensure_zero(ti, sizeof(ti)); + scrypt_ensure_zero(u, sizeof(u)); + scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw)); + scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt)); +} diff --git a/scryptjane/scrypt-jane-portable-x86.h b/scryptjane/scrypt-jane-portable-x86.h new file mode 100644 index 0000000..03282fa --- /dev/null +++ b/scryptjane/scrypt-jane-portable-x86.h @@ -0,0 +1,364 @@ +#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC)) + #define X86ASM + /* gcc 2.95 royally screws up stack alignments on variables */ + #if (defined(COMPILER_MSVC6PP_AND_LATER) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000))) + #define X86ASM_SSE + #define X86ASM_SSE2 + #endif + #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= 1400)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102))) + #define X86ASM_SSSE3 + #endif + #if ((defined(COMPILER_GCC) && (COMPILER_GCC >= 40400))) + #define X86ASM_AVX + #endif +#endif + +#if defined(CPU_X86_64) && defined(COMPILER_GCC) + #define X86_64ASM + #define X86_64ASM_SSE2 + #if (COMPILER_GCC >= 40102) + #define X86_64ASM_SSSE3 + #endif + #if (COMPILER_GCC >= 40400) + #define 
X86_64ASM_AVX
+	#endif
+#endif
+
+#if defined(COMPILER_MSVC) /* MSVC: always go through intrinsics */
+	#define X86_INTRINSIC
+	#if defined(CPU_X86_64) || defined(X86ASM_SSE)
+		#define X86_INTRINSIC_SSE
+	#endif
+	#if defined(CPU_X86_64) || defined(X86ASM_SSE2)
+		#define X86_INTRINSIC_SSE2
+	#endif
+	#if (COMPILER_MSVC >= 1400)
+		#define X86_INTRINSIC_SSSE3
+	#endif
+#endif
+
+#if defined(COMPILER_MSVC) && defined(CPU_X86_64)
+	#define X86_64USE_INTRINSIC
+#endif
+
+#if defined(COMPILER_MSVC) && defined(CPU_X86_64) /* same test as the block just above; the re-definition is identical and therefore harmless */
+	#define X86_64USE_INTRINSIC
+#endif
+
+#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS) /* GCC: intrinsics only on request, limited to what the compiler flags enable */
+	#define X86_INTRINSIC
+	#if defined(__SSE__)
+		#define X86_INTRINSIC_SSE
+	#endif
+	#if defined(__SSE2__)
+		#define X86_INTRINSIC_SSE2
+	#endif
+	#if defined(__SSSE3__)
+		#define X86_INTRINSIC_SSSE3
+	#endif
+	#if defined(__AVX__)
+		#define X86_INTRINSIC_AVX
+	#endif
+#endif
+
+/* only use simd on windows (or SSE2 on gcc)! */
+#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC)
+	#if defined(X86_INTRINSIC_SSE)
+		#define X86_INTRINSIC
+		#include <mmintrin.h> /* __m64 */
+		#include <xmmintrin.h> /* __m128 */
+		typedef __m64 qmm;
+		typedef __m128 xmm;
+		typedef __m128d xmmd;
+	#endif
+	#if defined(X86_INTRINSIC_SSE2)
+		#define X86_INTRINSIC_SSE2
+		#include <emmintrin.h> /* __m128i and the _mm_*_si128 / _mm_*_epi32 intrinsics */
+		typedef __m128i xmmi;
+	#endif
+	#if defined(X86_INTRINSIC_SSSE3)
+		#define X86_INTRINSIC_SSSE3
+		#include <tmmintrin.h> /* SSSE3 intrinsics */
+	#endif
+#endif
+
+
+#if defined(X86_INTRINSIC_SSE2) /* 16-byte constants: overlay scalar lanes with an xmmi when the vector type exists */
+	typedef union packedelem8_t {
+		uint8_t u[16];
+		xmmi v;
+	} packedelem8;
+
+	typedef union packedelem32_t {
+		uint32_t u[4];
+		xmmi v;
+	} packedelem32;
+
+	typedef union packedelem64_t {
+		uint64_t u[2];
+		xmmi v;
+	} packedelem64;
+#else /* no xmmi available: same 16-byte size, scalar members only */
+	typedef union packedelem8_t {
+		uint8_t u[16];
+		uint32_t dw[4];
+	} packedelem8;
+
+	typedef union packedelem32_t {
+		uint32_t u[4];
+		uint8_t b[16];
+	} packedelem32;
+
+	typedef union packedelem64_t {
+		uint64_t u[2];
+		uint8_t b[16];
+	} packedelem64;
+#endif
+
+#if defined(X86_INTRINSIC_SSSE3) || defined(X86ASM_SSSE3) || defined(X86_64ASM_SSSE3)
+	const packedelem8 MM16
ssse3_rotr16_64bit = {{2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9}}; + const packedelem8 MM16 ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}}; + const packedelem8 MM16 ssse3_rotl8_32bit = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}}; + const packedelem8 MM16 ssse3_endian_swap_64bit = {{7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8}}; +#endif + +/* + x86 inline asm for gcc/msvc. usage: + + asm_naked_fn_proto(return_type, name) (type parm1, type parm2..) + asm_naked_fn(name) + a1(..) + a2(.., ..) + a3(.., .., ..) + a1(ret) + asm_naked_fn_end(name) +*/ + +#if defined(X86ASM) || defined(X86_64ASM) + +#if defined(COMPILER_MSVC) + #pragma warning(disable : 4731) /* frame pointer modified by inline assembly */ + #define a1(x) __asm {x} + #define a2(x, y) __asm {x, y} + #define a3(x, y, z) __asm {x, y, z} + #define a4(x, y, z, w) __asm {x, y, z, w} + #define al(x) __asm {label##x:} + #define aj(x, y, z) __asm {x label##y} + #define asm_align8 a1(ALIGN 8) + #define asm_align16 a1(ALIGN 16) + + #define asm_naked_fn_proto(type, fn) static NAKED type STDCALL fn + #define asm_naked_fn(fn) { + #define asm_naked_fn_end(fn) } +#elif defined(COMPILER_GCC) + #define GNU_AS1(x) #x ";\n" + #define GNU_AS2(x, y) #x ", " #y ";\n" + #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n" + #define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n" + #define GNU_ASL(x) "\n" #x ":\n" + #define GNU_ASJ(x, y, z) #x " " #y #z ";" + + #define a1(x) GNU_AS1(x) + #define a2(x, y) GNU_AS2(x, y) + #define a3(x, y, z) GNU_AS3(x, y, z) + #define a4(x, y, z, w) GNU_AS4(x, y, z, w) + #define al(x) GNU_ASL(x) + #define aj(x, y, z) GNU_ASJ(x, y, z) + #define asm_align8 a1(.align 8) + #define asm_align16 a1(.align 16) + + #define asm_naked_fn_proto(type, fn) extern type STDCALL fn + #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASL(fn) + #define asm_naked_fn_end(fn) ".att_syntax prefix;\n.type " #fn ",@function\n.size " #fn ",.-" #fn "\n" ); + #define asm_gcc() __asm__ 
__volatile__(".intel_syntax noprefix;\n" + #define asm_gcc_parms() ".att_syntax prefix;" + #define asm_gcc_trashed() __asm__ __volatile__("" ::: + #define asm_gcc_end() ); +#else + need x86 asm +#endif + +#endif /* X86ASM || X86_64ASM */ + + +#if defined(CPU_X86) || defined(CPU_X86_64) + +typedef enum cpu_flags_x86_t { + cpu_mmx = 1 << 0, + cpu_sse = 1 << 1, + cpu_sse2 = 1 << 2, + cpu_sse3 = 1 << 3, + cpu_ssse3 = 1 << 4, + cpu_sse4_1 = 1 << 5, + cpu_sse4_2 = 1 << 6, + cpu_avx = 1 << 7 +} cpu_flags_x86; + +typedef enum cpu_vendors_x86_t { + cpu_nobody, + cpu_intel, + cpu_amd +} cpu_vendors_x86; + +typedef struct x86_regs_t { + uint32_t eax, ebx, ecx, edx; +} x86_regs; + +#if defined(X86ASM) +asm_naked_fn_proto(int, has_cpuid)(void) +asm_naked_fn(has_cpuid) + a1(pushfd) + a1(pop eax) + a2(mov ecx, eax) + a2(xor eax, 0x200000) + a1(push eax) + a1(popfd) + a1(pushfd) + a1(pop eax) + a2(xor eax, ecx) + a2(shr eax, 21) + a2(and eax, 1) + a1(push ecx) + a1(popfd) + a1(ret) +asm_naked_fn_end(has_cpuid) +#endif /* X86ASM */ + + +static void NOINLINE +get_cpuid(x86_regs *regs, uint32_t flags) { +#if defined(COMPILER_MSVC) + __cpuid((int *)regs, (int)flags); +#else + #if defined(CPU_X86_64) + #define cpuid_bx rbx + #else + #define cpuid_bx ebx + #endif + + asm_gcc() + a1(push cpuid_bx) + a1(cpuid) + a2(mov [%1 + 0], eax) + a2(mov [%1 + 4], ebx) + a2(mov [%1 + 8], ecx) + a2(mov [%1 + 12], edx) + a1(pop cpuid_bx) + asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc" + asm_gcc_end() +#endif +} + +#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) +static uint64_t NOINLINE +get_xgetbv(uint32_t flags) { +#if defined(COMPILER_MSVC) + return _xgetbv(flags); +#else + uint32_t lo, hi; + asm_gcc() + a1(xgetbv) + asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi) + asm_gcc_end() + return ((uint64_t)lo | ((uint64_t)hi << 32)); +#endif +} +#endif // AVX support + +#if defined(SCRYPT_TEST_SPEED) +size_t cpu_detect_mask = (size_t)-1; +#endif + +static size_t +detect_cpu(void) { 
+ union { uint8_t s[12]; uint32_t i[3]; } vendor_string; + cpu_vendors_x86 vendor = cpu_nobody; + x86_regs regs; + uint32_t max_level; + size_t cpu_flags = 0; +#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) + uint64_t xgetbv_flags; +#endif + /* 32 bit builds: report no features at all when has_cpuid() says cpuid is unavailable */ +#if defined(CPU_X86) + if (!has_cpuid()) + return cpu_flags; +#endif + /* cpuid leaf 0: eax is the highest supported leaf, ebx/edx/ecx (in that order) spell the 12 byte vendor string */ + get_cpuid(&regs, 0); + max_level = regs.eax; + vendor_string.i[0] = regs.ebx; + vendor_string.i[1] = regs.edx; + vendor_string.i[2] = regs.ecx; + /* vendor is recorded here but not consulted further down in this function */ + if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12)) + vendor = cpu_intel; + else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12)) + vendor = cpu_amd; + + if (max_level & 0x00000500) { + /* "Intel P5 pre-B0" */ + cpu_flags |= cpu_mmx; + return cpu_flags; + } + + if (max_level < 1) + return cpu_flags; + /* cpuid leaf 1: feature bits are reported in ecx and edx */ + get_cpuid(&regs, 1); +#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX) + /* xsave/xrestore: ecx bit 27 (osxsave) says xgetbv is usable; avx is reported only when ecx bit 28 is set and XCR0 has both bit 1 (sse state) and bit 2 (avx state) enabled, hence the == 0x6 comparison */ + if (regs.ecx & (1 << 27)) { + xgetbv_flags = get_xgetbv(0); + if ((regs.ecx & (1 << 28)) && ((xgetbv_flags & 0x6) == 0x6)) cpu_flags |= cpu_avx; + } +#endif + if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2; + if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_1; + if (regs.ecx & (1 << 9)) cpu_flags |= cpu_ssse3; + if (regs.ecx & (1 )) cpu_flags |= cpu_sse3; + if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2; + if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse; + if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx; + /* speed test builds can restrict the reported features through cpu_detect_mask */ +#if defined(SCRYPT_TEST_SPEED) + cpu_flags &= cpu_detect_mask; +#endif + + return cpu_flags; +} +/* get_top_cpuflag_desc: name of the highest feature present in flag, checked from avx downwards; sse3 has no entry of its own, and "Basic" is returned when none of the tested bits is set */ +#if defined(SCRYPT_TEST_SPEED) +static const char * +get_top_cpuflag_desc(size_t flag) { + if (flag & cpu_avx) return "AVX"; + else if (flag & cpu_sse4_2) return "SSE4.2"; + else if (flag & cpu_sse4_1) return "SSE4.1"; + else if (flag & cpu_ssse3) return "SSSE3"; + else if (flag & cpu_sse2) return "SSE2"; + else if (flag & cpu_sse) return "SSE"; + else if (flag & cpu_mmx) return "MMX"; + else return "Basic"; +} +#endif + +/* enable the highest system-wide option */ +#if 
defined(SCRYPT_CHOOSE_COMPILETIME) + #if !defined(__AVX__) + #undef X86_64ASM_AVX + #undef X86ASM_AVX + #undef X86_INTRINSIC_AVX + #endif + #if !defined(__SSSE3__) + #undef X86_64ASM_SSSE3 + #undef X86ASM_SSSE3 + #undef X86_INTRINSIC_SSSE3 + #endif + #if !defined(__SSE2__) + #undef X86_64ASM_SSE2 + #undef X86ASM_SSE2 + #undef X86_INTRINSIC_SSE2 + #endif +#endif + +#endif /* defined(CPU_X86) || defined(CPU_X86_64) */ \ No newline at end of file diff --git a/scryptjane/scrypt-jane-portable.h b/scryptjane/scrypt-jane-portable.h new file mode 100644 index 0000000..33c8c2c --- /dev/null +++ b/scryptjane/scrypt-jane-portable.h @@ -0,0 +1,281 @@ +/* determine os: defines exactly one of OS_WINDOWS, OS_SOLARIS or OS_NIX, and under OS_NIX refines to OS_LINUX or OS_BSD (plus OS_OSX / OS_MAC / OS_OPENBSD). NOTE(review): the header names after each #include below are missing in this copy of the patch (apparently stripped during extraction) and must be restored from the original file */ +#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__) + #include + #include + #define OS_WINDOWS +#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__) + #include + #include + #include + + #define OS_SOLARIS +#else + #include + #include + #include /* need this to define BSD */ + #include + #include + + #define OS_NIX + #if defined(__linux__) + #include + #define OS_LINUX + #elif defined(BSD) + #define OS_BSD + /* both __APPLE__ and __MACH__ must be defined, so the logical && is used */ + #if defined(MACOS_X) || (defined(__APPLE__) && defined(__MACH__)) + #define OS_OSX + #elif defined(macintosh) || defined(Macintosh) + #define OS_MAC + #elif defined(__OpenBSD__) + #define OS_OPENBSD + #endif + #endif +#endif + + +/* determine compiler: the _MSC_VER section defines COMPILER_MSVC and, because it declares them itself, the fixed width integer typedefs */ +#if defined(_MSC_VER) + #define COMPILER_MSVC _MSC_VER + #if ((COMPILER_MSVC > 1200) || defined(_mm_free)) + #define COMPILER_MSVC6PP_AND_LATER + #endif + #if (COMPILER_MSVC >= 1500) + #define COMPILER_HAS_TMMINTRIN + #endif + + #pragma warning(disable : 4127) /* conditional expression is constant */ + #pragma warning(disable : 4100) /* unreferenced formal parameter */ + + #define _CRT_SECURE_NO_WARNINGS + #include + #include /* _rotl */ + #include + + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + typedef signed int int32_t; + typedef unsigned 
__int64 uint64_t; + typedef signed __int64 int64_t; + + #define ROTL32(a,b) _rotl(a,b) + #define ROTR32(a,b) _rotr(a,b) + #define ROTL64(a,b) _rotl64(a,b) + #define ROTR64(a,b) _rotr64(a,b) + #undef NOINLINE + #define NOINLINE __declspec(noinline) + #undef INLINE + #define INLINE __forceinline + #undef FASTCALL + #define FASTCALL __fastcall + #undef CDECL + #define CDECL __cdecl + #undef STDCALL + #define STDCALL __stdcall + #undef NAKED + #define NAKED __declspec(naked) + #define MM16 __declspec(align(16)) +#endif +#if defined(__ICC) + #define COMPILER_INTEL +#endif +#if defined(__GNUC__) + #if (__GNUC__ >= 3) + #define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__ + #else + #define COMPILER_GCC_PATCHLEVEL 0 + #endif + #define COMPILER_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + COMPILER_GCC_PATCHLEVEL) + #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b))) + #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b))) + #define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b))) + #define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b))) + #undef NOINLINE + #if (COMPILER_GCC >= 30000) + #define NOINLINE __attribute__((noinline)) + #else + #define NOINLINE + #endif + #undef INLINE + #if (COMPILER_GCC >= 30000) + #define INLINE __attribute__((always_inline)) + #else + #define INLINE inline + #endif + #undef FASTCALL + #if (COMPILER_GCC >= 30400) + #define FASTCALL __attribute__((fastcall)) + #else + #define FASTCALL + #endif + #undef CDECL + #define CDECL __attribute__((cdecl)) + #undef STDCALL + #define STDCALL __attribute__((stdcall)) + #define MM16 __attribute__((aligned(16))) + #include +#endif +#if defined(__MINGW32__) || defined(__MINGW64__) + #define COMPILER_MINGW +#endif +#if defined(__PATHCC__) + #define COMPILER_PATHCC +#endif + +#define OPTIONAL_INLINE +#if defined(OPTIONAL_INLINE) + #undef OPTIONAL_INLINE + #define OPTIONAL_INLINE INLINE +#else + #define OPTIONAL_INLINE +#endif + +#define CRYPTO_FN NOINLINE STDCALL + +/* determine cpu */ +#if defined(__amd64__) 
|| defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64) + #define CPU_X86_64 +#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500)) + #define CPU_X86 500 +#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400)) + #define CPU_X86 400 +#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__) + #define CPU_X86 300 +#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64) + #define CPU_IA64 +#endif +/* sparc is tested separately from the x86/ia64 chain above; CPU_SPARC64 is added for __sparcv9 */ +#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9) + #define CPU_SPARC + #if defined(__sparcv9) + #define CPU_SPARC64 + #endif +#endif +/* 64 bit targets: define CPU_64BITS and redefine the FASTCALL, CDECL and STDCALL macros to expand to nothing */ +#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64)) + #define CPU_64BITS + #undef FASTCALL + #define FASTCALL + #undef CDECL + #define CDECL + #undef STDCALL + #define STDCALL +#endif +/* powerpc: CPU_PPC plus exactly one of CPU_POWER7, CPU_PPC64 or CPU_PPC32 */ +#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC) + #define CPU_PPC + #if defined(_ARCH_PWR7) + #define CPU_POWER7 + #elif defined(__64BIT__) + #define CPU_PPC64 + #else + #define CPU_PPC32 + #endif +#endif + +#if defined(__hppa__) || defined(__hppa) + #define CPU_HPPA +#endif + +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) + #define CPU_ALPHA +#endif + +/* endian: define CPU_LE or CPU_BE from the platform byte order macros or from the cpu detected above; neither macro is defined when the byte order cannot be determined */ + +#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \ + (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \ + (defined(CPU_X86) || defined(CPU_X86_64)) || \ + (defined(vax) || defined(MIPSEL) || defined(_MIPSEL))) +#define CPU_LE +#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \ + (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && 
(BYTE_ORDER == BIG_ENDIAN)) || \ + (defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB)) +#define CPU_BE +#else + /* unknown endian! */ +#endif + +/* byte order helpers: U8TOnn_BE/LE build an nn bit value from the bytes at p in big/little endian order; UnnTO8_BE/LE store v to p in that order. The store macros expand to several statements, each ending in a semicolon */ +#define U8TO32_BE(p) \ + (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ + ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]) )) + +#define U8TO32_LE(p) \ + (((uint32_t)((p)[0]) ) | ((uint32_t)((p)[1]) << 8) | \ + ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) + +#define U32TO8_BE(p, v) \ + (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ + (p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); + +#define U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v) ); (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24); + +#define U8TO64_BE(p) \ + (((uint64_t)U8TO32_BE(p) << 32) | (uint64_t)U8TO32_BE((p) + 4)) + +#define U8TO64_LE(p) \ + (((uint64_t)U8TO32_LE(p)) | ((uint64_t)U8TO32_LE((p) + 4) << 32)) + +#define U64TO8_BE(p, v) \ + U32TO8_BE((p), (uint32_t)((v) >> 32)); \ + U32TO8_BE((p) + 4, (uint32_t)((v) )); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (uint32_t)((v) )); \ + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); +/* U32_SWAP / U64_SWAP reverse the byte order of v in place; v is assigned to, so it must be an lvalue */ +#define U32_SWAP(v) { \ + (v) = (((v) << 8) & 0xFF00FF00 ) | (((v) >> 8) & 0xFF00FF ); \ + (v) = ((v) << 16) | ((v) >> 16); \ +} + +#define U64_SWAP(v) { \ + (v) = (((v) << 8) & 0xFF00FF00FF00FF00ull ) | (((v) >> 8) & 0x00FF00FF00FF00FFull ); \ + (v) = (((v) << 16) & 0xFFFF0000FFFF0000ull ) | (((v) >> 16) & 0x0000FFFF0000FFFFull ); \ + (v) = ((v) << 32) | ((v) >> 32); \ +} +/* scrypt_verify: returns 1 when the len bytes at x and y are all equal and 0 otherwise; the loop has no early exit, so every byte is examined regardless of earlier mismatches */ +static int +scrypt_verify(const uint8_t *x, const uint8_t *y, size_t len) { + uint32_t differentbits = 0; + while (len--) + differentbits |= (*x++ ^ *y++); + return (1 & ((differentbits - 1) >> 8)); +} +/* scrypt_ensure_zero: writes len zero bytes to p, using __stosb on msvc, inline rep stosb on gcc x86/x86-64, and a volatile byte loop everywhere else */ +void +scrypt_ensure_zero(void *p, size_t len) { +#if ((defined(CPU_X86) || defined(CPU_X86_64)) && defined(COMPILER_MSVC)) + __stosb((unsigned char *)p, 0, len); +#elif (defined(CPU_X86) && defined(COMPILER_GCC)) + __asm__ 
__volatile__( + "pushl %%edi;\n" + "pushl %%ecx;\n" + "rep stosb;\n" + "popl %%ecx;\n" + "popl %%edi;\n" + :: "a"(0), "D"(p), "c"(len) : "cc", "memory" + ); +#elif (defined(CPU_X86_64) && defined(COMPILER_GCC)) + __asm__ __volatile__( + "pushq %%rdi;\n" + "pushq %%rcx;\n" + "rep stosb;\n" + "popq %%rcx;\n" + "popq %%rdi;\n" + :: "a"(0), "D"(p), "c"(len) : "cc", "memory" + ); +#else + volatile uint8_t *b = (volatile uint8_t *)p; + size_t i; + for (i = 0; i < len; i++) + b[i] = 0; +#endif +} + +#include "scrypt-jane-portable-x86.h" + diff --git a/scryptjane/scrypt-jane-romix-basic.h b/scryptjane/scrypt-jane-romix-basic.h new file mode 100644 index 0000000..ca1df02 --- /dev/null +++ b/scryptjane/scrypt-jane-romix-basic.h @@ -0,0 +1,67 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +/* function type returned by scrypt_getROMix, used with cpu detection; only declared when the implementation is chosen at run time */ +typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r); +#endif + +/* romix pre/post nop function: has an empty body and leaves blocks untouched */ +static void STDCALL +scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) { +} + +/* romix pre/post endian conversion function: the body is empty when CPU_LE is defined; otherwise a run time probe checks for a big endian host and, if so, byte swaps all nblocks * SCRYPT_BLOCK_WORDS words in place */ +static void STDCALL +scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) { +#if !defined(CPU_LE) + static const union { uint8_t b[2]; uint16_t w; } endian_test = {{1,0}}; + size_t i; + if (endian_test.w == 0x100) { + nblocks *= SCRYPT_BLOCK_WORDS; + for (i = 0; i < nblocks; i++) { + SCRYPT_WORD_ENDIAN_SWAP(blocks[i]); + } + } +#endif +} + +/* chunkmix test function: chunkmixfn is the signature of a chunkmix, blockfixfn the signature of the pre/post fix-up applied to the blocks around it */ +typedef void (STDCALL *chunkmixfn)(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r); +typedef void (STDCALL *blockfixfn)(scrypt_mix_word_t *blocks, size_t nblocks); +/* scrypt_test_mix_instance: runs mixfn once on a generated chunk, wrapped by prefn and postfn, and returns 1 if the last 16 bytes of the output equal expected, else 0 */ +static int +scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn, const uint8_t expected[16]) { + /* 
r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */ + const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS; + scrypt_mix_word_t MM16 chunk[2][4 * SCRYPT_BLOCK_WORDS], v; + uint8_t final[16]; + size_t i; + /* fill the input chunk with a fixed pattern derived from each word's index */ + for (i = 0; i < words; i++) { + v = (scrypt_mix_word_t)i; + v = (v << 8) | v; + v = (v << 16) | v; + chunk[0][i] = v; + } + /* chunk[0] is the input and chunk[1] the output; no xor chunk is passed */ + prefn(chunk[0], blocks); + mixfn(chunk[1], chunk[0], NULL, r); + postfn(chunk[1], blocks); + + /* grab the last 16 bytes of the final block */ + for (i = 0; i < 16; i += sizeof(scrypt_mix_word_t)) { + SCRYPT_WORDTO8_LE(final + i, chunk[1][words - (16 / sizeof(scrypt_mix_word_t)) + (i / sizeof(scrypt_mix_word_t))]); + } + + return scrypt_verify(expected, final, 16); +} + +/* returns a pointer to item i, where item is len scrypt_mix_word_t's long */ +static scrypt_mix_word_t * +scrypt_item(scrypt_mix_word_t *base, scrypt_mix_word_t i, scrypt_mix_word_t len) { + return base + (i * len); +} + +/* returns a pointer to block i, where a block is SCRYPT_BLOCK_WORDS words long */ +static scrypt_mix_word_t * +scrypt_block(scrypt_mix_word_t *base, scrypt_mix_word_t i) { + return base + (i * SCRYPT_BLOCK_WORDS); +} diff --git a/scryptjane/scrypt-jane-romix-template.h b/scryptjane/scrypt-jane-romix-template.h new file mode 100644 index 0000000..2fd7674 --- /dev/null +++ b/scryptjane/scrypt-jane-romix-template.h @@ -0,0 +1,118 @@ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) +/* with compile time selection the generated function is always named scrypt_ROMix, and only the first inclusion of this template emits code because SCRYPT_HAVE_ROMIX is defined below */ +#if defined(SCRYPT_CHOOSE_COMPILETIME) +#undef SCRYPT_ROMIX_FN +#define SCRYPT_ROMIX_FN scrypt_ROMix +#endif + +#undef SCRYPT_HAVE_ROMIX +#define SCRYPT_HAVE_ROMIX +/* when the including file did not define SCRYPT_CHUNKMIX_FN, emit the portable chunkmix built on SCRYPT_MIX_FN */ +#if !defined(SCRYPT_CHUNKMIX_FN) + +#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_basic + +/* + Bout = ChunkMix(Bin) + + 2*r: number of blocks in the chunk + Bxor: when not NULL, xored block by block into the input before it is mixed +*/ +static void STDCALL +SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) { + scrypt_mix_word_t MM16 X[SCRYPT_BLOCK_WORDS], *block; + uint32_t 
i, j, blocksPerChunk = r * 2, half = 0; + + /* 1: X = B_{2r - 1} (xored with the matching Bxor block when Bxor is given) */ + block = scrypt_block(Bin, blocksPerChunk - 1); + for (i = 0; i < SCRYPT_BLOCK_WORDS; i++) + X[i] = block[i]; + + if (Bxor) { + block = scrypt_block(Bxor, blocksPerChunk - 1); + for (i = 0; i < SCRYPT_BLOCK_WORDS; i++) + X[i] ^= block[i]; + } + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < blocksPerChunk; i++, half ^= r) { + /* 3: X = H(X ^ B_i) */ + block = scrypt_block(Bin, i); + for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) + X[j] ^= block[j]; + + if (Bxor) { + block = scrypt_block(Bxor, i); + for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) + X[j] ^= block[j]; + } + SCRYPT_MIX_FN(X); + + /* 4: Y_i = X */ + /* 6: B'[0..r-1] = Y_even */ + /* 6: B'[r..2r-1] = Y_odd (half alternates between 0 and r, so odd i are stored r blocks further on) */ + block = scrypt_block(Bout, (i / 2) + half); + for (j = 0; j < SCRYPT_BLOCK_WORDS; j++) + block[j] = X[j]; + } +} +#endif + +/* + X = ROMix(X) + + X: chunk to mix + Y: scratch chunk + N: number of rounds + V[N]: array of chunks to randomly index in to + 2*r: number of blocks in a chunk + + X is passed through SCRYPT_ROMIX_TANGLE_FN on entry and SCRYPT_ROMIX_UNTANGLE_FN on exit +*/ + +static void NOINLINE FASTCALL +SCRYPT_ROMIX_FN(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[N * chunkWords]*/, uint32_t N, uint32_t r) { + uint32_t i, j, chunkWords = SCRYPT_BLOCK_WORDS * r * 2; + scrypt_mix_word_t *block = V; + + SCRYPT_ROMIX_TANGLE_FN(X, r * 2); + + /* 1: X = B */ + /* implicit */ + + /* 2: for i = 0 to N - 1 do (X is copied into V_0, each V_i+1 is mixed directly from V_i, and the last mix writes back into X) */ + memcpy(block, X, chunkWords * sizeof(scrypt_mix_word_t)); + for (i = 0; i < N - 1; i++, block += chunkWords) { + /* 3: V_i = X */ + /* 4: X = H(X) */ + SCRYPT_CHUNKMIX_FN(block + chunkWords, block, NULL, r); + } + SCRYPT_CHUNKMIX_FN(X, block, NULL, r); + + /* 6: for i = 0 to N - 1 do (two steps per iteration, alternating between X and Y as the destination) */ + for (i = 0; i < N; i += 2) { + /* 7: j = Integerify(X) % N (the mask equals the modulo only when N is a power of two) */ + j = X[chunkWords - SCRYPT_BLOCK_WORDS] & (N - 1); + + /* 8: Y = H(X ^ V_j) */ + SCRYPT_CHUNKMIX_FN(Y, X, scrypt_item(V, j, chunkWords), r); + + /* 7: j = Integerify(Y) % N */ + j = Y[chunkWords - SCRYPT_BLOCK_WORDS] & 
(N - 1); + + /* 8: X = H(Y ^ V_j) */ + SCRYPT_CHUNKMIX_FN(X, Y, scrypt_item(V, j, chunkWords), r); + } + + /* 10: B' = X */ + /* implicit */ + + SCRYPT_ROMIX_UNTANGLE_FN(X, r * 2); +} + +#endif /* !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) */ + +/* clear the template parameters so the next inclusion of this file can supply its own */ +#undef SCRYPT_CHUNKMIX_FN +#undef SCRYPT_ROMIX_FN +#undef SCRYPT_MIX_FN +#undef SCRYPT_ROMIX_TANGLE_FN +#undef SCRYPT_ROMIX_UNTANGLE_FN + diff --git a/scryptjane/scrypt-jane-romix.h b/scryptjane/scrypt-jane-romix.h new file mode 100644 index 0000000..faa655a --- /dev/null +++ b/scryptjane/scrypt-jane-romix.h @@ -0,0 +1,27 @@ +#if defined(SCRYPT_CHACHA) +#include "scrypt-jane-chacha.h" +#elif defined(SCRYPT_SALSA) +#include "scrypt-jane-salsa.h" +#elif defined(SCRYPT_SALSA64) +#include "scrypt-jane-salsa64.h" +#else /* no mix function selected: declare empty stubs for the names other code refers to, then stop the build with #error */ + #define SCRYPT_MIX_BASE "ERROR" + typedef uint32_t scrypt_mix_word_t; + #define SCRYPT_WORDTO8_LE U32TO8_LE + #define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP + #define SCRYPT_BLOCK_BYTES 64 + #define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) + #if !defined(SCRYPT_CHOOSE_COMPILETIME) + static void FASTCALL scrypt_ROMix_error(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r) {} + static scrypt_ROMixfn scrypt_getROMix() { return scrypt_ROMix_error; } + #else + static void FASTCALL scrypt_ROMix(scrypt_mix_word_t *X, scrypt_mix_word_t *Y, scrypt_mix_word_t *V, uint32_t N, uint32_t r) {} + #endif + static int scrypt_test_mix() { return 0; } + #error must define a mix function! 
+#endif + +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +#undef SCRYPT_MIX +#define SCRYPT_MIX SCRYPT_MIX_BASE +#endif diff --git a/scryptjane/scrypt-jane-salsa.h b/scryptjane/scrypt-jane-salsa.h new file mode 100644 index 0000000..0c1604b --- /dev/null +++ b/scryptjane/scrypt-jane-salsa.h @@ -0,0 +1,106 @@ +#define SCRYPT_MIX_BASE "Salsa20/8" +/* the mix operates on 32 bit words; a block is 64 bytes, i.e. 16 words */ +typedef uint32_t scrypt_mix_word_t; + +#define SCRYPT_WORDTO8_LE U32TO8_LE +#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP + +#define SCRYPT_BLOCK_BYTES 64 +#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t)) + +/* must have these here in case block bytes is ever != 64 */ +#include "scrypt-jane-romix-basic.h" + +#include "scrypt-jane-mix_salsa-avx.h" +#include "scrypt-jane-mix_salsa-sse2.h" +#include "scrypt-jane-mix_salsa.h" +/* instantiate the romix template once per available implementation; each block sets the template's parameter macros and then includes it. The avx block sets no SCRYPT_MIX_FN */ +#if defined(SCRYPT_SALSA_AVX) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx + #define SCRYPT_ROMIX_FN scrypt_ROMix_avx + #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +#if defined(SCRYPT_SALSA_SSE2) + #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2 + #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2 + #define SCRYPT_MIX_FN salsa_core_sse2 + #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2 + #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2 + #include "scrypt-jane-romix-template.h" +#endif + +/* cpu agnostic: SCRYPT_CHUNKMIX_FN is not defined here, unlike in the avx and sse2 blocks above */ +#define SCRYPT_ROMIX_FN scrypt_ROMix_basic +#define SCRYPT_MIX_FN salsa_core_basic +#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian +#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian +#include "scrypt-jane-romix-template.h" +/* scrypt_getROMix: returns the romix for the best feature reported by detect_cpu(), preferring avx, then sse2, then the portable version */ +#if !defined(SCRYPT_CHOOSE_COMPILETIME) +static scrypt_ROMixfn +scrypt_getROMix() { + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_SALSA_AVX) + if (cpuflags & cpu_avx) + return scrypt_ROMix_avx; + else +#endif + +#if defined(SCRYPT_SALSA_SSE2) + if (cpuflags & cpu_sse2) + return scrypt_ROMix_sse2; + else 
+#endif + + return scrypt_ROMix_basic; +} +#endif + +/* available_implementations (speed test builds only): bitmask of the cpu_* flags for which a salsa implementation was compiled in */ +#if defined(SCRYPT_TEST_SPEED) +static size_t +available_implementations() { + size_t flags = 0; + +#if defined(SCRYPT_SALSA_AVX) + flags |= cpu_avx; +#endif + +#if defined(SCRYPT_SALSA_SSE2) + flags |= cpu_sse2; +#endif + + return flags; +} +#endif + +/* scrypt_test_mix: runs scrypt_test_mix_instance for every compiled-in chunkmix that the running cpu supports and returns 1 only if all of them produce the expected bytes; returns 1 as well when no instance is run */ +static int +scrypt_test_mix() { + static const uint8_t expected[16] = { + 0x41,0x1f,0x2e,0xa3,0xab,0xa3,0x1a,0x34,0x87,0x1d,0x8a,0x1c,0x76,0xa0,0x27,0x66, + }; + + int ret = 1; + size_t cpuflags = detect_cpu(); + +#if defined(SCRYPT_SALSA_AVX) + if (cpuflags & cpu_avx) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA_SSE2) + if (cpuflags & cpu_sse2) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected); +#endif + +#if defined(SCRYPT_SALSA_BASIC) + ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected); +#endif + + return ret; +} diff --git a/scryptjane/scrypt-jane-test-vectors.h b/scryptjane/scrypt-jane-test-vectors.h new file mode 100644 index 0000000..a1e4c61 --- /dev/null +++ b/scryptjane/scrypt-jane-test-vectors.h @@ -0,0 +1,261 @@ +typedef struct scrypt_test_setting_t { + const char *pw, *salt; + uint8_t Nfactor, rfactor, pfactor; +} scrypt_test_setting; +/* password, salt and N/r/p factors for each self test; NOTE(review): the trailing {0} entry is presumably the end-of-list marker - confirm against the code that walks this table */ +static const scrypt_test_setting post_settings[] = { + {"", "", 3, 0, 0}, + {"password", "NaCl", 9, 3, 4}, + {0} +}; + +#if defined(SCRYPT_SHA256) + #if defined(SCRYPT_SALSA) + /* sha256 + salsa20/8, the only 'official' test vectors! 
*/ + static const uint8_t post_vectors[][64] = { + {0x77,0xd6,0x57,0x62,0x38,0x65,0x7b,0x20,0x3b,0x19,0xca,0x42,0xc1,0x8a,0x04,0x97, + 0xf1,0x6b,0x48,0x44,0xe3,0x07,0x4a,0xe8,0xdf,0xdf,0xfa,0x3f,0xed,0xe2,0x14,0x42, + 0xfc,0xd0,0x06,0x9d,0xed,0x09,0x48,0xf8,0x32,0x6a,0x75,0x3a,0x0f,0xc8,0x1f,0x17, + 0xe8,0xd3,0xe0,0xfb,0x2e,0x0d,0x36,0x28,0xcf,0x35,0xe2,0x0c,0x38,0xd1,0x89,0x06}, + {0xfd,0xba,0xbe,0x1c,0x9d,0x34,0x72,0x00,0x78,0x56,0xe7,0x19,0x0d,0x01,0xe9,0xfe, + 0x7c,0x6a,0xd7,0xcb,0xc8,0x23,0x78,0x30,0xe7,0x73,0x76,0x63,0x4b,0x37,0x31,0x62, + 0x2e,0xaf,0x30,0xd9,0x2e,0x22,0xa3,0x88,0x6f,0xf1,0x09,0x27,0x9d,0x98,0x30,0xda, + 0xc7,0x27,0xaf,0xb9,0x4a,0x83,0xee,0x6d,0x83,0x60,0xcb,0xdf,0xa2,0xcc,0x06,0x40} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xef,0x8f,0x44,0x8f,0xc3,0xef,0x78,0x13,0xb2,0x26,0xa7,0x2a,0x40,0xa1,0x98,0x7f, + 0xc8,0x7f,0x0d,0x5f,0x40,0x66,0xa2,0x05,0x07,0x4f,0xc7,0xac,0x3b,0x47,0x07,0x0c, + 0xf5,0x20,0x46,0x76,0x20,0x7b,0xee,0x51,0x6d,0x5f,0xfa,0x9c,0x27,0xac,0xa9,0x36, + 0x62,0xbd,0xde,0x0b,0xa3,0xc0,0x66,0x84,0xde,0x82,0xd0,0x1a,0xb4,0xd1,0xb5,0xfe}, + {0xf1,0x94,0xf7,0x5f,0x15,0x12,0x10,0x4d,0x6e,0xfb,0x04,0x8c,0x35,0xc4,0x51,0xb6, + 0x11,0x04,0xa7,0x9b,0xb0,0x46,0xaf,0x7b,0x47,0x39,0xf0,0xac,0xb2,0x8a,0xfa,0x45, + 0x09,0x86,0x8f,0x10,0x4b,0xc6,0xee,0x00,0x11,0x38,0x73,0x7a,0x6a,0xd8,0x25,0x67, + 0x85,0xa4,0x10,0x4e,0xa9,0x2f,0x15,0xfe,0xcf,0x63,0xe1,0xe8,0xcf,0xab,0xe8,0xbd} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xf4,0x87,0x29,0xf4,0xc3,0x31,0x8c,0xe8,0xdf,0xe5,0xd8,0x73,0xff,0xca,0x32,0xcf, + 0xd8,0xac,0xe7,0xf7,0x15,0xda,0x84,0x41,0x60,0x23,0x26,0x4a,0xc8,0x3e,0xee,0xa6, + 0xa5,0x6e,0x52,0xd6,0x64,0x55,0x16,0x31,0x3e,0x66,0x7b,0x65,0xd5,0xe2,0xc9,0x95, + 0x1b,0xf0,0x81,0x40,0xb7,0x2f,0xff,0xa6,0xe6,0x02,0xcc,0x63,0x08,0x4a,0x74,0x31}, + {0x7a,0xd8,0xad,0x02,0x9c,0xa5,0xf4,0x42,0x6a,0x29,0xd2,0xb5,0x53,0xf1,0x6d,0x1d, + 
0x25,0xc8,0x70,0x48,0x80,0xb9,0xa3,0xf6,0x94,0xf8,0xfa,0xb8,0x52,0x42,0xcd,0x14, + 0x26,0x46,0x28,0x06,0xc7,0xf6,0x1f,0xa7,0x89,0x6d,0xc5,0xa0,0x36,0xcc,0xde,0xcb, + 0x73,0x0b,0xa4,0xe2,0xd3,0xd1,0x44,0x06,0x35,0x08,0xe0,0x35,0x5b,0xf8,0xd7,0xe7} + }; + #endif +#elif defined(SCRYPT_SHA512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xae,0x54,0xe7,0x74,0xe4,0x51,0x6b,0x0f,0xe1,0xe7,0x28,0x03,0x17,0xe4,0x8c,0xfa, + 0x2f,0x66,0x55,0x7f,0xdc,0x3b,0x40,0xab,0x47,0x84,0xc9,0x63,0x36,0x07,0x9d,0xe5, + 0x86,0x43,0x95,0x89,0xb6,0xc0,0x6c,0x72,0x64,0x00,0xc1,0x2a,0xd7,0x69,0x21,0x92, + 0x8e,0xba,0xa4,0x59,0x9f,0x00,0x14,0x3a,0x7c,0x12,0x58,0x91,0x09,0xa0,0x32,0xfe}, + {0xc5,0xb3,0xd6,0xea,0x0a,0x4b,0x1e,0xcc,0x40,0x00,0xe5,0x98,0x5c,0xdc,0x06,0x06, + 0x78,0x34,0x92,0x16,0xcf,0xe4,0x9f,0x03,0x96,0x2d,0x41,0x35,0x00,0x9b,0xff,0x74, + 0x60,0x19,0x6e,0xe6,0xa6,0x46,0xf7,0x37,0xcb,0xfa,0xd0,0x9f,0x80,0x72,0x2e,0x85, + 0x13,0x3e,0x1a,0x91,0x90,0x53,0xa1,0x33,0x85,0x51,0xdc,0x62,0x1c,0x0e,0x4d,0x30} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xe2,0x05,0x7c,0x44,0xf9,0x55,0x9f,0x64,0xbe,0xd5,0x7f,0x85,0x69,0xc7,0x8c,0x7f, + 0x2b,0x91,0xd6,0x9a,0x6c,0xf8,0x57,0x55,0x61,0x25,0x3d,0xee,0xb8,0xd5,0x8c,0xdc, + 0x2d,0xd5,0x53,0x84,0x8c,0x06,0xaa,0x37,0x77,0xa6,0xf0,0xf1,0x35,0xfe,0xb5,0xcb, + 0x61,0xd7,0x2c,0x67,0xf3,0x7e,0x8a,0x1b,0x04,0xa3,0xa3,0x43,0xa2,0xb2,0x29,0xf2}, + {0x82,0xda,0x29,0xb2,0x08,0x27,0xfc,0x78,0x22,0xc4,0xb8,0x7e,0xbc,0x36,0xcf,0xcd, + 0x17,0x4b,0xa1,0x30,0x16,0x4a,0x25,0x70,0xc7,0xcb,0xe0,0x2b,0x56,0xd3,0x16,0x4e, + 0x85,0xb6,0x84,0xe7,0x9b,0x7f,0x8b,0xb5,0x94,0x33,0xcf,0x33,0x44,0x65,0xc8,0xa1, + 0x46,0xf9,0xf5,0xfc,0x74,0x29,0x7e,0xd5,0x46,0xec,0xbd,0x95,0xc1,0x80,0x24,0xe4} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xa6,0xcb,0x77,0x9a,0x64,0x1f,0x95,0x02,0x53,0xe7,0x5c,0x78,0xdb,0xa3,0x43,0xff, + 
0xbe,0x10,0x4c,0x7b,0xe4,0xe1,0x91,0xcf,0x67,0x69,0x5a,0x2c,0x12,0xd6,0x99,0x49, + 0x92,0xfd,0x5a,0xaa,0x12,0x4c,0x2e,0xf6,0x95,0x46,0x8f,0x5e,0x77,0x62,0x16,0x29, + 0xdb,0xe7,0xab,0x02,0x2b,0x9c,0x35,0x03,0xf8,0xd4,0x04,0x7d,0x2d,0x73,0x85,0xf1}, + {0x54,0xb7,0xca,0xbb,0xaf,0x0f,0xb0,0x5f,0xb7,0x10,0x63,0x48,0xb3,0x15,0xd8,0xb5, + 0x62,0x64,0x89,0x6a,0x59,0xc6,0x0f,0x86,0x96,0x38,0xf0,0xcf,0xd4,0x62,0x90,0x61, + 0x7d,0xce,0xd6,0x13,0x85,0x67,0x4a,0xf5,0x32,0x03,0x74,0x30,0x0b,0x5a,0x2f,0x86, + 0x82,0x6e,0x0c,0x3e,0x40,0x7a,0xde,0xbe,0x42,0x6e,0x80,0x2b,0xaf,0xdb,0xcc,0x94} + }; + #endif +#elif defined(SCRYPT_BLAKE512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0x4a,0x48,0xb3,0xfa,0xdc,0xb0,0xb8,0xdb,0x54,0xee,0xf3,0x5c,0x27,0x65,0x6c,0x20, + 0xab,0x61,0x9a,0x5b,0xd5,0x1d,0xd9,0x95,0xab,0x88,0x0e,0x4d,0x1e,0x71,0x2f,0x11, + 0x43,0x2e,0xef,0x23,0xca,0x8a,0x49,0x3b,0x11,0x38,0xa5,0x28,0x61,0x2f,0xb7,0x89, + 0x5d,0xef,0x42,0x4c,0xc1,0x74,0xea,0x8a,0x56,0xbe,0x4a,0x82,0x76,0x15,0x1a,0x87}, + {0x96,0x24,0xbf,0x40,0xeb,0x03,0x8e,0xfe,0xc0,0xd5,0xa4,0x81,0x85,0x7b,0x09,0x88, + 0x52,0xb5,0xcb,0xc4,0x48,0xe1,0xb9,0x1d,0x3f,0x8b,0x3a,0xc6,0x38,0x32,0xc7,0x55, + 0x30,0x28,0x7a,0x42,0xa9,0x5d,0x54,0x33,0x62,0xf3,0xd9,0x3c,0x96,0x40,0xd1,0x80, + 0xe4,0x0e,0x7e,0xf0,0x64,0x53,0xfe,0x7b,0xd7,0x15,0xba,0xad,0x16,0x80,0x01,0xb5} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0x45,0x42,0x22,0x31,0x26,0x13,0x5f,0x94,0xa4,0x00,0x04,0x47,0xe8,0x50,0x6d,0xd6, + 0xdd,0xd5,0x08,0xd4,0x90,0x64,0xe0,0x59,0x70,0x46,0xff,0xfc,0x29,0xb3,0x6a,0xc9, + 0x4d,0x45,0x97,0x95,0xa8,0xf0,0x53,0xe7,0xee,0x4b,0x6b,0x5d,0x1e,0xa5,0xb2,0x58, + 0x4b,0x93,0xc9,0x89,0x4c,0xa8,0xab,0x03,0x74,0x38,0xbd,0x54,0x97,0x6b,0xab,0x4a}, + {0x4b,0x4a,0x63,0x96,0x73,0x34,0x9f,0x39,0x64,0x51,0x0e,0x2e,0x3b,0x07,0xd5,0x1c, + 0xd2,0xf7,0xce,0x60,0xab,0xac,0x89,0xa4,0x16,0x0c,0x58,0x82,0xb3,0xd3,0x25,0x5b, + 
0xd5,0x62,0x32,0xf4,0x86,0x5d,0xb2,0x4b,0xbf,0x8e,0xc6,0xc0,0xac,0x40,0x48,0xb4, + 0x69,0x08,0xba,0x40,0x4b,0x07,0x2a,0x13,0x9c,0x98,0x3b,0x8b,0x20,0x0c,0xac,0x9e} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xcb,0x4b,0xc2,0xd1,0xf4,0x77,0x32,0x3c,0x42,0x9d,0xf7,0x7d,0x1f,0x22,0x64,0xa4, + 0xe2,0x88,0x30,0x2d,0x54,0x9d,0xb6,0x26,0x89,0x25,0x30,0xc3,0x3d,0xdb,0xba,0x99, + 0xe9,0x8e,0x1e,0x5e,0x57,0x66,0x75,0x7c,0x24,0xda,0x00,0x6f,0x79,0xf7,0x47,0xf5, + 0xea,0x40,0x70,0x37,0xd2,0x91,0xc7,0x4d,0xdf,0x46,0xb6,0x3e,0x95,0x7d,0xcb,0xc1}, + {0x25,0xc2,0xcb,0x7f,0xc8,0x50,0xb7,0x0b,0x11,0x9e,0x1d,0x10,0xb2,0xa8,0x35,0x23, + 0x91,0x39,0xfb,0x45,0xf2,0xbf,0xe4,0xd0,0x84,0xec,0x72,0x33,0x6d,0x09,0xed,0x41, + 0x9a,0x7e,0x4f,0x10,0x73,0x97,0x22,0x76,0x58,0x93,0x39,0x24,0xdf,0xd2,0xaa,0x2f, + 0x6b,0x2b,0x64,0x48,0xa5,0xb7,0xf5,0x56,0x77,0x02,0xa7,0x71,0x46,0xe5,0x0e,0x8d}, + }; + #endif +#elif defined(SCRYPT_BLAKE256) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xf1,0xf1,0x91,0x1a,0x81,0xe6,0x9f,0xc1,0xce,0x43,0xab,0xb1,0x1a,0x02,0x1e,0x16, + 0x08,0xc6,0xf9,0x00,0x50,0x1b,0x6d,0xf1,0x31,0x06,0x95,0x48,0x5d,0xf7,0x6c,0x00, + 0xa2,0x4c,0xb1,0x0e,0x52,0x66,0x94,0x7e,0x84,0xfc,0xa5,0x34,0xfd,0xf0,0xe9,0x57, + 0x85,0x2d,0x8c,0x05,0x5c,0x0f,0x04,0xd4,0x8d,0x3e,0x13,0x52,0x3d,0x90,0x2d,0x2c}, + {0xd5,0x42,0xd2,0x7b,0x06,0xae,0x63,0x90,0x9e,0x30,0x00,0x0e,0xd8,0xa4,0x3a,0x0b, + 0xee,0x4a,0xef,0xb2,0xc4,0x95,0x0d,0x72,0x07,0x70,0xcc,0xa3,0xf9,0x1e,0xc2,0x75, + 0xcf,0xaf,0xe1,0x44,0x1c,0x8c,0xe2,0x3e,0x0c,0x81,0xf3,0x92,0xe1,0x13,0xe6,0x4f, + 0x2d,0x27,0xc3,0x87,0xe5,0xb6,0xf9,0xd7,0x02,0x04,0x37,0x64,0x78,0x36,0x6e,0xb3} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xad,0x1b,0x4b,0xca,0xe3,0x26,0x1a,0xfd,0xb7,0x77,0x8c,0xde,0x8d,0x26,0x14,0xe1, + 0x54,0x38,0x42,0xf3,0xb3,0x66,0x29,0xf9,0x90,0x04,0xf1,0x82,0x7c,0x5a,0x6f,0xa8, + 
0x7d,0xd6,0x08,0x0d,0x8b,0x78,0x04,0xad,0x31,0xea,0xd4,0x87,0x2d,0xf7,0x74,0x9a, + 0xe5,0xce,0x97,0xef,0xa3,0xbb,0x90,0x46,0x7c,0xf4,0x51,0x38,0xc7,0x60,0x53,0x21}, + {0x39,0xbb,0x56,0x3d,0x0d,0x7b,0x74,0x82,0xfe,0x5a,0x78,0x3d,0x66,0xe8,0x3a,0xdf, + 0x51,0x6f,0x3e,0xf4,0x86,0x20,0x8d,0xe1,0x81,0x22,0x02,0xf7,0x0d,0xb5,0x1a,0x0f, + 0xfc,0x59,0xb6,0x60,0xc9,0xdb,0x38,0x0b,0x5b,0x95,0xa5,0x94,0xda,0x42,0x2d,0x90, + 0x47,0xeb,0x73,0x31,0x9f,0x20,0xf6,0x81,0xc2,0xef,0x33,0x77,0x51,0xd8,0x2c,0xe4} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0x9e,0xf2,0x60,0x7c,0xbd,0x7c,0x19,0x5c,0x79,0xc6,0x1b,0x7e,0xb0,0x65,0x1b,0xc3, + 0x70,0x0d,0x89,0xfc,0x72,0xb2,0x03,0x72,0x15,0xcb,0x8e,0x8c,0x49,0x50,0x4c,0x27, + 0x99,0xda,0x47,0x32,0x5e,0xb4,0xa2,0x07,0x83,0x51,0x6b,0x06,0x37,0x60,0x42,0xc4, + 0x59,0x49,0x99,0xdd,0xc0,0xd2,0x08,0x94,0x7f,0xe3,0x9e,0x4e,0x43,0x8e,0x5b,0xba}, + {0x86,0x6f,0x3b,0x11,0xb8,0xca,0x4b,0x6e,0xa7,0x6f,0xc2,0xc9,0x33,0xb7,0x8b,0x9f, + 0xa3,0xb9,0xf5,0xb5,0x62,0xa6,0x17,0x66,0xe4,0xc3,0x9d,0x9b,0xca,0x51,0xb0,0x2f, + 0xda,0x09,0xc1,0x77,0xed,0x8b,0x89,0xc2,0x69,0x5a,0x34,0x05,0x4a,0x1f,0x4d,0x76, + 0xcb,0xd5,0xa4,0x78,0xfa,0x1b,0xb9,0x5b,0xbc,0x3d,0xce,0x04,0x63,0x99,0xad,0x54} + }; + #endif +#elif defined(SCRYPT_SKEIN512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xe4,0x36,0xa0,0x9a,0xdb,0xf0,0xd1,0x45,0x56,0xda,0x25,0x53,0x00,0xf9,0x2c,0x69, + 0xa4,0xc2,0xa5,0x8e,0x1a,0x85,0xfa,0x53,0xbd,0x55,0x3d,0x11,0x2a,0x44,0x13,0x87, + 0x8f,0x81,0x88,0x13,0x1e,0x49,0xa8,0xc4,0xc5,0xcd,0x1f,0xe1,0x5f,0xf5,0xcb,0x2f, + 0x8b,0xab,0x57,0x38,0x59,0xeb,0x6b,0xac,0x3b,0x73,0x10,0xa6,0xe1,0xfe,0x17,0x3e}, + {0x6d,0x61,0xde,0x43,0xa9,0x38,0x53,0x5f,0xd8,0xf2,0x6d,0xf3,0xe4,0xd6,0xd8,0x5e, + 0x81,0x89,0xd0,0x0b,0x86,0x16,0xb1,0x91,0x65,0x76,0xd8,0xc1,0xf7,0x3b,0xca,0x8b, + 0x35,0x07,0x58,0xba,0x77,0xdf,0x11,0x6c,0xbc,0x58,0xee,0x11,0x59,0xf2,0xfe,0xcb, + 
0x51,0xdc,0xcd,0x35,0x2e,0x46,0x22,0xa0,0xaa,0x55,0x60,0x7c,0x91,0x15,0xb8,0x00} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0xd1,0x12,0x6d,0x64,0x10,0x0e,0x98,0x6c,0xbe,0x70,0x21,0xd9,0xc6,0x04,0x62,0xa4, + 0x29,0x13,0x9a,0x3c,0xf8,0xe9,0x1e,0x87,0x9f,0x88,0xf4,0x98,0x01,0x41,0x8e,0xce, + 0x60,0xf7,0xbe,0x17,0x0a,0xec,0xd6,0x30,0x80,0xcf,0x6b,0x1e,0xcf,0x95,0xa0,0x4d, + 0x37,0xed,0x3a,0x09,0xd1,0xeb,0x0c,0x80,0x82,0x22,0x8e,0xd3,0xb1,0x7f,0xd6,0xa8}, + {0x5c,0x5c,0x05,0xe2,0x75,0xa5,0xa4,0xec,0x81,0x97,0x9c,0x5b,0xd7,0x26,0xb3,0x16, + 0xb4,0x02,0x8c,0x56,0xe6,0x32,0x57,0x33,0x47,0x19,0x06,0x6c,0xde,0x68,0x41,0x37, + 0x5b,0x7d,0xa7,0xb3,0x73,0xeb,0x82,0xca,0x0f,0x86,0x2e,0x6b,0x47,0xa2,0x70,0x39, + 0x35,0xfd,0x2d,0x2e,0x7b,0xc3,0x68,0xbb,0x52,0x42,0x19,0x3b,0x78,0x96,0xe7,0xc8} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xd2,0xad,0x32,0x05,0xee,0x80,0xe3,0x44,0x70,0xc6,0x34,0xde,0x05,0xb6,0xcf,0x60, + 0x89,0x98,0x70,0xc0,0xb8,0xf5,0x54,0xf1,0xa6,0xb2,0xc8,0x76,0x34,0xec,0xc4,0x59, + 0x8e,0x64,0x42,0xd0,0xa9,0xed,0xe7,0x19,0xb2,0x8a,0x11,0xc6,0xa6,0xbf,0xa7,0xa9, + 0x4e,0x44,0x32,0x7e,0x12,0x91,0x9d,0xfe,0x52,0x48,0xa8,0x27,0xb3,0xfc,0xb1,0x89}, + {0xd6,0x67,0xd2,0x3e,0x30,0x1e,0x9d,0xe2,0x55,0x68,0x17,0x3d,0x2b,0x75,0x5a,0xe5, + 0x04,0xfb,0x3d,0x0e,0x86,0xe0,0xaa,0x1d,0xd4,0x72,0xda,0xb0,0x79,0x41,0xb7,0x99, + 0x68,0xe5,0xd9,0x55,0x79,0x7d,0xc3,0xd1,0xa6,0x56,0xc1,0xbe,0x0b,0x6c,0x62,0x23, + 0x66,0x67,0x91,0x47,0x99,0x13,0x6b,0xe3,0xda,0x59,0x55,0x18,0x67,0x8f,0x2e,0x3b} + }; + #endif +#elif defined(SCRYPT_KECCAK512) + #if defined(SCRYPT_SALSA) + static const uint8_t post_vectors[][64] = { + {0xc2,0x7b,0xbe,0x1d,0xf1,0x99,0xd8,0xe7,0x1b,0xac,0xe0,0x9d,0xeb,0x5a,0xfe,0x21, + 0x71,0xff,0x41,0x51,0x4f,0xbe,0x41,0x01,0x15,0xe2,0xb7,0xb9,0x55,0x15,0x25,0xa1, + 0x40,0x4c,0x66,0x29,0x32,0xb7,0xc9,0x62,0x60,0x88,0xe0,0x99,0x39,0xae,0xce,0x25, + 
0x3c,0x11,0x89,0xdd,0xc6,0x14,0xd7,0x3e,0xa3,0x6d,0x07,0x2e,0x56,0xa0,0xff,0x97}, + {0x3c,0x91,0x12,0x4a,0x37,0x7d,0xd6,0x96,0xd2,0x9b,0x5d,0xea,0xb8,0xb9,0x82,0x4e, + 0x4f,0x6b,0x60,0x4c,0x59,0x01,0xe5,0x73,0xfd,0xf6,0xb8,0x9a,0x5a,0xd3,0x7c,0x7a, + 0xd2,0x4f,0x8e,0x74,0xc1,0x90,0x88,0xa0,0x3f,0x55,0x75,0x79,0x10,0xd0,0x09,0x79, + 0x0f,0x6c,0x74,0x0c,0x05,0x08,0x3c,0x8c,0x94,0x7b,0x30,0x56,0xca,0xdf,0xdf,0x34} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0x77,0xcb,0x70,0xbf,0xae,0xd4,0x4c,0x5b,0xbc,0xd3,0xec,0x8a,0x82,0x43,0x8d,0xb3, + 0x7f,0x1f,0xfb,0x70,0x36,0x32,0x4d,0xa6,0xb7,0x13,0x37,0x77,0x30,0x0c,0x3c,0xfb, + 0x2c,0x20,0x8f,0x2a,0xf4,0x47,0x4d,0x69,0x8e,0xae,0x2d,0xad,0xba,0x35,0xe9,0x2f, + 0xe6,0x99,0x7a,0xf8,0xcf,0x70,0x78,0xbb,0x0c,0x72,0x64,0x95,0x8b,0x36,0x77,0x3d}, + {0xc6,0x43,0x17,0x16,0x87,0x09,0x5f,0x12,0xed,0x21,0xe2,0xb4,0xad,0x55,0xa1,0xa1, + 0x49,0x50,0x90,0x70,0xab,0x81,0x83,0x7a,0xcd,0xdf,0x23,0x52,0x19,0xc0,0xa2,0xd8, + 0x8e,0x98,0xeb,0xf0,0x37,0xab,0xad,0xfd,0x1c,0x04,0x97,0x18,0x42,0x85,0xf7,0x4b, + 0x18,0x2c,0x55,0xd3,0xa9,0xe6,0x89,0xfb,0x58,0x0a,0xb2,0x37,0xb9,0xf8,0xfb,0xc5} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xc7,0x34,0x95,0x02,0x5e,0x31,0x0d,0x1f,0x10,0x38,0x9c,0x3f,0x04,0x53,0xed,0x05, + 0x27,0x38,0xc1,0x3f,0x6a,0x0f,0xc5,0xa3,0x9b,0x73,0x8a,0x28,0x7e,0x5d,0x3c,0xdc, + 0x9d,0x5a,0x09,0xbf,0x8c,0x0a,0xad,0xe4,0x73,0x52,0xe3,0x6d,0xaa,0xd1,0x8b,0xbf, + 0xa3,0xb7,0xf0,0x58,0xad,0x22,0x24,0xc9,0xaa,0x96,0xb7,0x5d,0xfc,0x5f,0xb0,0xcf}, + {0x76,0x22,0xfd,0xe8,0xa2,0x79,0x8e,0x9d,0x43,0x8c,0x7a,0xba,0x78,0xb7,0x84,0xf1, + 0xc8,0xee,0x3b,0xae,0x31,0x89,0xbf,0x7e,0xd0,0x4b,0xc1,0x2d,0x58,0x5d,0x84,0x6b, + 0xec,0x86,0x56,0xe0,0x87,0x94,0x7f,0xbc,0xf9,0x48,0x92,0xef,0x54,0x7f,0x23,0x8d, + 0x4f,0x8b,0x0a,0x75,0xa7,0x39,0x0e,0x46,0x6e,0xee,0x58,0xc8,0xfa,0xea,0x90,0x53} + }; + #endif +#elif defined(SCRYPT_KECCAK256) + #if defined(SCRYPT_SALSA) + static 
const uint8_t post_vectors[][64] = { + {0x2e,0x96,0xd8,0x87,0x45,0xcd,0xd6,0xc8,0xf6,0xd2,0x87,0x33,0x50,0xc7,0x04,0xe5, + 0x3c,0x4b,0x48,0x44,0x57,0xc1,0x74,0x09,0x76,0x02,0xaa,0xd3,0x7b,0xf3,0xbf,0xed, + 0x4b,0x72,0xd7,0x1b,0x49,0x6b,0xe0,0x44,0x83,0xee,0x8f,0xaf,0xa1,0xb5,0x33,0xa9, + 0x9e,0x86,0xab,0xe2,0x9f,0xcf,0x68,0x6e,0x7e,0xbd,0xf5,0x7a,0x83,0x4b,0x1c,0x10}, + {0x42,0x7e,0xf9,0x4b,0x72,0x61,0xda,0x2d,0xb3,0x27,0x0e,0xe1,0xd9,0xde,0x5f,0x3e, + 0x64,0x2f,0xd6,0xda,0x90,0x59,0xce,0xbf,0x02,0x5b,0x32,0xf7,0x6d,0x94,0x51,0x7b, + 0xb6,0xa6,0x0d,0x99,0x3e,0x7f,0x39,0xbe,0x1b,0x1d,0x6c,0x97,0x12,0xd8,0xb7,0xfd, + 0x5b,0xb5,0xf3,0x73,0x5a,0x89,0xb2,0xdd,0xcc,0x3d,0x74,0x2e,0x3d,0x9e,0x3c,0x22} + }; + #elif defined(SCRYPT_CHACHA) + static const uint8_t post_vectors[][64] = { + {0x76,0x1d,0x5b,0x8f,0xa9,0xe1,0xa6,0x01,0xcb,0xc5,0x7a,0x5f,0x02,0x23,0xb6,0x82, + 0x57,0x79,0x60,0x2f,0x05,0x7f,0xb8,0x0a,0xcb,0x5e,0x54,0x11,0x49,0x2e,0xdd,0x85, + 0x83,0x30,0x67,0xb3,0x24,0x5c,0xce,0xfc,0x32,0xcf,0x12,0xc3,0xff,0xe0,0x79,0x36, + 0x74,0x17,0xa6,0x3e,0xcd,0xa0,0x7e,0xcb,0x37,0xeb,0xcb,0xb6,0xe1,0xb9,0xf5,0x15}, + {0xf5,0x66,0xa7,0x4c,0xe4,0xdc,0x18,0x56,0x2f,0x3e,0x86,0x4d,0x92,0xa5,0x5c,0x5a, + 0x8f,0xc3,0x6b,0x32,0xdb,0xe5,0x72,0x50,0x84,0xfc,0x6e,0x5d,0x15,0x77,0x3d,0xca, + 0xc5,0x2b,0x20,0x3c,0x78,0x37,0x80,0x78,0x23,0x56,0x91,0xa0,0xce,0xa4,0x06,0x5a, + 0x7f,0xe3,0xbf,0xab,0x51,0x57,0x32,0x2c,0x0a,0xf0,0xc5,0x6f,0xf4,0xcb,0xff,0x42} + }; + #elif defined(SCRYPT_SALSA64) + static const uint8_t post_vectors[][64] = { + {0xb0,0xb7,0x10,0xb5,0x1f,0x2b,0x7f,0xaf,0x9d,0x95,0x5f,0x4c,0x2d,0x98,0x7c,0xc1, + 0xbc,0x37,0x2f,0x50,0x8d,0xb2,0x9f,0xfd,0x48,0x0d,0xe0,0x44,0x19,0xdf,0x28,0x6c, + 0xab,0xbf,0x1e,0x17,0x26,0xcc,0x57,0x95,0x18,0x17,0x83,0x4c,0x12,0x48,0xd9,0xee, + 0x4b,0x00,0x29,0x06,0x31,0x01,0x6b,0x8c,0x26,0x39,0xbf,0xe4,0xe4,0xd4,0x6a,0x26}, + {0xa0,0x40,0xb2,0xf2,0x11,0xb6,0x5f,0x3d,0x4c,0x1e,0xef,0x59,0xd4,0x98,0xdb,0x14, + 
0x01,0xff,0xe3,0x34,0xd7,0x19,0xcd,0xeb,0xde,0x52,0x1c,0xf4,0x86,0x43,0xc9,0xe2, + 0xfb,0xf9,0x4f,0x0a,0xbb,0x1f,0x5c,0x6a,0xdf,0xb9,0x28,0xfa,0xac,0xc4,0x48,0xed, + 0xcc,0xd2,0x2e,0x25,0x5f,0xf3,0x56,0x1d,0x2d,0x23,0x22,0xc1,0xbc,0xff,0x78,0x80} + }; + #endif +#else + static const uint8_t post_vectors[][64] = {{0}}; +#endif + diff --git a/scryptn.h b/scryptn.h index ba461a2..f14a826 100644 --- a/scryptn.h +++ b/scryptn.h @@ -1,6 +1,6 @@ -#ifndef SCRYPT_H -#define SCRYPT_H -#include +#ifndef SCRYPTN_H +#define SCRYPTN_H + #ifdef __cplusplus extern "C" { #endif @@ -13,4 +13,4 @@ void scrypt_N_1_1_256_sp(const char* input, char* output, char* scratchpad, uint } #endif -#endif +#endif \ No newline at end of file