Remove unused modules from libphobos std.internal package.
libphobos/ChangeLog: 2018-11-19 Iain Buclaw <ibuclaw@gdcproject.org> * src/Makefile.am: Remove std.internal.digest.sha_SSSE3 and std.internal.math.biguintx86 modules. * src/Makefile.in: Rebuild. * src/std/internal/digest/sha_SSSE3.d: Remove. * src/std/internal/math/biguintx86.d: Remove. From-SVN: r266256
This commit is contained in:
parent
bee39274cb
commit
6feee1e1b8
@ -1,3 +1,11 @@
|
||||
2018-11-19 Iain Buclaw <ibuclaw@gdcproject.org>
|
||||
|
||||
* src/Makefile.am: Remove std.internal.digest.sha_SSSE3 and
|
||||
std.internal.math.biguintx86 modules.
|
||||
* src/Makefile.in: Rebuild.
|
||||
* src/std/internal/digest/sha_SSSE3.d: Remove.
|
||||
* src/std/internal/math/biguintx86.d: Remove.
|
||||
|
||||
2018-11-02 Iain Buclaw <ibuclaw@gdcproject.org>
|
||||
|
||||
PR d/87827
|
||||
|
@ -156,9 +156,8 @@ PHOBOS_DSOURCES = etc/c/curl.d etc/c/sqlite3.d etc/c/zlib.d \
|
||||
std/experimental/logger/multilogger.d \
|
||||
std/experimental/logger/nulllogger.d std/experimental/logger/package.d \
|
||||
std/experimental/typecons.d std/file.d std/format.d std/functional.d \
|
||||
std/getopt.d std/internal/cstring.d std/internal/digest/sha_SSSE3.d \
|
||||
std/internal/math/biguintcore.d std/internal/math/biguintnoasm.d \
|
||||
std/internal/math/biguintx86.d std/internal/math/errorfunction.d \
|
||||
std/getopt.d std/internal/cstring.d std/internal/math/biguintcore.d \
|
||||
std/internal/math/biguintnoasm.d std/internal/math/errorfunction.d \
|
||||
std/internal/math/gammafunction.d std/internal/scopebuffer.d \
|
||||
std/internal/test/dummyrange.d std/internal/test/range.d \
|
||||
std/internal/test/uda.d std/internal/unicode_comp.d \
|
||||
|
@ -193,10 +193,8 @@ am__objects_1 = etc/c/curl.lo etc/c/sqlite3.lo etc/c/zlib.lo \
|
||||
std/experimental/logger/package.lo \
|
||||
std/experimental/typecons.lo std/file.lo std/format.lo \
|
||||
std/functional.lo std/getopt.lo std/internal/cstring.lo \
|
||||
std/internal/digest/sha_SSSE3.lo \
|
||||
std/internal/math/biguintcore.lo \
|
||||
std/internal/math/biguintnoasm.lo \
|
||||
std/internal/math/biguintx86.lo \
|
||||
std/internal/math/errorfunction.lo \
|
||||
std/internal/math/gammafunction.lo std/internal/scopebuffer.lo \
|
||||
std/internal/test/dummyrange.lo std/internal/test/range.lo \
|
||||
@ -282,10 +280,8 @@ am__DEPENDENCIES_1 = etc/c/curl.t.lo etc/c/sqlite3.t.lo \
|
||||
std/experimental/logger/package.t.lo \
|
||||
std/experimental/typecons.t.lo std/file.t.lo std/format.t.lo \
|
||||
std/functional.t.lo std/getopt.t.lo std/internal/cstring.t.lo \
|
||||
std/internal/digest/sha_SSSE3.t.lo \
|
||||
std/internal/math/biguintcore.t.lo \
|
||||
std/internal/math/biguintnoasm.t.lo \
|
||||
std/internal/math/biguintx86.t.lo \
|
||||
std/internal/math/errorfunction.t.lo \
|
||||
std/internal/math/gammafunction.t.lo \
|
||||
std/internal/scopebuffer.t.lo \
|
||||
@ -392,10 +388,8 @@ am__DEPENDENCIES_4 = etc/c/curl.t.o etc/c/sqlite3.t.o etc/c/zlib.t.o \
|
||||
std/experimental/logger/package.t.o \
|
||||
std/experimental/typecons.t.o std/file.t.o std/format.t.o \
|
||||
std/functional.t.o std/getopt.t.o std/internal/cstring.t.o \
|
||||
std/internal/digest/sha_SSSE3.t.o \
|
||||
std/internal/math/biguintcore.t.o \
|
||||
std/internal/math/biguintnoasm.t.o \
|
||||
std/internal/math/biguintx86.t.o \
|
||||
std/internal/math/errorfunction.t.o \
|
||||
std/internal/math/gammafunction.t.o \
|
||||
std/internal/scopebuffer.t.o std/internal/test/dummyrange.t.o \
|
||||
@ -788,9 +782,8 @@ PHOBOS_DSOURCES = etc/c/curl.d etc/c/sqlite3.d etc/c/zlib.d \
|
||||
std/experimental/logger/multilogger.d \
|
||||
std/experimental/logger/nulllogger.d std/experimental/logger/package.d \
|
||||
std/experimental/typecons.d std/file.d std/format.d std/functional.d \
|
||||
std/getopt.d std/internal/cstring.d std/internal/digest/sha_SSSE3.d \
|
||||
std/internal/math/biguintcore.d std/internal/math/biguintnoasm.d \
|
||||
std/internal/math/biguintx86.d std/internal/math/errorfunction.d \
|
||||
std/getopt.d std/internal/cstring.d std/internal/math/biguintcore.d \
|
||||
std/internal/math/biguintnoasm.d std/internal/math/errorfunction.d \
|
||||
std/internal/math/gammafunction.d std/internal/scopebuffer.d \
|
||||
std/internal/test/dummyrange.d std/internal/test/range.d \
|
||||
std/internal/test/uda.d std/internal/unicode_comp.d \
|
||||
@ -1032,16 +1025,11 @@ std/internal/$(am__dirstamp):
|
||||
@$(MKDIR_P) std/internal
|
||||
@: > std/internal/$(am__dirstamp)
|
||||
std/internal/cstring.lo: std/internal/$(am__dirstamp)
|
||||
std/internal/digest/$(am__dirstamp):
|
||||
@$(MKDIR_P) std/internal/digest
|
||||
@: > std/internal/digest/$(am__dirstamp)
|
||||
std/internal/digest/sha_SSSE3.lo: std/internal/digest/$(am__dirstamp)
|
||||
std/internal/math/$(am__dirstamp):
|
||||
@$(MKDIR_P) std/internal/math
|
||||
@: > std/internal/math/$(am__dirstamp)
|
||||
std/internal/math/biguintcore.lo: std/internal/math/$(am__dirstamp)
|
||||
std/internal/math/biguintnoasm.lo: std/internal/math/$(am__dirstamp)
|
||||
std/internal/math/biguintx86.lo: std/internal/math/$(am__dirstamp)
|
||||
std/internal/math/errorfunction.lo: std/internal/math/$(am__dirstamp)
|
||||
std/internal/math/gammafunction.lo: std/internal/math/$(am__dirstamp)
|
||||
std/internal/scopebuffer.lo: std/internal/$(am__dirstamp)
|
||||
@ -1174,8 +1162,6 @@ mostlyclean-compile:
|
||||
-rm -f std/experimental/logger/*.lo
|
||||
-rm -f std/internal/*.$(OBJEXT)
|
||||
-rm -f std/internal/*.lo
|
||||
-rm -f std/internal/digest/*.$(OBJEXT)
|
||||
-rm -f std/internal/digest/*.lo
|
||||
-rm -f std/internal/math/*.$(OBJEXT)
|
||||
-rm -f std/internal/math/*.lo
|
||||
-rm -f std/internal/test/*.$(OBJEXT)
|
||||
@ -1401,7 +1387,6 @@ clean-libtool:
|
||||
-rm -rf std/experimental/allocator/building_blocks/.libs std/experimental/allocator/building_blocks/_libs
|
||||
-rm -rf std/experimental/logger/.libs std/experimental/logger/_libs
|
||||
-rm -rf std/internal/.libs std/internal/_libs
|
||||
-rm -rf std/internal/digest/.libs std/internal/digest/_libs
|
||||
-rm -rf std/internal/math/.libs std/internal/math/_libs
|
||||
-rm -rf std/internal/test/.libs std/internal/test/_libs
|
||||
-rm -rf std/internal/windows/.libs std/internal/windows/_libs
|
||||
@ -1529,7 +1514,6 @@ distclean-generic:
|
||||
-rm -f std/experimental/allocator/building_blocks/$(am__dirstamp)
|
||||
-rm -f std/experimental/logger/$(am__dirstamp)
|
||||
-rm -f std/internal/$(am__dirstamp)
|
||||
-rm -f std/internal/digest/$(am__dirstamp)
|
||||
-rm -f std/internal/math/$(am__dirstamp)
|
||||
-rm -f std/internal/test/$(am__dirstamp)
|
||||
-rm -f std/internal/windows/$(am__dirstamp)
|
||||
|
@ -1,729 +0,0 @@
|
||||
// Written in the D programming language.
|
||||
|
||||
/**
|
||||
* Computes SHA1 digests of arbitrary data, using an optimized algorithm with SSSE3 instructions.
|
||||
*
|
||||
* Authors:
|
||||
* The general idea is described by Dean Gaudet.
|
||||
* Another important observation is published by Max Locktyukhin.
|
||||
* (Both implementations are public domain.)
|
||||
* Translation to X86 and D by Kai Nacke <kai@redstar.de>
|
||||
*
|
||||
* References:
|
||||
* $(LINK2 http://arctic.org/~dean/crypto/sha1.html)
|
||||
* $(LINK2 http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1/, Fast implementation of SHA1)
|
||||
*/
|
||||
module std.internal.digest.sha_SSSE3;
|
||||
|
||||
version (D_InlineAsm_X86)
|
||||
{
|
||||
version (D_PIC) {} // Bugzilla 9378
|
||||
else
|
||||
{
|
||||
private version = USE_SSSE3;
|
||||
private version = _32Bit;
|
||||
}
|
||||
}
|
||||
else version (D_InlineAsm_X86_64)
|
||||
{
|
||||
private version = USE_SSSE3;
|
||||
private version = _64Bit;
|
||||
}
|
||||
|
||||
/*
|
||||
* The idea is quite simple. The SHA-1 specification defines the following message schedule:
|
||||
* W[i] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1
|
||||
*
|
||||
* To employ SSE, simply write down the formula four times:
|
||||
* W[i ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1
|
||||
* W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1
|
||||
* W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1
|
||||
* W[i+3] = (W[i ] ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1
|
||||
* The last formula requires value W[i] computed with the first formula.
|
||||
* Because the xor operation and the rotate operation are commutative, we can replace the
|
||||
* last formula with
|
||||
* W[i+3] = ( 0 ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1
|
||||
* and then calculate
|
||||
* W[i+3] ^= W[i] rol 1
|
||||
* which unfortunately requires many additional operations. This approach was described by
|
||||
* Dean Gaudet.
|
||||
*
|
||||
* Max Locktyukhin observed that
|
||||
* W[i] = W[i-A] ^ W[i-B]
|
||||
* is equivalent to
|
||||
* W[i] = W[i-2*A] ^ W[i-2*B]
|
||||
* (if the indices are still in valid ranges). Using this observation, the formula is
|
||||
* translated to
|
||||
* W[i] = (W[i-6] ^ W[i-16] ^ W[i-28] ^ W[i-32]) rol 2
|
||||
* Again, to employ SSE the formula is used four times.
|
||||
*
|
||||
* Later on, the expression W[i] + K(i) is used. (K(i) is the constant used in round i.)
|
||||
* Once the 4 W[i] are calculated, we can also add the four K(i) values with one SSE instruction.
|
||||
*
|
||||
* The 32bit and 64bit implementations are almost identical. The main difference is that there
|
||||
* are only 8 XMM registers in 32bit mode. Therefore, space on the stack is needed to save
|
||||
* computed values.
|
||||
*/
|
||||
|
||||
version (USE_SSSE3)
|
||||
{
|
||||
/*
|
||||
* The general idea is to use the XMM registers as a sliding window over
|
||||
* message schedule. XMM0 to XMM7 are used to store the last 64 byte of
|
||||
* the message schedule. In 64 bit mode this is fine because of the number of
|
||||
* registers. The main difference of the 32 bit code is that a part of the
|
||||
* calculated message schedule is saved on the stack because 2 temporary
|
||||
* registers are needed.
|
||||
*/
|
||||
|
||||
/* Number of message words we are precalculating. */
|
||||
private immutable int PRECALC_AHEAD = 16;
|
||||
|
||||
/* T1 and T2 are used for intermediate results of computations. */
|
||||
private immutable string T1 = "EAX";
|
||||
private immutable string T2 = "EBX";
|
||||
|
||||
/* The registers used for the SHA-1 variables. */
|
||||
private immutable string A = "ECX";
|
||||
private immutable string B = "ESI";
|
||||
private immutable string C = "EDI";
|
||||
private immutable string D = "EBP";
|
||||
private immutable string E = "EDX";
|
||||
|
||||
/* */
|
||||
version (_32Bit)
|
||||
{
|
||||
private immutable string SP = "ESP";
|
||||
private immutable string BUFFER_PTR = "EAX";
|
||||
private immutable string STATE_PTR = "EBX";
|
||||
|
||||
// Control byte for shuffle instruction (only used in round 0-15)
|
||||
private immutable string X_SHUFFLECTL = "XMM6";
|
||||
|
||||
// Round constant (only used in round 0-15)
|
||||
private immutable string X_CONSTANT = "XMM7";
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
private immutable string SP = "RSP";
|
||||
private immutable string BUFFER_PTR = "R9";
|
||||
private immutable string STATE_PTR = "R8";
|
||||
private immutable string CONSTANTS_PTR = "R10";
|
||||
|
||||
// Registers for temporary results (XMM10 and XMM11 are also used temporary)
|
||||
private immutable string W_TMP = "XMM8";
|
||||
private immutable string W_TMP2 = "XMM9";
|
||||
|
||||
// Control byte for shuffle instruction (only used in round 0-15)
|
||||
private immutable string X_SHUFFLECTL = "XMM12";
|
||||
|
||||
// Round constant
|
||||
private immutable string X_CONSTANT = "XMM13";
|
||||
}
|
||||
|
||||
/* The control words for the byte shuffle instruction and the round constants. */
|
||||
align(16) public immutable uint[20] constants =
|
||||
[
|
||||
// The control words for the byte shuffle instruction.
|
||||
0x0001_0203, 0x0405_0607, 0x0809_0a0b, 0x0c0d_0e0f,
|
||||
// Constants for round 0-19
|
||||
0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999,
|
||||
// Constants for round 20-39
|
||||
0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1,
|
||||
// Constants for round 40-59
|
||||
0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc,
|
||||
// Constants for round 60-79
|
||||
0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6
|
||||
];
|
||||
|
||||
/** Simple version to produce numbers < 100 as string. */
|
||||
private nothrow pure string to_string(uint i)
|
||||
{
|
||||
if (i < 10)
|
||||
return "0123456789"[i .. i + 1];
|
||||
|
||||
assert(i < 100);
|
||||
char[2] s;
|
||||
s[0] = cast(char)(i / 10 + '0');
|
||||
s[1] = cast(char)(i % 10 + '0');
|
||||
return s.idup;
|
||||
}
|
||||
|
||||
/** Returns the reference to the byte shuffle control word. */
|
||||
private nothrow pure string bswap_shufb_ctl()
|
||||
{
|
||||
version (_64Bit)
|
||||
return "["~CONSTANTS_PTR~"]";
|
||||
else
|
||||
return "[constants]";
|
||||
}
|
||||
|
||||
/** Returns the reference to constant used in round i. */
|
||||
private nothrow pure string constant(uint i)
|
||||
{
|
||||
version (_64Bit)
|
||||
return "16 + 16*"~to_string(i/20)~"["~CONSTANTS_PTR~"]";
|
||||
else
|
||||
return "[constants + 16 + 16*"~to_string(i/20)~"]";
|
||||
}
|
||||
|
||||
/** Returns the XMM register number used in round i */
|
||||
private nothrow pure uint regno(uint i)
|
||||
{
|
||||
return (i/4)&7;
|
||||
}
|
||||
|
||||
/** Returns reference to storage of vector W[i .. i+4]. */
|
||||
private nothrow pure string WiV(uint i)
|
||||
{
|
||||
return "["~SP~" + WI_PTR + "~to_string((i/4)&7)~"*16]";
|
||||
}
|
||||
|
||||
/** Returns reference to storage of vector (W + K)[i .. i+4]. */
|
||||
private nothrow pure string WiKiV(uint i)
|
||||
{
|
||||
return "["~SP~" + WI_PLUS_KI_PTR + "~to_string((i/4)&3)~"*16]";
|
||||
}
|
||||
|
||||
/** Returns reference to storage of value W[i] + K[i]. */
|
||||
private nothrow pure string WiKi(uint i)
|
||||
{
|
||||
return "["~SP~" + WI_PLUS_KI_PTR + 4*"~to_string(i&15)~"]";
|
||||
}
|
||||
|
||||
/**
|
||||
* Chooses the instruction sequence based on the 32bit or 64bit model.
|
||||
*/
|
||||
private nothrow pure string[] swt3264(string[] insn32, string[] insn64)
|
||||
{
|
||||
version (_32Bit)
|
||||
{
|
||||
return insn32;
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
return insn64;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flattens the instruction sequence and wraps it in an asm block.
|
||||
*/
|
||||
private nothrow pure string wrap(string[] insn)
|
||||
{
|
||||
string s = "asm pure nothrow @nogc {";
|
||||
foreach (t; insn) s ~= (t ~ "; \n");
|
||||
s ~= "}";
|
||||
return s;
|
||||
// Is not CTFE:
|
||||
// return "asm pure nothrow @nogc { " ~ join(insn, "; \n") ~ "}";
|
||||
}
|
||||
|
||||
/**
|
||||
* Weaves the 2 instruction sequences together.
|
||||
*/
|
||||
private nothrow pure string[] weave(string[] seq1, string[] seq2, uint dist = 1)
|
||||
{
|
||||
string[] res = [];
|
||||
auto i1 = 0, i2 = 0;
|
||||
while (i1 < seq1.length || i2 < seq2.length)
|
||||
{
|
||||
if (i2 < seq2.length)
|
||||
{
|
||||
res ~= seq2[i2 .. i2+1];
|
||||
i2 += 1;
|
||||
}
|
||||
if (i1 < seq1.length)
|
||||
{
|
||||
import std.algorithm.comparison : min;
|
||||
|
||||
res ~= seq1[i1 .. min(i1+dist, $)];
|
||||
i1 += dist;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates instructions to load state from memory into registers.
|
||||
*/
|
||||
private nothrow pure string[] loadstate(string base, string a, string b, string c, string d, string e)
|
||||
{
|
||||
return ["mov "~a~",["~base~" + 0*4]",
|
||||
"mov "~b~",["~base~" + 1*4]",
|
||||
"mov "~c~",["~base~" + 2*4]",
|
||||
"mov "~d~",["~base~" + 3*4]",
|
||||
"mov "~e~",["~base~" + 4*4]" ];
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates instructions to update state from registers, saving result in memory.
|
||||
*/
|
||||
private nothrow pure string[] savestate(string base, string a, string b, string c, string d, string e)
|
||||
{
|
||||
return ["add ["~base~" + 0*4],"~a,
|
||||
"add ["~base~" + 1*4],"~b,
|
||||
"add ["~base~" + 2*4],"~c,
|
||||
"add ["~base~" + 3*4],"~d,
|
||||
"add ["~base~" + 4*4],"~e ];
|
||||
}
|
||||
|
||||
/** Calculates Ch(x, y, z) = z ^ (x & (y ^ z)) */
|
||||
private nothrow pure string[] Ch(string x, string y, string z)
|
||||
{
|
||||
return ["mov "~T1~","~y,
|
||||
"xor "~T1~","~z,
|
||||
"and "~T1~","~x,
|
||||
"xor "~T1~","~z ];
|
||||
}
|
||||
|
||||
/** Calculates Parity(x, y, z) = x ^ y ^ z */
|
||||
private nothrow pure string[] Parity(string x, string y, string z)
|
||||
{
|
||||
return ["mov "~T1~","~z,
|
||||
"xor "~T1~","~y,
|
||||
"xor "~T1~","~x ];
|
||||
}
|
||||
|
||||
/** Calculates Maj(x, y, z) = (x & y) | (z & (x ^ y)) */
|
||||
private nothrow pure string[] Maj(string x, string y, string z)
|
||||
{
|
||||
return ["mov "~T1~","~y,
|
||||
"mov "~T2~","~x,
|
||||
"or "~T1~","~x,
|
||||
"and "~T2~","~y,
|
||||
"and "~T1~","~z,
|
||||
"or "~T1~","~T2 ];
|
||||
}
|
||||
|
||||
/** Returns function for round i. Function returns result in T1 and may destroy T2. */
|
||||
private nothrow pure string[] F(int i, string b, string c, string d)
|
||||
{
|
||||
string[] insn;
|
||||
if (i >= 0 && i <= 19) insn = Ch(b, c, d);
|
||||
else if (i >= 20 && i <= 39) insn = Parity(b, c, d);
|
||||
else if (i >= 40 && i <= 59) insn = Maj(b, c, d);
|
||||
else if (i >= 60 && i <= 79) insn = Parity(b, c, d);
|
||||
else assert(false, "Coding error");
|
||||
return insn;
|
||||
}
|
||||
|
||||
/** Returns instruction used to setup a round. */
|
||||
private nothrow pure string[] xsetup(int i)
|
||||
{
|
||||
if (i == 0)
|
||||
{
|
||||
return swt3264(["movdqa "~X_SHUFFLECTL~","~bswap_shufb_ctl(),
|
||||
"movdqa "~X_CONSTANT~","~constant(i)],
|
||||
["movdqa "~X_SHUFFLECTL~","~bswap_shufb_ctl(),
|
||||
"movdqa "~X_CONSTANT~","~constant(i)]);
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
if (i%20 == 0)
|
||||
{
|
||||
return ["movdqa "~X_CONSTANT~","~constant(i)];
|
||||
}
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the message words and performs the little to big endian conversion.
|
||||
* Requires that the shuffle control word and the round constant is loaded
|
||||
* into required XMM register. The BUFFER_PTR register must point to the
|
||||
* buffer.
|
||||
*/
|
||||
private nothrow pure string[] precalc_00_15(int i)
|
||||
{
|
||||
int regno = regno(i);
|
||||
|
||||
string W = "XMM" ~ to_string(regno);
|
||||
version (_32Bit)
|
||||
{
|
||||
string W_TMP = "XMM" ~ to_string(regno+2);
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
string W_TMP = "XMM" ~ to_string(regno+8);
|
||||
}
|
||||
|
||||
if ((i & 3) == 0)
|
||||
{
|
||||
return ["movdqu "~W~",["~BUFFER_PTR~" + "~to_string(regno)~"*16]"];
|
||||
}
|
||||
else if ((i & 3) == 1)
|
||||
{
|
||||
return ["pshufb "~W~","~X_SHUFFLECTL] ~
|
||||
swt3264(["movdqa "~WiV(i)~","~W], []);
|
||||
}
|
||||
else if ((i & 3) == 2)
|
||||
{
|
||||
return ["movdqa "~W_TMP~","~W,
|
||||
"paddd "~W_TMP~","~X_CONSTANT,
|
||||
];
|
||||
}
|
||||
else
|
||||
{
|
||||
return ["movdqa "~WiKiV(i)~","~W_TMP,
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Done on 4 consequtive W[i] values in a single XMM register
|
||||
* W[i ] = (W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16]) rol 1
|
||||
* W[i+1] = (W[i-2] ^ W[i-7] ^ W[i-13] ^ W[i-15]) rol 1
|
||||
* W[i+2] = (W[i-1] ^ W[i-6] ^ W[i-12] ^ W[i-14]) rol 1
|
||||
* W[i+3] = ( 0 ^ W[i-5] ^ W[i-11] ^ W[i-13]) rol 1
|
||||
*
|
||||
* This additional calculation unfortunately requires many additional operations
|
||||
* W[i+3] ^= W[i] rol 1
|
||||
*
|
||||
* Once we have 4 W[i] values in XMM we can also add four K values with one instruction
|
||||
* W[i:i+3] += {K,K,K,K}
|
||||
*/
|
||||
private nothrow pure string[] precalc_16_31(int i)
|
||||
{
|
||||
int regno = regno(i);
|
||||
|
||||
string W = "XMM" ~ to_string(regno);
|
||||
string W_minus_4 = "XMM" ~ to_string((regno-1)&7);
|
||||
string W_minus_8 = "XMM" ~ to_string((regno-2)&7);
|
||||
string W_minus_12 = "XMM" ~ to_string((regno-3)&7);
|
||||
string W_minus_16 = "XMM" ~ to_string((regno-4)&7);
|
||||
version (_32Bit)
|
||||
{
|
||||
string W_TMP = "XMM" ~ to_string((regno+1)&7);
|
||||
string W_TMP2 = "XMM" ~ to_string((regno+2)&7);
|
||||
}
|
||||
|
||||
if ((i & 3) == 0)
|
||||
{
|
||||
return ["movdqa "~W~","~W_minus_12,
|
||||
"palignr "~W~","~W_minus_16~",8", // W[i] = W[i-14]
|
||||
"pxor "~W~","~W_minus_16, // W[i] ^= W[i-16]
|
||||
"pxor "~W~","~W_minus_8, // W[i] ^= W[i-8]
|
||||
"movdqa "~W_TMP~","~W_minus_4,
|
||||
];
|
||||
}
|
||||
else if ((i & 3) == 1)
|
||||
{
|
||||
return ["psrldq "~W_TMP~",4", // W[i-3]
|
||||
"pxor "~W~","~W_TMP, // W[i] ^= W[i-3]
|
||||
"movdqa "~W_TMP~","~W,
|
||||
"psrld "~W~",31",
|
||||
"pslld "~W_TMP~",1",
|
||||
];
|
||||
}
|
||||
else if ((i & 3) == 2)
|
||||
{
|
||||
return ["por "~W~","~W_TMP,
|
||||
"movdqa "~W_TMP~","~W,
|
||||
"pslldq "~W_TMP~",12",
|
||||
"movdqa "~W_TMP2~","~W_TMP,
|
||||
"pslld "~W_TMP~",1",
|
||||
];
|
||||
}
|
||||
else
|
||||
{
|
||||
return ["psrld "~W_TMP2~",31",
|
||||
"por "~W_TMP~","~W_TMP2,
|
||||
"pxor "~W~","~W_TMP,
|
||||
"movdqa "~W_TMP~","~W ] ~
|
||||
swt3264(["movdqa "~WiV(i)~","~W,
|
||||
"paddd "~W_TMP~","~constant(i) ],
|
||||
["paddd "~W_TMP~","~X_CONSTANT ]) ~
|
||||
["movdqa "~WiKiV(i)~","~W_TMP];
|
||||
}
|
||||
}
|
||||
|
||||
/** Performs the main calculation as decribed above. */
|
||||
private nothrow pure string[] precalc_32_79(int i)
|
||||
{
|
||||
int regno = regno(i);
|
||||
|
||||
string W = "XMM" ~ to_string(regno);
|
||||
string W_minus_4 = "XMM" ~ to_string((regno-1)&7);
|
||||
string W_minus_8 = "XMM" ~ to_string((regno-2)&7);
|
||||
string W_minus_16 = "XMM" ~ to_string((regno-4)&7);
|
||||
version (_32Bit)
|
||||
{
|
||||
string W_minus_28 = "[ESP + WI_PTR + "~ to_string((regno-7)&7)~"*16]";
|
||||
string W_minus_32 = "[ESP + WI_PTR + "~ to_string((regno-8)&7)~"*16]";
|
||||
string W_TMP = "XMM" ~ to_string((regno+1)&7);
|
||||
string W_TMP2 = "XMM" ~ to_string((regno+2)&7);
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
string W_minus_28 = "XMM" ~ to_string((regno-7)&7);
|
||||
string W_minus_32 = "XMM" ~ to_string((regno-8)&7);
|
||||
}
|
||||
|
||||
if ((i & 3) == 0)
|
||||
{
|
||||
return swt3264(["movdqa "~W~","~W_minus_32], []) ~
|
||||
["movdqa "~W_TMP~","~W_minus_4,
|
||||
"pxor "~W~","~W_minus_28, // W is W_minus_32 before xor
|
||||
"palignr "~W_TMP~","~W_minus_8~",8",
|
||||
];
|
||||
}
|
||||
else if ((i & 3) == 1)
|
||||
{
|
||||
return ["pxor "~W~","~W_minus_16,
|
||||
"pxor "~W~","~W_TMP,
|
||||
"movdqa "~W_TMP~","~W,
|
||||
];
|
||||
}
|
||||
else if ((i & 3) == 2)
|
||||
{
|
||||
return ["psrld "~W~",30",
|
||||
"pslld "~W_TMP~",2",
|
||||
"por "~W_TMP~","~W,
|
||||
];
|
||||
}
|
||||
else
|
||||
{
|
||||
if (i < 76)
|
||||
return ["movdqa "~W~","~W_TMP] ~
|
||||
swt3264(["movdqa "~WiV(i)~","~W,
|
||||
"paddd "~W_TMP~","~constant(i)],
|
||||
["paddd "~W_TMP~","~X_CONSTANT]) ~
|
||||
["movdqa "~WiKiV(i)~","~W_TMP];
|
||||
else
|
||||
return swt3264(["paddd "~W_TMP~","~constant(i)],
|
||||
["paddd "~W_TMP~","~X_CONSTANT]) ~
|
||||
["movdqa "~WiKiV(i)~","~W_TMP];
|
||||
}
|
||||
}
|
||||
|
||||
/** Choose right precalc method. */
|
||||
private nothrow pure string[] precalc(int i)
|
||||
{
|
||||
if (i >= 0 && i < 16) return precalc_00_15(i);
|
||||
if (i >= 16 && i < 32) return precalc_16_31(i);
|
||||
if (i >= 32 && i < 80) return precalc_32_79(i);
|
||||
return [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Return code for round i and i+1.
|
||||
* Performs the following rotation:
|
||||
* in=>out: A=>D, B=>E, C=>A, D=>B, E=>C
|
||||
*/
|
||||
private nothrow pure string[] round(int i, string a, string b, string c, string d, string e)
|
||||
{
|
||||
return xsetup(PRECALC_AHEAD + i) ~
|
||||
weave(F(i, b, c, d) ~ // Returns result in T1; may destroy T2
|
||||
["add "~e~","~WiKi(i),
|
||||
"ror "~b~",2",
|
||||
"mov "~T2~","~a,
|
||||
"add "~d~","~WiKi(i+1),
|
||||
"rol "~T2~",5",
|
||||
"add "~e~","~T1 ],
|
||||
precalc(PRECALC_AHEAD + i), 2) ~
|
||||
weave(
|
||||
["add "~T2~","~e, // T2 = (A <<< 5) + F(B, C, D) + Wi + Ki + E
|
||||
"mov "~e~","~T2,
|
||||
"rol "~T2~",5",
|
||||
"add "~d~","~T2 ] ~
|
||||
F(i+1, a, b, c) ~ // Returns result in T1; may destroy T2
|
||||
["add "~d~","~T1,
|
||||
"ror "~a~",2"],
|
||||
precalc(PRECALC_AHEAD + i+1), 2);
|
||||
}
|
||||
|
||||
// Offset into stack (see below)
|
||||
version (_32Bit)
|
||||
{
|
||||
private enum { STATE_OFS = 4, WI_PLUS_KI_PTR = 8, WI_PTR = 72 };
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
private enum { WI_PLUS_KI_PTR = 0 };
|
||||
}
|
||||
|
||||
/** The prologue sequence. */
|
||||
private nothrow pure string[] prologue()
|
||||
{
|
||||
version (_32Bit)
|
||||
{
|
||||
/*
|
||||
* Parameters:
|
||||
* EAX contains pointer to input buffer
|
||||
*
|
||||
* Stack layout as follows:
|
||||
* +----------------+
|
||||
* | ptr to state |
|
||||
* +----------------+
|
||||
* | return address |
|
||||
* +----------------+
|
||||
* | EBP |
|
||||
* +----------------+
|
||||
* | ESI |
|
||||
* +----------------+
|
||||
* | EDI |
|
||||
* +----------------+
|
||||
* | EBX |
|
||||
* +----------------+
|
||||
* | Space for |
|
||||
* | Wi | <- ESP+72
|
||||
* +----------------+
|
||||
* | Space for |
|
||||
* | Wi+Ki | <- ESP+8
|
||||
* +----------------+ <- 16byte aligned
|
||||
* | ptr to state | <- ESP+4
|
||||
* +----------------+
|
||||
* | old ESP | <- ESP
|
||||
* +----------------+
|
||||
*/
|
||||
static assert(BUFFER_PTR == "EAX");
|
||||
static assert(STATE_PTR == "EBX");
|
||||
return [// Save registers according to calling convention
|
||||
"push EBP",
|
||||
"push ESI",
|
||||
"push EDI",
|
||||
"push EBX",
|
||||
// Load parameters
|
||||
"mov EBX, [ESP + 5*4]", //pointer to state
|
||||
// Align stack
|
||||
"mov EBP, ESP",
|
||||
"sub ESP, 4*16 + 8*16",
|
||||
"and ESP, 0xffff_fff0",
|
||||
"push EBX",
|
||||
"push EBP",
|
||||
];
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
/*
|
||||
* Parameters:
|
||||
* RDX contains pointer to state
|
||||
* RSI contains pointer to input buffer
|
||||
* RDI contains pointer to constants
|
||||
*
|
||||
* Stack layout as follows:
|
||||
* +----------------+
|
||||
* | return address |
|
||||
* +----------------+
|
||||
* | RBP |
|
||||
* +----------------+
|
||||
* | RBX |
|
||||
* +----------------+
|
||||
* | Unused |
|
||||
* +----------------+
|
||||
* | Space for |
|
||||
* | Wi+Ki | <- RSP
|
||||
* +----------------+ <- 16byte aligned
|
||||
*/
|
||||
return [// Save registers according to calling convention
|
||||
"push RBP",
|
||||
"push RBX",
|
||||
// Save parameters
|
||||
"mov "~STATE_PTR~", RDX", //pointer to state
|
||||
"mov "~BUFFER_PTR~", RSI", //pointer to buffer
|
||||
"mov "~CONSTANTS_PTR~", RDI", //pointer to constants to avoid absolute addressing
|
||||
// Align stack
|
||||
"sub RSP, 4*16+8",
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The epilogue sequence. Just pop the saved registers from stack and return to caller.
|
||||
*/
|
||||
private nothrow pure string[] epilogue()
|
||||
{
|
||||
version (_32Bit)
|
||||
{
|
||||
return ["pop ESP",
|
||||
"pop EBX",
|
||||
"pop EDI",
|
||||
"pop ESI",
|
||||
"pop EBP",
|
||||
"ret 4",
|
||||
];
|
||||
}
|
||||
version (_64Bit)
|
||||
{
|
||||
return ["add RSP,4*16+8",
|
||||
"pop RBX",
|
||||
"pop RBP",
|
||||
"ret 0",
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
// constants as extra argument for PIC, see Bugzilla 9378
|
||||
import std.meta : AliasSeq;
|
||||
version (_64Bit)
|
||||
alias ExtraArgs = AliasSeq!(typeof(&constants));
|
||||
else
|
||||
alias ExtraArgs = AliasSeq!();
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public void transformSSSE3(uint[5]* state, const(ubyte[64])* buffer, ExtraArgs) pure nothrow @nogc
|
||||
{
|
||||
mixin(wrap(["naked;"] ~ prologue()));
|
||||
// Precalc first 4*16=64 bytes
|
||||
mixin(wrap(xsetup(0)));
|
||||
mixin(wrap(weave(precalc(0)~precalc(1)~precalc(2)~precalc(3),
|
||||
precalc(4)~precalc(5)~precalc(6)~precalc(7))));
|
||||
mixin(wrap(weave(loadstate(STATE_PTR, A, B, C, D, E),
|
||||
weave(precalc(8)~precalc(9)~precalc(10)~precalc(11),
|
||||
precalc(12)~precalc(13)~precalc(14)~precalc(15)))));
|
||||
// Round 1
|
||||
mixin(wrap(round( 0, A, B, C, D, E)));
|
||||
mixin(wrap(round( 2, D, E, A, B, C)));
|
||||
mixin(wrap(round( 4, B, C, D, E, A)));
|
||||
mixin(wrap(round( 6, E, A, B, C, D)));
|
||||
mixin(wrap(round( 8, C, D, E, A, B)));
|
||||
mixin(wrap(round(10, A, B, C, D, E)));
|
||||
mixin(wrap(round(12, D, E, A, B, C)));
|
||||
mixin(wrap(round(14, B, C, D, E, A)));
|
||||
mixin(wrap(round(16, E, A, B, C, D)));
|
||||
mixin(wrap(round(18, C, D, E, A, B)));
|
||||
// Round 2
|
||||
mixin(wrap(round(20, A, B, C, D, E)));
|
||||
mixin(wrap(round(22, D, E, A, B, C)));
|
||||
mixin(wrap(round(24, B, C, D, E, A)));
|
||||
mixin(wrap(round(26, E, A, B, C, D)));
|
||||
mixin(wrap(round(28, C, D, E, A, B)));
|
||||
mixin(wrap(round(30, A, B, C, D, E)));
|
||||
mixin(wrap(round(32, D, E, A, B, C)));
|
||||
mixin(wrap(round(34, B, C, D, E, A)));
|
||||
mixin(wrap(round(36, E, A, B, C, D)));
|
||||
mixin(wrap(round(38, C, D, E, A, B)));
|
||||
// Round 3
|
||||
mixin(wrap(round(40, A, B, C, D, E)));
|
||||
mixin(wrap(round(42, D, E, A, B, C)));
|
||||
mixin(wrap(round(44, B, C, D, E, A)));
|
||||
mixin(wrap(round(46, E, A, B, C, D)));
|
||||
mixin(wrap(round(48, C, D, E, A, B)));
|
||||
mixin(wrap(round(50, A, B, C, D, E)));
|
||||
mixin(wrap(round(52, D, E, A, B, C)));
|
||||
mixin(wrap(round(54, B, C, D, E, A)));
|
||||
mixin(wrap(round(56, E, A, B, C, D)));
|
||||
mixin(wrap(round(58, C, D, E, A, B)));
|
||||
// Round 4
|
||||
mixin(wrap(round(60, A, B, C, D, E)));
|
||||
mixin(wrap(round(62, D, E, A, B, C)));
|
||||
mixin(wrap(round(64, B, C, D, E, A)));
|
||||
mixin(wrap(round(66, E, A, B, C, D)));
|
||||
mixin(wrap(round(68, C, D, E, A, B)));
|
||||
mixin(wrap(round(70, A, B, C, D, E)));
|
||||
mixin(wrap(round(72, D, E, A, B, C)));
|
||||
mixin(wrap(round(74, B, C, D, E, A)));
|
||||
mixin(wrap(round(76, E, A, B, C, D)));
|
||||
mixin(wrap(round(78, C, D, E, A, B)));
|
||||
version (_32Bit)
|
||||
{
|
||||
// Load pointer to state
|
||||
mixin(wrap(["mov "~STATE_PTR~",[ESP + STATE_OFS]"]));
|
||||
}
|
||||
mixin(wrap(savestate(STATE_PTR, A, B, C, D, E)));
|
||||
mixin(wrap(epilogue()));
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user