gcc/libgo/go/crypto/elliptic/p256_amd64.go
Ian Lance Taylor e0f69f36ea libgo: change build procedure to use build tags
Previously the libgo Makefile explicitly listed the set of files to
    compile for each package.  For packages that use build tags, this
    required a lot of awkward automake conditionals in the Makefile.
    
    This CL changes the build to look at the build tags in the files.
    The new shell script libgo/match.sh does the matching.  This required
    adjusting a lot of build tags, and removing some files that are never
    used.  I verified that the exact same sets of files are compiled on
    amd64 GNU/Linux.  I also tested the build on i386 Solaris.
    
    Writing match.sh revealed some bugs in the build tag handling that
    already exists, in a slightly different form, in the gotest shell
    script.  This CL fixes those problems as well.
    
    The old code used automake conditionals to handle systems that were
    missing strerror_r and wait4.  Rather than deal with those in Go, those
    functions are now implemented in runtime/go-nosys.c when necessary, so
    the Go code can simply assume that they exist.
    
    The os testsuite looked for dir_unix.go, which was never built for gccgo
    and has now been removed.  I changed the testsuite to look for dir.go
    instead.
    
    Reviewed-on: https://go-review.googlesource.com/25546

From-SVN: r239189
2016-08-06 00:36:33 +00:00

558 lines
15 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file contains the Go wrapper for the constant-time, 64-bit assembly
// implementation of P256. The optimizations performed here are described in
// detail in:
// S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
// 256-bit primes"
// http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
// https://eprint.iacr.org/2013/816.pdf
// +build ignore
// +build amd64
package elliptic
import (
"math/big"
"sync"
)
type (
p256Curve struct {
*CurveParams
}
p256Point struct {
xyz [12]uint64
}
)
var (
p256 p256Curve
p256Precomputed *[37][64 * 8]uint64
precomputeOnce sync.Once
)
func initP256() {
// See FIPS 186-3, section D.2.3
p256.CurveParams = &CurveParams{Name: "P-256"}
p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
p256.BitSize = 256
}
func (curve p256Curve) Params() *CurveParams {
return curve.CurveParams
}
// Functions implemented in p256_asm_amd64.s
// Montgomery multiplication modulo P256
func p256Mul(res, in1, in2 []uint64)
// Montgomery square modulo P256
func p256Sqr(res, in []uint64)
// Montgomery multiplication by 1
func p256FromMont(res, in []uint64)
// iff cond == 1 val <- -val
func p256NegCond(val []uint64, cond int)
// if cond == 0 res <- b; else res <- a
func p256MovCond(res, a, b []uint64, cond int)
// Endianness swap
func p256BigToLittle(res []uint64, in []byte)
func p256LittleToBig(res []byte, in []uint64)
// Constant time table access
func p256Select(point, table []uint64, idx int)
func p256SelectBase(point, table []uint64, idx int)
// Montgomery multiplication modulo Ord(G)
func p256OrdMul(res, in1, in2 []uint64)
// Montgomery square modulo Ord(G), repeated n times
func p256OrdSqr(res, in []uint64, n int)
// Point add with in2 being affine point
// If sign == 1 -> in2 = -in2
// If sel == 0 -> res = in1
// if zero == 0 -> res = in2
func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
// Point add
func p256PointAddAsm(res, in1, in2 []uint64)
// Point double
func p256PointDoubleAsm(res, in []uint64)
func (curve p256Curve) Inverse(k *big.Int) *big.Int {
if k.Sign() < 0 {
// This should never happen.
k = new(big.Int).Neg(k)
}
if k.Cmp(p256.N) >= 0 {
// This should never happen.
k = new(big.Int).Mod(k, p256.N)
}
// table will store precomputed powers of x. The four words at index
// 4×i store x^(i+1).
var table [4 * 15]uint64
x := make([]uint64, 4)
fromBig(x[:], k)
// This code operates in the Montgomery domain where R = 2^256 mod n
// and n is the order of the scalar field. (See initP256 for the
// value.) Elements in the Montgomery domain take the form a×R and
// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
// i.e. converts x into the Montgomery domain.
RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
p256OrdMul(table[:4], x, RR)
// Prepare the table, no need in constant time access, because the
// power is not a secret. (Entry 0 is never used.)
for i := 2; i < 16; i += 2 {
p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
p256OrdMul(table[4*i:], table[4*(i-1):], table[:4])
}
x[0] = table[4*14+0] // f
x[1] = table[4*14+1]
x[2] = table[4*14+2]
x[3] = table[4*14+3]
p256OrdSqr(x, x, 4)
p256OrdMul(x, x, table[4*14:4*14+4]) // ff
t := make([]uint64, 4, 4)
t[0] = x[0]
t[1] = x[1]
t[2] = x[2]
t[3] = x[3]
p256OrdSqr(x, x, 8)
p256OrdMul(x, x, t) // ffff
t[0] = x[0]
t[1] = x[1]
t[2] = x[2]
t[3] = x[3]
p256OrdSqr(x, x, 16)
p256OrdMul(x, x, t) // ffffffff
t[0] = x[0]
t[1] = x[1]
t[2] = x[2]
t[3] = x[3]
p256OrdSqr(x, x, 64) // ffffffff0000000000000000
p256OrdMul(x, x, t) // ffffffff00000000ffffffff
p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
p256OrdMul(x, x, t) // ffffffff00000000ffffffffffffffff
// Remaining 32 windows
expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
for i := 0; i < 32; i++ {
p256OrdSqr(x, x, 4)
p256OrdMul(x, x, table[4*(expLo[i]-1):])
}
// Multiplying by one in the Montgomery domain converts a Montgomery
// value out of the domain.
one := []uint64{1, 0, 0, 0}
p256OrdMul(x, x, one)
xOut := make([]byte, 32)
p256LittleToBig(xOut, x)
return new(big.Int).SetBytes(xOut)
}
// fromBig converts a *big.Int into a format used by this code.
func fromBig(out []uint64, big *big.Int) {
for i := range out {
out[i] = 0
}
for i, v := range big.Bits() {
out[i] = uint64(v)
}
}
// p256GetScalar endian-swaps the big-endian scalar value from in and writes it
// to out. If the scalar is equal or greater than the order of the group, it's
// reduced modulo that order.
func p256GetScalar(out []uint64, in []byte) {
n := new(big.Int).SetBytes(in)
if n.Cmp(p256.N) >= 0 {
n.Mod(n, p256.N)
}
fromBig(out, n)
}
// p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p. See comment in Inverse about how this is used.
var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
func maybeReduceModP(in *big.Int) *big.Int {
if in.Cmp(p256.P) < 0 {
return in
}
return new(big.Int).Mod(in, p256.P)
}
func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
scalarReversed := make([]uint64, 4)
var r1, r2 p256Point
p256GetScalar(scalarReversed, baseScalar)
r1.p256BaseMult(scalarReversed)
p256GetScalar(scalarReversed, scalar)
fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
// This sets r2's Z value to 1, in the Montgomery domain.
r2.xyz[8] = 0x0000000000000001
r2.xyz[9] = 0xffffffff00000000
r2.xyz[10] = 0xffffffffffffffff
r2.xyz[11] = 0x00000000fffffffe
r2.p256ScalarMult(scalarReversed)
p256PointAddAsm(r1.xyz[:], r1.xyz[:], r2.xyz[:])
return r1.p256PointToAffine()
}
func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
scalarReversed := make([]uint64, 4)
p256GetScalar(scalarReversed, scalar)
var r p256Point
r.p256BaseMult(scalarReversed)
return r.p256PointToAffine()
}
func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
scalarReversed := make([]uint64, 4)
p256GetScalar(scalarReversed, scalar)
var r p256Point
fromBig(r.xyz[0:4], maybeReduceModP(bigX))
fromBig(r.xyz[4:8], maybeReduceModP(bigY))
p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
// This sets r2's Z value to 1, in the Montgomery domain.
r.xyz[8] = 0x0000000000000001
r.xyz[9] = 0xffffffff00000000
r.xyz[10] = 0xffffffffffffffff
r.xyz[11] = 0x00000000fffffffe
r.p256ScalarMult(scalarReversed)
return r.p256PointToAffine()
}
func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
zInv := make([]uint64, 4)
zInvSq := make([]uint64, 4)
p256Inverse(zInv, p.xyz[8:12])
p256Sqr(zInvSq, zInv)
p256Mul(zInv, zInv, zInvSq)
p256Mul(zInvSq, p.xyz[0:4], zInvSq)
p256Mul(zInv, p.xyz[4:8], zInv)
p256FromMont(zInvSq, zInvSq)
p256FromMont(zInv, zInv)
xOut := make([]byte, 32)
yOut := make([]byte, 32)
p256LittleToBig(xOut, zInvSq)
p256LittleToBig(yOut, zInv)
return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
}
// p256Inverse sets out to in^-1 mod p.
func p256Inverse(out, in []uint64) {
var stack [6 * 4]uint64
p2 := stack[4*0 : 4*0+4]
p4 := stack[4*1 : 4*1+4]
p8 := stack[4*2 : 4*2+4]
p16 := stack[4*3 : 4*3+4]
p32 := stack[4*4 : 4*4+4]
p256Sqr(out, in)
p256Mul(p2, out, in) // 3*p
p256Sqr(out, p2)
p256Sqr(out, out)
p256Mul(p4, out, p2) // f*p
p256Sqr(out, p4)
p256Sqr(out, out)
p256Sqr(out, out)
p256Sqr(out, out)
p256Mul(p8, out, p4) // ff*p
p256Sqr(out, p8)
for i := 0; i < 7; i++ {
p256Sqr(out, out)
}
p256Mul(p16, out, p8) // ffff*p
p256Sqr(out, p16)
for i := 0; i < 15; i++ {
p256Sqr(out, out)
}
p256Mul(p32, out, p16) // ffffffff*p
p256Sqr(out, p32)
for i := 0; i < 31; i++ {
p256Sqr(out, out)
}
p256Mul(out, out, in)
for i := 0; i < 32*4; i++ {
p256Sqr(out, out)
}
p256Mul(out, out, p32)
for i := 0; i < 32; i++ {
p256Sqr(out, out)
}
p256Mul(out, out, p32)
for i := 0; i < 16; i++ {
p256Sqr(out, out)
}
p256Mul(out, out, p16)
for i := 0; i < 8; i++ {
p256Sqr(out, out)
}
p256Mul(out, out, p8)
p256Sqr(out, out)
p256Sqr(out, out)
p256Sqr(out, out)
p256Sqr(out, out)
p256Mul(out, out, p4)
p256Sqr(out, out)
p256Sqr(out, out)
p256Mul(out, out, p2)
p256Sqr(out, out)
p256Sqr(out, out)
p256Mul(out, out, in)
}
func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
copy(r[index*12:], p.xyz[:])
}
func boothW5(in uint) (int, int) {
var s uint = ^((in >> 5) - 1)
var d uint = (1 << 6) - in - 1
d = (d & s) | (in & (^s))
d = (d >> 1) + (d & 1)
return int(d), int(s & 1)
}
func boothW7(in uint) (int, int) {
var s uint = ^((in >> 7) - 1)
var d uint = (1 << 8) - in - 1
d = (d & s) | (in & (^s))
d = (d >> 1) + (d & 1)
return int(d), int(s & 1)
}
func initTable() {
p256Precomputed = new([37][64 * 8]uint64)
basePoint := []uint64{
0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
}
t1 := make([]uint64, 12)
t2 := make([]uint64, 12)
copy(t2, basePoint)
zInv := make([]uint64, 4)
zInvSq := make([]uint64, 4)
for j := 0; j < 64; j++ {
copy(t1, t2)
for i := 0; i < 37; i++ {
// The window size is 7 so we need to double 7 times.
if i != 0 {
for k := 0; k < 7; k++ {
p256PointDoubleAsm(t1, t1)
}
}
// Convert the point to affine form. (Its values are
// still in Montgomery form however.)
p256Inverse(zInv, t1[8:12])
p256Sqr(zInvSq, zInv)
p256Mul(zInv, zInv, zInvSq)
p256Mul(t1[:4], t1[:4], zInvSq)
p256Mul(t1[4:8], t1[4:8], zInv)
copy(t1[8:12], basePoint[8:12])
// Update the table entry
copy(p256Precomputed[i][j*8:], t1[:8])
}
if j == 0 {
p256PointDoubleAsm(t2, basePoint)
} else {
p256PointAddAsm(t2, t2, basePoint)
}
}
}
func (p *p256Point) p256BaseMult(scalar []uint64) {
precomputeOnce.Do(initTable)
wvalue := (scalar[0] << 1) & 0xff
sel, sign := boothW7(uint(wvalue))
p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
p256NegCond(p.xyz[4:8], sign)
// (This is one, in the Montgomery domain.)
p.xyz[8] = 0x0000000000000001
p.xyz[9] = 0xffffffff00000000
p.xyz[10] = 0xffffffffffffffff
p.xyz[11] = 0x00000000fffffffe
var t0 p256Point
// (This is one, in the Montgomery domain.)
t0.xyz[8] = 0x0000000000000001
t0.xyz[9] = 0xffffffff00000000
t0.xyz[10] = 0xffffffffffffffff
t0.xyz[11] = 0x00000000fffffffe
index := uint(6)
zero := sel
for i := 1; i < 37; i++ {
if index < 192 {
wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff
} else {
wvalue = (scalar[index/64] >> (index % 64)) & 0xff
}
index += 7
sel, sign = boothW7(uint(wvalue))
p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
zero |= sel
}
}
func (p *p256Point) p256ScalarMult(scalar []uint64) {
// precomp is a table of precomputed points that stores powers of p
// from p^1 to p^16.
var precomp [16 * 4 * 3]uint64
var t0, t1, t2, t3 p256Point
// Prepare the table
p.p256StorePoint(&precomp, 0) // 1
p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
t0.p256StorePoint(&precomp, 1) // 2
t1.p256StorePoint(&precomp, 3) // 4
t2.p256StorePoint(&precomp, 7) // 8
t3.p256StorePoint(&precomp, 15) // 16
p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
t0.p256StorePoint(&precomp, 2) // 3
t1.p256StorePoint(&precomp, 4) // 5
t2.p256StorePoint(&precomp, 8) // 9
p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
t0.p256StorePoint(&precomp, 5) // 6
t1.p256StorePoint(&precomp, 9) // 10
p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
t2.p256StorePoint(&precomp, 6) // 7
t1.p256StorePoint(&precomp, 10) // 11
p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
t0.p256StorePoint(&precomp, 11) // 12
t2.p256StorePoint(&precomp, 13) // 14
p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
t0.p256StorePoint(&precomp, 12) // 13
t2.p256StorePoint(&precomp, 14) // 15
// Start scanning the window from top bit
index := uint(254)
var sel, sign int
wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
sel, _ = boothW5(uint(wvalue))
p256Select(p.xyz[0:12], precomp[0:], sel)
zero := sel
for index > 4 {
index -= 5
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
if index < 192 {
wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
} else {
wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
}
sel, sign = boothW5(uint(wvalue))
p256Select(t0.xyz[0:], precomp[0:], sel)
p256NegCond(t0.xyz[4:8], sign)
p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
zero |= sel
}
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
p256PointDoubleAsm(p.xyz[:], p.xyz[:])
wvalue = (scalar[0] << 1) & 0x3f
sel, sign = boothW5(uint(wvalue))
p256Select(t0.xyz[0:], precomp[0:], sel)
p256NegCond(t0.xyz[4:8], sign)
p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
}