Enable direct movement between gpr and mask registers in pass_reload.
Changelog gcc/ * config/i386/i386.c (inline_secondary_memory_needed): No memory is needed between mask regs and gpr. (ix86_hard_regno_mode_ok): Add condition TARGET_AVX512F for mask regno. * config/i386/i386.h (enum reg_class): Add INT_MASK_REGS. (REG_CLASS_NAMES): Ditto. (REG_CLASS_CONTENTS): Ditto. * config/i386/i386.md: Exclude mask registers in define_peephole2 patterns that are available only for gpr. gcc/testsuite/ * gcc.target/i386/spill_to_mask-1.c: New tests. * gcc.target/i386/spill_to_mask-2.c: New tests. * gcc.target/i386/spill_to_mask-3.c: New tests. * gcc.target/i386/spill_to_mask-4.c: New tests.
This commit is contained in:
parent
00cb3494ca
commit
2d2bc36c44
@ -18971,7 +18971,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|
||||
if ((mode == P2QImode || mode == P2HImode))
|
||||
return MASK_PAIR_REGNO_P(regno);
|
||||
|
||||
return (VALID_MASK_REG_MODE (mode)
|
||||
return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
|
||||
|| (TARGET_AVX512BW
|
||||
&& VALID_MASK_AVX512BW_MODE (mode)));
|
||||
}
|
||||
|
@ -1418,6 +1418,7 @@ enum reg_class
|
||||
FLOAT_INT_SSE_REGS,
|
||||
MASK_REGS,
|
||||
ALL_MASK_REGS,
|
||||
INT_MASK_REGS,
|
||||
ALL_REGS,
|
||||
LIM_REG_CLASSES
|
||||
};
|
||||
@ -1477,6 +1478,7 @@ enum reg_class
|
||||
"FLOAT_INT_SSE_REGS", \
|
||||
"MASK_REGS", \
|
||||
"ALL_MASK_REGS", \
|
||||
"INT_MASK_REGS", \
|
||||
"ALL_REGS" }
|
||||
|
||||
/* Define which registers fit in which classes. This is an initializer
|
||||
@ -1515,6 +1517,7 @@ enum reg_class
|
||||
{ 0xff9ffff, 0xfffffff0, 0xf }, /* FLOAT_INT_SSE_REGS */ \
|
||||
{ 0x0, 0x0, 0xfe0 }, /* MASK_REGS */ \
|
||||
{ 0x0, 0x0, 0xff0 }, /* ALL_MASK_REGS */ \
|
||||
{ 0x900ff, 0xff0, 0xff0 }, /* INT_MASK_REGS */ \
|
||||
{ 0xffffffff, 0xffffffff, 0xfff } /* ALL_REGS */ \
|
||||
}
|
||||
|
||||
|
@ -15026,7 +15026,7 @@
|
||||
;; Replace zero_extend:HI followed by parityhi2_cmp with parityqi2_cmp
|
||||
(define_peephole2
|
||||
[(set (match_operand:HI 0 "register_operand")
|
||||
(zero_extend:HI (match_operand:QI 1 "register_operand")))
|
||||
(zero_extend:HI (match_operand:QI 1 "general_reg_operand")))
|
||||
(parallel [(set (reg:CC FLAGS_REG)
|
||||
(unspec:CC [(match_dup 0)] UNSPEC_PARITY))
|
||||
(clobber (match_dup 0))])]
|
||||
@ -15037,7 +15037,7 @@
|
||||
;; Eliminate QImode popcount&1 using parity flag
|
||||
(define_peephole2
|
||||
[(set (match_operand:SI 0 "register_operand")
|
||||
(zero_extend:SI (match_operand:QI 1 "register_operand")))
|
||||
(zero_extend:SI (match_operand:QI 1 "general_reg_operand")))
|
||||
(parallel [(set (match_operand:SI 2 "register_operand")
|
||||
(popcount:SI (match_dup 0)))
|
||||
(clobber (reg:CC FLAGS_REG))])
|
||||
|
92
gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
Normal file
92
gcc/testsuite/gcc.target/i386/spill_to_mask-1.c
Normal file
@ -0,0 +1,92 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512" } */
|
||||
|
||||
/* Element type used throughout this test.  It defaults to u32; the
   spill_to_mask-2/3/4.c tests define DTYPE (u16, u8, u64) before
   including this file, so the default only applies when it is not
   already set.  */
#ifndef DTYPE
|
||||
#define DTYPE u32
|
||||
#endif
|
||||
|
||||
/* Short aliases for the unsigned integer types DTYPE may expand to.  */
typedef unsigned long long u64;
|
||||
typedef unsigned int u32;
|
||||
typedef unsigned short u16;
|
||||
typedef unsigned char u8;
|
||||
|
||||
/* R(x,n): x shifted right by n OR-ed with x shifted left by (32 - n).
   For a 32-bit x this is a rotate right by n; the 32 is hard-coded, so
   for other DTYPE widths it is not a true rotation.  Arguments are not
   parenthesized, so pass simple operands only.  */
#define R(x,n) ( (x >> n) | (x << (32 - n)))
|
||||
|
||||
/* S0/S1: XOR of three R() terms with shift counts 2/13/22 and 6/11/25
   respectively.  NOTE(review): these counts match the SHA-256 Sigma
   functions -- presumably the test is modelled on a SHA-256 round.  */
#define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22))
|
||||
#define S1(x) (R(x, 6) ^ R(x,11) ^ R(x,25))
|
||||
|
||||
/* TT: one mixing round.  It writes the variables tmp1 and tmp2, which
   must be declared in the expanding scope, then overwrites h with
   tmp1 + tmp2 and adds tmp1 to d.  a, b, c, e, f, g, x and K are only
   read.  The expansion is a brace-enclosed block.  */
#define TT(a,b,c,d,e,f,g,h,x,K) \
|
||||
{ \
|
||||
tmp1 = h + S1(e) + (g ^ (e & (f ^ g))) + K + x; \
|
||||
tmp2 = S0(a) + ((a & b) | (c & (a | b))); \
|
||||
h = tmp1 + tmp2; \
|
||||
d += tmp1; \
|
||||
}
|
||||
|
||||
/* Return X with its byte order rearranged in two steps: first the two
   16-bit halves of the low 32 bits are exchanged, then the two bytes
   inside each 16-bit half are exchanged.  For a 32-bit DTYPE this is a
   full byte reversal.  The mask constants only cover the low 32 bits,
   so for other DTYPE widths the result is not a full byte reversal of
   the value.  */
static inline DTYPE byteswap(DTYPE x)
|
||||
{
|
||||
/* Exchange the low and high 16-bit halves.  */
x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
|
||||
/* Exchange the bytes within each 16-bit half.  */
x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;
|
||||
return x;
|
||||
}
|
||||
|
||||
#define BE_LOAD32(n,b,i) (n) = byteswap(*(DTYPE *)(b + i))
|
||||
|
||||
/* Test kernel.  Reads sixteen DTYPE values from IN, applies sixteen
   TT rounds to eight working values taken from OUT, and adds the
   results back into OUT[0..7].

   IN   input bytes; DTYPE-sized loads are made at byte offsets
        0, 4, ..., 60 (the stride is 4 regardless of sizeof (DTYPE)).
   OUT  eight-element state, read at entry and updated at exit.
   C    sixteen per-round constants, C[0] .. C[15].

   The dg-final directive after the function scans the generated
   assembly for "kmovd".  NOTE(review): presumably the large number of
   simultaneously live values is meant to create enough register
   pressure that some are moved to mask registers -- confirm against
   the commit description.  */
void foo (u8 *in, DTYPE out[8], const DTYPE C[16])
|
||||
{
|
||||
DTYPE tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h;
|
||||
DTYPE w0, w1, w2, w3, w4, w5, w6, w7,
|
||||
w8, w9, w10, w11, w12, w13, w14, w15;
|
||||
/* Load and byteswap the sixteen input words.  */
w0 = byteswap(*(DTYPE *)(in + 0));
|
||||
w1 = byteswap(*(DTYPE *)(in + 4));
|
||||
w2 = byteswap(*(DTYPE *)(in + 8));
|
||||
w3 = byteswap(*(DTYPE *)(in + 12));
|
||||
w4 = byteswap(*(DTYPE *)(in + 16));
|
||||
w5 = byteswap(*(DTYPE *)(in + 20));
|
||||
w6 = byteswap(*(DTYPE *)(in + 24));
|
||||
w7 = byteswap(*(DTYPE *)(in + 28));
|
||||
w8 = byteswap(*(DTYPE *)(in + 32));
|
||||
w9 = byteswap(*(DTYPE *)(in + 36));
|
||||
w10 = byteswap(*(DTYPE *)(in + 40));
|
||||
w11 = byteswap(*(DTYPE *)(in + 44));
|
||||
w12 = byteswap(*(DTYPE *)(in + 48));
|
||||
w13 = byteswap(*(DTYPE *)(in + 52));
|
||||
w14 = byteswap(*(DTYPE *)(in + 56));
|
||||
w15 = byteswap(*(DTYPE *)(in + 60));
|
||||
/* Copy the current state into the eight working variables.  */
a = out[0];
|
||||
b = out[1];
|
||||
c = out[2];
|
||||
d = out[3];
|
||||
e = out[4];
|
||||
f = out[5];
|
||||
g = out[6];
|
||||
h = out[7];
|
||||
|
||||
/* Sixteen rounds.  Each call shifts the roles of a..h by one
   position instead of moving the values between variables; the
   pattern repeats after eight rounds.  */
TT(a, b, c, d, e, f, g, h, w0, C[0]);
|
||||
TT(h, a, b, c, d, e, f, g, w1, C[1]);
|
||||
TT(g, h, a, b, c, d, e, f, w2, C[2]);
|
||||
TT(f, g, h, a, b, c, d, e, w3, C[3]);
|
||||
TT(e, f, g, h, a, b, c, d, w4, C[4]);
|
||||
TT(d, e, f, g, h, a, b, c, w5, C[5]);
|
||||
TT(c, d, e, f, g, h, a, b, w6, C[6]);
|
||||
TT(b, c, d, e, f, g, h, a, w7, C[7]);
|
||||
TT(a, b, c, d, e, f, g, h, w8, C[8]);
|
||||
TT(h, a, b, c, d, e, f, g, w9, C[9]);
|
||||
TT(g, h, a, b, c, d, e, f, w10, C[10]);
|
||||
TT(f, g, h, a, b, c, d, e, w11, C[11]);
|
||||
TT(e, f, g, h, a, b, c, d, w12, C[12]);
|
||||
TT(d, e, f, g, h, a, b, c, w13, C[13]);
|
||||
TT(c, d, e, f, g, h, a, b, w14, C[14]);
|
||||
TT(b, c, d, e, f, g, h, a, w15, C[15]);
|
||||
|
||||
/* Accumulate the working variables back into the state.  */
out[0] += a;
|
||||
out[1] += b;
|
||||
out[2] += c;
|
||||
out[3] += d;
|
||||
out[4] += e;
|
||||
out[5] += f;
|
||||
out[6] += g;
|
||||
out[7] += h;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "kmovd" } } */
|
10
gcc/testsuite/gcc.target/i386/spill_to_mask-2.c
Normal file
10
gcc/testsuite/gcc.target/i386/spill_to_mask-2.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512" } */
|
||||
|
||||
#ifndef DTYPE
|
||||
#define DTYPE u16
|
||||
#endif
|
||||
|
||||
#include "spill_to_mask-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler "kmovw" } } */
|
10
gcc/testsuite/gcc.target/i386/spill_to_mask-3.c
Normal file
10
gcc/testsuite/gcc.target/i386/spill_to_mask-3.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512" } */
|
||||
|
||||
#ifndef DTYPE
|
||||
#define DTYPE u8
|
||||
#endif
|
||||
|
||||
#include "spill_to_mask-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler "kmovb" } } */
|
10
gcc/testsuite/gcc.target/i386/spill_to_mask-4.c
Normal file
10
gcc/testsuite/gcc.target/i386/spill_to_mask-4.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-O2 -march=skylake-avx512" } */
|
||||
|
||||
#ifndef DTYPE
|
||||
#define DTYPE u64
|
||||
#endif
|
||||
|
||||
#include "spill_to_mask-1.c"
|
||||
|
||||
/* { dg-final { scan-assembler "kmovq" } } */
|
Loading…
Reference in New Issue
Block a user