x86: Fix up x86_{,64_}sh{l,r}d patterns [PR103431]

The following testcase is miscompiled because the x86_{,64_}sh{l,r}d
patterns don't properly describe what the instructions do.  One thing
is left out, in particular that there is initial count &= 63 for
sh{l,r}dq and initial count &= 31 for sh{l,r}d{l,w}.  Another thing is
not described properly, in particular the behavior when count (after the
masking) is 0.  The pattern says it is e.g.
res = (op0 << op2) | (op1 >> (64 - op2))
but that triggers UB on op1 >> 64.  For op2 0 we actually want
res = (op0 << op2) | 0
When constants are propagated to these patterns during RTL optimizations,
both such problems trigger wrong-code issues.
This patch represents the patterns as e.g.
res = (op0 << (op2 & 63)) | (unsigned long long) ((uint128_t) op1 >> (64 - (op2 & 63)))
so that the initial masking is described and op2 == 0 results in
zero being ored in.
The patch introduces alternate patterns for constant op2 where
simplify-rtx.c will fold those expressions into simple numbers,
and define_insn_and_split pre-reload splitter for how the patterns
looked before into the new form, so that it can pattern match during
combine even computations that assumed the shift amount will be in
the range of 1 .. bitsize-1.

2021-11-27  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/103431
	* config/i386/i386.md (x86_64_shld, x86_shld, x86_64_shrd, x86_shrd):
	Change insn pattern to accurately describe the instructions.
	(*x86_64_shld_1, *x86_shld_1, *x86_64_shrd_1, *x86_shrd_1): New
	define_insn patterns.
	(*x86_64_shld_2, *x86_shld_2, *x86_64_shrd_2, *x86_shrd_2): New
	define_insn_and_split patterns.
	(*ashl<dwi>3_doubleword_mask, *ashl<dwi>3_doubleword_mask_1,
	*<insn><dwi>3_doubleword_mask, *<insn><dwi>3_doubleword_mask_1,
	ix86_rotl<dwi>3_doubleword, ix86_rotr<dwi>3_doubleword): Adjust
	splitters for x86_{,64_}sh{l,r}d pattern changes.

	* gcc.dg/pr103431.c: New test.
This commit is contained in:
Jakub Jelinek 2021-11-27 13:02:06 +01:00
parent 567d5f3d62
commit f7e4f57f1c
2 changed files with 281 additions and 42 deletions

View File

@ -11301,9 +11301,12 @@
"&& 1"
[(parallel
[(set (match_dup 6)
(ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
(lshiftrt:DWIH (match_dup 5)
(minus:QI (match_dup 8) (match_dup 2)))))
(ior:DWIH (ashift:DWIH (match_dup 6)
(and:QI (match_dup 2) (match_dup 8)))
(subreg:DWIH
(lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
(minus:QI (match_dup 9)
(and:QI (match_dup 2) (match_dup 8)))) 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 4)
@ -11312,7 +11315,8 @@
{
split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
!= ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@ -11342,9 +11346,12 @@
"&& 1"
[(parallel
[(set (match_dup 6)
(ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
(lshiftrt:DWIH (match_dup 5)
(minus:QI (match_dup 8) (match_dup 2)))))
(ior:DWIH (ashift:DWIH (match_dup 6)
(and:QI (match_dup 2) (match_dup 8)))
(subreg:DWIH
(lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
(minus:QI (match_dup 9)
(and:QI (match_dup 2) (match_dup 8)))) 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 4)
@ -11353,7 +11360,8 @@
{
split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
!= ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@ -11404,9 +11412,14 @@
(define_insn "x86_64_shld"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Jc"))
(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
(minus:QI (const_int 64) (match_dup 2)))))
(and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
(const_int 63)))
(subreg:DI
(lshiftrt:TI
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
(minus:QI (const_int 64)
(and:QI (match_dup 2) (const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
"shld{q}\t{%s2%1, %0|%0, %1, %2}"
@ -11417,12 +11430,58 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Constant-count form of shld{q}.  Operand 2 (const_0_to_63_operand) is the
;; left-shift count applied to operand 0; operand 3 (const_0_to_255_operand)
;; is the right-shift count applied to operand 1 after zero-extension to TI.
;; The insn condition only accepts the pair when operand 3 == 64 - operand 2.
;; Only operand 2 is printed as the count in the output template.
(define_insn "*x86_64_shld_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (ashift:DI (match_dup 0)
(match_operand:QI 2 "const_0_to_63_operand" "J"))
(subreg:DI
(lshiftrt:TI
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
(match_operand:QI 3 "const_0_to_255_operand" "N")) 0)))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT
&& INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
"shld{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Matches the unmasked DImode form
;;   (op0 << op2) | (op1 >> (64 - op2))
;; when TARGET_64BIT && ix86_pre_reload_split () holds, and rewrites it into
;; the form where the count is and'ed with 63 and the right shift is done in
;; TI mode on the zero-extended operand 1.  The output template is "#" and
;; the split condition is "&& 1", so no assembly is emitted for this pattern
;; itself.
(define_insn_and_split "*x86_64_shld_2"
[(set (match_operand:DI 0 "nonimmediate_operand")
(ior:DI (ashift:DI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand"))
(lshiftrt:DI (match_operand:DI 1 "register_operand")
(minus:QI (const_int 64) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_pre_reload_split ()"
"#"
"&& 1"
[(parallel [(set (match_dup 0)
(ior:DI (ashift:DI (match_dup 0)
(and:QI (match_dup 2) (const_int 63)))
(subreg:DI
(lshiftrt:TI
(zero_extend:TI (match_dup 1))
(minus:QI (const_int 64)
(and:QI (match_dup 2)
(const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
(define_insn "x86_shld"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Ic"))
(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
(minus:QI (const_int 32) (match_dup 2)))))
(and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
(const_int 31)))
(subreg:SI
(lshiftrt:DI
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
(minus:QI (const_int 32)
(and:QI (match_dup 2) (const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"shld{l}\t{%s2%1, %0|%0, %1, %2}"
@ -11434,6 +11493,47 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Constant-count form of shld{l}.  Operand 2 (const_0_to_31_operand) is the
;; left-shift count applied to operand 0; operand 3 (const_0_to_63_operand)
;; is the right-shift count applied to operand 1 after zero-extension to DI.
;; The insn condition only accepts the pair when operand 3 == 32 - operand 2.
;; Only operand 2 is printed as the count in the output template.
(define_insn "*x86_shld_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (ashift:SI (match_dup 0)
(match_operand:QI 2 "const_0_to_31_operand" "I"))
(subreg:SI
(lshiftrt:DI
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
(match_operand:QI 3 "const_0_to_63_operand" "J")) 0)))
(clobber (reg:CC FLAGS_REG))]
"INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
"shld{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Matches the unmasked SImode form
;;   (op0 << op2) | (op1 >> (32 - op2))
;; and rewrites it into the form where the count is and'ed with 31 and the
;; right shift is done in DI mode on the zero-extended operand 1.  The output
;; template is "#" and the split condition is "&& 1", so no assembly is
;; emitted for this pattern itself.
;; NOTE(review): the condition below requires TARGET_64BIT although this is
;; the SImode variant and *x86_shld_1 carries no such requirement; this may
;; be a leftover from the DImode pattern -- confirm it is intended.
(define_insn_and_split "*x86_shld_2"
[(set (match_operand:SI 0 "nonimmediate_operand")
(ior:SI (ashift:SI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand"))
(lshiftrt:SI (match_operand:SI 1 "register_operand")
(minus:QI (const_int 32) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_pre_reload_split ()"
"#"
"&& 1"
[(parallel [(set (match_dup 0)
(ior:SI (ashift:SI (match_dup 0)
(and:QI (match_dup 2) (const_int 31)))
(subreg:SI
(lshiftrt:DI
(zero_extend:DI (match_dup 1))
(minus:QI (const_int 32)
(and:QI (match_dup 2)
(const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
(define_expand "@x86_shift<mode>_adj_1"
[(set (reg:CCZ FLAGS_REG)
(compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
@ -12080,9 +12180,12 @@
"&& 1"
[(parallel
[(set (match_dup 4)
(ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
(ashift:DWIH (match_dup 7)
(minus:QI (match_dup 8) (match_dup 2)))))
(ior:DWIH (lshiftrt:DWIH (match_dup 4)
(and:QI (match_dup 2) (match_dup 8)))
(subreg:DWIH
(ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
(minus:QI (match_dup 9)
(and:QI (match_dup 2) (match_dup 8)))) 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 6)
@ -12091,7 +12194,8 @@
{
split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
!= ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@ -12121,9 +12225,12 @@
"&& 1"
[(parallel
[(set (match_dup 4)
(ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
(ashift:DWIH (match_dup 7)
(minus:QI (match_dup 8) (match_dup 2)))))
(ior:DWIH (lshiftrt:DWIH (match_dup 4)
(and:QI (match_dup 2) (match_dup 8)))
(subreg:DWIH
(ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
(minus:QI (match_dup 9)
(and:QI (match_dup 2) (match_dup 8)))) 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 6)
@ -12132,7 +12239,8 @@
{
split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
!= ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@ -12177,9 +12285,14 @@
(define_insn "x86_64_shrd"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (lshiftrt:DI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Jc"))
(ashift:DI (match_operand:DI 1 "register_operand" "r")
(minus:QI (const_int 64) (match_dup 2)))))
(and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
(const_int 63)))
(subreg:DI
(ashift:TI
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
(minus:QI (const_int 64)
(and:QI (match_dup 2) (const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
"shrd{q}\t{%s2%1, %0|%0, %1, %2}"
@ -12190,12 +12303,58 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Constant-count form of shrd{q}.  Operand 2 (const_0_to_63_operand) is the
;; logical right-shift count applied to operand 0; operand 3
;; (const_0_to_255_operand) is the left-shift count applied to operand 1
;; after zero-extension to TI.  The insn condition only accepts the pair when
;; operand 3 == 64 - operand 2.  Only operand 2 is printed as the count in
;; the output template.
(define_insn "*x86_64_shrd_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
(ior:DI (lshiftrt:DI (match_dup 0)
(match_operand:QI 2 "const_0_to_63_operand" "J"))
(subreg:DI
(ashift:TI
(zero_extend:TI
(match_operand:DI 1 "register_operand" "r"))
(match_operand:QI 3 "const_0_to_255_operand" "N")) 0)))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT
&& INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
"shrd{q}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Matches the unmasked DImode form
;;   (op0 >> op2) | (op1 << (64 - op2))
;; when TARGET_64BIT && ix86_pre_reload_split () holds, and rewrites it into
;; the form where the count is and'ed with 63 and the left shift is done in
;; TI mode on the zero-extended operand 1.  The output template is "#" and
;; the split condition is "&& 1", so no assembly is emitted for this pattern
;; itself.
(define_insn_and_split "*x86_64_shrd_2"
[(set (match_operand:DI 0 "nonimmediate_operand")
(ior:DI (lshiftrt:DI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand"))
(ashift:DI (match_operand:DI 1 "register_operand")
(minus:QI (const_int 64) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_pre_reload_split ()"
"#"
"&& 1"
[(parallel [(set (match_dup 0)
(ior:DI (lshiftrt:DI (match_dup 0)
(and:QI (match_dup 2) (const_int 63)))
(subreg:DI
(ashift:TI
(zero_extend:TI (match_dup 1))
(minus:QI (const_int 64)
(and:QI (match_dup 2)
(const_int 63)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
(define_insn "x86_shrd"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand" "Ic"))
(ashift:SI (match_operand:SI 1 "register_operand" "r")
(minus:QI (const_int 32) (match_dup 2)))))
(and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
(const_int 31)))
(subreg:SI
(ashift:DI
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
(minus:QI (const_int 32)
(and:QI (match_dup 2) (const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"shrd{l}\t{%s2%1, %0|%0, %1, %2}"
@ -12207,6 +12366,47 @@
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Constant-count form of shrd{l}.  Operand 2 (const_0_to_31_operand) is the
;; logical right-shift count applied to operand 0; operand 3
;; (const_0_to_63_operand) is the left-shift count applied to operand 1 after
;; zero-extension to DI.  The insn condition only accepts the pair when
;; operand 3 == 32 - operand 2.  Only operand 2 is printed as the count in
;; the output template.
(define_insn "*x86_shrd_1"
[(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
(ior:SI (lshiftrt:SI (match_dup 0)
(match_operand:QI 2 "const_0_to_31_operand" "I"))
(subreg:SI
(ashift:DI
(zero_extend:DI
(match_operand:SI 1 "register_operand" "r"))
(match_operand:QI 3 "const_0_to_63_operand" "J")) 0)))
(clobber (reg:CC FLAGS_REG))]
"INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
"shrd{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "vector")
(set_attr "bdver1_decode" "vector")])
;; Matches the unmasked SImode form
;;   (op0 >> op2) | (op1 << (32 - op2))
;; and rewrites it into the form where the count is and'ed with 31 and the
;; left shift is done in DI mode on the zero-extended operand 1.  The output
;; template is "#" and the split condition is "&& 1", so no assembly is
;; emitted for this pattern itself.
;; NOTE(review): the condition below requires TARGET_64BIT although this is
;; the SImode variant and *x86_shrd_1 carries no such requirement; this may
;; be a leftover from the DImode pattern -- confirm it is intended.
(define_insn_and_split "*x86_shrd_2"
[(set (match_operand:SI 0 "nonimmediate_operand")
(ior:SI (lshiftrt:SI (match_dup 0)
(match_operand:QI 2 "nonmemory_operand"))
(ashift:SI (match_operand:SI 1 "register_operand")
(minus:QI (const_int 32) (match_dup 2)))))
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT && ix86_pre_reload_split ()"
"#"
"&& 1"
[(parallel [(set (match_dup 0)
(ior:SI (lshiftrt:SI (match_dup 0)
(and:QI (match_dup 2) (const_int 31)))
(subreg:SI
(ashift:DI
(zero_extend:DI (match_dup 1))
(minus:QI (const_int 32)
(and:QI (match_dup 2)
(const_int 31)))) 0)))
(clobber (reg:CC FLAGS_REG))])])
;; Base name for insn mnemonic.
(define_mode_attr cvt_mnemonic
[(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
@ -12784,18 +12984,27 @@
[(set (match_dup 3) (match_dup 4))
(parallel
[(set (match_dup 4)
(ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
(lshiftrt:DWIH (match_dup 5)
(minus:QI (match_dup 6) (match_dup 2)))))
(ior:DWIH (ashift:DWIH (match_dup 4)
(and:QI (match_dup 2) (match_dup 6)))
(subreg:DWIH
(lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
(minus:QI (match_dup 7)
(and:QI (match_dup 2)
(match_dup 6)))) 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 5)
(ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
(lshiftrt:DWIH (match_dup 3)
(minus:QI (match_dup 6) (match_dup 2)))))
(ior:DWIH (ashift:DWIH (match_dup 5)
(and:QI (match_dup 2) (match_dup 6)))
(subreg:DWIH
(lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 3))
(minus:QI (match_dup 7)
(and:QI (match_dup 2)
(match_dup 6)))) 0)))
(clobber (reg:CC FLAGS_REG))])]
{
operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
})
@ -12812,18 +13021,27 @@
[(set (match_dup 3) (match_dup 4))
(parallel
[(set (match_dup 4)
(ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
(ashift:DWIH (match_dup 5)
(minus:QI (match_dup 6) (match_dup 2)))))
(ior:DWIH (lshiftrt:DWIH (match_dup 4)
(and:QI (match_dup 2) (match_dup 6)))
(subreg:DWIH
(ashift:<DWI> (zero_extend:<DWI> (match_dup 5))
(minus:QI (match_dup 7)
(and:QI (match_dup 2)
(match_dup 6)))) 0)))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 5)
(ior:DWIH (lshiftrt:DWIH (match_dup 5) (match_dup 2))
(ashift:DWIH (match_dup 3)
(minus:QI (match_dup 6) (match_dup 2)))))
(ior:DWIH (lshiftrt:DWIH (match_dup 5)
(and:QI (match_dup 2) (match_dup 6)))
(subreg:DWIH
(ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
(minus:QI (match_dup 7)
(and:QI (match_dup 2)
(match_dup 6)))) 0)))
(clobber (reg:CC FLAGS_REG))])]
{
operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
})

View File

@ -0,0 +1,21 @@
/* PR middle-end/103431 */
/* { dg-do run { target int128 } } */
/* { dg-options "-O -fno-tree-bit-ccp -fno-tree-dominator-opts" } */
/* Rotate the 128-bit value 5 left by a count derived from A and abort
   unless the result is still 5.  The count always ends up 0 (see below),
   so a correct rotate must leave the value unchanged.  */
__attribute__((noipa))
void foo (unsigned short a)
{
__uint128_t b = 5;
/* Bit width of __int128 minus one, used as the rotate-count mask.  */
int size = __SIZEOF_INT128__ * __CHAR_BIT__ - 1;
/* Every unsigned short value is smaller than 0xfffffffd, so this sets
   A to 0 -- presumably written this way so the zero count is not an
   obvious constant to the early optimizers (TODO confirm).  */
a /= 0xfffffffd;
/* Rotate-left idiom: left shift by the masked count, ored with a right
   shift by the masked negated count.  */
__uint128_t c = (b << (a & size) | b >> (-(a & size) & size));
if (c != 5)
__builtin_abort ();
}
/* Test driver: call foo once with 0; foo aborts if its rotate result is
   wrong, otherwise the test exits successfully.  */
int
main ()
{
foo (0);
return 0;
}