nvptx: Add support for 64-bit mul.hi (and other) instructions
Now that the middle-end MULT_HIGHPART_EXPR pieces are in place, this patch adds support for nvptx's mul.hi.s64 and mul.hi.u64 instructions, as previously reviewed (provisionally pre-approved) back in August 2020: https://gcc.gnu.org/pipermail/gcc-patches/2020-August/551373.html Since then a few things have changed, so this patch uses the new SMUL_HIGHPART and UMUL_HIGHPART RTX expressions, but the test cases remain the same. Like the x86_64 backend, this patch retains the "trunc" forms of these instructions (while the RTL optimizers/combine may still generate them). Given that we're rapidly approaching stage 4, I also took the liberty of including support in nvptx.md for a few other instructions. With the new 64-bit highpart multiplication instructions added above, we can now provide a define_expand for efficient 64-bit (to 128-bit) widening multiplications. This patch also adds support for nvptx's testp.infinite instruction (for implementing __builtin_isinf) and the not.pred instruction. As an example of the code generation improvements, the function int foo(double x) { return __builtin_isinf(x); } previously generated with -O2: mov.f64 %r26, %ar0; abs.f64 %r28, %r26; setp.leu.f64 %r31, %r28, 0d7fefffffffffffff; selp.u32 %r30, 1, 0, %r31; mov.u32 %r29, %r30; cvt.u16.u8 %r35, %r29; mov.u16 %r33, %r35; xor.b16 %r32, %r33, 1; cvt.u32.u16 %r34, %r32; cvt.u32.u8 %value, %r34; and with this patch now generates: mov.f64 %r23, %ar0; testp.infinite.f64 %r24, %r23; selp.u32 %value, 1, 0, %r24; This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu (including newlib) with a make and make -k check with no new failures. gcc/ChangeLog: * config/nvptx/nvptx.md (UNSPEC_ISINF): New UNSPEC. (one_cmplbi2): New define_insn for not.pred. (mulditi3): New define_expand for signed widening multiply. (umulditi3): New define_expand for unsigned widening multiply. (smul<mode>3_highpart): New define_insn for signed highpart mult. 
(umul<mode>3_highpart): New define_insn for unsigned highpart mult. (*smulhi3_highpart_2): Renamed from smulhi3_highpart. (*smulsi3_highpart_2): Renamed from smulsi3_highpart. (*umulhi3_highpart_2): Renamed from umulhi3_highpart. (*umulsi3_highpart_2): Renamed from umulsi3_highpart. (*setcc<mode>_from_not_bi): New define_insn. (setcc_isinf<mode>): New define_insn for testp.infinite. (isinf<mode>2): New define_expand. gcc/testsuite/ChangeLog: * gcc.target/nvptx/mul-hi64.c: New test case. * gcc.target/nvptx/umul-hi64.c: New test case. * gcc.target/nvptx/mul-wide64.c: New test case. * gcc.target/nvptx/umul-wide64.c: New test case. * gcc.target/nvptx/isinf.c: New test case.
This commit is contained in:
parent
de12b919c7
commit
26d7b8f9bd
@ -27,6 +27,7 @@
|
||||
UNSPEC_SIN
|
||||
UNSPEC_COS
|
||||
UNSPEC_TANH
|
||||
UNSPEC_ISINF
|
||||
|
||||
UNSPEC_FPINT_FLOOR
|
||||
UNSPEC_FPINT_BTRUNC
|
||||
@ -596,6 +597,12 @@
|
||||
""
|
||||
"%.\\tnot.b%T0\\t%0, %1;")
|
||||
|
||||
;; One's complement of a BImode value: operand 0 receives (not:BI operand 1).
;; Both operands must be registers; the result is produced by a single
;; not.pred instruction.
(define_insn "one_cmplbi2"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\\tnot.pred\\t%0, %1;")
|
||||
|
||||
(define_insn "*cnot<mode>2"
|
||||
[(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
|
||||
(eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
|
||||
@ -671,7 +678,57 @@
|
||||
""
|
||||
"%.\\tmul.wide.u32\\t%0, %1, %2;")
|
||||
|
||||
(define_insn "smulhi3_highpart"
|
||||
;; Signed widening multiply: DImode x DImode -> TImode.
;;
;; The expander never emits the template below (the preparation statements
;; end in DONE); instead the product is assembled from two DImode
;; multiplies: the signed high part (smuldi3_highpart) and the ordinary
;; low-part multiply (muldi3).  Both multiplicands are extended to TImode
;; in the template so that the modes of the mult:TI operands agree.
(define_expand "mulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
        (mult:TI (sign_extend:TI
                  (match_operand:DI 1 "nvptx_register_operand"))
                 (sign_extend:TI
                  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  /* Fresh pseudos for the upper and lower 64 bits of the product.  */
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);

  /* Upper half: signed high-part multiply.  Lower half: plain multiply.  */
  emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));

  /* Store the halves into the corresponding parts of the TImode result.  */
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})
|
||||
|
||||
;; Unsigned widening multiply: DImode x DImode -> TImode.
;;
;; The expander never emits the template below (the preparation statements
;; end in DONE); instead the product is assembled from two DImode
;; multiplies: the unsigned high part (umuldi3_highpart) and the ordinary
;; low-part multiply (muldi3).  Both multiplicands are extended to TImode
;; in the template so that the modes of the mult:TI operands agree.
(define_expand "umulditi3"
  [(set (match_operand:TI 0 "nvptx_register_operand")
        (mult:TI (zero_extend:TI
                  (match_operand:DI 1 "nvptx_register_operand"))
                 (zero_extend:TI
                  (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
  ""
{
  /* Fresh pseudos for the upper and lower 64 bits of the product.  */
  rtx hi = gen_reg_rtx (DImode);
  rtx lo = gen_reg_rtx (DImode);

  /* Upper half: unsigned high-part multiply.  Lower half: plain multiply.  */
  emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2]));
  emit_insn (gen_muldi3 (lo, operands[1], operands[2]));

  /* Store the halves into the corresponding parts of the TImode result.  */
  emit_move_insn (gen_highpart (DImode, operands[0]), hi);
  emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
  DONE;
})
|
||||
|
||||
;; Signed high-part multiply for each mode in the HSDIM iterator, expressed
;; with the smul_highpart RTX code.  Operand 1 must be a register; operand 2
;; may be a register or an immediate.  Emits mul.hi.s followed by the %T0
;; suffix for operand 0 (e.g. mul.hi.s64 for the 64-bit case).
(define_insn "smul<mode>3_highpart"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (smul_highpart:HSDIM
          (match_operand:HSDIM 1 "nvptx_register_operand" "R")
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.hi.s%T0\\t%0, %1, %2;")
|
||||
|
||||
;; Unsigned high-part multiply for each mode in the HSDIM iterator, expressed
;; with the umul_highpart RTX code.  Operand 1 must be a register; operand 2
;; may be a register or an immediate.  Emits mul.hi.u followed by the %T0
;; suffix for operand 0 (e.g. mul.hi.u64 for the 64-bit case).
(define_insn "umul<mode>3_highpart"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umul_highpart:HSDIM
          (match_operand:HSDIM 1 "nvptx_register_operand" "R")
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\tmul.hi.u%T0\\t%0, %1, %2;")
|
||||
|
||||
(define_insn "*smulhi3_highpart_2"
|
||||
[(set (match_operand:HI 0 "nvptx_register_operand" "=R")
|
||||
(truncate:HI
|
||||
(lshiftrt:SI
|
||||
@ -683,7 +740,7 @@
|
||||
""
|
||||
"%.\\tmul.hi.s16\\t%0, %1, %2;")
|
||||
|
||||
(define_insn "smulsi3_highpart"
|
||||
(define_insn "*smulsi3_highpart_2"
|
||||
[(set (match_operand:SI 0 "nvptx_register_operand" "=R")
|
||||
(truncate:SI
|
||||
(lshiftrt:DI
|
||||
@ -695,7 +752,7 @@
|
||||
""
|
||||
"%.\\tmul.hi.s32\\t%0, %1, %2;")
|
||||
|
||||
(define_insn "umulhi3_highpart"
|
||||
(define_insn "*umulhi3_highpart_2"
|
||||
[(set (match_operand:HI 0 "nvptx_register_operand" "=R")
|
||||
(truncate:HI
|
||||
(lshiftrt:SI
|
||||
@ -707,7 +764,7 @@
|
||||
""
|
||||
"%.\\tmul.hi.u16\\t%0, %1, %2;")
|
||||
|
||||
(define_insn "umulsi3_highpart"
|
||||
(define_insn "*umulsi3_highpart_2"
|
||||
[(set (match_operand:SI 0 "nvptx_register_operand" "=R")
|
||||
(truncate:SI
|
||||
(lshiftrt:DI
|
||||
@ -885,6 +942,13 @@
|
||||
""
|
||||
"%.\\tselp%t0\\t%0, 1, 0, %1;")
|
||||
|
||||
;; Materialise the negation of a BImode predicate as an integer:
;; operand 0 = (operand 1 == 0).  Implemented with selp using the
;; constants 0 and 1 in that order, i.e. the reverse of the 1, 0 order
;; used by the plain predicate-to-integer pattern.
(define_insn "*setcc<mode>_from_not_bi"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
                  (const_int 0)))]
  ""
  "%.\\tselp%t0\\t%0, 0, 1, %1;")
|
||||
|
||||
(define_insn "extendbi<mode>2"
|
||||
[(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
|
||||
(sign_extend:QHSDIM
|
||||
@ -1160,6 +1224,25 @@
|
||||
"flag_unsafe_math_optimizations"
|
||||
"%.\\tex2.approx%t0\\t%0, %1;")
|
||||
|
||||
;; Test whether a floating-point register (any mode in the SDFM iterator)
;; holds an infinity.  The BImode result is modelled as an UNSPEC_ISINF
;; of the input and is produced by testp.infinite, with the %t1 suffix
;; taken from operand 1 (e.g. testp.infinite.f64 for a double).
;; The name is unstarred because the isinf<mode>2 expander calls
;; gen_setcc_isinf<mode>.
(define_insn "setcc_isinf<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
                   UNSPEC_ISINF))]
  ""
  "%.\\ttestp.infinite%t1\\t%0, %1;")
|
||||
|
||||
;; Expander for isinf on the SDFM floating-point modes, producing an SImode
;; result.  The template is never emitted (the preparation statements end in
;; DONE): the expansion is a testp.infinite into a fresh BImode predicate
;; followed by a conversion of that predicate to SImode.
;; No constraint strings are given on the operands: constraints have no
;; effect in a define_expand, only the predicates matter.
(define_expand "isinf<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand")]
                   UNSPEC_ISINF))]
  ""
{
  /* Predicate register that receives the result of the infinity test.  */
  rtx pred = gen_reg_rtx (BImode);

  emit_insn (gen_setcc_isinf<mode> (pred, operands[1]));
  /* Turn the BImode predicate into the SImode result.  */
  emit_insn (gen_setccsi_from_bi (operands[0], pred));
  DONE;
})
|
||||
|
||||
;; HFmode floating point arithmetic.
|
||||
|
||||
(define_insn "addhf3"
|
||||
|
9
gcc/testsuite/gcc.target/nvptx/isinf.c
Normal file
9
gcc/testsuite/gcc.target/nvptx/isinf.c
Normal file
@ -0,0 +1,9 @@
|
||||
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Check that __builtin_isinf on a double is expanded to a single
   testp.infinite.f64 instruction.  */

int foo(double x)
{
  return __builtin_isinf(x);
}

/* The dots are escaped so that only the literal mnemonic is counted.  */
/* { dg-final { scan-assembler-times "testp\\.infinite\\.f64" 1 } } */
|
47
gcc/testsuite/gcc.target/nvptx/mul-hi64.c
Normal file
47
gcc/testsuite/gcc.target/nvptx/mul-hi64.c
Normal file
@ -0,0 +1,47 @@
|
||||
/* { dg-do compile } */
/* { dg-options "-O2 -Wno-long-long" } */

/* Check that the upper 64 bits of a signed 64x64->128-bit product are
   computed with mul.hi.s64.  Each function below is a different source
   spelling of the same high-part multiply; all eight are expected to
   produce exactly one mul.hi.s64.  */

typedef unsigned int __attribute ((mode(TI))) uti_t;
typedef int __attribute ((mode(TI))) ti_t;

/* Register x register, signed shift, 64-bit result.  */
long test1(long x, long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

/* Register x constant, signed shift, 64-bit result.  */
long test2(long x)
{
  return ((ti_t)x * 19065) >> 64;
}

/* Register x register, product shifted as unsigned, 64-bit result.  */
long test3(long x, long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

/* Register x constant, product shifted as unsigned, 64-bit result.  */
long test4(long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

/* As test1, but the result is returned as a signed 128-bit value.  */
ti_t test5(long x, long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

/* As test2, but the result is returned as a signed 128-bit value.  */
ti_t test6(long x)
{
  return ((ti_t)x * 19065) >> 64;
}

/* As test3, but the result is returned as an unsigned 128-bit value.  */
uti_t test7(long x, long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

/* As test4, but the result is returned as an unsigned 128-bit value.  */
uti_t test8(long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

/* The dots are escaped so that only the literal mnemonic is counted.  */
/* { dg-final { scan-assembler-times "mul\\.hi\\.s64" 8 } } */
|
13
gcc/testsuite/gcc.target/nvptx/mul-wide64.c
Normal file
13
gcc/testsuite/gcc.target/nvptx/mul-wide64.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Check that a signed 64x64->128-bit widening multiply is expanded to one
   low-part multiply (mul.lo.u64) and one signed high-part multiply
   (mul.hi.s64).  */

typedef int __attribute ((mode(TI))) ti_t;

ti_t foo(long x, long y)
{
  return (ti_t)x * (ti_t)y;
}

/* The dots are escaped so that only the literal mnemonics are counted.  */
/* { dg-final { scan-assembler-times "mul\\.lo\\.u64" 1 } } */
/* { dg-final { scan-assembler-times "mul\\.hi\\.s64" 1 } } */
|
||||
|
47
gcc/testsuite/gcc.target/nvptx/umul-hi64.c
Normal file
47
gcc/testsuite/gcc.target/nvptx/umul-hi64.c
Normal file
@ -0,0 +1,47 @@
|
||||
/* { dg-do compile } */
/* { dg-options "-O2 -Wno-long-long" } */

/* Check that the upper 64 bits of an unsigned 64x64->128-bit product are
   computed with mul.hi.u64.  Each function below is a different source
   spelling of the same high-part multiply; all eight are expected to
   produce exactly one mul.hi.u64.  */

typedef unsigned int __attribute ((mode(TI))) uti_t;
typedef int __attribute ((mode(TI))) ti_t;

/* Register x register, unsigned shift, 64-bit result.  */
unsigned long test1(unsigned long x, unsigned long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

/* Register x constant, unsigned shift, 64-bit result.  */
unsigned long test2(unsigned long x)
{
  return ((uti_t)x * 19065) >> 64;
}

/* Register x register, product shifted as signed, 64-bit result.  */
unsigned long test3(unsigned long x, unsigned long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

/* Register x constant, product shifted as signed, 64-bit result.  */
unsigned long test4(unsigned long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

/* As test1, but the result is returned as an unsigned 128-bit value.  */
uti_t test5(unsigned long x, unsigned long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

/* As test2, but the result is returned as an unsigned 128-bit value.  */
uti_t test6(unsigned long x)
{
  return ((uti_t)x * 19065) >> 64;
}

/* As test3, but the result is returned as a signed 128-bit value.  */
ti_t test7(unsigned long x, unsigned long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

/* As test4, but the result is returned as a signed 128-bit value.  */
ti_t test8(unsigned long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

/* The dots are escaped so that only the literal mnemonic is counted.  */
/* { dg-final { scan-assembler-times "mul\\.hi\\.u64" 8 } } */
|
13
gcc/testsuite/gcc.target/nvptx/umul-wide64.c
Normal file
13
gcc/testsuite/gcc.target/nvptx/umul-wide64.c
Normal file
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
/* { dg-options "-O2" } */

/* Check that an unsigned 64x64->128-bit widening multiply is expanded to
   one low-part multiply (mul.lo.u64) and one unsigned high-part multiply
   (mul.hi.u64).  */

typedef unsigned int __attribute ((mode(TI))) uti_t;

uti_t foo(unsigned long x, unsigned long y)
{
  return (uti_t)x * (uti_t)y;
}

/* The dots are escaped so that only the literal mnemonics are counted.  */
/* { dg-final { scan-assembler-times "mul\\.lo\\.u64" 1 } } */
/* { dg-final { scan-assembler-times "mul\\.hi\\.u64" 1 } } */
|
||||
|
Loading…
Reference in New Issue
Block a user