From 953e25c44acc2458d854b1442d7775d997aaf187 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sun, 24 Feb 2019 20:23:51 +0100 Subject: [PATCH] re PR rtl-optimization/89445 (_mm512_maskz_loadu_pd "forgets" to use the mask) PR rtl-optimization/89445 * simplify-rtx.c (simplify_ternary_operation): Don't use simplify_merge_mask on operands that may trap. * rtlanal.c (may_trap_p_1): Use FLOAT_MODE_P instead of SCALAR_FLOAT_MODE_P checks. For integral division by zero, if second operand is CONST_VECTOR, check if any element could be zero. Don't expect traps for VEC_{MERGE,SELECT,CONCAT,DUPLICATE} unless their operands can trap. * gcc.target/i386/avx512f-pr89445.c: New test. From-SVN: r269176 --- gcc/ChangeLog | 11 ++++ gcc/rtlanal.c | 26 ++++++++- gcc/simplify-rtx.c | 6 ++- gcc/testsuite/ChangeLog | 5 ++ .../gcc.target/i386/avx512f-pr89445.c | 54 +++++++++++++++++++ 5 files changed, 98 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-pr89445.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2be8a96355a..21850194880 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2019-02-24 Jakub Jelinek + + PR rtl-optimization/89445 + * simplify-rtx.c (simplify_ternary_operation): Don't use + simplify_merge_mask on operands that may trap. + * rtlanal.c (may_trap_p_1): Use FLOAT_MODE_P instead of + SCALAR_FLOAT_MODE_P checks. For integral division by zero, if + second operand is CONST_VECTOR, check if any element could be zero. + Don't expect traps for VEC_{MERGE,SELECT,CONCAT,DUPLICATE} unless + their operands can trap. + 2019-02-23 Martin Sebor * gimple-ssa-sprintf.c (target_strtol): Rename... diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c index 345f13c6e99..3873b4098b0 100644 --- a/gcc/rtlanal.c +++ b/gcc/rtlanal.c @@ -2846,10 +2846,28 @@ may_trap_p_1 (const_rtx x, unsigned flags) case UMOD: if (HONOR_SNANS (x)) return 1; - if (SCALAR_FLOAT_MODE_P (GET_MODE (x))) + if (FLOAT_MODE_P (GET_MODE (x))) return flag_trapping_math; if (!CONSTANT_P (XEXP (x, 1)) || (XEXP (x, 1) == const0_rtx)) return 1; + if (GET_CODE (XEXP (x, 1)) == CONST_VECTOR) + { + /* For CONST_VECTOR, return 1 if any element is or might be zero. */ + unsigned int n_elts; + rtx op = XEXP (x, 1); + if (!GET_MODE_NUNITS (GET_MODE (op)).is_constant (&n_elts)) + { + if (!CONST_VECTOR_DUPLICATE_P (op)) + return 1; + for (unsigned i = 0; i < (unsigned int) XVECLEN (op, 0); i++) + if (CONST_VECTOR_ENCODED_ELT (op, i) == const0_rtx) + return 1; + } + else + for (unsigned i = 0; i < n_elts; i++) + if (CONST_VECTOR_ELT (op, i) == const0_rtx) + return 1; + } break; case EXPR_LIST: @@ -2898,12 +2916,16 @@ may_trap_p_1 (const_rtx x, unsigned flags) case NEG: case ABS: case SUBREG: + case VEC_MERGE: + case VEC_SELECT: + case VEC_CONCAT: + case VEC_DUPLICATE: /* These operations don't trap even with floating point. */ break; default: /* Any floating arithmetic may trap. */ - if (SCALAR_FLOAT_MODE_P (GET_MODE (x)) && flag_trapping_math) + if (FLOAT_MODE_P (GET_MODE (x)) && flag_trapping_math) return 1; } diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index 83580a259f3..89a46a933fa 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -6073,8 +6073,10 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, if (!side_effects_p (op2)) { - rtx top0 = simplify_merge_mask (op0, op2, 0); - rtx top1 = simplify_merge_mask (op1, op2, 1); + rtx top0 + = may_trap_p (op0) ? NULL_RTX : simplify_merge_mask (op0, op2, 0); + rtx top1 + = may_trap_p (op1) ? NULL_RTX : simplify_merge_mask (op1, op2, 1); if (top0 || top1) return simplify_gen_ternary (code, mode, mode, top0 ? top0 : op0, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dd9877efe5b..707c827621e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-02-24 Jakub Jelinek + + PR rtl-optimization/89445 + * gcc.target/i386/avx512f-pr89445.c: New test. + 2019-02-23 Martin Sebor * gcc.dg/tree-ssa/builtin-sprintf-10.c: Cast remaining diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr89445.c b/gcc/testsuite/gcc.target/i386/avx512f-pr89445.c new file mode 100644 index 00000000000..230c1d1f864 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr89445.c @@ -0,0 +1,54 @@ +/* PR rtl-optimization/89445 */ +/* { dg-do run { target { avx512f && mmap } } } */ +/* { dg-options "-O2 -mavx512f" } */ + +#include "avx512f-check.h" + +#include +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif +#ifndef MAP_ANON +#define MAP_ANON 0 +#endif +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)-1) +#endif + +__attribute__ ((noipa)) +void daxpy (unsigned long n, double a, double const *__restrict x, + double *__restrict y) +{ + const __m512d v_a = _mm512_broadcastsd_pd (_mm_set_sd (a)); + const __mmask16 final = (1U << (n % 8u)) - 1; + __mmask16 mask = 65535u; + unsigned long i; + for (i = 0; i < n * sizeof (double); i += 8 * sizeof (double)) + { + if (i + 8 * sizeof (double) > n * sizeof (double)) + mask = final; + __m512d v_x = _mm512_maskz_loadu_pd (mask, (char const *) x + i); + __m512d v_y = _mm512_maskz_loadu_pd (mask, (char const *) y + i); + __m512d tmp = _mm512_fmadd_pd (v_x, v_a, v_y); + _mm512_mask_storeu_pd ((char *) y + i, mask, tmp); + } +} + +static const double x[] = { 1, 2, 3, 4 }; + +static void +avx512f_test (void) +{ + char *ptr + = (char *) mmap (NULL, 2 * 4096, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) + return; + + munmap (ptr + 4096, 4096); + double *y = (double *) (ptr + 4096 - sizeof (x)); + __builtin_memcpy (y, x, sizeof (x)); + daxpy (sizeof (x) / sizeof (x[0]), 1.0, x, y); + if (y[0] != 2.0 || y[1] != 4.0 || y[2] != 6.0 || y[3] != 8.0) + abort (); +}