From 7b86aaf45c00ec038a0da003f41d5334a00e603c Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 4 Dec 2019 13:14:20 +0000 Subject: [PATCH] Fix VIEW_CONVERT_EXPRs for VECTOR_BOOLEAN_TYPE_Ps In r278410 I added code to handle VIEW_CONVERT_EXPRs between variable-length vectors. This included support for decoding a VECTOR_BOOLEAN_TYPE_P with subbyte elements. However, it turns out that we were already mishandling such bool vectors for fixed-length vectors: we treated each element as a stand-alone byte instead of putting multiple elements into the same byte. I think in principle this could have been an issue for AVX512 as well. This patch adds encoding support for boolean vectors and reuses a version of the new decode support for fixed-length vectors. 2019-12-04 Richard Sandiford gcc/ * fold-const.c (native_encode_vector_part): Handle VECTOR_BOOLEAN_TYPE_Ps that have subbyte precision. (native_decode_vector_tree): Delete, moving the bulk of the code to... (native_interpret_vector_part): ...this new function. Use a pointer and length instead of a vec<> and start index. (native_interpret_vector): Use native_interpret_vector_part. (fold_view_convert_vector_encoding): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/acle/general/whilelt_5.c: New test. From-SVN: r278964 --- gcc/ChangeLog | 10 ++ gcc/fold-const.c | 163 ++++++++++-------- gcc/testsuite/ChangeLog | 4 + .../aarch64/sve/acle/general/whilelt_5.c | 163 ++++++++++++++++++ 4 files changed, 271 insertions(+), 69 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6b69111ebc0..d80a8a8058a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2019-12-04 Richard Sandiford + + * fold-const.c (native_encode_vector_part): Handle + VECTOR_BOOLEAN_TYPE_Ps that have subbyte precision. + (native_decode_vector_tree): Delete, moving the bulk of the code to... + (native_interpret_vector_part): ...this new function. Use a pointer + and length instead of a vec<> and start index. + (native_interpret_vector): Use native_interpret_vector_part. + (fold_view_convert_vector_encoding): Likewise. + 2019-12-04 Richard Biener * tree-ssa-sccvn.c (vn_walk_cb_data::push_partial_def): Handle diff --git a/gcc/fold-const.c b/gcc/fold-const.c index d7a77a9848a..84b3bb06217 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -7727,21 +7727,53 @@ static int native_encode_vector_part (const_tree expr, unsigned char *ptr, int len, int off, unsigned HOST_WIDE_INT count) { - unsigned HOST_WIDE_INT i; - int size, offset; - tree itype, elem; + tree itype = TREE_TYPE (TREE_TYPE (expr)); + if (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (expr)) + && TYPE_PRECISION (itype) <= BITS_PER_UNIT) + { + /* This is the only case in which elements can be smaller than a byte. + Element 0 is always in the lsb of the containing byte. */ + unsigned int elt_bits = TYPE_PRECISION (itype); + int total_bytes = CEIL (elt_bits * count, BITS_PER_UNIT); + if ((off == -1 && total_bytes > len) || off >= total_bytes) + return 0; - offset = 0; - itype = TREE_TYPE (TREE_TYPE (expr)); - size = GET_MODE_SIZE (SCALAR_TYPE_MODE (itype)); - for (i = 0; i < count; i++) + if (off == -1) + off = 0; + + /* Zero the buffer and then set bits later where necessary. */ + int extract_bytes = MIN (len, total_bytes - off); + if (ptr) + memset (ptr, 0, extract_bytes); + + unsigned int elts_per_byte = BITS_PER_UNIT / elt_bits; + unsigned int first_elt = off * elts_per_byte; + unsigned int extract_elts = extract_bytes * elts_per_byte; + for (unsigned int i = 0; i < extract_elts; ++i) + { + tree elt = VECTOR_CST_ELT (expr, first_elt + i); + if (TREE_CODE (elt) != INTEGER_CST) + return 0; + + if (ptr && wi::extract_uhwi (wi::to_wide (elt), 0, 1)) + { + unsigned int bit = i * elt_bits; + ptr[bit / BITS_PER_UNIT] |= 1 << (bit % BITS_PER_UNIT); + } + } + return extract_bytes; + } + + int offset = 0; + int size = GET_MODE_SIZE (SCALAR_TYPE_MODE (itype)); + for (unsigned HOST_WIDE_INT i = 0; i < count; i++) { if (off >= size) { off -= size; continue; } - elem = VECTOR_CST_ELT (expr, i); + tree elem = VECTOR_CST_ELT (expr, i); int res = native_encode_expr (elem, ptr ? ptr + offset : NULL, len - offset, off); if ((off == -1 && res != size) || res == 0) @@ -7976,6 +8008,55 @@ native_interpret_complex (tree type, const unsigned char *ptr, int len) return build_complex (type, rpart, ipart); } +/* Read a vector of type TYPE from the target memory image given by BYTES, + which contains LEN bytes. The vector is known to be encodable using + NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each. + + Return the vector on success, otherwise return null. */ + +static tree +native_interpret_vector_part (tree type, const unsigned char *bytes, + unsigned int len, unsigned int npatterns, + unsigned int nelts_per_pattern) +{ + tree elt_type = TREE_TYPE (type); + if (VECTOR_BOOLEAN_TYPE_P (type) + && TYPE_PRECISION (elt_type) <= BITS_PER_UNIT) + { + /* This is the only case in which elements can be smaller than a byte. + Element 0 is always in the lsb of the containing byte. */ + unsigned int elt_bits = TYPE_PRECISION (elt_type); + if (elt_bits * npatterns * nelts_per_pattern > len * BITS_PER_UNIT) + return NULL_TREE; + + tree_vector_builder builder (type, npatterns, nelts_per_pattern); + for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) + { + unsigned int bit_index = i * elt_bits; + unsigned int byte_index = bit_index / BITS_PER_UNIT; + unsigned int lsb = bit_index % BITS_PER_UNIT; + builder.quick_push (bytes[byte_index] & (1 << lsb) + ? build_all_ones_cst (elt_type) + : build_zero_cst (elt_type)); + } + return builder.build (); + } + + unsigned int elt_bytes = tree_to_uhwi (TYPE_SIZE_UNIT (elt_type)); + if (elt_bytes * npatterns * nelts_per_pattern > len) + return NULL_TREE; + + tree_vector_builder builder (type, npatterns, nelts_per_pattern); + for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) + { + tree elt = native_interpret_expr (elt_type, bytes, elt_bytes); + if (!elt) + return NULL_TREE; + builder.quick_push (elt); + bytes += elt_bytes; + } + return builder.build (); +} /* Subroutine of native_interpret_expr. Interpret the contents of the buffer PTR of length LEN as a VECTOR_CST of type TYPE. @@ -7984,8 +8065,8 @@ native_interpret_complex (tree type, const unsigned char *ptr, int len) static tree native_interpret_vector (tree type, const unsigned char *ptr, unsigned int len) { - tree etype, elem; - unsigned int i, size; + tree etype; + unsigned int size; unsigned HOST_WIDE_INT count; etype = TREE_TYPE (type); @@ -7994,15 +8075,7 @@ native_interpret_vector (tree type, const unsigned char *ptr, unsigned int len) || size * count > len) return NULL_TREE; - tree_vector_builder elements (type, count, 1); - for (i = 0; i < count; ++i) - { - elem = native_interpret_expr (etype, ptr+(i*size), size); - if (!elem) - return NULL_TREE; - elements.quick_push (elem); - } - return elements.build (); + return native_interpret_vector_part (type, ptr, len, count, 1); } @@ -8064,54 +8137,6 @@ can_native_interpret_type_p (tree type) } } -/* Read a vector of type TYPE from the target memory image given by BYTES, - starting at byte FIRST_BYTE. The vector is known to be encodable using - NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each, - and BYTES is known to have enough bytes to supply NPATTERNS * - NELTS_PER_PATTERN vector elements. Each element of BYTES contains - BITS_PER_UNIT bits and the bytes are in target memory order. - - Return the vector on success, otherwise return null. */ - -static tree -native_decode_vector_tree (tree type, vec bytes, - unsigned int first_byte, unsigned int npatterns, - unsigned int nelts_per_pattern) -{ - tree_vector_builder builder (type, npatterns, nelts_per_pattern); - tree elt_type = TREE_TYPE (type); - unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type)); - if (VECTOR_BOOLEAN_TYPE_P (type) && elt_bits <= BITS_PER_UNIT) - { - /* This is the only case in which elements can be smaller than a byte. - Element 0 is always in the lsb of the containing byte. */ - elt_bits = TYPE_PRECISION (elt_type); - for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) - { - unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits; - unsigned int byte_index = bit_index / BITS_PER_UNIT; - unsigned int lsb = bit_index % BITS_PER_UNIT; - builder.quick_push (bytes[byte_index] & (1 << lsb) - ? build_all_ones_cst (elt_type) - : build_zero_cst (elt_type)); - } - } - else - { - unsigned int elt_bytes = elt_bits / BITS_PER_UNIT; - for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) - { - tree elt = native_interpret_expr (elt_type, &bytes[first_byte], - elt_bytes); - if (!elt) - return NULL_TREE; - builder.quick_push (elt); - first_byte += elt_bytes; - } - } - return builder.build (); -} - /* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating directly on the VECTOR_CST encoding, in a way that works for variable- length vectors. Return the resulting VECTOR_CST on success or null @@ -8168,8 +8193,8 @@ fold_view_convert_vector_encoding (tree type, tree expr) /* Reencode the bytes as TYPE. */ unsigned int type_npatterns = type_sequence_bits / type_elt_bits; - return native_decode_vector_tree (type, buffer, 0, type_npatterns, - nelts_per_pattern); + return native_interpret_vector_part (type, &buffer[0], buffer.length (), + type_npatterns, nelts_per_pattern); } /* Fold a VIEW_CONVERT_EXPR of a constant expression EXPR to type diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2e6597e8b50..7678625820f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2019-12-04 Richard Sandiford + + * gcc.target/aarch64/sve/acle/general/whilelt_5.c: New test. + 2019-12-04 Richard Biener * gcc.dg/tree-ssa/ssa-fre-84.c: New testcase. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c new file mode 100644 index 00000000000..f06a74aa2da --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c @@ -0,0 +1,163 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-additional-options "-O -msve-vector-bits=512 -fdump-tree-optimized" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** load_vl1: +** ptrue (p[0-7])\.[bhsd], vl1 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl1 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 1), ptr); +} + +/* +** load_vl2: +** ptrue (p[0-7])\.h, vl2 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl2 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 2), ptr); +} + +/* +** load_vl3: +** ptrue (p[0-7])\.h, vl3 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl3 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 3), ptr); +} + +/* +** load_vl4: +** ptrue (p[0-7])\.h, vl4 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl4 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 4), ptr); +} + +/* +** load_vl5: +** ptrue (p[0-7])\.h, vl5 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl5 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 5), ptr); +} + +/* +** load_vl6: +** ptrue (p[0-7])\.h, vl6 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl6 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 6), ptr); +} + +/* +** load_vl7: +** ptrue (p[0-7])\.h, vl7 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl7 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 7), ptr); +} + +/* +** load_vl8: +** ptrue (p[0-7])\.h, vl8 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl8 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 8), ptr); +} + +/* +** load_vl9: +** mov (x[0-9]+), #?9 +** whilelo (p[0-7])\.h, xzr, \1 +** ld1h z0\.h, \2/z, \[x0\] +** ret +*/ +svint16_t +load_vl9 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 9), ptr); +} + +/* +** load_vl15: +** mov (x[0-9]+), #?15 +** whilelo (p[0-7])\.h, xzr, \1 +** ld1h z0\.h, \2/z, \[x0\] +** ret +*/ +svint16_t +load_vl15 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 15), ptr); +} + +/* +** load_vl16: +** ptrue (p[0-7])\.h, vl16 +** ld1h z0\.h, \1/z, \[x0\] +** ret +*/ +svint16_t +load_vl16 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 16), ptr); +} + +/* +** load_vl17: +** mov (x[0-9]+), #?17 +** whilelo (p[0-7])\.h, xzr, \1 +** ld1h z0\.h, \2/z, \[x0\] +** ret +*/ +svint16_t +load_vl17 (int16_t *ptr) +{ + return svld1 (svwhilelt_b16 (0, 17), ptr); +} + +#ifdef __cplusplus +} +#endif + +/* { dg-final { scan-tree-dump-not "VIEW_CONVERT_EXPR" "optimized" } } */