[AArch64] Rework SVE REV[BHW] patterns

The current SVE REV patterns follow the Advanced SIMD scheme, in which
UNSPEC_REV<NN> reverses elements within an <NN>-bit granule.
E.g. UNSPEC_REV64 on VNx8HI reverses the four 16-bit elements
within each 64-bit granule.

The native SVE scheme is the other way around: UNSPEC_REV64 is seen
as an operation on 64-bit elements, with REVB swapping bytes within
the elements, REVH swapping halfwords, and so on.  This fits SVE more
naturally because the operation can then be predicated per <NN>-bit
granule/element.
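To make the SVE view concrete, here is a scalar sketch (illustrative only;
`revb_d` is an invented name, not part of the patch) of REVB acting on
64-bit containers, i.e. the permute that the Advanced SIMD scheme would
instead describe as REV64 acting on byte elements:

```c
#include <stddef.h>
#include <stdint.h>

/* Scalar model of SVE REVB with .d containers: reverse the bytes
   within each 64-bit element of the vector.  In the Advanced SIMD
   naming, the same permutation is REV64 on byte elements.  */
static void
revb_d (uint8_t *bytes, size_t nbytes)
{
  for (size_t lane = 0; lane < nbytes; lane += 8)
    for (size_t i = 0; i < 4; i++)
      {
	uint8_t tmp = bytes[lane + i];
	bytes[lane + i] = bytes[lane + 7 - i];
	bytes[lane + 7 - i] = tmp;
      }
}
```

Because each 64-bit container is handled independently, predicating the
operation per container element falls out naturally.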

Making the patterns use the Advanced SIMD scheme was more natural
when all we cared about were permutes, since we could then use
the source and target of the permute in their original modes.
However, the ACLE does need patterns that follow the native scheme,
treating them as operations on integer elements.  This patch defines
the patterns that way instead and updates the existing uses to match.
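Under the native scheme, the choice of operation is a pure function of the
size of the quantities being reversed.  A minimal model of the selection
logic the patch adds as aarch64_sve_rev_unspec (the enum here stands in
for the real rtl unspec constants):

```c
#include <stdlib.h>

/* Illustrative model of the patch's aarch64_sve_rev_unspec: pick the
   REV[BHW] operation from the size in bytes of the quantities being
   reversed inside wider integer containers.  The enum is a stand-in
   for the real UNSPEC_* constants in iterators.md.  */
enum rev_unspec { UNSPEC_REVB, UNSPEC_REVH, UNSPEC_REVW };

static enum rev_unspec
sve_rev_unspec (unsigned int unit_size)
{
  switch (unit_size)
    {
    case 1: return UNSPEC_REVB;	/* reverse bytes */
    case 2: return UNSPEC_REVH;	/* reverse halfwords */
    case 4: return UNSPEC_REVW;	/* reverse words */
    }
  abort ();
}
```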

This also brings in a couple of helper routines from the ACLE branch.

2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (UNSPEC_REVB, UNSPEC_REVH)
	(UNSPEC_REVW): New constants.
	(elem_bits): New mode attribute.
	(SVE_INT_UNARY): New int iterator.
	(optab): Handle UNSPEC_REV[BHW].
	(sve_int_op): New int attribute.
	(min_elem_bits): Handle VNx16QI and the predicate modes.
	* config/aarch64/aarch64-sve.md (*aarch64_sve_rev64<mode>)
	(*aarch64_sve_rev32<mode>, *aarch64_sve_rev16vnx16qi): Delete.
	(@aarch64_pred_<SVE_INT_UNARY:optab><SVE_I:mode>): New pattern.
	* config/aarch64/aarch64.c (aarch64_sve_data_mode): New function.
	(aarch64_sve_int_mode, aarch64_sve_rev_unspec): Likewise.
	(aarch64_split_sve_subreg_move): Use UNSPEC_REV[BHW] instead of
	unspecs based on the total width of the reversed data.
	(aarch64_evpc_rev_local): Likewise (for SVE only).  Use a
	reinterpret followed by a subreg on big-endian targets.

gcc/testsuite/
	* gcc.target/aarch64/sve/revb_1.c: Restrict to little-endian targets.
	Avoid including stdint.h.
	* gcc.target/aarch64/sve/revh_1.c: Likewise.
	* gcc.target/aarch64/sve/revw_1.c: Likewise.
	* gcc.target/aarch64/sve/revb_2.c: New big-endian test.
	* gcc.target/aarch64/sve/revh_2.c: Likewise.
	* gcc.target/aarch64/sve/revw_2.c: Likewise.

From-SVN: r274517
Author: Richard Sandiford, 2019-08-15 08:43:36 +00:00 (committed by Richard Sandiford)
Parent: 432b29c189
Commit: d7a09c445a
11 changed files with 178 additions and 79 deletions

gcc/config/aarch64/aarch64-sve.md

@@ -54,6 +54,7 @@
;;
;; == Unary arithmetic
;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; ---- [INT] General unary arithmetic corresponding to unspecs
;; ---- [INT] Zero extension
;; ---- [INT] Logical inverse
;; ---- [FP] General unary arithmetic corresponding to unspecs
@@ -1497,6 +1498,28 @@
[(set_attr "movprfx" "*,yes,yes")]
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] General unary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes
+;; - REVB
+;; - REVH
+;; - REVW
+;; -------------------------------------------------------------------------
+;; Predicated integer unary operations.
+(define_insn "@aarch64_pred_<optab><mode>"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_I
+	     [(match_operand:SVE_I 2 "register_operand" "w")]
+	     SVE_INT_UNARY)]
+	  UNSPEC_PRED_X))]
+  "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
+  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
;; -------------------------------------------------------------------------
;; ---- [INT] Zero extension
;; -------------------------------------------------------------------------
@@ -4619,9 +4642,6 @@
;; Includes:
;; - DUP
;; - REV
-;; - REVB
-;; - REVH
-;; - REVW
;; -------------------------------------------------------------------------
;; Duplicate one element of a vector.
@@ -4644,42 +4664,6 @@
"TARGET_SVE"
"rev\t%0.<Vetype>, %1.<Vetype>")
-;; Reverse the order of the elements within a 64-bit container.
-(define_insn "*aarch64_sve_rev64<mode>"
-  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
-	(unspec:SVE_BHS
-	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
-	   (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
-			   UNSPEC_REV64)]
-	  UNSPEC_PRED_X))]
-  "TARGET_SVE"
-  "rev<Vesize>\t%0.d, %1/m, %2.d"
-)
-;; Reverse the order of the elements within a 32-bit container.
-(define_insn "*aarch64_sve_rev32<mode>"
-  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
-	(unspec:SVE_BH
-	  [(match_operand:VNx4BI 1 "register_operand" "Upl")
-	   (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
-			  UNSPEC_REV32)]
-	  UNSPEC_PRED_X))]
-  "TARGET_SVE"
-  "rev<Vesize>\t%0.s, %1/m, %2.s"
-)
-;; Reverse the order of the elements within a 16-bit container.
-(define_insn "*aarch64_sve_rev16vnx16qi"
-  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
-	(unspec:VNx16QI
-	  [(match_operand:VNx8BI 1 "register_operand" "Upl")
-	   (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
-			   UNSPEC_REV16)]
-	  UNSPEC_PRED_X))]
-  "TARGET_SVE"
-  "revb\t%0.h, %1/m, %2.h"
-)
;; -------------------------------------------------------------------------
;; ---- [INT,FP] Special-purpose binary permutes
;; -------------------------------------------------------------------------

gcc/config/aarch64/aarch64.c

@@ -1675,6 +1675,22 @@ aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
return default_get_mask_mode (nunits, nbytes);
}
+/* Return the SVE vector mode that has NUNITS elements of mode INNER_MODE.  */
+static opt_machine_mode
+aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
+{
+  enum mode_class mclass = (is_a <scalar_float_mode> (inner_mode)
+			    ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
+  machine_mode mode;
+  FOR_EACH_MODE_IN_CLASS (mode, mclass)
+    if (inner_mode == GET_MODE_INNER (mode)
+	&& known_eq (nunits, GET_MODE_NUNITS (mode))
+	&& aarch64_sve_data_mode_p (mode))
+      return mode;
+  return opt_machine_mode ();
+}
/* Return the integer element mode associated with SVE mode MODE. */
static scalar_int_mode
@@ -1685,6 +1701,17 @@ aarch64_sve_element_int_mode (machine_mode mode)
return int_mode_for_size (elt_bits, 0).require ();
}
+/* Return the integer vector mode associated with SVE mode MODE.
+   Unlike mode_for_int_vector, this can handle the case in which
+   MODE is a predicate (and thus has a different total size).  */
+static machine_mode
+aarch64_sve_int_mode (machine_mode mode)
+{
+  scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode);
+  return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
+}
/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
prefer to use the first arithmetic operand as the else value if
the else value doesn't matter, since that exactly matches the SVE
@@ -4280,14 +4307,29 @@ aarch64_replace_reg_mode (rtx x, machine_mode mode)
return x;
}
+/* Return the SVE REV[BHW] unspec for reversing quantities of mode MODE
+   stored in wider integer containers.  */
+static unsigned int
+aarch64_sve_rev_unspec (machine_mode mode)
+{
+  switch (GET_MODE_UNIT_SIZE (mode))
+    {
+    case 1: return UNSPEC_REVB;
+    case 2: return UNSPEC_REVH;
+    case 4: return UNSPEC_REVW;
+    }
+  gcc_unreachable ();
+}
/* Split a *aarch64_sve_mov<mode>_subreg_be pattern with the given
operands. */
void
aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
{
-  /* Decide which REV operation we need.  The mode with narrower elements
-     determines the mode of the operands and the mode with the wider
+  /* Decide which REV operation we need.  The mode with wider elements
+     determines the mode of the operands and the mode with the narrower
      elements determines the reverse width.  */
machine_mode mode_with_wider_elts = GET_MODE (dest);
machine_mode mode_with_narrower_elts = GET_MODE (src);
@@ -4295,30 +4337,16 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
< GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
std::swap (mode_with_wider_elts, mode_with_narrower_elts);
+  unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts);
unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
-  unsigned int unspec;
-  if (wider_bytes == 8)
-    unspec = UNSPEC_REV64;
-  else if (wider_bytes == 4)
-    unspec = UNSPEC_REV32;
-  else if (wider_bytes == 2)
-    unspec = UNSPEC_REV16;
-  else
-    gcc_unreachable ();
machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
-  /* Emit:
-       (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)] UNSPEC_PRED_X))
-     with the appropriate modes.  */
+  /* Get the operands in the appropriate modes and emit the instruction.  */
ptrue = gen_lowpart (pred_mode, ptrue);
-  dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
-  src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
-  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
-  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
-			UNSPEC_PRED_X);
-  emit_insn (gen_rtx_SET (dest, src));
+  dest = aarch64_replace_reg_mode (dest, mode_with_wider_elts);
+  src = aarch64_replace_reg_mode (src, mode_with_wider_elts);
+  emit_insn (gen_aarch64_pred (unspec, mode_with_wider_elts,
+			       dest, ptrue, src));
}
static bool
@@ -17753,13 +17781,31 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d)
if (d->testing_p)
return true;
-  rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
if (d->vec_flags == VEC_SVE_DATA)
{
-      rtx pred = aarch64_ptrue_reg (pred_mode);
-      src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src),
-			    UNSPEC_PRED_X);
+      machine_mode int_mode = aarch64_sve_int_mode (pred_mode);
+      rtx target = gen_reg_rtx (int_mode);
+      if (BYTES_BIG_ENDIAN)
+	/* The act of taking a subreg between INT_MODE and d->vmode
+	   is itself a reversing operation on big-endian targets;
+	   see the comment at the head of aarch64-sve.md for details.
+	   First reinterpret OP0 as INT_MODE without using a subreg
+	   and without changing the contents.  */
+	emit_insn (gen_aarch64_sve_reinterpret (int_mode, target, d->op0));
+      else
+	{
+	  /* For SVE we use REV[BHW] unspecs derived from the element size
+	     of d->vmode and vector modes whose elements have SIZE bytes.
+	     This ensures that the vector modes match the predicate modes.  */
+	  int unspec = aarch64_sve_rev_unspec (d->vmode);
+	  rtx pred = aarch64_ptrue_reg (pred_mode);
+	  emit_insn (gen_aarch64_pred (unspec, int_mode, target, pred,
+				       gen_lowpart (int_mode, d->op0)));
+	}
+      emit_move_insn (d->target, gen_lowpart (d->vmode, target));
+      return true;
}
+  rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec);
emit_set_insn (d->target, src);
return true;
}

gcc/config/aarch64/iterators.md

@@ -476,6 +476,9 @@
UNSPEC_ANDF ; Used in aarch64-sve.md.
UNSPEC_IORF ; Used in aarch64-sve.md.
UNSPEC_XORF ; Used in aarch64-sve.md.
+UNSPEC_REVB ; Used in aarch64-sve.md.
+UNSPEC_REVH ; Used in aarch64-sve.md.
+UNSPEC_REVW ; Used in aarch64-sve.md.
UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md.
UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md.
UNSPEC_COND_FABS ; Used in aarch64-sve.md.
@@ -638,7 +641,10 @@
;; The number of bits in a vector element, or controlled by a predicate
;; element.
-(define_mode_attr elem_bits [(VNx8HI "16") (VNx4SI "32") (VNx2DI "64")
+(define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16")
+			     (VNx4BI "32") (VNx2BI "64")
+			     (VNx16QI "8") (VNx8HI "16")
+			     (VNx4SI "32") (VNx2DI "64")
(VNx8HF "16") (VNx4SF "32") (VNx2DF "64")])
;; Attribute to describe constants acceptable in logical operations
@@ -1677,6 +1683,8 @@
(define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART])
+(define_int_iterator SVE_INT_UNARY [UNSPEC_REVB UNSPEC_REVH UNSPEC_REVW])
(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
UNSPEC_IORV
UNSPEC_SMAXV
@@ -1777,6 +1785,9 @@
(UNSPEC_ANDV "and")
(UNSPEC_IORV "ior")
(UNSPEC_XORV "xor")
+(UNSPEC_REVB "revb")
+(UNSPEC_REVH "revh")
+(UNSPEC_REVW "revw")
(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
(UNSPEC_SMAXV "smax")
@@ -2045,7 +2056,10 @@
(UNSPEC_UMAXV "umaxv")
(UNSPEC_UMINV "uminv")
(UNSPEC_SMAXV "smaxv")
-(UNSPEC_SMINV "sminv")])
+(UNSPEC_SMINV "sminv")
+(UNSPEC_REVB "revb")
+(UNSPEC_REVH "revh")
+(UNSPEC_REVW "revw")])
(define_int_attr sve_fp_op [(UNSPEC_FADDV "faddv")
(UNSPEC_FMAXNMV "fmaxnmv")
@@ -2118,3 +2132,8 @@
[(UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_immediate")
(UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_immediate")
(UNSPEC_COND_FMUL "aarch64_sve_float_mul_immediate")])
+;; The minimum number of element bits that an instruction can handle.
+(define_int_attr min_elem_bits [(UNSPEC_REVB "16")
+				(UNSPEC_REVH "32")
+				(UNSPEC_REVW "64")])

gcc/testsuite/gcc.target/aarch64/sve/revb_1.c

@@ -1,9 +1,7 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */
-#include <stdint.h>
-typedef int8_t vnx16qi __attribute__((vector_size (32)));
+typedef __INT8_TYPE__ vnx16qi __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)
#define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y)

gcc/testsuite/gcc.target/aarch64/sve/revb_2.c

@@ -0,0 +1,10 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */
+#include "revb_1.c"
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 1 } } */
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 1 } } */
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h} 1 } } */

gcc/testsuite/gcc.target/aarch64/sve/revh_1.c

@@ -1,9 +1,7 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */
-#include <stdint.h>
-typedef uint16_t vnx8hi __attribute__((vector_size (32)));
+typedef __UINT16_TYPE__ vnx8hi __attribute__((vector_size (32)));
typedef _Float16 vnx8hf __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)

gcc/testsuite/gcc.target/aarch64/sve/revh_2.c

@@ -0,0 +1,9 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */
+#include "revh_1.c"
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */
+/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 2 } } */

gcc/testsuite/gcc.target/aarch64/sve/revw_1.c

@@ -1,9 +1,7 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */
-#include <stdint.h>
-typedef uint32_t vnx4si __attribute__((vector_size (32)));
+typedef __UINT32_TYPE__ vnx4si __attribute__((vector_size (32)));
typedef float vnx4sf __attribute__((vector_size (32)));
#define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y)

gcc/testsuite/gcc.target/aarch64/sve/revw_2.c

@@ -0,0 +1,8 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */
+#include "revw_1.c"
+/* { dg-final { scan-assembler-not {\ttbl\t} } } */
+/* { dg-final { scan-assembler-times {\trevw\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */