optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD...

gcc:
	* optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
	than UNITS_PER_WORD, unless this is little endian and the first unit
	in this word.  Let extract_bit_field decide how to load an element.
	Force arguments to matching mode.
	(expand_vector_unop): Likewise.

	* simplify-rtx.c (simplify_subreg): Don't assume that all vectors
	consist of word_mode elements.
	* c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
	BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
	(build_unary_op): Allow vector types for BIT_NOT_EXPR.
	* emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
	CONST_VECTOR.
	* optabs.c (expand_vector_binop): Try to perform operation in
	smaller vector modes with same inner size.  Add handling of AND, IOR
	and XOR.  Reject expansion to inner-mode sized scalars when using
	OPTAB_DIRECT.  Use simplify_gen_subreg on constants.
	(expand_vector_unop): Try to perform operation in smaller vector
	modes with same inner size.  Add handling of one's complement.
	When there is no vector negate operation, try a vector subtract
	operation.  Use simplify_gen_subreg on constants.
	* simplify-rtx.c (simplify_subreg): Add capability to convert vector
	constants into smaller vectors with same inner mode, and to
	integer CONST_DOUBLEs.

gcc/testsuite:
	* gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
	* gcc.c-torture/execute/simd-2.c (main): Likewise.

From-SVN: r55209
Jörn Rennecke 2002-07-03 09:49:46 +00:00 committed by Joern Rennecke
parent 032b2b2990
commit 34a80643d8
8 changed files with 260 additions and 45 deletions
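
The user-visible effect of the c-typeck.c and optabs.c changes below is that the C front end now accepts &, |, ^ and ~ on vector operands, and optabs synthesizes them even when the target has no direct vector pattern. A minimal sketch of that usage follows; the v4si typedef, the union and the operand values are assumptions chosen to match the expected results in the simd-1.c hunk at the end of this commit, not testsuite source.

/* Sketch only: typedef and operand values are illustrative, not copied
   from gcc.c-torture/execute/simd-1.c.  */
typedef int v4si __attribute__ ((vector_size (16)));

union vec { v4si v; int i[4]; };

int
main (void)
{
  v4si a = { 150, 100, 150, 200 };
  v4si b = { 10, 13, 20, 30 };
  union vec r;

  r.v = a & b;   /* expected {2, 4, 20, 8}  */
  r.v = a | b;   /* expected {158, 109, 150, 222}  */
  r.v = a ^ b;   /* expected {156, 105, 130, 214}  */
  r.v = ~a;      /* expected {-151, -101, -151, -201}  */
  return r.i[0] == -151 ? 0 : 1;
}

The simd-1.c and simd-2.c hunks at the end of this commit exercise exactly these operations through a union in the same way.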

gcc/ChangeLog

@@ -1,3 +1,30 @@
Wed Jul 3 10:24:16 2002 Jörn Rennecke <joern.rennecke@superh.com>
* optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
than UNITS_PER_WORD, unless this is little endian and the first unit
in this word. Let extract_bit_field decide how to load an element.
Force arguments to matching mode.
(expand_vector_unop): Likewise.
* simplify-rtx.c (simplify_subreg): Don't assume that all vectors
consist of word_mode elements.
* c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
(build_unary_op): Allow vector types for BIT_NOT_EXPR.
* emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
CONST_VECTOR.
* optabs.c (expand_vector_binop): Try to perform operation in
smaller vector modes with same inner size. Add handling of AND, IOR
and XOR. Reject expansion to inner-mode sized scalars when using
OPTAB_DIRECT. Use simplify_gen_subreg on constants.
(expand_vector_unop): Try to perform operation in smaller vector
modes with same inner size. Add handling of one's complement.
When there is no vector negate operation, try a vector subtract
operation. Use simplify_gen_subreg on constants.
* simplify-rtx.c (simplify_subreg): Add capability to convert vector
constants into smaller vectors with same inner mode, and to
integer CONST_DOUBLEs.
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* c-parse.in (parsing_iso_function_signature): New variable.

gcc/c-typeck.c

@@ -2071,6 +2071,8 @@ build_binary_op (code, orig_op0, orig_op1, convert_p)
case BIT_XOR_EXPR:
if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE)
shorten = -1;
else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE)
common = 1;
break;
case TRUNC_MOD_EXPR:
@@ -2778,7 +2780,12 @@ build_unary_op (code, xarg, flag)
break;
case BIT_NOT_EXPR:
if (typecode == COMPLEX_TYPE)
if (typecode == INTEGER_TYPE || typecode == VECTOR_TYPE)
{
if (!noconvert)
arg = default_conversion (arg);
}
else if (typecode == COMPLEX_TYPE)
{
code = CONJ_EXPR;
if (pedantic)
@@ -2786,13 +2793,11 @@ build_unary_op (code, xarg, flag)
if (!noconvert)
arg = default_conversion (arg);
}
else if (typecode != INTEGER_TYPE)
else
{
error ("wrong type argument to bit-complement");
return error_mark_node;
}
else if (!noconvert)
arg = default_conversion (arg);
break;
case ABS_EXPR:

gcc/emit-rtl.c

@@ -991,7 +991,7 @@ gen_lowpart_common (mode, x)
return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
}
else if (GET_CODE (x) == SUBREG || GET_CODE (x) == REG
|| GET_CODE (x) == CONCAT)
|| GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR)
return simplify_gen_subreg (mode, x, GET_MODE (x), offset);
/* If X is a CONST_INT or a CONST_DOUBLE, extract the appropriate bits
from the low-order part of the constant. */

gcc/optabs.c

@@ -1923,40 +1923,86 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
int unsignedp;
enum optab_methods methods;
{
enum machine_mode submode;
int elts, subsize, i;
enum machine_mode submode, tmode;
int size, elts, subsize, subbitsize, i;
rtx t, a, b, res, seq;
enum mode_class class;
class = GET_MODE_CLASS (mode);
size = GET_MODE_SIZE (mode);
submode = GET_MODE_INNER (mode);
subsize = GET_MODE_UNIT_SIZE (mode);
elts = GET_MODE_NUNITS (mode);
if (!target)
target = gen_reg_rtx (mode);
start_sequence ();
/* FIXME: Optimally, we should try to do this in narrower vector
modes if available. E.g. When trying V8SI, try V4SI, else
V2SI, else decay into SI. */
/* Search for the widest vector mode with the same inner mode that is
still narrower than MODE and that allows us to open-code this operator.
Note, if we find such a mode and the handler later decides it can't
do the expansion, we'll be called recursively with the narrower mode. */
for (tmode = GET_CLASS_NARROWEST_MODE (class);
GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
tmode = GET_MODE_WIDER_MODE (tmode))
{
if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
&& binoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
submode = tmode;
}
switch (binoptab->code)
{
case AND:
case IOR:
case XOR:
tmode = int_mode_for_mode (mode);
if (tmode != BLKmode)
submode = tmode;
case PLUS:
case MINUS:
case MULT:
case DIV:
subsize = GET_MODE_SIZE (submode);
subbitsize = GET_MODE_BITSIZE (submode);
elts = size / subsize;
/* If METHODS is OPTAB_DIRECT, we don't insist on the exact mode,
but we do insist on operating on more than one element at a time. */
if (subsize == GET_MODE_UNIT_SIZE (mode) && methods == OPTAB_DIRECT)
return 0;
start_sequence ();
/* Errors can leave us with a const0_rtx as operand. */
if (GET_MODE (op0) != mode)
op0 = copy_to_mode_reg (mode, op0);
if (GET_MODE (op1) != mode)
op1 = copy_to_mode_reg (mode, op1);
if (!target)
target = gen_reg_rtx (mode);
for (i = 0; i < elts; ++i)
{
t = simplify_gen_subreg (submode, target, mode,
i * subsize);
a = simplify_gen_subreg (submode, op0, mode,
i * subsize);
b = simplify_gen_subreg (submode, op1, mode,
i * subsize);
/* If this is part of a register, and not the first item in the
word, we can't store using a SUBREG - that would clobber
previous results.
And storing with a SUBREG is only possible for the least
significant part, hence we can't do it for big endian
(unless we want to permute the evaluation order).  */
if (GET_CODE (target) == REG
&& (BYTES_BIG_ENDIAN
? subsize < UNITS_PER_WORD
: ((i * subsize) % UNITS_PER_WORD) != 0))
t = NULL_RTX;
else
t = simplify_gen_subreg (submode, target, mode, i * subsize);
if (CONSTANT_P (op0))
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
else
a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
NULL_RTX, submode, submode, size);
if (CONSTANT_P (op1))
b = simplify_gen_subreg (submode, op1, mode, i * subsize);
else
b = extract_bit_field (op1, subbitsize, i * subbitsize, unsignedp,
NULL_RTX, submode, submode, size);
if (binoptab->code == DIV)
{
@@ -1974,7 +2020,11 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
if (res == 0)
break;
emit_move_insn (t, res);
if (t)
emit_move_insn (t, res);
else
store_bit_field (target, subbitsize, i * subbitsize, submode, res,
size);
}
break;
@@ -1999,31 +2049,83 @@ expand_vector_unop (mode, unoptab, op0, target, unsignedp)
rtx target;
int unsignedp;
{
enum machine_mode submode;
int elts, subsize, i;
enum machine_mode submode, tmode;
int size, elts, subsize, subbitsize, i;
rtx t, a, res, seq;
size = GET_MODE_SIZE (mode);
submode = GET_MODE_INNER (mode);
subsize = GET_MODE_UNIT_SIZE (mode);
elts = GET_MODE_NUNITS (mode);
/* Search for the widest vector mode with the same inner mode that is
still narrower than MODE and that allows us to open-code this operator.
Note, if we find such a mode and the handler later decides it can't
do the expansion, we'll be called recursively with the narrower mode. */
for (tmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (mode));
GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
tmode = GET_MODE_WIDER_MODE (tmode))
{
if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
&& unoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
submode = tmode;
}
/* If there is no negate operation, try doing a subtract from zero. */
if (unoptab == neg_optab && GET_MODE_CLASS (submode) == MODE_INT)
{
rtx temp;
temp = expand_binop (mode, sub_optab, CONST0_RTX (mode), op0,
target, unsignedp, OPTAB_DIRECT);
if (temp)
return temp;
}
if (unoptab == one_cmpl_optab)
{
tmode = int_mode_for_mode (mode);
if (tmode != BLKmode)
submode = tmode;
}
subsize = GET_MODE_SIZE (submode);
subbitsize = GET_MODE_BITSIZE (submode);
elts = size / subsize;
/* Errors can leave us with a const0_rtx as operand. */
if (GET_MODE (op0) != mode)
op0 = copy_to_mode_reg (mode, op0);
if (!target)
target = gen_reg_rtx (mode);
start_sequence ();
/* FIXME: Optimally, we should try to do this in narrower vector
modes if available. E.g. When trying V8SI, try V4SI, else
V2SI, else decay into SI. */
for (i = 0; i < elts; ++i)
{
t = simplify_gen_subreg (submode, target, mode, i * subsize);
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
/* If this is part of a register, and not the first item in the
word, we can't store using a SUBREG - that would clobber
previous results.
And storing with a SUBREG is only possible for the least
significant part, hence we can't do it for big endian
(unless we want to permute the evaluation order).  */
if (GET_CODE (target) == REG
&& (BYTES_BIG_ENDIAN
? subsize < UNITS_PER_WORD
: ((i * subsize) % UNITS_PER_WORD) != 0))
t = NULL_RTX;
else
t = simplify_gen_subreg (submode, target, mode, i * subsize);
if (CONSTANT_P (op0))
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
else
a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
t, submode, submode, size);
res = expand_unop (submode, unoptab, a, t, unsignedp);
emit_move_insn (t, res);
if (t)
emit_move_insn (t, res);
else
store_bit_field (target, subbitsize, i * subbitsize, submode, res,
size);
}
seq = get_insns ();
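
The expansion strategy used in the two functions above — prefer one operation in the widest mode the target actually handles, then fall back to piecewise extract/operate/store — can be pictured with a plain C analogy. This is a sketch under assumed sizes (16-byte vectors, 32-bit words); none of the names below come from GCC.

#include <stdint.h>
#include <string.h>

/* AND two 16-byte "vectors" one 32-bit word at a time, the way the patched
   expand_vector_binop prefers a whole-vector (or widest available) integer
   mode over one operation per element.  */
static void
vec16_and (unsigned char *dst, const unsigned char *a, const unsigned char *b)
{
  size_t i;

  for (i = 0; i < 16; i += sizeof (uint32_t))
    {
      uint32_t wa, wb, wr;

      memcpy (&wa, a + i, sizeof wa);    /* like extract_bit_field on one chunk */
      memcpy (&wb, b + i, sizeof wb);
      wr = wa & wb;                      /* one SImode AND covers four QImode elements */
      memcpy (dst + i, &wr, sizeof wr);  /* like store_bit_field into the target */
    }
}

When even a multi-element step is unavailable, the real code degrades to one element per iteration, which is the loop over elts shown in the hunks above.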

gcc/simplify-rtx.c

@@ -2271,19 +2271,57 @@ simplify_subreg (outermode, op, innermode, byte)
/* Simplify subregs of vector constants. */
if (GET_CODE (op) == CONST_VECTOR)
{
int offset = byte / UNITS_PER_WORD;
int elt_size = GET_MODE_SIZE (GET_MODE_INNER (innermode));
int offset = byte / elt_size;
rtx elt;
/* This shouldn't happen, but let's not do anything stupid. */
if (GET_MODE_INNER (innermode) != outermode)
if (GET_MODE_INNER (innermode) == outermode)
{
elt = CONST_VECTOR_ELT (op, offset);
/* ?? We probably don't need this copy_rtx because constants
can be shared. ?? */
return copy_rtx (elt);
}
else if (GET_MODE_INNER (innermode) == GET_MODE_INNER (outermode)
&& GET_MODE_SIZE (innermode) > GET_MODE_SIZE (outermode))
{
return (gen_rtx_CONST_VECTOR
(outermode,
gen_rtvec_v (GET_MODE_NUNITS (outermode),
&CONST_VECTOR_ELT (op, offset))));
}
else if (GET_MODE_CLASS (outermode) == MODE_INT
&& (GET_MODE_SIZE (outermode) % elt_size == 0))
{
/* This happens when the target register size is smaller than
the vector mode, and we synthesize operations with vectors
of elements that are smaller than the register size. */
HOST_WIDE_INT sum = 0, high = 0;
unsigned n_elts = (GET_MODE_SIZE (outermode) / elt_size);
unsigned i = BYTES_BIG_ENDIAN ? offset : offset + n_elts - 1;
unsigned step = BYTES_BIG_ENDIAN ? 1 : -1;
int shift = BITS_PER_UNIT * elt_size;
for (; n_elts--; i += step)
{
elt = CONST_VECTOR_ELT (op, i);
if (GET_CODE (elt) != CONST_INT)
return NULL_RTX;
high = high << shift | sum >> (HOST_BITS_PER_WIDE_INT - shift);
sum = (sum << shift) + INTVAL (elt);
}
if (GET_MODE_BITSIZE (outermode) <= HOST_BITS_PER_WIDE_INT)
return GEN_INT (trunc_int_for_mode (sum, outermode));
else if (GET_MODE_BITSIZE (outermode) == 2 * HOST_BITS_PER_WIDE_INT)
return immed_double_const (high, sum, outermode);
else
return NULL_RTX;
}
else
/* This shouldn't happen, but let's not do anything stupid. */
return NULL_RTX;
elt = CONST_VECTOR_ELT (op, offset);
/* ?? We probably don't need this copy_rtx because constants
can be shared. ?? */
return copy_rtx (elt);
}
/* Attempt to simplify constant to non-SUBREG expression. */
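
The new CONST_VECTOR path above folds a run of constant elements into one integer by shifting and adding, scanning the elements in the order dictated by BYTES_BIG_ENDIAN. A standalone sketch of that accumulation follows, under assumed host types and byte-sized elements; the high-word/CONST_DOUBLE handling of the real code is omitted.

#include <stdint.h>

/* Pack n_elts byte-sized elements into a single integer the way the new
   simplify_subreg code builds a CONST_INT from a CONST_VECTOR: shift the
   accumulator by the element width and add the next element, scanning
   forward on big-endian targets and backward on little-endian ones.  */
static uint32_t
pack_elements (const uint8_t *elt, unsigned n_elts, int big_endian)
{
  uint32_t sum = 0;
  unsigned i = big_endian ? 0 : n_elts - 1;
  int step = big_endian ? 1 : -1;

  for (; n_elts--; i += step)
    sum = (sum << 8) + elt[i];
  return sum;
}

For example, pack_elements applied to {1, 2, 3, 4} with big_endian == 0 yields 0x04030201, the value a little-endian integer load of those four bytes would produce.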

gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
Wed Jul 3 10:25:41 2002 Jörn Rennecke <joern.rennecke@superh.com>
* gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
* gcc.c-torture/execute/simd-2.c (main): Likewise.
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* gcc.dg/cpp/tr-warn2.c: Use traditional C style function definitions.

gcc/testsuite/gcc.c-torture/execute/simd-1.c

@@ -45,10 +45,29 @@ main ()
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
k = i & j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
k = i | j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
k = i ^ j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
k = -i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3],
-150, -100, -150, -200);
k = ~i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
exit (0);
}
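
The expected values added above follow from the operand vectors set up earlier in the test, which the hunk does not show; {150, 100, 150, 200} and {10, 13, 20, 30} are an inference consistent with the surrounding results (e.g. 150 / 10 == 15 in the existing verify call). First-lane arithmetic, checkable in isolation:

#include <assert.h>

int
main (void)
{
  /* Assuming i[0] == 150 (binary 1001 0110) and j[0] == 10 (0000 1010);
     the operand values are inferred, not shown in the hunk.  */
  assert ((150 & 10) == 2);
  assert ((150 | 10) == 158);
  assert ((150 ^ 10) == 156);
  assert (~150 == -151);   /* two's complement: ~x == -x - 1 */
  return 0;
}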

gcc/testsuite/gcc.c-torture/execute/simd-2.c

@@ -44,10 +44,29 @@ main ()
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
k = i & j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
k = i | j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
k = i ^ j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
k = -i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3],
-150, -100, -150, -200);
k = ~i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
exit (0);
}