optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD...

gcc:
	* optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
	than UNITS_PER_WORD, unless this is little endian and the first unit
	in this word.  Let extract_bit_field decide how to load an element.
	Force arguments to matching mode.
	(expand_vector_unop): Likewise.

	* simplify-rtx.c (simplify_subreg): Don't assume that all vectors
	consist of word_mode elements.
	* c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
	BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
	(build_unary_op): Allow vector types for BIT_NOT_EXPR.
	* emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
	CONST_VECTOR.
	* optabs.c (expand_vector_binop): Try to perform operation in
	smaller vector modes with same inner size.  Add handling of AND, IOR
	and XOR.  Reject expansion to inner-mode sized scalars when using
	OPTAB_DIRECT.  Use simplify_gen_subreg on constants.
	(expand_vector_unop): Try to perform operation in smaller vector
	modes with same inner size.  Add handling of one's complement.
	When there is no vector negate operation, try a vector subtract
	operation.  Use simplify_gen_subreg on constants.
	* simplify-rtx.c (simplify_subreg): Add capability to convert vector
	constants into smaller vectors with same inner mode, and to
	integer CONST_DOUBLEs.

gcc/testsuite:
	* gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
	* gcc.c-torture/execute/simd-2.c (main): Likewise.

From-SVN: r55209
Jörn Rennecke 2002-07-03 09:49:46 +00:00 committed by Joern Rennecke
parent 032b2b2990
commit 34a80643d8
8 changed files with 260 additions and 45 deletions
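
The user-visible effect of the c-typeck.c and optabs.c changes below is that the C front end now accepts &, |, ^ and ~ on vector operands, and optabs synthesizes them even when the target has no direct vector pattern. A minimal sketch of that usage follows; the v4si typedef, the union and the operand values are assumptions chosen to match the expected results in the simd-1.c hunk at the end of this commit, not testsuite source.

/* Sketch only: typedef and operand values are illustrative, not copied
   from gcc.c-torture/execute/simd-1.c.  */
typedef int v4si __attribute__ ((vector_size (16)));

union vec { v4si v; int i[4]; };

int
main (void)
{
  v4si a = { 150, 100, 150, 200 };
  v4si b = { 10, 13, 20, 30 };
  union vec r;

  r.v = a & b;   /* expected {2, 4, 20, 8}  */
  r.v = a | b;   /* expected {158, 109, 150, 222}  */
  r.v = a ^ b;   /* expected {156, 105, 130, 214}  */
  r.v = ~a;      /* expected {-151, -101, -151, -201}  */
  return r.i[0] == -151 ? 0 : 1;
}

The simd-1.c and simd-2.c hunks at the end of this commit exercise exactly these operations through a union in the same way.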

gcc/ChangeLog

@@ -1,3 +1,30 @@
Wed Jul 3 10:24:16 2002 Jörn Rennecke <joern.rennecke@superh.com>
* optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
than UNITS_PER_WORD, unless this is little endian and the first unit
in this word. Let extract_bit_field decide how to load an element.
Force arguments to matching mode.
(expand_vector_unop): Likewise.
* simplify-rtx.c (simplify_subreg): Don't assume that all vectors
consist of word_mode elements.
* c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
(build_unary_op): Allow vector types for BIT_NOT_EXPR.
* emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
CONST_VECTOR.
* optabs.c (expand_vector_binop): Try to perform operation in
smaller vector modes with same inner size. Add handling of AND, IOR
and XOR. Reject expansion to inner-mode sized scalars when using
OPTAB_DIRECT. Use simplify_gen_subreg on constants.
(expand_vector_unop): Try to perform operation in smaller vector
modes with same inner size. Add handling of one's complement.
When there is no vector negate operation, try a vector subtract
operation. Use simplify_gen_subreg on constants.
* simplify-rtx.c (simplify_subreg): Add capability to convert vector
constants into smaller vectors with same inner mode, and to
integer CONST_DOUBLEs.
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* c-parse.in (parsing_iso_function_signature): New variable.

gcc/c-typeck.c

@@ -2071,6 +2071,8 @@ build_binary_op (code, orig_op0, orig_op1, convert_p)
case BIT_XOR_EXPR:
if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE)
shorten = -1;
else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE)
common = 1;
break;
case TRUNC_MOD_EXPR:
@@ -2778,7 +2780,12 @@ build_unary_op (code, xarg, flag)
break;
case BIT_NOT_EXPR:
if (typecode == COMPLEX_TYPE)
if (typecode == INTEGER_TYPE || typecode == VECTOR_TYPE)
{
if (!noconvert)
arg = default_conversion (arg);
}
else if (typecode == COMPLEX_TYPE)
{
code = CONJ_EXPR;
if (pedantic)
@@ -2786,13 +2793,11 @@ build_unary_op (code, xarg, flag)
if (!noconvert)
arg = default_conversion (arg);
}
else if (typecode != INTEGER_TYPE)
else
{
error ("wrong type argument to bit-complement");
return error_mark_node;
}
else if (!noconvert)
arg = default_conversion (arg);
break;
case ABS_EXPR:

gcc/emit-rtl.c

@@ -991,7 +991,7 @@ gen_lowpart_common (mode, x)
return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
}
else if (GET_CODE (x) == SUBREG || GET_CODE (x) == REG
|| GET_CODE (x) == CONCAT)
|| GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR)
return simplify_gen_subreg (mode, x, GET_MODE (x), offset);
/* If X is a CONST_INT or a CONST_DOUBLE, extract the appropriate bits
from the low-order part of the constant. */

gcc/optabs.c

@@ -1923,40 +1923,86 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
int unsignedp;
enum optab_methods methods;
{
enum machine_mode submode;
int elts, subsize, i;
enum machine_mode submode, tmode;
int size, elts, subsize, subbitsize, i;
rtx t, a, b, res, seq;
enum mode_class class;
class = GET_MODE_CLASS (mode);
size = GET_MODE_SIZE (mode);
submode = GET_MODE_INNER (mode);
subsize = GET_MODE_UNIT_SIZE (mode);
elts = GET_MODE_NUNITS (mode);
if (!target)
target = gen_reg_rtx (mode);
start_sequence ();
/* FIXME: Optimally, we should try to do this in narrower vector
modes if available. E.g. When trying V8SI, try V4SI, else
V2SI, else decay into SI. */
/* Search for the widest vector mode with the same inner mode that is
still narrower than MODE and that allows us to open-code this operator.
Note, if we find such a mode and the handler later decides it can't
do the expansion, we'll be called recursively with the narrower mode. */
for (tmode = GET_CLASS_NARROWEST_MODE (class);
GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
tmode = GET_MODE_WIDER_MODE (tmode))
{
if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
&& binoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
submode = tmode;
}
switch (binoptab->code)
{
case AND:
case IOR:
case XOR:
tmode = int_mode_for_mode (mode);
if (tmode != BLKmode)
submode = tmode;
case PLUS:
case MINUS:
case MULT:
case DIV:
subsize = GET_MODE_SIZE (submode);
subbitsize = GET_MODE_BITSIZE (submode);
elts = size / subsize;
/* If METHODS is OPTAB_DIRECT, we don't insist on the exact mode,
but we do insist on operating on more than one element at a time. */
if (subsize == GET_MODE_UNIT_SIZE (mode) && methods == OPTAB_DIRECT)
return 0;
start_sequence ();
/* Errors can leave us with a const0_rtx as operand. */
if (GET_MODE (op0) != mode)
op0 = copy_to_mode_reg (mode, op0);
if (GET_MODE (op1) != mode)
op1 = copy_to_mode_reg (mode, op1);
if (!target)
target = gen_reg_rtx (mode);
for (i = 0; i < elts; ++i)
{
t = simplify_gen_subreg (submode, target, mode,
i * subsize);
a = simplify_gen_subreg (submode, op0, mode,
i * subsize);
b = simplify_gen_subreg (submode, op1, mode,
i * subsize);
/* If this is part of a register, and not the first item in the
word, we can't store using a SUBREG - that would clobber
previous results.
And storing with a SUBREG is only possible for the least
significant part, hence we can't do it for big endian
(unless we want to permute the evaluation order).  */
if (GET_CODE (target) == REG
&& (BYTES_BIG_ENDIAN
? subsize < UNITS_PER_WORD
: ((i * subsize) % UNITS_PER_WORD) != 0))
t = NULL_RTX;
else
t = simplify_gen_subreg (submode, target, mode, i * subsize);
if (CONSTANT_P (op0))
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
else
a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
NULL_RTX, submode, submode, size);
if (CONSTANT_P (op1))
b = simplify_gen_subreg (submode, op1, mode, i * subsize);
else
b = extract_bit_field (op1, subbitsize, i * subbitsize, unsignedp,
NULL_RTX, submode, submode, size);
if (binoptab->code == DIV)
{
@@ -1974,7 +2020,11 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
if (res == 0)
break;
emit_move_insn (t, res);
if (t)
emit_move_insn (t, res);
else
store_bit_field (target, subbitsize, i * subbitsize, submode, res,
size);
}
break;
@@ -1999,31 +2049,83 @@ expand_vector_unop (mode, unoptab, op0, target, unsignedp)
rtx target;
int unsignedp;
{
enum machine_mode submode;
int elts, subsize, i;
enum machine_mode submode, tmode;
int size, elts, subsize, subbitsize, i;
rtx t, a, res, seq;
size = GET_MODE_SIZE (mode);
submode = GET_MODE_INNER (mode);
subsize = GET_MODE_UNIT_SIZE (mode);
elts = GET_MODE_NUNITS (mode);
/* Search for the widest vector mode with the same inner mode that is
still narrower than MODE and that allows us to open-code this operator.
Note, if we find such a mode and the handler later decides it can't
do the expansion, we'll be called recursively with the narrower mode. */
for (tmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (mode));
GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
tmode = GET_MODE_WIDER_MODE (tmode))
{
if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
&& unoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
submode = tmode;
}
/* If there is no negate operation, try doing a subtract from zero. */
if (unoptab == neg_optab && GET_MODE_CLASS (submode) == MODE_INT)
{
rtx temp;
temp = expand_binop (mode, sub_optab, CONST0_RTX (mode), op0,
target, unsignedp, OPTAB_DIRECT);
if (temp)
return temp;
}
if (unoptab == one_cmpl_optab)
{
tmode = int_mode_for_mode (mode);
if (tmode != BLKmode)
submode = tmode;
}
subsize = GET_MODE_SIZE (submode);
subbitsize = GET_MODE_BITSIZE (submode);
elts = size / subsize;
/* Errors can leave us with a const0_rtx as operand. */
if (GET_MODE (op0) != mode)
op0 = copy_to_mode_reg (mode, op0);
if (!target)
target = gen_reg_rtx (mode);
start_sequence ();
/* FIXME: Optimally, we should try to do this in narrower vector
modes if available. E.g. When trying V8SI, try V4SI, else
V2SI, else decay into SI. */
for (i = 0; i < elts; ++i)
{
t = simplify_gen_subreg (submode, target, mode, i * subsize);
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
/* If this is part of a register, and not the first item in the
word, we can't store using a SUBREG - that would clobber
previous results.
And storing with a SUBREG is only possible for the least
significant part, hence we can't do it for big endian
(unless we want to permute the evaluation order).  */
if (GET_CODE (target) == REG
&& (BYTES_BIG_ENDIAN
? subsize < UNITS_PER_WORD
: ((i * subsize) % UNITS_PER_WORD) != 0))
t = NULL_RTX;
else
t = simplify_gen_subreg (submode, target, mode, i * subsize);
if (CONSTANT_P (op0))
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
else
a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
t, submode, submode, size);
res = expand_unop (submode, unoptab, a, t, unsignedp);
emit_move_insn (t, res);
if (t)
emit_move_insn (t, res);
else
store_bit_field (target, subbitsize, i * subbitsize, submode, res,
size);
}
seq = get_insns ();
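
The expansion strategy used in the two functions above — prefer one operation in the widest mode the target actually handles, then fall back to piecewise extract/operate/store — can be pictured with a plain C analogy. This is a sketch under assumed sizes (16-byte vectors, 32-bit words); none of the names below come from GCC.

#include <stdint.h>
#include <string.h>

/* AND two 16-byte "vectors" one 32-bit word at a time, the way the patched
   expand_vector_binop prefers a whole-vector (or widest available) integer
   mode over one operation per element.  */
static void
vec16_and (unsigned char *dst, const unsigned char *a, const unsigned char *b)
{
  size_t i;

  for (i = 0; i < 16; i += sizeof (uint32_t))
    {
      uint32_t wa, wb, wr;

      memcpy (&wa, a + i, sizeof wa);    /* like extract_bit_field on one chunk */
      memcpy (&wb, b + i, sizeof wb);
      wr = wa & wb;                      /* one SImode AND covers four QImode elements */
      memcpy (dst + i, &wr, sizeof wr);  /* like store_bit_field into the target */
    }
}

When even a multi-element step is unavailable, the real code degrades to one element per iteration, which is the loop over elts shown in the hunks above.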

gcc/simplify-rtx.c

@@ -2271,19 +2271,57 @@ simplify_subreg (outermode, op, innermode, byte)
/* Simplify subregs of vector constants. */
if (GET_CODE (op) == CONST_VECTOR)
{
int offset = byte / UNITS_PER_WORD;
int elt_size = GET_MODE_SIZE (GET_MODE_INNER (innermode));
int offset = byte / elt_size;
rtx elt;
/* This shouldn't happen, but let's not do anything stupid. */
if (GET_MODE_INNER (innermode) != outermode)
if (GET_MODE_INNER (innermode) == outermode)
{
elt = CONST_VECTOR_ELT (op, offset);
/* ?? We probably don't need this copy_rtx because constants
can be shared. ?? */
return copy_rtx (elt);
}
else if (GET_MODE_INNER (innermode) == GET_MODE_INNER (outermode)
&& GET_MODE_SIZE (innermode) > GET_MODE_SIZE (outermode))
{
return (gen_rtx_CONST_VECTOR
(outermode,
gen_rtvec_v (GET_MODE_NUNITS (outermode),
&CONST_VECTOR_ELT (op, offset))));
}
else if (GET_MODE_CLASS (outermode) == MODE_INT
&& (GET_MODE_SIZE (outermode) % elt_size == 0))
{
/* This happens when the target register size is smaller than
the vector mode, and we synthesize operations with vectors
of elements that are smaller than the register size. */
HOST_WIDE_INT sum = 0, high = 0;
unsigned n_elts = (GET_MODE_SIZE (outermode) / elt_size);
unsigned i = BYTES_BIG_ENDIAN ? offset : offset + n_elts - 1;
unsigned step = BYTES_BIG_ENDIAN ? 1 : -1;
int shift = BITS_PER_UNIT * elt_size;
for (; n_elts--; i += step)
{
elt = CONST_VECTOR_ELT (op, i);
if (GET_CODE (elt) != CONST_INT)
return NULL_RTX;
high = high << shift | sum >> (HOST_BITS_PER_WIDE_INT - shift);
sum = (sum << shift) + INTVAL (elt);
}
if (GET_MODE_BITSIZE (outermode) <= HOST_BITS_PER_WIDE_INT)
return GEN_INT (trunc_int_for_mode (sum, outermode));
else if (GET_MODE_BITSIZE (outermode) == 2 * HOST_BITS_PER_WIDE_INT)
return immed_double_const (high, sum, outermode);
else
return NULL_RTX;
}
else
/* This shouldn't happen, but let's not do anything stupid. */
return NULL_RTX;
elt = CONST_VECTOR_ELT (op, offset);
/* ?? We probably don't need this copy_rtx because constants
can be shared. ?? */
return copy_rtx (elt);
}
/* Attempt to simplify constant to non-SUBREG expression. */
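
The new CONST_VECTOR path above folds a run of constant elements into one integer by shifting and adding, scanning the elements in the order dictated by BYTES_BIG_ENDIAN. A standalone sketch of that accumulation follows, under assumed host types and byte-sized elements; the high-word/CONST_DOUBLE handling of the real code is omitted.

#include <stdint.h>

/* Pack n_elts byte-sized elements into a single integer the way the new
   simplify_subreg code builds a CONST_INT from a CONST_VECTOR: shift the
   accumulator by the element width and add the next element, scanning
   forward on big-endian targets and backward on little-endian ones.  */
static uint32_t
pack_elements (const uint8_t *elt, unsigned n_elts, int big_endian)
{
  uint32_t sum = 0;
  unsigned i = big_endian ? 0 : n_elts - 1;
  int step = big_endian ? 1 : -1;

  for (; n_elts--; i += step)
    sum = (sum << 8) + elt[i];
  return sum;
}

For example, pack_elements applied to {1, 2, 3, 4} with big_endian == 0 yields 0x04030201, the value a little-endian integer load of those four bytes would produce.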

gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
Wed Jul 3 10:25:41 2002 Jörn Rennecke <joern.rennecke@superh.com>
* gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
* gcc.c-torture/execute/simd-2.c (main): Likewise.
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* gcc.dg/cpp/tr-warn2.c: Use traditional C style function definitions.

gcc/testsuite/gcc.c-torture/execute/simd-1.c

@@ -45,10 +45,29 @@ main ()
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
k = i & j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
k = i | j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
k = i ^ j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
k = -i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3],
-150, -100, -150, -200);
k = ~i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
exit (0);
}
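
The expected values added above follow from the operand vectors set up earlier in the test, which the hunk does not show; {150, 100, 150, 200} and {10, 13, 20, 30} are an inference consistent with the surrounding results (e.g. 150 / 10 == 15 in the existing verify call). First-lane arithmetic, checkable in isolation:

#include <assert.h>

int
main (void)
{
  /* Assuming i[0] == 150 (binary 1001 0110) and j[0] == 10 (0000 1010);
     the operand values are inferred, not shown in the hunk.  */
  assert ((150 & 10) == 2);
  assert ((150 | 10) == 158);
  assert ((150 ^ 10) == 156);
  assert (~150 == -151);   /* two's complement: ~x == -x - 1 */
  return 0;
}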

gcc/testsuite/gcc.c-torture/execute/simd-2.c

@@ -44,10 +44,29 @@ main ()
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
k = i & j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
k = i | j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
k = i ^ j;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
k = -i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3],
-150, -100, -150, -200);
k = ~i;
res.v = k;
verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
exit (0);
}