optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD...
gcc: * optabs.c (expand_vector_binop): Don't store using a SUBREG smaller than UNITS_PER_WORD, unless this is little endian and the first unit in this word. Let extract_bit_field decide how to load an element. Force arguments to matching mode. (expand_vector_unop): Likewise. * simplify-rtx.c (simplify_subreg): Don't assume that all vectors consist of word_mode elements. * c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR, BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR. (build_unary_op): Allow vector types for BIT_NOT_EXPR. * emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for CONST_VECTOR. * optabs.c (expand_vector_binop): Try to perform operation in smaller vector modes with same inner size. Add handling of AND, IOR and XOR. Reject expansion to inner-mode sized scalars when using OPTAB_DIRECT. Use simplify_gen_subreg on constants. (expand_vector_unop): Try to perform operation in smaller vector modes with same inner size. Add handling of one's complement. When there is no vector negate operation, try a vector subtract operation. Use simplify_gen_subreg on constants. * simplify-rtx.c (simplify_subreg): Add capability to convert vector constants into smaller vectors with same inner mode, and to integer CONST_DOUBLEs. gcc/testsuite: * gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~. * gcc.c-torture/execute/simd-2.c (main): Likewise. From-SVN: r55209
This commit is contained in:
parent
032b2b2990
commit
34a80643d8
|
@ -1,3 +1,30 @@
|
|||
Wed Jul 3 10:24:16 2002 J"orn Rennecke <joern.rennecke@superh.com>
|
||||
|
||||
* optabs.c (expand_vector_binop): Don't store using a SUBREG smaller
|
||||
than UNITS_PER_WORD, unless this is little endian and the first unit
|
||||
in this word. Let extract_bit_field decide how to load an element.
|
||||
Force arguments to matching mode.
|
||||
(expand_vector_unop): Likewise.
|
||||
|
||||
* simplify-rtx.c (simplify_subreg): Don't assume that all vectors
|
||||
consist of word_mode elements.
|
||||
* c-typeck.c (build_binary_op): Allow vector types for BIT_AND_EXPR,
|
||||
BIT_ANDTC_EXPR, BIT_IOR_EXPR and BIT_XOR_EXPR.
|
||||
(build_unary_op): Allow vector types for BIT_NOT_EXPR.
|
||||
* emit-rtl.c (gen_lowpart_common): Use simplify_gen_subreg for
|
||||
CONST_VECTOR.
|
||||
* optabs.c (expand_vector_binop): Try to perform operation in
|
||||
smaller vector modes with same inner size. Add handling of AND, IOR
|
||||
and XOR. Reject expansion to inner-mode sized scalars when using
|
||||
OPTAB_DIRECT. Use simplify_gen_subreg on constants.
|
||||
(expand_vector_unop): Try to perform operation in smaller vector
|
||||
modes with same inner size. Add handling of one's complement.
|
||||
When there is no vector negate operation, try a vector subtract
|
||||
operation. Use simplify_gen_subreg on constants.
|
||||
* simplify-rtx.c (simplify_subreg): Add capability to convert vector
|
||||
constants into smaller vectors with same inner mode, and to
|
||||
integer CONST_DOUBLEs.
|
||||
|
||||
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
|
||||
|
||||
* c-parse.in (parsing_iso_function_signature): New variable.
|
||||
|
|
|
@ -2071,6 +2071,8 @@ build_binary_op (code, orig_op0, orig_op1, convert_p)
|
|||
case BIT_XOR_EXPR:
|
||||
if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE)
|
||||
shorten = -1;
|
||||
else if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE)
|
||||
common = 1;
|
||||
break;
|
||||
|
||||
case TRUNC_MOD_EXPR:
|
||||
|
@ -2778,7 +2780,12 @@ build_unary_op (code, xarg, flag)
|
|||
break;
|
||||
|
||||
case BIT_NOT_EXPR:
|
||||
if (typecode == COMPLEX_TYPE)
|
||||
if (typecode == INTEGER_TYPE || typecode == VECTOR_TYPE)
|
||||
{
|
||||
if (!noconvert)
|
||||
arg = default_conversion (arg);
|
||||
}
|
||||
else if (typecode == COMPLEX_TYPE)
|
||||
{
|
||||
code = CONJ_EXPR;
|
||||
if (pedantic)
|
||||
|
@ -2786,13 +2793,11 @@ build_unary_op (code, xarg, flag)
|
|||
if (!noconvert)
|
||||
arg = default_conversion (arg);
|
||||
}
|
||||
else if (typecode != INTEGER_TYPE)
|
||||
else
|
||||
{
|
||||
error ("wrong type argument to bit-complement");
|
||||
return error_mark_node;
|
||||
}
|
||||
else if (!noconvert)
|
||||
arg = default_conversion (arg);
|
||||
break;
|
||||
|
||||
case ABS_EXPR:
|
||||
|
|
|
@ -991,7 +991,7 @@ gen_lowpart_common (mode, x)
|
|||
return gen_rtx_fmt_e (GET_CODE (x), mode, XEXP (x, 0));
|
||||
}
|
||||
else if (GET_CODE (x) == SUBREG || GET_CODE (x) == REG
|
||||
|| GET_CODE (x) == CONCAT)
|
||||
|| GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR)
|
||||
return simplify_gen_subreg (mode, x, GET_MODE (x), offset);
|
||||
/* If X is a CONST_INT or a CONST_DOUBLE, extract the appropriate bits
|
||||
from the low-order part of the constant. */
|
||||
|
|
162
gcc/optabs.c
162
gcc/optabs.c
|
@ -1923,40 +1923,86 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
|
|||
int unsignedp;
|
||||
enum optab_methods methods;
|
||||
{
|
||||
enum machine_mode submode;
|
||||
int elts, subsize, i;
|
||||
enum machine_mode submode, tmode;
|
||||
int size, elts, subsize, subbitsize, i;
|
||||
rtx t, a, b, res, seq;
|
||||
enum mode_class class;
|
||||
|
||||
class = GET_MODE_CLASS (mode);
|
||||
|
||||
size = GET_MODE_SIZE (mode);
|
||||
submode = GET_MODE_INNER (mode);
|
||||
subsize = GET_MODE_UNIT_SIZE (mode);
|
||||
elts = GET_MODE_NUNITS (mode);
|
||||
|
||||
if (!target)
|
||||
target = gen_reg_rtx (mode);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
/* FIXME: Optimally, we should try to do this in narrower vector
|
||||
modes if available. E.g. When trying V8SI, try V4SI, else
|
||||
V2SI, else decay into SI. */
|
||||
/* Search for the widest vector mode with the same inner mode that is
|
||||
still narrower than MODE and that allows to open-code this operator.
|
||||
Note, if we find such a mode and the handler later decides it can't
|
||||
do the expansion, we'll be called recursively with the narrower mode. */
|
||||
for (tmode = GET_CLASS_NARROWEST_MODE (class);
|
||||
GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
|
||||
tmode = GET_MODE_WIDER_MODE (tmode))
|
||||
{
|
||||
if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
|
||||
&& binoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
|
||||
submode = tmode;
|
||||
}
|
||||
|
||||
switch (binoptab->code)
|
||||
{
|
||||
case AND:
|
||||
case IOR:
|
||||
case XOR:
|
||||
tmode = int_mode_for_mode (mode);
|
||||
if (tmode != BLKmode)
|
||||
submode = tmode;
|
||||
case PLUS:
|
||||
case MINUS:
|
||||
case MULT:
|
||||
case DIV:
|
||||
subsize = GET_MODE_SIZE (submode);
|
||||
subbitsize = GET_MODE_BITSIZE (submode);
|
||||
elts = size / subsize;
|
||||
|
||||
/* If METHODS is OPTAB_DIRECT, we don't insist on the exact mode,
|
||||
but that we operate on more than one element at a time. */
|
||||
if (subsize == GET_MODE_UNIT_SIZE (mode) && methods == OPTAB_DIRECT)
|
||||
return 0;
|
||||
|
||||
start_sequence ();
|
||||
|
||||
/* Errors can leave us with a const0_rtx as operand. */
|
||||
if (GET_MODE (op0) != mode)
|
||||
op0 = copy_to_mode_reg (mode, op0);
|
||||
if (GET_MODE (op1) != mode)
|
||||
op1 = copy_to_mode_reg (mode, op1);
|
||||
|
||||
if (!target)
|
||||
target = gen_reg_rtx (mode);
|
||||
|
||||
for (i = 0; i < elts; ++i)
|
||||
{
|
||||
t = simplify_gen_subreg (submode, target, mode,
|
||||
i * subsize);
|
||||
a = simplify_gen_subreg (submode, op0, mode,
|
||||
i * subsize);
|
||||
b = simplify_gen_subreg (submode, op1, mode,
|
||||
i * subsize);
|
||||
/* If this is part of a register, and not the first item in the
|
||||
word, we can't store using a SUBREG - that would clobber
|
||||
previous results.
|
||||
And storing with a SUBREG is only possible for the least
|
||||
significant part, hence we can't do it for big endian
|
||||
(unless we want to permute the evaluation order). */
|
||||
if (GET_CODE (target) == REG
|
||||
&& (BYTES_BIG_ENDIAN
|
||||
? subsize < UNITS_PER_WORD
|
||||
: ((i * subsize) % UNITS_PER_WORD) != 0))
|
||||
t = NULL_RTX;
|
||||
else
|
||||
t = simplify_gen_subreg (submode, target, mode, i * subsize);
|
||||
if (CONSTANT_P (op0))
|
||||
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
|
||||
else
|
||||
a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
|
||||
NULL_RTX, submode, submode, size);
|
||||
if (CONSTANT_P (op1))
|
||||
b = simplify_gen_subreg (submode, op1, mode, i * subsize);
|
||||
else
|
||||
b = extract_bit_field (op1, subbitsize, i * subbitsize, unsignedp,
|
||||
NULL_RTX, submode, submode, size);
|
||||
|
||||
if (binoptab->code == DIV)
|
||||
{
|
||||
|
@ -1974,7 +2020,11 @@ expand_vector_binop (mode, binoptab, op0, op1, target, unsignedp, methods)
|
|||
if (res == 0)
|
||||
break;
|
||||
|
||||
emit_move_insn (t, res);
|
||||
if (t)
|
||||
emit_move_insn (t, res);
|
||||
else
|
||||
store_bit_field (target, subbitsize, i * subbitsize, submode, res,
|
||||
size);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1999,31 +2049,83 @@ expand_vector_unop (mode, unoptab, op0, target, unsignedp)
|
|||
rtx target;
|
||||
int unsignedp;
|
||||
{
|
||||
enum machine_mode submode;
|
||||
int elts, subsize, i;
|
||||
enum machine_mode submode, tmode;
|
||||
int size, elts, subsize, subbitsize, i;
|
||||
rtx t, a, res, seq;
|
||||
|
||||
size = GET_MODE_SIZE (mode);
|
||||
submode = GET_MODE_INNER (mode);
|
||||
subsize = GET_MODE_UNIT_SIZE (mode);
|
||||
elts = GET_MODE_NUNITS (mode);
|
||||
|
||||
/* Search for the widest vector mode with the same inner mode that is
|
||||
still narrower than MODE and that allows to open-code this operator.
|
||||
Note, if we find such a mode and the handler later decides it can't
|
||||
do the expansion, we'll be called recursively with the narrower mode. */
|
||||
for (tmode = GET_CLASS_NARROWEST_MODE (GET_MODE_CLASS (mode));
|
||||
GET_MODE_SIZE (tmode) < GET_MODE_SIZE (mode);
|
||||
tmode = GET_MODE_WIDER_MODE (tmode))
|
||||
{
|
||||
if (GET_MODE_INNER (tmode) == GET_MODE_INNER (mode)
|
||||
&& unoptab->handlers[(int) tmode].insn_code != CODE_FOR_nothing)
|
||||
submode = tmode;
|
||||
}
|
||||
/* If there is no negate operation, try doing a subtract from zero. */
|
||||
if (unoptab == neg_optab && GET_MODE_CLASS (submode) == MODE_INT)
|
||||
{
|
||||
rtx temp;
|
||||
temp = expand_binop (mode, sub_optab, CONST0_RTX (mode), op0,
|
||||
target, unsignedp, OPTAB_DIRECT);
|
||||
if (temp)
|
||||
return temp;
|
||||
}
|
||||
|
||||
if (unoptab == one_cmpl_optab)
|
||||
{
|
||||
tmode = int_mode_for_mode (mode);
|
||||
if (tmode != BLKmode)
|
||||
submode = tmode;
|
||||
}
|
||||
|
||||
subsize = GET_MODE_SIZE (submode);
|
||||
subbitsize = GET_MODE_BITSIZE (submode);
|
||||
elts = size / subsize;
|
||||
|
||||
/* Errors can leave us with a const0_rtx as operand. */
|
||||
if (GET_MODE (op0) != mode)
|
||||
op0 = copy_to_mode_reg (mode, op0);
|
||||
|
||||
if (!target)
|
||||
target = gen_reg_rtx (mode);
|
||||
|
||||
start_sequence ();
|
||||
|
||||
/* FIXME: Optimally, we should try to do this in narrower vector
|
||||
modes if available. E.g. When trying V8SI, try V4SI, else
|
||||
V2SI, else decay into SI. */
|
||||
|
||||
for (i = 0; i < elts; ++i)
|
||||
{
|
||||
t = simplify_gen_subreg (submode, target, mode, i * subsize);
|
||||
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
|
||||
/* If this is part of a register, and not the first item in the
|
||||
word, we can't store using a SUBREG - that would clobber
|
||||
previous results.
|
||||
And storing with a SUBREG is only possible for the least
|
||||
significant part, hence we can't do it for big endian
|
||||
(unless we want to permute the evaluation order). */
|
||||
if (GET_CODE (target) == REG
|
||||
&& (BYTES_BIG_ENDIAN
|
||||
? subsize < UNITS_PER_WORD
|
||||
: ((i * subsize) % UNITS_PER_WORD) != 0))
|
||||
t = NULL_RTX;
|
||||
else
|
||||
t = simplify_gen_subreg (submode, target, mode, i * subsize);
|
||||
if (CONSTANT_P (op0))
|
||||
a = simplify_gen_subreg (submode, op0, mode, i * subsize);
|
||||
else
|
||||
a = extract_bit_field (op0, subbitsize, i * subbitsize, unsignedp,
|
||||
t, submode, submode, size);
|
||||
|
||||
res = expand_unop (submode, unoptab, a, t, unsignedp);
|
||||
|
||||
emit_move_insn (t, res);
|
||||
if (t)
|
||||
emit_move_insn (t, res);
|
||||
else
|
||||
store_bit_field (target, subbitsize, i * subbitsize, submode, res,
|
||||
size);
|
||||
}
|
||||
|
||||
seq = get_insns ();
|
||||
|
|
|
@ -2271,19 +2271,57 @@ simplify_subreg (outermode, op, innermode, byte)
|
|||
/* Simplify subregs of vector constants. */
|
||||
if (GET_CODE (op) == CONST_VECTOR)
|
||||
{
|
||||
int offset = byte / UNITS_PER_WORD;
|
||||
int elt_size = GET_MODE_SIZE (GET_MODE_INNER (innermode));
|
||||
int offset = byte / elt_size;
|
||||
rtx elt;
|
||||
|
||||
/* This shouldn't happen, but let's not do anything stupid. */
|
||||
if (GET_MODE_INNER (innermode) != outermode)
|
||||
if (GET_MODE_INNER (innermode) == outermode)
|
||||
{
|
||||
elt = CONST_VECTOR_ELT (op, offset);
|
||||
|
||||
/* ?? We probably don't need this copy_rtx because constants
|
||||
can be shared. ?? */
|
||||
|
||||
return copy_rtx (elt);
|
||||
}
|
||||
else if (GET_MODE_INNER (innermode) == GET_MODE_INNER (outermode)
|
||||
&& GET_MODE_SIZE (innermode) > GET_MODE_SIZE (outermode))
|
||||
{
|
||||
return (gen_rtx_CONST_VECTOR
|
||||
(outermode,
|
||||
gen_rtvec_v (GET_MODE_NUNITS (outermode),
|
||||
&CONST_VECTOR_ELT (op, offset))));
|
||||
}
|
||||
else if (GET_MODE_CLASS (outermode) == MODE_INT
|
||||
&& (GET_MODE_SIZE (outermode) % elt_size == 0))
|
||||
{
|
||||
/* This happens when the target register size is smaller than
|
||||
the vector mode, and we synthesize operations with vectors
|
||||
of elements that are smaller than the register size. */
|
||||
HOST_WIDE_INT sum = 0, high = 0;
|
||||
unsigned n_elts = (GET_MODE_SIZE (outermode) / elt_size);
|
||||
unsigned i = BYTES_BIG_ENDIAN ? offset : offset + n_elts - 1;
|
||||
unsigned step = BYTES_BIG_ENDIAN ? 1 : -1;
|
||||
int shift = BITS_PER_UNIT * elt_size;
|
||||
|
||||
for (; n_elts--; i += step)
|
||||
{
|
||||
elt = CONST_VECTOR_ELT (op, i);
|
||||
if (GET_CODE (elt) != CONST_INT)
|
||||
return NULL_RTX;
|
||||
high = high << shift | sum >> (HOST_BITS_PER_WIDE_INT - shift);
|
||||
sum = (sum << shift) + INTVAL (elt);
|
||||
}
|
||||
if (GET_MODE_BITSIZE (outermode) <= HOST_BITS_PER_WIDE_INT)
|
||||
return GEN_INT (trunc_int_for_mode (sum, outermode));
|
||||
else if (GET_MODE_BITSIZE (outermode) == 2* HOST_BITS_PER_WIDE_INT)
|
||||
return immed_double_const (high, sum, outermode);
|
||||
else
|
||||
return NULL_RTX;
|
||||
}
|
||||
else
|
||||
/* This shouldn't happen, but let's not do anything stupid. */
|
||||
return NULL_RTX;
|
||||
|
||||
elt = CONST_VECTOR_ELT (op, offset);
|
||||
|
||||
/* ?? We probably don't need this copy_rtx because constants
|
||||
can be shared. ?? */
|
||||
|
||||
return copy_rtx (elt);
|
||||
}
|
||||
|
||||
/* Attempt to simplify constant to non-SUBREG expression. */
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
Wed Jul 3 10:25:41 2002 J"orn Rennecke <joern.rennecke@superh.com>
|
||||
|
||||
* gcc.c-torture/execute/simd-1.c (main): Also test &, |, ^, ~.
|
||||
* gcc.c-torture/execute/simd-2.c (main): Likewise.
|
||||
|
||||
2002-07-02 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
|
||||
|
||||
* gcc.dg/cpp/tr-warn2.c: Use traditional C style function definitions.
|
||||
|
|
|
@ -45,10 +45,29 @@ main ()
|
|||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
|
||||
|
||||
k = i & j;
|
||||
res.v = k;
|
||||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
|
||||
|
||||
k = i | j;
|
||||
res.v = k;
|
||||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
|
||||
|
||||
k = i ^ j;
|
||||
res.v = k;
|
||||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
|
||||
|
||||
k = -i;
|
||||
res.v = k;
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3],
|
||||
-150, -100, -150, -200);
|
||||
|
||||
k = ~i;
|
||||
res.v = k;
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
|
||||
|
||||
exit (0);
|
||||
}
|
||||
|
|
|
@ -44,10 +44,29 @@ main ()
|
|||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 15, 7, 7, 6);
|
||||
|
||||
k = i & j;
|
||||
res.v = k;
|
||||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 2, 4, 20, 8);
|
||||
|
||||
k = i | j;
|
||||
res.v = k;
|
||||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 158, 109, 150, 222);
|
||||
|
||||
k = i ^ j;
|
||||
res.v = k;
|
||||
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], 156, 105, 130, 214);
|
||||
|
||||
k = -i;
|
||||
res.v = k;
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3],
|
||||
-150, -100, -150, -200);
|
||||
|
||||
k = ~i;
|
||||
res.v = k;
|
||||
verify (res.i[0], res.i[1], res.i[2], res.i[3], -151, -101, -151, -201);
|
||||
|
||||
exit (0);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue