ia64: Implement vec_perm_const.

* config/ia64/ia64.c (MAX_VECT_LEN): New.
        (struct expand_vec_perm_d): New.
        (TARGET_VECTORIZE_VEC_PERM_CONST_OK): New.
        (ia64_unpack_assemble): Use ia64_expand_vec_perm_const_1.
        (expand_vselect, expand_vselect_vconcat): New.
        (expand_vec_perm_identity, expand_vec_perm_shrp): New.
        (expand_vec_perm_1, expand_vec_perm_broadcast): New.
        (expand_vec_perm_interleave_2, expand_vec_perm_v4hi_5): New.
        (ia64_expand_vec_perm_const_1, ia64_expand_vec_perm_const): New.
        (ia64_vectorize_vec_perm_const_ok): New.
        (ia64_expand_vec_setv2sf, ia64_expand_vec_perm_even_odd): New.
        * config/ia64/ia64-protos.h: Update.
        * config/ia64/vect.md (VEC): New mode iterator.
        (vecint): New mode attribute.
        (vec_interleave_lowv8qi, vec_interleave_highv8qi): Privatize with '*'.
        (vec_interleave_lowv4hi, vec_interleave_highv4hi): Likewise.
        (vec_interleave_lowv2si, vec_interleave_highv2si): Likewise.
        (vec_interleave_lowv2sf, vec_interleave_highv2sf): Likewise.
        (mix1_even, mix1_odd, mux1_alt): Likewise.
        (mux1_brcst_qi): Remove '*' from name.
        (vec_extract_evenv8qi, vec_extract_oddv8qi): Remove.
        (vec_extract_evenv4hi, vec_extract_oddv4hi): Remove.
        (vec_extract_evenv2si, vec_extract_oddv2si): Remove.
        (vec_extract_evenv2sf, vec_extract_oddv2sf): Remove.
        (vec_extract_evenodd_helper): Remove.
        (vec_setv2sf): Use ia64_expand_vec_setv2sf.
        (vec_pack_trunc_v4hi): Use ia64_expand_vec_perm_even_odd.
        (vec_pack_trunc_v2si): Likewise.
        (vec_perm_const<VEC>): New.

From-SVN: r182564
This commit is contained in:
Richard Henderson 2011-12-20 17:03:00 -08:00 committed by Richard Henderson
parent 0f3d6c10f4
commit e6431744b2
4 changed files with 658 additions and 170 deletions

View File

@@ -1,3 +1,35 @@
2011-12-20 Richard Henderson <rth@redhat.com>
* config/ia64/ia64.c (MAX_VECT_LEN): New.
(struct expand_vec_perm_d): New.
(TARGET_VECTORIZE_VEC_PERM_CONST_OK): New.
(ia64_unpack_assemble): Use ia64_expand_vec_perm_const_1.
(expand_vselect, expand_vselect_vconcat): New.
(expand_vec_perm_identity, expand_vec_perm_shrp): New.
(expand_vec_perm_1, expand_vec_perm_broadcast): New.
(expand_vec_perm_interleave_2, expand_vec_perm_v4hi_5): New.
(ia64_expand_vec_perm_const_1, ia64_expand_vec_perm_const): New.
(ia64_vectorize_vec_perm_const_ok): New.
(ia64_expand_vec_setv2sf, ia64_expand_vec_perm_even_odd): New.
* config/ia64/ia64-protos.h: Update.
* config/ia64/vect.md (VEC): New mode iterator.
(vecint): New mode attribute.
(vec_interleave_lowv8qi, vec_interleave_highv8qi): Privatize with '*'.
(vec_interleave_lowv4hi, vec_interleave_highv4hi): Likewise.
(vec_interleave_lowv2si, vec_interleave_highv2si): Likewise.
(vec_interleave_lowv2sf, vec_interleave_highv2sf): Likewise.
(mix1_even, mix1_odd, mux1_alt): Likewise.
(mux1_brcst_qi): Remove '*' from name.
(vec_extract_evenv8qi, vec_extract_oddv8qi): Remove.
(vec_extract_evenv4hi, vec_extract_oddv4hi): Remove.
(vec_extract_evenv2si, vec_extract_oddv2si): Remove.
(vec_extract_evenv2sf, vec_extract_oddv2sf): Remove.
(vec_extract_evenodd_helper): Remove.
(vec_setv2sf): Use ia64_expand_vec_setv2sf.
(vec_pack_trunc_v4hi): Use ia64_expand_vec_perm_even_odd.
(vec_pack_trunc_v2si): Likewise.
(vec_perm_const<VEC>): New.
2011-12-20 Richard Henderson <rth@redhat.com>
* tree-vect-generic.c (expand_vector_operations_1): Correct tests

View File

@@ -61,6 +61,10 @@ extern int ia64_hard_regno_rename_ok (int, int);
extern enum reg_class ia64_secondary_reload_class (enum reg_class,
enum machine_mode, rtx);
extern const char *get_bundle_name (int);
extern void ia64_expand_vec_perm_even_odd (rtx, rtx, rtx, int);
extern bool ia64_expand_vec_perm_const (rtx op[4]);
extern void ia64_expand_vec_setv2sf (rtx op[3]);
#endif /* RTX_CODE */
#ifdef TREE_CODE

View File

@@ -330,6 +330,24 @@ static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static enum machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
bool, bool);
static bool ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
#define MAX_VECT_LEN 8
struct expand_vec_perm_d
{
rtx target, op0, op1;
unsigned char perm[MAX_VECT_LEN];
enum machine_mode vmode;
unsigned char nelt;
bool one_operand_p;
bool testing_p;
};
static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
/* Table of valid machine attributes. */
static const struct attribute_spec ia64_attribute_table[] =
@ -626,6 +644,9 @@ static const struct attribute_spec ia64_attribute_table[] =
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
struct gcc_target targetm = TARGET_INITIALIZER;
typedef enum
@ -2027,28 +2048,28 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
enum machine_mode mode = GET_MODE (lo);
rtx (*gen) (rtx, rtx, rtx);
rtx x;
enum machine_mode vmode = GET_MODE (lo);
unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
struct expand_vec_perm_d d;
bool ok;
switch (mode)
d.target = gen_lowpart (vmode, out);
d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
d.vmode = vmode;
d.nelt = nelt;
d.one_operand_p = false;
d.testing_p = false;
high = (highp ? nelt / 2 : 0);
for (i = 0; i < nelt / 2; ++i)
{
case V8QImode:
gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
break;
case V4HImode:
gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
break;
default:
gcc_unreachable ();
d.perm[i * 2] = i + high;
d.perm[i * 2 + 1] = i + high + nelt;
}
x = gen_lowpart (mode, out);
if (TARGET_BIG_ENDIAN)
x = gen (x, hi, lo);
else
x = gen (x, lo, hi);
emit_insn (x);
ok = ia64_expand_vec_perm_const_1 (&d);
gcc_assert (ok);
}
/* Return a vector of the sign-extension of VEC. */
@@ -11046,5 +11067,557 @@ ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
{
return NULL;
}
/* Construct (set target (vec_select op0 (parallel perm))) and
return true if that's a valid instruction in the active ISA. */
static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
rtx rperm[MAX_VECT_LEN], x;
unsigned i;
for (i = 0; i < nelt; ++i)
rperm[i] = GEN_INT (perm[i]);
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
x = gen_rtx_SET (VOIDmode, target, x);
x = emit_insn (x);
if (recog_memoized (x) < 0)
{
remove_insn (x);
return false;
}
return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well. */
static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
const unsigned char *perm, unsigned nelt)
{
enum machine_mode v2mode;
rtx x;
v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
return expand_vselect (target, x, perm, nelt);
}
/* Try to expand a no-op permutation. */
static bool
expand_vec_perm_identity (struct expand_vec_perm_d *d)
{
unsigned i, nelt = d->nelt;
for (i = 0; i < nelt; ++i)
if (d->perm[i] != i)
return false;
if (!d->testing_p)
emit_move_insn (d->target, d->op0);
return true;
}
/* Try to expand D via a shrp instruction. */
static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
unsigned i, nelt = d->nelt, shift, mask;
rtx tmp, op0, op1;;
/* ??? Don't force V2SFmode into the integer registers. */
if (d->vmode == V2SFmode)
return false;
mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
shift = d->perm[0];
for (i = 1; i < nelt; ++i)
if (d->perm[i] != ((shift + i) & mask))
return false;
if (d->testing_p)
return true;
shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
/* We've eliminated the shift 0 case via expand_vec_perm_identity. */
gcc_assert (IN_RANGE (shift, 1, 63));
/* Recall that big-endian elements are numbered starting at the top of
the register. Ideally we'd have a shift-left-pair. But since we
don't, convert to a shift the other direction. */
if (BYTES_BIG_ENDIAN)
shift = 64 - shift;
tmp = gen_reg_rtx (DImode);
op0 = (shift < nelt ? d->op0 : d->op1);
op1 = (shift < nelt ? d->op1 : d->op0);
op0 = gen_lowpart (DImode, op0);
op1 = gen_lowpart (DImode, op1);
emit_insn (gen_shrp (tmp, op0, op1, GEN_INT (shift)));
emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
return true;
}
/* Try to instantiate D in a single instruction. */
static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
unsigned i, nelt = d->nelt;
unsigned char perm2[MAX_VECT_LEN];
/* Try single-operand selections. */
if (d->one_operand_p)
{
if (expand_vec_perm_identity (d))
return true;
if (expand_vselect (d->target, d->op0, d->perm, nelt))
return true;
}
/* Try two operand selections. */
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
return true;
/* Recognize interleave style patterns with reversed operands. */
if (!d->one_operand_p)
{
for (i = 0; i < nelt; ++i)
{
unsigned e = d->perm[i];
if (e >= nelt)
e -= nelt;
else
e += nelt;
perm2[i] = e;
}
if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
return true;
}
if (expand_vec_perm_shrp (d))
return true;
/* ??? Look for deposit-like permutations where most of the result
comes from one vector unchanged and the rest comes from a
sequential hunk of the other vector. */
return false;
}
/* Pattern match broadcast permutations. */
static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
unsigned i, elt, nelt = d->nelt;
unsigned char perm2[2];
rtx temp;
bool ok;
if (!d->one_operand_p)
return false;
elt = d->perm[0];
for (i = 1; i < nelt; ++i)
if (d->perm[i] != elt)
return false;
switch (d->vmode)
{
case V2SImode:
case V2SFmode:
/* Implementable by interleave. */
perm2[0] = elt;
perm2[1] = elt + 2;
ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
gcc_assert (ok);
break;
case V8QImode:
/* Implementable by extract + broadcast. */
if (BYTES_BIG_ENDIAN)
elt = 7 - elt;
elt *= BITS_PER_UNIT;
temp = gen_reg_rtx (DImode);
emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
GEN_INT (elt), GEN_INT (8)));
emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
break;
case V4HImode:
/* Should have been matched directly by vec_select. */
default:
gcc_unreachable ();
}
return true;
}
/* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
two vector permutation into a single vector permutation by using
an interleave operation to merge the vectors. */
static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
struct expand_vec_perm_d dremap, dfinal;
unsigned char remap[2 * MAX_VECT_LEN];
unsigned contents, i, nelt, nelt2;
unsigned h0, h1, h2, h3;
rtx seq;
bool ok;
if (d->one_operand_p)
return false;
nelt = d->nelt;
nelt2 = nelt / 2;
/* Examine from whence the elements come. */
contents = 0;
for (i = 0; i < nelt; ++i)
contents |= 1u << d->perm[i];
memset (remap, 0xff, sizeof (remap));
dremap = *d;
h0 = (1u << nelt2) - 1;
h1 = h0 << nelt2;
h2 = h0 << nelt;
h3 = h0 << (nelt + nelt2);
if ((contents & (h0 | h2)) == contents) /* punpck even halves */
{
for (i = 0; i < nelt; ++i)
{
unsigned which = i / 2 + (i & 1 ? nelt : 0);
remap[which] = i;
dremap.perm[i] = which;
}
}
else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
{
for (i = 0; i < nelt; ++i)
{
unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
remap[which] = i;
dremap.perm[i] = which;
}
}
else if ((contents & 0x5555) == contents) /* mix even elements */
{
for (i = 0; i < nelt; ++i)
{
unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
remap[which] = i;
dremap.perm[i] = which;
}
}
else if ((contents & 0xaaaa) == contents) /* mix odd elements */
{
for (i = 0; i < nelt; ++i)
{
unsigned which = (i | 1) + (i & 1 ? nelt : 0);
remap[which] = i;
dremap.perm[i] = which;
}
}
else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
{
unsigned shift = ctz_hwi (contents);
for (i = 0; i < nelt; ++i)
{
unsigned which = (i + shift) & (2 * nelt - 1);
remap[which] = i;
dremap.perm[i] = which;
}
}
else
return false;
/* Use the remapping array set up above to move the elements from their
swizzled locations into their final destinations. */
dfinal = *d;
for (i = 0; i < nelt; ++i)
{
unsigned e = remap[d->perm[i]];
gcc_assert (e < nelt);
dfinal.perm[i] = e;
}
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
dfinal.op1 = dfinal.op0;
dfinal.one_operand_p = true;
dremap.target = dfinal.op0;
/* Test if the final remap can be done with a single insn. For V4HImode
this *will* succeed. For V8QImode or V2SImode it may not. */
start_sequence ();
ok = expand_vec_perm_1 (&dfinal);
seq = get_insns ();
end_sequence ();
if (!ok)
return false;
if (d->testing_p)
return true;
ok = expand_vec_perm_1 (&dremap);
gcc_assert (ok);
emit_insn (seq);
return true;
}
/* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
constant permutation via two mux2 and a merge. */
static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
unsigned char perm2[4];
rtx rmask[4];
unsigned i;
rtx t0, t1, mask, x;
bool ok;
if (d->vmode != V4HImode || d->one_operand_p)
return false;
if (d->testing_p)
return true;
for (i = 0; i < 4; ++i)
{
perm2[i] = d->perm[i] & 3;
rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
}
mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
mask = force_reg (V4HImode, mask);
t0 = gen_reg_rtx (V4HImode);
t1 = gen_reg_rtx (V4HImode);
ok = expand_vselect (t0, d->op0, perm2, 4);
gcc_assert (ok);
ok = expand_vselect (t1, d->op1, perm2, 4);
gcc_assert (ok);
x = gen_rtx_AND (V4HImode, mask, t0);
emit_insn (gen_rtx_SET (VOIDmode, t0, x));
x = gen_rtx_NOT (V4HImode, mask);
x = gen_rtx_AND (V4HImode, x, t1);
emit_insn (gen_rtx_SET (VOIDmode, t1, x));
x = gen_rtx_IOR (V4HImode, t0, t1);
emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
return true;
}
/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
With all of the interface bits taken care of, perform the expansion
in D and return true on success. */
static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
if (expand_vec_perm_1 (d))
return true;
if (expand_vec_perm_broadcast (d))
return true;
if (expand_vec_perm_interleave_2 (d))
return true;
if (expand_vec_perm_v4hi_5 (d))
return true;
return false;
}
bool
ia64_expand_vec_perm_const (rtx operands[4])
{
struct expand_vec_perm_d d;
unsigned char perm[MAX_VECT_LEN];
int i, nelt, which;
rtx sel;
d.target = operands[0];
d.op0 = operands[1];
d.op1 = operands[2];
sel = operands[3];
d.vmode = GET_MODE (d.target);
gcc_assert (VECTOR_MODE_P (d.vmode));
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = false;
gcc_assert (GET_CODE (sel) == CONST_VECTOR);
gcc_assert (XVECLEN (sel, 0) == nelt);
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
for (i = which = 0; i < nelt; ++i)
{
rtx e = XVECEXP (sel, 0, i);
int ei = INTVAL (e) & (2 * nelt - 1);
which |= (ei < nelt ? 1 : 2);
d.perm[i] = ei;
perm[i] = ei;
}
switch (which)
{
default:
gcc_unreachable();
case 3:
if (!rtx_equal_p (d.op0, d.op1))
{
d.one_operand_p = false;
break;
}
/* The elements of PERM do not suggest that only the first operand
is used, but both operands are identical. Allow easier matching
of the permutation by folding the permutation into the single
input vector. */
for (i = 0; i < nelt; ++i)
if (d.perm[i] >= nelt)
d.perm[i] -= nelt;
/* FALLTHRU */
case 1:
d.op1 = d.op0;
d.one_operand_p = true;
break;
case 2:
for (i = 0; i < nelt; ++i)
d.perm[i] -= nelt;
d.op0 = d.op1;
d.one_operand_p = true;
break;
}
if (ia64_expand_vec_perm_const_1 (&d))
return true;
/* If the mask says both arguments are needed, but they are the same,
the above tried to expand with one_operand_p true. If that didn't
work, retry with one_operand_p false, as that's what we used in _ok. */
if (which == 3 && d.one_operand_p)
{
memcpy (d.perm, perm, sizeof (perm));
d.one_operand_p = false;
return ia64_expand_vec_perm_const_1 (&d);
}
return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok. */
static bool
ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel)
{
struct expand_vec_perm_d d;
unsigned int i, nelt, which;
bool ret;
d.vmode = vmode;
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
d.testing_p = true;
/* Extract the values from the vector CST into the permutation
array in D. */
memcpy (d.perm, sel, nelt);
for (i = which = 0; i < nelt; ++i)
{
unsigned char e = d.perm[i];
gcc_assert (e < 2 * nelt);
which |= (e < nelt ? 1 : 2);
}
/* For all elements from second vector, fold the elements to first. */
if (which == 2)
for (i = 0; i < nelt; ++i)
d.perm[i] -= nelt;
/* Check whether the mask can be applied to the vector type. */
d.one_operand_p = (which != 3);
/* Otherwise we have to go through the motions and see if we can
figure out how to generate the requested permutation. */
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
if (!d.one_operand_p)
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
start_sequence ();
ret = ia64_expand_vec_perm_const_1 (&d);
end_sequence ();
return ret;
}
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
struct expand_vec_perm_d d;
unsigned int which;
bool ok;
d.target = operands[0];
d.op0 = operands[0];
d.op1 = gen_reg_rtx (V2SFmode);
d.vmode = V2SFmode;
d.nelt = 2;
d.one_operand_p = false;
d.testing_p = false;
which = INTVAL (operands[2]);
gcc_assert (which <= 1);
d.perm[0] = 1 - which;
d.perm[1] = which + 2;
emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
ok = ia64_expand_vec_perm_const_1 (&d);
gcc_assert (ok);
}
void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
struct expand_vec_perm_d d;
enum machine_mode vmode = GET_MODE (target);
unsigned int i, nelt = GET_MODE_NUNITS (vmode);
bool ok;
d.target = target;
d.op0 = op0;
d.op1 = op1;
d.vmode = vmode;
d.nelt = nelt;
d.one_operand_p = false;
d.testing_p = false;
for (i = 0; i < nelt; ++i)
d.perm[i] = i * 2 + odd;
ok = ia64_expand_vec_perm_const_1 (&d);
gcc_assert (ok);
}
#include "gt-ia64.h"

View File

@ -20,11 +20,14 @@
;; Integer vector operations
(define_mode_iterator VEC [V8QI V4HI V2SI V2SF])
(define_mode_iterator VECINT [V8QI V4HI V2SI])
(define_mode_iterator VECINT12 [V8QI V4HI])
(define_mode_iterator VECINT24 [V4HI V2SI])
(define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")])
(define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")])
(define_mode_attr vecint
[(V8QI "V8QI") (V4HI "V4HI") (V2SI "V2SI") (V2SF "V2SI")])
(define_expand "mov<mode>"
[(set (match_operand:VECINT 0 "general_operand" "")
@ -756,7 +759,7 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_lowv8qi"
(define_insn "*vec_interleave_lowv8qi"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@ -776,7 +779,7 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_highv8qi"
(define_insn "*vec_interleave_highv8qi"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@ -796,7 +799,7 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "mix1_even"
(define_insn "*mix1_even"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@ -816,7 +819,7 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "mix1_odd"
(define_insn "*mix1_odd"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@ -872,7 +875,7 @@
"mux1 %0 = %1, @shuf"
[(set_attr "itanium_class" "mmshf")])
(define_insn "mux1_alt"
(define_insn "*mux1_alt"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
@ -900,7 +903,7 @@
"mux1 %0 = %1, @brcst"
[(set_attr "itanium_class" "mmshf")])
(define_insn "*mux1_brcst_qi"
(define_insn "mux1_brcst_qi"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_duplicate:V8QI
(match_operand:QI 1 "gr_register_operand" "r")))]
@ -908,31 +911,7 @@
"mux1 %0 = %1, @brcst"
[(set_attr "itanium_class" "mmshf")])
(define_expand "vec_extract_evenv8qi"
[(match_operand:V8QI 0 "gr_register_operand" "")
(match_operand:V8QI 1 "gr_register_operand" "")
(match_operand:V8QI 2 "gr_register_operand" "")]
""
{
rtx temp = gen_reg_rtx (V8QImode);
emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
emit_insn (gen_mux1_alt (operands[0], temp));
DONE;
})
(define_expand "vec_extract_oddv8qi"
[(match_operand:V8QI 0 "gr_register_operand" "")
(match_operand:V8QI 1 "gr_register_operand" "")
(match_operand:V8QI 2 "gr_register_operand" "")]
""
{
rtx temp = gen_reg_rtx (V8QImode);
emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
emit_insn (gen_mux1_alt (operands[0], temp));
DONE;
})
(define_insn "vec_interleave_lowv4hi"
(define_insn "*vec_interleave_lowv4hi"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
@ -950,7 +929,7 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_highv4hi"
(define_insn "*vec_interleave_highv4hi"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
@ -1034,38 +1013,6 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_expand "vec_extract_evenodd_helper"
[(set (match_operand:V4HI 0 "gr_register_operand" "")
(vec_select:V4HI
(match_operand:V4HI 1 "gr_register_operand" "")
(parallel [(const_int 0) (const_int 2)
(const_int 1) (const_int 3)])))]
"")
(define_expand "vec_extract_evenv4hi"
[(match_operand:V4HI 0 "gr_register_operand")
(match_operand:V4HI 1 "gr_reg_or_0_operand")
(match_operand:V4HI 2 "gr_reg_or_0_operand")]
""
{
rtx temp = gen_reg_rtx (V4HImode);
emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
DONE;
})
(define_expand "vec_extract_oddv4hi"
[(match_operand:V4HI 0 "gr_register_operand")
(match_operand:V4HI 1 "gr_reg_or_0_operand")
(match_operand:V4HI 2 "gr_reg_or_0_operand")]
""
{
rtx temp = gen_reg_rtx (V4HImode);
emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
DONE;
})
(define_insn "*mux2_brcst_hi"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_duplicate:V4HI
@ -1074,7 +1021,7 @@
"mux2 %0 = %1, 0"
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_lowv2si"
(define_insn "*vec_interleave_lowv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
@ -1091,7 +1038,7 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_highv2si"
(define_insn "*vec_interleave_highv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
@ -1108,36 +1055,6 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_expand "vec_extract_evenv2si"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand:V2SI 1 "gr_register_operand" "")
(match_operand:V2SI 2 "gr_register_operand" "")]
""
{
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
operands[2]));
DONE;
})
(define_expand "vec_extract_oddv2si"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand:V2SI 1 "gr_register_operand" "")
(match_operand:V2SI 2 "gr_register_operand" "")]
""
{
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
operands[2]));
DONE;
})
(define_expand "vec_initv2si"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand 1 "" "")]
@ -1479,7 +1396,7 @@
}
[(set_attr "itanium_class" "fmisc")])
(define_insn "vec_interleave_highv2sf"
(define_insn "*vec_interleave_highv2sf"
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
(vec_select:V2SF
(vec_concat:V4SF
@ -1496,7 +1413,7 @@
}
[(set_attr "itanium_class" "fmisc")])
(define_insn "vec_interleave_lowv2sf"
(define_insn "*vec_interleave_lowv2sf"
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
(vec_select:V2SF
(vec_concat:V4SF
@ -1530,58 +1447,13 @@
}
[(set_attr "itanium_class" "fmisc")])
(define_expand "vec_extract_evenv2sf"
[(match_operand:V2SF 0 "gr_register_operand" "")
(match_operand:V2SF 1 "gr_register_operand" "")
(match_operand:V2SF 2 "gr_register_operand" "")]
""
{
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
operands[2]));
DONE;
})
(define_expand "vec_extract_oddv2sf"
[(match_operand:V2SF 0 "gr_register_operand" "")
(match_operand:V2SF 1 "gr_register_operand" "")
(match_operand:V2SF 2 "gr_register_operand" "")]
""
{
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
operands[2]));
else
emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
operands[2]));
DONE;
})
(define_expand "vec_setv2sf"
[(match_operand:V2SF 0 "fr_register_operand" "")
(match_operand:SF 1 "fr_register_operand" "")
(match_operand 2 "const_int_operand" "")]
""
{
rtx op0 = operands[0];
rtx tmp = gen_reg_rtx (V2SFmode);
emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
switch (INTVAL (operands[2]))
{
case 0:
emit_insn (gen_fmix_lr (op0, tmp, op0));
break;
case 1:
emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
break;
default:
gcc_unreachable ();
}
ia64_expand_vec_setv2sf (operands);
DONE;
})
@ -1703,10 +1575,7 @@
{
rtx op1 = gen_lowpart (V8QImode, operands[1]);
rtx op2 = gen_lowpart (V8QImode, operands[2]);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
else
emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
ia64_expand_vec_perm_even_odd (operands[0], op1, op2, TARGET_BIG_ENDIAN);
DONE;
})
@ -1718,13 +1587,23 @@
{
rtx op1 = gen_lowpart (V4HImode, operands[1]);
rtx op2 = gen_lowpart (V4HImode, operands[2]);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
else
emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
ia64_expand_vec_perm_even_odd (operands[0], op1, op2, TARGET_BIG_ENDIAN);
DONE;
})
(define_expand "vec_perm_const<mode>"
[(match_operand:VEC 0 "register_operand" "")
(match_operand:VEC 1 "register_operand" "")
(match_operand:VEC 2 "register_operand" "")
(match_operand:<vecint> 3 "" "")]
""
{
if (ia64_expand_vec_perm_const (operands))
DONE;
else
FAIL;
})
;; Missing operations
;; fprcpa
;; fpsqrta