ia64: Implement vec_perm_const.
* config/ia64/ia64.c (MAX_VECT_LEN): New. (struct expand_vec_perm_d): New. (TARGET_VECTORIZE_VEC_PERM_CONST_OK): New. (ia64_unpack_assemble): Use ia64_expand_vec_perm_const_1. (expand_vselect, expand_vselect_vconcat): New. (expand_vec_perm_identity, expand_vec_perm_shrp): New. (expand_vec_perm_1, expand_vec_perm_broadcast): New. (expand_vec_perm_interleave_2, expand_vec_perm_v4hi_5): New. (ia64_expand_vec_perm_const_1, ia64_expand_vec_perm_const): New. (ia64_vectorize_vec_perm_const_ok): New. (ia64_expand_vec_setv2sf, ia64_expand_vec_perm_even_odd): New. * config/ia64/ia64-protos.h: Update. * config/ia64/vect.md (VEC): New mode iterator. (vecint): New mode attribute. (vec_interleave_lowv8qi, vec_interleave_highv8qi): Privatize with '*'. (vec_interleave_lowv4hi, vec_interleave_highv4hi): Likewise. (vec_interleave_lowv2si, vec_interleave_highv2si): Likewise. (vec_interleave_lowv2sf, vec_interleave_highv2sf): Likewise. (mix1_even, mix1_odd, mux1_alt): Likewise. (mux1_brcst_qi): Remove '*' from name. (vec_extract_evenv8qi, vec_extract_oddv8qi): Remove. (vec_extract_evenv4hi, vec_extract_oddv4hi): Remove. (vec_extract_evenv2si, vec_extract_oddv2si): Remove. (vec_extract_evenv2sf, vec_extract_oddv2sf): Remove. (vec_extract_evenodd_helper): Remove. (vec_setv2sf): Use ia64_expand_vec_setv2sf. (vec_pack_trunc_v4hi): Use ia64_expand_vec_perm_even_odd. (vec_pack_trunc_v2si): Likewise. (vec_perm_const<VEC>): New. From-SVN: r182564
This commit is contained in:
parent
0f3d6c10f4
commit
e6431744b2
|
@ -1,3 +1,35 @@
|
||||||
|
2011-12-20 Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
|
* config/ia64/ia64.c (MAX_VECT_LEN): New.
|
||||||
|
(struct expand_vec_perm_d): New.
|
||||||
|
(TARGET_VECTORIZE_VEC_PERM_CONST_OK): New.
|
||||||
|
(ia64_unpack_assemble): Use ia64_expand_vec_perm_const_1.
|
||||||
|
(expand_vselect, expand_vselect_vconcat): New.
|
||||||
|
(expand_vec_perm_identity, expand_vec_perm_shrp): New.
|
||||||
|
(expand_vec_perm_1, expand_vec_perm_broadcast): New.
|
||||||
|
(expand_vec_perm_interleave_2, expand_vec_perm_v4hi_5): New.
|
||||||
|
(ia64_expand_vec_perm_const_1, ia64_expand_vec_perm_const): New.
|
||||||
|
(ia64_vectorize_vec_perm_const_ok): New.
|
||||||
|
(ia64_expand_vec_setv2sf, ia64_expand_vec_perm_even_odd): New.
|
||||||
|
* config/ia64/ia64-protos.h: Update.
|
||||||
|
* config/ia64/vect.md (VEC): New mode iterator.
|
||||||
|
(vecint): New mode attribute.
|
||||||
|
(vec_interleave_lowv8qi, vec_interleave_highv8qi): Privatize with '*'.
|
||||||
|
(vec_interleave_lowv4hi, vec_interleave_highv4hi): Likewise.
|
||||||
|
(vec_interleave_lowv2si, vec_interleave_highv2si): Likewise.
|
||||||
|
(vec_interleave_lowv2sf, vec_interleave_highv2sf): Likewise.
|
||||||
|
(mix1_even, mix1_odd, mux1_alt): Likewise.
|
||||||
|
(mux1_brcst_qi): Remove '*' from name.
|
||||||
|
(vec_extract_evenv8qi, vec_extract_oddv8qi): Remove.
|
||||||
|
(vec_extract_evenv4hi, vec_extract_oddv4hi): Remove.
|
||||||
|
(vec_extract_evenv2si, vec_extract_oddv2si): Remove.
|
||||||
|
(vec_extract_evenv2sf, vec_extract_oddv2sf): Remove.
|
||||||
|
(vec_extract_evenodd_helper): Remove.
|
||||||
|
(vec_setv2sf): Use ia64_expand_vec_setv2sf.
|
||||||
|
(vec_pack_trunc_v4hi): Use ia64_expand_vec_perm_even_odd.
|
||||||
|
(vec_pack_trunc_v2si): Likewise.
|
||||||
|
(vec_perm_const<VEC>): New.
|
||||||
|
|
||||||
2011-12-20 Richard Henderson <rth@redhat.com>
|
2011-12-20 Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
* tree-vect-generic.c (expand_vector_operations_1): Correct tests
|
* tree-vect-generic.c (expand_vector_operations_1): Correct tests
|
||||||
|
|
|
@ -61,6 +61,10 @@ extern int ia64_hard_regno_rename_ok (int, int);
|
||||||
extern enum reg_class ia64_secondary_reload_class (enum reg_class,
|
extern enum reg_class ia64_secondary_reload_class (enum reg_class,
|
||||||
enum machine_mode, rtx);
|
enum machine_mode, rtx);
|
||||||
extern const char *get_bundle_name (int);
|
extern const char *get_bundle_name (int);
|
||||||
|
|
||||||
|
extern void ia64_expand_vec_perm_even_odd (rtx, rtx, rtx, int);
|
||||||
|
extern bool ia64_expand_vec_perm_const (rtx op[4]);
|
||||||
|
extern void ia64_expand_vec_setv2sf (rtx op[3]);
|
||||||
#endif /* RTX_CODE */
|
#endif /* RTX_CODE */
|
||||||
|
|
||||||
#ifdef TREE_CODE
|
#ifdef TREE_CODE
|
||||||
|
|
|
@ -330,6 +330,24 @@ static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
|
||||||
static enum machine_mode ia64_get_reg_raw_mode (int regno);
|
static enum machine_mode ia64_get_reg_raw_mode (int regno);
|
||||||
static section * ia64_hpux_function_section (tree, enum node_frequency,
|
static section * ia64_hpux_function_section (tree, enum node_frequency,
|
||||||
bool, bool);
|
bool, bool);
|
||||||
|
|
||||||
|
static bool ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||||
|
const unsigned char *sel);
|
||||||
|
|
||||||
|
#define MAX_VECT_LEN 8
|
||||||
|
|
||||||
|
struct expand_vec_perm_d
|
||||||
|
{
|
||||||
|
rtx target, op0, op1;
|
||||||
|
unsigned char perm[MAX_VECT_LEN];
|
||||||
|
enum machine_mode vmode;
|
||||||
|
unsigned char nelt;
|
||||||
|
bool one_operand_p;
|
||||||
|
bool testing_p;
|
||||||
|
};
|
||||||
|
|
||||||
|
static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
|
||||||
|
|
||||||
|
|
||||||
/* Table of valid machine attributes. */
|
/* Table of valid machine attributes. */
|
||||||
static const struct attribute_spec ia64_attribute_table[] =
|
static const struct attribute_spec ia64_attribute_table[] =
|
||||||
|
@ -626,6 +644,9 @@ static const struct attribute_spec ia64_attribute_table[] =
|
||||||
#undef TARGET_DELAY_VARTRACK
|
#undef TARGET_DELAY_VARTRACK
|
||||||
#define TARGET_DELAY_VARTRACK true
|
#define TARGET_DELAY_VARTRACK true
|
||||||
|
|
||||||
|
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
|
||||||
|
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
|
||||||
|
|
||||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
|
@ -2027,28 +2048,28 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
|
||||||
void
|
void
|
||||||
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
|
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
|
||||||
{
|
{
|
||||||
enum machine_mode mode = GET_MODE (lo);
|
enum machine_mode vmode = GET_MODE (lo);
|
||||||
rtx (*gen) (rtx, rtx, rtx);
|
unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
|
||||||
rtx x;
|
struct expand_vec_perm_d d;
|
||||||
|
bool ok;
|
||||||
|
|
||||||
switch (mode)
|
d.target = gen_lowpart (vmode, out);
|
||||||
|
d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
|
||||||
|
d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
|
||||||
|
d.vmode = vmode;
|
||||||
|
d.nelt = nelt;
|
||||||
|
d.one_operand_p = false;
|
||||||
|
d.testing_p = false;
|
||||||
|
|
||||||
|
high = (highp ? nelt / 2 : 0);
|
||||||
|
for (i = 0; i < nelt / 2; ++i)
|
||||||
{
|
{
|
||||||
case V8QImode:
|
d.perm[i * 2] = i + high;
|
||||||
gen = highp ? gen_vec_interleave_highv8qi : gen_vec_interleave_lowv8qi;
|
d.perm[i * 2 + 1] = i + high + nelt;
|
||||||
break;
|
|
||||||
case V4HImode:
|
|
||||||
gen = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
gcc_unreachable ();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
x = gen_lowpart (mode, out);
|
ok = ia64_expand_vec_perm_const_1 (&d);
|
||||||
if (TARGET_BIG_ENDIAN)
|
gcc_assert (ok);
|
||||||
x = gen (x, hi, lo);
|
|
||||||
else
|
|
||||||
x = gen (x, lo, hi);
|
|
||||||
emit_insn (x);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return a vector of the sign-extension of VEC. */
|
/* Return a vector of the sign-extension of VEC. */
|
||||||
|
@ -11046,5 +11067,557 @@ ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
|
||||||
{
|
{
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Construct (set target (vec_select op0 (parallel perm))) and
|
||||||
|
return true if that's a valid instruction in the active ISA. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
|
||||||
|
{
|
||||||
|
rtx rperm[MAX_VECT_LEN], x;
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
rperm[i] = GEN_INT (perm[i]);
|
||||||
|
|
||||||
|
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
|
||||||
|
x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
|
||||||
|
x = gen_rtx_SET (VOIDmode, target, x);
|
||||||
|
|
||||||
|
x = emit_insn (x);
|
||||||
|
if (recog_memoized (x) < 0)
|
||||||
|
{
|
||||||
|
remove_insn (x);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Similar, but generate a vec_concat from op0 and op1 as well. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
|
||||||
|
const unsigned char *perm, unsigned nelt)
|
||||||
|
{
|
||||||
|
enum machine_mode v2mode;
|
||||||
|
rtx x;
|
||||||
|
|
||||||
|
v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
|
||||||
|
x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
|
||||||
|
return expand_vselect (target, x, perm, nelt);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to expand a no-op permutation. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vec_perm_identity (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
unsigned i, nelt = d->nelt;
|
||||||
|
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
if (d->perm[i] != i)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (!d->testing_p)
|
||||||
|
emit_move_insn (d->target, d->op0);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to expand D via a shrp instruction. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
unsigned i, nelt = d->nelt, shift, mask;
|
||||||
|
rtx tmp, op0, op1;;
|
||||||
|
|
||||||
|
/* ??? Don't force V2SFmode into the integer registers. */
|
||||||
|
if (d->vmode == V2SFmode)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);
|
||||||
|
|
||||||
|
shift = d->perm[0];
|
||||||
|
for (i = 1; i < nelt; ++i)
|
||||||
|
if (d->perm[i] != ((shift + i) & mask))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (d->testing_p)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;
|
||||||
|
|
||||||
|
/* We've eliminated the shift 0 case via expand_vec_perm_identity. */
|
||||||
|
gcc_assert (IN_RANGE (shift, 1, 63));
|
||||||
|
|
||||||
|
/* Recall that big-endian elements are numbered starting at the top of
|
||||||
|
the register. Ideally we'd have a shift-left-pair. But since we
|
||||||
|
don't, convert to a shift the other direction. */
|
||||||
|
if (BYTES_BIG_ENDIAN)
|
||||||
|
shift = 64 - shift;
|
||||||
|
|
||||||
|
tmp = gen_reg_rtx (DImode);
|
||||||
|
op0 = (shift < nelt ? d->op0 : d->op1);
|
||||||
|
op1 = (shift < nelt ? d->op1 : d->op0);
|
||||||
|
op0 = gen_lowpart (DImode, op0);
|
||||||
|
op1 = gen_lowpart (DImode, op1);
|
||||||
|
emit_insn (gen_shrp (tmp, op0, op1, GEN_INT (shift)));
|
||||||
|
|
||||||
|
emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to instantiate D in a single instruction. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
unsigned i, nelt = d->nelt;
|
||||||
|
unsigned char perm2[MAX_VECT_LEN];
|
||||||
|
|
||||||
|
/* Try single-operand selections. */
|
||||||
|
if (d->one_operand_p)
|
||||||
|
{
|
||||||
|
if (expand_vec_perm_identity (d))
|
||||||
|
return true;
|
||||||
|
if (expand_vselect (d->target, d->op0, d->perm, nelt))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try two operand selections. */
|
||||||
|
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* Recognize interleave style patterns with reversed operands. */
|
||||||
|
if (!d->one_operand_p)
|
||||||
|
{
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned e = d->perm[i];
|
||||||
|
if (e >= nelt)
|
||||||
|
e -= nelt;
|
||||||
|
else
|
||||||
|
e += nelt;
|
||||||
|
perm2[i] = e;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (expand_vec_perm_shrp (d))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* ??? Look for deposit-like permutations where most of the result
|
||||||
|
comes from one vector unchanged and the rest comes from a
|
||||||
|
sequential hunk of the other vector. */
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pattern match broadcast permutations. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
unsigned i, elt, nelt = d->nelt;
|
||||||
|
unsigned char perm2[2];
|
||||||
|
rtx temp;
|
||||||
|
bool ok;
|
||||||
|
|
||||||
|
if (!d->one_operand_p)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
elt = d->perm[0];
|
||||||
|
for (i = 1; i < nelt; ++i)
|
||||||
|
if (d->perm[i] != elt)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch (d->vmode)
|
||||||
|
{
|
||||||
|
case V2SImode:
|
||||||
|
case V2SFmode:
|
||||||
|
/* Implementable by interleave. */
|
||||||
|
perm2[0] = elt;
|
||||||
|
perm2[1] = elt + 2;
|
||||||
|
ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
|
||||||
|
gcc_assert (ok);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case V8QImode:
|
||||||
|
/* Implementable by extract + broadcast. */
|
||||||
|
if (BYTES_BIG_ENDIAN)
|
||||||
|
elt = 7 - elt;
|
||||||
|
elt *= BITS_PER_UNIT;
|
||||||
|
temp = gen_reg_rtx (DImode);
|
||||||
|
emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
|
||||||
|
GEN_INT (elt), GEN_INT (8)));
|
||||||
|
emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case V4HImode:
|
||||||
|
/* Should have been matched directly by vec_select. */
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A subroutine of ia64_expand_vec_perm_const_1. Try to simplify a
|
||||||
|
two vector permutation into a single vector permutation by using
|
||||||
|
an interleave operation to merge the vectors. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
struct expand_vec_perm_d dremap, dfinal;
|
||||||
|
unsigned char remap[2 * MAX_VECT_LEN];
|
||||||
|
unsigned contents, i, nelt, nelt2;
|
||||||
|
unsigned h0, h1, h2, h3;
|
||||||
|
rtx seq;
|
||||||
|
bool ok;
|
||||||
|
|
||||||
|
if (d->one_operand_p)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nelt = d->nelt;
|
||||||
|
nelt2 = nelt / 2;
|
||||||
|
|
||||||
|
/* Examine from whence the elements come. */
|
||||||
|
contents = 0;
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
contents |= 1u << d->perm[i];
|
||||||
|
|
||||||
|
memset (remap, 0xff, sizeof (remap));
|
||||||
|
dremap = *d;
|
||||||
|
|
||||||
|
h0 = (1u << nelt2) - 1;
|
||||||
|
h1 = h0 << nelt2;
|
||||||
|
h2 = h0 << nelt;
|
||||||
|
h3 = h0 << (nelt + nelt2);
|
||||||
|
|
||||||
|
if ((contents & (h0 | h2)) == contents) /* punpck even halves */
|
||||||
|
{
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned which = i / 2 + (i & 1 ? nelt : 0);
|
||||||
|
remap[which] = i;
|
||||||
|
dremap.perm[i] = which;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ((contents & (h1 | h3)) == contents) /* punpck odd halves */
|
||||||
|
{
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
|
||||||
|
remap[which] = i;
|
||||||
|
dremap.perm[i] = which;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ((contents & 0x5555) == contents) /* mix even elements */
|
||||||
|
{
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
|
||||||
|
remap[which] = i;
|
||||||
|
dremap.perm[i] = which;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if ((contents & 0xaaaa) == contents) /* mix odd elements */
|
||||||
|
{
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned which = (i | 1) + (i & 1 ? nelt : 0);
|
||||||
|
remap[which] = i;
|
||||||
|
dremap.perm[i] = which;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
|
||||||
|
{
|
||||||
|
unsigned shift = ctz_hwi (contents);
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned which = (i + shift) & (2 * nelt - 1);
|
||||||
|
remap[which] = i;
|
||||||
|
dremap.perm[i] = which;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Use the remapping array set up above to move the elements from their
|
||||||
|
swizzled locations into their final destinations. */
|
||||||
|
dfinal = *d;
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned e = remap[d->perm[i]];
|
||||||
|
gcc_assert (e < nelt);
|
||||||
|
dfinal.perm[i] = e;
|
||||||
|
}
|
||||||
|
dfinal.op0 = gen_reg_rtx (dfinal.vmode);
|
||||||
|
dfinal.op1 = dfinal.op0;
|
||||||
|
dfinal.one_operand_p = true;
|
||||||
|
dremap.target = dfinal.op0;
|
||||||
|
|
||||||
|
/* Test if the final remap can be done with a single insn. For V4HImode
|
||||||
|
this *will* succeed. For V8QImode or V2SImode it may not. */
|
||||||
|
start_sequence ();
|
||||||
|
ok = expand_vec_perm_1 (&dfinal);
|
||||||
|
seq = get_insns ();
|
||||||
|
end_sequence ();
|
||||||
|
if (!ok)
|
||||||
|
return false;
|
||||||
|
if (d->testing_p)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
ok = expand_vec_perm_1 (&dremap);
|
||||||
|
gcc_assert (ok);
|
||||||
|
|
||||||
|
emit_insn (seq);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A subroutine of ia64_expand_vec_perm_const_1. Emit a full V4HImode
|
||||||
|
constant permutation via two mux2 and a merge. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
unsigned char perm2[4];
|
||||||
|
rtx rmask[4];
|
||||||
|
unsigned i;
|
||||||
|
rtx t0, t1, mask, x;
|
||||||
|
bool ok;
|
||||||
|
|
||||||
|
if (d->vmode != V4HImode || d->one_operand_p)
|
||||||
|
return false;
|
||||||
|
if (d->testing_p)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
for (i = 0; i < 4; ++i)
|
||||||
|
{
|
||||||
|
perm2[i] = d->perm[i] & 3;
|
||||||
|
rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
|
||||||
|
}
|
||||||
|
mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
|
||||||
|
mask = force_reg (V4HImode, mask);
|
||||||
|
|
||||||
|
t0 = gen_reg_rtx (V4HImode);
|
||||||
|
t1 = gen_reg_rtx (V4HImode);
|
||||||
|
|
||||||
|
ok = expand_vselect (t0, d->op0, perm2, 4);
|
||||||
|
gcc_assert (ok);
|
||||||
|
ok = expand_vselect (t1, d->op1, perm2, 4);
|
||||||
|
gcc_assert (ok);
|
||||||
|
|
||||||
|
x = gen_rtx_AND (V4HImode, mask, t0);
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, t0, x));
|
||||||
|
|
||||||
|
x = gen_rtx_NOT (V4HImode, mask);
|
||||||
|
x = gen_rtx_AND (V4HImode, x, t1);
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, t1, x));
|
||||||
|
|
||||||
|
x = gen_rtx_IOR (V4HImode, t0, t1);
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, d->target, x));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   D is already fully set up; try each expansion strategy in turn
   (single insn, broadcast, merge plus single-vector permutation, the
   V4HImode five-operation sequence) and return true as soon as one of
   them succeeds.  */

static bool
ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  return (expand_vec_perm_1 (d)
          || expand_vec_perm_broadcast (d)
          || expand_vec_perm_interleave_2 (d)
          || expand_vec_perm_v4hi_5 (d));
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ia64_expand_vec_perm_const (rtx operands[4])
|
||||||
|
{
|
||||||
|
struct expand_vec_perm_d d;
|
||||||
|
unsigned char perm[MAX_VECT_LEN];
|
||||||
|
int i, nelt, which;
|
||||||
|
rtx sel;
|
||||||
|
|
||||||
|
d.target = operands[0];
|
||||||
|
d.op0 = operands[1];
|
||||||
|
d.op1 = operands[2];
|
||||||
|
sel = operands[3];
|
||||||
|
|
||||||
|
d.vmode = GET_MODE (d.target);
|
||||||
|
gcc_assert (VECTOR_MODE_P (d.vmode));
|
||||||
|
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||||
|
d.testing_p = false;
|
||||||
|
|
||||||
|
gcc_assert (GET_CODE (sel) == CONST_VECTOR);
|
||||||
|
gcc_assert (XVECLEN (sel, 0) == nelt);
|
||||||
|
gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
|
||||||
|
|
||||||
|
for (i = which = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
rtx e = XVECEXP (sel, 0, i);
|
||||||
|
int ei = INTVAL (e) & (2 * nelt - 1);
|
||||||
|
|
||||||
|
which |= (ei < nelt ? 1 : 2);
|
||||||
|
d.perm[i] = ei;
|
||||||
|
perm[i] = ei;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (which)
|
||||||
|
{
|
||||||
|
default:
|
||||||
|
gcc_unreachable();
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
if (!rtx_equal_p (d.op0, d.op1))
|
||||||
|
{
|
||||||
|
d.one_operand_p = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The elements of PERM do not suggest that only the first operand
|
||||||
|
is used, but both operands are identical. Allow easier matching
|
||||||
|
of the permutation by folding the permutation into the single
|
||||||
|
input vector. */
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
if (d.perm[i] >= nelt)
|
||||||
|
d.perm[i] -= nelt;
|
||||||
|
/* FALLTHRU */
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
d.op1 = d.op0;
|
||||||
|
d.one_operand_p = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 2:
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
d.perm[i] -= nelt;
|
||||||
|
d.op0 = d.op1;
|
||||||
|
d.one_operand_p = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ia64_expand_vec_perm_const_1 (&d))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* If the mask says both arguments are needed, but they are the same,
|
||||||
|
the above tried to expand with one_operand_p true. If that didn't
|
||||||
|
work, retry with one_operand_p false, as that's what we used in _ok. */
|
||||||
|
if (which == 3 && d.one_operand_p)
|
||||||
|
{
|
||||||
|
memcpy (d.perm, perm, sizeof (perm));
|
||||||
|
d.one_operand_p = false;
|
||||||
|
return ia64_expand_vec_perm_const_1 (&d);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Implement targetm.vectorize.vec_perm_const_ok. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
ia64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
|
||||||
|
const unsigned char *sel)
|
||||||
|
{
|
||||||
|
struct expand_vec_perm_d d;
|
||||||
|
unsigned int i, nelt, which;
|
||||||
|
bool ret;
|
||||||
|
|
||||||
|
d.vmode = vmode;
|
||||||
|
d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||||
|
d.testing_p = true;
|
||||||
|
|
||||||
|
/* Extract the values from the vector CST into the permutation
|
||||||
|
array in D. */
|
||||||
|
memcpy (d.perm, sel, nelt);
|
||||||
|
for (i = which = 0; i < nelt; ++i)
|
||||||
|
{
|
||||||
|
unsigned char e = d.perm[i];
|
||||||
|
gcc_assert (e < 2 * nelt);
|
||||||
|
which |= (e < nelt ? 1 : 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For all elements from second vector, fold the elements to first. */
|
||||||
|
if (which == 2)
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
d.perm[i] -= nelt;
|
||||||
|
|
||||||
|
/* Check whether the mask can be applied to the vector type. */
|
||||||
|
d.one_operand_p = (which != 3);
|
||||||
|
|
||||||
|
/* Otherwise we have to go through the motions and see if we can
|
||||||
|
figure out how to generate the requested permutation. */
|
||||||
|
d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
|
||||||
|
d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
|
||||||
|
if (!d.one_operand_p)
|
||||||
|
d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
|
||||||
|
|
||||||
|
start_sequence ();
|
||||||
|
ret = ia64_expand_vec_perm_const_1 (&d);
|
||||||
|
end_sequence ();
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ia64_expand_vec_setv2sf (rtx operands[3])
|
||||||
|
{
|
||||||
|
struct expand_vec_perm_d d;
|
||||||
|
unsigned int which;
|
||||||
|
bool ok;
|
||||||
|
|
||||||
|
d.target = operands[0];
|
||||||
|
d.op0 = operands[0];
|
||||||
|
d.op1 = gen_reg_rtx (V2SFmode);
|
||||||
|
d.vmode = V2SFmode;
|
||||||
|
d.nelt = 2;
|
||||||
|
d.one_operand_p = false;
|
||||||
|
d.testing_p = false;
|
||||||
|
|
||||||
|
which = INTVAL (operands[2]);
|
||||||
|
gcc_assert (which <= 1);
|
||||||
|
d.perm[0] = 1 - which;
|
||||||
|
d.perm[1] = which + 2;
|
||||||
|
|
||||||
|
emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));
|
||||||
|
|
||||||
|
ok = ia64_expand_vec_perm_const_1 (&d);
|
||||||
|
gcc_assert (ok);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
|
||||||
|
{
|
||||||
|
struct expand_vec_perm_d d;
|
||||||
|
enum machine_mode vmode = GET_MODE (target);
|
||||||
|
unsigned int i, nelt = GET_MODE_NUNITS (vmode);
|
||||||
|
bool ok;
|
||||||
|
|
||||||
|
d.target = target;
|
||||||
|
d.op0 = op0;
|
||||||
|
d.op1 = op1;
|
||||||
|
d.vmode = vmode;
|
||||||
|
d.nelt = nelt;
|
||||||
|
d.one_operand_p = false;
|
||||||
|
d.testing_p = false;
|
||||||
|
|
||||||
|
for (i = 0; i < nelt; ++i)
|
||||||
|
d.perm[i] = i * 2 + odd;
|
||||||
|
|
||||||
|
ok = ia64_expand_vec_perm_const_1 (&d);
|
||||||
|
gcc_assert (ok);
|
||||||
|
}
|
||||||
|
|
||||||
#include "gt-ia64.h"
|
#include "gt-ia64.h"
|
||||||
|
|
|
@ -20,11 +20,14 @@
|
||||||
|
|
||||||
;; Integer vector operations
|
;; Integer vector operations
|
||||||
|
|
||||||
|
(define_mode_iterator VEC [V8QI V4HI V2SI V2SF])
|
||||||
(define_mode_iterator VECINT [V8QI V4HI V2SI])
|
(define_mode_iterator VECINT [V8QI V4HI V2SI])
|
||||||
(define_mode_iterator VECINT12 [V8QI V4HI])
|
(define_mode_iterator VECINT12 [V8QI V4HI])
|
||||||
(define_mode_iterator VECINT24 [V4HI V2SI])
|
(define_mode_iterator VECINT24 [V4HI V2SI])
|
||||||
(define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")])
|
(define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")])
|
||||||
(define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")])
|
(define_mode_attr vecwider [(V8QI "V4HI") (V4HI "V2SI")])
|
||||||
|
(define_mode_attr vecint
|
||||||
|
[(V8QI "V8QI") (V4HI "V4HI") (V2SI "V2SI") (V2SF "V2SI")])
|
||||||
|
|
||||||
(define_expand "mov<mode>"
|
(define_expand "mov<mode>"
|
||||||
[(set (match_operand:VECINT 0 "general_operand" "")
|
[(set (match_operand:VECINT 0 "general_operand" "")
|
||||||
|
@ -756,7 +759,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "vec_interleave_lowv8qi"
|
(define_insn "*vec_interleave_lowv8qi"
|
||||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V8QI
|
(vec_select:V8QI
|
||||||
(vec_concat:V16QI
|
(vec_concat:V16QI
|
||||||
|
@ -776,7 +779,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "vec_interleave_highv8qi"
|
(define_insn "*vec_interleave_highv8qi"
|
||||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V8QI
|
(vec_select:V8QI
|
||||||
(vec_concat:V16QI
|
(vec_concat:V16QI
|
||||||
|
@ -796,7 +799,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "mix1_even"
|
(define_insn "*mix1_even"
|
||||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V8QI
|
(vec_select:V8QI
|
||||||
(vec_concat:V16QI
|
(vec_concat:V16QI
|
||||||
|
@ -816,7 +819,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "mix1_odd"
|
(define_insn "*mix1_odd"
|
||||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V8QI
|
(vec_select:V8QI
|
||||||
(vec_concat:V16QI
|
(vec_concat:V16QI
|
||||||
|
@ -872,7 +875,7 @@
|
||||||
"mux1 %0 = %1, @shuf"
|
"mux1 %0 = %1, @shuf"
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "mux1_alt"
|
(define_insn "*mux1_alt"
|
||||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V8QI
|
(vec_select:V8QI
|
||||||
(match_operand:V8QI 1 "gr_register_operand" "r")
|
(match_operand:V8QI 1 "gr_register_operand" "r")
|
||||||
|
@ -900,7 +903,7 @@
|
||||||
"mux1 %0 = %1, @brcst"
|
"mux1 %0 = %1, @brcst"
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "*mux1_brcst_qi"
|
(define_insn "mux1_brcst_qi"
|
||||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||||
(vec_duplicate:V8QI
|
(vec_duplicate:V8QI
|
||||||
(match_operand:QI 1 "gr_register_operand" "r")))]
|
(match_operand:QI 1 "gr_register_operand" "r")))]
|
||||||
|
@ -908,31 +911,7 @@
|
||||||
"mux1 %0 = %1, @brcst"
|
"mux1 %0 = %1, @brcst"
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_expand "vec_extract_evenv8qi"
|
(define_insn "*vec_interleave_lowv4hi"
|
||||||
[(match_operand:V8QI 0 "gr_register_operand" "")
|
|
||||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
|
||||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
rtx temp = gen_reg_rtx (V8QImode);
|
|
||||||
emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
|
|
||||||
emit_insn (gen_mux1_alt (operands[0], temp));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_expand "vec_extract_oddv8qi"
|
|
||||||
[(match_operand:V8QI 0 "gr_register_operand" "")
|
|
||||||
(match_operand:V8QI 1 "gr_register_operand" "")
|
|
||||||
(match_operand:V8QI 2 "gr_register_operand" "")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
rtx temp = gen_reg_rtx (V8QImode);
|
|
||||||
emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
|
|
||||||
emit_insn (gen_mux1_alt (operands[0], temp));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_insn "vec_interleave_lowv4hi"
|
|
||||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V4HI
|
(vec_select:V4HI
|
||||||
(vec_concat:V8HI
|
(vec_concat:V8HI
|
||||||
|
@ -950,7 +929,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "vec_interleave_highv4hi"
|
(define_insn "*vec_interleave_highv4hi"
|
||||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V4HI
|
(vec_select:V4HI
|
||||||
(vec_concat:V8HI
|
(vec_concat:V8HI
|
||||||
|
@ -1034,38 +1013,6 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_expand "vec_extract_evenodd_helper"
|
|
||||||
[(set (match_operand:V4HI 0 "gr_register_operand" "")
|
|
||||||
(vec_select:V4HI
|
|
||||||
(match_operand:V4HI 1 "gr_register_operand" "")
|
|
||||||
(parallel [(const_int 0) (const_int 2)
|
|
||||||
(const_int 1) (const_int 3)])))]
|
|
||||||
"")
|
|
||||||
|
|
||||||
(define_expand "vec_extract_evenv4hi"
|
|
||||||
[(match_operand:V4HI 0 "gr_register_operand")
|
|
||||||
(match_operand:V4HI 1 "gr_reg_or_0_operand")
|
|
||||||
(match_operand:V4HI 2 "gr_reg_or_0_operand")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
rtx temp = gen_reg_rtx (V4HImode);
|
|
||||||
emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
|
|
||||||
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_expand "vec_extract_oddv4hi"
|
|
||||||
[(match_operand:V4HI 0 "gr_register_operand")
|
|
||||||
(match_operand:V4HI 1 "gr_reg_or_0_operand")
|
|
||||||
(match_operand:V4HI 2 "gr_reg_or_0_operand")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
rtx temp = gen_reg_rtx (V4HImode);
|
|
||||||
emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
|
|
||||||
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_insn "*mux2_brcst_hi"
|
(define_insn "*mux2_brcst_hi"
|
||||||
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
|
||||||
(vec_duplicate:V4HI
|
(vec_duplicate:V4HI
|
||||||
|
@ -1074,7 +1021,7 @@
|
||||||
"mux2 %0 = %1, 0"
|
"mux2 %0 = %1, 0"
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "vec_interleave_lowv2si"
|
(define_insn "*vec_interleave_lowv2si"
|
||||||
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V2SI
|
(vec_select:V2SI
|
||||||
(vec_concat:V4SI
|
(vec_concat:V4SI
|
||||||
|
@ -1091,7 +1038,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_insn "vec_interleave_highv2si"
|
(define_insn "*vec_interleave_highv2si"
|
||||||
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
|
||||||
(vec_select:V2SI
|
(vec_select:V2SI
|
||||||
(vec_concat:V4SI
|
(vec_concat:V4SI
|
||||||
|
@ -1108,36 +1055,6 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "mmshf")])
|
[(set_attr "itanium_class" "mmshf")])
|
||||||
|
|
||||||
(define_expand "vec_extract_evenv2si"
|
|
||||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SI 1 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SI 2 "gr_register_operand" "")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
if (TARGET_BIG_ENDIAN)
|
|
||||||
emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
else
|
|
||||||
emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_expand "vec_extract_oddv2si"
|
|
||||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SI 1 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SI 2 "gr_register_operand" "")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
if (TARGET_BIG_ENDIAN)
|
|
||||||
emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
else
|
|
||||||
emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_expand "vec_initv2si"
|
(define_expand "vec_initv2si"
|
||||||
[(match_operand:V2SI 0 "gr_register_operand" "")
|
[(match_operand:V2SI 0 "gr_register_operand" "")
|
||||||
(match_operand 1 "" "")]
|
(match_operand 1 "" "")]
|
||||||
|
@ -1479,7 +1396,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "fmisc")])
|
[(set_attr "itanium_class" "fmisc")])
|
||||||
|
|
||||||
(define_insn "vec_interleave_highv2sf"
|
(define_insn "*vec_interleave_highv2sf"
|
||||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||||
(vec_select:V2SF
|
(vec_select:V2SF
|
||||||
(vec_concat:V4SF
|
(vec_concat:V4SF
|
||||||
|
@ -1496,7 +1413,7 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "fmisc")])
|
[(set_attr "itanium_class" "fmisc")])
|
||||||
|
|
||||||
(define_insn "vec_interleave_lowv2sf"
|
(define_insn "*vec_interleave_lowv2sf"
|
||||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||||
(vec_select:V2SF
|
(vec_select:V2SF
|
||||||
(vec_concat:V4SF
|
(vec_concat:V4SF
|
||||||
|
@ -1530,58 +1447,13 @@
|
||||||
}
|
}
|
||||||
[(set_attr "itanium_class" "fmisc")])
|
[(set_attr "itanium_class" "fmisc")])
|
||||||
|
|
||||||
(define_expand "vec_extract_evenv2sf"
|
|
||||||
[(match_operand:V2SF 0 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SF 1 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SF 2 "gr_register_operand" "")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
if (TARGET_BIG_ENDIAN)
|
|
||||||
emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
else
|
|
||||||
emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_expand "vec_extract_oddv2sf"
|
|
||||||
[(match_operand:V2SF 0 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SF 1 "gr_register_operand" "")
|
|
||||||
(match_operand:V2SF 2 "gr_register_operand" "")]
|
|
||||||
""
|
|
||||||
{
|
|
||||||
if (TARGET_BIG_ENDIAN)
|
|
||||||
emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
else
|
|
||||||
emit_insn (gen_vec_interleave_highv2sf (operands[0], operands[1],
|
|
||||||
operands[2]));
|
|
||||||
DONE;
|
|
||||||
})
|
|
||||||
|
|
||||||
(define_expand "vec_setv2sf"
|
(define_expand "vec_setv2sf"
|
||||||
[(match_operand:V2SF 0 "fr_register_operand" "")
|
[(match_operand:V2SF 0 "fr_register_operand" "")
|
||||||
(match_operand:SF 1 "fr_register_operand" "")
|
(match_operand:SF 1 "fr_register_operand" "")
|
||||||
(match_operand 2 "const_int_operand" "")]
|
(match_operand 2 "const_int_operand" "")]
|
||||||
""
|
""
|
||||||
{
|
{
|
||||||
rtx op0 = operands[0];
|
ia64_expand_vec_setv2sf (operands);
|
||||||
rtx tmp = gen_reg_rtx (V2SFmode);
|
|
||||||
|
|
||||||
emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
|
|
||||||
|
|
||||||
switch (INTVAL (operands[2]))
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
emit_insn (gen_fmix_lr (op0, tmp, op0));
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
gcc_unreachable ();
|
|
||||||
}
|
|
||||||
DONE;
|
DONE;
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -1703,10 +1575,7 @@
|
||||||
{
|
{
|
||||||
rtx op1 = gen_lowpart (V8QImode, operands[1]);
|
rtx op1 = gen_lowpart (V8QImode, operands[1]);
|
||||||
rtx op2 = gen_lowpart (V8QImode, operands[2]);
|
rtx op2 = gen_lowpart (V8QImode, operands[2]);
|
||||||
if (TARGET_BIG_ENDIAN)
|
ia64_expand_vec_perm_even_odd (operands[0], op1, op2, TARGET_BIG_ENDIAN);
|
||||||
emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
|
|
||||||
else
|
|
||||||
emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
|
|
||||||
DONE;
|
DONE;
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -1718,13 +1587,23 @@
|
||||||
{
|
{
|
||||||
rtx op1 = gen_lowpart (V4HImode, operands[1]);
|
rtx op1 = gen_lowpart (V4HImode, operands[1]);
|
||||||
rtx op2 = gen_lowpart (V4HImode, operands[2]);
|
rtx op2 = gen_lowpart (V4HImode, operands[2]);
|
||||||
if (TARGET_BIG_ENDIAN)
|
ia64_expand_vec_perm_even_odd (operands[0], op1, op2, TARGET_BIG_ENDIAN);
|
||||||
emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
|
|
||||||
else
|
|
||||||
emit_insn (gen_vec_extract_evenv4hi (operands[0], op1, op2));
|
|
||||||
DONE;
|
DONE;
|
||||||
})
|
})
|
||||||
|
|
||||||
|
(define_expand "vec_perm_const<mode>"
|
||||||
|
[(match_operand:VEC 0 "register_operand" "")
|
||||||
|
(match_operand:VEC 1 "register_operand" "")
|
||||||
|
(match_operand:VEC 2 "register_operand" "")
|
||||||
|
(match_operand:<vecint> 3 "" "")]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
if (ia64_expand_vec_perm_const (operands))
|
||||||
|
DONE;
|
||||||
|
else
|
||||||
|
FAIL;
|
||||||
|
})
|
||||||
|
|
||||||
;; Missing operations
|
;; Missing operations
|
||||||
;; fprcpa
|
;; fprcpa
|
||||||
;; fpsqrta
|
;; fpsqrta
|
||||||
|
|
Loading…
Reference in New Issue