arm.c (output_move_neon): Update comment describing big-endian vector layout.

	* config/arm/arm.c (output_move_neon): Update comment describing
	big-endian vector layout.
	(arm_assemble_integer): Do not handle big-endian NEON vectors
	specially.
	* config/arm/neon.md (vec_set<mode>_internal, vec_extract<mode>,
	neon_vget_lane<mode>_sext_internal,
	neon_vget_lane<mode>_zext_internal, neon_vget_lane<mode>): Adjust
	element indices for big-endian.

From-SVN: r138847

commit 874d42b93e (parent 058514b381)
gcc/ChangeLog:

@@ -1,3 +1,14 @@
+2008-08-07  Joseph Myers  <joseph@codesourcery.com>
+
+	* config/arm/arm.c (output_move_neon): Update comment describing
+	big-endian vector layout.
+	(arm_assemble_integer): Do not handle big-endian NEON vectors
+	specially.
+	* config/arm/neon.md (vec_set<mode>_internal, vec_extract<mode>,
+	neon_vget_lane<mode>_sext_internal,
+	neon_vget_lane<mode>_zext_internal, neon_vget_lane<mode>): Adjust
+	element indices for big-endian.
+
 2008-08-07  Richard Henderson  <rth@redhat.com>
 
 	* configure.ac (HAVE_GAS_CFI_PERSONALITY_DIRECTIVE): New.
gcc/config/arm/arm.c:

@@ -10335,30 +10335,28 @@ output_move_vfp (rtx *operands)
 }
 
 /* Output a Neon quad-word load or store, or a load or store for
-   larger structure modes. We could also support post-modify forms using
-   VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
-   yet.
-   WARNING: The ordering of elements in memory is weird in big-endian mode,
-   because we use VSTM instead of VST1, to make it easy to make vector stores
-   via ARM registers write values in the same order as stores direct from Neon
-   registers.  For example, the byte ordering of a quadword vector with 16-byte
-   elements like this:
+   larger structure modes.
 
-     [e7:e6:e5:e4:e3:e2:e1:e0]  (highest-numbered element first)
+   WARNING: The ordering of elements is weird in big-endian mode,
+   because we use VSTM, as required by the EABI.  GCC RTL defines
+   element ordering based on in-memory order.  This can differ
+   from the architectural ordering of elements within a NEON register.
+   The intrinsics defined in arm_neon.h use the NEON register element
+   ordering, not the GCC RTL element ordering.
 
-   will be (with lowest address first, h = most-significant byte,
-   l = least-significant byte of element):
+   For example, the in-memory ordering of a big-endian quadword
+   vector with 16-bit elements when stored from register pair {d0,d1}
+   will be (lowest address first, d0[N] is NEON register element N):
 
-     [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
-      e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
+     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
 
-   When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
-   rN in the order:
+   When necessary, quadword registers (dN, dN+1) are moved to ARM
+   registers from rN in the order:
 
      dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
 
-   So that STM/LDM can be used on vectors in ARM registers, and the same memory
-   layout will result as if VSTM/VLDM were used. */
+   So that STM/LDM can be used on vectors in ARM registers, and the
+   same memory layout will result as if VSTM/VLDM were used.  */
 
 const char *
 output_move_neon (rtx *operands)
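The layout described by the new comment amounts to reversing the lanes within each 64-bit D register while keeping the D registers in order. The following standalone C sketch (not GCC code; the helper name and sizes are illustrative) reproduces the mapping for the 16-bit example above:

    /* Sketch: model the big-endian in-memory element order described in
       the updated comment for a quad vector stored with VSTM.  */
    #include <stdio.h>

    /* Map a memory slot index (lowest address first) to the NEON register
       lane that ends up there: lanes are reversed within each 64-bit
       D register, and d0 is stored before d1.  */
    static unsigned lane_at_mem_slot (unsigned mem_idx, unsigned elem_bytes)
    {
      unsigned lanes_per_dreg = 8 / elem_bytes;   /* elements per D register */
      return mem_idx ^ (lanes_per_dreg - 1);      /* reverse within the dreg */
    }

    int main (void)
    {
      /* 16-bit elements, quad vector: 16 bytes, 8 elements.  */
      for (unsigned m = 0; m < 8; m++)
        printf ("mem[%u] holds lane %u\n", m, lane_at_mem_slot (m, 2));
      /* Prints lanes 3 2 1 0 7 6 5 4, matching the comment's
         [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]].  */
      return 0;
    }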
gcc/config/arm/arm.c:

@@ -13326,28 +13324,16 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
   if (arm_vector_mode_supported_p (mode))
     {
       int i, units;
-      unsigned int invmask = 0, parts_per_word;
 
       gcc_assert (GET_CODE (x) == CONST_VECTOR);
 
       units = CONST_VECTOR_NUNITS (x);
       size = GET_MODE_SIZE (GET_MODE_INNER (mode));
 
-      /* For big-endian Neon vectors, we must permute the vector to the form
-         which, when loaded by a VLDR or VLDM instruction, will give a vector
-         with the elements in the right order.  */
-      if (TARGET_NEON && WORDS_BIG_ENDIAN)
-        {
-          parts_per_word = UNITS_PER_WORD / size;
-          /* FIXME: This might be wrong for 64-bit vector elements, but we don't
-             support those anywhere yet.  */
-          invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
-        }
-
       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
         for (i = 0; i < units; i++)
           {
-            rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
+            rtx elt = CONST_VECTOR_ELT (x, i);
             assemble_integer
               (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
           }
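After this hunk, arm_assemble_integer emits constant vector elements in plain GCC RTL (array) order; the deleted special case permuted the indices first. A standalone sketch of the two orders, reusing the invmask formula from the removed lines with illustrative constants (not GCC code):

    /* Sketch: emission order of the deleted big-endian special case versus
       the straight RTL order used after this patch.  */
    #include <stdio.h>

    int main (void)
    {
      unsigned units_per_word = 4;     /* bytes per ARM word (illustrative) */
      unsigned size = 2;               /* bytes per vector element */
      unsigned units = 8;              /* elements in the constant vector */

      unsigned parts_per_word = units_per_word / size;
      unsigned invmask = (parts_per_word == 0)
                         ? 0 : (1u << (parts_per_word - 1)) - 1;

      for (unsigned i = 0; i < units; i++)
        printf ("slot %u: old code emitted element %u, new code emits element %u\n",
                i, i ^ invmask, i);
      return 0;
    }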
gcc/config/arm/neon.md:

@@ -735,7 +735,10 @@
         (match_operand:SI 2 "immediate_operand" "i")))]
   "TARGET_NEON"
 {
-  operands[2] = GEN_INT (ffs ((int) INTVAL (operands[2])) - 1);
+  int elt = ffs ((int) INTVAL (operands[2])) - 1;
+  if (BYTES_BIG_ENDIAN)
+    elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+  operands[2] = GEN_INT (elt);
 
   return "vmov%?.<V_uf_sclr>\t%P0[%c2], %1";
 }
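In the vec_set<mode>_internal hunk above, operand 2 arrives as a one-hot mask selecting the RTL element to set; ffs turns it into an index, and on big-endian that index is flipped so the vmov lane matches the in-memory element. A standalone C sketch of the conversion (lane_for_mask and its arguments are illustrative, not GCC interfaces):

    /* Sketch: one-hot element mask -> vmov lane number, with the
       big-endian flip applied by the pattern above.  */
    #include <stdio.h>
    #include <strings.h>   /* ffs */

    static int lane_for_mask (int mask, int nunits, int big_endian)
    {
      int elt = ffs (mask) - 1;          /* index of the single set bit */
      if (big_endian)
        elt = nunits - 1 - elt;          /* architectural lane number */
      return elt;
    }

    int main (void)
    {
      /* Setting RTL element 2 of a 4-element vector (mask 1 << 2).  */
      printf ("little-endian lane %d, big-endian lane %d\n",
              lane_for_mask (1 << 2, 4, 0), lane_for_mask (1 << 2, 4, 1));
      return 0;
    }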
@@ -757,6 +760,9 @@
   int hi = (elem / half_elts) * 2;
   int regno = REGNO (operands[0]);
 
+  if (BYTES_BIG_ENDIAN)
+    elt = half_elts - 1 - elt;
+
   operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
   operands[2] = GEN_INT (elt);
 
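For the quad-word vec_set<mode>_internal variant above, the pattern first decides which D register of the pair holds the element and then flips only the lane index within that register. A standalone sketch of the selection arithmetic (pick_dreg_lane is an illustrative helper, not GCC code; the step of 2 mirrors the regno + hi computation in the hunk, assuming consecutive D registers are two regnos apart):

    /* Sketch: choose the D register and lane for RTL element 'elem' of a
       quad vector split over the pair (dN, dN+1).  */
    #include <stdio.h>

    static void pick_dreg_lane (int elem, int nunits, int big_endian,
                                int *dreg_offset, int *lane)
    {
      int half_elts = nunits / 2;        /* elements per D register */
      int elt = elem % half_elts;        /* lane within that register */
      int hi = (elem / half_elts) * 2;   /* regno offset: 0 for dN, 2 for dN+1 */

      if (big_endian)
        elt = half_elts - 1 - elt;       /* flip within the D register only */

      *dreg_offset = hi;
      *lane = elt;
    }

    int main (void)
    {
      int off, lane;
      /* Element 5 of an 8-element quad vector, big-endian.  */
      pick_dreg_lane (5, 8, 1, &off, &lane);
      printf ("regno offset %d (%s half of the pair), lane %d\n",
              off, off ? "high" : "low", lane);
      return 0;
    }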
@@ -804,7 +810,15 @@
         (match_operand:VD 1 "s_register_operand" "w")
         (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
   "TARGET_NEON"
-  "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      int elt = INTVAL (operands[2]);
+      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+      operands[2] = GEN_INT (elt);
+    }
+  return "vmov%?.<V_uf_sclr>\t%0, %P1[%c2]";
+}
   [(set_attr "predicable" "yes")
    (set_attr "neon_type" "neon_bp_simple")]
 )
@@ -821,6 +835,9 @@
   int hi = (INTVAL (operands[2]) / half_elts) * 2;
   int regno = REGNO (operands[1]);
 
+  if (BYTES_BIG_ENDIAN)
+    elt = half_elts - 1 - elt;
+
   operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
   operands[2] = GEN_INT (elt);
 
@@ -2413,7 +2430,15 @@
           (match_operand:VD 1 "s_register_operand" "w")
           (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_NEON"
-  "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      int elt = INTVAL (operands[2]);
+      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+      operands[2] = GEN_INT (elt);
+    }
+  return "vmov%?.s<V_sz_elem>\t%0, %P1[%c2]";
+}
   [(set_attr "predicable" "yes")
    (set_attr "neon_type" "neon_bp_simple")]
 )
@@ -2425,7 +2450,15 @@
           (match_operand:VD 1 "s_register_operand" "w")
           (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
   "TARGET_NEON"
-  "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]"
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      int elt = INTVAL (operands[2]);
+      elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
+      operands[2] = GEN_INT (elt);
+    }
+  return "vmov%?.u<V_sz_elem>\t%0, %P1[%c2]";
+}
   [(set_attr "predicable" "yes")
    (set_attr "neon_type" "neon_bp_simple")]
 )
@@ -2442,10 +2475,14 @@
   int regno = REGNO (operands[1]);
   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
   unsigned int elt = INTVAL (operands[2]);
+  unsigned int elt_adj = elt % halfelts;
+
+  if (BYTES_BIG_ENDIAN)
+    elt_adj = halfelts - 1 - elt_adj;
 
   ops[0] = operands[0];
   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
-  ops[2] = GEN_INT (elt % halfelts);
+  ops[2] = GEN_INT (elt_adj);
   output_asm_insn ("vmov%?.s<V_sz_elem>\t%0, %P1[%c2]", ops);
 
   return "";
@@ -2466,10 +2503,14 @@
   int regno = REGNO (operands[1]);
   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
   unsigned int elt = INTVAL (operands[2]);
+  unsigned int elt_adj = elt % halfelts;
+
+  if (BYTES_BIG_ENDIAN)
+    elt_adj = halfelts - 1 - elt_adj;
 
   ops[0] = operands[0];
   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
-  ops[2] = GEN_INT (elt % halfelts);
+  ops[2] = GEN_INT (elt_adj);
   output_asm_insn ("vmov%?.u<V_sz_elem>\t%0, %P1[%c2]", ops);
 
   return "";
@@ -2490,6 +2531,20 @@
 
   neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
 
+  if (BYTES_BIG_ENDIAN)
+    {
+      /* The intrinsics are defined in terms of a model where the
+         element ordering in memory is vldm order, whereas the generic
+         RTL is defined in terms of a model where the element ordering
+         in memory is array order.  Convert the lane number to conform
+         to this model.  */
+      unsigned int elt = INTVAL (operands[2]);
+      unsigned int reg_nelts
+        = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
+      elt ^= reg_nelts - 1;
+      operands[2] = GEN_INT (elt);
+    }
+
   if ((magic & 3) == 3 || GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)) == 32)
     insn = gen_vec_extract<mode> (operands[0], operands[1], operands[2]);
   else
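The neon_vget_lane<mode> expander above converts the intrinsic's lane number (NEON register order) into the RTL lane number (array order) by XORing with the per-D-register element count minus one. A standalone sketch of that conversion under the same assumptions (rtl_lane_from_intrinsic is illustrative, not a GCC function):

    /* Sketch: arm_neon.h lane number (register order) -> GCC RTL lane
       number (in-memory array order) for big-endian.  */
    #include <stdio.h>

    static unsigned rtl_lane_from_intrinsic (unsigned lane, unsigned elem_bits)
    {
      unsigned reg_nelts = 64 / elem_bits;   /* elements per D register */
      return lane ^ (reg_nelts - 1);         /* flip within the D register */
    }

    int main (void)
    {
      /* Lane 6 of a 16-bit quad vector, as an intrinsic would name it.  */
      printf ("RTL lane %u\n", rtl_lane_from_intrinsic (6, 16));  /* prints 5 */
      return 0;
    }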