re PR target/63724 ([AArch64] Inefficient immediate expansion and hoisting.)

Fix PR target/63724

2014-11-14  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>

	PR target/63724
        * config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
        numerical immediate handling to...
        (aarch64_internal_mov_immediate): ...this. New.
        (aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
        (aarch64_mov_operand_p): Relax predicate.
        * config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
        (*movsi_aarch64): Turn into define_insn_and_split and add new
        alternative for 'n'.
        (*movdi_aarch64): Likewise.

From-SVN: r217546
This commit is contained in:
Ramana Radhakrishnan 2014-11-14 09:44:17 +00:00 committed by Ramana Radhakrishnan
parent a7f24614b3
commit 8261494856
3 changed files with 318 additions and 226 deletions

View File

@ -1,3 +1,16 @@
2014-11-14 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
PR target/63724
* config/aarch64/aarch64.c (aarch64_expand_mov_immediate): Split out
numerical immediate handling to...
(aarch64_internal_mov_immediate): ...this. New.
(aarch64_rtx_costs): Use aarch64_internal_mov_immediate.
(aarch64_mov_operand_p): Relax predicate.
* config/aarch64/aarch64.md (mov<mode>:GPI): Do not expand CONST_INTs.
(*movsi_aarch64): Turn into define_insn_and_split and add new
alternative for 'n'.
(*movdi_aarch64): Likewise.
2014-11-14 Richard Biener <rguenther@suse.de>
* match.pd: Implement more binary patterns exercised by

View File

@ -1046,10 +1046,10 @@ aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
return plus_constant (mode, reg, offset);
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
machine_mode mode)
{
machine_mode mode = GET_MODE (dest);
unsigned HOST_WIDE_INT mask;
int i;
bool first;
@ -1057,6 +1057,271 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
bool subtargets;
rtx subtarget;
int one_match, zero_match, first_not_ffff_match;
int num_insns = 0;
if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
{
if (generate)
emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
num_insns++;
return num_insns;
}
if (mode == SImode)
{
/* We know we can't do this in 1 insn, and we must be able to do it
in two; so don't mess around looking for sequences that don't buy
us anything. */
if (generate)
{
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (INTVAL (imm) & 0xffff)));
emit_insn (gen_insv_immsi (dest, GEN_INT (16),
GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
}
num_insns += 2;
return num_insns;
}
/* Remaining cases are all for DImode. */
val = INTVAL (imm);
subtargets = optimize && can_create_pseudo_p ();
one_match = 0;
zero_match = 0;
mask = 0xffff;
first_not_ffff_match = -1;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
if ((val & mask) == mask)
one_match++;
else
{
if (first_not_ffff_match < 0)
first_not_ffff_match = i;
if ((val & mask) == 0)
zero_match++;
}
}
if (one_match == 2)
{
/* Set one of the quarters and then insert back into result. */
mask = 0xffffll << first_not_ffff_match;
if (generate)
{
emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
GEN_INT ((val >> first_not_ffff_match)
& 0xffff)));
}
num_insns += 2;
return num_insns;
}
if (zero_match == 2)
goto simple_sequence;
mask = 0x0ffff0000UL;
for (i = 16; i < 64; i += 16, mask <<= 16)
{
HOST_WIDE_INT comp = mask & ~(mask - 1);
if (aarch64_uimm12_shift (val - (val & mask)))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (val & mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - (val & mask))));
}
num_insns += 2;
return num_insns;
}
else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT ((val + comp) & mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - ((val + comp) & mask))));
}
num_insns += 2;
return num_insns;
}
else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT ((val - comp) | ~mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - ((val - comp) | ~mask))));
}
num_insns += 2;
return num_insns;
}
else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (val | ~mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - (val | ~mask))));
}
num_insns += 2;
return num_insns;
}
}
/* See if we can do it by arithmetically combining two
immediates. */
for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
int j;
mask = 0xffff;
if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
|| aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - aarch64_bitmasks[i])));
}
num_insns += 2;
return num_insns;
}
for (j = 0; j < 64; j += 16, mask <<= 16)
{
if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
{
if (generate)
{
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_insv_immdi (dest, GEN_INT (j),
GEN_INT ((val >> j) & 0xffff)));
}
num_insns += 2;
return num_insns;
}
}
}
/* See if we can do it by logically combining two immediates. */
for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
{
int j;
for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (mode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_iordi3 (dest, subtarget,
GEN_INT (aarch64_bitmasks[j])));
}
num_insns += 2;
return num_insns;
}
}
else if ((val & aarch64_bitmasks[i]) == val)
{
int j;
for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (mode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[j])));
emit_insn (gen_anddi3 (dest, subtarget,
GEN_INT (aarch64_bitmasks[i])));
}
num_insns += 2;
return num_insns;
}
}
}
if (one_match > zero_match)
{
/* Set either first three quarters or all but the third. */
mask = 0xffffll << (16 - first_not_ffff_match);
if (generate)
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (val | mask | 0xffffffff00000000ull)));
num_insns ++;
/* Now insert other two quarters. */
for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
i < 64; i += 16, mask <<= 16)
{
if ((val & mask) != mask)
{
if (generate)
emit_insn (gen_insv_immdi (dest, GEN_INT (i),
GEN_INT ((val >> i) & 0xffff)));
num_insns ++;
}
}
return num_insns;
}
simple_sequence:
first = true;
mask = 0xffff;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
if ((val & mask) != 0)
{
if (first)
{
if (generate)
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (val & mask)));
num_insns ++;
first = false;
}
else
{
if (generate)
emit_insn (gen_insv_immdi (dest, GEN_INT (i),
GEN_INT ((val >> i) & 0xffff)));
num_insns ++;
}
}
}
return num_insns;
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
machine_mode mode = GET_MODE (dest);
gcc_assert (mode == SImode || mode == DImode);
@ -1118,12 +1383,6 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
}
}
if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
{
emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
return;
}
if (!CONST_INT_P (imm))
{
if (GET_CODE (imm) == HIGH)
@ -1138,203 +1397,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
}
if (mode == SImode)
{
/* We know we can't do this in 1 insn, and we must be able to do it
in two; so don't mess around looking for sequences that don't buy
us anything. */
emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
emit_insn (gen_insv_immsi (dest, GEN_INT (16),
GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
return;
}
/* Remaining cases are all for DImode. */
val = INTVAL (imm);
subtargets = optimize && can_create_pseudo_p ();
one_match = 0;
zero_match = 0;
mask = 0xffff;
first_not_ffff_match = -1;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
if ((val & mask) == mask)
one_match++;
else
{
if (first_not_ffff_match < 0)
first_not_ffff_match = i;
if ((val & mask) == 0)
zero_match++;
}
}
if (one_match == 2)
{
/* Set one of the quarters and then insert back into result. */
mask = 0xffffll << first_not_ffff_match;
emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
GEN_INT ((val >> first_not_ffff_match)
& 0xffff)));
return;
}
if (zero_match == 2)
goto simple_sequence;
mask = 0x0ffff0000UL;
for (i = 16; i < 64; i += 16, mask <<= 16)
{
HOST_WIDE_INT comp = mask & ~(mask - 1);
if (aarch64_uimm12_shift (val - (val & mask)))
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - (val & mask))));
return;
}
else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT ((val + comp) & mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - ((val + comp) & mask))));
return;
}
else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT ((val - comp) | ~mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - ((val - comp) | ~mask))));
return;
}
else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (val | ~mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - (val | ~mask))));
return;
}
}
/* See if we can do it by arithmetically combining two
immediates. */
for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
int j;
mask = 0xffff;
if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
|| aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - aarch64_bitmasks[i])));
return;
}
for (j = 0; j < 64; j += 16, mask <<= 16)
{
if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
{
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_insv_immdi (dest, GEN_INT (j),
GEN_INT ((val >> j) & 0xffff)));
return;
}
}
}
/* See if we can do it by logically combining two immediates. */
for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
{
int j;
for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
{
subtarget = subtargets ? gen_reg_rtx (mode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_iordi3 (dest, subtarget,
GEN_INT (aarch64_bitmasks[j])));
return;
}
}
else if ((val & aarch64_bitmasks[i]) == val)
{
int j;
for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
{
subtarget = subtargets ? gen_reg_rtx (mode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[j])));
emit_insn (gen_anddi3 (dest, subtarget,
GEN_INT (aarch64_bitmasks[i])));
return;
}
}
}
if (one_match > zero_match)
{
/* Set either first three quarters or all but the third. */
mask = 0xffffll << (16 - first_not_ffff_match);
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (val | mask | 0xffffffff00000000ull)));
/* Now insert other two quarters. */
for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
i < 64; i += 16, mask <<= 16)
{
if ((val & mask) != mask)
emit_insn (gen_insv_immdi (dest, GEN_INT (i),
GEN_INT ((val >> i) & 0xffff)));
}
return;
}
simple_sequence:
first = true;
mask = 0xffff;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
if ((val & mask) != 0)
{
if (first)
{
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (val & mask)));
first = false;
}
else
emit_insn (gen_insv_immdi (dest, GEN_INT (i),
GEN_INT ((val >> i) & 0xffff)));
}
}
aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}
static bool
@ -5240,9 +5303,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
proportionally expensive to the number of instructions
required to build that constant. This is true whether we
are compiling for SPEED or otherwise. */
*cost = COSTS_N_INSNS (aarch64_build_constant (0,
INTVAL (x),
false));
*cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
(NULL_RTX, x, false, mode));
}
return true;
@ -8041,7 +8103,7 @@ aarch64_mov_operand_p (rtx x,
&& aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
return true;
if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
if (CONST_INT_P (x))
return true;
if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))

View File

@ -746,17 +746,20 @@
if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx)
operands[1] = force_reg (<MODE>mode, operands[1]);
if (CONSTANT_P (operands[1]))
{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}
/* FIXME: RR we still need to fix up what we are doing with
symbol_refs and other types of constants. */
if (CONSTANT_P (operands[1])
&& !CONST_INT_P (operands[1]))
{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}
"
)
(define_insn "*movsi_aarch64"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r ,*w, r,*w")
(match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
(define_insn_and_split "*movsi_aarch64"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r ,*w, r,*w")
(match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m, m,rZ,*w,S,Ush,rZ,*w,*w"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
"@
@ -764,6 +767,7 @@
mov\\t%w0, %w1
mov\\t%w0, %w1
mov\\t%w0, %1
#
ldr\\t%w0, %1
ldr\\t%s0, %1
str\\t%w1, %0
@ -773,14 +777,20 @@
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1"
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)"
[(const_int 0)]
"{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
adr,adr,f_mcr,f_mrc,fmov")
(set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
(set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
)
(define_insn "*movdi_aarch64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w")
(match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
(define_insn_and_split "*movdi_aarch64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,*w,m, m,r,r, *w, r,*w,w")
(match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,n,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
@ -788,6 +798,7 @@
mov\\t%0, %x1
mov\\t%x0, %1
mov\\t%x0, %1
#
ldr\\t%x0, %1
ldr\\t%d0, %1
str\\t%x1, %0
@ -798,10 +809,16 @@
fmov\\t%x0, %d1
fmov\\t%d0, %d1
movi\\t%d0, %1"
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
"(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode))"
[(const_int 0)]
"{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
adr,adr,f_mcr,f_mrc,fmov,fmov")
(set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
(set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
(set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
(set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn "insv_imm<mode>"