i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New.
* i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New. (sse_prologue_save_insn expander): Use new pattern. (sse_prologue_save_insn1): New pattern and splitter. (sse_prologue_save_insn): Update to deal also with 64bit aligned blocks. * i386.c (setup_incoming_varargs_64): Do not compute jump destination here. (ix86_gimplify_va_arg): Update alignment needed. (ix86_local_alignment): Do not align all local arrays to 128bit. From-SVN: r158483
This commit is contained in:
parent
0d29aedcb8
commit
07b3ef2e78
|
@ -1,3 +1,15 @@
|
|||
2010-04-18 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* i386.md (UNSPEC_SSE_PROLOGUE_SAVE_LOW): New.
|
||||
(sse_prologue_save_insn expander): Use new pattern.
|
||||
(sse_prologue_save_insn1): New pattern and splitter.
|
||||
(sse_prologue_save_insn): Update to deal also with 64bit aligned
|
||||
blocks.
|
||||
* i386.c (setup_incoming_varargs_64): Do not compute jump destination here.
|
||||
(ix86_gimplify_va_arg): Update alignment needed.
|
||||
(ix86_local_alignment): Do not align all local arrays
|
||||
to 128bit.
|
||||
|
||||
2010-04-17 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* ipa-inline.c (cgraph_early_inlining): Handle flattening too.
|
||||
|
|
|
@ -6790,7 +6790,6 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
|
|||
{
|
||||
rtx save_area, mem;
|
||||
rtx label;
|
||||
rtx label_ref;
|
||||
rtx tmp_reg;
|
||||
rtx nsse_reg;
|
||||
alias_set_type set;
|
||||
|
@ -6841,35 +6840,9 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
|
|||
SSE saves. We need some preparation work to get this working. */
|
||||
|
||||
label = gen_label_rtx ();
|
||||
label_ref = gen_rtx_LABEL_REF (Pmode, label);
|
||||
|
||||
/* Compute address to jump to :
|
||||
label - eax*4 + nnamed_sse_arguments*4 Or
|
||||
label - eax*5 + nnamed_sse_arguments*5 for AVX. */
|
||||
tmp_reg = gen_reg_rtx (Pmode);
|
||||
nsse_reg = gen_reg_rtx (Pmode);
|
||||
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
|
||||
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
|
||||
gen_rtx_MULT (Pmode, nsse_reg,
|
||||
GEN_INT (4))));
|
||||
|
||||
/* vmovaps is one byte longer than movaps. */
|
||||
if (TARGET_AVX)
|
||||
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
|
||||
gen_rtx_PLUS (Pmode, tmp_reg,
|
||||
nsse_reg)));
|
||||
|
||||
if (cum->sse_regno)
|
||||
emit_move_insn
|
||||
(nsse_reg,
|
||||
gen_rtx_CONST (DImode,
|
||||
gen_rtx_PLUS (DImode,
|
||||
label_ref,
|
||||
GEN_INT (cum->sse_regno
|
||||
* (TARGET_AVX ? 5 : 4)))));
|
||||
else
|
||||
emit_move_insn (nsse_reg, label_ref);
|
||||
emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
|
||||
|
||||
/* Compute address of memory block we save into. We always use pointer
|
||||
pointing 127 bytes after first byte to store - this is needed to keep
|
||||
|
@ -6882,11 +6855,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
|
|||
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
|
||||
MEM_NOTRAP_P (mem) = 1;
|
||||
set_mem_alias_set (mem, set);
|
||||
set_mem_align (mem, BITS_PER_WORD);
|
||||
set_mem_align (mem, 64);
|
||||
|
||||
/* And finally do the dirty job! */
|
||||
emit_insn (gen_sse_prologue_save (mem, nsse_reg,
|
||||
GEN_INT (cum->sse_regno), label));
|
||||
GEN_INT (cum->sse_regno), label,
|
||||
gen_reg_rtx (Pmode)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7047,7 +7021,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
|||
int indirect_p = 0;
|
||||
tree ptrtype;
|
||||
enum machine_mode nat_mode;
|
||||
int arg_boundary;
|
||||
unsigned int arg_boundary;
|
||||
|
||||
/* Only 64bit target needs something special. */
|
||||
if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
|
||||
|
@ -7279,6 +7253,8 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
|||
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
|
||||
size_int (-align));
|
||||
t = fold_convert (TREE_TYPE (ovf), t);
|
||||
if (crtl->stack_alignment_needed < arg_boundary)
|
||||
crtl->stack_alignment_needed = arg_boundary;
|
||||
}
|
||||
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
|
||||
gimplify_assign (addr, t, pre_p);
|
||||
|
@ -20099,10 +20075,26 @@ ix86_local_alignment (tree exp, enum machine_mode mode,
|
|||
}
|
||||
|
||||
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
|
||||
to 16byte boundary. */
|
||||
if (TARGET_64BIT)
|
||||
to 16byte boundary. Exact wording is:
|
||||
|
||||
An array uses the same alignment as its elements, except that a local or
|
||||
global array variable of length at least 16 bytes or
|
||||
a C99 variable-length array variable always has alignment of at least 16 bytes.
|
||||
|
||||
This was added to allow use of aligned SSE instructions on arrays. This
|
||||
rule is meant for static storage (where the compiler cannot do the analysis
|
||||
by itself). We follow it for automatic variables only when convenient.
|
||||
We fully control everything in the function compiled and functions from
|
||||
other units cannot rely on the alignment.
|
||||
|
||||
Exclude va_list type. It is the common case of local array where
|
||||
we cannot benefit from the alignment. */
|
||||
if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
|
||||
&& TARGET_SSE)
|
||||
{
|
||||
if (AGGREGATE_TYPE_P (type)
|
||||
&& (TYPE_MAIN_VARIANT (type)
|
||||
!= TYPE_MAIN_VARIANT (va_list_type_node))
|
||||
&& TYPE_SIZE (type)
|
||||
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
|
||||
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
|
||||
|
|
|
@ -85,6 +85,7 @@
|
|||
(UNSPEC_SET_RIP 16)
|
||||
(UNSPEC_SET_GOT_OFFSET 17)
|
||||
(UNSPEC_MEMORY_BLOCKAGE 18)
|
||||
(UNSPEC_SSE_PROLOGUE_SAVE_LOW 19)
|
||||
|
||||
; TLS support
|
||||
(UNSPEC_TP 20)
|
||||
|
@ -18441,13 +18442,110 @@
|
|||
(reg:DI XMM5_REG)
|
||||
(reg:DI XMM6_REG)
|
||||
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(use (match_operand:DI 1 "register_operand" ""))
|
||||
(clobber (match_operand:DI 1 "register_operand" ""))
|
||||
(use (match_operand:DI 2 "immediate_operand" ""))
|
||||
(use (label_ref:DI (match_operand 3 "" "")))])]
|
||||
(use (label_ref:DI (match_operand 3 "" "")))
|
||||
(clobber (match_operand:DI 4 "register_operand" ""))
|
||||
(use (match_dup 1))])]
|
||||
"TARGET_64BIT"
|
||||
"")
|
||||
|
||||
(define_insn "*sse_prologue_save_insn"
|
||||
;; Pre-reload version of prologue save. Until after prologue generation we don't know
|
||||
;; what the size of save instruction will be.
|
||||
;; Operand 0+operand 6 is the memory save area
|
||||
;; Operand 1 is number of registers to save (clobbered; the input value is operand 5, tied to operand 1)
|
||||
;; Operand 2 is number of non-varargs SSE arguments
|
||||
;; Operand 3 is label starting the save block
|
||||
;; Operand 4 is used for temporary computation of jump address
|
||||
(define_insn "*sse_prologue_save_insn1"
|
||||
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
|
||||
(match_operand:DI 6 "const_int_operand" "n")))
|
||||
(unspec:BLK [(reg:DI XMM0_REG)
|
||||
(reg:DI XMM1_REG)
|
||||
(reg:DI XMM2_REG)
|
||||
(reg:DI XMM3_REG)
|
||||
(reg:DI XMM4_REG)
|
||||
(reg:DI XMM5_REG)
|
||||
(reg:DI XMM6_REG)
|
||||
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(clobber (match_operand:DI 1 "register_operand" "=r"))
|
||||
(use (match_operand:DI 2 "const_int_operand" "i"))
|
||||
(use (label_ref:DI (match_operand 3 "" "X")))
|
||||
(clobber (match_operand:DI 4 "register_operand" "=&r"))
|
||||
(use (match_operand:DI 5 "register_operand" "1"))]
|
||||
"TARGET_64BIT
|
||||
&& INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
|
||||
&& INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
|
||||
"#"
|
||||
[(set_attr "type" "other")
|
||||
(set_attr "memory" "store")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
;; We know size of save instruction; expand the computation of jump address
|
||||
;; in the jumptable.
|
||||
(define_split
|
||||
[(parallel [(set (match_operand:BLK 0 "" "")
|
||||
(unspec:BLK [(reg:DI XMM0_REG)
|
||||
(reg:DI XMM1_REG)
|
||||
(reg:DI XMM2_REG)
|
||||
(reg:DI XMM3_REG)
|
||||
(reg:DI XMM4_REG)
|
||||
(reg:DI XMM5_REG)
|
||||
(reg:DI XMM6_REG)
|
||||
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(clobber (match_operand:DI 1 "register_operand" ""))
|
||||
(use (match_operand:DI 2 "const_int_operand" ""))
|
||||
(use (match_operand 3 "" ""))
|
||||
(clobber (match_operand:DI 4 "register_operand" ""))
|
||||
(use (match_operand:DI 5 "register_operand" ""))])]
|
||||
"reload_completed"
|
||||
[(parallel [(set (match_dup 0)
|
||||
(unspec:BLK [(reg:DI XMM0_REG)
|
||||
(reg:DI XMM1_REG)
|
||||
(reg:DI XMM2_REG)
|
||||
(reg:DI XMM3_REG)
|
||||
(reg:DI XMM4_REG)
|
||||
(reg:DI XMM5_REG)
|
||||
(reg:DI XMM6_REG)
|
||||
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
|
||||
(use (match_dup 1))
|
||||
(use (match_dup 2))
|
||||
(use (match_dup 3))
|
||||
(use (match_dup 5))])]
|
||||
{
|
||||
/* movaps is 4 bytes; vmovaps (AVX) and movsd are 5 bytes. */
|
||||
int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
|
||||
|
||||
/* Compute address to jump to:
|
||||
label - eax*size + nnamed_sse_arguments*size. */
|
||||
if (size == 5)
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[4],
|
||||
gen_rtx_PLUS
|
||||
(Pmode,
|
||||
gen_rtx_MULT (Pmode, operands[1],
|
||||
GEN_INT (4)),
|
||||
operands[1])));
|
||||
else if (size == 4)
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[4],
|
||||
gen_rtx_MULT (Pmode, operands[1],
|
||||
GEN_INT (4))));
|
||||
else
|
||||
gcc_unreachable ();
|
||||
if (INTVAL (operands[2]))
|
||||
emit_move_insn
|
||||
(operands[1],
|
||||
gen_rtx_CONST (DImode,
|
||||
gen_rtx_PLUS (DImode,
|
||||
operands[3],
|
||||
GEN_INT (INTVAL (operands[2])
|
||||
* size))));
|
||||
else
|
||||
emit_move_insn (operands[1], operands[3]);
|
||||
emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
|
||||
operands[5] = GEN_INT (size);
|
||||
})
|
||||
|
||||
(define_insn "sse_prologue_save_insn"
|
||||
[(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
|
||||
(match_operand:DI 4 "const_int_operand" "n")))
|
||||
(unspec:BLK [(reg:DI XMM0_REG)
|
||||
|
@ -18457,10 +18555,11 @@
|
|||
(reg:DI XMM4_REG)
|
||||
(reg:DI XMM5_REG)
|
||||
(reg:DI XMM6_REG)
|
||||
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
|
||||
(reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
|
||||
(use (match_operand:DI 1 "register_operand" "r"))
|
||||
(use (match_operand:DI 2 "const_int_operand" "i"))
|
||||
(use (label_ref:DI (match_operand 3 "" "X")))]
|
||||
(use (label_ref:DI (match_operand 3 "" "X")))
|
||||
(use (match_operand:DI 5 "const_int_operand" "i"))]
|
||||
"TARGET_64BIT
|
||||
&& INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
|
||||
&& INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
|
||||
|
@ -18480,7 +18579,10 @@
|
|||
PUT_MODE (operands[4], TImode);
|
||||
if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
|
||||
output_asm_insn ("rex", operands);
|
||||
output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
|
||||
if (crtl->stack_alignment_needed < 128)
|
||||
output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
|
||||
else
|
||||
output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
|
||||
}
|
||||
(*targetm.asm_out.internal_label) (asm_out_file, "L",
|
||||
CODE_LABEL_NUMBER (operands[3]));
|
||||
|
@ -18489,11 +18591,11 @@
|
|||
[(set_attr "type" "other")
|
||||
(set_attr "length_immediate" "0")
|
||||
(set_attr "length_address" "0")
|
||||
;; 2 bytes for jump and operands[5] bytes for each save.
|
||||
(set (attr "length")
|
||||
(if_then_else
|
||||
(eq (symbol_ref "TARGET_AVX") (const_int 0))
|
||||
(const_string "34")
|
||||
(const_string "42")))
|
||||
(plus (const_int 2)
|
||||
(mult (symbol_ref ("INTVAL (operands[5])"))
|
||||
(symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
|
||||
(set_attr "memory" "store")
|
||||
(set_attr "modrm" "0")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
|
|
Loading…
Reference in New Issue