i386.md (popdi1): Rewrite using POST_INC memory operand.

* config/i386/i386.md (popdi1): Rewrite using POST_INC memory operand.
	(popsi1): Ditto.
	(*popdi1_epilogue): Ditto.
	(*popsi1_epilogue): Ditto.
	(popsi, popdi peephole2 patterns): Update peepholes for changed
	pop{si,di}1 and *pop{si,di}1_epilogue patterns.

	(pop<mode>1): Macroize insn from pop{si,di}1 using P code iterator.
	(*pop<mode>1_epilogue): Ditto from *pop{si,di}1_epilogue.

	* config/i386/i386.c (*ix86_gen_pop1): Remove indirect function.
	(override_options): Do not initialize removed ix86_gen_pop1.
	(gen_pop): New static function.
	(ix86_expand_prologue): Use gen_pop instead of ix86_gen_pop1.
	(release_scratch_register_on_entry): Ditto.
	(ix86_restore_reg_using_pop): Ditto.
	(ix86_expand_epilogue): Ditto.

From-SVN: r163679
This commit is contained in:
Uros Bizjak 2010-08-31 18:53:56 +02:00
parent 0f643366ab
commit 7d2c38c0b9
3 changed files with 91 additions and 123 deletions

View File

@ -1,3 +1,23 @@
2010-08-31 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (popdi1): Rewrite using POST_INC memory operand.
(popsi1): Ditto.
(*popdi1_epilogue): Ditto.
(*popsi1_epilogue): Ditto.
(popsi, popdi peephole2 patterns): Update peepholes for changed
pop{si,di}1 and *pop{si,di}1_epilogue patterns.
(pop<mode>1): Macroize insn from pop{si,di}1 using P code iterator.
(*pop<mode>1_epilogue): Ditto from *pop{si,di}1_epilogue.
* config/i386/i386.c (*ix86_gen_pop1): Remove indirect function.
(override_options): Do not initialize removed ix86_gen_pop1.
(gen_pop): New static function.
(ix86_expand_prologue): Use gen_pop instead of ix86_gen_pop1.
(release_scratch_register_on_entry): Ditto.
(ix86_restore_reg_using_pop): Ditto.
(ix86_expand_epilogue): Ditto.
2010-08-31 Jakub Jelinek <jakub@redhat.com>
PR middle-end/45461
@ -414,9 +434,9 @@
(VSr3): Ditto.
2010-08-27 Basile Starynkevitch <basile@starynkevitch.net>
Jeremie Salvucci <jeremie.salvucci@free.fr>
Jeremie Salvucci <jeremie.salvucci@free.fr>
* gengtype.c (output_type_enum): Test the right union member.
* gengtype.c (output_type_enum): Test the right union member.
2010-08-27 Uros Bizjak <ubizjak@gmail.com>

View File

@ -1896,7 +1896,6 @@ static const char ix86_force_align_arg_pointer_string[]
= "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_pop1) (rtx);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
@ -3655,7 +3654,6 @@ override_options (bool main_args_p)
if (TARGET_64BIT)
{
ix86_gen_leave = gen_leave_rex64;
ix86_gen_pop1 = gen_popdi1;
ix86_gen_add3 = gen_adddi3;
ix86_gen_sub3 = gen_subdi3;
ix86_gen_sub3_carry = gen_subdi3_carry;
@ -3669,7 +3667,6 @@ override_options (bool main_args_p)
else
{
ix86_gen_leave = gen_leave;
ix86_gen_pop1 = gen_popsi1;
ix86_gen_add3 = gen_addsi3;
ix86_gen_sub3 = gen_subsi3;
ix86_gen_sub3_carry = gen_subsi3_carry;
@ -8173,6 +8170,18 @@ gen_push (rtx arg)
arg);
}
/* Generate an "pop" pattern for input ARG. */
static rtx
gen_pop (rtx arg)
{
return gen_rtx_SET (VOIDmode,
arg,
gen_rtx_MEM (Pmode,
gen_rtx_POST_INC (Pmode,
stack_pointer_rtx)));
}
/* Return >= 0 if there is an unused call-clobbered register available
for the entire function. */
@ -9052,7 +9061,7 @@ release_scratch_register_on_entry (struct scratch_reg *sr)
{
if (sr->saved)
{
rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
rtx x, insn = emit_insn (gen_pop (sr->reg));
/* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
RTX_FRAME_RELATED_P (insn) = 1;
@ -9478,7 +9487,7 @@ ix86_expand_prologue (void)
{
/* The frame pointer is not needed so pop %ebp again.
This leaves us with a pristine state. */
emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
emit_insn (gen_pop (hard_frame_pointer_rtx));
}
}
@ -9786,7 +9795,7 @@ static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
struct machine_function *m = cfun->machine;
rtx insn = emit_insn (ix86_gen_pop1 (reg));
rtx insn = emit_insn (gen_pop (reg));
ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
m->fs.sp_offset -= UNITS_PER_WORD;
@ -9809,10 +9818,12 @@ ix86_emit_restore_reg_using_pop (rtx reg)
if (m->fs.cfa_reg == stack_pointer_rtx)
{
m->fs.cfa_offset -= UNITS_PER_WORD;
add_reg_note (insn, REG_CFA_ADJUST_CFA,
copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
RTX_FRAME_RELATED_P (insn) = 1;
m->fs.cfa_offset -= UNITS_PER_WORD;
}
/* When the frame pointer is the CFA, and we pop it, we are
@ -10229,7 +10240,7 @@ ix86_expand_epilogue (int style)
/* There is no "pascal" calling convention in any 64bit ABI. */
gcc_assert (!TARGET_64BIT);
insn = emit_insn (gen_popsi1 (ecx));
insn = emit_insn (gen_pop (ecx));
m->fs.cfa_offset -= UNITS_PER_WORD;
m->fs.sp_offset -= UNITS_PER_WORD;

View File

@ -1636,14 +1636,7 @@
""
"ix86_expand_move (<MODE>mode, operands); DONE;")
;; Push/pop instructions. They are separate since autoinc/dec is not a
;; general_operand.
;;
;; %%% We don't use a post-inc memory reference because x86 is not a
;; general AUTO_INC_DEC host, which impacts how it is treated in flow.
;; Changing this impacts compiler performance on other non-AUTO_INC_DEC
;; targets without our curiosities, and it is just as easy to represent
;; this differently.
;; Push/pop instructions.
(define_insn "*pushdi2_rex64"
[(set (match_operand:DI 0 "push_operand" "=<,!<")
@ -1756,47 +1749,22 @@
[(set_attr "type" "push")
(set_attr "mode" "<MODE>")])
(define_insn "popdi1"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
(mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG)
(plus:DI (reg:DI SP_REG) (const_int 8)))]
"TARGET_64BIT"
"pop{q}\t%0"
(define_insn "pop<mode>1"
[(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
(match_operand:P 1 "pop_operand" ">"))]
""
"pop{<imodesuffix>}\t%0"
[(set_attr "type" "pop")
(set_attr "mode" "DI")])
(set_attr "mode" "<MODE>")])
(define_insn "popsi1"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
(mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG) (const_int 4)))]
"!TARGET_64BIT"
"pop{l}\t%0"
[(set_attr "type" "pop")
(set_attr "mode" "SI")])
(define_insn "*popdi1_epilogue"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r*m")
(mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG)
(plus:DI (reg:DI SP_REG) (const_int 8)))
(define_insn "*pop<mode>1_epilogue"
[(set (match_operand:P 0 "nonimmediate_operand" "=r*m")
(match_operand:P 1 "pop_operand" ">"))
(clobber (mem:BLK (scratch)))]
"TARGET_64BIT"
"pop{q}\t%0"
""
"pop{<imodesuffix>}\t%0"
[(set_attr "type" "pop")
(set_attr "mode" "DI")])
(define_insn "*popsi1_epilogue"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
(mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG)
(plus:SI (reg:SI SP_REG) (const_int 4)))
(clobber (mem:BLK (scratch)))]
"!TARGET_64BIT"
"pop{l}\t%0"
[(set_attr "type" "pop")
(set_attr "mode" "SI")])
(set_attr "mode" "<MODE>")])
(define_insn "*mov<mode>_xor"
[(set (match_operand:SWI48 0 "register_operand" "=r")
@ -17037,12 +17005,13 @@
"operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));")
;; The ESP adjustments can be done by the push and pop instructions. Resulting
;; code is shorter, since push is only 1 byte, while add imm, %esp 3 bytes. On
;; many CPUs it is also faster, since special hardware to avoid esp
;; code is shorter, since push is only 1 byte, while add imm, %esp is 3 bytes.
;; On many CPUs it is also faster, since special hardware to avoid esp
;; dependencies is present.
;; While some of these conversions may be done using splitters, we use peepholes
;; in order to allow combine_stack_adjustments pass to see nonobfuscated RTL.
;; While some of these conversions may be done using splitters, we use
;; peepholes in order to allow combine_stack_adjustments pass to see
;; nonobfuscated RTL.
;; Convert prologue esp subtractions to push.
;; We need register to push. In order to keep verify_flow_info happy we have
@ -17101,13 +17070,11 @@
(clobber (reg:CC FLAGS_REG))
(clobber (mem:BLK (scratch)))])]
"optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
(clobber (mem:BLK (scratch)))])]
"")
[(parallel [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))
(clobber (mem:BLK (scratch)))])])
;; Two pops case is tricky, since pop causes dependency on destination register.
;; We use two registers if available.
;; Two pops case is tricky, since pop causes dependency
;; on destination register. We use two registers if available.
(define_peephole2
[(match_scratch:SI 0 "r")
(match_scratch:SI 1 "r")
@ -17115,12 +17082,9 @@
(clobber (reg:CC FLAGS_REG))
(clobber (mem:BLK (scratch)))])]
"optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
[(parallel [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))
(clobber (mem:BLK (scratch)))])
(parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
"")
(set (match_dup 1) (mem:SI (post_inc:SI (reg:SI SP_REG))))])
(define_peephole2
[(match_scratch:SI 0 "r")
@ -17128,12 +17092,9 @@
(clobber (reg:CC FLAGS_REG))
(clobber (mem:BLK (scratch)))])]
"optimize_insn_for_size_p ()"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
[(parallel [(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))
(clobber (mem:BLK (scratch)))])
(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
"")
(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))])
;; Convert esp additions to pop.
(define_peephole2
@ -17141,34 +17102,26 @@
(parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
(clobber (reg:CC FLAGS_REG))])]
""
[(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
"")
[(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))])
;; Two pops case is tricky, since pop causes dependency on destination register.
;; We use two registers if available.
;; Two pops case is tricky, since pop causes dependency
;; on destination register. We use two registers if available.
(define_peephole2
[(match_scratch:SI 0 "r")
(match_scratch:SI 1 "r")
(parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])]
""
[(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
(parallel [(set (match_dup 1) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
"")
[(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))
(set (match_dup 1) (mem:SI (post_inc:SI (reg:SI SP_REG))))])
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])]
"optimize_insn_for_size_p ()"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])]
"")
[(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))
(set (match_dup 0) (mem:SI (post_inc:SI (reg:SI SP_REG))))])
(define_peephole2
[(match_scratch:DI 0 "r")
@ -17216,13 +17169,11 @@
(clobber (reg:CC FLAGS_REG))
(clobber (mem:BLK (scratch)))])]
"optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
[(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
(clobber (mem:BLK (scratch)))])]
"")
[(parallel [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))
(clobber (mem:BLK (scratch)))])])
;; Two pops case is tricky, since pop causes dependency on destination register.
;; We use two registers if available.
;; Two pops case is tricky, since pop causes dependency
;; on destination register. We use two registers if available.
(define_peephole2
[(match_scratch:DI 0 "r")
(match_scratch:DI 1 "r")
@ -17230,12 +17181,9 @@
(clobber (reg:CC FLAGS_REG))
(clobber (mem:BLK (scratch)))])]
"optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
[(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
[(parallel [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))
(clobber (mem:BLK (scratch)))])
(parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
"")
(set (match_dup 1) (mem:DI (post_inc:DI (reg:DI SP_REG))))])
(define_peephole2
[(match_scratch:DI 0 "r")
@ -17243,12 +17191,9 @@
(clobber (reg:CC FLAGS_REG))
(clobber (mem:BLK (scratch)))])]
"optimize_insn_for_size_p ()"
[(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
[(parallel [(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))
(clobber (mem:BLK (scratch)))])
(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
"")
(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))])
;; Convert esp additions to pop.
(define_peephole2
@ -17256,34 +17201,26 @@
(parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])]
""
[(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
"")
[(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))])
;; Two pops case is tricky, since pop causes dependency on destination register.
;; We use two registers if available.
;; Two pops case is tricky, since pop causes dependency
;; on destination register. We use two registers if available.
(define_peephole2
[(match_scratch:DI 0 "r")
(match_scratch:DI 1 "r")
(parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
(clobber (reg:CC FLAGS_REG))])]
""
[(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
(parallel [(set (match_dup 1) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
"")
[(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))
(set (match_dup 1) (mem:DI (post_inc:DI (reg:DI SP_REG))))])
(define_peephole2
[(match_scratch:DI 0 "r")
(parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
(clobber (reg:CC FLAGS_REG))])]
"optimize_insn_for_size_p ()"
[(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])]
"")
[(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))
(set (match_dup 0) (mem:DI (post_inc:DI (reg:DI SP_REG))))])
;; Convert compares with 1 to shorter inc/dec operations when CF is not
;; required and register dies. Similarly for 128 to -128.