From d85c755027830f7779768c633aecdaf530a47034 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 18 Jan 2007 14:06:57 +0100 Subject: [PATCH] reg-stack.c (subst_stack_regs_pat): Use generic code for instructions that operate on the top of stack. * reg-stack.c (subst_stack_regs_pat) [UNSPEC_SINCOS_COS, UNSPEC_XTRACT_FRACT]: Use generic code for instructions that operate on the top of stack. [UNSPEC_SINCOS_SIN, UNSPEC_XTRACT_EXP, UNSPEC_TAN]: Rewrite register handling of instructions that output to the second stack slot. [UNSPEC_TAN_ONE, UNSPEC_TAN_TAN]: Remove. (move_for_stack_reg): Special-case check for dead destination stack slot for constant load of 1.0 inside UNSPEC_TAN. * config/i386/i386.md (UNSPEC_TAN): New constant. (UNSPEC_TAN_ONE, UNSPEC_TAN_TAN): Remove. (fptanxf4_i387, fptan_extend<mode>xf4_i387): New patterns to correctly model move of constant 1.0 to top stack slot. (*tandf3_1, *tansf3_1, *tanxf3_1): Remove insn patterns. (unnamed peephole2 pattern): Remove corresponding peephole2 pattern that optimizes tan insn and loading of constant 1.0. (tanxf2): Use fptanxf4_i387. (tan<mode>2): Rename from tansf2 and tandf2 and macroize insn patterns using X87MODEF12 mode macro. Use fptan_extend<mode>xf4_i387 and truncate result to requested mode. Use SSE_FLOAT_MODE_P to disable patterns for SSE math. (sincos<mode>3): Use truncxf<mode>2_i387_noop for truncation. (fyl2x_extend<mode>xf3_i387): Use X87MODEF12 for operand 1. testsuite/ChangeLog: * gcc.target/i386/387-8.c: Update comment about optimizing inherent load of 1.0 of fptan instruction. 
From-SVN: r120899 --- gcc/ChangeLog | 52 ++++++-- gcc/config/i386/i386.md | 172 ++++++++------------------ gcc/reg-stack.c | 111 ++++++++--------- gcc/testsuite/ChangeLog | 13 +- gcc/testsuite/gcc.target/i386/387-8.c | 5 +- 5 files changed, 152 insertions(+), 201 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9d832473795..7edc6df8bfa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,17 +1,44 @@ +2007-01-18 Uros Bizjak + + * reg-stack.c (subst_stack_regs_pat) [UNSPEC_SINCOS_COS, + UNSPEC_XTRACT_FRACT]: Use generic code for instructions that + operate on the top of stack. + [UNSPEC_SINCOS_SIN, UNSPEC_XTRACT_EXP, UNSPEC_TAN]: Rewrite + register handling of instructions that output to the second + stack slot. + [UNSPEC_TAN_ONE, UNSPEC_TAN_TAN]: Remove. + (move_for_stack_reg): Special-case check for dead destination + stack slot for constant load of 1.0 inside UNSPEC_TAN. + + * config/i386/i386.md (UNSPEC_TAN): New constant. + (UNSPEC_TAN_ONE, UNSPEC_TAN_TAN): Remove. + (fptanxf4_i387, fptan_extend<mode>xf4_i387): New patterns + to correctly model move of constant 1.0 to top stack slot. + (*tandf3_1, *tansf3_1, *tanxf3_1): Remove insn patterns. + (unnamed peephole2 pattern): Remove corresponding peephole2 + pattern that optimizes tan insn and loading of constant 1.0. + (tanxf2): Use fptanxf4_i387. + (tan<mode>2): Rename from tansf2 and tandf2 and macroize insn + patterns using X87MODEF12 mode macro. Use fptan_extend<mode>xf4_i387 + and truncate result to requested mode. Use SSE_FLOAT_MODE_P to + disable patterns for SSE math. + (sincos<mode>3): Use truncxf<mode>2_i387_noop for truncation. + (fyl2x_extend<mode>xf3_i387): Use X87MODEF12 for operand 1. + 2007-01-18 Dirk Mueller -· Richard Guenther + Richard Guenther -· PR diagnostic/8268 -· * doc/invoke.texi (Warray-bounds): Document -Warray-bounds. -· * common.opt (Warray-bounds): Add new warning option. -· * c-opts.c (c_common_handle_option): Define -Warray-bounds -· if -Wall is given. 
+ PR diagnostic/8268 + * doc/invoke.texi (Warray-bounds): Document -Warray-bounds. + * common.opt (Warray-bounds): Add new warning option. + * c-opts.c (c_common_handle_option): Define -Warray-bounds + if -Wall is given. * Makefile.in: make tree-vrp.o depend on toplev.h -· * tree-vrp.c (vrp_finalize): Call check_array_refs if -Warray-bounds -· is enabled. -· (check_array_refs, check_array_bounds, check_array_ref): New. + * tree-vrp.c (vrp_finalize): Call check_array_refs if -Warray-bounds + is enabled. + (check_array_refs, check_array_bounds, check_array_ref): New. -18-01-2007 Jan Hubicka +2007-01-18 Jan Hubicka * tree-ssa-ccp.c (ccp_finalize): Return if something changed. (execute_ssa_ccp): Return flags conditionally. @@ -19,7 +46,7 @@ changed. * tree-ssa-propagate.h (substitute_and_fold): Update prototype. -18-01-2007 Steven Bosscher +2007-01-18 Steven Bosscher * cfgcleanup.c (cleanup_cfg): Detect cfglayout mode and set the CLEANUP_CFGLAYOUT flag when in cfglayout mode. @@ -256,7 +283,7 @@ * config/i386/i386.md (fyl2xxf3_i387): Rename from fyl2x_xf3. (fyl2x_extend<mode>xf3_i387): New insn pattern. (log<mode>2): Rename from logsf2 and logdf2 and macroize insn - insn patterns using X87MODEF12 mode macro. Extend operand 1 + patterns using X87MODEF12 mode macro. Extend operand 1 to XFmode. Use SSE_FLOAT_MODE_P to disable patterns for SSE math. (log10<mode>2): Ditto. (log2<mode>2): Ditto. @@ -267,7 +294,6 @@ (*fxtractxf3_i387): Rename from *fxtractxf3. (fxtract_extend<mode>xf3_i387): New insn pattern. (ilogbsi2): Use match_dup 3, not match_operand:XF 3. - * config/i386/i386.c (ix86_emit_i387_log1p): Use gen_fyl2xp1xf3_i387() and gen_fyl2xxf3_i387(). 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c4567f699e4..0e535ba5b1f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -121,6 +121,7 @@ (UNSPEC_FRNDINT 65) (UNSPEC_FIST 66) (UNSPEC_F2XM1 67) + (UNSPEC_TAN 68) ; x87 Rounding (UNSPEC_FRNDINT_FLOOR 70) @@ -133,8 +134,6 @@ ; x87 Double output FP (UNSPEC_SINCOS_COS 80) (UNSPEC_SINCOS_SIN 81) - (UNSPEC_TAN_ONE 82) - (UNSPEC_TAN_TAN 83) (UNSPEC_XTRACT_FRACT 84) (UNSPEC_XTRACT_EXP 85) (UNSPEC_FSCALE_FRACT 86) @@ -15862,139 +15861,70 @@ rtx op1 = gen_reg_rtx (XFmode); emit_insn (gen_sincos_extendxf3_i387 (op0, op1, operands[2])); - emit_insn (gen_truncxf2_i387_noop_unspec (operands[0], op0)); - emit_insn (gen_truncxf2_i387_noop_unspec (operands[1], op1)); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + emit_insn (gen_truncxf2_i387_noop (operands[1], op1)); DONE; }) -(define_insn "*tandf3_1" - [(set (match_operand:DF 0 "register_operand" "=f") - (unspec:DF [(match_operand:DF 2 "register_operand" "0")] - UNSPEC_TAN_ONE)) - (set (match_operand:DF 1 "register_operand" "=u") - (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))] - "TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "fptan" - [(set_attr "type" "fpspc") - (set_attr "mode" "DF")]) - -;; optimize sequence: fptan -;; fstp %st(0) -;; fld1 -;; into fptan insn. 
- -(define_peephole2 - [(parallel[(set (match_operand:DF 0 "register_operand" "") - (unspec:DF [(match_operand:DF 2 "register_operand" "")] - UNSPEC_TAN_ONE)) - (set (match_operand:DF 1 "register_operand" "") - (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))]) - (set (match_dup 0) - (match_operand:DF 3 "immediate_operand" ""))] - "standard_80387_constant_p (operands[3]) == 2" - [(parallel[(set (match_dup 0) (unspec:DF [(match_dup 2)] UNSPEC_TAN_ONE)) - (set (match_dup 1) (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))])] - "") - -(define_expand "tandf2" - [(parallel [(set (match_dup 2) - (unspec:DF [(match_operand:DF 1 "register_operand" "")] - UNSPEC_TAN_ONE)) - (set (match_operand:DF 0 "register_operand" "") - (unspec:DF [(match_dup 1)] UNSPEC_TAN_TAN))])] - "TARGET_USE_FANCY_MATH_387 - && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" -{ - operands[2] = gen_reg_rtx (DFmode); -}) - -(define_insn "*tansf3_1" - [(set (match_operand:SF 0 "register_operand" "=f") - (unspec:SF [(match_operand:SF 2 "register_operand" "0")] - UNSPEC_TAN_ONE)) - (set (match_operand:SF 1 "register_operand" "=u") - (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))] - "TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" - "fptan" - [(set_attr "type" "fpspc") - (set_attr "mode" "SF")]) - -;; optimize sequence: fptan -;; fstp %st(0) -;; fld1 -;; into fptan insn. 
- -(define_peephole2 - [(parallel[(set (match_operand:SF 0 "register_operand" "") - (unspec:SF [(match_operand:SF 2 "register_operand" "")] - UNSPEC_TAN_ONE)) - (set (match_operand:SF 1 "register_operand" "") - (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))]) - (set (match_dup 0) - (match_operand:SF 3 "immediate_operand" ""))] - "standard_80387_constant_p (operands[3]) == 2" - [(parallel[(set (match_dup 0) (unspec:SF [(match_dup 2)] UNSPEC_TAN_ONE)) - (set (match_dup 1) (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))])] - "") - -(define_expand "tansf2" - [(parallel [(set (match_dup 2) - (unspec:SF [(match_operand:SF 1 "register_operand" "")] - UNSPEC_TAN_ONE)) - (set (match_operand:SF 0 "register_operand" "") - (unspec:SF [(match_dup 1)] UNSPEC_TAN_TAN))])] - "TARGET_USE_FANCY_MATH_387 - && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387) - && flag_unsafe_math_optimizations" -{ - operands[2] = gen_reg_rtx (SFmode); -}) - -(define_insn "*tanxf3_1" +(define_insn "fptanxf4_i387" [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 2 "register_operand" "0")] - UNSPEC_TAN_ONE)) + (match_operand:XF 3 "const_double_operand" "F")) (set (match_operand:XF 1 "register_operand" "=u") - (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))] + (unspec:XF [(match_operand:XF 2 "register_operand" "0")] + UNSPEC_TAN))] "TARGET_USE_FANCY_MATH_387 - && flag_unsafe_math_optimizations" + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" "fptan" [(set_attr "type" "fpspc") (set_attr "mode" "XF")]) -;; optimize sequence: fptan -;; fstp %st(0) -;; fld1 -;; into fptan insn. 
- -(define_peephole2 - [(parallel[(set (match_operand:XF 0 "register_operand" "") - (unspec:XF [(match_operand:XF 2 "register_operand" "")] - UNSPEC_TAN_ONE)) - (set (match_operand:XF 1 "register_operand" "") - (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))]) - (set (match_dup 0) - (match_operand:XF 3 "immediate_operand" ""))] - "standard_80387_constant_p (operands[3]) == 2" - [(parallel[(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_TAN_ONE)) - (set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_TAN_TAN))])] - "") +(define_insn "fptan_extendxf4_i387" + [(set (match_operand:X87MODEF12 0 "register_operand" "=f") + (match_operand:X87MODEF12 3 "const_double_operand" "F")) + (set (match_operand:XF 1 "register_operand" "=u") + (unspec:XF [(float_extend:XF + (match_operand:X87MODEF12 2 "register_operand" "0"))] + UNSPEC_TAN))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations + && standard_80387_constant_p (operands[3]) == 2" + "fptan" + [(set_attr "type" "fpspc") + (set_attr "mode" "XF")]) (define_expand "tanxf2" - [(parallel [(set (match_dup 2) - (unspec:XF [(match_operand:XF 1 "register_operand" "")] - UNSPEC_TAN_ONE)) - (set (match_operand:XF 0 "register_operand" "") - (unspec:XF [(match_dup 1)] UNSPEC_TAN_TAN))])] + [(use (match_operand:XF 0 "register_operand" "")) + (use (match_operand:XF 1 "register_operand" ""))] "TARGET_USE_FANCY_MATH_387 && flag_unsafe_math_optimizations" { - operands[2] = gen_reg_rtx (XFmode); + rtx one = gen_reg_rtx (XFmode); + operands[2] = CONST1_RTX (XFmode); /* fld1 */ + + emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "tan2" + [(use (match_operand:X87MODEF12 0 "register_operand" "")) + (use (match_operand:X87MODEF12 1 "register_operand" ""))] + "TARGET_USE_FANCY_MATH_387 + && (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + || TARGET_MIX_SSE_I387) + && flag_unsafe_math_optimizations" 
+{ + rtx op0 = gen_reg_rtx (XFmode); + + rtx one = gen_reg_rtx (mode); + operands[2] = CONST1_RTX (mode); /* fld1 */ + + emit_insn (gen_fptan_extendxf4_i387 (one, op0, + operands[1], operands[2])); + emit_insn (gen_truncxf2_i387_noop (operands[0], op0)); + DONE; }) (define_insn "atan2df3_1" @@ -16270,7 +16200,7 @@ (define_insn "fyl2x_extendxf3_i387" [(set (match_operand:XF 0 "register_operand" "=f") (unspec:XF [(float_extend:XF - (match_operand:X87MODEF 1 "register_operand" "0")) + (match_operand:X87MODEF12 1 "register_operand" "0")) (match_operand:XF 2 "register_operand" "u")] UNSPEC_FYL2X)) (clobber (match_scratch:XF 3 "=2"))] diff --git a/gcc/reg-stack.c b/gcc/reg-stack.c index 793c6b58ec4..0df425eebeb 100644 --- a/gcc/reg-stack.c +++ b/gcc/reg-stack.c @@ -1059,6 +1059,8 @@ move_for_stack_reg (rtx insn, stack regstack, rtx pat) } else { + rtx pat = PATTERN (insn); + gcc_assert (STACK_REG_P (dest)); /* Load from MEM, or possibly integer REG or constant, into the @@ -1066,8 +1068,16 @@ move_for_stack_reg (rtx insn, stack regstack, rtx pat) stack. The stack mapping is changed to reflect that DEST is now at top of stack. */ - /* The destination ought to be dead. */ - gcc_assert (get_hard_regnum (regstack, dest) < FIRST_STACK_REG); + /* The destination ought to be dead. However, there is a + special case with i387 UNSPEC_TAN, where destination is live + (an argument to fptan) but inherent load of 1.0 is modelled + as a load from a constant. */ + if (! (GET_CODE (pat) == PARALLEL + && XVECLEN (pat, 0) == 2 + && GET_CODE (XVECEXP (pat, 0, 1)) == SET + && GET_CODE (SET_SRC (XVECEXP (pat, 0, 1))) == UNSPEC + && XINT (SET_SRC (XVECEXP (pat, 0, 1)), 1) == UNSPEC_TAN)) + gcc_assert (get_hard_regnum (regstack, dest) < FIRST_STACK_REG); gcc_assert (regstack->top < REG_STACK_SIZE); @@ -1629,14 +1639,19 @@ subst_stack_regs_pat (rtx insn, stack regstack, rtx pat) case UNSPEC_FRNDINT_TRUNC: case UNSPEC_FRNDINT_MASK_PM: - /* These insns only operate on the top of the stack. 
*/ + /* Above insns operate on the top of the stack. */ + + case UNSPEC_SINCOS_COS: + case UNSPEC_XTRACT_FRACT: + + /* Above insns operate on the top two stack slots, + first part of one input, double output insn. */ src1 = get_true_reg (&XVECEXP (pat_src, 0, 0)); emit_swap_insn (insn, regstack, *src1); - /* Input should never die, it is - replaced with output. */ + /* Input should never die, it is replaced with output. */ src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1)); gcc_assert (!src1_note); @@ -1646,6 +1661,36 @@ subst_stack_regs_pat (rtx insn, stack regstack, rtx pat) replace_reg (src1, FIRST_STACK_REG); break; + case UNSPEC_SINCOS_SIN: + case UNSPEC_XTRACT_EXP: + + /* These insns operate on the top two stack slots, + second part of one input, double output insn. */ + + regstack->top++; + /* FALLTHRU */ + + case UNSPEC_TAN: + + /* For UNSPEC_TAN, regstack->top is already increased + by inherent load of constant 1.0. */ + + /* Output value is generated in the second stack slot. + Move current value from second slot to the top. */ + regstack->reg[regstack->top] + = regstack->reg[regstack->top - 1]; + + gcc_assert (STACK_REG_P (*dest)); + + regstack->reg[regstack->top - 1] = REGNO (*dest); + SET_HARD_REG_BIT (regstack->reg_set, REGNO (*dest)); + replace_reg (dest, FIRST_STACK_REG + 1); + + src1 = get_true_reg (&XVECEXP (pat_src, 0, 0)); + + replace_reg (src1, FIRST_STACK_REG); + break; + case UNSPEC_FPATAN: case UNSPEC_FYL2X: case UNSPEC_FYL2XP1: @@ -1744,62 +1789,6 @@ subst_stack_regs_pat (rtx insn, stack regstack, rtx pat) replace_reg (src2, FIRST_STACK_REG + 1); break; - case UNSPEC_SINCOS_COS: - case UNSPEC_TAN_ONE: - case UNSPEC_XTRACT_FRACT: - /* These insns operate on the top two stack slots, - first part of one input, double output insn. */ - - src1 = get_true_reg (&XVECEXP (pat_src, 0, 0)); - - emit_swap_insn (insn, regstack, *src1); - - /* Input should never die, it is - replaced with output. 
*/ - src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1)); - gcc_assert (!src1_note); - - /* Push the result back onto stack. Empty stack slot - will be filled in second part of insn. */ - if (STACK_REG_P (*dest)) - { - regstack->reg[regstack->top + 1] = REGNO (*dest); - SET_HARD_REG_BIT (regstack->reg_set, REGNO (*dest)); - replace_reg (dest, FIRST_STACK_REG); - } - - replace_reg (src1, FIRST_STACK_REG); - break; - - case UNSPEC_SINCOS_SIN: - case UNSPEC_TAN_TAN: - case UNSPEC_XTRACT_EXP: - /* These insns operate on the top two stack slots, - second part of one input, double output insn. */ - - src1 = get_true_reg (&XVECEXP (pat_src, 0, 0)); - - emit_swap_insn (insn, regstack, *src1); - - /* Input should never die, it is - replaced with output. */ - src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1)); - gcc_assert (!src1_note); - - /* Push the result back onto stack. Fill empty slot from - first part of insn and fix top of stack pointer. */ - if (STACK_REG_P (*dest)) - { - regstack->reg[regstack->top] = REGNO (*dest); - SET_HARD_REG_BIT (regstack->reg_set, REGNO (*dest)); - replace_reg (dest, FIRST_STACK_REG + 1); - - regstack->top++; - } - - replace_reg (src1, FIRST_STACK_REG); - break; - case UNSPEC_SAHF: /* (unspec [(unspec [(compare)] UNSPEC_FNSTSW)] UNSPEC_SAHF) The combination matches the PPRO fcomi instruction. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 66b6bd431cd..9171b0d01b1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,8 +1,13 @@ -2007-01-18 Dirk Mueller -· Richard Guenther +2007-01-18 Uros Bizjak -· PR diagnostic/8268 -· * gcc.dg/Warray-bounds.c: New testcase. + * gcc.target/i386/387-8.c: Update comment about optimizing + inherent load of 1.0 of fptan instruction. + +2007-01-18 Dirk Mueller + Richard Guenther + + PR diagnostic/8268 + * gcc.dg/Warray-bounds.c: New testcase. * gcc.dg/Warray-bounds-2.c: New testcase. * g++.dg/warn/Warray-bounds.C: New testcase. 
* g++.dg/warn/Warray-bounds-2.C: New testcase. diff --git a/gcc/testsuite/gcc.target/i386/387-8.c b/gcc/testsuite/gcc.target/i386/387-8.c index 6af895998d0..578e0a38b5f 100644 --- a/gcc/testsuite/gcc.target/i386/387-8.c +++ b/gcc/testsuite/gcc.target/i386/387-8.c @@ -1,5 +1,6 @@ -/* Verify that 387 fptan instruction is generated. Also check fptan - peephole2 optimizer. */ +/* Verify that 387 fptan instruction is generated. Also check that + inherent load of 1.0 is used in further calculations. */ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-require-effective-target ilp32 } */ /* { dg-options "-O2 -ffast-math -march=i686" } */