From 9f09b1f27285bb8b72ee3fa02619fc2fac845b3b Mon Sep 17 00:00:00 2001 From: "J\"orn Rennecke" Date: Tue, 15 Feb 2000 22:34:33 +0000 Subject: [PATCH] Makefile.in (lcm.o): Depend on insn-attr.h. * Makefile.in (lcm.o): Depend on insn-attr.h. * basic-block.h (optimize_mode_switching): Declare. * lcm.c (tm_p.h, insn-attr.h): #include. (seginfo, bb_info): New structs. (antic, transp, comp, delete, insert) : New file-scope static variables. (new_seginfo, add_seginfo, make_preds_opaque, reg_dies): New functions. (reg_becomes_live, optimize_mode_switching): Likewise. * tm.texi: Add description of mode switching macros. * toplev.c (rest_of_compilation): Call optimize_mode_switching. * sh-protos.h (remove_dead_before_cse): Remove prototype. (fldi_ok, fpscr_set_from_mem): New prototypes. * sh.h (OPTIMIZATION_OPTION): Remove sh_flag_remove_dead_before_cse set. (CONST_DOUBLE_OK_FOR_LETTER_P, SECONDARY_INPUT_RELOAD_CLASS): Disable fldi for (TARGET_SH4 && ! TARGET_FMOVD). (sh_flag_remove_dead_before_cse): Remove declaration. (NUM_MODES_FOR_MODE_SWITCHING, OPTIMIZE_MODE_SWITCHING): New macros. (MODE_USES_IN_EXIT_BLOCK, MODE_NEEDED, MODE_AT_ENTRY): Likewise. (MODE_PRIORITY_TO_MODE, EMIT_MODE_SET): Likewise. * sh.c (broken_move): Disable fldi for (TARGET_SH4 && ! TARGET_FMOVD). (barrier_align): Allow for JUMP_INSNS containing a parallel. (machine_dependent_reorg): Remove sh_flag_remove_dead_before_cse set. (fldi_ok): New function. (get_fpscr_rtx): Add fpscr_rtx as GC root. (emit_sf_insn): Only generate fpu switches when optimize < 1. (emit_df_insn): Likewise. (expand_fp_branch, emit_fpscr_use, remove_dead_before_cse): Delete. (sh_flag_remove_dead_before_cse): Delete. (get_free_reg, fpscr_set_from_mem): New functions. * sh.md (movdf, movsf): Remove no_new_pseudos code. (return): Remove emit_fpscr_use / remove_dead_before_cse calls. 
Co-Authored-By: Andrew MacLeod From-SVN: r31990 --- gcc/ChangeLog | 35 +++ gcc/Makefile.in | 2 +- gcc/basic-block.h | 1 + gcc/config/sh/sh-protos.h | 8 +- gcc/config/sh/sh.c | 222 ++++++++---------- gcc/config/sh/sh.h | 38 +++- gcc/config/sh/sh.md | 26 +-- gcc/lcm.c | 459 ++++++++++++++++++++++++++++++++++++++ gcc/tm.texi | 80 +++++++ gcc/toplev.c | 5 + 10 files changed, 707 insertions(+), 169 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 29fe2203ad6..e315302d136 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +Tue Feb 15 22:30:36 2000 J"orn Rennecke + Andrew MacLeod + + * Makefile.in (lcm.o): Depend on insn-attr.h. + * basic-block.h (optimize_mode_switching): Declare. + * lcm.c (tm_p.h, insn-attr.h): #include. + (seginfo, bb_info): New structs. + (antic, transp, comp, delete, insert) : New file-scope static variables. + (new_seginfo, add_seginfo, make_preds_opaque, reg_dies): New functions. + (reg_becomes_live, optimize_mode_switching): Likewise. + * tm.texi: Add description of mode switching macros. + * toplev.c (rest_of_compilation): Call optimize_mode_switching. + + * sh-protos.h (remove_dead_before_cse): Remove prototype. + (fldi_ok, fpscr_set_from_mem): New prototypes. + * sh.h (OPTIMIZATION_OPTION): Remove sh_flag_remove_dead_before_cse set. + (CONST_DOUBLE_OK_FOR_LETTER_P, SECONDARY_INPUT_RELOAD_CLASS): + Disable fldi for (TARGET_SH4 && ! TARGET_FMOVD). + (sh_flag_remove_dead_before_cse): Remove declaration. + (NUM_MODES_FOR_MODE_SWITCHING, OPTIMIZE_MODE_SWITCHING): New macros. + (MODE_USES_IN_EXIT_BLOCK, MODE_NEEDED, MODE_AT_ENTRY): Likewise. + (MODE_PRIORITY_TO_MODE, EMIT_MODE_SET): Likewise. + * sh.c (broken_move): Disable fldi for (TARGET_SH4 && ! TARGET_FMOVD). + (barrier_align): Allow for JUMP_INSNS containing a parallel. + (machine_dependent_reorg): Remove sh_flag_remove_dead_before_cse set. + (fldi_ok): New function. + (get_fpscr_rtx): Add fpscr_rtx as GC root. 
+ (emit_sf_insn): Only generate fpu switches when optimize < 1. + (emit_df_insn): Likewise. + (expand_fp_branch, emit_fpscr_use, remove_dead_before_cse): Delete. + (sh_flag_remove_dead_before_cse): Delete. + (get_free_reg, fpscr_set_from_mem): New functions. + * sh.md (movdf, movsf): Remove no_new_pseudos code. + (return): Remove emit_fpscr_use / remove_dead_before_cse calls. + 2000-02-15 Loren Rittle * ginclude/stddef.h: Correct usage of _BSD_RUNE_T_ for FreeBSD. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 03d19cb0bc8..b773bc3580c 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1567,7 +1567,7 @@ resource.o : resource.c $(CONFIG_H) $(RTL_H) hard-reg-set.h system.h \ $(BASIC_BLOCK_H) $(REGS_H) flags.h output.h resource.h function.h toplev.h \ insn-attr.h lcm.o : lcm.c $(CONFIG_H) system.h $(RTL_H) $(REGS_H) hard-reg-set.h flags.h \ - real.h insn-config.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) + real.h insn-config.h insn-attr.h $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) profile.o : profile.c $(CONFIG_H) system.h $(RTL_H) flags.h insn-flags.h \ gcov-io.h $(TREE_H) output.h $(REGS_H) toplev.h function.h insn-config.h \ ggc.h diff --git a/gcc/basic-block.h b/gcc/basic-block.h index 5ecdb776b38..b83e0f4c458 100644 --- a/gcc/basic-block.h +++ b/gcc/basic-block.h @@ -435,6 +435,7 @@ extern struct edge_list *pre_edge_rev_lcm PARAMS ((FILE *, int, sbitmap *, sbitmap **)); extern void compute_available PARAMS ((sbitmap *, sbitmap *, sbitmap *, sbitmap *)); +extern void optimize_mode_switching PARAMS ((FILE *)); /* In emit-rtl.c. */ extern rtx emit_block_insn_after PARAMS ((rtx, rtx, basic_block)); diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index 86a92d16801..bb962255dd0 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -1,5 +1,5 @@ /* Definitions of target machine for GNU compiler for Hitachi Super-H. - Copyright (C) 1993-1998, 1999 Free Software Foundation, Inc. 
+ Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc. Contributed by Steve Chamberlain (sac@cygnus.com). Improved by Jim Wilson (wilson@cygnus.com). @@ -109,4 +109,8 @@ extern void sh_expand_epilogue PARAMS ((void)); extern void function_epilogue PARAMS ((FILE *, int)); extern int initial_elimination_offset PARAMS ((int, int)); extern void emit_fpscr_use PARAMS ((void)); -extern void remove_dead_before_cse PARAMS ((void)); +extern int fldi_ok PARAMS ((void)); + +#ifdef HARD_CONST +extern void fpscr_set_from_mem PARAMS ((int, HARD_REG_SET)); +#endif diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index d039650afe1..549f453a1ff 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -1,5 +1,5 @@ /* Output routines for GCC for Hitachi Super-H. - Copyright (C) 1993-1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc. Contributed by Steve Chamberlain (sac@cygnus.com). Improved by Jim Wilson (wilson@cygnus.com). @@ -2025,6 +2025,9 @@ broken_move (insn) && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE && (fp_zero_operand (SET_SRC (pat)) || fp_one_operand (SET_SRC (pat))) + /* ??? If this is a -m4 or -m4-single compilation, we don't + know the current setting of fpscr, so disable fldi. */ + && (! TARGET_SH4 || TARGET_FMOVD) && GET_CODE (SET_DEST (pat)) == REG && REGNO (SET_DEST (pat)) >= FIRST_FP_REG && REGNO (SET_DEST (pat)) <= LAST_FP_REG) @@ -2772,9 +2775,14 @@ barrier_align (barrier_or_label) if (prev && GET_CODE (prev) == JUMP_INSN && JUMP_LABEL (prev) - && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label) - && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0))) - return 0; + && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)) + { + rtx pat = PATTERN (prev); + if (GET_CODE (pat) == PARALLEL) + pat = XVECEXP (pat, 0, 0); + if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 
2 : 0)) + return 0; + } } return CACHE_LOG; @@ -3203,13 +3211,11 @@ machine_dependent_reorg (first) #if 0 /* fpscr is not actually a user variable, but we pretend it is for the sake of the previous optimization passes, since we want it handled like - one. However, we don't have eny debugging information for it, so turn + one. However, we don't have any debugging information for it, so turn it into a non-user variable now. */ if (TARGET_SH4) REG_USERVAR_P (get_fpscr_rtx ()) = 0; #endif - if (optimize) - sh_flag_remove_dead_before_cse = 1; mdep_reorg_phase = SH_AFTER_MDEP_REORG; } @@ -4617,6 +4623,19 @@ fp_one_operand (op) return REAL_VALUES_EQUAL (r, dconst1); } +/* For -m4 and -m4-single-only, mode switching is used. If we are + compiling without -mfmovd, movsf_ie isn't taken into account for + mode switching. We could check in machine_dependent_reorg for + cases where we know we are in single precision mode, but there is + no interface to find that out during reload, so we must avoid + choosing an fldi alternative during reload and thus failing to + allocate a scratch register for the constant loading. */ +int +fldi_ok () +{ + return ! TARGET_SH4 || TARGET_FMOVD || reload_completed; +} + int tertiary_reload_operand (op, mode) rtx op; @@ -4815,6 +4834,7 @@ get_fpscr_rtx () fpscr_rtx = gen_rtx (REG, PSImode, 48); REG_USERVAR_P (fpscr_rtx) = 1; pop_obstacks (); + ggc_add_rtx_root (&fpscr_rtx, 1); mark_user_reg (fpscr_rtx); } if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG) @@ -4829,13 +4849,13 @@ emit_sf_insn (pat) rtx addr; /* When generating reload insns, we must not create new registers. FPSCR should already have the correct value, so do nothing to change it. */ - if (! TARGET_FPU_SINGLE && ! reload_in_progress) + if (! TARGET_FPU_SINGLE && ! reload_in_progress && optimize < 1) { addr = gen_reg_rtx (SImode); emit_insn (gen_fpu_switch0 (addr)); } emit_insn (pat); - if (! TARGET_FPU_SINGLE && ! reload_in_progress) + if (! TARGET_FPU_SINGLE && ! 
reload_in_progress && optimize < 1) { addr = gen_reg_rtx (SImode); emit_insn (gen_fpu_switch1 (addr)); @@ -4847,13 +4867,13 @@ emit_df_insn (pat) rtx pat; { rtx addr; - if (TARGET_FPU_SINGLE && ! reload_in_progress) + if (TARGET_FPU_SINGLE && ! reload_in_progress && optimize < 1) { addr = gen_reg_rtx (SImode); emit_insn (gen_fpu_switch0 (addr)); } emit_insn (pat); - if (TARGET_FPU_SINGLE && ! reload_in_progress) + if (TARGET_FPU_SINGLE && ! reload_in_progress && optimize < 1) { addr = gen_reg_rtx (SImode); emit_insn (gen_fpu_switch1 (addr)); @@ -4893,65 +4913,6 @@ expand_df_binop (fun, operands) emit_df_insn ((*fun) (operands[0], operands[1], operands[2], get_fpscr_rtx ())); } - -void -expand_fp_branch (compare, branch) - rtx (*compare) PARAMS ((void)), (*branch) PARAMS ((void)); -{ - (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn) - ((*compare) ()); - emit_jump_insn ((*branch) ()); -} - -/* We don't want to make fpscr call-saved, because that would prevent - channging it, and it would also cost an exstra instruction to save it. - We don't want it to be known as a global register either, because - that disables all flow analysis. But it has to be live at the function - return. Thus, we need to insert a USE at the end of the function. */ -/* This should best be called at about the time FINALIZE_PIC is called, - but not dependent on flag_pic. Alas, there is no suitable hook there, - so this gets called from HAVE_RETURN. */ -void -emit_fpscr_use () -{ - static int fpscr_uses = 0; - - if (rtx_equal_function_value_matters) - { - emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ())); - fpscr_uses++; - } - else - { - if (fpscr_uses > 1) - { - /* Due to he crude way we emit the USEs, we might end up with - some extra ones. Delete all but the last one. 
*/ - rtx insn; - - for (insn = get_last_insn(); insn; insn = PREV_INSN (insn)) - if (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) == USE - && GET_CODE (XEXP (PATTERN (insn), 0)) == REG - && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG) - { - insn = PREV_INSN (insn); - break; - } - for (; insn; insn = PREV_INSN (insn)) - if (GET_CODE (insn) == INSN - && GET_CODE (PATTERN (insn)) == USE - && GET_CODE (XEXP (PATTERN (insn), 0)) == REG - && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG) - { - PUT_CODE (insn, NOTE); - NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (insn) = 0; - } - } - fpscr_uses = 0; - } -} /* ??? gcc does flow analysis strictly after common subexpression elimination. As a result, common subespression elimination fails @@ -4980,8 +4941,6 @@ f(double a) remove assignments that are dead due to a following assignment in the same basic block. */ -int sh_flag_remove_dead_before_cse; - static void mark_use (x, reg_set_block) rtx x, *reg_set_block; @@ -5035,70 +4994,67 @@ mark_use (x, reg_set_block) } } } + +static rtx get_free_reg PARAMS ((HARD_REG_SET)); -void -remove_dead_before_cse () +/* This function returns a register to use to load the address to load + the fpscr from. Currently it always returns r1 or r7, but when we are + able to use pseudo registers after combine, or have a better mechanism + for choosing a register, it should be done here. */ +/* REGS_LIVE is the liveness information for the point for which we + need this allocation. In some bare-bones exit blocks, r1 is live at the + start. We can even have all of r0..r3 being live: +__complex__ long long f (double d) { if (d == 0) return 2; else return 3; } + INSN before which new insns are placed with will clobber the register + we return. If a basic block consists only of setting the return value + register to a pseudo and using that register, the return value is not + live before or after this block, yet we we'll insert our insns right in + the middle. 
*/ + +static rtx +get_free_reg (regs_live) + HARD_REG_SET regs_live; { - rtx *reg_set_block, last, last_call, insn, set; - int in_libcall = 0; + rtx reg; - /* This pass should run just once, after rtl generation. */ + if (! TEST_HARD_REG_BIT (regs_live, 1)) + return gen_rtx_REG (Pmode, 1); - if (! sh_flag_remove_dead_before_cse - || rtx_equal_function_value_matters - || reload_completed) - return; + /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target, + there shouldn't be anything but a jump before the function end. */ + if (! TEST_HARD_REG_BIT (regs_live, 7)) + return gen_rtx_REG (Pmode, 7); - sh_flag_remove_dead_before_cse = 0; - - reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx)); - bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx)); - last_call = last = get_last_insn (); - for (insn = last; insn; insn = PREV_INSN (insn)) - { - if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') - continue; - if (GET_CODE (insn) == JUMP_INSN) - { - last_call = last = insn; - continue; - } - set = single_set (insn); - - /* Don't delete parts of libcalls, since that would confuse cse, loop - and flow. */ - if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) - in_libcall = 1; - else if (in_libcall) - { - if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)) - in_libcall = 0; - } - else if (set && GET_CODE (SET_DEST (set)) == REG) - { - int regno = REGNO (SET_DEST (set)); - rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno] - ? 
last_call - : last); - if (reg_set_block[regno] == ref_insn - && (regno >= FIRST_PSEUDO_REGISTER - || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1) - && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn))) - { - PUT_CODE (insn, NOTE); - NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED; - NOTE_SOURCE_FILE (insn) = 0; - continue; - } - else - reg_set_block[REGNO (SET_DEST (set))] = ref_insn; - } - if (GET_CODE (insn) == CALL_INSN) - { - last_call = insn; - mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block); - } - mark_use (PATTERN (insn), reg_set_block); - } - return; + abort (); +} + +/* This function will set the fpscr from memory. + MODE is the mode we are setting it to. */ +void +fpscr_set_from_mem (mode, regs_live) + int mode; + HARD_REG_SET regs_live; +{ + enum attr_fp_mode fp_mode = mode; + rtx i; + rtx addr_reg = get_free_reg (regs_live); + + i = gen_rtx_SET (VOIDmode, addr_reg, + gen_rtx_SYMBOL_REF (SImode, "__fpscr_values")); + emit_insn (i); + if (fp_mode == (TARGET_FPU_SINGLE ? FP_MODE_SINGLE : FP_MODE_DOUBLE)) + { + rtx r = addr_reg; + addr_reg = get_free_reg (regs_live); + i = gen_rtx_SET (VOIDmode, addr_reg, + gen_rtx_PLUS (Pmode, r, GEN_INT (4))); + emit_insn (i); + } + + i = gen_rtx_SET (VOIDmode, + get_fpscr_rtx (), + gen_rtx_MEM (PSImode, gen_rtx_POST_INC (Pmode, addr_reg))); + i = emit_insn (i); + REG_NOTES (i) = gen_rtx_EXPR_LIST (REG_DEAD, addr_reg, REG_NOTES (i)); + REG_NOTES (i) = gen_rtx_EXPR_LIST (REG_INC, addr_reg, REG_NOTES (i)); } diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index e2cefc858ac..a3870df4832 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -1,5 +1,5 @@ /* Definitions of target machine for GNU compiler for Hitachi Super-H. - Copyright (C) 1993-1998, 1999 Free Software Foundation, Inc. + Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc. Contributed by Steve Chamberlain (sac@cygnus.com). Improved by Jim Wilson (wilson@cygnus.com). 
@@ -204,8 +204,6 @@ extern int target_flags; do { \ if (LEVEL) \ flag_omit_frame_pointer = -1; \ - if (LEVEL) \ - sh_flag_remove_dead_before_cse = 1; \ if (SIZE) \ target_flags |= SPACE_BIT; \ } while (0) @@ -756,9 +754,9 @@ extern enum reg_class reg_class_from_letter[]; /* Similar, but for floating constants, and defining letters G and H. Here VALUE is the CONST_DOUBLE rtx itself. */ -#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ -((C) == 'G' ? fp_zero_operand (VALUE) \ - : (C) == 'H' ? fp_one_operand (VALUE) \ +#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ +((C) == 'G' ? (fp_zero_operand (VALUE) && fldi_ok ()) \ + : (C) == 'H' ? (fp_one_operand (VALUE) && fldi_ok ()) \ : (C) == 'F') /* Given an rtx X being reloaded into a reg required to be @@ -791,7 +789,8 @@ extern enum reg_class reg_class_from_letter[]; #define SECONDARY_INPUT_RELOAD_CLASS(CLASS,MODE,X) \ ((((CLASS) == FP_REGS || (CLASS) == FP0_REGS || (CLASS) == DF_REGS) \ && immediate_operand ((X), (MODE)) \ - && ! ((fp_zero_operand (X) || fp_one_operand (X)) && (MODE) == SFmode))\ + && ! ((fp_zero_operand (X) || fp_one_operand (X)) \ + && (MODE) == SFmode && fldi_ok ())) \ ? R0_REGS \ : CLASS == FPUL_REGS && immediate_operand ((X), (MODE)) \ ? (GET_CODE (X) == CONST_INT && CONST_OK_FOR_I (INTVAL (X)) \ @@ -2122,7 +2121,6 @@ sh_valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS) #define PRAGMA_INSERT_ATTRIBUTES(node, pattr, prefix_attr) \ sh_pragma_insert_attributes (node, pattr, prefix_attr) -extern int sh_flag_remove_dead_before_cse; extern int rtx_equal_function_value_matters; extern struct rtx_def *fpscr_rtx; @@ -2239,3 +2237,27 @@ do { \ #define SH_DYNAMIC_SHIFT_COST \ (TARGET_HARD_SH4 ? 1 : TARGET_SH3 ? (TARGET_SMALLCODE ? 
1 : 2) : 20) + + +#define NUM_MODES_FOR_MODE_SWITCHING { FP_MODE_NONE } + +#define OPTIMIZE_MODE_SWITCHING(ENTITY) TARGET_SH4 + +#define MODE_USES_IN_EXIT_BLOCK gen_rtx_USE (VOIDmode, get_fpscr_rtx ()) + +#define MODE_NEEDED(ENTITY, INSN) \ + (recog_memoized (INSN) >= 0 \ + ? get_attr_fp_mode (INSN) \ + : (GET_CODE (PATTERN (INSN)) == USE \ + && rtx_equal_p (XEXP (PATTERN (INSN), 0), get_fpscr_rtx ())) \ + ? (TARGET_FPU_SINGLE ? FP_MODE_SINGLE : FP_MODE_DOUBLE) \ + : FP_MODE_NONE) + +#define MODE_AT_ENTRY(ENTITY) \ + (TARGET_FPU_SINGLE ? FP_MODE_SINGLE : FP_MODE_DOUBLE) + +#define MODE_PRIORITY_TO_MODE(ENTITY, N) \ + ((TARGET_FPU_SINGLE != 0) ^ (N) ? FP_MODE_SINGLE : FP_MODE_DOUBLE) + +#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ + fpscr_set_from_mem ((MODE), (HARD_REGS_LIVE)) diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index a0d50366b08..237f774298d 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -2812,18 +2812,7 @@ if (prepare_move_operands (operands, DFmode)) DONE; if (TARGET_SH4) { - if (no_new_pseudos) - { - /* ??? FIXME: This is only a stopgap fix. There is no guarantee - that fpscr is in the right state. */ - emit_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); - DONE; - } emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ())); - /* We need something to tag possible REG_LIBCALL notes on to. */ - if (TARGET_FPU_SINGLE && rtx_equal_function_value_matters - && GET_CODE (operands[0]) == REG) - emit_insn (gen_mov_nop (operands[0])); DONE; } }") @@ -2910,18 +2899,7 @@ DONE; if (TARGET_SH3E) { - if (no_new_pseudos) - { - /* ??? FIXME: This is only a stopgap fix. There is no guarantee - that fpscr is in the right state. */ - emit_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); - DONE; - } emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); - /* We need something to tag possible REG_LIBCALL notes on to. */ - if (! 
TARGET_FPU_SINGLE && rtx_equal_function_value_matters - && GET_CODE (operands[0]) == REG) - emit_insn (gen_mov_nop (operands[0])); DONE; } }") @@ -3415,9 +3393,7 @@ ;; that doesn't mix with emitting a prologue. (define_insn "return" [(return)] - "emit_fpscr_use (), - remove_dead_before_cse (), - reload_completed" + "reload_completed" "%@ %#" [(set_attr "type" "return") (set_attr "needs_delay_slot" "yes")]) diff --git a/gcc/lcm.c b/gcc/lcm.c index f3e0dc50aab..7598b34d126 100644 --- a/gcc/lcm.c +++ b/gcc/lcm.c @@ -61,6 +61,10 @@ Boston, MA 02111-1307, USA. */ #include "insn-config.h" #include "recog.h" #include "basic-block.h" +#include "tm_p.h" +/* We want target macros for the mode switching code to be able to refer + to instruction attribute values. */ +#include "insn-attr.h" /* Edge based LCM routines. */ static void compute_antinout_edge PARAMS ((sbitmap *, sbitmap *, @@ -794,3 +798,458 @@ pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill, return edge_list; } + +/* MODE SWITCHING */ +/* The algorithm for setting the modes consists of scanning the insn list + and finding all the insns which require a specific mode. Each insn gets + a unique struct seginfo element. These structures are inserted into a list + for each basic block. For each entity, there is an array of bb_info over + the flow graph basic blocks (local var 'bb_info'), and contains a list + of all insns within that basic block, in the order they are encountered. + + For each entity, any basic block WITHOUT any insns requiring a specific + mode are given a single entry, without a mode. (Each basic block + in the flow graph must have at least one entry in the segment table.) + + The LCM algorithm is then run over the flow graph to determine where to + place the sets to the highest-priority value in respect of first the first + insn in any one block. 
Any adjustments required to the transparency + vectors are made, then the next iteration starts for the next-lower + priority mode, till for each entity all modes are exhausted. + + More details are located in the code for optimize_mode_switching(). */ + +/* This structure contains the information for each insn which requires + either single or double mode to be set. + MODE is the mode this insn must be executed in. + INSN_PTR is the insn to be executed. + BBNUM is the flow graph basic block this insn occurs in. + NEXT is the next insn in the same basic block. */ +struct seginfo +{ + int mode; + rtx insn_ptr; + int bbnum; + struct seginfo *next; + HARD_REG_SET regs_live; +}; + +struct bb_info +{ + struct seginfo *seginfo; + int computing; +}; + +/* These bitmaps are used for the LCM algorithm. */ + +static sbitmap *antic; +static sbitmap *transp; +static sbitmap *comp; +static sbitmap *delete; +static sbitmap *insert; + +static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET)); +static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *)); +static void make_preds_opaque PARAMS ((basic_block, int)); +static void reg_dies PARAMS ((rtx, HARD_REG_SET)); +static void reg_becomes_live PARAMS ((rtx, rtx, void *)); + +/* This function will allocate a new seginfo structure, initialized + with the MODE, INSN, basic block BB and REGS_LIVE parameters. */ +static struct seginfo * +new_seginfo (mode, insn, bb, regs_live) + int mode; + rtx insn; + int bb; + HARD_REG_SET regs_live; +{ + struct seginfo *ptr; + ptr = xmalloc (sizeof (struct seginfo)); + ptr->mode = mode; + ptr->insn_ptr = insn; + ptr->bbnum = bb; + ptr->next = NULL; + COPY_HARD_REG_SET (ptr->regs_live, regs_live); + return ptr; +} + +/* Add a seginfo element to the end of a list. + HEAD is a pointer to the list beginning. + INFO is the structure to be linked in. 
*/ +static void +add_seginfo (head, info) + struct bb_info *head; + struct seginfo *info; +{ + struct seginfo *ptr; + + if (head->seginfo == NULL) + head->seginfo = info; + else + { + ptr = head->seginfo; + while (ptr->next != NULL) + ptr = ptr->next; + ptr->next = info; + } +} + +/* Make all predecessors of basic block B opaque, recursively, till we hit + some that are already non-transparent, or an edge where aux is set; that + denotes that a mode set is to be done on that edge. + J is the bit number in the bitmaps that corresponds to the entity that + we are currently handling mode-switching for. */ +static void +make_preds_opaque (b, j) + basic_block b; + int j; +{ + edge e; + + for (e = b->pred; e; e = e->pred_next) + { + basic_block pb = e->src; + if (e->aux || ! TEST_BIT (transp[pb->index], j)) + continue; + RESET_BIT (transp[pb->index], j); + make_preds_opaque (pb, j); + } +} + +/* Record in LIVE that every hard register occupied by REG died. */ +static void +reg_dies (reg, live) + rtx reg; + HARD_REG_SET live; +{ + int regno; + + if (GET_CODE (reg) != REG) + return; + regno = REGNO (reg); + if (regno < FIRST_PSEUDO_REGISTER) + { + int nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)); + + for (; nregs-- > 0; regno++) + CLEAR_HARD_REG_BIT (live, regno); + } +} + +/* Record in LIVE that register REG became live. + This is called via note_stores. */ +static void +reg_becomes_live (reg, setter, live) + rtx reg; + rtx setter ATTRIBUTE_UNUSED; + void *live; +{ + int regno; + + if (GET_CODE (reg) == SUBREG) + reg = SUBREG_REG (reg); + + if (GET_CODE (reg) != REG) + return; + + regno = REGNO (reg); + if (regno < FIRST_PSEUDO_REGISTER) + { + int nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)); + + for (; nregs-- > 0; regno++) + SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno); + } +} + +/* Find all insns that need a particular mode + setting, and insert the necessary mode switches. 
*/ +void +optimize_mode_switching (file) + FILE *file ATTRIBUTE_UNUSED; +{ +#ifdef OPTIMIZE_MODE_SWITCHING + rtx insn; + int bb, e; + edge eg; + int need_commit = 0; + sbitmap *kill; + struct edge_list *edge_list; + static int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING; +#define N_ENTITIES (sizeof num_modes / sizeof (int)) + int entity_map[N_ENTITIES]; + struct bb_info *bb_info[N_ENTITIES]; + int i, j; + int n_entities; + int max_num_modes = 0; + + for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--) + { + if (OPTIMIZE_MODE_SWITCHING (e)) + { + /* Create the list of segments within each basic block. */ + bb_info[n_entities] + = (struct bb_info *) xcalloc (n_basic_blocks, sizeof **bb_info); + entity_map[n_entities++] = e; + if (num_modes[e] > max_num_modes) + max_num_modes = num_modes[e]; + } + } + if (! n_entities) + return; + +#ifdef MODE_USES_IN_EXIT_BLOCK + /* For some ABIs a particular mode setting is required at function exit. */ + + for (eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next) + { + int bb = eg->src->index; + + rtx insn = BLOCK_END (bb); + rtx use = MODE_USES_IN_EXIT_BLOCK; + + /* If the block ends with the use of the return value + and / or a return, insert the new use(s) in front of them. */ + while ((GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == USE) + || GET_CODE (insn) == JUMP_INSN) + insn = PREV_INSN (insn); + use = emit_insn_after (use, insn); + if (insn == BLOCK_END (bb)) + BLOCK_END (bb) = use; + else if (NEXT_INSN (use) == BLOCK_HEAD (bb)) + BLOCK_HEAD (bb) = NEXT_INSN (insn); + } +#endif + + /* Create the bitmap vectors. 
*/ + + antic = sbitmap_vector_alloc (n_basic_blocks, n_entities); + transp = sbitmap_vector_alloc (n_basic_blocks, n_entities); + comp = sbitmap_vector_alloc (n_basic_blocks, n_entities); + + sbitmap_vector_ones (transp, n_basic_blocks); + + for (j = n_entities - 1; j >= 0; j--) + { + int e = entity_map[j]; + int no_mode = num_modes[e]; + struct bb_info *info = bb_info[j]; + + /* Determine what the first use (if any) need for a mode of entity E is. + This will be th mode that is anticipatable for this block. + Also compute the initial transparency settings. */ + for (bb = 0 ; bb < n_basic_blocks; bb++) + { + struct seginfo *ptr; + int last_mode = no_mode; + HARD_REG_SET live_now; + + REG_SET_TO_HARD_REG_SET (live_now, + BASIC_BLOCK (bb)->global_live_at_start); + for (insn = BLOCK_HEAD (bb); + insn != NULL && insn != NEXT_INSN (BLOCK_END (bb)); + insn = NEXT_INSN (insn)) + { + if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') + { + int mode = MODE_NEEDED (e, insn); + rtx link; + + if (mode != no_mode && mode != last_mode) + { + last_mode = mode; + ptr = new_seginfo (mode, insn, bb, live_now); + add_seginfo (info + bb, ptr); + RESET_BIT (transp[bb], j); + } + + /* Update LIVE_NOW. */ + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD) + reg_dies (XEXP (link, 0), live_now); + note_stores (PATTERN (insn), reg_becomes_live, &live_now); + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_UNUSED) + reg_dies (XEXP (link, 0), live_now); + } + } + info[bb].computing = last_mode; + /* Check for blocks without ANY mode requirements. 
*/ + if (last_mode == no_mode) + { + ptr = new_seginfo (no_mode, insn, bb, live_now); + add_seginfo (info + bb, ptr); + } + } +#ifdef MODE_AT_ENTRY + { + int mode = MODE_AT_ENTRY (e); + if (mode != no_mode) + { + for (eg = ENTRY_BLOCK_PTR->succ; eg; eg = eg->succ_next) + { + bb = eg->dest->index; + + /* By always making this nontransparent, we save + an extra check in make_preds_opaque. We also + need this to avoid confusing pre_edge_lcm when + antic is cleared but transp and comp are set. */ + RESET_BIT (transp[bb], j); + + /* If the block already has MODE, pretend it + has none (because we don't need to set it), + but retain whatever mode it computes. */ + if (info[bb].seginfo->mode == mode) + { + info[bb].seginfo->mode = no_mode; + } + /* Insert a fake computing definition of MODE into entry blocks + which compute no mode. This represents the mode on entry. */ + else if (info[bb].computing == no_mode) + { + info[bb].computing = mode; + info[bb].seginfo->mode = no_mode; + } + } + } + } +#endif /* MODE_AT_ENTRY */ + } + + kill = sbitmap_vector_alloc (n_basic_blocks, n_entities); + for (i = 0; i < max_num_modes; i++) + { + int current_mode[N_ENTITIES]; + + /* Set the anticipatable and computing arrays. */ + sbitmap_vector_zero (antic, n_basic_blocks); + sbitmap_vector_zero (comp, n_basic_blocks); + for (j = n_entities - 1; j >= 0; j--) + { + int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i); + struct bb_info *info = bb_info[j]; + + for (bb = 0 ; bb < n_basic_blocks; bb++) + { + + if (info[bb].seginfo->mode == m) + SET_BIT (antic[bb], j); + + if (info[bb].computing == m) + SET_BIT (comp[bb], j); + } + } + + /* Calculate the optimal locations for the + placement mode switches to modes with priority I. 
*/ + + for (bb = n_basic_blocks - 1; bb >= 0; bb--) + sbitmap_not (kill[bb], transp[bb]); + edge_list = pre_edge_lcm (file, 1, transp, comp, antic, + kill, &insert, &delete); + + for (j = n_entities - 1; j >=0; j--) + { + /* Insert all mode sets that have been inserted by lcm. */ + int no_mode = num_modes[entity_map[j]]; + /* Wherever we have moved a mode setting upwards in the flow graph, + the blocks between the new setting site and the now redundant + computation ceases to be transparent for any lower-priority + mode of the same entity. First set the aux field of each + insertion site edge non-transparent, then propagate the new + non-transparency from the redundant computation upwards till + we hit an insertion site or an already non-transparent block. */ + for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--) + { + edge eg = INDEX_EDGE (edge_list, e); + int mode; + basic_block src_bb; + HARD_REG_SET live_at_edge; + rtx mode_set; + + eg->aux = 0; + + if (! TEST_BIT (insert[e], j)) + continue; + + eg->aux = (void *)1; + + mode = current_mode[j]; + src_bb = eg->src; + + REG_SET_TO_HARD_REG_SET (live_at_edge, src_bb->global_live_at_end); + start_sequence (); + EMIT_MODE_SET (entity_map[j], mode, live_at_edge); + mode_set = gen_sequence (); + end_sequence (); + + /* If this is an abnormal edge, we'll insert at the end of the + previous block. */ + if (eg->flags & EDGE_ABNORMAL) + { + + src_bb->end = emit_insn_after (mode_set, src_bb->end); + bb_info[j][src_bb->index].computing = mode; + RESET_BIT (transp[src_bb->index], j); + } + else + { + need_commit = 1; + insert_insn_on_edge (mode_set, eg); + } + + } + + for (bb = n_basic_blocks - 1; bb >= 0; bb--) + { + if (TEST_BIT (delete[bb], j)) + { + make_preds_opaque (BASIC_BLOCK (bb), j); + /* Cancel the 'deleted' mode set. */ + bb_info[j][bb].seginfo->mode = no_mode; + } + } + } + free_edge_list (edge_list); + } + + /* Now output the remaining mode sets in all the segments. 
*/ + for (j = n_entities - 1; j >= 0; j--) + { + for (bb = n_basic_blocks - 1; bb >= 0; bb--) + { + struct seginfo *ptr, *next; + for (ptr = bb_info[j][bb].seginfo; ptr; ptr = next) + { + next = ptr->next; + if (ptr->mode != FP_MODE_NONE) + { + rtx mode_set; + + start_sequence (); + EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live); + mode_set = gen_sequence (); + end_sequence (); + + emit_block_insn_before (mode_set, ptr->insn_ptr, + BASIC_BLOCK (ptr->bbnum)); + } + free (ptr); + } + } + free (bb_info[j]); + } + + /* Finished. Free up all the things we've allocated. */ + + sbitmap_vector_free (kill); + sbitmap_vector_free (antic); + sbitmap_vector_free (transp); + sbitmap_vector_free (comp); + sbitmap_vector_free (delete); + sbitmap_vector_free (insert); + + if (need_commit) + commit_edge_insertions (); +#endif /* OPTIMIZE_MODE_SWITCHING */ +} diff --git a/gcc/tm.texi b/gcc/tm.texi index 14365ce0c24..fe89ee080d2 100644 --- a/gcc/tm.texi +++ b/gcc/tm.texi @@ -37,6 +37,7 @@ includes @file{tm.h} and most compiler source files include * Assembler Format:: Defining how to write insns and pseudo-ops to output. * Debugging Info:: Defining the format of debugging output. * Cross-compilation:: Handling floating point for cross-compilers. +* Mode Switching:: Insertion of mode-switching instructions. * Misc:: Everything else. @end menu @@ -7194,6 +7195,85 @@ The value is in the target machine's representation for mode @var{mode} and has the type @code{REAL_VALUE_TYPE}. @end table +@node Mode Switching +@section Mode Switching Instructions +@cindex mode switching +The following macros control mode switching optimizations: + +@table @code +@findex OPTIMIZE_MODE_SWITCHING +@item OPTIMIZE_MODE_SWITCHING (@var{entity}) +Define this macro if the port needs extra instructions inserted for mode +switching in an optimizing compilation. 
+ +For example, the SH4 can perform both single and double precision +floating point operations, but to perform a single precision operation, +the FPSCR PR bit has to be cleared, while for a double precision +operation, this bit has to be set. Changing the PR bit requires a general +purpose register as a scratch register, hence these FPSCR sets have to +be inserted before reload, i.e. you can't put this into instruction emitting +or @code{MACHINE_DEPENDENT_REORG}. + +You can have multiple entities that are mode-switched, and select at run time +which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should +return non-zero for any @var{entity} that needs mode-switching. +If you define this macro, you also have to define +@code{NUM_MODES_FOR_MODE_SWITCHING}, @code{MODE_NEEDED}, +@code{MODE_PRIORITY_TO_MODE} and @code{EMIT_MODE_SET}. +@code{MODE_AT_ENTRY} and @code{MODE_USES_IN_EXIT_BLOCK} are optional. + +@findex NUM_MODES_FOR_MODE_SWITCHING +@item NUM_MODES_FOR_MODE_SWITCHING +If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as an +initializer for an array of integers. Each initializer element +N refers to an entity that needs mode switching, and specifies the number +of different modes that might need to be set for this entity. +The position of the element in the initializer - starting counting at +zero - determines the integer that is used to refer to the mode-switched +entity in question. +In macros that take mode arguments / yield a mode result, modes are +represented as numbers 0 .. N - 1. N is used to specify that no mode +switch is needed / supplied. + +@findex MODE_USES_IN_EXIT_BLOCK +@item MODE_USES_IN_EXIT_BLOCK +If this macro is defined, it is called for each exit block when mode switching +optimization is performed. Its return value should be the pattern of an insn, +or a sequence of insns. It is emitted before the return insn / use insns at +the end of the exit block.
+ +This is done before insns are examined for their need of any mode switching. + +@findex MODE_NEEDED +@item MODE_NEEDED (@var{entity}, @var{insn}) +@var{entity} is an integer specifying a mode-switched entity. If +@code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro to +return an integer value not larger than the corresponding element in +@code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity} must +be switched into prior to the execution of @var{insn}. + +@findex MODE_AT_ENTRY +@item MODE_AT_ENTRY (@var{entity}) +If this macro is defined, it is evaluated for every @var{entity} that needs +mode switching. It should evaluate to an integer, which is a mode that +@var{entity} is assumed to be switched to at function entry. + +@findex MODE_PRIORITY_TO_MODE +@item MODE_PRIORITY_TO_MODE (@var{entity}, @var{n}) +This macro specifies the order in which modes for @var{entity} are processed. +0 is the highest priority, @code{NUM_MODES_FOR_MODE_SWITCHING}[@var{entity}] - 1 the +lowest. The value of the macro should be an integer designating a mode +for @var{entity}. For any fixed @var{entity}, @code{MODE_PRIORITY_TO_MODE} +(@var{entity}, @var{n}) shall be a bijection in 0 .. +@code{NUM_MODES_FOR_MODE_SWITCHING}[@var{entity}] - 1. + +@findex EMIT_MODE_SET +@item EMIT_MODE_SET (@var{entity}, @var{mode}, @var{hard_regs_live}) +Generate one or more insns to set @var{entity} to @var{mode}. +@var{hard_regs_live} is the set of hard registers live at the point where +the insn(s) are to be inserted. +@end table + @node Misc @section Miscellaneous Parameters @cindex parameters, miscellaneous diff --git a/gcc/toplev.c b/gcc/toplev.c index 4d9eb8a8e53..42e47a4d2f1 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -3305,6 +3305,11 @@ rest_of_compilation (decl) /* Print function header into sched dump now because doing the sched analysis makes some of the dump.
*/ + if (optimize && n_basic_blocks) + { + optimize_mode_switching (NULL_PTR); + } + #ifdef INSN_SCHEDULING if (optimize > 0 && flag_schedule_insns) {