From 17bf6d250c5f405a75ca3744074cb63386a3376d Mon Sep 17 00:00:00 2001 From: Chung-Ju Wu Date: Fri, 4 Jul 2014 07:39:23 +0000 Subject: [PATCH] Move fp-as-gp optimization to nds32-fp-as-gp.c module. gcc/ * config/nds32/nds32.c (nds32_have_prologue_p): Move to ... (nds32_symbol_load_store_p): Move to ... (nds32_fp_as_gp_check_available): Move to ... * config/nds32/nds32-fp-as-gp.c: ... here. * config/nds32/nds32-protos.h (nds32_symbol_load_store_p): Remove extern declaration. Co-Authored-By: Kito Cheng Co-Authored-By: Monk Chiang From-SVN: r212288 --- gcc/ChangeLog | 11 ++ gcc/config/nds32/nds32-fp-as-gp.c | 199 ++++++++++++++++++++++++++++++ gcc/config/nds32/nds32-protos.h | 1 - gcc/config/nds32/nds32.c | 162 ------------------------ 4 files changed, 210 insertions(+), 163 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e7029bf6a68..3b0f8ef6bcd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2014-07-04 Chung-Ju Wu + Kito Cheng + Monk Chiang + + * config/nds32/nds32.c (nds32_have_prologue_p): Move to ... + (nds32_symbol_load_store_p): Move to ... + (nds32_fp_as_gp_check_available): Move to ... + * config/nds32/nds32-fp-as-gp.c: ... here. + * config/nds32/nds32-protos.h (nds32_symbol_load_store_p): Remove + extern declaration. + 2014-07-04 Chung-Ju Wu Kito Cheng Monk Chiang diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c index a29235848cb..d7932678b84 100644 --- a/gcc/config/nds32/nds32-fp-as-gp.c +++ b/gcc/config/nds32/nds32-fp-as-gp.c @@ -17,3 +17,202 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. 
*/ + +/* ------------------------------------------------------------------------ */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "stor-layout.h" +#include "varasm.h" +#include "calls.h" +#include "rtl.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "insn-config.h" /* Required by recog.h. */ +#include "conditions.h" +#include "output.h" +#include "insn-attr.h" /* For DFA state_t. */ +#include "insn-codes.h" /* For CODE_FOR_xxx. */ +#include "reload.h" /* For push_reload(). */ +#include "flags.h" +#include "function.h" +#include "expr.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "df.h" +#include "tm_p.h" +#include "tm-constrs.h" +#include "optabs.h" /* For GEN_FCN. */ +#include "target.h" +#include "target-def.h" +#include "langhooks.h" /* For add_builtin_function(). */ +#include "ggc.h" +#include "builtins.h" + +/* ------------------------------------------------------------------------ */ + +/* A helper function to check if this function should contain a prologue. */ +static int +nds32_have_prologue_p (void) +{ + int i; + + for (i = 0; i < 28; i++) + if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) + return 1; + + return (flag_pic + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); +} + +/* Return true if INSN is a load/store with SYMBOL_REF addressing mode + and memory mode is SImode. */ +static bool +nds32_symbol_load_store_p (rtx insn) +{ + rtx mem_src = NULL_RTX; + + switch (get_attr_type (insn)) + { + case TYPE_LOAD: + mem_src = SET_SRC (PATTERN (insn)); + break; + case TYPE_STORE: + mem_src = SET_DEST (PATTERN (insn)); + break; + default: + break; + } + + /* Find load/store insn whose addressing mode is SYMBOL_REF. 
*/ + if (mem_src != NULL_RTX) + { + if ((GET_CODE (mem_src) == ZERO_EXTEND) + || (GET_CODE (mem_src) == SIGN_EXTEND)) + mem_src = XEXP (mem_src, 0); + + if ((GET_CODE (XEXP (mem_src, 0)) == SYMBOL_REF) + || (GET_CODE (XEXP (mem_src, 0)) == LO_SUM)) + return true; + } + + return false; +} + +/* Function to determine whether it is worth to do fp_as_gp optimization. + Return 0: It is NOT worth to do fp_as_gp optimization. + Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization. + Note that if it is worth to do fp_as_gp optimization, + we MUST set FP_REGNUM ever live in this function. */ +int +nds32_fp_as_gp_check_available (void) +{ + /* If there exists ANY of following conditions, + we DO NOT perform fp_as_gp optimization: + 1. TARGET_FORBID_FP_AS_GP is set + regardless of the TARGET_FORCE_FP_AS_GP. + 2. User explicitly uses 'naked' attribute. + 3. Not optimize for size. + 4. Need frame pointer. + 5. If $fp is already required to be saved, + it means $fp is already chosen by register allocator. + Thus we had better not use it for fp_as_gp optimization. + 6. This function is a vararg function. + DO NOT apply fp_as_gp optimization on this function + because it may change and break stack frame. + 7. The epilogue is empty. + This happens when the function uses exit() + or its attribute is noreturn. + In that case, compiler will not expand epilogue + so that we have no chance to output .omit_fp_end directive. */ + if (TARGET_FORBID_FP_AS_GP + || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) + || !optimize_size + || frame_pointer_needed + || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) + || (cfun->stdarg == 1) + || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) + return 0; + + /* Now we can check the possibility of using fp_as_gp optimization. */ + if (TARGET_FORCE_FP_AS_GP) + { + /* User explicitly issues -mforce-fp-as-gp option. 
*/ + df_set_regs_ever_live (FP_REGNUM, 1); + return 1; + } + else + { + /* In the following we are going to evaluate whether + it is worth to do fp_as_gp optimization. */ + int good_gain = 0; + int symbol_count = 0; + + int threshold; + rtx insn; + + /* We check whether a prologue is already required. + Note that $gp will be saved in prologue for PIC code generation. + After that, we can set threshold by the existence of prologue. + Each fp-implied instruction will gain 2-byte code size + from gp-aware instruction, so we have following heuristics. */ + if (flag_pic + || nds32_have_prologue_p ()) + { + /* Have-prologue: + Compiler already intends to generate prologue content, + so the fp_as_gp optimization will only insert + 'la $fp,_FP_BASE_' instruction, which will be + converted into 4-byte instruction at link time. + The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */ + threshold = 3; + } + else + { + /* Non-prologue: + Compiler originally does not generate prologue content, + so the fp_as_gp optimization will NOT ONLY insert + 'la $fp,_FP_BASE_' instruction, but also causes + push/pop instructions. + If we are using v3push (push25/pop25), + the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; + If we are using normal push (smw/lmw), + the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ + threshold = 5 + (TARGET_V3PUSH ? 0 : 2); + } + + /* We would like to traverse every instruction in this function. + So we need to have push_topmost_sequence()/pop_topmost_sequence() + surrounding our for-loop evaluation. */ + push_topmost_sequence (); + /* Count the insns whose addressing mode is symbolic. */ + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (single_set (insn) && nds32_symbol_load_store_p (insn)) + symbol_count++; + + if (symbol_count == threshold) + { + good_gain = 1; + break; + } + } + pop_topmost_sequence (); + + /* Enable fp_as_gp optimization when potential gain is good enough. 
*/ + if (good_gain) + { + df_set_regs_ever_live (FP_REGNUM, 1); + return 1; + } + } + + /* By default we return 0. */ + return 0; +} + +/* ------------------------------------------------------------------------ */ diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h index 2bebaa62991..307361bfc34 100644 --- a/gcc/config/nds32/nds32-protos.h +++ b/gcc/config/nds32/nds32-protos.h @@ -96,7 +96,6 @@ extern int nds32_adjust_insn_length (rtx, int); /* Auxiliary functions for FP_AS_GP detection. */ -extern bool nds32_symbol_load_store_p (rtx); extern int nds32_fp_as_gp_check_available (void); /* Auxiliary functions for jump table generation. */ diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c index ad05eb135f2..8777e8b4295 100644 --- a/gcc/config/nds32/nds32.c +++ b/gcc/config/nds32/nds32.c @@ -1106,21 +1106,6 @@ nds32_legitimate_index_p (enum machine_mode outer_mode, } } -/* A helper function to check if this function should contain prologue. */ -static int -nds32_have_prologue_p (void) -{ - int i; - - for (i = 0; i < 28; i++) - if (NDS32_REQUIRED_CALLEE_SAVED_P (i)) - return 1; - - return (flag_pic - || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) - || NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM)); -} - /* ------------------------------------------------------------------------ */ /* PART 3: Implement target hook stuff definitions. */ @@ -3164,153 +3149,6 @@ nds32_adjust_insn_length (rtx insn, int length) } - -/* Return true if is load/store with SYMBOL_REF addressing mode - and memory mode is SImode. */ -bool -nds32_symbol_load_store_p (rtx insn) -{ - rtx mem_src = NULL_RTX; - - switch (get_attr_type (insn)) - { - case TYPE_LOAD: - mem_src = SET_SRC (PATTERN (insn)); - break; - case TYPE_STORE: - mem_src = SET_DEST (PATTERN (insn)); - break; - default: - break; - } - - /* Find load/store insn with addressing mode is SYMBOL_REF. 
*/ - if (mem_src != NULL_RTX) - { - if ((GET_CODE (mem_src) == ZERO_EXTEND) - || (GET_CODE (mem_src) == SIGN_EXTEND)) - mem_src = XEXP (mem_src, 0); - - if ((GET_CODE (XEXP (mem_src, 0)) == SYMBOL_REF) - || (GET_CODE (XEXP (mem_src, 0)) == LO_SUM)) - return true; - } - - return false; -} - -/* Function to determine whether it is worth to do fp_as_gp optimization. - Return 0: It is NOT worth to do fp_as_gp optimization. - Return 1: It is APPROXIMATELY worth to do fp_as_gp optimization. - Note that if it is worth to do fp_as_gp optimization, - we MUST set FP_REGNUM ever live in this function. */ -int -nds32_fp_as_gp_check_available (void) -{ - /* If there exists ANY of following conditions, - we DO NOT perform fp_as_gp optimization: - 1. TARGET_FORBID_FP_AS_GP is set - regardless of the TARGET_FORCE_FP_AS_GP. - 2. User explicitly uses 'naked' attribute. - 3. Not optimize for size. - 4. Need frame pointer. - 5. If $fp is already required to be saved, - it means $fp is already choosen by register allocator. - Thus we better not to use it for fp_as_gp optimization. - 6. This function is a vararg function. - DO NOT apply fp_as_gp optimization on this function - because it may change and break stack frame. - 7. The epilogue is empty. - This happens when the function uses exit() - or its attribute is no_return. - In that case, compiler will not expand epilogue - so that we have no chance to output .omit_fp_end directive. */ - if (TARGET_FORBID_FP_AS_GP - || lookup_attribute ("naked", DECL_ATTRIBUTES (current_function_decl)) - || !optimize_size - || frame_pointer_needed - || NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM) - || (cfun->stdarg == 1) - || (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL)) - return 0; - - /* Now we can check the possibility of using fp_as_gp optimization. */ - if (TARGET_FORCE_FP_AS_GP) - { - /* User explicitly issues -mforce-fp-as-gp option. 
*/ - df_set_regs_ever_live (FP_REGNUM, 1); - return 1; - } - else - { - /* In the following we are going to evaluate whether - it is worth to do fp_as_gp optimization. */ - int good_gain = 0; - int symbol_count = 0; - - int threshold; - rtx insn; - - /* We check if there already requires prologue. - Note that $gp will be saved in prologue for PIC code generation. - After that, we can set threshold by the existence of prologue. - Each fp-implied instruction will gain 2-byte code size - from gp-aware instruction, so we have following heuristics. */ - if (flag_pic - || nds32_have_prologue_p ()) - { - /* Have-prologue: - Compiler already intends to generate prologue content, - so the fp_as_gp optimization will only insert - 'la $fp,_FP_BASE_' instruction, which will be - converted into 4-byte instruction at link time. - The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */ - threshold = 3; - } - else - { - /* None-prologue: - Compiler originally does not generate prologue content, - so the fp_as_gp optimization will NOT ONLY insert - 'la $fp,_FP_BASE' instruction, but also causes - push/pop instructions. - If we are using v3push (push25/pop25), - the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2; - If we are using normal push (smw/lmw), - the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */ - threshold = 5 + (TARGET_V3PUSH ? 0 : 2); - } - - /* We would like to traverse every instruction in this function. - So we need to have push_topmost_sequence()/pop_topmost_sequence() - surrounding our for-loop evaluation. */ - push_topmost_sequence (); - /* Counting the insn number which the addressing mode is symbol. */ - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (single_set (insn) && nds32_symbol_load_store_p (insn)) - symbol_count++; - - if (symbol_count == threshold) - { - good_gain = 1; - break; - } - } - pop_topmost_sequence (); - - /* Enable fp_as_gp optimization when potential gain is good enough. 
*/ - if (good_gain) - { - df_set_regs_ever_live (FP_REGNUM, 1); - return 1; - } - } - - /* By default we return 0. */ - return 0; -} - /* Return align 2 (log base 2) if the next instruction of LABEL is 4 byte. */ int nds32_target_alignment (rtx label)