[NDS32] Implement fp-as-gp optimization.

gcc/
	* config/nds32/constants.md (unspec_volatile_element): Add
	UNSPEC_VOLATILE_OMIT_FP_BEGIN and UNSPEC_VOLATILE_OMIT_FP_END.
	* config/nds32/nds32-fp-as-gp.c: New implementation of fp_as_gp
	optimization.
	* config/nds32/nds32-protos.h (nds32_naked_function_p): Declare.
	(make_pass_nds32_fp_as_gp): Declare.
	* config/nds32/nds32.c (nds32_register_passes): Add fp_as_gp as one
	optimization pass.
	(nds32_asm_function_end_prologue): Remove unused asm output.
	(nds32_asm_function_begin_epilogue): Remove unused asm output.
	(nds32_asm_file_start): Output necessary fp_as_gp information.
	(nds32_option_override): Adjust register usage.
	(nds32_expand_prologue): Consider fp_as_gp situation.
	(nds32_expand_prologue_v3push): Consider fp_as_gp situation.
	* config/nds32/nds32.md (prologue): Check fp_as_gp_p and naked_p.
	(epilogue): Ditto.
	(return): Ditto.
	(simple_return): Ditto.
	(omit_fp_begin): Output special directive for fp_as_gp.
	(omit_fp_end): Output special directive for fp_as_gp.
	* config/nds32/nds32.opt (mfp-as-gp, mno-fp-as-gp, mforce-fp-as-gp,
	mforbid-fp-as-gp): New options.

Co-Authored-By: Shiva Chen <shiva0217@gmail.com>

From-SVN: r261115
This commit is contained in:
Chung-Ju Wu 2018-06-02 11:14:04 +00:00 committed by Chung-Ju Wu
parent e812d4dd5d
commit 2140297cb3
7 changed files with 366 additions and 54 deletions

View File

@ -1,3 +1,29 @@
2018-06-02 Chung-Ju Wu <jasonwucj@gmail.com>
Shiva Chen <shiva0217@gmail.com>
* config/nds32/constants.md (unspec_volatile_element): Add
UNSPEC_VOLATILE_OMIT_FP_BEGIN and UNSPEC_VOLATILE_OMIT_FP_END.
* config/nds32/nds32-fp-as-gp.c: New implementation of fp_as_gp
optimization.
* config/nds32/nds32-protos.h (nds32_naked_function_p): Declare.
(make_pass_nds32_fp_as_gp): Declare.
* config/nds32/nds32.c (nds32_register_passes): Add fp_as_gp as one
optimization pass.
(nds32_asm_function_end_prologue): Remove unused asm output.
(nds32_asm_function_begin_epilogue): Remove unused asm output.
(nds32_asm_file_start): Output necessary fp_as_gp information.
(nds32_option_override): Adjust register usage.
(nds32_expand_prologue): Consider fp_as_gp situation.
(nds32_expand_prologue_v3push): Consider fp_as_gp situation.
* config/nds32/nds32.md (prologue): Check fp_as_gp_p and naked_p.
(epilogue): Ditto.
(return): Ditto.
(simple_return): Ditto.
(omit_fp_begin): Output special directive for fp_as_gp.
(omit_fp_end): Output special directive for fp_as_gp.
* config/nds32/nds32.opt (mfp-as-gp, mno-fp-as-gp, mforce-fp-as-gp,
mforbid-fp-as-gp): New options.
2018-06-01 Mark Wielaard <mark@klomp.org>
* dwarf2out.c (dwarf2out_finish): Remove generation of

View File

@ -169,6 +169,8 @@
UNSPEC_VOLATILE_SET_TRIG_EDGE
UNSPEC_VOLATILE_GET_TRIG_TYPE
UNSPEC_VOLATILE_RELAX_GROUP
UNSPEC_VOLATILE_OMIT_FP_BEGIN
UNSPEC_VOLATILE_OMIT_FP_END
UNSPEC_VOLATILE_POP25_RETURN
UNSPEC_VOLATILE_UNALIGNED_FEATURE
UNSPEC_VOLATILE_ENABLE_UNALIGNED

View File

@ -26,19 +26,255 @@
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "hard-reg-set.h"
#include "tm_p.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "insn-config.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "ira.h"
#include "ira-int.h"
#include "df.h"
#include "tree-core.h"
#include "tree-pass.h"
#include "nds32-protos.h"
/* ------------------------------------------------------------------------ */
/* A helper function to check if this function should contain prologue. */
static bool
nds32_have_prologue_p (void)
{
int i;
for (i = 0; i < 28; i++)
if (NDS32_REQUIRED_CALLEE_SAVED_P (i))
return true;
return (flag_pic
|| NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
|| NDS32_REQUIRED_CALLEE_SAVED_P (LP_REGNUM));
}
/* Count the insns of the current function that are single-set insns
   accepted by nds32_symbol_load_store_p and whose memory-side operand is
   a bare MEM.  The count is compared against a threshold by the fp-as-gp
   profitability check.  */
static int
nds32_get_symbol_count (void)
{
  basic_block block;
  rtx_insn *cur;
  int count = 0;

  FOR_EACH_BB_FN (block, cfun)
    {
      FOR_BB_INSNS (block, cur)
        {
          rtx body;
          rtx operand;

          /* Only insns whose addressing mode is a symbol are counted.  */
          if (!single_set (cur) || !nds32_symbol_load_store_p (cur))
            continue;

          body = PATTERN (cur);
          gcc_assert (GET_CODE (body) == SET);

          /* When the source is a plain REG the memory operand is taken
             from the destination; otherwise it is taken from the source.  */
          if (GET_CODE (SET_SRC (body)) == REG )
            operand = SET_DEST (body);
          else
            operand = SET_SRC (body);

          /* We have only lwi37 and swi37 for fp-as-gp optimization,
             so only SImode accesses are meant to be counted.
             No mode is tested explicitly here: the intent is that a MEM
             for QImode and HImode is wrapped by ZERO_EXTEND or SIGN_EXTEND
             and therefore fails the MEM test below.  */
          if (GET_CODE (operand) == MEM)
            count++;
        }
    }

  return count;
}
/* Function to determine whether it is worth doing fp_as_gp optimization.
Return 0: It is NOT worth doing fp_as_gp optimization.
Return 1: It is APPROXIMATELY worth doing fp_as_gp optimization.
Return false: It is NOT worth doing fp_as_gp optimization.
Return true: It is APPROXIMATELY worth doing fp_as_gp optimization.
Note that if it is worth doing fp_as_gp optimization,
we MUST set FP_REGNUM ever live in this function.
The function relies on CDI_POST_DOMINATORS information being available
(it calls dominated_by_p with that direction).
NOTE(review): this listing appears to interleave pre-change and post-change
lines of a diff.  The "Return 0/1" lines above, the bare 'int' return type
and the unconditional 'return 0;' below look like the old version, while the
"Return false/true" lines and 'static bool' look like the new one --
confirm against the committed file. */
int
static bool
nds32_fp_as_gp_check_available (void)
{
/* By default we return 0. */
return 0;
basic_block bb;
basic_block exit_bb;
edge_iterator ei;
edge e;
bool first_exit_blocks_p;
/* If ANY of the following conditions holds,
we DO NOT perform fp_as_gp optimization:
1. TARGET_FORBID_FP_AS_GP is set
regardless of the TARGET_FORCE_FP_AS_GP.
2. User explicitly uses 'naked'/'no_prologue' attribute.
We use nds32_naked_function_p() to help such checking.
3. Not optimizing for size.
4. Frame pointer is needed.
5. If $fp is already required to be saved,
it means $fp is already chosen by the register allocator.
Thus we had better not use it for fp_as_gp optimization.
6. This function is a vararg function.
DO NOT apply fp_as_gp optimization on this function
because it may change and break stack frame.
7. The epilogue is empty.
This happens when the function uses exit()
or its attribute is no_return.
In that case, compiler will not expand epilogue
so that we have no chance to output .omit_fp_end directive.
(Tested below as: the exit block has no fallthru predecessor edge.) */
if (TARGET_FORBID_FP_AS_GP
|| nds32_naked_function_p (current_function_decl)
|| !optimize_size
|| frame_pointer_needed
|| NDS32_REQUIRED_CALLEE_SAVED_P (FP_REGNUM)
|| (cfun->stdarg == 1)
|| (find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) == NULL))
return false;
/* Disable fp_as_gp if there is any infinite loop, since $fp may be
reused inside infinite loops by register renaming.
To detect infinite loops we rely on the fact that, when there are none,
exit_bb post-dominates all other basic blocks. */
first_exit_blocks_p = true;
exit_bb = NULL;
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
{
/* With more than one exit block we do not perform fp_as_gp
optimization either. */
if (!first_exit_blocks_p)
return false;
exit_bb = e->src;
first_exit_blocks_p = false;
}
/* No exit_bb found?  Just give up on fp_as_gp! */
if (!exit_bb)
return false;
/* Each bb should be post-dominated by exit_bb if there is no infinite loop! */
FOR_EACH_BB_FN (bb, cfun)
{
if (!dominated_by_p (CDI_POST_DOMINATORS,
bb,
exit_bb))
return false;
}
/* Now we can check the possibility of using fp_as_gp optimization. */
if (TARGET_FORCE_FP_AS_GP)
{
/* User explicitly issues -mforce-fp-as-gp option. */
return true;
}
else
{
/* In the following we are going to evaluate whether
it is worth doing fp_as_gp optimization. */
bool good_gain = false;
int symbol_count;
int threshold;
/* We check whether a prologue is already required.
Note that $gp will be saved in prologue for PIC code generation.
After that, we can set threshold by the existence of prologue.
Each fp-implied instruction will gain 2-byte code size
from gp-aware instruction, so we have following heuristics. */
if (flag_pic
|| nds32_have_prologue_p ())
{
/* Have-prologue:
Compiler already intends to generate prologue content,
so the fp_as_gp optimization will only insert
'la $fp,_FP_BASE_' instruction, which will be
converted into 4-byte instruction at link time.
The threshold is "3" symbol accesses, 2 + 2 + 2 > 4. */
threshold = 3;
}
else
{
/* None-prologue:
Compiler originally does not generate prologue content,
so the fp_as_gp optimization will NOT ONLY insert
'la $fp,_FP_BASE' instruction, but also causes
push/pop instructions.
If we are using v3push (push25/pop25),
the threshold is "5" symbol accesses, 5*2 > 4 + 2 + 2;
If we are using normal push (smw/lmw),
the threshold is "5+2" symbol accesses 7*2 > 4 + 4 + 4. */
threshold = 5 + (TARGET_V3PUSH ? 0 : 2);
}
/* Number of symbol-addressed accesses that could benefit. */
symbol_count = nds32_get_symbol_count ();
if (symbol_count >= threshold)
good_gain = true;
/* Enable fp_as_gp optimization when potential gain is good enough. */
return good_gain;
}
}
/* Body of the fp_as_gp pass.  Decide whether the current function uses
   the fp-as-gp optimization, adjust the register allocator's view of
   FP_REGNUM accordingly, and record the decision in
   cfun->machine->fp_as_gp_p.  Always returns 1.  */
static unsigned int
nds32_fp_as_gp (void)
{
  bool enable_p;

  /* The availability check queries post-dominators, so compute them
     before the call and release them once we are done.  */
  calculate_dominance_info (CDI_POST_DOMINATORS);
  enable_p = nds32_fp_as_gp_check_available ();

  /* Here is a hack to IRA for enabling/disabling a hard register per
     function.  We *MUST* review this approach after migrating to
     gcc 4.9!  */
  if (!enable_p)
    {
      CLEAR_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs,
                          FP_REGNUM);
    }
  else
    {
      SET_HARD_REG_BIT (this_target_ira_int->x_no_unit_alloc_regs,
                        FP_REGNUM);
      df_set_regs_ever_live (FP_REGNUM, 1);
    }

  cfun->machine->fp_as_gp_p = enable_p;

  free_dominance_info (CDI_POST_DOMINATORS);
  return 1;
}
/* Static description of the "fp_as_gp" RTL pass.  It is handed to the
   rtl_opt_pass constructor by pass_nds32_fp_as_gp.  The pass requires,
   provides and destroys no properties and requests no TODO flags.  */
const pass_data pass_data_nds32_fp_as_gp =
{
RTL_PASS, /* type */
"fp_as_gp", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_MACH_DEP, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0 /* todo_flags_finish */
};
/* Pass object that exposes nds32_fp_as_gp as an RTL pass described by
   pass_data_nds32_fp_as_gp.  */
class pass_nds32_fp_as_gp : public rtl_opt_pass
{
public:
  pass_nds32_fp_as_gp (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_nds32_fp_as_gp, ctxt)
  {
  }

  /* opt_pass methods: */

  /* The pass runs only when TARGET_16_BIT is set and optimize_size
     is nonzero.  */
  bool gate (function *) { return TARGET_16_BIT && optimize_size; }

  /* Delegate to the file-local pass body.  */
  unsigned int execute (function *)
  {
    return nds32_fp_as_gp ();
  }
};
/* Factory for the fp_as_gp pass: allocate a new pass_nds32_fp_as_gp bound
   to CTXT and return it as an rtl_opt_pass pointer.  */
rtl_opt_pass *
make_pass_nds32_fp_as_gp (gcc::context *ctxt)
{
  rtl_opt_pass *pass = new pass_nds32_fp_as_gp (ctxt);

  return pass;
}
/* ------------------------------------------------------------------------ */

View File

@ -155,9 +155,8 @@ extern int nds32_adjust_insn_length (rtx_insn *, int);
/* Auxiliary functions for FP_AS_GP detection. */
extern int nds32_fp_as_gp_check_available (void);
extern bool nds32_symbol_load_store_p (rtx_insn *);
extern bool nds32_naked_function_p (tree);
/* Auxiliary functions for jump table generation. */
@ -367,5 +366,6 @@ extern bool nds32_use_load_post_increment(machine_mode);
/* Functions for create nds32 specific optimization pass. */
extern rtl_opt_pass *make_pass_nds32_relax_opt (gcc::context *);
extern rtl_opt_pass *make_pass_nds32_fp_as_gp (gcc::context *);
/* ------------------------------------------------------------------------ */

View File

@ -1391,7 +1391,7 @@ nds32_needs_double_word_align (machine_mode mode, const_tree type)
}
/* Return true if FUNC is a naked function. */
static bool
bool
nds32_naked_function_p (tree func)
{
/* FOR BACKWARD COMPATIBILITY,
@ -1626,6 +1626,11 @@ nds32_register_pass (
static void
nds32_register_passes (void)
{
nds32_register_pass (
make_pass_nds32_fp_as_gp,
PASS_POS_INSERT_BEFORE,
"ira");
nds32_register_pass (
make_pass_nds32_relax_opt,
PASS_POS_INSERT_AFTER,
@ -2191,56 +2196,12 @@ static void
nds32_asm_function_end_prologue (FILE *file)
{
fprintf (file, "\t! END PROLOGUE\n");
/* If frame pointer is NOT needed and -mfp-as-gp is issued,
we can generate special directive: ".omit_fp_begin"
to guide linker doing fp-as-gp optimization.
However, for a naked function, which means
it should not have prologue/epilogue,
using fp-as-gp still requires saving $fp by push/pop behavior and
there is no benefit to use fp-as-gp on such small function.
So we need to make sure this function is NOT naked as well. */
if (!frame_pointer_needed
&& !cfun->machine->naked_p
&& cfun->machine->fp_as_gp_p)
{
fprintf (file, "\t! ----------------------------------------\n");
fprintf (file, "\t! Guide linker to do "
"link time optimization: fp-as-gp\n");
fprintf (file, "\t! We add one more instruction to "
"initialize $fp near to $gp location.\n");
fprintf (file, "\t! If linker fails to use fp-as-gp transformation,\n");
fprintf (file, "\t! this extra instruction should be "
"eliminated at link stage.\n");
fprintf (file, "\t.omit_fp_begin\n");
fprintf (file, "\tla\t$fp,_FP_BASE_\n");
fprintf (file, "\t! ----------------------------------------\n");
}
}
/* Before rtl epilogue has been expanded, this function is used.
   It writes assembler text to FILE; the last thing it prints is the
   "! BEGIN EPILOGUE" marker line.
   NOTE(review): this listing seems to show removed and kept diff lines
   together.  The ChangeLog entry for this function says "Remove unused asm
   output", so the conditional .omit_fp_end block below is presumably the
   code that was deleted -- confirm against the committed file.  */
static void
nds32_asm_function_begin_epilogue (FILE *file)
{
/* If frame pointer is NOT needed and -mfp-as-gp is issued,
we can generate special directive: ".omit_fp_end"
to claim fp-as-gp optimization range.
However, for a naked function,
which means it should not have prologue/epilogue,
using fp-as-gp still requires saving $fp by push/pop behavior and
there is no benefit to use fp-as-gp on such small function.
So we need to make sure this function is NOT naked as well. */
if (!frame_pointer_needed
&& !cfun->machine->naked_p
&& cfun->machine->fp_as_gp_p)
{
fprintf (file, "\t! ----------------------------------------\n");
fprintf (file, "\t! Claim the range of fp-as-gp "
"link time optimization\n");
fprintf (file, "\t.omit_fp_end\n");
fprintf (file, "\t! ----------------------------------------\n");
}
fprintf (file, "\t! BEGIN EPILOGUE\n");
}
@ -3168,6 +3129,18 @@ nds32_asm_file_start (void)
"for checking inconsistency on interrupt handler\n");
fprintf (asm_out_file, "\t.vec_size\t%d\n", nds32_isr_vector_size);
/* If user enables '-mforce-fp-as-gp' or compiles programs with -Os,
the compiler may produce 'la $fp,_FP_BASE_' instruction
at prologue for fp-as-gp optimization.
We should emit weak reference of _FP_BASE_ to avoid undefined reference
in case user does not pass '--relax' option to linker. */
if (TARGET_FORCE_FP_AS_GP || optimize_size)
{
fprintf (asm_out_file, "\t! This weak reference is required to do "
"fp-as-gp link time optimization\n");
fprintf (asm_out_file, "\t.weak\t_FP_BASE_\n");
}
fprintf (asm_out_file, "\t! ------------------------------------\n");
if (TARGET_ISA_V2)
@ -4126,6 +4099,12 @@ nds32_option_override (void)
fixed_regs[r] = call_used_regs[r] = 1;
}
/* See if user explicitly would like to use fp-as-gp optimization.
If so, we must prevent $fp from being allocated
during register allocation. */
if (TARGET_FORCE_FP_AS_GP)
fixed_regs[FP_REGNUM] = call_used_regs[FP_REGNUM] = 1;
if (!TARGET_16_BIT)
{
/* Under no 16 bit ISA, we need to strictly disable TARGET_V3PUSH. */
@ -4544,6 +4523,10 @@ nds32_expand_prologue (void)
The result will be in cfun->machine. */
nds32_compute_stack_frame ();
/* Check frame_pointer_needed again to prevent fp is need after reload. */
if (frame_pointer_needed)
cfun->machine->fp_as_gp_p = false;
/* If this is a variadic function, first we need to push argument
registers that hold the unnamed argument value. */
if (cfun->machine->va_args_size != 0)
@ -4951,6 +4934,10 @@ nds32_expand_prologue_v3push (void)
if (cfun->machine->callee_saved_gpr_regs_size > 0)
df_set_regs_ever_live (FP_REGNUM, 1);
/* Check frame_pointer_needed again to prevent fp is need after reload. */
if (frame_pointer_needed)
cfun->machine->fp_as_gp_p = false;
/* If the function is 'naked',
we do not have to generate prologue code fragment. */
if (cfun->machine->naked_p && !flag_pic)

View File

@ -1830,12 +1830,33 @@
nds32_expand_prologue_v3push ();
else
nds32_expand_prologue ();
/* If cfun->machine->fp_as_gp_p is true, we can generate special
directive to guide linker doing fp-as-gp optimization.
However, for a naked function, which means
it should not have prologue/epilogue,
using fp-as-gp still requires saving $fp by push/pop behavior and
there is no benefit to use fp-as-gp on such small function.
So we need to make sure this function is NOT naked as well. */
if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM)));
DONE;
})
(define_expand "epilogue" [(const_int 0)]
""
{
/* If cfun->machine->fp_as_gp_p is true, we can generate special
directive to guide linker doing fp-as-gp optimization.
However, for a naked function, which means
it should not have prologue/epilogue,
using fp-as-gp still requires saving $fp by push/pop behavior and
there is no benefit to use fp-as-gp on such small function.
So we need to make sure this function is NOT naked as well. */
if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM)));
/* Note that only under V3/V3M ISA, we could use v3pop epilogue.
In addition, we need to check if v3push is indeed available. */
if (NDS32_V3PUSH_AVAILABLE_P)
@ -1935,7 +1956,8 @@
"nds32_can_use_return_insn ()"
{
/* Emit as the simple return. */
if (cfun->machine->naked_p
if (!cfun->machine->fp_as_gp_p
&& cfun->machine->naked_p
&& (cfun->machine->va_args_size == 0))
{
emit_jump_insn (gen_return_internal ());
@ -1945,9 +1967,14 @@
;; This pattern is expanded only by the shrink-wrapping optimization
;; on paths where the function prologue has not been executed.
;; However, such optimization may reorder the prologue/epilogue blocks
;; together with basic blocks within function body.
;; So we must disable this pattern if we have already decided
;; to perform fp_as_gp optimization, which requires prologue to be
;; first block and epilogue to be last block.
;; NOTE(review): three strings follow the RTL template below, one more than
;; a define_expand takes after its template (condition, preparation
;; statements).  The first empty string is presumably the pre-change
;; condition shown by the diff and "!cfun->machine->fp_as_gp_p" its
;; replacement -- confirm against the committed file.
(define_expand "simple_return"
[(simple_return)]
""
"!cfun->machine->fp_as_gp_p"
""
)
@ -2162,6 +2189,25 @@
[(set_attr "length" "0")]
)
;; Output .omit_fp_begin for fp-as-gp optimization.
;; Also we have to set $fp register.
;; The pattern sets operand 0 from a volatile unspec; the "prologue"
;; expander emits it with the FP_REGNUM register as that operand.
;; The output template names $fp literally (it does not use %0): it prints
;; the .omit_fp_begin directive followed by 'la $fp,_FP_BASE_', bracketed
;; by '! -----' marker lines.  The insn length is declared as 8.
;; NOTE(review): constraint "x" is presumably the register class that
;; contains only $fp -- confirm in the nds32 constraint definitions.
(define_insn "omit_fp_begin"
[(set (match_operand:SI 0 "register_operand" "=x")
(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))]
""
"! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----"
[(set_attr "length" "8")]
)
;; Output .omit_fp_end for fp-as-gp optimization.
;; Claim that we have to use $fp register.
;; Operand 0 is only an input of the volatile unspec; the "epilogue"
;; expander emits this insn with the FP_REGNUM register as that operand.
;; The template prints just the .omit_fp_end directive between '! -----'
;; marker lines, and the insn length is declared as 0.
;; NOTE(review): constraint "x" is presumably the register class that
;; contains only $fp -- confirm in the nds32 constraint definitions.
(define_insn "omit_fp_end"
[(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)]
""
"! -----\;.omit_fp_end\;! -----"
[(set_attr "length" "0")]
)
(define_insn "pop25return"
[(return)
(unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)]

View File

@ -32,6 +32,13 @@ EL
Target RejectNegative Alias(mlittle-endian)
Generate code in little-endian mode.
mfp-as-gp
Target RejectNegative Alias(mforce-fp-as-gp)
Force performing fp-as-gp optimization.
mno-fp-as-gp
Target RejectNegative Alias(mforbid-fp-as-gp)
Forbid performing fp-as-gp optimization.
; ---------------------------------------------------------------
@ -85,6 +92,14 @@ mlittle-endian
Target Undocumented RejectNegative Negative(mbig-endian) InverseMask(BIG_ENDIAN)
Generate code in little-endian mode.
mforce-fp-as-gp
Target Undocumented Mask(FORCE_FP_AS_GP)
Prevent $fp being allocated during register allocation so that compiler is able to force performing fp-as-gp optimization.
mforbid-fp-as-gp
Target Undocumented Mask(FORBID_FP_AS_GP)
Forbid using $fp to access static and global variables. This option strictly forbids fp-as-gp optimization regardless of '-mforce-fp-as-gp'.
mict-model=
Target Undocumented RejectNegative Joined Enum(nds32_ict_model_type) Var(nds32_ict_model) Init(ICT_MODEL_SMALL)
Specify the address generation strategy for ICT call's code model.