aarch64-cores.def (xgene1): Update/add the xgene1 (APM XGene-1) core definition.

2015-01-15  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>

        * config/aarch64/aarch64-cores.def (xgene1): Update/add the
        xgene1 (APM XGene-1) core definition.
        * config/aarch64/aarch64.c: Add cost tables for APM XGene-1.
        * config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1.
        * doc/invoke.texi: Document -mcpu=xgene1.

From-SVN: r219656
This commit is contained in:
Philipp Tomsich 2015-01-15 13:48:14 +00:00 committed by Philipp Tomsich
parent 36566b39c3
commit 381e27aad5
6 changed files with 181 additions and 2 deletions

View File

@ -1,3 +1,11 @@
2015-01-15 Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
* config/aarch64/aarch64-cores.def (xgene1): Update/add the
xgene1 (APM XGene-1) core definition.
* config/aarch64/aarch64.c: Add cost tables for APM XGene-1.
* config/arm/aarch-cost-tables.h: Add cost tables for APM XGene-1.
* doc/invoke.texi: Document -mcpu=xgene1.
2015-01-15 Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
* dojump.h: New header file.

View File

@ -37,6 +37,7 @@
AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53)
AARCH64_CORE("cortex-a57", cortexa15, cortexa15, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57)
AARCH64_CORE("thunderx", thunderx, thunderx, 8, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx)
AARCH64_CORE("xgene1", xgene1, xgene1, 8, AARCH64_FL_FOR_ARCH8, xgene1)
/* V8 big.LITTLE implementations. */

View File

@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
"cortexa53,cortexa15,thunderx,cortexa57cortexa53"
"cortexa53,cortexa15,thunderx,xgene1,cortexa57cortexa53"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))

View File

@ -240,6 +240,27 @@ static const struct cpu_addrcost_table cortexa57_addrcost_table =
NAMED_PARAM (imm_offset, 0),
};
/* Address cost table for the APM XGene-1 core; referenced from
   xgene1_tunings.  The nested addr_scale_costs member is only named
   when HAVE_DESIGNATED_INITIALIZERS is set, so the entries must stay
   in the declaration order of struct cpu_addrcost_table.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table xgene1_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
.addr_scale_costs =
#endif
{
/* NOTE(review): presumably one scaling cost per access mode
   (HI/SI/DI/TI) -- confirm against the struct definition.  */
NAMED_PARAM (hi, 1),
NAMED_PARAM (si, 0),
NAMED_PARAM (di, 0),
NAMED_PARAM (ti, 1),
},
NAMED_PARAM (pre_modify, 1),
NAMED_PARAM (post_modify, 0),
NAMED_PARAM (register_offset, 0),
NAMED_PARAM (register_extend, 1),
NAMED_PARAM (imm_offset, 0),
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
@ -281,6 +302,16 @@ static const struct cpu_regmove_cost thunderx_regmove_cost =
NAMED_PARAM (FP2FP, 4)
};
/* Register move costs for the APM XGene-1 core; referenced from
   xgene1_tunings.  */
static const struct cpu_regmove_cost xgene1_regmove_cost =
{
NAMED_PARAM (GP2GP, 1),
/* Avoid the use of slow int<->fp moves for spilling by setting
their cost higher than memmov_cost (6 in xgene1_tunings). */
NAMED_PARAM (GP2FP, 8),
NAMED_PARAM (FP2GP, 8),
NAMED_PARAM (FP2FP, 2)
};
/* Generic costs for vector insn classes. */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
@ -321,6 +352,26 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
/* APM XGene-1 costs for vector insn classes; referenced from
   xgene1_tunings. */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost xgene1_vector_cost =
{
NAMED_PARAM (scalar_stmt_cost, 1),
NAMED_PARAM (scalar_load_cost, 5),
NAMED_PARAM (scalar_store_cost, 1),
NAMED_PARAM (vec_stmt_cost, 2),
NAMED_PARAM (vec_to_scalar_cost, 4),
NAMED_PARAM (scalar_to_vec_cost, 4),
NAMED_PARAM (vec_align_load_cost, 10),
NAMED_PARAM (vec_unalign_load_cost, 10),
NAMED_PARAM (vec_unalign_store_cost, 2),
NAMED_PARAM (vec_store_cost, 2),
NAMED_PARAM (cond_taken_branch_cost, 2),
NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#define AARCH64_FUSE_NOTHING (0)
#define AARCH64_FUSE_MOV_MOVK (1 << 0)
#define AARCH64_FUSE_ADRP_ADD (1 << 1)
@ -400,6 +451,23 @@ static const struct tune_params thunderx_tunings =
1 /* vec_reassoc_width. */
};
/* Tuning parameters for the APM XGene-1 core.  Bundles the xgene1 cost
   tables (extra insn costs, address costs, register move costs and
   vector costs) with the remaining per-core parameters.  The leading
   pointers and the trailing alignment/reassociation widths are
   positional, so the entries must stay in the declaration order of
   struct tune_params.  */
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
&xgene1_addrcost_table,
&xgene1_regmove_cost,
&xgene1_vector_cost,
/* The int<->fp costs in xgene1_regmove_cost (8) are deliberately set
   above this value.  */
NAMED_PARAM (memmov_cost, 6),
NAMED_PARAM (issue_rate, 4),
NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
16, /* function_align. */
8, /* jump_align. */
16, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1 /* vec_reassoc_width. */
};
/* A processor implementing AArch64. */
struct processor
{

View File

@ -325,4 +325,105 @@ const struct cpu_cost_table cortexa57_extra_costs =
}
};
/* Cost table for the APM XGene-1 core; referenced from xgene1_tunings.
   Entries are grouped by instruction class (ALU, multiply per integer
   mode, load/store, floating point per FP mode, vector) and are given
   either as 0 or in COSTS_N_INSNS units, except for the
   *_regs_per_insn_* entries, which are plain counts, and
   non_exec_costs_exec, which is a boolean.  All initialisers are
   positional: the trailing comment on each line names the field being
   set, and the order must match struct cpu_cost_table.  */
const struct cpu_cost_table xgene1_extra_costs =
{
/* ALU */
{
0, /* arith. */
0, /* logical. */
0, /* shift. */
COSTS_N_INSNS (1), /* shift_reg. */
COSTS_N_INSNS (1), /* arith_shift. */
COSTS_N_INSNS (1), /* arith_shift_reg. */
COSTS_N_INSNS (1), /* log_shift. */
COSTS_N_INSNS (1), /* log_shift_reg. */
COSTS_N_INSNS (1), /* extend. */
0, /* extend_arithm. */
COSTS_N_INSNS (1), /* bfi. */
COSTS_N_INSNS (1), /* bfx. */
0, /* clz. */
COSTS_N_INSNS (1), /* rev. */
0, /* non_exec. */
true /* non_exec_costs_exec. */
},
{
/* MULT SImode */
{
COSTS_N_INSNS (4), /* simple. */
COSTS_N_INSNS (4), /* flag_setting. */
COSTS_N_INSNS (4), /* extend. */
COSTS_N_INSNS (4), /* add. */
COSTS_N_INSNS (4), /* extend_add. */
COSTS_N_INSNS (20) /* idiv. */
},
/* MULT DImode */
{
COSTS_N_INSNS (5), /* simple. */
0, /* flag_setting (N/A). */
COSTS_N_INSNS (5), /* extend. */
COSTS_N_INSNS (5), /* add. */
COSTS_N_INSNS (5), /* extend_add. */
COSTS_N_INSNS (21) /* idiv. */
}
},
/* LD/ST */
{
COSTS_N_INSNS (5), /* load. */
COSTS_N_INSNS (6), /* load_sign_extend. */
COSTS_N_INSNS (5), /* ldrd. */
COSTS_N_INSNS (5), /* ldm_1st. */
1, /* ldm_regs_per_insn_1st. */
1, /* ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (10), /* loadf. */
COSTS_N_INSNS (10), /* loadd. */
COSTS_N_INSNS (5), /* load_unaligned. */
0, /* store. */
0, /* strd. */
0, /* stm_1st. */
1, /* stm_regs_per_insn_1st. */
1, /* stm_regs_per_insn_subsequent. */
0, /* storef. */
0, /* stored. */
0, /* store_unaligned. */
},
{
/* FP SFmode */
{
COSTS_N_INSNS (23), /* div. */
COSTS_N_INSNS (5), /* mult. */
COSTS_N_INSNS (5), /* mult_addsub. */
COSTS_N_INSNS (5), /* fma. */
COSTS_N_INSNS (5), /* addsub. */
COSTS_N_INSNS (2), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (2), /* compare. */
COSTS_N_INSNS (6), /* widen. */
COSTS_N_INSNS (6), /* narrow. */
COSTS_N_INSNS (4), /* toint. */
COSTS_N_INSNS (4), /* fromint. */
COSTS_N_INSNS (4) /* roundint. */
},
/* FP DFmode */
{
COSTS_N_INSNS (29), /* div. */
COSTS_N_INSNS (5), /* mult. */
COSTS_N_INSNS (5), /* mult_addsub. */
COSTS_N_INSNS (5), /* fma. */
COSTS_N_INSNS (5), /* addsub. */
COSTS_N_INSNS (3), /* fpconst. */
COSTS_N_INSNS (3), /* neg. */
COSTS_N_INSNS (2), /* compare. */
COSTS_N_INSNS (6), /* widen. */
COSTS_N_INSNS (6), /* narrow. */
COSTS_N_INSNS (4), /* toint. */
COSTS_N_INSNS (4), /* fromint. */
COSTS_N_INSNS (4) /* roundint. */
}
},
/* Vector */
{
COSTS_N_INSNS (2) /* alu. */
}
};
#endif /* GCC_AARCH_COST_TABLES_H */

View File

@ -12101,7 +12101,8 @@ architecture.
@opindex mtune
Specify the name of the target processor for which GCC should tune the
performance of the code. Permissible values for this option are:
@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx}.
@samp{generic}, @samp{cortex-a53}, @samp{cortex-a57}, @samp{thunderx},
@samp{xgene1}.
Additionally, this option can specify that GCC should tune the performance
of the code for a big.LITTLE system. The only permissible value is