Remove index from AARCH64_FUSION_PAIR
Instead of doing an explict index in aarch64-fusion-pairs.def, we should have an enum which does the index instead. This allows you to add/remove them without worrying about the order being correct and having holes or worry about merge conficts. OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. ChangeLog: * aarch64-fusion-pairs.def: Remove all index to AARCH64_FUSION_PAIR. * config/aarch64/aarch64-protos.h (aarch64_fusion_pairs_index): New enum. (aarch64_fusion_pairs): Base the shifted value on the index instead Rewrite AARCH64_FUSE_ALL to be based on the end index. of the argument to AARCH64_FUSION_PAIR. * config/aarch64/aarch64.c: Remove the last argument to AARCH64_FUSION_PAIR. From-SVN: r227094
This commit is contained in:
parent
2321dd914f
commit
ed9fa8d2a3
@ -1,3 +1,14 @@
|
||||
2015-08-22 Andrew Pinski <apinski@cavium.com>
|
||||
|
||||
* aarch64-fusion-pairs.def: Remove all index to AARCH64_FUSION_PAIR.
|
||||
* config/aarch64/aarch64-protos.h
|
||||
(aarch64_fusion_pairs_index): New enum.
|
||||
(aarch64_fusion_pairs): Base the shifted value on the index instead
|
||||
Rewrite AARCH64_FUSE_ALL to be based on the end index.
|
||||
of the argument to AARCH64_FUSION_PAIR.
|
||||
* config/aarch64/aarch64.c: Remove the last argument to
|
||||
AARCH64_FUSION_PAIR.
|
||||
|
||||
2015-08-22 Mikhail Maltsev <maltsevm@gmail.com>
|
||||
|
||||
* dominance.c (new_zero_array): Define.
|
||||
|
@ -20,19 +20,17 @@
|
||||
/* Pairs of instructions which can be fused. before including this file,
|
||||
define a macro:
|
||||
|
||||
AARCH64_FUSION_PAIR (name, internal_name, index_bit)
|
||||
AARCH64_FUSION_PAIR (name, internal_name)
|
||||
|
||||
Where:
|
||||
|
||||
NAME is a string giving a friendly name for the instructions to fuse.
|
||||
INTERNAL_NAME gives the internal name suitable for appending to
|
||||
AARCH64_FUSE_ to give an enum name.
|
||||
INDEX_BIT is the bit to set in the bitmask of supported fusion
|
||||
operations. */
|
||||
AARCH64_FUSE_ to give an enum name. */
|
||||
|
||||
AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK, 0)
|
||||
AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD, 1)
|
||||
AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK, 2)
|
||||
AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR, 3)
|
||||
AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH, 4)
|
||||
AARCH64_FUSION_PAIR ("mov+movk", MOV_MOVK)
|
||||
AARCH64_FUSION_PAIR ("adrp+add", ADRP_ADD)
|
||||
AARCH64_FUSION_PAIR ("movk+movk", MOVK_MOVK)
|
||||
AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
|
||||
AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
|
||||
|
||||
|
@ -201,23 +201,24 @@ struct tune_params
|
||||
unsigned int extra_tuning_flags;
|
||||
};
|
||||
|
||||
#define AARCH64_FUSION_PAIR(x, name, index) \
|
||||
AARCH64_FUSE_##name = (1 << index),
|
||||
#define AARCH64_FUSION_PAIR(x, name) \
|
||||
AARCH64_FUSE_##name##_index,
|
||||
/* Supported fusion operations. */
|
||||
enum aarch64_fusion_pairs_index
|
||||
{
|
||||
#include "aarch64-fusion-pairs.def"
|
||||
AARCH64_FUSE_index_END
|
||||
};
|
||||
#undef AARCH64_FUSION_PAIR
|
||||
|
||||
#define AARCH64_FUSION_PAIR(x, name) \
|
||||
AARCH64_FUSE_##name = (1u << AARCH64_FUSE_##name##_index),
|
||||
/* Supported fusion operations. */
|
||||
enum aarch64_fusion_pairs
|
||||
{
|
||||
AARCH64_FUSE_NOTHING = 0,
|
||||
#include "aarch64-fusion-pairs.def"
|
||||
|
||||
/* Hacky macro to build AARCH64_FUSE_ALL. The sequence below expands
|
||||
to:
|
||||
AARCH64_FUSE_ALL = 0 | AARCH64_FUSE_index1 | AARCH64_FUSE_index2 ... */
|
||||
#undef AARCH64_FUSION_PAIR
|
||||
#define AARCH64_FUSION_PAIR(x, name, y) \
|
||||
| AARCH64_FUSE_##name
|
||||
|
||||
AARCH64_FUSE_ALL = 0
|
||||
#include "aarch64-fusion-pairs.def"
|
||||
AARCH64_FUSE_ALL = (1u << AARCH64_FUSE_index_END) - 1
|
||||
};
|
||||
#undef AARCH64_FUSION_PAIR
|
||||
|
||||
|
@ -171,7 +171,7 @@ struct aarch64_flag_desc
|
||||
unsigned int flag;
|
||||
};
|
||||
|
||||
#define AARCH64_FUSION_PAIR(name, internal_name, y) \
|
||||
#define AARCH64_FUSION_PAIR(name, internal_name) \
|
||||
{ name, AARCH64_FUSE_##internal_name },
|
||||
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
|
||||
{
|
||||
|
@ -39,7 +39,7 @@
|
||||
|
||||
(define_insn_reservation "thunderx_shift" 1
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "bfm,extend,shift_imm,shift_reg"))
|
||||
(eq_attr "type" "bfm,extend,shift_imm,shift_reg,rbit,rev"))
|
||||
"thunderx_pipe0 | thunderx_pipe1")
|
||||
|
||||
|
||||
@ -66,12 +66,18 @@
|
||||
(eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
|
||||
"thunderx_pipe1 + thunderx_mult")
|
||||
|
||||
;; Multiply high instructions take an extra cycle and cause the muliply unit to
|
||||
;; be busy for an extra cycle.
|
||||
;; crcb,crch,crcw is 4 cycles and can only happen on pipe 1
|
||||
|
||||
;(define_insn_reservation "thunderx_mul_high" 5
|
||||
(define_insn_reservation "thunderx_crc32" 4
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "crc"))
|
||||
"thunderx_pipe1 + thunderx_mult")
|
||||
|
||||
;; crcx is 5 cycles and only happen on pipe 1
|
||||
;(define_insn_reservation "thunderx_crc64" 5
|
||||
; (and (eq_attr "tune" "thunderx")
|
||||
; (eq_attr "type" "smull,umull"))
|
||||
; (eq_attr "type" "crc")
|
||||
; (eq_attr "mode" "DI"))
|
||||
; "thunderx_pipe1 + thunderx_mult")
|
||||
|
||||
(define_insn_reservation "thunderx_div32" 22
|
||||
@ -97,6 +103,11 @@
|
||||
(eq_attr "type" "store2"))
|
||||
"thunderx_pipe0 + thunderx_pipe1")
|
||||
|
||||
;; Prefetch are single issued
|
||||
;(define_insn_reservation "thunderx_prefetch" 1
|
||||
; (and (eq_attr "tune" "thunderx")
|
||||
; (eq_attr "type" "prefetch"))
|
||||
; "thunderx_pipe0 + thunderx_pipe1")
|
||||
|
||||
;; loads (and load pairs) from L1 take 3 cycles in pipe 0
|
||||
(define_insn_reservation "thunderx_load" 3
|
||||
@ -121,10 +132,21 @@
|
||||
(eq_attr "type" "fconsts,fconstd"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
;; Moves between fp are 2 cycles including min/max/select/abs/neg
|
||||
;; Moves between fp are 2 cycles including min/max
|
||||
(define_insn_reservation "thunderx_fmov" 2
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
|
||||
(eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
;; ABS, and NEG are 1 cycle
|
||||
(define_insn_reservation "thunderx_fabs" 1
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "ffariths,ffarithd"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
(define_insn_reservation "thunderx_fcsel" 3
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "fcsel"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
(define_insn_reservation "thunderx_fmovgpr" 2
|
||||
@ -132,6 +154,11 @@
|
||||
(eq_attr "type" "f_mrc, f_mcr"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
(define_insn_reservation "thunderx_fcmp" 3
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "fcmps,fcmpd"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
(define_insn_reservation "thunderx_fmul" 6
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
|
||||
@ -152,21 +179,21 @@
|
||||
(eq_attr "type" "fsqrts"))
|
||||
"thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
|
||||
|
||||
(define_insn_reservation "thunderx_fsqrtd" 28
|
||||
(define_insn_reservation "thunderx_fsqrtd" 31
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "fsqrtd"))
|
||||
"thunderx_pipe1 + thunderx_divide, thunderx_divide*31")
|
||||
"thunderx_pipe1 + thunderx_divide, thunderx_divide*27")
|
||||
|
||||
;; The rounding conversion inside fp is 4 cycles
|
||||
(define_insn_reservation "thunderx_frint" 4
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "f_rints,f_rintd"))
|
||||
(eq_attr "type" "f_cvt,f_rints,f_rintd"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
;; Float to integer with a move from int to/from float is 6 cycles
|
||||
(define_insn_reservation "thunderx_f_cvt" 6
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
|
||||
(eq_attr "type" "f_cvtf2i,f_cvti2f"))
|
||||
"thunderx_pipe1")
|
||||
|
||||
;; FP/SIMD load/stores happen in pipe 0
|
||||
@ -184,9 +211,12 @@
|
||||
"thunderx_pipe0+thunderx_pipe1")
|
||||
|
||||
;; FP/SIMD Stores takes one cycle in pipe 0
|
||||
;; ST1 with one registers either multiple structures or single structure is
|
||||
;; also one cycle.
|
||||
(define_insn_reservation "thunderx_simd_fp_store" 1
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
|
||||
(eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
|
||||
neon_store1_one_lane, neon_store1_one_lane_q"))
|
||||
"thunderx_pipe0")
|
||||
|
||||
;; 64bit neon store pairs are single issue for one cycle
|
||||
@ -201,24 +231,38 @@
|
||||
(eq_attr "type" "neon_store1_2reg_q"))
|
||||
"(thunderx_pipe0 + thunderx_pipe1)*2")
|
||||
|
||||
;; LD1R/LD1 (with a single struct) takes 6 cycles and issued in pipe0
|
||||
(define_insn_reservation "thunderx_neon_ld1" 6
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "neon_load1_all_lanes"))
|
||||
"thunderx_pipe0")
|
||||
|
||||
;; SIMD/NEON (q forms take an extra cycle)
|
||||
;; SIMD For ThunderX is 64bit wide,
|
||||
|
||||
;; Thunder simd move instruction types - 2/3 cycles
|
||||
;; ThunderX simd move instruction types - 2/3 cycles
|
||||
;; ThunderX dup, ins is the same
|
||||
;; ThunderX SIMD fabs/fneg instruction types
|
||||
(define_insn_reservation "thunderx_neon_move" 2
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
|
||||
neon_fp_compare_d, neon_move"))
|
||||
neon_fp_compare_d, neon_move, neon_dup, \
|
||||
neon_ins, neon_from_gp, neon_to_gp, \
|
||||
neon_abs, neon_neg, \
|
||||
neon_fp_neg_s, neon_fp_abs_s"))
|
||||
"thunderx_pipe1 + thunderx_simd")
|
||||
|
||||
(define_insn_reservation "thunderx_neon_move_q" 3
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
|
||||
neon_fp_compare_d_q, neon_move_q"))
|
||||
neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
|
||||
neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
|
||||
neon_abs_q, neon_neg_q, \
|
||||
neon_fp_neg_s_q, neon_fp_neg_d_q, \
|
||||
neon_fp_abs_s_q, neon_fp_abs_d_q"))
|
||||
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
||||
|
||||
|
||||
;; Thunder simd simple/add instruction types - 4/5 cycles
|
||||
;; ThunderX simd simple/add instruction types - 4/5 cycles
|
||||
|
||||
(define_insn_reservation "thunderx_neon_add" 4
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
@ -227,7 +271,9 @@
|
||||
neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
|
||||
neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
|
||||
neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \
|
||||
neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
|
||||
neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
|
||||
neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
|
||||
neon_fp_reduc_minmax_s"))
|
||||
"thunderx_pipe1 + thunderx_simd")
|
||||
|
||||
;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect
|
||||
@ -240,13 +286,74 @@
|
||||
neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
|
||||
neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \
|
||||
neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
|
||||
neon_add_long, neon_sub_long"))
|
||||
neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
|
||||
neon_arith_acc_q, neon_rev_q, \
|
||||
neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
|
||||
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
||||
|
||||
;; Multiplies (float and integer) and shifts and permutes (except for TBL) and float conversions
|
||||
;; are 6/7 cycles
|
||||
(define_insn_reservation "thunderx_neon_mult" 6
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
|
||||
neon_mla_b, neon_mla_h, neon_mla_s, \
|
||||
neon_mla_h_scalar, neon_mla_s_scalar, \
|
||||
neon_ext, neon_shift_imm, neon_permute, \
|
||||
neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
|
||||
neon_sat_shift_reg, neon_shift_acc, \
|
||||
neon_mul_b, neon_mul_h, neon_mul_s, \
|
||||
neon_mul_h_scalar, neon_mul_s_scalar, \
|
||||
neon_fp_mul_s_scalar, \
|
||||
neon_fp_mla_s_scalar"))
|
||||
"thunderx_pipe1 + thunderx_simd")
|
||||
|
||||
(define_insn_reservation "thunderx_neon_mult_q" 7
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
|
||||
neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
|
||||
neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
|
||||
neon_ext_q, neon_shift_imm_q, neon_permute_q, \
|
||||
neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
|
||||
neon_sat_shift_reg_q, neon_shift_acc_q, \
|
||||
neon_shift_imm_long, \
|
||||
neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
|
||||
neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
|
||||
neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
|
||||
neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
|
||||
neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
|
||||
neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
|
||||
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
||||
|
||||
|
||||
;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle
|
||||
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
|
||||
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")
|
||||
;; AES[ED] is 5 cycles
|
||||
(define_insn_reservation "thunderx_crypto_aese" 5
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "crypto_aese"))
|
||||
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
||||
|
||||
;; AES{,I}MC is 3 cycles
|
||||
(define_insn_reservation "thunderx_crypto_aesmc" 3
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "crypto_aesmc"))
|
||||
"thunderx_pipe1 + thunderx_simd, thunderx_simd")
|
||||
|
||||
|
||||
;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes upper halve in the last cycle
|
||||
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
|
||||
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
|
||||
(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
|
||||
|
||||
;; 64bit TBL is emulated and takes 160 cycles
|
||||
(define_insn_reservation "thunderx_tbl" 160
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "neon_tbl1"))
|
||||
"(thunderx_pipe1+thunderx_pipe0)*160")
|
||||
|
||||
;; 128bit TBL is emulated and takes 320 cycles
|
||||
(define_insn_reservation "thunderx_tblq" 320
|
||||
(and (eq_attr "tune" "thunderx")
|
||||
(eq_attr "type" "neon_tbl1_q"))
|
||||
"(thunderx_pipe1+thunderx_pipe0)*320")
|
||||
|
||||
;; Assume both pipes are needed for unknown and multiple-instruction
|
||||
;; patterns.
|
||||
|
Loading…
Reference in New Issue
Block a user