parent
ac1857a3ef
commit
d6d1127249
|
@ -1,3 +1,24 @@
|
|||
2013-09-27 Xinliang David Li <davidxl@google.com>
|
||||
|
||||
* opts.c (finish_options): Adjust parameters
|
||||
according to vect cost model.
|
||||
(common_handle_option): Set dynamic vect cost
|
||||
model for FDO.
|
||||
* targhooks.c (default_add_stmt_cost): Compute stmt cost
|
||||
unconditionally.
|
||||
* tree-vect-loop.c (vect_estimate_min_profitable_iters):
|
||||
Use helper function.
|
||||
* tree-vectorizer.h (unlimited_cost_model): New function.
|
||||
* tree-vect-slp.c (vect_slp_analyze_bb_1): Use helper function.
|
||||
* tree-vect-data-refs.c (vect_peeling_hash_insert): Use helper
|
||||
function.
|
||||
(vect_enhance_data_refs_alignment): Ditto.
|
||||
* flag-types.h: New enum.
|
||||
* common/config/i386/i386-common.c (ix86_option_init_struct):
|
||||
No need to initialize vect_cost_model flag.
|
||||
* config/i386/i386.c (ix86_add_stmt_cost): Compute stmt cost
|
||||
unconditionally.
|
||||
|
||||
2013-09-27 Diego Novillo <dnovillo@google.com>
|
||||
|
||||
* gimple.h (enum ssa_mode): Remove.
|
||||
|
|
|
@ -2278,13 +2278,33 @@ ftree-slp-vectorize
|
|||
Common Report Var(flag_tree_slp_vectorize) Optimization
|
||||
Enable basic block vectorization (SLP) on trees
|
||||
|
||||
fvect-cost-model=
|
||||
Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT)
|
||||
Specifies the cost model for vectorization
|
||||
|
||||
Enum
|
||||
Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
|
||||
|
||||
EnumValue
|
||||
Enum(vect_cost_model) String(unlimited) Value(VECT_COST_MODEL_UNLIMITED)
|
||||
|
||||
EnumValue
|
||||
Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC)
|
||||
|
||||
EnumValue
|
||||
Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP)
|
||||
|
||||
fvect-cost-model
|
||||
Common Report Var(flag_vect_cost_model) Optimization
|
||||
Enable use of cost model in vectorization
|
||||
Common RejectNegative Alias(fvect-cost-model=,dynamic)
|
||||
Enables the dynamic vectorizer cost model. Preserved for backward compatibility.
|
||||
|
||||
fno-vect-cost-model
|
||||
Common RejectNegative Alias(fvect-cost-model=,unlimited)
|
||||
Enables the unlimited vectorizer cost model. Preserved for backward compatibility.
|
||||
|
||||
ftree-vect-loop-version
|
||||
Common Report Var(flag_tree_vect_loop_version) Init(1) Optimization
|
||||
Enable loop versioning when doing loop vectorization on trees
|
||||
Common Ignore
|
||||
Does nothing. Preserved for backward compatibility.
|
||||
|
||||
ftree-scev-cprop
|
||||
Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
|
||||
|
|
|
@ -811,7 +811,6 @@ ix86_option_init_struct (struct gcc_options *opts)
|
|||
|
||||
opts->x_flag_pcc_struct_return = 2;
|
||||
opts->x_flag_asynchronous_unwind_tables = 2;
|
||||
opts->x_flag_vect_cost_model = 1;
|
||||
}
|
||||
|
||||
/* On the x86 -fsplit-stack and -fstack-protector both use the same
|
||||
|
|
|
@ -42782,20 +42782,17 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
|
|||
unsigned *cost = (unsigned *) data;
|
||||
unsigned retval = 0;
|
||||
|
||||
if (flag_vect_cost_model)
|
||||
{
|
||||
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
|
||||
int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
|
||||
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
|
||||
int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
|
||||
|
||||
/* Statements in an inner loop relative to the loop being
|
||||
vectorized are weighted more heavily. The value here is
|
||||
arbitrary and could potentially be improved with analysis. */
|
||||
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
|
||||
count *= 50; /* FIXME. */
|
||||
/* Statements in an inner loop relative to the loop being
|
||||
vectorized are weighted more heavily. The value here is
|
||||
arbitrary and could potentially be improved with analysis. */
|
||||
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
|
||||
count *= 50; /* FIXME. */
|
||||
|
||||
retval = (unsigned) (count * stmt_cost);
|
||||
cost[where] += retval;
|
||||
}
|
||||
retval = (unsigned) (count * stmt_cost);
|
||||
cost[where] += retval;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
|
|
@ -423,7 +423,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
|
||||
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
|
||||
-ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
|
||||
-ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
|
||||
-ftree-vectorize -ftree-vrp @gol
|
||||
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
|
||||
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
|
||||
-fvariable-expansion-in-unroller -fvect-cost-model -fvpt -fweb @gol
|
||||
|
@ -6770,7 +6770,7 @@ optimizations designed to reduce code size.
|
|||
@option{-Os} disables the following optimization flags:
|
||||
@gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
|
||||
-falign-labels -freorder-blocks -freorder-blocks-and-partition @gol
|
||||
-fprefetch-loop-arrays -ftree-vect-loop-version}
|
||||
-fprefetch-loop-arrays}
|
||||
|
||||
@item -Ofast
|
||||
@opindex Ofast
|
||||
|
@ -8025,19 +8025,20 @@ Perform loop vectorization on trees. This flag is enabled by default at
|
|||
Perform basic block vectorization on trees. This flag is enabled by default at
|
||||
@option{-O3} and when @option{-ftree-vectorize} is enabled.
|
||||
|
||||
@item -ftree-vect-loop-version
|
||||
@opindex ftree-vect-loop-version
|
||||
Perform loop versioning when doing loop vectorization on trees. When a loop
|
||||
appears to be vectorizable except that data alignment or data dependence cannot
|
||||
be determined at compile time, then vectorized and non-vectorized versions of
|
||||
the loop are generated along with run-time checks for alignment or dependence
|
||||
to control which version is executed. This option is enabled by default
|
||||
except at level @option{-Os} where it is disabled.
|
||||
|
||||
@item -fvect-cost-model
|
||||
@item -fvect-cost-model=@var{model}
|
||||
@opindex fvect-cost-model
|
||||
Enable cost model for vectorization. This option is enabled by default at
|
||||
@option{-O3}.
|
||||
Alter the cost model used for vectorization. The @var{model} argument
|
||||
should be one of @code{unlimited}, @code{dynamic} or @code{cheap}.
|
||||
With the @code{unlimited} model the vectorized code-path is assumed
|
||||
to be profitable while with the @code{dynamic} model a runtime check
|
||||
will guard the vectorized code-path to enable it only for iteration
|
||||
counts that will likely execute faster than when executing the original
|
||||
scalar loop. The @code{cheap} model will disable vectorization of
|
||||
loops where doing so would be cost prohibitive for example due to
|
||||
required runtime checks for data dependence or alignment but otherwise
|
||||
is equal to the @code{dynamic} model.
|
||||
The default cost model depends on other optimization flags and is
|
||||
either @code{dynamic} or @code{cheap}.
|
||||
|
||||
@item -ftree-vrp
|
||||
@opindex ftree-vrp
|
||||
|
@ -9443,13 +9444,11 @@ constraints. The default value is 0.
|
|||
|
||||
@item vect-max-version-for-alignment-checks
|
||||
The maximum number of run-time checks that can be performed when
|
||||
doing loop versioning for alignment in the vectorizer. See option
|
||||
@option{-ftree-vect-loop-version} for more information.
|
||||
doing loop versioning for alignment in the vectorizer.
|
||||
|
||||
@item vect-max-version-for-alias-checks
|
||||
The maximum number of run-time checks that can be performed when
|
||||
doing loop versioning for alias in the vectorizer. See option
|
||||
@option{-ftree-vect-loop-version} for more information.
|
||||
doing loop versioning for alias in the vectorizer.
|
||||
|
||||
@item vect-max-peeling-for-alignment
|
||||
The maximum number of loop peels to enhance access alignment
|
||||
|
|
|
@ -191,6 +191,15 @@ enum fp_contract_mode {
|
|||
FP_CONTRACT_FAST = 2
|
||||
};
|
||||
|
||||
/* Vectorizer cost model, selected with -fvect-cost-model=MODEL and stored in flag_vect_cost_model. */
|
||||
enum vect_cost_model {
|
||||
VECT_COST_MODEL_UNLIMITED = 0, /* "unlimited": the vectorized code path is assumed to be profitable. */
|
||||
VECT_COST_MODEL_CHEAP = 1, /* "cheap": like dynamic, but skips loops whose vectorization would be cost prohibitive (e.g. needing runtime alignment checks). */
|
||||
VECT_COST_MODEL_DYNAMIC = 2, /* "dynamic": a runtime check guards the vectorized code path. */
|
||||
VECT_COST_MODEL_DEFAULT = 3 /* Init() value of the option; no user-visible string maps to it. */
|
||||
};
|
||||
|
||||
|
||||
/* Different instrumentation modes. */
|
||||
enum sanitize_code {
|
||||
/* AddressSanitizer. */
|
||||
|
|
16
gcc/opts.c
16
gcc/opts.c
|
@ -486,6 +486,7 @@ static const struct default_options default_options_table[] =
|
|||
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
|
||||
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
|
||||
|
||||
|
@ -500,7 +501,7 @@ static const struct default_options default_options_table[] =
|
|||
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fvect_cost_model, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
|
||||
|
||||
|
@ -825,6 +826,17 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
|||
}
|
||||
}
|
||||
|
||||
/* Tune vectorization related parameters according to cost model. */
|
||||
if (opts->x_flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
|
||||
{
|
||||
maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS,
|
||||
6, opts->x_param_values, opts_set->x_param_values);
|
||||
maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS,
|
||||
0, opts->x_param_values, opts_set->x_param_values);
|
||||
maybe_set_param_value (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT,
|
||||
0, opts->x_param_values, opts_set->x_param_values);
|
||||
}
|
||||
|
||||
/* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
|
||||
is disabled. */
|
||||
if ((!opts->x_flag_tree_loop_vectorize && !opts->x_flag_tree_slp_vectorize)
|
||||
|
@ -1669,7 +1681,7 @@ common_handle_option (struct gcc_options *opts,
|
|||
&& !opts_set->x_flag_tree_vectorize)
|
||||
opts->x_flag_tree_slp_vectorize = value;
|
||||
if (!opts_set->x_flag_vect_cost_model)
|
||||
opts->x_flag_vect_cost_model = value;
|
||||
opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
|
||||
if (!opts_set->x_flag_tree_loop_distribute_patterns)
|
||||
opts->x_flag_tree_loop_distribute_patterns = value;
|
||||
/* Indirect call profiling should do all useful transformations
|
||||
|
|
|
@ -1057,20 +1057,17 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
|
|||
unsigned *cost = (unsigned *) data;
|
||||
unsigned retval = 0;
|
||||
|
||||
if (flag_vect_cost_model)
|
||||
{
|
||||
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
|
||||
int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
|
||||
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
|
||||
int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
|
||||
misalign);
|
||||
/* Statements in an inner loop relative to the loop being
|
||||
vectorized are weighted more heavily. The value here is
|
||||
arbitrary and could potentially be improved with analysis. */
|
||||
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
|
||||
count *= 50; /* FIXME. */
|
||||
/* Statements in an inner loop relative to the loop being
|
||||
vectorized are weighted more heavily. The value here is
|
||||
arbitrary and could potentially be improved with analysis. */
|
||||
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
|
||||
count *= 50; /* FIXME. */
|
||||
|
||||
retval = (unsigned) (count * stmt_cost);
|
||||
cost[where] += retval;
|
||||
}
|
||||
retval = (unsigned) (count * stmt_cost);
|
||||
cost[where] += retval;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
|
|
@ -1115,7 +1115,7 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
|
|||
*new_slot = slot;
|
||||
}
|
||||
|
||||
if (!supportable_dr_alignment && !flag_vect_cost_model)
|
||||
if (!supportable_dr_alignment && unlimited_cost_model ())
|
||||
slot->count += VECT_MAX_COST;
|
||||
}
|
||||
|
||||
|
@ -1225,7 +1225,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
|
|||
res.peel_info.dr = NULL;
|
||||
res.body_cost_vec = stmt_vector_for_cost();
|
||||
|
||||
if (flag_vect_cost_model)
|
||||
if (!unlimited_cost_model ())
|
||||
{
|
||||
res.inside_cost = INT_MAX;
|
||||
res.outside_cost = INT_MAX;
|
||||
|
@ -1454,7 +1454,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
vectorization factor.
|
||||
We do this automatically for cost model, since we calculate cost
|
||||
for every peeling option. */
|
||||
if (!flag_vect_cost_model)
|
||||
if (unlimited_cost_model ())
|
||||
possible_npeel_number = vf /nelements;
|
||||
|
||||
/* Handle the aligned case. We may decide to align some other
|
||||
|
@ -1462,7 +1462,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
if (DR_MISALIGNMENT (dr) == 0)
|
||||
{
|
||||
npeel_tmp = 0;
|
||||
if (!flag_vect_cost_model)
|
||||
if (unlimited_cost_model ())
|
||||
possible_npeel_number++;
|
||||
}
|
||||
|
||||
|
@ -1795,16 +1795,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
|||
/* (2) Versioning to force alignment. */
|
||||
|
||||
/* Try versioning if:
|
||||
1) flag_tree_vect_loop_version is TRUE
|
||||
2) optimize loop for speed
|
||||
3) there is at least one unsupported misaligned data ref with an unknown
|
||||
1) optimize loop for speed
|
||||
2) there is at least one unsupported misaligned data ref with an unknown
|
||||
misalignment, and
|
||||
4) all misaligned data refs with a known misalignment are supported, and
|
||||
5) the number of runtime alignment checks is within reason. */
|
||||
3) all misaligned data refs with a known misalignment are supported, and
|
||||
4) the number of runtime alignment checks is within reason. */
|
||||
|
||||
do_versioning =
|
||||
flag_tree_vect_loop_version
|
||||
&& optimize_loop_nest_for_speed_p (loop)
|
||||
optimize_loop_nest_for_speed_p (loop)
|
||||
&& (!loop->inner); /* FORNOW */
|
||||
|
||||
if (do_versioning)
|
||||
|
|
|
@ -2680,7 +2680,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
|
|||
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
|
||||
|
||||
/* Cost model disabled. */
|
||||
if (!flag_vect_cost_model)
|
||||
if (unlimited_cost_model ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
|
||||
*ret_min_profitable_niters = 0;
|
||||
|
|
|
@ -2168,7 +2168,7 @@ vect_slp_analyze_bb_1 (basic_block bb)
|
|||
}
|
||||
|
||||
/* Cost model: check if the vectorization is worthwhile. */
|
||||
if (flag_vect_cost_model
|
||||
if (!unlimited_cost_model ()
|
||||
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
|
|
|
@ -880,6 +880,14 @@ known_alignment_for_access_p (struct data_reference *data_ref_info)
|
|||
return (DR_MISALIGNMENT (data_ref_info) != -1);
|
||||
}
|
||||
|
||||
|
||||
/* Return true if the vect cost model is unlimited, i.e. flag_vect_cost_model equals VECT_COST_MODEL_UNLIMITED. */
|
||||
static inline bool
|
||||
unlimited_cost_model ()
|
||||
{
|
||||
return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED;
|
||||
}
|
||||
|
||||
/* Source location */
|
||||
extern LOC vect_location;
|
||||
|
||||
|
|
Loading…
Reference in New Issue