vectorizer cost model enhancement

From-SVN: r202980
This commit is contained in:
Xinliang David Li 2013-09-27 16:48:15 +00:00 committed by Xinliang David Li
parent ac1857a3ef
commit d6d1127249
12 changed files with 122 additions and 62 deletions

View File

@ -1,3 +1,24 @@
2013-09-27 Xinliang David Li <davidxl@google.com>
* opts.c (finish_options): Adjust parameters
according to vect cost model.
(common_handle_option): Set dynamic vect cost
model for FDO.
* targhooks.c (default_add_stmt_cost): Compute stmt cost
unconditionally.
* tree-vect-loop.c (vect_estimate_min_profitable_iters):
Use helper function.
* tree-vectorizer.h (unlimited_cost_model): New function.
* tree-vect-slp.c (vect_slp_analyze_bb_1): Use helper function.
* tree-vect-data-refs.c (vect_peeling_hash_insert): Use helper
function.
(vect_enhance_data_refs_alignment): Ditto.
* flag-types.h: New enum.
* common/config/i386/i386-common.c (ix86_option_init_struct):
No need to initialize vect_cost_model flag.
* config/i386/i386.c (ix86_add_stmt_cost): Compute stmt cost
unconditionally.
2013-09-27 Diego Novillo <dnovillo@google.com>
* gimple.h (enum ssa_mode): Remove.

View File

@ -2278,13 +2278,33 @@ ftree-slp-vectorize
Common Report Var(flag_tree_slp_vectorize) Optimization
Enable basic block vectorization (SLP) on trees
fvect-cost-model=
Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT)
Specifies the cost model for vectorization
Enum
Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
EnumValue
Enum(vect_cost_model) String(unlimited) Value(VECT_COST_MODEL_UNLIMITED)
EnumValue
Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC)
EnumValue
Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP)
fvect-cost-model
Common Report Var(flag_vect_cost_model) Optimization
Enable use of cost model in vectorization
Common RejectNegative Alias(fvect-cost-model=,dynamic)
Enables the dynamic vectorizer cost model. Preserved for backward compatibility.
fno-vect-cost-model
Common RejectNegative Alias(fvect-cost-model=,unlimited)
Enables the unlimited vectorizer cost model. Preserved for backward compatibility.
ftree-vect-loop-version
Common Report Var(flag_tree_vect_loop_version) Init(1) Optimization
Enable loop versioning when doing loop vectorization on trees
Common Ignore
Does nothing. Preserved for backward compatibility.
ftree-scev-cprop
Common Report Var(flag_tree_scev_cprop) Init(1) Optimization

View File

@ -811,7 +811,6 @@ ix86_option_init_struct (struct gcc_options *opts)
opts->x_flag_pcc_struct_return = 2;
opts->x_flag_asynchronous_unwind_tables = 2;
opts->x_flag_vect_cost_model = 1;
}
/* On the x86 -fsplit-stack and -fstack-protector both use the same

View File

@ -42782,20 +42782,17 @@ ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
if (flag_vect_cost_model)
{
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
count *= 50; /* FIXME. */
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
count *= 50; /* FIXME. */
retval = (unsigned) (count * stmt_cost);
cost[where] += retval;
}
retval = (unsigned) (count * stmt_cost);
cost[where] += retval;
return retval;
}

View File

@ -423,7 +423,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
-ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
-ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
-ftree-vectorize -ftree-vrp @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
-fvariable-expansion-in-unroller -fvect-cost-model -fvpt -fweb @gol
@ -6770,7 +6770,7 @@ optimizations designed to reduce code size.
@option{-Os} disables the following optimization flags:
@gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
-falign-labels -freorder-blocks -freorder-blocks-and-partition @gol
-fprefetch-loop-arrays -ftree-vect-loop-version}
-fprefetch-loop-arrays}
@item -Ofast
@opindex Ofast
@ -8025,19 +8025,20 @@ Perform loop vectorization on trees. This flag is enabled by default at
Perform basic block vectorization on trees. This flag is enabled by default at
@option{-O3} and when @option{-ftree-vectorize} is enabled.
@item -ftree-vect-loop-version
@opindex ftree-vect-loop-version
Perform loop versioning when doing loop vectorization on trees. When a loop
appears to be vectorizable except that data alignment or data dependence cannot
be determined at compile time, then vectorized and non-vectorized versions of
the loop are generated along with run-time checks for alignment or dependence
to control which version is executed. This option is enabled by default
except at level @option{-Os} where it is disabled.
@item -fvect-cost-model
@item -fvect-cost-model=@var{model}
@opindex fvect-cost-model
Enable cost model for vectorization. This option is enabled by default at
@option{-O3}.
Alter the cost model used for vectorization. The @var{model} argument
should be one of @code{unlimited}, @code{dynamic} or @code{cheap}.
With the @code{unlimited} model the vectorized code-path is assumed
to be profitable while with the @code{dynamic} model a runtime check
will guard the vectorized code-path to enable it only for iteration
counts that will likely execute faster than when executing the original
scalar loop. The @code{cheap} model will disable vectorization of
loops where doing so would be cost prohibitive, for example due to
required runtime checks for data dependence or alignment, but otherwise
is equal to the @code{dynamic} model.
The default cost model depends on other optimization flags and is
either @code{dynamic} or @code{cheap}.
@item -ftree-vrp
@opindex ftree-vrp
@ -9443,13 +9444,11 @@ constraints. The default value is 0.
@item vect-max-version-for-alignment-checks
The maximum number of run-time checks that can be performed when
doing loop versioning for alignment in the vectorizer. See option
@option{-ftree-vect-loop-version} for more information.
doing loop versioning for alignment in the vectorizer.
@item vect-max-version-for-alias-checks
The maximum number of run-time checks that can be performed when
doing loop versioning for alias in the vectorizer. See option
@option{-ftree-vect-loop-version} for more information.
doing loop versioning for alias in the vectorizer.
@item vect-max-peeling-for-alignment
The maximum number of loop peels to enhance access alignment

View File

@ -191,6 +191,15 @@ enum fp_contract_mode {
FP_CONTRACT_FAST = 2
};
/* Cost models the vectorizer can be told to use via -fvect-cost-model=.
   The enumerators rely on implicit numbering starting at zero, so their
   order must not be changed.  */
enum vect_cost_model {
  /* = 0: the vectorized code path is assumed to be profitable.  */
  VECT_COST_MODEL_UNLIMITED,
  /* = 1: like the dynamic model, but vectorization is disabled where it
     would be cost prohibitive (e.g. because of required runtime checks
     for data dependence or alignment).  */
  VECT_COST_MODEL_CHEAP,
  /* = 2: a runtime check guards the vectorized code path.  */
  VECT_COST_MODEL_DYNAMIC,
  /* = 3: initial value of the option variable (Init() in the option
     record), i.e. no model has been chosen yet.  */
  VECT_COST_MODEL_DEFAULT
};
/* Different instrumentation modes. */
enum sanitize_code {
/* AddressSanitizer. */

View File

@ -486,6 +486,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
@ -500,7 +501,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fvect_cost_model, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
@ -825,6 +826,17 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
}
}
/* Tune vectorization related parameters according to cost model. */
if (opts->x_flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
{
maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS,
6, opts->x_param_values, opts_set->x_param_values);
maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS,
0, opts->x_param_values, opts_set->x_param_values);
maybe_set_param_value (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT,
0, opts->x_param_values, opts_set->x_param_values);
}
/* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
is disabled. */
if ((!opts->x_flag_tree_loop_vectorize && !opts->x_flag_tree_slp_vectorize)
@ -1669,7 +1681,7 @@ common_handle_option (struct gcc_options *opts,
&& !opts_set->x_flag_tree_vectorize)
opts->x_flag_tree_slp_vectorize = value;
if (!opts_set->x_flag_vect_cost_model)
opts->x_flag_vect_cost_model = value;
opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
if (!opts_set->x_flag_tree_loop_distribute_patterns)
opts->x_flag_tree_loop_distribute_patterns = value;
/* Indirect call profiling should do all useful transformations

View File

@ -1057,20 +1057,17 @@ default_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
if (flag_vect_cost_model)
{
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
misalign);
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
count *= 50; /* FIXME. */
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
count *= 50; /* FIXME. */
retval = (unsigned) (count * stmt_cost);
cost[where] += retval;
}
retval = (unsigned) (count * stmt_cost);
cost[where] += retval;
return retval;
}

View File

@ -1115,7 +1115,7 @@ vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
*new_slot = slot;
}
if (!supportable_dr_alignment && !flag_vect_cost_model)
if (!supportable_dr_alignment && unlimited_cost_model ())
slot->count += VECT_MAX_COST;
}
@ -1225,7 +1225,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
res.peel_info.dr = NULL;
res.body_cost_vec = stmt_vector_for_cost();
if (flag_vect_cost_model)
if (!unlimited_cost_model ())
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
@ -1454,7 +1454,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
vectorization factor.
We do this automatically for cost model, since we calculate cost
for every peeling option. */
if (!flag_vect_cost_model)
if (unlimited_cost_model ())
possible_npeel_number = vf /nelements;
/* Handle the aligned case. We may decide to align some other
@ -1462,7 +1462,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
if (DR_MISALIGNMENT (dr) == 0)
{
npeel_tmp = 0;
if (!flag_vect_cost_model)
if (unlimited_cost_model ())
possible_npeel_number++;
}
@ -1795,16 +1795,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
/* (2) Versioning to force alignment. */
/* Try versioning if:
1) flag_tree_vect_loop_version is TRUE
2) optimize loop for speed
3) there is at least one unsupported misaligned data ref with an unknown
1) optimize loop for speed
2) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
4) all misaligned data refs with a known misalignment are supported, and
5) the number of runtime alignment checks is within reason. */
3) all misaligned data refs with a known misalignment are supported, and
4) the number of runtime alignment checks is within reason. */
do_versioning =
flag_tree_vect_loop_version
&& optimize_loop_nest_for_speed_p (loop)
optimize_loop_nest_for_speed_p (loop)
&& (!loop->inner); /* FORNOW */
if (do_versioning)

View File

@ -2680,7 +2680,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
if (!flag_vect_cost_model)
if (unlimited_cost_model ())
{
dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
*ret_min_profitable_niters = 0;

View File

@ -2168,7 +2168,7 @@ vect_slp_analyze_bb_1 (basic_block bb)
}
/* Cost model: check if the vectorization is worthwhile. */
if (flag_vect_cost_model
if (!unlimited_cost_model ()
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
{
if (dump_enabled_p ())

View File

@ -880,6 +880,14 @@ known_alignment_for_access_p (struct data_reference *data_ref_info)
return (DR_MISALIGNMENT (data_ref_info) != -1);
}
/* Predicate telling whether the vectorizer is running under the
   unlimited cost model, i.e. whether flag_vect_cost_model currently
   holds VECT_COST_MODEL_UNLIMITED.  Takes no arguments and only reads
   the flag.  */
static inline bool
unlimited_cost_model ()
{
  const bool is_unlimited
    = (VECT_COST_MODEL_UNLIMITED == flag_vect_cost_model);

  return is_unlimited;
}
/* Source location */
extern LOC vect_location;