tree-vect-data-refs.c (vect_setup_realignment): Support realignment in basic blocks.

* tree-vect-data-refs.c (vect_setup_realignment): Support realignment
	in basic blocks.
	(vect_supportable_dr_alignment): Check alignment for basic blocks.
	* tree-vect-slp.c (vect_build_slp_tree): Allow different codes for 
	data references.
	(vect_bb_vectorization_profitable_p): New function.
	(vect_slp_analyze_bb): Call vect_bb_vectorization_profitable_p() to
	check if it's worthwhile to vectorize the basic block.

From-SVN: r163260
Author: Ira Rosen  2010-08-15 07:00:32 +00:00 (committed by Ira Rosen)
commit 69f11a1360, parent 2d684b3e96
9 changed files with 206 additions and 31 deletions
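
As an orientation before the per-file diffs: the change targets straight-line code in a single basic block whose loads start at a misaligned offset, the pattern exercised by the new costmodel-bb-slp-9a.c test further down. A minimal sketch of that kind of code, with array names and sizes invented here rather than taken from the commit:

/* Four copies in one basic block; the loads start at in[1], i.e. at an
   address misaligned for a 16-byte vector.  With this commit the SLP pass
   can vectorize such a block either with a hardware misaligned load or,
   failing that, with the explicit realignment scheme.  */
unsigned int in[16], out[16];

void
copy4 (void)
{
  unsigned int *pin = &in[1];
  unsigned int *pout = &out[0];

  *pout++ = *pin++;
  *pout++ = *pin++;
  *pout++ = *pin++;
  *pout++ = *pin++;
}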

gcc/ChangeLog

@@ -1,3 +1,14 @@
2010-08-15 Ira Rosen <irar@il.ibm.com>
* tree-vect-data-refs.c (vect_setup_realignment): Support realignment
in basic blocks.
(vect_supportable_dr_alignment): Check alignment for basic blocks.
* tree-vect-slp.c (vect_build_slp_tree): Allow different codes for
data references.
(vect_bb_vectorization_profitable_p): New function.
(vect_slp_analyze_bb): Call vect_bb_vectorization_profitable_p() to
check if it's worthwhile to vectorize the basic block.
2010-08-14 Anatoly Sokolov <aesok@post.ru>
* reload.h (register_move_cost, memory_move_secondary_cost,

gcc/testsuite/ChangeLog

@@ -1,3 +1,14 @@
2010-08-15 Ira Rosen <irar@il.ibm.com>
* gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a.c: New test.
* gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp: Run basic block
SLP tests.
* gcc.dg/vect/bb-slp-9.c: Now vectorizable on targets that support
misaligned loads.
* gcc.dg/vect/bb-slp-10.c: Now vectorizable on targets that support
misaligned stores.
* gcc.dg/vect/bb-slp-2.c: Avoid loop vectorization.
2010-08-14 Mingjie Xing <mingjie.xing@gmail.com>
* gcc.dg/vect/fast-math-vect-reduc-8.c: Move

gcc/testsuite/gcc.dg/vect/bb-slp-10.c

@@ -50,7 +50,7 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" } } */
/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" { xfail vect_hw_misalign } } } */
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { target vect_hw_misalign } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */

gcc/testsuite/gcc.dg/vect/bb-slp-2.c

@@ -24,8 +24,8 @@ main1 (int dummy)
*pout++ = *pin++;
/* Avoid loop vectorization. */
if (dummy == 32)
abort ();
if (dummy)
__asm__ volatile ("" : : : "memory");
}
/* check results: */
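
Aside on the change just above, which is not part of the vectorizer itself: the empty volatile asm with a "memory" clobber is a standard optimization barrier, used here so that the loop in bb-slp-2.c is not vectorized as a loop and the basic-block SLP path is what actually gets tested. A standalone sketch of the idiom (function and loop invented for illustration):

/* A volatile asm with a "memory" clobber may read or write arbitrary
   memory, so GCC cannot vectorize a loop whose body contains it.  */
void
keep_loop_scalar (int *a, int n)
{
  int i;

  for (i = 0; i < n; i++)
    {
      a[i] = a[i] + 1;
      __asm__ volatile ("" : : : "memory");   /* barrier: defeats loop vectorization */
    }
}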

gcc/testsuite/gcc.dg/vect/bb-slp-9.c

@@ -47,7 +47,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 0 "slp" } } */
/* { dg-final { scan-tree-dump-times "unsupported alignment in basic block." 1 "slp" } } */
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */

gcc/testsuite/gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a.c

@@ -0,0 +1,47 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include <stdio.h>
#include "../../tree-vect.h"
#define N 16
unsigned int out[N];
unsigned int in[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
__attribute__ ((noinline)) int
main1 (unsigned int x, unsigned int y)
{
int i;
unsigned int *pin = &in[1];
unsigned int *pout = &out[0];
unsigned int a0, a1, a2, a3;
/* Misaligned load. */
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
*pout++ = *pin++;
/* Check results. */
if (out[0] != in[1]
|| out[1] != in[2]
|| out[2] != in[3]
|| out[3] != in[4])
abort();
return 0;
}
int main (void)
{
check_vect ();
main1 (2, 3);
return 0;
}
/* { dg-final { scan-tree-dump-times "basic block vectorized using SLP" 1 "slp" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "slp" } } */

gcc/testsuite/gcc.dg/vect/costmodel/ppc/ppc-costmodel-vect.exp

@@ -57,7 +57,10 @@ if [check_vmx_hw_available] {
# Initialize `dg'.
dg-init
set VECT_SLP_CFLAGS $DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
lappend VECT_SLP_CFLAGS "-fdump-tree-slp-details"
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \
@@ -66,6 +69,8 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-slp-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-bb-slp*.\[cS\]]] \
"" $VECT_SLP_CFLAGS
#### Tests with special options
global SAVED_DEFAULT_VECTCFLAGS

gcc/tree-vect-data-refs.c

@@ -3467,8 +3467,8 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
edge pe;
struct loop *loop = NULL;
edge pe = NULL;
tree scalar_dest = gimple_assign_lhs (stmt);
tree vec_dest;
gimple inc;
@@ -3483,9 +3483,15 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
gimple_seq stmts = NULL;
bool inv_p;
bool compute_in_loop = false;
bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
bool nested_in_vect_loop = false;
struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
struct loop *loop_for_initial_load;
struct loop *loop_for_initial_load = NULL;
if (loop_vinfo)
{
loop = LOOP_VINFO_LOOP (loop_vinfo);
nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
}
gcc_assert (alignment_support_scheme == dr_explicit_realign
|| alignment_support_scheme == dr_explicit_realign_optimized);
@@ -3523,7 +3529,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
or not, which in turn determines if the misalignment is computed inside
the inner-loop, or outside LOOP. */
if (init_addr != NULL_TREE)
if (init_addr != NULL_TREE || !loop_vinfo)
{
compute_in_loop = true;
gcc_assert (alignment_support_scheme == dr_explicit_realign);
@@ -3555,6 +3561,9 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
if (at_loop)
*at_loop = loop_for_initial_load;
if (loop_for_initial_load)
pe = loop_preheader_edge (loop_for_initial_load);
/* 3. For the case of the optimized realignment, create the first vector
load at the loop preheader. */
@@ -3563,7 +3572,6 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
/* Create msq_init = *(floor(p1)) in the loop preheader */
gcc_assert (!compute_in_loop);
pe = loop_preheader_edge (loop_for_initial_load);
vec_dest = vect_create_destination_var (scalar_dest, vectype);
ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
&init_addr, &inc, true, &inv_p);
@@ -3582,8 +3590,14 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
mark_symbols_for_renaming (new_stmt);
new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
gcc_assert (!new_bb);
if (pe)
{
new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
gcc_assert (!new_bb);
}
else
gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
msq_init = gimple_assign_lhs (new_stmt);
}
@@ -3596,16 +3610,19 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
tree builtin_decl;
/* Compute INIT_ADDR - the initial addressed accessed by this memref. */
if (compute_in_loop)
gcc_assert (init_addr); /* already computed by the caller. */
else
if (!init_addr)
{
/* Generate the INIT_ADDR computation outside LOOP. */
init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
NULL_TREE, loop);
pe = loop_preheader_edge (loop);
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
if (loop)
{
pe = loop_preheader_edge (loop);
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
else
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}
builtin_decl = targetm.vectorize.builtin_mask_for_load ();
@@ -3979,12 +3996,11 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (aligned_access_p (dr) && !check_aligned_accesses)
return dr_aligned;
if (!loop_vinfo)
/* FORNOW: Misaligned accesses are supported only in loops. */
return dr_unaligned_unsupported;
vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
if (loop_vinfo)
{
vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
}
/* Possibly unaligned access. */
@@ -4059,9 +4075,10 @@
|| targetm.vectorize.builtin_mask_for_load ()))
{
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
if (nested_in_vect_loop
&& (TREE_INT_CST_LOW (DR_STEP (dr))
!= GET_MODE_SIZE (TYPE_MODE (vectype))))
if ((nested_in_vect_loop
&& (TREE_INT_CST_LOW (DR_STEP (dr))
!= GET_MODE_SIZE (TYPE_MODE (vectype))))
|| !loop_vinfo)
return dr_explicit_realign;
else
return dr_explicit_realign_optimized;
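
For readers unfamiliar with the realignment scheme that vect_setup_realignment now also supports outside of loops: an explicit-realignment load replaces one unaligned vector load with two aligned loads plus a lane select. The following is a plain-C sketch of that idea only, assuming a 16-byte vector of four unsigned ints; the helper name, VF and the address arithmetic are illustrative and not the vectorizer's actual output:

#include <stdint.h>
#include <stdio.h>

#define VF 4   /* illustrative vectorization factor: 4 x unsigned int = 16 bytes */

unsigned int in[2 * VF] __attribute__ ((aligned (16))) = { 0, 1, 2, 3, 4, 5, 6, 7 };

/* Conceptual model of an explicit-realignment load: instead of one unaligned
   vector load from ADDR, read the two aligned vectors that straddle ADDR and
   select the VF lanes starting at ADDR.  Assumes the buffer extends at least
   one vector past ADDR.  */
static void
realigned_load (const unsigned int *addr, unsigned int dst[VF])
{
  const unsigned int *floor_addr
    = (const unsigned int *) ((uintptr_t) addr & ~(uintptr_t) (VF * sizeof (unsigned int) - 1));
  unsigned int shift = addr - floor_addr;   /* lanes to skip in the first vector */
  unsigned int lo[VF], hi[VF];
  unsigned int i;

  for (i = 0; i < VF; i++)
    lo[i] = floor_addr[i];                  /* first aligned load */
  for (i = 0; i < VF; i++)
    hi[i] = floor_addr[VF + i];             /* second aligned load */
  for (i = 0; i < VF; i++)                  /* realign: merge the two vectors */
    dst[i] = shift + i < VF ? lo[shift + i] : hi[shift + i - VF];
}

int
main (void)
{
  unsigned int dst[VF];

  realigned_load (&in[1], dst);             /* load misaligned by one element */
  printf ("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]);   /* prints: 1 2 3 4 */
  return 0;
}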

gcc/tree-vect-slp.c

@@ -456,7 +456,12 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
&& (first_stmt_code != IMAGPART_EXPR
|| rhs_code != REALPART_EXPR)
&& (first_stmt_code != REALPART_EXPR
|| rhs_code != IMAGPART_EXPR))
|| rhs_code != IMAGPART_EXPR)
&& !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
&& (first_stmt_code == ARRAY_REF
|| first_stmt_code == INDIRECT_REF
|| first_stmt_code == COMPONENT_REF
|| first_stmt_code == MEM_REF)))
{
if (vect_print_dump_info (REPORT_SLP))
{
@@ -1509,7 +1514,75 @@ vect_slp_analyze_operations (bb_vec_info bb_vinfo)
}
/* Cheick if the basic block can be vectorized. */
/* Check if vectorization of the basic block is profitable. */
static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
slp_instance instance;
int i;
unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
unsigned int stmt_cost;
gimple stmt;
gimple_stmt_iterator si;
basic_block bb = BB_VINFO_BB (bb_vinfo);
stmt_vec_info stmt_info = NULL;
tree dummy_type = NULL;
int dummy = 0;
/* Calculate vector costs. */
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
{
vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
}
/* Calculate scalar cost. */
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
stmt = gsi_stmt (si);
stmt_info = vinfo_for_stmt (stmt);
if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
|| !PURE_SLP_STMT (stmt_info))
continue;
if (STMT_VINFO_DATA_REF (stmt_info))
{
if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
stmt_cost = targetm.vectorize.builtin_vectorization_cost
(scalar_load, dummy_type, dummy);
else
stmt_cost = targetm.vectorize.builtin_vectorization_cost
(scalar_store, dummy_type, dummy);
}
else
stmt_cost = targetm.vectorize.builtin_vectorization_cost
(scalar_stmt, dummy_type, dummy);
scalar_cost += stmt_cost;
}
if (vect_print_dump_info (REPORT_COST))
{
fprintf (vect_dump, "Cost model analysis: \n");
fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
vec_inside_cost);
fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
vec_outside_cost);
fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
}
/* Vectorization is profitable if its cost is less than the cost of scalar
version. */
if (vec_outside_cost + vec_inside_cost >= scalar_cost)
return false;
return true;
}
/* Check if the basic block can be vectorized. */
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
@@ -1641,6 +1714,18 @@ vect_slp_analyze_bb (basic_block bb)
return NULL;
}
/* Cost model: check if the vectorization is worthwhile. */
if (flag_vect_cost_model
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fprintf (vect_dump, "not vectorized: vectorization is not "
"profitable.\n");
destroy_bb_vec_info (bb_vinfo);
return NULL;
}
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
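
The decision made by the new vect_bb_vectorization_profitable_p reduces to a single comparison. The numbers below are purely hypothetical, chosen only to illustrate the rule that the block is vectorized when vec_outside_cost + vec_inside_cost is strictly smaller than scalar_cost; they are not measured costs of any target:

#include <stdio.h>

int
main (void)
{
  /* Hypothetical costs, in the abstract units returned by the target's
     builtin_vectorization_cost hook.  */
  unsigned int scalar_cost      = 8;  /* e.g. 4 scalar loads + 4 scalar stores, cost 1 each */
  unsigned int vec_inside_cost  = 3;  /* e.g. vector load + realignment + vector store */
  unsigned int vec_outside_cost = 2;  /* e.g. building invariants used by the block */

  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    printf ("not vectorized: vectorization is not profitable.\n");
  else
    printf ("Basic block will be vectorized using SLP\n");
  return 0;
}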