tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by known_alignment_for_access_p.

* tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
        known_alignment_for_access_p.
        (known_alignment_for_access_p): New.
        (do_peeling_for_alignment): Field made int instead of bool and renamed
        to peeling_for_alignment.
        (LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
        * tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
        functionality used to be in vect_analyze_operations.
        (vect_analyze_operations): Code to determine vectorization factor was
        moved to vect_determine_vectorization_factor.
        (vect_enhance_data_refs_alignment): Update to correct alignment when it
        is known instead of -1.  Set LOOP_PEELING_FOR_ALIGNMENT to peeling
        factor.
        (vect_analyze_loop): Call vect_determine_vectorization_factor (used to
        be part of vect_analyze_operations).
        * tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
        creating the guard condition, as the number of iterations may be
        constant.
        (new_loop_vec_info): Use LOOP_PEELING_FOR_ALIGNMENT, the new name of
        LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
        * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
        alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
        (vect_do_peeling_for_alignment): Use fold.
        (vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.

        (vect_update_inits_of_dr): Renamed to
        vect_update_init_of_dr.
        (vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
        (vectorizable_store): Fix assertion to use == instead of =.

From-SVN: r96526
This commit is contained in:
Dorit Naishlos 2005-03-15 18:33:09 +00:00 committed by Dorit Nuzman
parent 119bb233e2
commit 5f55a1ba12
10 changed files with 504 additions and 174 deletions

View File

@ -1,3 +1,35 @@
2005-03-15 Dorit Naishlos <dorit@il.ibm.com>
* tree-vectorizer.h (unknown_alignment_for_access_p): Replaced by
known_alignment_for_access_p.
(known_alignment_for_access_p): New.
(do_peeling_for_alignment): Field made int instead of bool and renamed
to peeling_for_alignment.
(LOOP_DO_PEELING_FOR_ALIGNMENT): Renamed to LOOP_PEELING_FOR_ALIGNMENT.
* tree-vect-analyze.c (vect_determine_vectorization_factor): New. This
functionality used to be in vect_analyze_operations.
(vect_analyze_operations): Code to determine vectorization factor was
moved to vect_determine_vectorization_factor.
(vect_enhance_data_refs_alignment): Update to correct alignment when it
is known instead of -1. Set LOOP_PEELING_FOR_ALIGNMENT to peeling
factor.
(vect_analyze_loop): Call vect_determine_vectorization_factor (used to
be part of vect_analyze_operations).
* tree-vectorizer.c (slpeel_tree_peel_loop_to_edge): Use fold when
creating the guard condition, as the number of iterations may be
constant.
(new_loop_vec_info): Use LOOP_PEELING_FOR_ALIGNMENT, the new name of
LOOP_DO_PEELING_FOR_ALIGNMENT. Set it to 0 instead of false.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Handle known
alignment case more efficiently. Use LOOP_PEELING_FOR_ALIGNMENT.
(vect_do_peeling_for_alignment): Use fold.
(vect_transform_loop): Use new name of LOOP_DO_PEELING_FOR_ALIGNMENT.
(vect_update_inits_of_dr): Renamed to
vect_update_init_of_dr.
(vect_update_inits_of_drs): Use new name of vect_update_inits_of_dr.
(vectorizable_store): Fix assertion to use == instead of =.
2005-03-15 Daniel Jacobowitz <dan@codesourcery.com>
* config/arm/arm.h (CONDITIONAL_REGISTER_USAGE): Don't clear

View File

@ -1,3 +1,11 @@
2005-03-15 Dorit Naishlos <dorit@il.ibm.com>
* gcc.dg/vect/vect-54.c: Now vectorizable on targets that don't support
misaligned accesses.
* gcc.dg/vect/vect-58.c: Likewise.
* gcc.dg/vect/vect-92.c: New.
* gcc.dg/vect/vect-93.c: New.
2005-03-15 Feng Wang <fengwang@nudt.edu.cn>
PR fortran/18827

View File

@ -50,6 +50,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */

View File

@ -51,6 +51,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */

View File

@ -0,0 +1,90 @@
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 256
typedef float afloat __attribute__ ((__aligned__(16)));
/* known misalignment: same alignment */
/* Loop with a compile-time trip count of 5.  All three accesses use the
   same index (i+1) through restrict-qualified pointers to the aligned
   type 'afloat', so they share the same misalignment relative to their
   base pointers.  Stores pb[i+1] * pc[i+1] into pa[i+1], then re-reads the
   results and aborts on any mismatch.  Always returns 0.  */
int
main1 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
{
int i;
/* Loop under test: touches elements 1..5 of each array.  */
for (i = 0; i < 5; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < 5; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
/* Same computation as main1 but with a compile-time trip count of 6
   instead of 5.  Stores pb[i+1] * pc[i+1] into pa[i+1] for elements 1..6,
   then re-reads the results and aborts on any mismatch.
   Always returns 0.  */
int
main2 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc)
{
int i;
/* Loop under test: touches elements 1..6 of each array.  */
for (i = 0; i < 6; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < 6; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
/* Same computation as main1/main2 but the trip count N is a run-time
   parameter.  Stores pb[i+1] * pc[i+1] into pa[i+1] for i in [0, n), i.e.
   it touches elements 1..n of each array, so every array passed in must
   have at least n+1 elements.  Re-reads the results and aborts on any
   mismatch.  Always returns 0.  */
int
main3 (afloat * __restrict__ pa, afloat * __restrict__ pb, afloat * __restrict__ pc, int n)
{
int i;
/* Loop under test: highest index accessed is n.  */
for (i = 0; i < n; i++)
{
pa[i+1] = pb[i+1] * pc[i+1];
}
/* check results: */
for (i = 0; i < n; i++)
{
if (pa[i+1] != (pb[i+1] * pc[i+1]))
abort ();
}
return 0;
}
/* Test driver: sets up three N-element arrays and runs the three loop
   variants on them.  Only the first 20 elements of B and C are given
   explicit values; the remaining elements are zero-initialized.  A is
   written by each callee before it is read.  Returns 0 on success; the
   callees abort on a wrong result.  */
int main (void)
{
  afloat a[N];
  afloat b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
  afloat c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};

  check_vect ();

  main1 (a,b,c);
  main2 (a,b,c);
  /* main3 accesses elements 1..n, so the largest valid trip count for
     N-element arrays is N-1; passing N would read and write one element
     past the end of a, b and c.  */
  main3 (a,b,c,N-1);

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" } } */

View File

@ -0,0 +1,76 @@
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 3001
typedef float afloat __attribute__ ((__aligned__(16)));
/* Exercises stores through a plain 'float *' parameter, whose type says
   nothing about the alignment of the pointed-to data.  First fills
   pa[0..3000] with 2.0 and verifies it, then overwrites pa[1..10] with 3.0
   and verifies that.  PA must point to at least 3001 floats.  Aborts on
   any mismatch; always returns 0.  The dg-final directives marked
   "in main1" refer to the loops in this function.  */
int
main1 (float *pa)
{
int i;
/* Store loop starting at element 0, constant trip count 3001.  */
for (i = 0; i < 3001; i++)
{
pa[i] = 2.0;
}
/* check results: */
for (i = 0; i < 3001; i++)
{
if (pa[i] != 2.0)
abort ();
}
/* Store loop starting at element 1 (offset by one float from PA),
   constant trip count 10.  */
for (i = 1; i <= 10; i++)
{
pa[i] = 3.0;
}
/* check results: */
for (i = 1; i <= 10; i++)
{
if (pa[i] != 3.0)
abort ();
}
return 0;
}
/* Test driver.  Fills B with 0..N-1, then runs a shifted copy loop
   a[i] = b[i-1] for i in 1..256, in which the store and the load are
   offset by one element from each other, and checks the result.  Finally
   passes A to main1.  Returns 0 on success; aborts on a wrong result.
   The dg-final directives marked "in main" refer to the loops here.  */
int main (void)
{
int i;
afloat a[N];
afloat b[N];
check_vect ();
/* from bzip2: */
for (i=0; i<N; i++) b[i] = i;
a[0] = 0;
/* Shifted copy: writes a[1..256] from b[0..255].  */
for (i = 1; i <= 256; i++) a[i] = b[i-1];
/* check results: */
for (i = 1; i <= 256; i++)
{
if (a[i] != i-1)
abort ();
}
/* a[0] was set explicitly above and must not have been touched by the
   copy loop.  */
if (a[0] != 0)
abort ();
main1 (a);
return 0;
}
/* in main1 */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail vect_no_align } } } */
/* in main */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail vect_no_align } } } */

View File

@ -50,6 +50,7 @@ static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static void vect_enhance_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);
static bool vect_determine_vectorization_factor (loop_vec_info);
/* Utility functions for the analyses. */
static bool exist_non_indexing_operands_for_use_p (tree, tree);
@ -285,6 +286,150 @@ vect_analyze_offset_expr (tree expr,
}
/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4byte elements,
   on a target with vector size (VS) 16byte, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }

   Side effects: STMT_VINFO_VECTYPE is set for every relevant stmt that is
   examined, and on success LOOP_VINFO_VECT_FACTOR (LOOP_VINFO) is set to VF.

   Return true if a vectorization factor greater than 1 was determined.
   Return false (after reporting the reason to the dump file, if enabled)
   when a relevant stmt already has a vector mode, when no vector type exists
   for a scalar type, when stmts disagree on the number of vector units, or
   when the resulting factor is 0 or 1.  */

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          unsigned int nunits;
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          tree vectype;

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_generic_expr (vect_dump, stmt, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info))
            continue;

          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Pick the scalar type that decides the vector type: the type of
             the memory reference if the stmt has a data-ref, otherwise the
             type of the assignment's left-hand side, otherwise the type of
             the stmt itself.  */
          if (STMT_VINFO_DATA_REF (stmt_info))
            scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
          else if (TREE_CODE (stmt) == MODIFY_EXPR)
            scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
          else
            scalar_type = TREE_TYPE (stmt);

          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "get vectype for scalar type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vectype)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                        LOOP_LOC (loop_vinfo)))
                {
                  fprintf (vect_dump, "not vectorized: unsupported data-type ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }
              return false;
            }
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }
          STMT_VINFO_VECTYPE (stmt_info) = vectype;

          nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
          /* NUNITS is unsigned, so print it with %u rather than %d.  */
          if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
            fprintf (vect_dump, "nunits = %u", nunits);

          if (vectorization_factor)
            {
              /* FORNOW: don't allow mixed units.
                 This restriction will be relaxed in the future.  */
              if (nunits != vectorization_factor)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                            LOOP_LOC (loop_vinfo)))
                    fprintf (vect_dump, "not vectorized: mixed data-types");
                  return false;
                }
            }
          else
            /* The first relevant stmt fixes the factor for the whole loop.  */
            vectorization_factor = nunits;

#ifdef ENABLE_CHECKING
          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
                      * vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
        }
    }

  /* TODO: Analyze cost. Decide if worth while to vectorize.  */

  /* A factor of 0 means no relevant stmt was seen; a factor of 1 means a
     vector would hold a single element.  Both are rejected.  */
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
                                LOOP_LOC (loop_vinfo)))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}
/* Function vect_analyze_operations.
Scan the loop stmts and make sure they are all vectorizable. */
@ -299,11 +444,13 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
unsigned int vectorization_factor = 0;
int i;
bool ok;
tree scalar_type;
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "=== vect_analyze_operations ===");
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
@ -311,9 +458,7 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{
tree stmt = bsi_stmt (si);
unsigned int nunits;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype;
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
{
@ -337,49 +482,13 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
continue;
}
if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
LOOP_LOC (loop_vinfo)))
{
fprintf (vect_dump, "not vectorized: vector stmt in loop:");
print_generic_expr (vect_dump, stmt, TDF_SLIM);
}
return false;
}
if (STMT_VINFO_DATA_REF (stmt_info))
scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
else if (TREE_CODE (stmt) == MODIFY_EXPR)
scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
else
scalar_type = TREE_TYPE (stmt);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
{
fprintf (vect_dump, "get vectype for scalar type: ");
print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
}
vectype = get_vectype_for_scalar_type (scalar_type);
if (!vectype)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
LOOP_LOC (loop_vinfo)))
{
fprintf (vect_dump,
"not vectorized: unsupported data-type ");
print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
}
return false;
}
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
{
fprintf (vect_dump, "vectype: ");
print_generic_expr (vect_dump, vectype, TDF_SLIM);
}
STMT_VINFO_VECTYPE (stmt_info) = vectype;
#ifdef ENABLE_CHECKING
if (STMT_VINFO_RELEVANT_P (stmt_info))
{
gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
}
#endif
ok = (vectorizable_operation (stmt, NULL, NULL)
|| vectorizable_assignment (stmt, NULL, NULL)
@ -396,44 +505,11 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
}
return false;
}
nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "nunits = %d", nunits);
if (vectorization_factor)
{
/* FORNOW: don't allow mixed units.
This restriction will be relaxed in the future. */
if (nunits != vectorization_factor)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "not vectorized: mixed data-types");
return false;
}
}
else
vectorization_factor = nunits;
#ifdef ENABLE_CHECKING
gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
* vectorization_factor == UNITS_PER_SIMD_WORD);
#endif
}
}
/* TODO: Analyze cost. Decide if worth while to vectorize. */
if (vectorization_factor <= 1)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "not vectorized: unsupported data-type");
return false;
}
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump,
@ -933,7 +1009,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
unsigned int i;
varray_type datarefs;
struct data_reference *dr0 = NULL;
unsigned int i, j;
/*
This pass will require a cost model to guide it whether to apply peeling
@ -1036,26 +1114,15 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
if (!aligned_access_p (dr))
{
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
if (!aligned_access_p (dr0))
{
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
break;
}
}
}
if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
{
if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "Peeling for alignment will not be applied.");
return;
}
else
if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "Peeling for alignment will be applied.");
/* (1.2) Update the alignment info according to the peeling factor.
If the misalignment of the DR we peel for is M, then the
peeling factor is VF - M, and the misalignment of each access DR_i
@ -1063,37 +1130,54 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
If the misalignment of the DR we peel for is unknown, then the
misalignment of each access DR_i in the loop is also unknown.
FORNOW: set the misalignment of the accesses to unknown even
if the peeling factor is known at compile time.
TODO: - consider accesses that are known to have the same
alignment, even if that alignment is unknown. */
TODO: - if the peeling factor is known at compile time, use that
when updating the misalignment info of the loop DRs.
- consider accesses that are known to have the same
alignment, even if that alignment is unknown. */
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
int mis;
int npeel = 0;
if (known_alignment_for_access_p (dr0))
{
DR_MISALIGNMENT (dr) = 0;
if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "Alignment of access forced using peeling.");
/* Since it's known at compile time, compute the number of iterations
in the peeled loop (the peeling factor) for use in updating
DR_MISALIGNMENT values. The peeling factor is the vectorization
factor minus the misalignment as an element count. */
mis = DR_MISALIGNMENT (dr0);
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
}
else
DR_MISALIGNMENT (dr) = -1;
}
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
datarefs = loop_write_datarefs;
for (j = 0; j < 2; j++)
{
DR_MISALIGNMENT (dr) = 0;
if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "Alignment of access forced using peeling.");
for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
if (dr == dr0)
continue;
if (known_alignment_for_access_p (dr)
&& DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
DR_MISALIGNMENT (dr) = 0;
else if (known_alignment_for_access_p (dr)
&& known_alignment_for_access_p (dr0))
{
int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
DR_MISALIGNMENT (dr) += npeel * drsize;
DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
}
else
DR_MISALIGNMENT (dr) = -1;
}
datarefs = loop_read_datarefs;
}
else
DR_MISALIGNMENT (dr) = -1;
DR_MISALIGNMENT (dr0) = 0;
if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "Alignment of access forced using peeling.");
}
}
@ -2479,6 +2563,15 @@ vect_analyze_loop (struct loop *loop)
return NULL;
}
ok = vect_determine_vectorization_factor (loop_vinfo);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
fprintf (vect_dump, "can't determine vectorization factor.");
destroy_loop_vec_info (loop_vinfo);
return NULL;
}
/* Analyze the alignment of the data-refs in the loop.
FORNOW: Only aligned accesses are handled. */

View File

@ -64,7 +64,7 @@ static void vect_generate_tmps_on_preheader
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_inits_of_dr (struct data_reference *, tree niters);
static void vect_update_init_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
static void vect_do_peeling_for_loop_bound
@ -907,7 +907,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
alignment_support_cheme = vect_supportable_dr_alignment (dr);
gcc_assert (alignment_support_cheme);
gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
/* Handle use - get the vectorized def from the defining stmt. */
vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
@ -1451,14 +1451,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
Set the number of iterations for the loop represented by LOOP_VINFO
to the minimum between LOOP_NITERS (the original iteration count of the loop)
and the misalignment of DR - the first data reference recorded in
and the misalignment of DR - the data reference recorded in
LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
this loop, the data reference DR will refer to an aligned location.
The following computation is generated:
compute address misalignment in bytes:
addr_mis = addr & (vectype_size - 1)
If the misalignment of DR is known at compile time:
addr_mis = int mis = DR_MISALIGNMENT (dr);
Else, compute address misalignment in bytes:
addr_mis = addr & (vectype_size - 1)
prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
@ -1479,37 +1481,53 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
tree elem_misalign;
tree byte_misalign;
tree new_stmts = NULL_TREE;
tree start_addr =
vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
tree niters_type = TREE_TYPE (loop_niters);
tree elem_size_log =
build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
tree vf_tree = build_int_cst (unsigned_type_node, vf);
pe = loop_preheader_edge (loop);
new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
gcc_assert (!new_bb);
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
int element_size = vectype_align/vf;
int elem_misalign = byte_misalign / element_size;
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "known alignment = %d.", byte_misalign);
iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
}
else
{
tree new_stmts = NULL_TREE;
tree start_addr =
vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
tree ptr_type = TREE_TYPE (start_addr);
tree size = TYPE_SIZE (ptr_type);
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree elem_size_log =
build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
tree vf_tree = build_int_cst (unsigned_type_node, vf);
tree byte_misalign;
tree elem_misalign;
new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
gcc_assert (!new_bb);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
iters = fold_convert (niters_type, iters);
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign =
build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
iters = fold_convert (niters_type, iters);
}
/* Create: prolog_loop_niters = min (iters, loop_niters) */
/* If the loop bound is known at compile time we already verified that it is
greater than vf; since the misalignment ('iters') is at most vf, there's
@ -1517,12 +1535,17 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
if (TREE_CODE (loop_niters) != INTEGER_CST)
iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
{
fprintf (vect_dump, "niters for prolog loop: ");
print_generic_expr (vect_dump, iters, TDF_SLIM);
}
var = create_tmp_var (niters_type, "prolog_loop_niters");
add_referenced_tmp_var (var);
iters_name = force_gimple_operand (iters, &stmt, false, var);
/* Insert stmt on loop preheader edge. */
pe = loop_preheader_edge (loop);
if (stmt)
{
basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
@ -1533,7 +1556,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
}
/* Function vect_update_inits_of_dr
/* Function vect_update_init_of_dr
NITERS iterations were peeled from LOOP. DR represents a data reference
in LOOP. This function updates the information recorded in DR to
@ -1541,7 +1564,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
executed. Specifically, it updates the OFFSET field of stmt_info. */
static void
vect_update_inits_of_dr (struct data_reference *dr, tree niters)
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
@ -1574,13 +1597,13 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
vect_update_inits_of_dr (dr, niters);
vect_update_init_of_dr (dr, niters);
}
for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
{
struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
vect_update_inits_of_dr (dr, niters);
vect_update_init_of_dr (dr, niters);
}
}
@ -1618,8 +1641,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
/* Update number of times loop executes. */
n_iters = LOOP_VINFO_NITERS (loop_vinfo);
LOOP_VINFO_NITERS (loop_vinfo) =
build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
/* Update the init conditions of the access functions of all data refs. */
vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
@ -1656,7 +1679,7 @@ vect_transform_loop (loop_vec_info loop_vinfo,
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
vect_do_peeling_for_alignment (loop_vinfo, loops);
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a

View File

@ -963,7 +963,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
add_bb_to_loop (bb_before_second_loop, first_loop->outer);
pre_condition =
build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_before_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
@ -1001,7 +1001,8 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
bb_after_second_loop = split_edge (second_loop->single_exit);
add_bb_to_loop (bb_after_second_loop, second_loop->outer);
pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
pre_condition =
fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
bb_after_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
@ -1213,7 +1214,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_EXIT_COND (res) = NULL;
LOOP_VINFO_NITERS (res) = NULL;
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
LOOP_VINFO_VECT_FACTOR (res) = 0;
VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
"loop_write_datarefs");

View File

@ -95,9 +95,15 @@ typedef struct _loop_vec_info {
/* Unknown DRs according to which loop was peeled. */
struct data_reference *unaligned_dr;
/* If true, loop is peeled.
unaligned_drs show in this case DRs used for peeling. */
bool do_peeling_for_alignment;
/* peeling_for_alignment indicates whether peeling for alignment will take
place, and how the number of peeled iterations is determined:
peeling_for_alignment = X means:
If X=0: Peeling for alignment will not be applied.
If X>0: X is the known misalignment, in bytes, of the dataref recorded in
the field unaligned_dr; the number of iterations to peel is
derived from it when the prolog loop is generated.
If X=-1: Generate a runtime test to calculate the number of iterations
to be peeled, using the dataref recorded in the field
unaligned_dr. */
int peeling_for_alignment;
/* All data references in the loop that are being written to. */
varray_type data_ref_writes;
@ -119,7 +125,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_DATAREF_WRITES(L) (L)->data_ref_writes
#define LOOP_VINFO_DATAREF_READS(L) (L)->data_ref_reads
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
#define LOOP_DO_PEELING_FOR_ALIGNMENT(L) (L)->do_peeling_for_alignment
#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
#define LOOP_VINFO_LOC(L) (L)->loop_line_number
@ -242,7 +248,8 @@ vinfo_for_stmt (tree stmt)
/* Info on data references alignment. */
/*-----------------------------------------------------------------*/
/* The misalignment of the memory access in bytes. */
/* Reflects actual alignment of first access in the vectorized loop,
taking into account peeling/versioning if applied. */
#define DR_MISALIGNMENT(DR) (DR)->aux
static inline bool
@ -252,9 +259,9 @@ aligned_access_p (struct data_reference *data_ref_info)
}
static inline bool
unknown_alignment_for_access_p (struct data_reference *data_ref_info)
known_alignment_for_access_p (struct data_reference *data_ref_info)
{
return (DR_MISALIGNMENT (data_ref_info) == -1);
return (DR_MISALIGNMENT (data_ref_info) != -1);
}
/* Perform signed modulo, always returning a non-negative value. */