i386.c (override_options): Initialize ix86_veclib_handler to ix86_veclibabi_svml when -mveclibabi=svml is used.
* config/i386/i386.c (override_options): Initialize ix86_veclib_handler to ix86_veclibabi_svml when -mveclibabi=svml is used. (ix86_veclibabi_svml): New function for SVML ABI style vectorization support. * doc/invoke.texi (-mveclibabi) [svml]: Document new target option. testsuite/ChangeLog: * gcc.target/i386/vectorize6.c: New test. From-SVN: r133692
This commit is contained in:
parent
d60a2d4d2f
commit
9aba5d2249
@ -1,3 +1,12 @@
|
||||
2008-03-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.c (override_options): Initialize
|
||||
ix86_veclib_handler to ix86_veclibabi_svml when
|
||||
-mveclibabi=svml is used.
|
||||
(ix86_veclibabi_svml): New function for SVML ABI style
|
||||
vectorization support.
|
||||
* doc/invoke.texi (-mveclibabi) [svml]: Document new target option.
|
||||
|
||||
2008-03-28 Rafael Espindola <espindola@google.com>
|
||||
|
||||
* fold-const.c (tree_unary_nonnegative_warnv_p): Make it public.
|
||||
@ -34,8 +43,7 @@
|
||||
ASSERT_EXPR <name, expr OP limit>.
|
||||
(register_edge_assert_for_1): Adjust callers.
|
||||
(find_assert_locations): Likewise.
|
||||
(process_assert_insertions_for): Build condition from
|
||||
expression.
|
||||
(process_assert_insertions_for): Build condition from expression.
|
||||
(extract_range_from_assert): Handle ASSERT_EXPRs
|
||||
of the form ASSERT_EXPR <name, expr OP limit>.
|
||||
(register_edge_assert_for_2): New helper registering
|
||||
@ -54,7 +62,7 @@
|
||||
2008-03-28 Nick Clifton <nickc@redhat.com>
|
||||
|
||||
* config/mn10300/mn10300.c (mn10300_secondary_reload_class):
|
||||
Return GENERAL_REGS for stack adjustment reloads.
|
||||
Return GENERAL_REGS for stack adjustment reloads.
|
||||
|
||||
2008-03-28 Andrew Pinski <andrew_pinski@playstation.sony.com>
|
||||
|
||||
@ -123,44 +131,42 @@
|
||||
tree_to_aff_combination_expand.
|
||||
(get_inner_reference_aff): New function.
|
||||
* tree-parloops.c (loop_parallel_p): Free vectorizer info.
|
||||
* tree-ssa-loop-im.c: Include tree-affine.h and pointer-set.h.
|
||||
(struct lim_aux_data): sm_done field removed.
|
||||
(mem_ref_loc_p, mem_ref_locs_p): New types.
|
||||
(struct mem_ref): Added id, stored, accesses_in_loop,
|
||||
indep_loop, dep_loop, indep_ref, dep_ref fields.
|
||||
Removed is_stored, locs and next fields.
|
||||
(memory_accesses): New variable.
|
||||
(movement_possibility): Do not allow moving statements
|
||||
that store to memory.
|
||||
(outermost_indep_loop, simple_mem_ref_in_stmt, mem_ref_in_stmt):
|
||||
New functions.
|
||||
(determine_max_movement): For statements with memory references,
|
||||
find the outermost loop in that the reference is independent.
|
||||
(move_computations_stmt): Mark the virtual operands for
|
||||
renaming.
|
||||
(memref_free, mem_ref_alloc, mem_ref_locs_alloc, mark_ref_stored,
|
||||
gather_mem_refs_stmt, gather_mem_refs_in_loops, vtoe_hash, vtoe_eq,
|
||||
vtoe_free, record_vop_access, get_vop_accesses, get_vop_stores,
|
||||
add_vop_ref_mapping, create_vop_ref_mapping_loop,
|
||||
create_vop_ref_mapping, analyze_memory_references,
|
||||
cannot_overlap_p, mem_refs_may_alias_p, rewrite_mem_ref_loc,
|
||||
get_all_locs_in_loop, ref_always_accessed_p,
|
||||
refs_independent_p, record_indep_loop, ref_indep_loop_p_1,
|
||||
ref_indep_loop_p, can_sm_ref_p, find_refs_for_sm,
|
||||
store_motion_loop, store_motion): New functions.
|
||||
(struct vop_to_refs_elt): New type.
|
||||
(record_mem_ref_loc, free_mem_ref_locs, rewrite_mem_refs,
|
||||
memref_hash, memref_eq, hoist_memory_references): Rewritten.
|
||||
(schedule_sm): Replaced by...
|
||||
(execute_sm): ... this.
|
||||
(determine_lsm_ref, hoist_memory_references,
|
||||
loop_suitable_for_sm, gather_mem_refs_stmt, gather_mem_refs,
|
||||
find_more_ref_vops, free_mem_ref, free_mem_refs,
|
||||
determine_lsm_loop, determine_lsm): Removed.
|
||||
(tree_ssa_lim_finalize): Free data structures used by store
|
||||
motion.
|
||||
(tree_ssa_lim): Call analyze_memory_references. Use
|
||||
store_motion instead of determine_lsm.
|
||||
* tree-ssa-loop-im.c: Include tree-affine.h and pointer-set.h.
|
||||
(struct lim_aux_data): sm_done field removed.
|
||||
(mem_ref_loc_p, mem_ref_locs_p): New types.
|
||||
(struct mem_ref): Added id, stored, accesses_in_loop,
|
||||
indep_loop, dep_loop, indep_ref, dep_ref fields.
|
||||
Removed is_stored, locs and next fields.
|
||||
(memory_accesses): New variable.
|
||||
(movement_possibility): Do not allow moving statements
|
||||
that store to memory.
|
||||
(outermost_indep_loop, simple_mem_ref_in_stmt, mem_ref_in_stmt):
|
||||
New functions.
|
||||
(determine_max_movement): For statements with memory references,
|
||||
find the outermost loop in which the reference is independent.
|
||||
(move_computations_stmt): Mark the virtual operands for renaming.
|
||||
(memref_free, mem_ref_alloc, mem_ref_locs_alloc, mark_ref_stored,
|
||||
gather_mem_refs_stmt, gather_mem_refs_in_loops, vtoe_hash, vtoe_eq,
|
||||
vtoe_free, record_vop_access, get_vop_accesses, get_vop_stores,
|
||||
add_vop_ref_mapping, create_vop_ref_mapping_loop,
|
||||
create_vop_ref_mapping, analyze_memory_references,
|
||||
cannot_overlap_p, mem_refs_may_alias_p, rewrite_mem_ref_loc,
|
||||
get_all_locs_in_loop, ref_always_accessed_p,
|
||||
refs_independent_p, record_indep_loop, ref_indep_loop_p_1,
|
||||
ref_indep_loop_p, can_sm_ref_p, find_refs_for_sm,
|
||||
store_motion_loop, store_motion): New functions.
|
||||
(struct vop_to_refs_elt): New type.
|
||||
(record_mem_ref_loc, free_mem_ref_locs, rewrite_mem_refs,
|
||||
memref_hash, memref_eq, hoist_memory_references): Rewritten.
|
||||
(schedule_sm): Replaced by...
|
||||
(execute_sm): ... this.
|
||||
(determine_lsm_ref, hoist_memory_references,
|
||||
loop_suitable_for_sm, gather_mem_refs_stmt, gather_mem_refs,
|
||||
find_more_ref_vops, free_mem_ref, free_mem_refs,
|
||||
determine_lsm_loop, determine_lsm): Removed.
|
||||
(tree_ssa_lim_finalize): Free data structures used by store motion.
|
||||
(tree_ssa_lim): Call analyze_memory_references. Use
|
||||
store_motion instead of determine_lsm.
|
||||
|
||||
2008-03-27 Paolo Bonzini <bonzini@gnu.org>
|
||||
|
||||
@ -523,8 +529,7 @@
|
||||
|
||||
2008-03-23 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* config/i386/i386.h (STATIC_CHAIN_REGNUM): Use R10_REG and
|
||||
CX_REG.
|
||||
* config/i386/i386.h (STATIC_CHAIN_REGNUM): Use R10_REG and CX_REG.
|
||||
|
||||
2008-03-23 Zuxy Meng <zuxy.meng@gmail.com>
|
||||
|
||||
|
@ -1830,6 +1830,7 @@ static int ix86_isa_flags_explicit;
|
||||
|
||||
/* Vectorization library interface and handlers. */
|
||||
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
|
||||
static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
|
||||
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
|
||||
|
||||
/* Implement TARGET_HANDLE_OPTION. */
|
||||
@ -2673,7 +2674,9 @@ override_options (void)
|
||||
/* Use external vectorized library in vectorizing intrinsics. */
|
||||
if (ix86_veclibabi_string)
|
||||
{
|
||||
if (strcmp (ix86_veclibabi_string, "acml") == 0)
|
||||
if (strcmp (ix86_veclibabi_string, "svml") == 0)
|
||||
ix86_veclib_handler = ix86_veclibabi_svml;
|
||||
else if (strcmp (ix86_veclibabi_string, "acml") == 0)
|
||||
ix86_veclib_handler = ix86_veclibabi_acml;
|
||||
else
|
||||
error ("unknown vectorization library ABI type (%s) for "
|
||||
@ -21415,8 +21418,120 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Handler for an ACML-style interface to a library with vectorized
|
||||
intrinsics. */
|
||||
/* Handler for an SVML-style interface to
|
||||
a library with vectorized intrinsics. */
|
||||
|
||||
static tree
|
||||
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
|
||||
{
|
||||
char name[20];
|
||||
tree fntype, new_fndecl, args;
|
||||
unsigned arity;
|
||||
const char *bname;
|
||||
enum machine_mode el_mode, in_mode;
|
||||
int n, in_n;
|
||||
|
||||
/* The SVML is suitable for unsafe math only. */
|
||||
if (!flag_unsafe_math_optimizations)
|
||||
return NULL_TREE;
|
||||
|
||||
el_mode = TYPE_MODE (TREE_TYPE (type_out));
|
||||
n = TYPE_VECTOR_SUBPARTS (type_out);
|
||||
in_mode = TYPE_MODE (TREE_TYPE (type_in));
|
||||
in_n = TYPE_VECTOR_SUBPARTS (type_in);
|
||||
if (el_mode != in_mode
|
||||
|| n != in_n)
|
||||
return NULL_TREE;
|
||||
|
||||
switch (fn)
|
||||
{
|
||||
case BUILT_IN_EXP:
|
||||
case BUILT_IN_LOG:
|
||||
case BUILT_IN_LOG10:
|
||||
case BUILT_IN_POW:
|
||||
case BUILT_IN_TANH:
|
||||
case BUILT_IN_TAN:
|
||||
case BUILT_IN_ATAN:
|
||||
case BUILT_IN_ATAN2:
|
||||
case BUILT_IN_ATANH:
|
||||
case BUILT_IN_CBRT:
|
||||
case BUILT_IN_SINH:
|
||||
case BUILT_IN_SIN:
|
||||
case BUILT_IN_ASINH:
|
||||
case BUILT_IN_ASIN:
|
||||
case BUILT_IN_COSH:
|
||||
case BUILT_IN_COS:
|
||||
case BUILT_IN_ACOSH:
|
||||
case BUILT_IN_ACOS:
|
||||
if (el_mode != DFmode || n != 2)
|
||||
return NULL_TREE;
|
||||
break;
|
||||
|
||||
case BUILT_IN_EXPF:
|
||||
case BUILT_IN_LOGF:
|
||||
case BUILT_IN_LOG10F:
|
||||
case BUILT_IN_POWF:
|
||||
case BUILT_IN_TANHF:
|
||||
case BUILT_IN_TANF:
|
||||
case BUILT_IN_ATANF:
|
||||
case BUILT_IN_ATAN2F:
|
||||
case BUILT_IN_ATANHF:
|
||||
case BUILT_IN_CBRTF:
|
||||
case BUILT_IN_SINHF:
|
||||
case BUILT_IN_SINF:
|
||||
case BUILT_IN_ASINHF:
|
||||
case BUILT_IN_ASINF:
|
||||
case BUILT_IN_COSHF:
|
||||
case BUILT_IN_COSF:
|
||||
case BUILT_IN_ACOSHF:
|
||||
case BUILT_IN_ACOSF:
|
||||
if (el_mode != SFmode || n != 4)
|
||||
return NULL_TREE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
|
||||
|
||||
if (fn == BUILT_IN_LOGF)
|
||||
strcpy (name, "vmlsLn4");
|
||||
else if (fn == BUILT_IN_LOG)
|
||||
strcpy (name, "vmldLn2");
|
||||
else if (n == 4)
|
||||
{
|
||||
sprintf (name, "vmls%s", bname+10);
|
||||
name[strlen (name)-1] = '4';
|
||||
}
|
||||
else
|
||||
sprintf (name, "vmld%s2", bname+10);
|
||||
|
||||
/* Convert to uppercase. */
|
||||
name[4] &= ~0x20;
|
||||
|
||||
arity = 0;
|
||||
for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
|
||||
args = TREE_CHAIN (args))
|
||||
arity++;
|
||||
|
||||
if (arity == 1)
|
||||
fntype = build_function_type_list (type_out, type_in, NULL);
|
||||
else
|
||||
fntype = build_function_type_list (type_out, type_in, type_in, NULL);
|
||||
|
||||
/* Build a function declaration for the vectorized function. */
|
||||
new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
|
||||
TREE_PUBLIC (new_fndecl) = 1;
|
||||
DECL_EXTERNAL (new_fndecl) = 1;
|
||||
DECL_IS_NOVOPS (new_fndecl) = 1;
|
||||
TREE_READONLY (new_fndecl) = 1;
|
||||
|
||||
return new_fndecl;
|
||||
}
|
||||
|
||||
/* Handler for an ACML-style interface to
|
||||
a library with vectorized intrinsics. */
|
||||
|
||||
static tree
|
||||
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
|
||||
|
@ -10765,15 +10765,26 @@ decreased by up to 2 ulp (i.e. the inverse of 1.0 equals 0.99999994).
|
||||
@item -mveclibabi=@var{type}
|
||||
@opindex mveclibabi
|
||||
Specifies the ABI type to use for vectorizing intrinsics using an
|
||||
external library. Supported types are @code{acml} for the AMD
|
||||
math core library style of interfacing. GCC will currently emit
|
||||
calls to @code{__vrd2_sin}, @code{__vrd2_cos}, @code{__vrd2_exp},
|
||||
@code{__vrd2_log}, @code{__vrd2_log2}, @code{__vrd2_log10},
|
||||
@code{__vrs4_sinf}, @code{__vrs4_cosf}, @code{__vrs4_expf},
|
||||
@code{__vrs4_logf}, @code{__vrs4_log2f}, @code{__vrs4_log10f}
|
||||
and @code{__vrs4_powf} when using this type and @option{-ftree-vectorize}
|
||||
is enabled. A ACML ABI compatible library will have to be specified
|
||||
at link time.
|
||||
external library. Supported types are @code{svml} for the Intel short
|
||||
vector math library and @code{acml} for the AMD math core library style
|
||||
of interfacing. GCC will currently emit calls to @code{vmldExp2},
|
||||
@code{vmldLn2}, @code{vmldLog102}, @code{vmldPow2},
|
||||
@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2},
|
||||
@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2},
|
||||
@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2},
|
||||
@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104},
|
||||
@code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4},
|
||||
@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4},
|
||||
@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4},
|
||||
@code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for corresponding
|
||||
function type when @option{-mveclibabi=svml} is used and @code{__vrd2_sin},
|
||||
@code{__vrd2_cos}, @code{__vrd2_exp}, @code{__vrd2_log}, @code{__vrd2_log2},
|
||||
@code{__vrd2_log10}, @code{__vrs4_sinf}, @code{__vrs4_cosf},
|
||||
@code{__vrs4_expf}, @code{__vrs4_logf}, @code{__vrs4_log2f},
|
||||
@code{__vrs4_log10f} and @code{__vrs4_powf} for corresponding function type
|
||||
when @option{-mveclibabi=acml} is used. Both @option{-ftree-vectorize} and
|
||||
@option{-funsafe-math-optimizations} have to be enabled. An SVML or ACML ABI
|
||||
compatible library will have to be specified at link time.
|
||||
|
||||
@item -mpush-args
|
||||
@itemx -mno-push-args
|
||||
|
@ -1,3 +1,7 @@
|
||||
2008-03-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* gcc.target/i386/vectorize6.c: New test.
|
||||
|
||||
2008-03-28 Tobias Burnus <burnus@net-b.de>
|
||||
|
||||
PR fortran/35721
|
||||
@ -20,7 +24,7 @@
|
||||
2008-03-28 Andrew Pinski <andrew_pinski@playstation.sony.com>
|
||||
|
||||
PR target/31334
|
||||
* gcc.target/powerpc/altivec-25.c: Nnew testcase.
|
||||
* gcc.target/powerpc/altivec-25.c: New testcase.
|
||||
|
||||
2008-03-27 Jerry DeLisle <jvdelisle@gcc.gnu.org>
|
||||
|
||||
|
16
gcc/testsuite/gcc.target/i386/vectorize6.c
Normal file
16
gcc/testsuite/gcc.target/i386/vectorize6.c
Normal file
@ -0,0 +1,16 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2 -ftree-vectorize -mveclibabi=svml -ffast-math" } */
|
||||
|
||||
double x[256];
|
||||
|
||||
extern double sin(double);
|
||||
|
||||
void foo(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i<256; ++i)
|
||||
x[i] = sin(x[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "vmldSin2" } } */
|
Loading…
Reference in New Issue
Block a user