diff --git a/boehm-gc/ChangeLog b/boehm-gc/ChangeLog
index 14ba80a9a7a..fa53669ca2a 100644
--- a/boehm-gc/ChangeLog
+++ b/boehm-gc/ChangeLog
@@ -1,3 +1,12 @@
+2009-07-17  Michael Meissner
+
+	PR boehm-gc/40785
+	* include/private/gc_locks.h (GC_test_and_set): If GCC 4.4, use
+	the __sync_lock_test_and_set and __sync_lock_release builtins on
+	the powerpc.  If not GCC 4.4, fix up the constraints so that it
+	builds without error.
+	(GC_clear): Ditto.
+
 2009-07-17  Kai Tietz
 
 	* configure.ac: Add rule for mingw targets to add -DGC_BUILD=1 to
diff --git a/boehm-gc/include/private/gc_locks.h b/boehm-gc/include/private/gc_locks.h
index 4e2b641b78b..d1bb2e4521a 100644
--- a/boehm-gc/include/private/gc_locks.h
+++ b/boehm-gc/include/private/gc_locks.h
@@ -139,49 +139,35 @@
 #    define GC_TEST_AND_SET_DEFINED
 #  endif
 #  if defined(POWERPC)
-#   if 0 /* CPP_WORDSZ == 64  totally broken to use int locks with ldarx */
-      inline static int GC_test_and_set(volatile unsigned int *addr) {
-        unsigned long oldval;
-        unsigned long temp = 1; /* locked value */
-
-        __asm__ __volatile__(
-               "1:\tldarx %0,0,%3\n"   /* load and reserve               */
-               "\tcmpdi %0, 0\n"       /* if load is                     */
-               "\tbne 2f\n"            /*   non-zero, return already set */
-               "\tstdcx. %2,0,%1\n"    /* else store conditional         */
-               "\tbne- 1b\n"           /* retry if lost reservation      */
-               "\tsync\n"              /* import barrier                 */
-               "2:\t\n"                /* oldval is zero if we set       */
-              : "=&r"(oldval), "=p"(addr)
-              : "r"(temp), "1"(addr)
-              : "cr0","memory");
-        return (int)oldval;
-      }
+#   define GC_TEST_AND_SET_DEFINED
+#   define GC_CLEAR_DEFINED
+#   if (__GNUC__>4)||((__GNUC__==4)&&(__GNUC_MINOR__>=4))
+#     define GC_test_and_set(addr) __sync_lock_test_and_set (addr, 1)
+#     define GC_clear(addr) __sync_lock_release (addr)
 #   else
       inline static int GC_test_and_set(volatile unsigned int *addr) {
         int oldval;
         int temp = 1; /* locked value */
 
         __asm__ __volatile__(
-               "1:\tlwarx %0,0,%3\n"   /* load and reserve               */
+               "\n1:\n"
+               "\tlwarx %0,%y3\n"      /* load and reserve, 32-bits      */
               "\tcmpwi %0, 0\n"       /* if load is                     */
               "\tbne 2f\n"            /*   non-zero, return already set */
-               "\tstwcx. %2,0,%1\n"    /* else store conditional         */
+               "\tstwcx. %2,%y3\n"     /* else store conditional         */
               "\tbne- 1b\n"           /* retry if lost reservation      */
               "\tsync\n"              /* import barrier                 */
               "2:\t\n"                /* oldval is zero if we set       */
-              : "=&r"(oldval), "=p"(addr)
-              : "r"(temp), "1"(addr)
+              : "=&r"(oldval), "=m"(addr)
+              : "r"(temp), "Z"(addr)
               : "cr0","memory");
         return oldval;
       }
-#   endif
-#   define GC_TEST_AND_SET_DEFINED
      inline static void GC_clear(volatile unsigned int *addr) {
        __asm__ __volatile__("lwsync" : : : "memory");
        *(addr) = 0;
      }
-#    define GC_CLEAR_DEFINED
+#   endif
 #  endif
 #  if defined(ALPHA)
    inline static int GC_test_and_set(volatile unsigned int * addr)
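For readers unfamiliar with the builtins this patch switches to: below is a minimal sketch, not part of the patch, of a spinlock built on `__sync_lock_test_and_set`/`__sync_lock_release`, which is what the new `GC_test_and_set`/`GC_clear` macros expand to on GCC 4.4 and later. The `lock` variable and function names are illustrative.

```c
/* Illustrative only -- mirrors the GCC >= 4.4 path of the patch.  */
static volatile unsigned int lock;

static void acquire (void)
{
  /* __sync_lock_test_and_set returns the previous value and has
     acquire semantics; loop until we observe "unlocked" (0).  */
  while (__sync_lock_test_and_set (&lock, 1) != 0)
    ;                            /* spin */
}

static void release (void)
{
  __sync_lock_release (&lock);   /* stores 0 with release semantics */
}
```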
+ ("wZ" constraint): Ditto. + ("j" constraint): Ditto. + + * config/rs6000/predicates.md (vsx_register_operand): New + predicate for VSX. + (vfloat_operand): New predicate for vector.md. + (vint_operand): Ditto. + (vlogical_operand): Ditto. + (easy_fp_constant): If VSX, 0.0 is an easy constant. + (easy_vector_constant): Add VSX support. + (altivec_indexed_or_indirect_operand): New predicate for + recognizing Altivec style memory references with AND -16. + + * config/rs6000/rs6000.c (rs6000_vector_reload): New static global + for vector secondary reload support. + (rs6000_vector_reg_class): Delete, replacing it with rs6000_constraints. + (rs6000_vsx_reg_class): Ditto. + (rs6000_constraints): New array to hold the register classes of + each of the register constraints that can vary at runtime. + (builtin_mode_to_type): New static array for builtin function type + creation. + (builtin_hash_table): New static hash table for builtin function + type creation. + (TARGET_SECONDARY_RELOAD): Define target hook. + (TARGET_IRA_COVER_CLASSES): Ditto. + (rs6000_hard_regno_nregs_internal): If -mvsx, floating point + registers are 128 bits if VSX memory reference instructions are + used. + (rs6000_hard_regno_mode_ok): For VSX, only check if the VSX memory + unit is being used. + (rs6000_debug_vector_unit): Move into rs6000_debug_reg_global. + (rs6000_debug_reg_global): Move -mdebug=reg statements here. + Print several of the scheduling related parameters. + (rs6000_init_hard_regno_mode_ok): Switch to putting constraints in + rs6000_constraints instead of rs6000_vector_reg_class. Move + -mdebug=reg code to rs6000_debug_reg_global. Add support for + -mvsx-align-128 debug switch. Drop testing float_p if VSX or + Altivec. Add VSX support. Setup for secondary reload support on + Altivec/VSX registers. + (rs6000_override_options): Make power7 set the scheduling groups + like the power5. Add support for new debug switches to override + the scheduling defaults. Temporarily disable -mcpu=power7 from + setting -mvsx. Add support for debug switches -malways-hint, + -msched-groups, and -malign-branch-targets. + (rs6000_buitlin_conversion): Add support for returning unsigned + vector conversion functions to fix regressions due to stricter + type checking. + (rs6000_builtin_mul_widen_even): Ditto. + (rs6000_builtin_mul_widen_odd): Ditto. + (rs6000_builtin_vec_perm): Ditto. + (rs6000_vec_const_move): On VSX, use xxlxor to clear register. + (rs6000_expand_vector_init): Initial VSX support for using xxlxor + to zero a register. + (rs6000_emit_move): Fixup invalid const symbol_ref+reg that is + generated upstream. + (bdesc_3arg): Add builtins for unsigned types. Add builtins for + VSX types for bit operations. Changes to accomidate vector.md. + (bdesc_2arg): Ditto. + (bdesc_1arg): Ditto. + (struct builtin_description_predicates): Rewrite predicate + handling so that RTL describes the operation, instead of passing + the instruction to be used as a string argument. + (bdesc_altivec_preds): Ditto. + (altivec_expand_predicate_builtin): Ditto. + (altivec_expand_builtin): Ditto. + (rs6000_expand_ternop_builtin): Use a switch instead of an if + statement for vsldoi support. + (altivec_expand_ld_builtin): Change to use new names from + vector.md. + (altivec_expand_st_builtin): Ditto. + (paired_expand_builtin): Whitespace changes. + (rs6000_init_builtins): Add V2DF/V2DI types. Initialize the + builtin_mode_to_type table for secondary reload. Call + builtin_function_type to build random builtin functions. 
+	(altivec_init_builtins): Change to use builtin_function_type to
+	create builtin function types dynamically as we need them.
+	(builtin_hash_function): New support for hashing the tree types
+	for builtin function as we need it, rather than trying to build
+	all of the trees that we need.  Add initial preliminary VSX
+	support.
+	(builtin_function_type): Ditto.
+	(builtin_function_eq): Ditto.
+	(builtin_hash_struct): Ditto.
+	(rs6000_init_builtins): Ditto.
+	(rs6000_common_init_builtins): Ditto.
+	(altivec_init_builtins): Ditto.
+	(rs6000_common_init_builtins): Ditto.
+	(enum reload_reg_type): New enum for simplifying reg classes.
+	(rs6000_reload_register_type): Simplify register classes into GPR,
+	Vector, and other registers.
+	(rs6000_secondary_reload): New function to handle
+	Altivec and VSX addresses in reload.
+	(rs6000_secondary_reload_inner): Ditto.
+	(rs6000_ira_cover_classes): New target hook, that returns the
+	appropriate cover classes, based on -mvsx being used or not.
+	(rs6000_secondary_reload_class): Add VSX support.
+	(get_vec_cmp_insn): Delete, rewrite vector conditionals.
+	(get_vsel_insn): Ditto.
+	(rs6000_emit_vector_compare): Rewrite vector conditional support
+	so that where we can, we use RTL operators, instead of blindly use
+	UNSPEC.
+	(rs6000_emit_vector_select): Ditto.
+	(rs6000_emit_vector_cond_expr): Ditto.
+	(rs6000_emit_minmax): Directly generate min/max under altivec,
+	vsx.
+	(create_TOC_reference): Add -mdebug=addr support.
+	(emit_frame_save): VSX loads/stores need register indexed
+	addressing.
+
+	* config/rs6000/rs6000.md: Include vector.md.
+
+	* config/rs6000/t-rs6000 (MD_INCLUDES): Add vector.md.
+
+	* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
+	support for V2DI, V2DF in logical, permute, select operations.
+
+	* config/rs6000/rs6000.opt (-mvsx-scalar-double): Add new debug
+	switch for vsx/power7.
+	(-mvsx-scalar-memory): Ditto.
+	(-mvsx-align-128): Ditto.
+	(-mallow-movmisalign): Ditto.
+	(-mallow-df-permute): Ditto.
+	(-msched-groups): Ditto.
+	(-malways-hint): Ditto.
+	(-malign-branch-targets): Ditto.
+
+	* config/rs6000/rs6000.h (IRA_COVER_CLASSES): Delete, use target
+	hook instead.
+	(IRA_COVER_CLASSES_PRE_VSX): Cover classes if not -mvsx.
+	(IRA_COVER_CLASSES_VSX): Cover classes if -mvsx.
+	(rs6000_vector_reg_class): Delete.
+	(rs6000_vsx_reg_class): Ditto.
+	(enum rs6000_reg_class_enum): New enum for the constraints that
+	vary based on target switches.
+	(rs6000_constraints): New array to hold the register class for all
+	of the register constraints that vary based on the switches used.
+	(ALTIVEC_BUILTIN_*_UNS): Add unsigned builtin functions.
+	(enum rs6000_builtins): Add unsigned variants for the builtin
+	declarations returned by target hooks for expanding multiplies,
+	select, and permute operations.  Add VSX builtins.
+	(enum rs6000_builtin_type_index): Add entries for VSX.
+	(V2DI_type_node): Ditto.
+	(V2DF_type_node): Ditto.
+	(unsigned_V2DI_type_node): Ditto.
+	(bool_long_type_node): Ditto.
+	(intDI_type_internal_node): Ditto.
+	(uintDI_type_internal_node): Ditto.
+	(double_type_internal_node): Ditto.
+
+	* config/rs6000/altivec.md (whole file): Move all expanders to
+	vector.md from altivec.md.  Rename insn matching functions to be
+	altivec_foo.
+	(UNSPEC_VCMP*): Delete, rewrite vector comparisons.
+	(altivec_vcmp*): Ditto.
+	(UNSPEC_VPERM_UNS): New, add for unsigned types using vperm.
+	(VM): New iterator for moves that includes the VSX types.
+	(altivec_vperm_<mode>): Add VSX types.  Add unsigned types.
+	(altivec_vperm_<mode>_uns): New, for unsigned types.
+	(altivec_vsel_*): Rewrite vector comparisons and predicate
+	builtins.
+	(altivec_eq<mode>): Ditto.
+	(altivec_gt<mode>): Ditto.
+	(altivec_gtu<mode>): Ditto.
+	(altivec_eqv4sf): Ditto.
+	(altivec_gev4sf): Ditto.
+	(altivec_gtv4sf): Ditto.
+	(altivec_vcmpbfp_p): Ditto.
+
 2009-07-23  Richard Earnshaw
 
 	(split for ior/xor with shift and zero-extend): Cast op3 to
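As an aside on the register constraints listed in the entry above, here is a hedged sketch, not taken from the patch, of how the new "wa" (any VSX register) constraint might be used from inline asm. It assumes a VSX-enabled compiler (`-mvsx`); `xvadddp` is the VSX two-double vector add, and the `%x` operand modifier prints the full VSX register number.

```c
/* Hypothetical use of the new "wa" constraint; assumes -mvsx.  */
typedef double v2df __attribute__ ((vector_size (16)));

static v2df
vsx_add (v2df a, v2df b)
{
  v2df out;
  /* "wa" forces all three operands into VSX registers.  */
  __asm__ ("xvadddp %x0,%x1,%x2" : "=wa" (out) : "wa" (a), "wa" (b));
  return out;
}
```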
-(define_expand "mov" - [(set (match_operand:V 0 "nonimmediate_operand" "") - (match_operand:V 1 "any_operand" ""))] - "TARGET_ALTIVEC" -{ - rs6000_emit_move (operands[0], operands[1], mode); - DONE; -}) - -(define_insn "*mov_internal" - [(set (match_operand:V 0 "nonimmediate_operand" "=Z,v,v,o,r,r,v") - (match_operand:V 1 "input_operand" "v,Z,v,r,o,r,W"))] - "TARGET_ALTIVEC +(define_insn "*altivec_mov" + [(set (match_operand:VM2 0 "nonimmediate_operand" "=Z,v,v,*o,*r,*r,v,v") + (match_operand:VM2 1 "input_operand" "v,Z,v,r,o,r,j,W"))] + "VECTOR_MEM_ALTIVEC_P (mode) && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" { @@ -220,52 +188,42 @@ case 3: return "#"; case 4: return "#"; case 5: return "#"; - case 6: return output_vec_const_move (operands); + case 6: return "vxor %0,%0,%0"; + case 7: return output_vec_const_move (operands); default: gcc_unreachable (); } } - [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")]) + [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")]) -(define_split - [(set (match_operand:V4SI 0 "nonimmediate_operand" "") - (match_operand:V4SI 1 "input_operand" ""))] - "TARGET_ALTIVEC && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" - [(pc)] +;; Unlike other altivec moves, allow the GPRs, since a normal use of TImode +;; is for unions. However for plain data movement, slightly favor the vector +;; loads +(define_insn "*altivec_movti" + [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,v,v,?o,?r,?r,v,v") + (match_operand:TI 1 "input_operand" "v,Z,v,r,o,r,j,W"))] + "VECTOR_MEM_ALTIVEC_P (TImode) + && (register_operand (operands[0], TImode) + || register_operand (operands[1], TImode))" { - rs6000_split_multireg_move (operands[0], operands[1]); DONE; -}) + switch (which_alternative) + { + case 0: return "stvx %1,%y0"; + case 1: return "lvx %0,%y1"; + case 2: return "vor %0,%1,%1"; + case 3: return "#"; + case 4: return "#"; + case 5: return "#"; + case 6: return "vxor %0,%0,%0"; + case 7: return output_vec_const_move (operands); + default: gcc_unreachable (); + } +} + [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,vecsimple,*")]) (define_split - [(set (match_operand:V8HI 0 "nonimmediate_operand" "") - (match_operand:V8HI 1 "input_operand" ""))] - "TARGET_ALTIVEC && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" - [(pc)] -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) - -(define_split - [(set (match_operand:V16QI 0 "nonimmediate_operand" "") - (match_operand:V16QI 1 "input_operand" ""))] - "TARGET_ALTIVEC && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" - [(pc)] -{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) - -(define_split - [(set (match_operand:V4SF 0 "nonimmediate_operand" "") - (match_operand:V4SF 1 "input_operand" ""))] - "TARGET_ALTIVEC && reload_completed - && gpr_or_gpr_p (operands[0], operands[1])" - [(pc)] -{ - rs6000_split_multireg_move (operands[0], operands[1]); DONE; -}) - -(define_split - [(set (match_operand:V 0 "altivec_register_operand" "") - (match_operand:V 1 "easy_vector_constant_add_self" ""))] - "TARGET_ALTIVEC && reload_completed" + [(set (match_operand:VM 0 "altivec_register_operand" "") + (match_operand:VM 1 "easy_vector_constant_add_self" ""))] + "VECTOR_UNIT_ALTIVEC_P (mode) && reload_completed" [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) (match_dup 4))] { @@ -346,11 +304,11 @@ "vaddum %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "addv4sf3" +(define_insn 
"*altivec_addv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=v") (plus:V4SF (match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vaddfp %0,%1,%2" [(set_attr "type" "vecfloat")]) @@ -392,11 +350,11 @@ "vsubum %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "subv4sf3" +(define_insn "*altivec_subv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=v") (minus:V4SF (match_operand:V4SF 1 "register_operand" "v") (match_operand:V4SF 2 "register_operand" "v")))] - "TARGET_ALTIVEC" + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vsubfp %0,%1,%2" [(set_attr "type" "vecfloat")]) @@ -457,113 +415,75 @@ "vcmpbfp %0,%1,%2" [(set_attr "type" "veccmp")]) -(define_insn "altivec_vcmpequb" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 2 "register_operand" "v")] - UNSPEC_VCMPEQUB))] +(define_insn "*altivec_eq" + [(set (match_operand:VI 0 "altivec_register_operand" "=v") + (eq:VI (match_operand:VI 1 "altivec_register_operand" "v") + (match_operand:VI 2 "altivec_register_operand" "v")))] "TARGET_ALTIVEC" - "vcmpequb %0,%1,%2" - [(set_attr "type" "vecsimple")]) + "vcmpequ %0,%1,%2" + [(set_attr "type" "veccmp")]) -(define_insn "altivec_vcmpequh" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")] - UNSPEC_VCMPEQUH))] +(define_insn "*altivec_gt" + [(set (match_operand:VI 0 "altivec_register_operand" "=v") + (gt:VI (match_operand:VI 1 "altivec_register_operand" "v") + (match_operand:VI 2 "altivec_register_operand" "v")))] "TARGET_ALTIVEC" - "vcmpequh %0,%1,%2" - [(set_attr "type" "vecsimple")]) + "vcmpgts %0,%1,%2" + [(set_attr "type" "veccmp")]) -(define_insn "altivec_vcmpequw" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")] - UNSPEC_VCMPEQUW))] +(define_insn "*altivec_gtu" + [(set (match_operand:VI 0 "altivec_register_operand" "=v") + (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v") + (match_operand:VI 2 "altivec_register_operand" "v")))] "TARGET_ALTIVEC" - "vcmpequw %0,%1,%2" - [(set_attr "type" "vecsimple")]) + "vcmpgtu %0,%1,%2" + [(set_attr "type" "veccmp")]) -(define_insn "altivec_vcmpeqfp" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")] - UNSPEC_VCMPEQFP))] - "TARGET_ALTIVEC" +(define_insn "*altivec_eqv4sf" + [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") + (eq:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v") + (match_operand:V4SF 2 "altivec_register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vcmpeqfp %0,%1,%2" [(set_attr "type" "veccmp")]) -(define_insn "altivec_vcmpgefp" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")] - UNSPEC_VCMPGEFP))] - "TARGET_ALTIVEC" +(define_insn "*altivec_gtv4sf" + [(set (match_operand:V4SF 0 "altivec_register_operand" "=v") + (gt:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v") + (match_operand:V4SF 2 "altivec_register_operand" "v")))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "vcmpgtfp %0,%1,%2" + [(set_attr "type" "veccmp")]) + +(define_insn "*altivec_gev4sf" + 
+  [(set (match_operand:V4SF 0 "altivec_register_operand" "=v")
+	(ge:V4SF (match_operand:V4SF 1 "altivec_register_operand" "v")
+		 (match_operand:V4SF 2 "altivec_register_operand" "v")))]
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
  "vcmpgefp %0,%1,%2"
  [(set_attr "type" "veccmp")])
 
-(define_insn "altivec_vcmpgtub"
-  [(set (match_operand:V16QI 0 "register_operand" "=v")
-	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
-		       (match_operand:V16QI 2 "register_operand" "v")]
-		      UNSPEC_VCMPGTUB))]
-  "TARGET_ALTIVEC"
-  "vcmpgtub %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
+(define_insn "*altivec_vsel<mode>"
+  [(set (match_operand:VM 0 "altivec_register_operand" "=v")
+	(if_then_else:VM
+	 (ne:CC (match_operand:VM 1 "altivec_register_operand" "v")
+		(const_int 0))
+	 (match_operand:VM 2 "altivec_register_operand" "v")
+	 (match_operand:VM 3 "altivec_register_operand" "v")))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+  "vsel %0,%3,%2,%1"
+  [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vcmpgtsb"
-  [(set (match_operand:V16QI 0 "register_operand" "=v")
-	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")
-		       (match_operand:V16QI 2 "register_operand" "v")]
-		      UNSPEC_VCMPGTSB))]
-  "TARGET_ALTIVEC"
-  "vcmpgtsb %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
-
-(define_insn "altivec_vcmpgtuh"
-  [(set (match_operand:V8HI 0 "register_operand" "=v")
-	(unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
-		      (match_operand:V8HI 2 "register_operand" "v")]
-		     UNSPEC_VCMPGTUH))]
-  "TARGET_ALTIVEC"
-  "vcmpgtuh %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
-
-(define_insn "altivec_vcmpgtsh"
-  [(set (match_operand:V8HI 0 "register_operand" "=v")
-	(unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")
-		      (match_operand:V8HI 2 "register_operand" "v")]
-		     UNSPEC_VCMPGTSH))]
-  "TARGET_ALTIVEC"
-  "vcmpgtsh %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
-
-(define_insn "altivec_vcmpgtuw"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
-		      (match_operand:V4SI 2 "register_operand" "v")]
-		     UNSPEC_VCMPGTUW))]
-  "TARGET_ALTIVEC"
-  "vcmpgtuw %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
-
-(define_insn "altivec_vcmpgtsw"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
-		      (match_operand:V4SI 2 "register_operand" "v")]
-		     UNSPEC_VCMPGTSW))]
-  "TARGET_ALTIVEC"
-  "vcmpgtsw %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
-
-(define_insn "altivec_vcmpgtfp"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-	(unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
-		      (match_operand:V4SF 2 "register_operand" "v")]
-		     UNSPEC_VCMPGTFP))]
-  "TARGET_ALTIVEC"
-  "vcmpgtfp %0,%1,%2"
-  [(set_attr "type" "veccmp")])
+(define_insn "*altivec_vsel<mode>_uns"
+  [(set (match_operand:VM 0 "altivec_register_operand" "=v")
+	(if_then_else:VM
+	 (ne:CCUNS (match_operand:VM 1 "altivec_register_operand" "v")
+		   (const_int 0))
+	 (match_operand:VM 2 "altivec_register_operand" "v")
+	 (match_operand:VM 3 "altivec_register_operand" "v")))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+  "vsel %0,%3,%2,%1"
+  [(set_attr "type" "vecperm")])
 
 ;; Fused multiply add
 (define_insn "altivec_vmaddfp"
@@ -571,17 +491,17 @@
    (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v")
			  (match_operand:V4SF 2 "register_operand" "v"))
	      (match_operand:V4SF 3 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
  "vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
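The compare and select patterns above now expose their semantics as plain RTL (`gt:VI`, `if_then_else`) instead of unspecs. In source terms they implement what the AltiVec intrinsics express; the following is an illustrative sketch only (assumes `-maltivec`), showing how a compare mask feeds a select:

```c
#include <altivec.h>

/* vec_cmpgt yields the per-element all-ones/all-zeros mask that the
   gt:VI patterns produce; vec_sel is the vsel operation:
   result = (first & ~mask) | (second & mask).  */
vector signed int
vmax_sketch (vector signed int a, vector signed int b)
{
  vector bool int gt = vec_cmpgt (a, b);   /* vcmpgtsw */
  return vec_sel (b, a, gt);               /* vsel: take a where a > b */
}
```

Note the swapped operand order in the template, `vsel %0,%3,%2,%1`: the hardware instruction lists the "false" operand first, while the RTL `if_then_else` lists the "true" arm first.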
 ;; We do multiply as a fused multiply-add with an add of a -0.0 vector.
-(define_expand "mulv4sf3"
+(define_expand "altivec_mulv4sf3"
  [(use (match_operand:V4SF 0 "register_operand" ""))
   (use (match_operand:V4SF 1 "register_operand" ""))
   (use (match_operand:V4SF 2 "register_operand" ""))]
-  "TARGET_ALTIVEC && TARGET_FUSED_MADD"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
  "
{
  rtx neg0;
@@ -631,7 +551,7 @@
  emit_insn (gen_altivec_vspltisw (sixteen, gen_rtx_CONST_INT (V4SImode, -16)));
  swap = gen_reg_rtx (V4SImode);
-  emit_insn (gen_altivec_vrlw (swap, operands[2], sixteen));
+  emit_insn (gen_vrotlv4si3 (swap, operands[2], sixteen));
 
  one = gen_reg_rtx (V8HImode);
  convert_move (one, operands[1], 0);
@@ -684,7 +604,7 @@
   (neg:V4SF (minus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v")
			            (match_operand:V4SF 2 "register_operand" "v"))
	                 (match_operand:V4SF 3 "register_operand" "v"))))]
-  "TARGET_ALTIVEC"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
  "vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])
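A note on the `-0.0` trick mentioned in the comment above: `vmaddfp` computes `a*b + c`, and the addend must not disturb the sign of an exact-zero product. Under IEEE round-to-nearest, `x + (-0.0)` leaves both `+0.0` and `-0.0` unchanged, whereas `x + (+0.0)` would turn `-0.0` into `+0.0`. A scalar model of the emitted sequence (illustrative only):

```c
/* Why the multiply expander adds -0.0f rather than +0.0f.  */
float mul_via_fma (float a, float b)
{
  const float neg_zero = -0.0f;    /* identity addend for signed zeros */
  return a * b + neg_zero;         /* what vmaddfp %0,%1,%2,%3 computes */
}
```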
@@ -758,11 +678,11 @@
   "vmaxs<VI_char> %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "smaxv4sf3"
+(define_insn "*altivec_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (smax:V4SF (match_operand:V4SF 1 "register_operand" "v")
                   (match_operand:V4SF 2 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
  "vmaxfp %0,%1,%2"
  [(set_attr "type" "veccmp")])
 
@@ -782,11 +702,11 @@
   "vmins<VI_char> %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "sminv4sf3"
+(define_insn "*altivec_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (smin:V4SF (match_operand:V4SF 1 "register_operand" "v")
                   (match_operand:V4SF 2 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
  "vminfp %0,%1,%2"
  [(set_attr "type" "veccmp")])
 
@@ -901,11 +821,11 @@
                         (const_int 3)
                         (const_int 1)]))
      (const_int 5)))]
-  "TARGET_ALTIVEC"
+  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
  "vmrghw %0,%1,%2"
  [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrghsf"
+(define_insn "*altivec_vmrghsf"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
                                         (parallel [(const_int 0)
@@ -918,7 +838,7 @@
                                                    (const_int 3)
                                                    (const_int 1)]))
                        (const_int 5)))]
-  "TARGET_ALTIVEC"
+  "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
  "vmrghw %0,%1,%2"
  [(set_attr "type" "vecperm")])
 
@@ -990,35 +910,37 @@
 (define_insn "altivec_vmrglw"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
-        (vec_merge:V4SI (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v")
-					 (parallel [(const_int 2)
-						    (const_int 0)
-						    (const_int 3)
-						    (const_int 1)]))
-                        (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v")
-					 (parallel [(const_int 0)
-						    (const_int 2)
-						    (const_int 1)
-						    (const_int 3)]))
-		      (const_int 5)))]
-  "TARGET_ALTIVEC"
+	(vec_merge:V4SI
+	 (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "v")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "v")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (const_int 5)))]
+  "VECTOR_MEM_ALTIVEC_P (V4SImode)"
  "vmrglw %0,%1,%2"
  [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmrglsf"
+(define_insn "*altivec_vmrglsf"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
-        (vec_merge:V4SF (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
-					 (parallel [(const_int 2)
-						    (const_int 0)
-						    (const_int 3)
-						    (const_int 1)]))
-                        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
-					 (parallel [(const_int 0)
-						    (const_int 2)
-						    (const_int 1)
-						    (const_int 3)]))
-		      (const_int 5)))]
-  "TARGET_ALTIVEC"
+	(vec_merge:V4SF
+	 (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "v")
+			  (parallel [(const_int 2)
+				     (const_int 0)
+				     (const_int 3)
+				     (const_int 1)]))
+	 (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "v")
+			  (parallel [(const_int 0)
+				     (const_int 2)
+				     (const_int 1)
+				     (const_int 3)]))
+	 (const_int 5)))]
+  "VECTOR_MEM_ALTIVEC_P (V4SFmode)"
  "vmrglw %0,%1,%2"
  [(set_attr "type" "vecperm")])
@@ -1095,68 +1017,53 @@
  [(set_attr "type" "veccomplex")])
 
-;; logical ops
+;; logical ops.  Have the logical ops follow the memory ops in
+;; terms of whether to prefer VSX or Altivec
 
-(define_insn "and<mode>3"
-  [(set (match_operand:VI 0 "register_operand" "=v")
-        (and:VI (match_operand:VI 1 "register_operand" "v")
-                (match_operand:VI 2 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+(define_insn "*altivec_and<mode>3"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+        (and:VM (match_operand:VM 1 "register_operand" "v")
+		(match_operand:VM 2 "register_operand" "v")))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
  "vand %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "ior<mode>3"
-  [(set (match_operand:VI 0 "register_operand" "=v")
-        (ior:VI (match_operand:VI 1 "register_operand" "v")
-                (match_operand:VI 2 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+(define_insn "*altivec_ior<mode>3"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+        (ior:VM (match_operand:VM 1 "register_operand" "v")
+		(match_operand:VM 2 "register_operand" "v")))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
  "vor %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "xor<mode>3"
-  [(set (match_operand:VI 0 "register_operand" "=v")
-        (xor:VI (match_operand:VI 1 "register_operand" "v")
-                (match_operand:VI 2 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+(define_insn "*altivec_xor<mode>3"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+        (xor:VM (match_operand:VM 1 "register_operand" "v")
+		(match_operand:VM 2 "register_operand" "v")))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
  "vxor %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "xorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=v")
-        (xor:V4SF (match_operand:V4SF 1 "register_operand" "v")
-                  (match_operand:V4SF 2 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
-  "vxor %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
-
-(define_insn "one_cmpl<mode>2"
-  [(set (match_operand:VI 0 "register_operand" "=v")
-        (not:VI (match_operand:VI 1 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+(define_insn "*altivec_one_cmpl<mode>2"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+        (not:VM (match_operand:VM 1 "register_operand" "v")))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
  "vnor %0,%1,%1"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "altivec_nor<mode>3"
-  [(set (match_operand:VI 0 "register_operand" "=v")
-        (not:VI (ior:VI (match_operand:VI 1 "register_operand" "v")
-                        (match_operand:VI 2 "register_operand" "v"))))]
-  "TARGET_ALTIVEC"
+(define_insn "*altivec_nor<mode>3"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+        (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
+			(match_operand:VM 2 "register_operand" "v"))))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
  "vnor %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "andc<mode>3"
-  [(set (match_operand:VI 0 "register_operand" "=v")
-        (and:VI (not:VI (match_operand:VI 2 "register_operand" "v"))
-                (match_operand:VI 1 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
-  "vandc %0,%1,%2"
-  [(set_attr "type" "vecsimple")])
-
-(define_insn "*andc3_v4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=v")
-        (and:V4SF (not:V4SF (match_operand:V4SF 2 "register_operand" "v"))
-                  (match_operand:V4SF 1 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+(define_insn "*altivec_andc<mode>3"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+        (and:VM (not:VM (match_operand:VM 2 "register_operand" "v"))
+		(match_operand:VM 1 "register_operand" "v")))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
  "vandc %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
@@ -1247,11 +1154,10 @@
   "vpkswus %0,%1,%2"
  [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vrl<VI_char>"
+(define_insn "*altivec_vrl<VI_char>"
  [(set (match_operand:VI 0 "register_operand" "=v")
-        (unspec:VI [(match_operand:VI 1 "register_operand" "v")
-		    (match_operand:VI 2 "register_operand" "v")]
-		   UNSPEC_VRL))]
+	(rotate:VI (match_operand:VI 1 "register_operand" "v")
+		   (match_operand:VI 2 "register_operand" "v")))]
  "TARGET_ALTIVEC"
  "vrl<VI_char> %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
@@ -1274,26 +1180,26 @@
   "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])
 
-(define_insn "vashl<mode>3"
+(define_insn "*altivec_vsl<VI_char>"
  [(set (match_operand:VI 0 "register_operand" "=v")
        (ashift:VI (match_operand:VI 1 "register_operand" "v")
-                   (match_operand:VI 2 "register_operand" "v") ))]
+		   (match_operand:VI 2 "register_operand" "v")))]
  "TARGET_ALTIVEC"
  "vsl<VI_char> %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "vlshr<mode>3"
+(define_insn "*altivec_vsr<VI_char>"
  [(set (match_operand:VI 0 "register_operand" "=v")
        (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
-                     (match_operand:VI 2 "register_operand" "v") ))]
+		     (match_operand:VI 2 "register_operand" "v")))]
  "TARGET_ALTIVEC"
  "vsr<VI_char> %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
-(define_insn "vashr<mode>3"
+(define_insn "*altivec_vsra<VI_char>"
  [(set (match_operand:VI 0 "register_operand" "=v")
        (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
-                     (match_operand:VI 2 "register_operand" "v") ))]
+		     (match_operand:VI 2 "register_operand" "v")))]
  "TARGET_ALTIVEC"
  "vsra<VI_char> %0,%1,%2"
  [(set_attr "type" "vecsimple")])
 
@@ -1386,13 +1292,13 @@
   "vspltw %0,%1,%2"
  [(set_attr "type" "vecperm")])
 
-(define_insn "*altivec_vspltsf"
+(define_insn "altivec_vspltsf"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_duplicate:V4SF
	 (vec_select:SF (match_operand:V4SF 1 "register_operand" "v")
			(parallel
			 [(match_operand:QI 2 "u5bit_cint_operand" "i")]))))]
-  "TARGET_ALTIVEC"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
  "vspltw %0,%1,%2"
  [(set_attr "type" "vecperm")])
 
@@ -1404,19 +1310,29 @@
   "vspltis<VI_char> %0,%1"
  [(set_attr "type" "vecperm")])
 
-(define_insn "ftruncv4sf2"
+(define_insn "*altivec_ftruncv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (fix:V4SF (match_operand:V4SF 1 "register_operand" "v")))]
-  "TARGET_ALTIVEC"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
  "vrfiz %0,%1"
  [(set_attr "type" "vecfloat")])
 
 (define_insn "altivec_vperm_<mode>"
-  [(set (match_operand:V 0 "register_operand" "=v")
-	(unspec:V [(match_operand:V 1 "register_operand" "v")
-		   (match_operand:V 2 "register_operand" "v")
-		   (match_operand:V16QI 3 "register_operand" "v")]
-		  UNSPEC_VPERM))]
+  [(set (match_operand:VM 0 "register_operand" "=v")
+	(unspec:VM [(match_operand:VM 1 "register_operand" "v")
+		    (match_operand:VM 2 "register_operand" "v")
+		    (match_operand:V16QI 3 "register_operand" "v")]
+		   UNSPEC_VPERM))]
  "TARGET_ALTIVEC"
  "vperm %0,%1,%2,%3"
  [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vperm_<mode>_uns"
+  [(set (match_operand:VM 0 "register_operand" "=v")
+	(unspec:VM [(match_operand:VM 1 "register_operand" "v")
+		    (match_operand:VM 2 "register_operand" "v")
+		    (match_operand:V16QI 3 "register_operand" "v")]
+		   UNSPEC_VPERM_UNS))]
+  "TARGET_ALTIVEC"
+  "vperm %0,%1,%2,%3"
[(set_attr "type" "vecperm")]) @@ -1515,185 +1431,11 @@ "vrefp %0,%1" [(set_attr "type" "vecfloat")]) -(define_expand "vcondv4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (if_then_else:V4SI - (match_operator 3 "comparison_operator" - [(match_operand:V4SI 4 "register_operand" "v") - (match_operand:V4SI 5 "register_operand" "v")]) - (match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" - " -{ - if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -} - ") - -(define_expand "vconduv4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (if_then_else:V4SI - (match_operator 3 "comparison_operator" - [(match_operand:V4SI 4 "register_operand" "v") - (match_operand:V4SI 5 "register_operand" "v")]) - (match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" - " -{ - if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -} - ") - -(define_expand "vcondv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (if_then_else:V4SF - (match_operator 3 "comparison_operator" - [(match_operand:V4SF 4 "register_operand" "v") - (match_operand:V4SF 5 "register_operand" "v")]) - (match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")))] - "TARGET_ALTIVEC" - " -{ - if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -} - ") - -(define_expand "vcondv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (if_then_else:V8HI - (match_operator 3 "comparison_operator" - [(match_operand:V8HI 4 "register_operand" "v") - (match_operand:V8HI 5 "register_operand" "v")]) - (match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" - " -{ - if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -} - ") - -(define_expand "vconduv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (if_then_else:V8HI - (match_operator 3 "comparison_operator" - [(match_operand:V8HI 4 "register_operand" "v") - (match_operand:V8HI 5 "register_operand" "v")]) - (match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" - " -{ - if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -} - ") - -(define_expand "vcondv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (if_then_else:V16QI - (match_operator 3 "comparison_operator" - [(match_operand:V16QI 4 "register_operand" "v") - (match_operand:V16QI 5 "register_operand" "v")]) - (match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 2 "register_operand" "v")))] - "TARGET_ALTIVEC" - " -{ - if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -} - ") - -(define_expand "vconduv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (if_then_else:V16QI - (match_operator 3 "comparison_operator" - [(match_operand:V16QI 4 "register_operand" "v") - (match_operand:V16QI 5 "register_operand" "v")]) - (match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 
2 "register_operand" "v")))] - "TARGET_ALTIVEC" - " -{ - if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], - operands[3], operands[4], operands[5])) - DONE; - else - FAIL; -} - ") - - -(define_insn "altivec_vsel_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") - (match_operand:V4SI 2 "register_operand" "v") - (match_operand:V4SI 3 "register_operand" "v")] - UNSPEC_VSEL4SI))] - "TARGET_ALTIVEC" - "vsel %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vsel_v4sf" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v") - (match_operand:V4SI 3 "register_operand" "v")] - UNSPEC_VSEL4SF))] - "TARGET_ALTIVEC" - "vsel %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vsel_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") - (match_operand:V8HI 2 "register_operand" "v") - (match_operand:V8HI 3 "register_operand" "v")] - UNSPEC_VSEL8HI))] - "TARGET_ALTIVEC" - "vsel %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) - -(define_insn "altivec_vsel_v16qi" - [(set (match_operand:V16QI 0 "register_operand" "=v") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") - (match_operand:V16QI 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] - UNSPEC_VSEL16QI))] - "TARGET_ALTIVEC" - "vsel %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) - (define_insn "altivec_vsldoi_" - [(set (match_operand:V 0 "register_operand" "=v") - (unspec:V [(match_operand:V 1 "register_operand" "v") - (match_operand:V 2 "register_operand" "v") - (match_operand:QI 3 "immediate_operand" "i")] + [(set (match_operand:VM 0 "register_operand" "=v") + (unspec:VM [(match_operand:VM 1 "register_operand" "v") + (match_operand:VM 2 "register_operand" "v") + (match_operand:QI 3 "immediate_operand" "i")] UNSPEC_VLSDOI))] "TARGET_ALTIVEC" "vsldoi %0,%1,%2,%3" @@ -1747,50 +1489,92 @@ "vupklsh %0,%1" [(set_attr "type" "vecperm")]) -;; AltiVec predicates. - -(define_expand "cr6_test_for_zero" - [(set (match_operand:SI 0 "register_operand" "=r") - (eq:SI (reg:CC 74) - (const_int 0)))] - "TARGET_ALTIVEC" - "") - -(define_expand "cr6_test_for_zero_reverse" - [(set (match_operand:SI 0 "register_operand" "=r") - (eq:SI (reg:CC 74) - (const_int 0))) - (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] - "TARGET_ALTIVEC" - "") - -(define_expand "cr6_test_for_lt" - [(set (match_operand:SI 0 "register_operand" "=r") - (lt:SI (reg:CC 74) - (const_int 0)))] - "TARGET_ALTIVEC" - "") - -(define_expand "cr6_test_for_lt_reverse" - [(set (match_operand:SI 0 "register_operand" "=r") - (lt:SI (reg:CC 74) - (const_int 0))) - (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] - "TARGET_ALTIVEC" - "") - -;; We can get away with generating the opcode on the fly (%3 below) -;; because all the predicates have the same scheduling parameters. 
-
-(define_insn "altivec_predicate_<mode>"
+;; Compare vectors producing a vector result and a predicate, setting CR6 to
+;; indicate a combined status
+(define_insn "*altivec_vcmpequ<VI_char>_p"
  [(set (reg:CC 74)
-        (unspec:CC [(match_operand:V 1 "register_operand" "v")
-                    (match_operand:V 2 "register_operand" "v")
-                    (match_operand 3 "any_operand" "")] UNSPEC_PREDICATE))
-   (clobber (match_scratch:V 0 "=v"))]
-  "TARGET_ALTIVEC"
-  "%3 %0,%1,%2"
-[(set_attr "type" "veccmp")])
+	(unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v")
+			   (match_operand:VI 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:VI 0 "register_operand" "=v")
+	(eq:VI (match_dup 1)
+	       (match_dup 2)))]
+  "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+  "vcmpequ<VI_char>. %0,%1,%2"
+  [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgts<VI_char>_p"
+  [(set (reg:CC 74)
+	(unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v")
+			   (match_operand:VI 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:VI 0 "register_operand" "=v")
+	(gt:VI (match_dup 1)
+	       (match_dup 2)))]
+  "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+  "vcmpgts<VI_char>. %0,%1,%2"
+  [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgtu<VI_char>_p"
+  [(set (reg:CC 74)
+	(unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v")
+			    (match_operand:VI 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:VI 0 "register_operand" "=v")
+	(gtu:VI (match_dup 1)
+		(match_dup 2)))]
+  "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+  "vcmpgtu<VI_char>. %0,%1,%2"
+  [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpeqfp_p"
+  [(set (reg:CC 74)
+	(unspec:CC [(eq:CC (match_operand:V4SF 1 "register_operand" "v")
+			   (match_operand:V4SF 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:V4SF 0 "register_operand" "=v")
+	(eq:V4SF (match_dup 1)
+		 (match_dup 2)))]
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+  "vcmpeqfp. %0,%1,%2"
+  [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgtfp_p"
+  [(set (reg:CC 74)
+	(unspec:CC [(gt:CC (match_operand:V4SF 1 "register_operand" "v")
+			   (match_operand:V4SF 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:V4SF 0 "register_operand" "=v")
+	(gt:V4SF (match_dup 1)
+		 (match_dup 2)))]
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+  "vcmpgtfp. %0,%1,%2"
+  [(set_attr "type" "veccmp")])
+
+(define_insn "*altivec_vcmpgefp_p"
+  [(set (reg:CC 74)
+	(unspec:CC [(ge:CC (match_operand:V4SF 1 "register_operand" "v")
+			   (match_operand:V4SF 2 "register_operand" "v"))]
+		   UNSPEC_PREDICATE))
+   (set (match_operand:V4SF 0 "register_operand" "=v")
+	(ge:V4SF (match_dup 1)
+		 (match_dup 2)))]
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+  "vcmpgefp. %0,%1,%2"
+  [(set_attr "type" "veccmp")])
+
+(define_insn "altivec_vcmpbfp_p"
+  [(set (reg:CC 74)
+	(unspec:CC [(match_operand:V4SF 1 "register_operand" "v")
+		    (match_operand:V4SF 2 "register_operand" "v")]
+		   UNSPEC_VCMPBFP))
+   (set (match_operand:V4SF 0 "register_operand" "=v")
+	(unspec:V4SF [(match_dup 1)
+		      (match_dup 2)]
+		     UNSPEC_VCMPBFP))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)"
+  "vcmpbfp. %0,%1,%2"
+  [(set_attr "type" "veccmp")])
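The new `_p` patterns set CR6 alongside the vector result. User code reaches them through the `vec_all_*`/`vec_any_*` intrinsics, whose CR6 tests are served by the `cr6_test_*` expanders that this patch moves out of altivec.md (to vector.md, per the ChangeLog). An illustrative sketch, assuming `-maltivec`:

```c
#include <altivec.h>

/* The predicate forms: compare, then read the combined status in CR6.  */
int all_equal (vector signed int a, vector signed int b)
{
  return vec_all_eq (a, b);    /* vcmpequw. then test CR6 "all true" */
}

int any_greater (vector float a, vector float b)
{
  return vec_any_gt (a, b);    /* vcmpgtfp. then test CR6 "any true" */
}
```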
%0,%1,%2" + [(set_attr "type" "veccmp")]) (define_insn "altivec_mtvscr" [(set (reg:SI 110) @@ -1959,95 +1743,6 @@ "stvewx %1,%y0" [(set_attr "type" "vecstore")]) -(define_expand "vec_init" - [(match_operand:V 0 "register_operand" "") - (match_operand 1 "" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_init (operands[0], operands[1]); - DONE; -}) - -(define_expand "vec_setv4si" - [(match_operand:V4SI 0 "register_operand" "") - (match_operand:SI 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - -(define_expand "vec_setv8hi" - [(match_operand:V8HI 0 "register_operand" "") - (match_operand:HI 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - -(define_expand "vec_setv16qi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:QI 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - -(define_expand "vec_setv4sf" - [(match_operand:V4SF 0 "register_operand" "") - (match_operand:SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - -(define_expand "vec_extractv4si" - [(match_operand:SI 0 "register_operand" "") - (match_operand:V4SI 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_extract (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - -(define_expand "vec_extractv8hi" - [(match_operand:HI 0 "register_operand" "") - (match_operand:V8HI 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_extract (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - -(define_expand "vec_extractv16qi" - [(match_operand:QI 0 "register_operand" "") - (match_operand:V16QI 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_extract (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - -(define_expand "vec_extractv4sf" - [(match_operand:SF 0 "register_operand" "") - (match_operand:V4SF 1 "register_operand" "") - (match_operand 2 "const_int_operand" "")] - "TARGET_ALTIVEC" -{ - rs6000_expand_vector_extract (operands[0], operands[1], INTVAL (operands[2])); - DONE; -}) - ;; Generate ;; vspltis? SCRATCH0,0 ;; vsubu?m SCRATCH2,SCRATCH1,%1 @@ -2069,7 +1764,7 @@ ;; vspltisw SCRATCH1,-1 ;; vslw SCRATCH2,SCRATCH1,SCRATCH1 ;; vandc %0,%1,SCRATCH2 -(define_expand "absv4sf2" +(define_expand "altivec_absv4sf2" [(set (match_dup 2) (vec_duplicate:V4SI (const_int -1))) (set (match_dup 3) @@ -2102,66 +1797,6 @@ operands[3] = gen_reg_rtx (GET_MODE (operands[0])); }) -;; Vector shift left in bits. Currently supported ony for shift -;; amounts that can be expressed as byte shifts (divisible by 8). -;; General shift amounts can be supported using vslo + vsl. We're -;; not expecting to see these yet (the vectorizer currently -;; generates only shifts divisible by byte_size). 
-(define_expand "vec_shl_" - [(set (match_operand:V 0 "register_operand" "=v") - (unspec:V [(match_operand:V 1 "register_operand" "v") - (match_operand:QI 2 "reg_or_short_operand" "")] - UNSPEC_VECSH))] - "TARGET_ALTIVEC" - " -{ - rtx bitshift = operands[2]; - rtx byteshift = gen_reg_rtx (QImode); - HOST_WIDE_INT bitshift_val; - HOST_WIDE_INT byteshift_val; - - if (! CONSTANT_P (bitshift)) - FAIL; - bitshift_val = INTVAL (bitshift); - if (bitshift_val & 0x7) - FAIL; - byteshift_val = bitshift_val >> 3; - byteshift = gen_rtx_CONST_INT (QImode, byteshift_val); - emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], - byteshift)); - DONE; -}") - -;; Vector shift left in bits. Currently supported ony for shift -;; amounts that can be expressed as byte shifts (divisible by 8). -;; General shift amounts can be supported using vsro + vsr. We're -;; not expecting to see these yet (the vectorizer currently -;; generates only shifts divisible by byte_size). -(define_expand "vec_shr_" - [(set (match_operand:V 0 "register_operand" "=v") - (unspec:V [(match_operand:V 1 "register_operand" "v") - (match_operand:QI 2 "reg_or_short_operand" "")] - UNSPEC_VECSH))] - "TARGET_ALTIVEC" - " -{ - rtx bitshift = operands[2]; - rtx byteshift = gen_reg_rtx (QImode); - HOST_WIDE_INT bitshift_val; - HOST_WIDE_INT byteshift_val; - - if (! CONSTANT_P (bitshift)) - FAIL; - bitshift_val = INTVAL (bitshift); - if (bitshift_val & 0x7) - FAIL; - byteshift_val = 16 - (bitshift_val >> 3); - byteshift = gen_rtx_CONST_INT (QImode, byteshift_val); - emit_insn (gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], - byteshift)); - DONE; -}") - (define_insn "altivec_vsumsws_nomode" [(set (match_operand 0 "register_operand" "=v") (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") @@ -2204,16 +1839,6 @@ DONE; }") -(define_insn "vec_realign_load_" - [(set (match_operand:V 0 "register_operand" "=v") - (unspec:V [(match_operand:V 1 "register_operand" "v") - (match_operand:V 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "v")] - UNSPEC_REALIGN_LOAD))] - "TARGET_ALTIVEC" - "vperm %0,%1,%2,%3" - [(set_attr "type" "vecperm")]) - (define_expand "neg2" [(use (match_operand:VI 0 "register_operand" "")) (use (match_operand:VI 1 "register_operand" ""))] @@ -2665,7 +2290,7 @@ DONE; }") -(define_expand "negv4sf2" +(define_expand "altivec_negv4sf2" [(use (match_operand:V4SF 0 "register_operand" "")) (use (match_operand:V4SF 1 "register_operand" ""))] "TARGET_ALTIVEC" @@ -2994,29 +2619,6 @@ emit_insn (gen_vpkuhum_nomode (operands[0], operands[1], operands[2])); DONE; }") -(define_expand "vec_interleave_highv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "register_operand" "")] - UNSPEC_INTERHI_V4SF))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vmrghsf (operands[0], operands[1], operands[2])); - DONE; -}") - -(define_expand "vec_interleave_lowv4sf" - [(set (match_operand:V4SF 0 "register_operand" "") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") - (match_operand:V4SF 2 "register_operand" "")] - UNSPEC_INTERLO_V4SF))] - "TARGET_ALTIVEC" - " -{ - emit_insn (gen_altivec_vmrglsf (operands[0], operands[1], operands[2])); - DONE; -}") (define_expand "vec_interleave_high" [(set (match_operand:VI 0 "register_operand" "") diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index a937d99397d..81326aa6d57 100644 --- a/gcc/config/rs6000/constraints.md +++ 
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index a937d99397d..81326aa6d57 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -17,14 +17,14 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.
 
+;; Available constraint letters: "e", "k", "u", "A", "B", "C", "D"
+
 ;; Register constraints
 
-(define_register_constraint "f" "TARGET_HARD_FLOAT && TARGET_FPRS
-				 ? FLOAT_REGS : NO_REGS"
+(define_register_constraint "f" "rs6000_constraints[RS6000_CONSTRAINT_f]"
  "@internal")
 
-(define_register_constraint "d" "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
-				 ? FLOAT_REGS : NO_REGS"
+(define_register_constraint "d" "rs6000_constraints[RS6000_CONSTRAINT_d]"
  "@internal")
 
 (define_register_constraint "b" "BASE_REGS"
@@ -54,6 +54,28 @@
 (define_register_constraint "z" "XER_REGS"
  "@internal")
 
+;; Use w as a prefix to add VSX modes
+;; vector double (V2DF)
+(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]"
+  "@internal")
+
+;; vector float (V4SF)
+(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
+  "@internal")
+
+;; scalar double (DF)
+(define_register_constraint "ws" "rs6000_constraints[RS6000_CONSTRAINT_ws]"
+  "@internal")
+
+;; any VSX register
+(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
+  "@internal")
+
+;; Altivec style load/store that ignores the bottom bits of the address
+(define_memory_constraint "wZ"
+  "Indexed or indirect memory operand, ignoring the bottom 4 bits"
+  (match_operand 0 "altivec_indexed_or_indirect_operand"))
+
 ;; Integer constraints
 
 (define_constraint "I"
@@ -173,3 +195,7 @@
 (define_constraint "W"
  "vector constant that does not require memory"
  (match_operand 0 "easy_vector_constant"))
+
+(define_constraint "j"
+  "Zero vector constant"
+  (match_test "(op == const0_rtx || op == CONST0_RTX (GET_MODE (op)))"))
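On the new "j" constraint: matching a zero vector constant lets the move patterns above clear a register in place (the `vxor %0,%0,%0` alternative, or `xxlxor` under VSX per the rs6000.c ChangeLog) rather than load zero from memory. A sketch of source that should take this path; the asm named in the comment is the expected output, not verified:

```c
/* Illustrative; assumes -maltivec or -mvsx.  */
typedef int v4si __attribute__ ((vector_size (16)));

v4si zero_vector (void)
{
  return (v4si) { 0, 0, 0, 0 };   /* expect a register clear, e.g. vxor */
}
```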
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b6b443bf3f8..3e5c1a1a8df 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -38,6 +38,37 @@
	     || ALTIVEC_REGNO_P (REGNO (op))
	     || REGNO (op) > LAST_VIRTUAL_REGISTER")))
 
+;; Return 1 if op is a VSX register.
+(define_predicate "vsx_register_operand"
+  (and (match_operand 0 "register_operand")
+       (match_test "GET_CODE (op) != REG
+		    || VSX_REGNO_P (REGNO (op))
+		    || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register that operates on floating point vectors
+;; (either altivec or VSX).
+(define_predicate "vfloat_operand"
+  (and (match_operand 0 "register_operand")
+       (match_test "GET_CODE (op) != REG
+		    || VFLOAT_REGNO_P (REGNO (op))
+		    || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register that operates on integer vectors
+;; (only altivec, VSX doesn't support integer vectors)
+(define_predicate "vint_operand"
+  (and (match_operand 0 "register_operand")
+       (match_test "GET_CODE (op) != REG
+		    || VINT_REGNO_P (REGNO (op))
+		    || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
+;; Return 1 if op is a vector register to do logical operations on (and, or,
+;; xor, etc.)
+(define_predicate "vlogical_operand"
+  (and (match_operand 0 "register_operand")
+       (match_test "GET_CODE (op) != REG
+		    || VLOGICAL_REGNO_P (REGNO (op))
+		    || REGNO (op) > LAST_VIRTUAL_REGISTER")))
+
 ;; Return 1 if op is XER register.
 (define_predicate "xer_operand"
  (and (match_code "reg")
@@ -234,6 +265,10 @@
	   && num_insns_constant_wide ((HOST_WIDE_INT) k[3]) == 1);
 
    case DFmode:
+      /* The constant 0.f is easy under VSX.  */
+      if (op == CONST0_RTX (DFmode) && VECTOR_UNIT_VSX_P (DFmode))
+	return 1;
+
      /* Force constants to memory before reload to utilize
	 compress_float_constant.
	 Avoid this when flag_unsafe_math_optimizations is enabled
@@ -292,6 +327,9 @@
  if (TARGET_PAIRED_FLOAT)
    return false;
 
+  if ((VSX_VECTOR_MODE (mode) || mode == TImode) && zero_constant (op, mode))
+    return true;
+
  if (ALTIVEC_VECTOR_MODE (mode))
    {
      if (zero_constant (op, mode))
@@ -394,16 +432,36 @@
  (match_code "mem")
{
  op = XEXP (op, 0);
-  if (TARGET_ALTIVEC
-      && ALTIVEC_VECTOR_MODE (mode)
+  if (VECTOR_MEM_ALTIVEC_P (mode)
      && GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 1)) == CONST_INT
      && INTVAL (XEXP (op, 1)) == -16)
    op = XEXP (op, 0);
 
+  else if (VECTOR_MEM_VSX_P (mode)
+	   && GET_CODE (op) == PRE_MODIFY)
+    op = XEXP (op, 1);
+
  return indexed_or_indirect_address (op, mode);
})
 
+;; Return 1 if the operand is an indexed or indirect memory operand with an
+;; AND -16 in it, used to recognize when we need to switch to Altivec loads
+;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
+;; while VSX uses the full address and traps)
+(define_predicate "altivec_indexed_or_indirect_operand"
+  (match_code "mem")
+{
+  op = XEXP (op, 0);
+  if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+      && GET_CODE (op) == AND
+      && GET_CODE (XEXP (op, 1)) == CONST_INT
+      && INTVAL (XEXP (op, 1)) == -16)
+    return indexed_or_indirect_address (XEXP (op, 0), mode);
+
+  return 0;
+})
+
 ;; Return 1 if the operand is an indexed or indirect address.
 (define_special_predicate "indexed_or_indirect_address"
  (and (match_test "REG_P (op)
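On `altivec_indexed_or_indirect_operand`: as the predicate's comment says, `lvx`/`stvx` silently ignore the low four address bits, so an address wrapped in `(and ... -16)` and the hardware access agree, while VSX loads honor the full address and so must keep the mask explicit. A scalar model of the masking, for illustration only:

```c
#include <stdint.h>

/* What lvx/stvx effectively do to the address -- the same effect as the
   (and <address> -16) the predicate looks for in the RTL.  */
uintptr_t altivec_effective_address (uintptr_t addr)
{
  return addr & ~(uintptr_t) 15;   /* drop the low 4 bits */
}
```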
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_NOR, ALTIVEC_BUILTIN_VNOR, @@ -1506,6 +1520,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, + RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_OR, ALTIVEC_BUILTIN_VOR, @@ -2122,6 +2142,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, 0 }, + { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, + RS6000_BTI_V2DF, RS6000_BTI_bool_V4SI, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, { ALTIVEC_BUILTIN_VEC_XOR, ALTIVEC_BUILTIN_VXOR, @@ -2366,6 +2392,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI }, { ALTIVEC_BUILTIN_VEC_NMSUB, ALTIVEC_BUILTIN_VNMSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_4SI, @@ -2392,10 +2422,28 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, 
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI, + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_unsigned_V4SI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SF, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6063a6c69b2..b077c83c2db 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -245,6 +245,9 @@ unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; /* Map register number to register class. */ enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; +/* Reload functions based on the type and the vector unit. */ +static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2]; + /* Built in types. */ tree rs6000_builtin_types[RS6000_BTI_MAX]; tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT]; @@ -302,10 +305,16 @@ struct builtin_description /* Describe the vector unit used for modes. */ enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES]; enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES]; -enum reg_class rs6000_vector_reg_class[NUM_MACHINE_MODES]; + +/* Register classes for various constraints that are based on the target + switches. */ +enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; /* Describe the alignment of a vector. */ int rs6000_vector_align[NUM_MACHINE_MODES]; + +/* Map selected modes to types for builtins. */ +static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2]; /* Target cpu costs. 
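Note the shape of the two new tables just declared: rs6000_vector_reload is indexed first by mode and then by direction, and the assignments made later in rs6000_init_hard_regno_mode_ok use slot 0 for the store (register to memory) helper and slot 1 for the load helper; builtin_mode_to_type likewise pairs each mode with a signed (index 0) and unsigned (index 1) tree type. A minimal sketch of the intended lookup, using a hypothetical helper name (the real consumer is the secondary_reload target hook this patch adds):

  /* Hypothetical illustration: fetch the secondary-reload helper for MODE.
     Slot 0 is the store direction, slot 1 the load direction;
     CODE_FOR_nothing means no helper is registered for this mode.  */
  static enum insn_code
  vector_reload_helper (enum machine_mode mode, bool store_p)
  {
    return rs6000_vector_reload[mode][store_p ? 0 : 1];
  }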
*/ @@ -918,6 +927,11 @@ static rtx rs6000_expand_binop_builtin (enum insn_code, tree, rtx); static rtx rs6000_expand_ternop_builtin (enum insn_code, tree, rtx); static rtx rs6000_expand_builtin (tree, rtx, rtx, enum machine_mode, int); static void altivec_init_builtins (void); +static unsigned builtin_hash_function (const void *); +static int builtin_hash_eq (const void *, const void *); +static tree builtin_function_type (enum machine_mode, enum machine_mode, + enum machine_mode, enum machine_mode, + enum rs6000_builtins, const char *name); static void rs6000_common_init_builtins (void); static void rs6000_init_libfuncs (void); @@ -944,8 +958,7 @@ static rtx altivec_expand_ld_builtin (tree, rtx, bool *); static rtx altivec_expand_st_builtin (tree, rtx, bool *); static rtx altivec_expand_dst_builtin (tree, rtx, bool *); static rtx altivec_expand_abs_builtin (enum insn_code, tree, rtx); -static rtx altivec_expand_predicate_builtin (enum insn_code, - const char *, tree, rtx); +static rtx altivec_expand_predicate_builtin (enum insn_code, tree, rtx); static rtx altivec_expand_stv_builtin (enum insn_code, tree); static rtx altivec_expand_vec_init_builtin (tree, tree, rtx); static rtx altivec_expand_vec_set_builtin (tree); @@ -1008,12 +1021,9 @@ static tree rs6000_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); static bool rs6000_must_pass_in_stack (enum machine_mode, const_tree); static bool rs6000_scalar_mode_supported_p (enum machine_mode); static bool rs6000_vector_mode_supported_p (enum machine_mode); -static int get_vec_cmp_insn (enum rtx_code, enum machine_mode, - enum machine_mode); +static rtx rs6000_emit_vector_compare_inner (enum rtx_code, rtx, rtx); static rtx rs6000_emit_vector_compare (enum rtx_code, rtx, rtx, enum machine_mode); -static int get_vsel_insn (enum machine_mode); -static void rs6000_emit_vector_select (rtx, rtx, rtx, rtx); static tree rs6000_stack_protect_fail (void); static rtx rs6000_legitimize_reload_address (rtx, enum machine_mode, int, int, @@ -1069,6 +1079,12 @@ bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode, enum reg_class) = rs6000_cannot_change_mode_class; +static enum reg_class rs6000_secondary_reload (bool, rtx, enum reg_class, + enum machine_mode, + struct secondary_reload_info *); + +static const enum reg_class *rs6000_ira_cover_classes (void); + const int INSN_NOT_AVAILABLE = -1; static enum machine_mode rs6000_eh_return_filter_mode (void); @@ -1084,6 +1100,17 @@ struct GTY(()) toc_hash_struct }; static GTY ((param_is (struct toc_hash_struct))) htab_t toc_hash_table; + +/* Hash table to keep track of the argument types for builtin functions. */ + +struct GTY(()) builtin_hash_struct +{ + tree type; + enum machine_mode mode[4]; /* return value + 3 arguments. */ + unsigned char uns_p[4]; /* and whether the types are unsigned. */ +}; + +static GTY ((param_is (struct builtin_hash_struct))) htab_t builtin_hash_table; /* Default register names. 
*/ char rs6000_reg_names[][8] = @@ -1414,6 +1441,12 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_INSTANTIATE_DECLS #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload + +#undef TARGET_IRA_COVER_CLASSES +#define TARGET_IRA_COVER_CLASSES rs6000_ira_cover_classes + #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p @@ -1437,7 +1470,9 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode) unsigned HOST_WIDE_INT reg_size; if (FP_REGNO_P (regno)) - reg_size = UNITS_PER_FP_WORD; + reg_size = (VECTOR_MEM_VSX_P (mode) + ? UNITS_PER_VSX_WORD + : UNITS_PER_FP_WORD); else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) reg_size = UNITS_PER_SPE_WORD; @@ -1465,22 +1500,45 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode) static int rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) { + int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; + + /* VSX registers that overlap the FPR registers are larger than for non-VSX + implementations. Don't allow an item to be split between a FP register + and an Altivec register. */ + if (VECTOR_MEM_VSX_P (mode)) + { + if (FP_REGNO_P (regno)) + return FP_REGNO_P (last_regno); + + if (ALTIVEC_REGNO_P (regno)) + return ALTIVEC_REGNO_P (last_regno); + } + /* The GPRs can hold any mode, but values bigger than one register cannot go past R31. */ if (INT_REGNO_P (regno)) - return INT_REGNO_P (regno + HARD_REGNO_NREGS (regno, mode) - 1); + return INT_REGNO_P (last_regno); - /* The float registers can only hold floating modes and DImode. - This excludes the 32-bit decimal float mode for now. */ + /* The float registers (except for VSX vector modes) can only hold floating + modes and DImode. This excludes the 32-bit decimal float mode for + now. */ if (FP_REGNO_P (regno)) - return - ((SCALAR_FLOAT_MODE_P (mode) - && (mode != TDmode || (regno % 2) == 0) - && FP_REGNO_P (regno + HARD_REGNO_NREGS (regno, mode) - 1)) - || (GET_MODE_CLASS (mode) == MODE_INT + { + if (SCALAR_FLOAT_MODE_P (mode) + && (mode != TDmode || (regno % 2) == 0) + && FP_REGNO_P (last_regno)) + return 1; + + if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) - || (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT - && PAIRED_VECTOR_MODE (mode))); + return 1; + + if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT + && PAIRED_VECTOR_MODE (mode)) + return 1; + + return 0; + } /* The CR register can only hold CC modes. */ if (CR_REGNO_P (regno)) @@ -1497,8 +1555,9 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) return 1; - /* We cannot put TImode anywhere except general register and it must be - able to fit within the register set. */ + /* We cannot put TImode anywhere except general register and it must be able + to fit within the register set. In the future, allow TImode in the + Altivec or VSX registers. */ return GET_MODE_SIZE (mode) <= UNITS_PER_WORD; } @@ -1575,23 +1634,141 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name) } } -/* Map enum rs6000_vector to string. */ -static const char * -rs6000_debug_vector_unit[] = { - "none", - "altivec", - "vsx", - "paired", - "spe", - "other" -}; +/* Print various interesting information with -mdebug=reg. 
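The rewrite of rs6000_hard_regno_mode_ok above hinges on last_regno: a value occupying several hard registers is valid only if the final register of the span falls in the same bank as the first, so a VSX value may not straddle the FPR/Altivec boundary. A minimal sketch of that check, with a hypothetical helper name (the real test is inlined in the function above):

  /* Hypothetical helper: does a value starting at REGNO and ending at
     LAST_REGNO stay within a single VSX-capable register bank?  */
  static bool
  vsx_span_stays_in_bank (int regno, int last_regno)
  {
    if (FP_REGNO_P (regno))
      return FP_REGNO_P (last_regno);
    if (ALTIVEC_REGNO_P (regno))
      return ALTIVEC_REGNO_P (last_regno);
    return true;  /* other banks are checked separately */
  }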
*/ +static void +rs6000_debug_reg_global (void) +{ + const char *nl = (const char *)0; + int m; + char costly_num[20]; + char nop_num[20]; + const char *costly_str; + const char *nop_str; + + /* Map enum rs6000_vector to string. */ + static const char *rs6000_debug_vector_unit[] = { + "none", + "altivec", + "vsx", + "paired", + "spe", + "other" + }; + + fprintf (stderr, "Register information: (last virtual reg = %d)\n", + LAST_VIRTUAL_REGISTER); + rs6000_debug_reg_print (0, 31, "gr"); + rs6000_debug_reg_print (32, 63, "fp"); + rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, + LAST_ALTIVEC_REGNO, + "vs"); + rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr"); + rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr"); + rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr"); + rs6000_debug_reg_print (MQ_REGNO, MQ_REGNO, "mq"); + rs6000_debug_reg_print (XER_REGNO, XER_REGNO, "xer"); + rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave"); + rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr"); + rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a"); + rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f"); + + fprintf (stderr, + "\n" + "d reg_class = %s\n" + "f reg_class = %s\n" + "v reg_class = %s\n" + "wa reg_class = %s\n" + "wd reg_class = %s\n" + "wf reg_class = %s\n" + "ws reg_class = %s\n\n", + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]], + reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]); + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + if (rs6000_vector_unit[m] || rs6000_vector_mem[m]) + { + nl = "\n"; + fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n", + GET_MODE_NAME (m), + rs6000_debug_vector_unit[ rs6000_vector_unit[m] ], + rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]); + } + + if (nl) + fputs (nl, stderr); + + switch (rs6000_sched_costly_dep) + { + case max_dep_latency: + costly_str = "max_dep_latency"; + break; + + case no_dep_costly: + costly_str = "no_dep_costly"; + break; + + case all_deps_costly: + costly_str = "all_deps_costly"; + break; + + case true_store_to_load_dep_costly: + costly_str = "true_store_to_load_dep_costly"; + break; + + case store_to_load_dep_costly: + costly_str = "store_to_load_dep_costly"; + break; + + default: + costly_str = costly_num; + sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep); + break; + } + + switch (rs6000_sched_insert_nops) + { + case sched_finish_regroup_exact: + nop_str = "sched_finish_regroup_exact"; + break; + + case sched_finish_pad_groups: + nop_str = "sched_finish_pad_groups"; + break; + + case sched_finish_none: + nop_str = "sched_finish_none"; + break; + + default: + nop_str = nop_num; + sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops); + break; + } + + fprintf (stderr, + "always_hint = %s\n" + "align_branch_targets = %s\n" + "sched_restricted_insns_priority = %d\n" + "sched_costly_dep = %s\n" + "sched_insert_nops = %s\n\n", + rs6000_always_hint ? "true" : "false", + rs6000_align_branch_targets ? "true" : "false", + (int)rs6000_sched_restricted_insns_priority, + costly_str, nop_str); +} /* Initialize the various global tables that are based on register size. 
*/ static void rs6000_init_hard_regno_mode_ok (void) { int r, m, c; - bool float_p = (TARGET_HARD_FLOAT && TARGET_FPRS); + int align64; + int align32; /* Precalculate REGNO_REG_CLASS. */ rs6000_regno_regclass[0] = GENERAL_REGS; @@ -1627,57 +1804,158 @@ rs6000_init_hard_regno_mode_ok (void) for (m = 0; m < NUM_MACHINE_MODES; ++m) { rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE; - rs6000_vector_reg_class[m] = NO_REGS; + rs6000_vector_reload[m][0] = CODE_FOR_nothing; + rs6000_vector_reload[m][1] = CODE_FOR_nothing; } - /* V4SF mode, Altivec only. */ - if (float_p && TARGET_ALTIVEC) + for (c = 0; c < (int)RS6000_CONSTRAINT_MAX; c++) + rs6000_constraints[c] = NO_REGS; + + /* The VSX hardware allows native alignment for vectors, but -mvsx-align-128 + controls whether the compiler believes it can use native alignment or must + still use 128-bit alignment. */ + if (TARGET_VSX && !TARGET_VSX_ALIGN_128) + { + align64 = 64; + align32 = 32; + } + else + { + align64 = 128; + align32 = 128; + } + + /* V2DF mode, VSX only. */ + if (TARGET_VSX) + { + rs6000_vector_unit[V2DFmode] = VECTOR_VSX; + rs6000_vector_mem[V2DFmode] = VECTOR_VSX; + rs6000_vector_align[V2DFmode] = align64; + } + + /* V4SF mode, either VSX or Altivec. */ + if (TARGET_VSX) + { + rs6000_vector_unit[V4SFmode] = VECTOR_VSX; + rs6000_vector_mem[V4SFmode] = VECTOR_VSX; + rs6000_vector_align[V4SFmode] = align32; + } + else if (TARGET_ALTIVEC) { rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC; rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC; - rs6000_vector_align[V4SFmode] = 128; + rs6000_vector_align[V4SFmode] = align32; } - /* V16QImode, V8HImode, V4SImode are Altivec only. */ + /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads + and stores. */ if (TARGET_ALTIVEC) { rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC; rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC; rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC; + rs6000_vector_align[V4SImode] = align32; + rs6000_vector_align[V8HImode] = align32; + rs6000_vector_align[V16QImode] = align32; - rs6000_vector_reg_class[V16QImode] = ALTIVEC_REGS; - rs6000_vector_reg_class[V8HImode] = ALTIVEC_REGS; - rs6000_vector_reg_class[V4SImode] = ALTIVEC_REGS; - - rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; - rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; - rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; - rs6000_vector_align[V4SImode] = 128; - rs6000_vector_align[V8HImode] = 128; - rs6000_vector_align[V16QImode] = 128; + if (TARGET_VSX) + { + rs6000_vector_mem[V4SImode] = VECTOR_VSX; + rs6000_vector_mem[V8HImode] = VECTOR_VSX; + rs6000_vector_mem[V16QImode] = VECTOR_VSX; + } + else + { + rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; + rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; + rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; + } } /* V2DImode, prefer vsx over altivec, since the main use will be for vectorized floating point conversions. */ - if (TARGET_ALTIVEC) + if (TARGET_VSX) + { + rs6000_vector_mem[V2DImode] = VECTOR_VSX; + rs6000_vector_unit[V2DImode] = VECTOR_NONE; + rs6000_vector_align[V2DImode] = align64; + } + else if (TARGET_ALTIVEC) { rs6000_vector_mem[V2DImode] = VECTOR_ALTIVEC; rs6000_vector_unit[V2DImode] = VECTOR_NONE; - rs6000_vector_reg_class[V2DImode] = ALTIVEC_REGS; - rs6000_vector_align[V2DImode] = 128; + rs6000_vector_align[V2DImode] = align64; + } + + /* DFmode, see if we want to use the VSX unit. */ + if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE) + { + rs6000_vector_unit[DFmode] = VECTOR_VSX; + rs6000_vector_mem[DFmode] + = (TARGET_VSX_SCALAR_MEMORY ? 
VECTOR_VSX : VECTOR_NONE); + rs6000_vector_align[DFmode] = align64; } /* TODO add SPE and paired floating point vector support. */ - /* Set the VSX register classes. */ - rs6000_vector_reg_class[V4SFmode] - = (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) - ? ALTIVEC_REGS - : NO_REGS); + /* Register class constraints for the constraints that depend on compile + switches. */ + if (TARGET_HARD_FLOAT && TARGET_FPRS) + rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; - rs6000_vector_reg_class[V2DFmode] = NO_REGS; + if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) + rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; - rs6000_vector_reg_class[DFmode] = (!float_p ? NO_REGS : FLOAT_REGS); + if (TARGET_VSX) + { + /* At present, we just use VSX_REGS, but we have different constraints + based on the use, in case we want to fine tune the default register + class used. wa = any VSX register, wf = register class to use for + V4SF, wd = register class to use for V2DF, and ws = register class to + use for DF scalars. */ + rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; + if (TARGET_VSX_SCALAR_DOUBLE) + rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; + } + + if (TARGET_ALTIVEC) + rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; + + /* Set up the reload helper functions. */ + if (TARGET_VSX || TARGET_ALTIVEC) + { + if (TARGET_64BIT) + { + rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store; + rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load; + rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store; + rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load; + rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store; + rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load; + rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store; + rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load; + rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store; + rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load; + rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store; + rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load; + } + else + { + rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store; + rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load; + rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store; + rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load; + rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store; + rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load; + rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store; + rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load; + rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store; + rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load; + rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store; + rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load; + } + } /* Precalculate HARD_REGNO_NREGS. 
*/ for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r) @@ -1696,7 +1974,10 @@ rs6000_init_hard_regno_mode_ok (void) { int reg_size; - if (c == ALTIVEC_REGS) + if (TARGET_VSX && VSX_REG_CLASS_P (c)) + reg_size = UNITS_PER_VSX_WORD; + + else if (c == ALTIVEC_REGS) reg_size = UNITS_PER_ALTIVEC_WORD; else if (c == FLOAT_REGS) @@ -1714,56 +1995,7 @@ rs6000_init_hard_regno_mode_ok (void) rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1; if (TARGET_DEBUG_REG) - { - const char *nl = (const char *)0; - - fprintf (stderr, "Register information: (last virtual reg = %d)\n", - LAST_VIRTUAL_REGISTER); - rs6000_debug_reg_print (0, 31, "gr"); - rs6000_debug_reg_print (32, 63, "fp"); - rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO, - LAST_ALTIVEC_REGNO, - "vs"); - rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr"); - rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr"); - rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr"); - rs6000_debug_reg_print (MQ_REGNO, MQ_REGNO, "mq"); - rs6000_debug_reg_print (XER_REGNO, XER_REGNO, "xer"); - rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave"); - rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr"); - rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a"); - rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f"); - - fprintf (stderr, - "\n" - "V16QI reg_class = %s\n" - "V8HI reg_class = %s\n" - "V4SI reg_class = %s\n" - "V2DI reg_class = %s\n" - "V4SF reg_class = %s\n" - "V2DF reg_class = %s\n" - "DF reg_class = %s\n\n", - reg_class_names[rs6000_vector_reg_class[V16QImode]], - reg_class_names[rs6000_vector_reg_class[V8HImode]], - reg_class_names[rs6000_vector_reg_class[V4SImode]], - reg_class_names[rs6000_vector_reg_class[V2DImode]], - reg_class_names[rs6000_vector_reg_class[V4SFmode]], - reg_class_names[rs6000_vector_reg_class[V2DFmode]], - reg_class_names[rs6000_vector_reg_class[DFmode]]); - - for (m = 0; m < NUM_MACHINE_MODES; ++m) - if (rs6000_vector_unit[m] || rs6000_vector_mem[m]) - { - nl = "\n"; - fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n", - GET_MODE_NAME (m), - rs6000_debug_vector_unit[ rs6000_vector_unit[m] ], - rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]); - } - - if (nl) - fputs (nl, stderr); - } + rs6000_debug_reg_global (); } #if TARGET_MACHO @@ -1937,7 +2169,7 @@ rs6000_override_options (const char *default_cpu) {"power7", PROCESSOR_POWER7, POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX}, /* Don't add MASK_ISEL by default */ + /* | MASK_VSX */}, /* Don't add MASK_ISEL by default */ {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK}, {"powerpc64", PROCESSOR_POWERPC64, POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64}, @@ -2246,12 +2478,23 @@ rs6000_override_options (const char *default_cpu) && rs6000_cpu != PROCESSOR_POWER7 && rs6000_cpu != PROCESSOR_CELL); rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 - || rs6000_cpu == PROCESSOR_POWER5); + || rs6000_cpu == PROCESSOR_POWER5 + || rs6000_cpu == PROCESSOR_POWER7); rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5 || rs6000_cpu == PROCESSOR_POWER6 || rs6000_cpu == PROCESSOR_POWER7); + /* Allow debug switches to override the above settings. 
*/ + if (TARGET_ALWAYS_HINT > 0) + rs6000_always_hint = TARGET_ALWAYS_HINT; + + if (TARGET_SCHED_GROUPS > 0) + rs6000_sched_groups = TARGET_SCHED_GROUPS; + + if (TARGET_ALIGN_BRANCH_TARGETS > 0) + rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS; + rs6000_sched_restricted_insns_priority = (rs6000_sched_groups ? 1 : 0); @@ -2517,18 +2760,19 @@ rs6000_builtin_conversion (unsigned int tcode, tree type) { enum tree_code code = (enum tree_code) tcode; - if (!TARGET_ALTIVEC) - return NULL_TREE; - switch (code) { case FIX_TRUNC_EXPR: switch (TYPE_MODE (type)) { case V4SImode: + if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode)) + return NULL_TREE; + return TYPE_UNSIGNED (type) - ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VCTUXS] - : rs6000_builtin_decls[ALTIVEC_BUILTIN_VCTSXS]; + ? rs6000_builtin_decls[VECTOR_BUILTIN_FIXUNS_V4SF_V4SI] + : rs6000_builtin_decls[VECTOR_BUILTIN_FIX_V4SF_V4SI]; + default: return NULL_TREE; } @@ -2537,9 +2781,13 @@ rs6000_builtin_conversion (unsigned int tcode, tree type) switch (TYPE_MODE (type)) { case V4SImode: + if (VECTOR_UNIT_NONE_P (V4SImode) || VECTOR_UNIT_NONE_P (V4SFmode)) + return NULL_TREE; + return TYPE_UNSIGNED (type) - ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VCFUX] - : rs6000_builtin_decls[ALTIVEC_BUILTIN_VCFSX]; + ? rs6000_builtin_decls[VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF] + : rs6000_builtin_decls[VECTOR_BUILTIN_FLOAT_V4SI_V4SF]; + default: return NULL_TREE; } @@ -2560,12 +2808,12 @@ rs6000_builtin_mul_widen_even (tree type) { case V8HImode: return TYPE_UNSIGNED (type) - ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUH] + ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUH_UNS] : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESH]; case V16QImode: return TYPE_UNSIGNED (type) - ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUB] + ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUB_UNS] : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESB]; default: return NULL_TREE; @@ -2583,12 +2831,12 @@ rs6000_builtin_mul_widen_odd (tree type) { case V8HImode: return TYPE_UNSIGNED (type) - ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUH] + ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUH_UNS] : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSH]; case V16QImode: return TYPE_UNSIGNED (type) - ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUB] + ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUB_UNS] : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSB]; default: return NULL_TREE; @@ -2630,6 +2878,8 @@ rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_pac tree rs6000_builtin_vec_perm (tree type, tree *mask_element_type) { + tree inner_type = TREE_TYPE (type); + bool uns_p = TYPE_UNSIGNED (inner_type); tree d; *mask_element_type = unsigned_char_type_node; @@ -2637,15 +2887,21 @@ rs6000_builtin_vec_perm (tree type, tree *mask_element_type) switch (TYPE_MODE (type)) { case V16QImode: - d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_16QI]; + d = (uns_p + ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_16QI_UNS] + : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_16QI]); break; case V8HImode: - d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_8HI]; + d = (uns_p + ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_8HI_UNS] + : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_8HI]); break; case V4SImode: - d = rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SI]; + d = (uns_p + ? 
rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SI_UNS] + : rs6000_builtin_decls[ALTIVEC_BUILTIN_VPERM_4SI]); break; case V4SFmode: @@ -3475,6 +3731,9 @@ output_vec_const_move (rtx *operands) vec = operands[1]; mode = GET_MODE (dest); + if (TARGET_VSX && zero_constant (vec, mode)) + return "xxlxor %x0,%x0,%x0"; + if (TARGET_ALTIVEC) { rtx splat_vec; @@ -3698,20 +3957,21 @@ rs6000_expand_vector_init (rtx target, rtx vals) if (n_var == 0) { rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); - if (mode != V4SFmode && all_const_zero) + bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); + if ((int_vector_p || TARGET_VSX) && all_const_zero) { /* Zero register. */ emit_insn (gen_rtx_SET (VOIDmode, target, gen_rtx_XOR (mode, target, target))); return; } - else if (mode != V4SFmode && easy_vector_constant (const_vec, mode)) + else if (int_vector_p && easy_vector_constant (const_vec, mode)) { /* Splat immediate. */ emit_insn (gen_rtx_SET (VOIDmode, target, const_vec)); return; } - else if (all_same) + else if (all_same && int_vector_p) ; /* Splat vector element. */ else { @@ -5597,6 +5857,32 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) return; } + /* Fix up invalid (const (plus (symbol_ref) (reg))) that seems to be created + in the secondary_reload phase, which evidently overwrites the CONST_INT + with a register. */ + if (GET_CODE (source) == CONST && GET_CODE (XEXP (source, 0)) == PLUS + && mode == Pmode) + { + rtx add_op0 = XEXP (XEXP (source, 0), 0); + rtx add_op1 = XEXP (XEXP (source, 0), 1); + + if (GET_CODE (add_op0) == SYMBOL_REF && GET_CODE (add_op1) == REG) + { + rtx tmp = (can_create_pseudo_p ()) ? gen_reg_rtx (Pmode) : dest; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nrs6000_emit_move: bad source\n"); + debug_rtx (source); + } + + rs6000_emit_move (tmp, add_op0, Pmode); + emit_insn (gen_rtx_SET (VOIDmode, dest, + gen_rtx_PLUS (Pmode, tmp, add_op1))); + return; + } + } + if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM && !gpc_reg_operand (operands[1], mode)) operands[1] = force_reg (mode, operands[1]); @@ -7865,14 +8151,26 @@ static const struct builtin_description bdesc_3arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vmsumuhs, "__builtin_altivec_vmsumuhs", ALTIVEC_BUILTIN_VMSUMUHS }, { MASK_ALTIVEC, CODE_FOR_altivec_vmsumshs, "__builtin_altivec_vmsumshs", ALTIVEC_BUILTIN_VMSUMSHS }, { MASK_ALTIVEC, CODE_FOR_altivec_vnmsubfp, "__builtin_altivec_vnmsubfp", ALTIVEC_BUILTIN_VNMSUBFP }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2df, "__builtin_altivec_vperm_2df", ALTIVEC_BUILTIN_VPERM_2DF }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2di, "__builtin_altivec_vperm_2di", ALTIVEC_BUILTIN_VPERM_2DI }, { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4sf, "__builtin_altivec_vperm_4sf", ALTIVEC_BUILTIN_VPERM_4SF }, { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4si, "__builtin_altivec_vperm_4si", ALTIVEC_BUILTIN_VPERM_4SI }, { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v8hi, "__builtin_altivec_vperm_8hi", ALTIVEC_BUILTIN_VPERM_8HI }, - { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v16qi, "__builtin_altivec_vperm_16qi", ALTIVEC_BUILTIN_VPERM_16QI }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsel_v4sf, "__builtin_altivec_vsel_4sf", ALTIVEC_BUILTIN_VSEL_4SF }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsel_v4si, "__builtin_altivec_vsel_4si", ALTIVEC_BUILTIN_VSEL_4SI }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsel_v8hi, "__builtin_altivec_vsel_8hi", ALTIVEC_BUILTIN_VSEL_8HI }, - { MASK_ALTIVEC, CODE_FOR_altivec_vsel_v16qi, "__builtin_altivec_vsel_16qi", ALTIVEC_BUILTIN_VSEL_16QI }, + { 
MASK_ALTIVEC, CODE_FOR_altivec_vperm_v16qi_uns, "__builtin_altivec_vperm_16qi", ALTIVEC_BUILTIN_VPERM_16QI }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v2di_uns, "__builtin_altivec_vperm_2di_uns", ALTIVEC_BUILTIN_VPERM_2DI_UNS }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v4si_uns, "__builtin_altivec_vperm_4si_uns", ALTIVEC_BUILTIN_VPERM_4SI_UNS }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v8hi_uns, "__builtin_altivec_vperm_8hi_uns", ALTIVEC_BUILTIN_VPERM_8HI_UNS }, + { MASK_ALTIVEC, CODE_FOR_altivec_vperm_v16qi_uns, "__builtin_altivec_vperm_16qi_uns", ALTIVEC_BUILTIN_VPERM_16QI_UNS }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v4sf, "__builtin_altivec_vsel_4sf", ALTIVEC_BUILTIN_VSEL_4SF }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v4si, "__builtin_altivec_vsel_4si", ALTIVEC_BUILTIN_VSEL_4SI }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v8hi, "__builtin_altivec_vsel_8hi", ALTIVEC_BUILTIN_VSEL_8HI }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v16qi, "__builtin_altivec_vsel_16qi", ALTIVEC_BUILTIN_VSEL_16QI }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v2df, "__builtin_altivec_vsel_2df", ALTIVEC_BUILTIN_VSEL_2DF }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v2di, "__builtin_altivec_vsel_2di", ALTIVEC_BUILTIN_VSEL_2DI }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v4si_uns, "__builtin_altivec_vsel_4si_uns", ALTIVEC_BUILTIN_VSEL_4SI_UNS }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v8hi_uns, "__builtin_altivec_vsel_8hi_uns", ALTIVEC_BUILTIN_VSEL_8HI_UNS }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v16qi_uns, "__builtin_altivec_vsel_16qi_uns", ALTIVEC_BUILTIN_VSEL_16QI_UNS }, + { MASK_ALTIVEC, CODE_FOR_vector_select_v2di_uns, "__builtin_altivec_vsel_2di_uns", ALTIVEC_BUILTIN_VSEL_2DI_UNS }, { MASK_ALTIVEC, CODE_FOR_altivec_vsldoi_v16qi, "__builtin_altivec_vsldoi_16qi", ALTIVEC_BUILTIN_VSLDOI_16QI }, { MASK_ALTIVEC, CODE_FOR_altivec_vsldoi_v8hi, "__builtin_altivec_vsldoi_8hi", ALTIVEC_BUILTIN_VSLDOI_8HI }, { MASK_ALTIVEC, CODE_FOR_altivec_vsldoi_v4si, "__builtin_altivec_vsldoi_4si", ALTIVEC_BUILTIN_VSLDOI_4SI }, @@ -7946,18 +8244,18 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_altivec_vcfux, "__builtin_altivec_vcfux", ALTIVEC_BUILTIN_VCFUX }, { MASK_ALTIVEC, CODE_FOR_altivec_vcfsx, "__builtin_altivec_vcfsx", ALTIVEC_BUILTIN_VCFSX }, { MASK_ALTIVEC, CODE_FOR_altivec_vcmpbfp, "__builtin_altivec_vcmpbfp", ALTIVEC_BUILTIN_VCMPBFP }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpequb, "__builtin_altivec_vcmpequb", ALTIVEC_BUILTIN_VCMPEQUB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpequh, "__builtin_altivec_vcmpequh", ALTIVEC_BUILTIN_VCMPEQUH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpequw, "__builtin_altivec_vcmpequw", ALTIVEC_BUILTIN_VCMPEQUW }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpeqfp, "__builtin_altivec_vcmpeqfp", ALTIVEC_BUILTIN_VCMPEQFP }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgefp, "__builtin_altivec_vcmpgefp", ALTIVEC_BUILTIN_VCMPGEFP }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgtub, "__builtin_altivec_vcmpgtub", ALTIVEC_BUILTIN_VCMPGTUB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgtsb, "__builtin_altivec_vcmpgtsb", ALTIVEC_BUILTIN_VCMPGTSB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgtuh, "__builtin_altivec_vcmpgtuh", ALTIVEC_BUILTIN_VCMPGTUH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgtsh, "__builtin_altivec_vcmpgtsh", ALTIVEC_BUILTIN_VCMPGTSH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgtuw, "__builtin_altivec_vcmpgtuw", ALTIVEC_BUILTIN_VCMPGTUW }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgtsw, "__builtin_altivec_vcmpgtsw", ALTIVEC_BUILTIN_VCMPGTSW }, - { MASK_ALTIVEC, CODE_FOR_altivec_vcmpgtfp, 
"__builtin_altivec_vcmpgtfp", ALTIVEC_BUILTIN_VCMPGTFP }, + { MASK_ALTIVEC, CODE_FOR_vector_eqv16qi, "__builtin_altivec_vcmpequb", ALTIVEC_BUILTIN_VCMPEQUB }, + { MASK_ALTIVEC, CODE_FOR_vector_eqv8hi, "__builtin_altivec_vcmpequh", ALTIVEC_BUILTIN_VCMPEQUH }, + { MASK_ALTIVEC, CODE_FOR_vector_eqv4si, "__builtin_altivec_vcmpequw", ALTIVEC_BUILTIN_VCMPEQUW }, + { MASK_ALTIVEC, CODE_FOR_vector_eqv4sf, "__builtin_altivec_vcmpeqfp", ALTIVEC_BUILTIN_VCMPEQFP }, + { MASK_ALTIVEC, CODE_FOR_vector_gev4sf, "__builtin_altivec_vcmpgefp", ALTIVEC_BUILTIN_VCMPGEFP }, + { MASK_ALTIVEC, CODE_FOR_vector_gtuv16qi, "__builtin_altivec_vcmpgtub", ALTIVEC_BUILTIN_VCMPGTUB }, + { MASK_ALTIVEC, CODE_FOR_vector_gtuv8hi, "__builtin_altivec_vcmpgtsb", ALTIVEC_BUILTIN_VCMPGTSB }, + { MASK_ALTIVEC, CODE_FOR_vector_gtuv4si, "__builtin_altivec_vcmpgtuh", ALTIVEC_BUILTIN_VCMPGTUH }, + { MASK_ALTIVEC, CODE_FOR_vector_gtv16qi, "__builtin_altivec_vcmpgtsh", ALTIVEC_BUILTIN_VCMPGTSH }, + { MASK_ALTIVEC, CODE_FOR_vector_gtv8hi, "__builtin_altivec_vcmpgtuw", ALTIVEC_BUILTIN_VCMPGTUW }, + { MASK_ALTIVEC, CODE_FOR_vector_gtv4si, "__builtin_altivec_vcmpgtsw", ALTIVEC_BUILTIN_VCMPGTSW }, + { MASK_ALTIVEC, CODE_FOR_vector_gtv4sf, "__builtin_altivec_vcmpgtfp", ALTIVEC_BUILTIN_VCMPGTFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vctsxs, "__builtin_altivec_vctsxs", ALTIVEC_BUILTIN_VCTSXS }, { MASK_ALTIVEC, CODE_FOR_altivec_vctuxs, "__builtin_altivec_vctuxs", ALTIVEC_BUILTIN_VCTUXS }, { MASK_ALTIVEC, CODE_FOR_umaxv16qi3, "__builtin_altivec_vmaxub", ALTIVEC_BUILTIN_VMAXUB }, @@ -7981,14 +8279,18 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_sminv4si3, "__builtin_altivec_vminsw", ALTIVEC_BUILTIN_VMINSW }, { MASK_ALTIVEC, CODE_FOR_sminv4sf3, "__builtin_altivec_vminfp", ALTIVEC_BUILTIN_VMINFP }, { MASK_ALTIVEC, CODE_FOR_altivec_vmuleub, "__builtin_altivec_vmuleub", ALTIVEC_BUILTIN_VMULEUB }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmuleub, "__builtin_altivec_vmuleub_uns", ALTIVEC_BUILTIN_VMULEUB_UNS }, { MASK_ALTIVEC, CODE_FOR_altivec_vmulesb, "__builtin_altivec_vmulesb", ALTIVEC_BUILTIN_VMULESB }, { MASK_ALTIVEC, CODE_FOR_altivec_vmuleuh, "__builtin_altivec_vmuleuh", ALTIVEC_BUILTIN_VMULEUH }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmuleuh, "__builtin_altivec_vmuleuh_uns", ALTIVEC_BUILTIN_VMULEUH_UNS }, { MASK_ALTIVEC, CODE_FOR_altivec_vmulesh, "__builtin_altivec_vmulesh", ALTIVEC_BUILTIN_VMULESH }, { MASK_ALTIVEC, CODE_FOR_altivec_vmuloub, "__builtin_altivec_vmuloub", ALTIVEC_BUILTIN_VMULOUB }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmuloub, "__builtin_altivec_vmuloub_uns", ALTIVEC_BUILTIN_VMULOUB_UNS }, { MASK_ALTIVEC, CODE_FOR_altivec_vmulosb, "__builtin_altivec_vmulosb", ALTIVEC_BUILTIN_VMULOSB }, { MASK_ALTIVEC, CODE_FOR_altivec_vmulouh, "__builtin_altivec_vmulouh", ALTIVEC_BUILTIN_VMULOUH }, + { MASK_ALTIVEC, CODE_FOR_altivec_vmulouh, "__builtin_altivec_vmulouh_uns", ALTIVEC_BUILTIN_VMULOUH_UNS }, { MASK_ALTIVEC, CODE_FOR_altivec_vmulosh, "__builtin_altivec_vmulosh", ALTIVEC_BUILTIN_VMULOSH }, - { MASK_ALTIVEC, CODE_FOR_altivec_norv4si3, "__builtin_altivec_vnor", ALTIVEC_BUILTIN_VNOR }, + { MASK_ALTIVEC, CODE_FOR_norv4si3, "__builtin_altivec_vnor", ALTIVEC_BUILTIN_VNOR }, { MASK_ALTIVEC, CODE_FOR_iorv4si3, "__builtin_altivec_vor", ALTIVEC_BUILTIN_VOR }, { MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum, "__builtin_altivec_vpkuhum", ALTIVEC_BUILTIN_VPKUHUM }, { MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum, "__builtin_altivec_vpkuwum", ALTIVEC_BUILTIN_VPKUWUM }, @@ -7999,9 +8301,9 @@ static struct builtin_description bdesc_2arg[] = { 
MASK_ALTIVEC, CODE_FOR_altivec_vpkshus, "__builtin_altivec_vpkshus", ALTIVEC_BUILTIN_VPKSHUS }, { MASK_ALTIVEC, CODE_FOR_altivec_vpkuwus, "__builtin_altivec_vpkuwus", ALTIVEC_BUILTIN_VPKUWUS }, { MASK_ALTIVEC, CODE_FOR_altivec_vpkswus, "__builtin_altivec_vpkswus", ALTIVEC_BUILTIN_VPKSWUS }, - { MASK_ALTIVEC, CODE_FOR_altivec_vrlb, "__builtin_altivec_vrlb", ALTIVEC_BUILTIN_VRLB }, - { MASK_ALTIVEC, CODE_FOR_altivec_vrlh, "__builtin_altivec_vrlh", ALTIVEC_BUILTIN_VRLH }, - { MASK_ALTIVEC, CODE_FOR_altivec_vrlw, "__builtin_altivec_vrlw", ALTIVEC_BUILTIN_VRLW }, + { MASK_ALTIVEC, CODE_FOR_vrotlv16qi3, "__builtin_altivec_vrlb", ALTIVEC_BUILTIN_VRLB }, + { MASK_ALTIVEC, CODE_FOR_vrotlv8hi3, "__builtin_altivec_vrlh", ALTIVEC_BUILTIN_VRLH }, + { MASK_ALTIVEC, CODE_FOR_vrotlv4si3, "__builtin_altivec_vrlw", ALTIVEC_BUILTIN_VRLW }, { MASK_ALTIVEC, CODE_FOR_vashlv16qi3, "__builtin_altivec_vslb", ALTIVEC_BUILTIN_VSLB }, { MASK_ALTIVEC, CODE_FOR_vashlv8hi3, "__builtin_altivec_vslh", ALTIVEC_BUILTIN_VSLH }, { MASK_ALTIVEC, CODE_FOR_vashlv4si3, "__builtin_altivec_vslw", ALTIVEC_BUILTIN_VSLW }, @@ -8049,8 +8351,8 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vadduhs", ALTIVEC_BUILTIN_VEC_VADDUHS }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddsbs", ALTIVEC_BUILTIN_VEC_VADDSBS }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vaddubs", ALTIVEC_BUILTIN_VEC_VADDUBS }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_and", ALTIVEC_BUILTIN_VEC_AND }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_andc", ALTIVEC_BUILTIN_VEC_ANDC }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_and", ALTIVEC_BUILTIN_VEC_AND }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_andc", ALTIVEC_BUILTIN_VEC_ANDC }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_avg", ALTIVEC_BUILTIN_VEC_AVG }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavgsw", ALTIVEC_BUILTIN_VEC_VAVGSW }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vavguw", ALTIVEC_BUILTIN_VEC_VAVGUW }, @@ -8075,8 +8377,8 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vcmpgtub", ALTIVEC_BUILTIN_VEC_VCMPGTUB }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmple", ALTIVEC_BUILTIN_VEC_CMPLE }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_cmplt", ALTIVEC_BUILTIN_VEC_CMPLT }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_max", ALTIVEC_BUILTIN_VEC_MAX }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxfp", ALTIVEC_BUILTIN_VEC_VMAXFP }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_max", ALTIVEC_BUILTIN_VEC_MAX }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vmaxfp", ALTIVEC_BUILTIN_VEC_VMAXFP }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxsw", ALTIVEC_BUILTIN_VEC_VMAXSW }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxuw", ALTIVEC_BUILTIN_VEC_VMAXUW }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmaxsh", ALTIVEC_BUILTIN_VEC_VMAXSH }, @@ -8091,8 +8393,8 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrglw", ALTIVEC_BUILTIN_VEC_VMRGLW }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrglh", ALTIVEC_BUILTIN_VEC_VMRGLH }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmrglb", ALTIVEC_BUILTIN_VEC_VMRGLB }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_min", ALTIVEC_BUILTIN_VEC_MIN }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminfp", ALTIVEC_BUILTIN_VEC_VMINFP }, + { MASK_ALTIVEC|MASK_VSX, 
CODE_FOR_nothing, "__builtin_vec_min", ALTIVEC_BUILTIN_VEC_MIN }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vminfp", ALTIVEC_BUILTIN_VEC_VMINFP }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminsw", ALTIVEC_BUILTIN_VEC_VMINSW }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminuw", ALTIVEC_BUILTIN_VEC_VMINUW }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vminsh", ALTIVEC_BUILTIN_VEC_VMINSH }, @@ -8109,8 +8411,8 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmulouh", ALTIVEC_BUILTIN_VEC_VMULOUH }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmulosb", ALTIVEC_BUILTIN_VEC_VMULOSB }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vmuloub", ALTIVEC_BUILTIN_VEC_VMULOUB }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_nor", ALTIVEC_BUILTIN_VEC_NOR }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_or", ALTIVEC_BUILTIN_VEC_OR }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_nor", ALTIVEC_BUILTIN_VEC_NOR }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_or", ALTIVEC_BUILTIN_VEC_OR }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_pack", ALTIVEC_BUILTIN_VEC_PACK }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkuwum", ALTIVEC_BUILTIN_VEC_VPKUWUM }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vpkuhum", ALTIVEC_BUILTIN_VEC_VPKUHUM }, @@ -8143,8 +8445,8 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsrab", ALTIVEC_BUILTIN_VEC_VSRAB }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_srl", ALTIVEC_BUILTIN_VEC_SRL }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sro", ALTIVEC_BUILTIN_VEC_SRO }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sub", ALTIVEC_BUILTIN_VEC_SUB }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubfp", ALTIVEC_BUILTIN_VEC_VSUBFP }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_sub", ALTIVEC_BUILTIN_VEC_SUB }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vsubfp", ALTIVEC_BUILTIN_VEC_VSUBFP }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubuwm", ALTIVEC_BUILTIN_VEC_VSUBUWM }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsubuhm", ALTIVEC_BUILTIN_VEC_VSUBUHM }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsububm", ALTIVEC_BUILTIN_VEC_VSUBUBM }, @@ -8162,7 +8464,7 @@ static struct builtin_description bdesc_2arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vsum4ubs", ALTIVEC_BUILTIN_VEC_VSUM4UBS }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sum2s", ALTIVEC_BUILTIN_VEC_SUM2S }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_sums", ALTIVEC_BUILTIN_VEC_SUMS }, - { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_xor", ALTIVEC_BUILTIN_VEC_XOR }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_xor", ALTIVEC_BUILTIN_VEC_XOR }, { 0, CODE_FOR_divv2sf3, "__builtin_paired_divv2sf3", PAIRED_BUILTIN_DIVV2SF3 }, { 0, CODE_FOR_addv2sf3, "__builtin_paired_addv2sf3", PAIRED_BUILTIN_ADDV2SF3 }, @@ -8326,30 +8628,45 @@ struct builtin_description_predicates { const unsigned int mask; const enum insn_code icode; - const char *opcode; const char *const name; const enum rs6000_builtins code; }; static const struct builtin_description_predicates bdesc_altivec_preds[] = { - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v4sf, "*vcmpbfp.", "__builtin_altivec_vcmpbfp_p", ALTIVEC_BUILTIN_VCMPBFP_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v4sf, "*vcmpeqfp.", "__builtin_altivec_vcmpeqfp_p", ALTIVEC_BUILTIN_VCMPEQFP_P }, - { 
MASK_ALTIVEC, CODE_FOR_altivec_predicate_v4sf, "*vcmpgefp.", "__builtin_altivec_vcmpgefp_p", ALTIVEC_BUILTIN_VCMPGEFP_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v4sf, "*vcmpgtfp.", "__builtin_altivec_vcmpgtfp_p", ALTIVEC_BUILTIN_VCMPGTFP_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v4si, "*vcmpequw.", "__builtin_altivec_vcmpequw_p", ALTIVEC_BUILTIN_VCMPEQUW_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v4si, "*vcmpgtsw.", "__builtin_altivec_vcmpgtsw_p", ALTIVEC_BUILTIN_VCMPGTSW_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v4si, "*vcmpgtuw.", "__builtin_altivec_vcmpgtuw_p", ALTIVEC_BUILTIN_VCMPGTUW_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v8hi, "*vcmpgtuh.", "__builtin_altivec_vcmpgtuh_p", ALTIVEC_BUILTIN_VCMPGTUH_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v8hi, "*vcmpgtsh.", "__builtin_altivec_vcmpgtsh_p", ALTIVEC_BUILTIN_VCMPGTSH_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v8hi, "*vcmpequh.", "__builtin_altivec_vcmpequh_p", ALTIVEC_BUILTIN_VCMPEQUH_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v16qi, "*vcmpequb.", "__builtin_altivec_vcmpequb_p", ALTIVEC_BUILTIN_VCMPEQUB_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v16qi, "*vcmpgtsb.", "__builtin_altivec_vcmpgtsb_p", ALTIVEC_BUILTIN_VCMPGTSB_P }, - { MASK_ALTIVEC, CODE_FOR_altivec_predicate_v16qi, "*vcmpgtub.", "__builtin_altivec_vcmpgtub_p", ALTIVEC_BUILTIN_VCMPGTUB_P }, + { MASK_ALTIVEC, CODE_FOR_altivec_vcmpbfp_p, "__builtin_altivec_vcmpbfp_p", + ALTIVEC_BUILTIN_VCMPBFP_P }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_vector_eq_v4sf_p, + "__builtin_altivec_vcmpeqfp_p", ALTIVEC_BUILTIN_VCMPEQFP_P }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_vector_ge_v4sf_p, + "__builtin_altivec_vcmpgefp_p", ALTIVEC_BUILTIN_VCMPGEFP_P }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_vector_gt_v4sf_p, + "__builtin_altivec_vcmpgtfp_p", ALTIVEC_BUILTIN_VCMPGTFP_P }, + { MASK_ALTIVEC, CODE_FOR_vector_eq_v4si_p, "__builtin_altivec_vcmpequw_p", + ALTIVEC_BUILTIN_VCMPEQUW_P }, + { MASK_ALTIVEC, CODE_FOR_vector_gt_v4si_p, "__builtin_altivec_vcmpgtsw_p", + ALTIVEC_BUILTIN_VCMPGTSW_P }, + { MASK_ALTIVEC, CODE_FOR_vector_gtu_v4si_p, "__builtin_altivec_vcmpgtuw_p", + ALTIVEC_BUILTIN_VCMPGTUW_P }, + { MASK_ALTIVEC, CODE_FOR_vector_eq_v8hi_p, "__builtin_altivec_vcmpequh_p", + ALTIVEC_BUILTIN_VCMPEQUH_P }, + { MASK_ALTIVEC, CODE_FOR_vector_gt_v8hi_p, "__builtin_altivec_vcmpgtsh_p", + ALTIVEC_BUILTIN_VCMPGTSH_P }, + { MASK_ALTIVEC, CODE_FOR_vector_gtu_v8hi_p, "__builtin_altivec_vcmpgtuh_p", + ALTIVEC_BUILTIN_VCMPGTUH_P }, + { MASK_ALTIVEC, CODE_FOR_vector_eq_v16qi_p, "__builtin_altivec_vcmpequb_p", + ALTIVEC_BUILTIN_VCMPEQUB_P }, + { MASK_ALTIVEC, CODE_FOR_vector_gt_v16qi_p, "__builtin_altivec_vcmpgtsb_p", + ALTIVEC_BUILTIN_VCMPGTSB_P }, + { MASK_ALTIVEC, CODE_FOR_vector_gtu_v16qi_p, "__builtin_altivec_vcmpgtub_p", + ALTIVEC_BUILTIN_VCMPGTUB_P }, - { MASK_ALTIVEC, CODE_FOR_nothing, NULL, "__builtin_vec_vcmpeq_p", ALTIVEC_BUILTIN_VCMPEQ_P }, - { MASK_ALTIVEC, CODE_FOR_nothing, NULL, "__builtin_vec_vcmpgt_p", ALTIVEC_BUILTIN_VCMPGT_P }, - { MASK_ALTIVEC, CODE_FOR_nothing, NULL, "__builtin_vec_vcmpge_p", ALTIVEC_BUILTIN_VCMPGE_P } + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpeq_p", + ALTIVEC_BUILTIN_VCMPEQ_P }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpgt_p", + ALTIVEC_BUILTIN_VCMPGT_P }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_nothing, "__builtin_vec_vcmpge_p", + ALTIVEC_BUILTIN_VCMPGE_P } }; /* SPE predicates. 
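The rewritten table above drops the old opcode-string column: each predicate builtin now names an expander whose RTL sets CR6 directly, and altivec_expand_predicate_builtin (further below) keys off the builtin's first argument to decide which CR6 condition to test. An illustrative use from the source level, assuming the __CR6_* selectors provided by altivec.h:

  #include <altivec.h>

  /* Nonzero when every element of A equals the matching element of B;
     __CR6_LT selects the "all elements true" bit of CR6.  */
  int
  all_equal (__vector signed int a, __vector signed int b)
  {
    return __builtin_altivec_vcmpequw_p (__CR6_LT, a, b);
  }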
*/ @@ -8453,6 +8770,11 @@ static struct builtin_description bdesc_1arg[] = { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsh", ALTIVEC_BUILTIN_VEC_VUPKLSH }, { MASK_ALTIVEC, CODE_FOR_nothing, "__builtin_vec_vupklsb", ALTIVEC_BUILTIN_VEC_VUPKLSB }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_floatv4siv4sf2, "__builtin_vec_float_sisf", VECTOR_BUILTIN_FLOAT_V4SI_V4SF }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_unsigned_floatv4siv4sf2, "__builtin_vec_uns_float_sisf", VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_fix_truncv4sfv4si2, "__builtin_vec_fix_sfsi", VECTOR_BUILTIN_FIX_V4SF_V4SI }, + { MASK_ALTIVEC|MASK_VSX, CODE_FOR_fixuns_truncv4sfv4si2, "__builtin_vec_fixuns_sfsi", VECTOR_BUILTIN_FIXUNS_V4SF_V4SI }, + /* The SPE unary builtins must start with SPE_BUILTIN_EVABS and end with SPE_BUILTIN_EVSUBFUSIAAW. */ { 0, CODE_FOR_spe_evabs, "__builtin_spe_evabs", SPE_BUILTIN_EVABS }, @@ -8649,8 +8971,7 @@ rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) } static rtx -altivec_expand_predicate_builtin (enum insn_code icode, const char *opcode, - tree exp, rtx target) +altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat, scratch; tree cr6_form = CALL_EXPR_ARG (exp, 0); @@ -8689,8 +9010,7 @@ altivec_expand_predicate_builtin (enum insn_code icode, const char *opcode, scratch = gen_reg_rtx (mode0); - pat = GEN_FCN (icode) (scratch, op0, op1, - gen_rtx_SYMBOL_REF (Pmode, opcode)); + pat = GEN_FCN (icode) (scratch, op0, op1); if (! pat) return 0; emit_insn (pat); @@ -8957,11 +9277,12 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) || arg2 == error_mark_node) return const0_rtx; - if (icode == CODE_FOR_altivec_vsldoi_v4sf - || icode == CODE_FOR_altivec_vsldoi_v4si - || icode == CODE_FOR_altivec_vsldoi_v8hi - || icode == CODE_FOR_altivec_vsldoi_v16qi) + switch (icode) { + case CODE_FOR_altivec_vsldoi_v4sf: + case CODE_FOR_altivec_vsldoi_v4si: + case CODE_FOR_altivec_vsldoi_v8hi: + case CODE_FOR_altivec_vsldoi_v16qi: /* Only allow 4-bit unsigned literals. 
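Usage sketch for the vsldoi check that follows: the shift count is encoded in the instruction itself, so argument 3 must be a compile-time literal in the range 0-15; anything else triggers the error reported just below.

  __vector signed int r = __builtin_altivec_vsldoi_4si (a, b, 3);  /* accepted */
  /* __builtin_altivec_vsldoi_4si (a, b, 16), or a non-constant third
     argument, is rejected: "argument 3 must be a 4-bit unsigned literal".  */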
*/ STRIP_NOPS (arg2); if (TREE_CODE (arg2) != INTEGER_CST @@ -8970,6 +9291,10 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) error ("argument 3 must be a 4-bit unsigned literal"); return const0_rtx; } + break; + + default: + break; } if (target == 0 @@ -9009,16 +9334,16 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) switch (fcode) { case ALTIVEC_BUILTIN_LD_INTERNAL_16qi: - icode = CODE_FOR_altivec_lvx_v16qi; + icode = CODE_FOR_vector_load_v16qi; break; case ALTIVEC_BUILTIN_LD_INTERNAL_8hi: - icode = CODE_FOR_altivec_lvx_v8hi; + icode = CODE_FOR_vector_load_v8hi; break; case ALTIVEC_BUILTIN_LD_INTERNAL_4si: - icode = CODE_FOR_altivec_lvx_v4si; + icode = CODE_FOR_vector_load_v4si; break; case ALTIVEC_BUILTIN_LD_INTERNAL_4sf: - icode = CODE_FOR_altivec_lvx_v4sf; + icode = CODE_FOR_vector_load_v4sf; break; default: *expandedp = false; @@ -9062,16 +9387,16 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, switch (fcode) { case ALTIVEC_BUILTIN_ST_INTERNAL_16qi: - icode = CODE_FOR_altivec_stvx_v16qi; + icode = CODE_FOR_vector_store_v16qi; break; case ALTIVEC_BUILTIN_ST_INTERNAL_8hi: - icode = CODE_FOR_altivec_stvx_v8hi; + icode = CODE_FOR_vector_store_v8hi; break; case ALTIVEC_BUILTIN_ST_INTERNAL_4si: - icode = CODE_FOR_altivec_stvx_v4si; + icode = CODE_FOR_vector_store_v4si; break; case ALTIVEC_BUILTIN_ST_INTERNAL_4sf: - icode = CODE_FOR_altivec_stvx_v4sf; + icode = CODE_FOR_vector_store_v4sf; break; default: *expandedp = false; @@ -9415,8 +9740,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) dp = bdesc_altivec_preds; for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, dp++) if (dp->code == fcode) - return altivec_expand_predicate_builtin (dp->icode, dp->opcode, - exp, target); + return altivec_expand_predicate_builtin (dp->icode, exp, target); /* LV* are funky. We initialized them differently. 
*/ switch (fcode) @@ -9910,13 +10234,13 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, bool success; if (fcode == RS6000_BUILTIN_RECIP) - return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target); + return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target); if (fcode == RS6000_BUILTIN_RECIPF) - return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target); + return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target); if (fcode == RS6000_BUILTIN_RSQRTF) - return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target); + return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target); if (fcode == RS6000_BUILTIN_BSWAP_HI) return rs6000_expand_unop_builtin (CODE_FOR_bswaphi2, exp, target); @@ -10025,6 +10349,8 @@ rs6000_init_builtins (void) V2SI_type_node = build_vector_type (intSI_type_node, 2); V2SF_type_node = build_vector_type (float_type_node, 2); + V2DI_type_node = build_vector_type (intDI_type_node, 2); + V2DF_type_node = build_vector_type (double_type_node, 2); V4HI_type_node = build_vector_type (intHI_type_node, 4); V4SI_type_node = build_vector_type (intSI_type_node, 4); V4SF_type_node = build_vector_type (float_type_node, 4); @@ -10034,6 +10360,7 @@ rs6000_init_builtins (void) unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16); unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8); unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4); + unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2); opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2); opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2); @@ -10047,6 +10374,7 @@ rs6000_init_builtins (void) bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node); bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node); bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node); + bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node); pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node); long_integer_type_internal_node = long_integer_type_node; @@ -10057,9 +10385,36 @@ rs6000_init_builtins (void) uintHI_type_internal_node = unsigned_intHI_type_node; intSI_type_internal_node = intSI_type_node; uintSI_type_internal_node = unsigned_intSI_type_node; + intDI_type_internal_node = intDI_type_node; + uintDI_type_internal_node = unsigned_intDI_type_node; float_type_internal_node = float_type_node; + double_type_internal_node = float_type_node; void_type_internal_node = void_type_node; + /* Initialize the modes for builtin_function_type, mapping a machine mode to + tree type node. 
*/ + builtin_mode_to_type[QImode][0] = integer_type_node; + builtin_mode_to_type[HImode][0] = integer_type_node; + builtin_mode_to_type[SImode][0] = intSI_type_node; + builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node; + builtin_mode_to_type[DImode][0] = intDI_type_node; + builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node; + builtin_mode_to_type[SFmode][0] = float_type_node; + builtin_mode_to_type[DFmode][0] = double_type_node; + builtin_mode_to_type[V2SImode][0] = V2SI_type_node; + builtin_mode_to_type[V2SFmode][0] = V2SF_type_node; + builtin_mode_to_type[V2DImode][0] = V2DI_type_node; + builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node; + builtin_mode_to_type[V2DFmode][0] = V2DF_type_node; + builtin_mode_to_type[V4HImode][0] = V4HI_type_node; + builtin_mode_to_type[V4SImode][0] = V4SI_type_node; + builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node; + builtin_mode_to_type[V4SFmode][0] = V4SF_type_node; + builtin_mode_to_type[V8HImode][0] = V8HI_type_node; + builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node; + builtin_mode_to_type[V16QImode][0] = V16QI_type_node; + builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node; + tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("__bool char"), bool_char_type_node); @@ -10083,6 +10438,7 @@ rs6000_init_builtins (void) bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16); bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8); bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4); + bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2); pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8); tdecl = build_decl (BUILTINS_LOCATION, TYPE_DECL, @@ -10154,31 +10510,30 @@ rs6000_init_builtins (void) rs6000_common_init_builtins (); if (TARGET_PPC_GFXOPT) { - tree ftype = build_function_type_list (float_type_node, - float_type_node, - float_type_node, - NULL_TREE); + tree ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode, + RS6000_BUILTIN_RECIPF, + "__builtin_recipdivf"); def_builtin (MASK_PPC_GFXOPT, "__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF); - ftype = build_function_type_list (float_type_node, - float_type_node, - NULL_TREE); + ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode, + RS6000_BUILTIN_RSQRTF, + "__builtin_rsqrtf"); def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF); } if (TARGET_POPCNTB) { - tree ftype = build_function_type_list (double_type_node, - double_type_node, - double_type_node, - NULL_TREE); + tree ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode, + RS6000_BUILTIN_RECIP, + "__builtin_recipdiv"); def_builtin (MASK_POPCNTB, "__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP); } if (TARGET_POWERPC) { + /* Don't use builtin_function_type here, as it maps HI/QI to SI. 
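The builtin_mode_to_type table above is indexed by machine mode and a signed/unsigned flag; builtin_function_type later falls back to the signed entry when no unsigned variant exists. A standalone sketch of that lookup, with types modeled as strings:

#include <stdio.h>

enum mode { M_SI, M_DI, M_SF, M_DF, M_MAX };

static const char *mode_to_type[M_MAX][2];

static void
init_table (void)
{
  mode_to_type[M_SI][0] = "int";
  mode_to_type[M_SI][1] = "unsigned int";
  mode_to_type[M_DI][0] = "long long";
  mode_to_type[M_DI][1] = "unsigned long long";
  mode_to_type[M_SF][0] = "float";   /* no unsigned float: [1] stays NULL */
  mode_to_type[M_DF][0] = "double";
}

static const char *
lookup_type (enum mode m, int uns_p)
{
  const char *t = mode_to_type[m][uns_p];
  if (!t && uns_p)              /* fall back to the signed flavor */
    t = mode_to_type[m][0];
  return t;                     /* NULL means "no type": fatal in GCC */
}

int
main (void)
{
  init_table ();
  printf ("%s\n", lookup_type (M_SF, 1));   /* prints "float" */
  printf ("%s\n", lookup_type (M_DI, 1));   /* prints "unsigned long long" */
  return 0;
}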
*/ tree ftype = build_function_type_list (unsigned_intHI_type_node, unsigned_intHI_type_node, NULL_TREE); @@ -10851,7 +11206,7 @@ altivec_init_builtins (void) def_builtin (MASK_ALTIVEC, "__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI); - ftype = build_function_type_list (V8HI_type_node, V16QI_type_node, + ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, intQI_type_node, integer_type_node, NULL_TREE); def_builtin (MASK_ALTIVEC, "__builtin_vec_set_v16qi", ftype, @@ -10860,7 +11215,7 @@ altivec_init_builtins (void) ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, float_type_node, integer_type_node, NULL_TREE); - def_builtin (MASK_ALTIVEC, "__builtin_vec_set_v4sf", ftype, + def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF); /* Access to the vec_extract patterns. */ @@ -10881,480 +11236,295 @@ altivec_init_builtins (void) ftype = build_function_type_list (float_type_node, V4SF_type_node, integer_type_node, NULL_TREE); - def_builtin (MASK_ALTIVEC, "__builtin_vec_ext_v4sf", ftype, + def_builtin (MASK_ALTIVEC|MASK_VSX, "__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF); } +/* Hash function for builtin functions with up to 3 arguments and a return + type. */ +static unsigned +builtin_hash_function (const void *hash_entry) +{ + unsigned ret = 0; + int i; + const struct builtin_hash_struct *bh = + (const struct builtin_hash_struct *) hash_entry; + + for (i = 0; i < 4; i++) + { + ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]); + ret = (ret * 2) + bh->uns_p[i]; + } + + return ret; +} + +/* Compare builtin hash entries H1 and H2 for equivalence. */ +static int +builtin_hash_eq (const void *h1, const void *h2) +{ + const struct builtin_hash_struct *p1 = (const struct builtin_hash_struct *) h1; + const struct builtin_hash_struct *p2 = (const struct builtin_hash_struct *) h2; + + return ((p1->mode[0] == p2->mode[0]) + && (p1->mode[1] == p2->mode[1]) + && (p1->mode[2] == p2->mode[2]) + && (p1->mode[3] == p2->mode[3]) + && (p1->uns_p[0] == p2->uns_p[0]) + && (p1->uns_p[1] == p2->uns_p[1]) + && (p1->uns_p[2] == p2->uns_p[2]) + && (p1->uns_p[3] == p2->uns_p[3])); +} + +/* Map types for builtin functions with an explicit return type and up to 3 + arguments. Functions with fewer than 3 arguments use VOIDmode as the type + of the argument. */ +static tree +builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0, + enum machine_mode mode_arg1, enum machine_mode mode_arg2, + enum rs6000_builtins builtin, const char *name) +{ + struct builtin_hash_struct h; + struct builtin_hash_struct *h2; + void **found; + int num_args = 3; + int i; + tree ret_type = NULL_TREE; + tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE }; + tree args; + + /* Create builtin_hash_table. */ + if (builtin_hash_table == NULL) + builtin_hash_table = htab_create_ggc (1500, builtin_hash_function, + builtin_hash_eq, NULL); + + h.type = NULL_TREE; + h.mode[0] = mode_ret; + h.mode[1] = mode_arg0; + h.mode[2] = mode_arg1; + h.mode[3] = mode_arg2; + h.uns_p[0] = 0; + h.uns_p[1] = 0; + h.uns_p[2] = 0; + h.uns_p[3] = 0; + + /* If the builtin is a type that produces unsigned results or takes unsigned + arguments, and it is returned as a decl for the vectorizer (such as + widening multiplies, permute), make sure the arguments and return value + are type correct. */ + switch (builtin) + { + /* unsigned 2 argument functions. 
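builtin_hash_function above folds four (mode, unsigned-flag) pairs into one hash by treating each pair as a digit in a mixed-radix number: radix MAX_MACHINE_MODE for the mode and radix 2 for the flag, so equal signatures hash equal by construction. A runnable sketch under simplified constants:

#include <stdio.h>

#define NUM_SLOTS 4      /* return type + up to 3 arguments */
#define MAX_MODE  64     /* stand-in for MAX_MACHINE_MODE */

struct sig { unsigned mode[NUM_SLOTS]; unsigned uns_p[NUM_SLOTS]; };

static unsigned
sig_hash (const struct sig *s)
{
  unsigned ret = 0;
  for (int i = 0; i < NUM_SLOTS; i++)
    {
      ret = ret * MAX_MODE + s->mode[i];   /* mode digit */
      ret = ret * 2 + s->uns_p[i];         /* unsigned-flag digit */
    }
  return ret;
}

static int
sig_eq (const struct sig *a, const struct sig *b)
{
  for (int i = 0; i < NUM_SLOTS; i++)
    if (a->mode[i] != b->mode[i] || a->uns_p[i] != b->uns_p[i])
      return 0;
  return 1;
}

int
main (void)
{
  struct sig a = { { 3, 3, 3, 0 }, { 0, 1, 1, 0 } };
  struct sig b = a;
  printf ("hash=%u eq=%d\n", sig_hash (&a), sig_eq (&a, &b));
  return 0;
}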
*/ + case ALTIVEC_BUILTIN_VMULEUB_UNS: + case ALTIVEC_BUILTIN_VMULEUH_UNS: + case ALTIVEC_BUILTIN_VMULOUB_UNS: + case ALTIVEC_BUILTIN_VMULOUH_UNS: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + h.uns_p[2] = 1; + break; + + /* unsigned 3 argument functions. */ + case ALTIVEC_BUILTIN_VPERM_16QI_UNS: + case ALTIVEC_BUILTIN_VPERM_8HI_UNS: + case ALTIVEC_BUILTIN_VPERM_4SI_UNS: + case ALTIVEC_BUILTIN_VPERM_2DI_UNS: + case ALTIVEC_BUILTIN_VSEL_16QI_UNS: + case ALTIVEC_BUILTIN_VSEL_8HI_UNS: + case ALTIVEC_BUILTIN_VSEL_4SI_UNS: + case ALTIVEC_BUILTIN_VSEL_2DI_UNS: + h.uns_p[0] = 1; + h.uns_p[1] = 1; + h.uns_p[2] = 1; + h.uns_p[3] = 1; + break; + + /* signed permute functions with unsigned char mask. */ + case ALTIVEC_BUILTIN_VPERM_16QI: + case ALTIVEC_BUILTIN_VPERM_8HI: + case ALTIVEC_BUILTIN_VPERM_4SI: + case ALTIVEC_BUILTIN_VPERM_4SF: + case ALTIVEC_BUILTIN_VPERM_2DI: + case ALTIVEC_BUILTIN_VPERM_2DF: + h.uns_p[3] = 1; + break; + + /* unsigned args, signed return. */ + case VSX_BUILTIN_XVCVUXDDP_UNS: + case VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF: + h.uns_p[1] = 1; + break; + + /* signed args, unsigned return. */ + case VSX_BUILTIN_XVCVDPUXDS_UNS: + case VECTOR_BUILTIN_FIXUNS_V4SF_V4SI: + h.uns_p[0] = 1; + break; + + default: + break; + } + + /* Figure out how many args are present. */ + while (num_args > 0 && h.mode[num_args] == VOIDmode) + num_args--; + + if (num_args == 0) + fatal_error ("internal error: builtin function %s had no type", name); + + ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]]; + if (!ret_type && h.uns_p[0]) + ret_type = builtin_mode_to_type[h.mode[0]][0]; + + if (!ret_type) + fatal_error ("internal error: builtin function %s had an unexpected " + "return type %s", name, GET_MODE_NAME (h.mode[0])); + + for (i = 0; i < num_args; i++) + { + int m = (int) h.mode[i+1]; + int uns_p = h.uns_p[i+1]; + + arg_type[i] = builtin_mode_to_type[m][uns_p]; + if (!arg_type[i] && uns_p) + arg_type[i] = builtin_mode_to_type[m][0]; + + if (!arg_type[i]) + fatal_error ("internal error: builtin function %s, argument %d " + "had unexpected argument type %s", name, i, + GET_MODE_NAME (m)); + } + + found = htab_find_slot (builtin_hash_table, &h, INSERT); + if (*found == NULL) + { + h2 = GGC_NEW (struct builtin_hash_struct); + *h2 = h; + *found = (void *)h2; + args = void_list_node; + + for (i = num_args - 1; i >= 0; i--) + args = tree_cons (NULL_TREE, arg_type[i], args); + + h2->type = build_function_type (ret_type, args); + } + + return ((struct builtin_hash_struct *)(*found))->type; +} + static void rs6000_common_init_builtins (void) { const struct builtin_description *d; size_t i; - tree v2sf_ftype_v2sf_v2sf_v2sf - = build_function_type_list (V2SF_type_node, - V2SF_type_node, V2SF_type_node, - V2SF_type_node, NULL_TREE); + tree opaque_ftype_opaque = NULL_TREE; + tree opaque_ftype_opaque_opaque = NULL_TREE; + tree opaque_ftype_opaque_opaque_opaque = NULL_TREE; + tree v2si_ftype_qi = NULL_TREE; + tree v2si_ftype_v2si_qi = NULL_TREE; + tree v2si_ftype_int_qi = NULL_TREE; - tree v4sf_ftype_v4sf_v4sf_v16qi - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, - V16QI_type_node, NULL_TREE); - tree v4si_ftype_v4si_v4si_v16qi - = build_function_type_list (V4SI_type_node, - V4SI_type_node, V4SI_type_node, - V16QI_type_node, NULL_TREE); - tree v8hi_ftype_v8hi_v8hi_v16qi - = build_function_type_list (V8HI_type_node, - V8HI_type_node, V8HI_type_node, - V16QI_type_node, NULL_TREE); - tree v16qi_ftype_v16qi_v16qi_v16qi - = build_function_type_list (V16QI_type_node, - V16QI_type_node, 
V16QI_type_node, - V16QI_type_node, NULL_TREE); - tree v4si_ftype_int - = build_function_type_list (V4SI_type_node, integer_type_node, NULL_TREE); - tree v8hi_ftype_int - = build_function_type_list (V8HI_type_node, integer_type_node, NULL_TREE); - tree v16qi_ftype_int - = build_function_type_list (V16QI_type_node, integer_type_node, NULL_TREE); - tree v8hi_ftype_v16qi - = build_function_type_list (V8HI_type_node, V16QI_type_node, NULL_TREE); - tree v4sf_ftype_v4sf - = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); + if (!TARGET_PAIRED_FLOAT) + { + builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node; + builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node; + } - tree v2si_ftype_v2si_v2si - = build_function_type_list (opaque_V2SI_type_node, - opaque_V2SI_type_node, - opaque_V2SI_type_node, NULL_TREE); - - tree v2sf_ftype_v2sf_v2sf_spe - = build_function_type_list (opaque_V2SF_type_node, - opaque_V2SF_type_node, - opaque_V2SF_type_node, NULL_TREE); - - tree v2sf_ftype_v2sf_v2sf - = build_function_type_list (V2SF_type_node, - V2SF_type_node, - V2SF_type_node, NULL_TREE); - - - tree v2si_ftype_int_int - = build_function_type_list (opaque_V2SI_type_node, - integer_type_node, integer_type_node, - NULL_TREE); - - tree opaque_ftype_opaque - = build_function_type_list (opaque_V4SI_type_node, - opaque_V4SI_type_node, NULL_TREE); - - tree v2si_ftype_v2si - = build_function_type_list (opaque_V2SI_type_node, - opaque_V2SI_type_node, NULL_TREE); - - tree v2sf_ftype_v2sf_spe - = build_function_type_list (opaque_V2SF_type_node, - opaque_V2SF_type_node, NULL_TREE); - - tree v2sf_ftype_v2sf - = build_function_type_list (V2SF_type_node, - V2SF_type_node, NULL_TREE); - - tree v2sf_ftype_v2si - = build_function_type_list (opaque_V2SF_type_node, - opaque_V2SI_type_node, NULL_TREE); - - tree v2si_ftype_v2sf - = build_function_type_list (opaque_V2SI_type_node, - opaque_V2SF_type_node, NULL_TREE); - - tree v2si_ftype_v2si_char - = build_function_type_list (opaque_V2SI_type_node, - opaque_V2SI_type_node, - char_type_node, NULL_TREE); - - tree v2si_ftype_int_char - = build_function_type_list (opaque_V2SI_type_node, - integer_type_node, char_type_node, NULL_TREE); - - tree v2si_ftype_char - = build_function_type_list (opaque_V2SI_type_node, - char_type_node, NULL_TREE); - - tree int_ftype_int_int - = build_function_type_list (integer_type_node, - integer_type_node, integer_type_node, - NULL_TREE); - - tree opaque_ftype_opaque_opaque - = build_function_type_list (opaque_V4SI_type_node, - opaque_V4SI_type_node, opaque_V4SI_type_node, NULL_TREE); - tree v4si_ftype_v4si_v4si - = build_function_type_list (V4SI_type_node, - V4SI_type_node, V4SI_type_node, NULL_TREE); - tree v4sf_ftype_v4si_int - = build_function_type_list (V4SF_type_node, - V4SI_type_node, integer_type_node, NULL_TREE); - tree v4si_ftype_v4sf_int - = build_function_type_list (V4SI_type_node, - V4SF_type_node, integer_type_node, NULL_TREE); - tree v4si_ftype_v4si_int - = build_function_type_list (V4SI_type_node, - V4SI_type_node, integer_type_node, NULL_TREE); - tree v8hi_ftype_v8hi_int - = build_function_type_list (V8HI_type_node, - V8HI_type_node, integer_type_node, NULL_TREE); - tree v16qi_ftype_v16qi_int - = build_function_type_list (V16QI_type_node, - V16QI_type_node, integer_type_node, NULL_TREE); - tree v16qi_ftype_v16qi_v16qi_int - = build_function_type_list (V16QI_type_node, - V16QI_type_node, V16QI_type_node, - integer_type_node, NULL_TREE); - tree v8hi_ftype_v8hi_v8hi_int - = build_function_type_list 
(V8HI_type_node, - V8HI_type_node, V8HI_type_node, - integer_type_node, NULL_TREE); - tree v4si_ftype_v4si_v4si_int - = build_function_type_list (V4SI_type_node, - V4SI_type_node, V4SI_type_node, - integer_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_v4sf_int - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, - integer_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_v4sf - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, NULL_TREE); - tree opaque_ftype_opaque_opaque_opaque - = build_function_type_list (opaque_V4SI_type_node, - opaque_V4SI_type_node, opaque_V4SI_type_node, - opaque_V4SI_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_v4sf_v4si - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, - V4SI_type_node, NULL_TREE); - tree v4sf_ftype_v4sf_v4sf_v4sf - = build_function_type_list (V4SF_type_node, - V4SF_type_node, V4SF_type_node, - V4SF_type_node, NULL_TREE); - tree v4si_ftype_v4si_v4si_v4si - = build_function_type_list (V4SI_type_node, - V4SI_type_node, V4SI_type_node, - V4SI_type_node, NULL_TREE); - tree v8hi_ftype_v8hi_v8hi - = build_function_type_list (V8HI_type_node, - V8HI_type_node, V8HI_type_node, NULL_TREE); - tree v8hi_ftype_v8hi_v8hi_v8hi - = build_function_type_list (V8HI_type_node, - V8HI_type_node, V8HI_type_node, - V8HI_type_node, NULL_TREE); - tree v4si_ftype_v8hi_v8hi_v4si - = build_function_type_list (V4SI_type_node, - V8HI_type_node, V8HI_type_node, - V4SI_type_node, NULL_TREE); - tree v4si_ftype_v16qi_v16qi_v4si - = build_function_type_list (V4SI_type_node, - V16QI_type_node, V16QI_type_node, - V4SI_type_node, NULL_TREE); - tree v16qi_ftype_v16qi_v16qi - = build_function_type_list (V16QI_type_node, - V16QI_type_node, V16QI_type_node, NULL_TREE); - tree v4si_ftype_v4sf_v4sf - = build_function_type_list (V4SI_type_node, - V4SF_type_node, V4SF_type_node, NULL_TREE); - tree v8hi_ftype_v16qi_v16qi - = build_function_type_list (V8HI_type_node, - V16QI_type_node, V16QI_type_node, NULL_TREE); - tree v4si_ftype_v8hi_v8hi - = build_function_type_list (V4SI_type_node, - V8HI_type_node, V8HI_type_node, NULL_TREE); - tree v8hi_ftype_v4si_v4si - = build_function_type_list (V8HI_type_node, - V4SI_type_node, V4SI_type_node, NULL_TREE); - tree v16qi_ftype_v8hi_v8hi - = build_function_type_list (V16QI_type_node, - V8HI_type_node, V8HI_type_node, NULL_TREE); - tree v4si_ftype_v16qi_v4si - = build_function_type_list (V4SI_type_node, - V16QI_type_node, V4SI_type_node, NULL_TREE); - tree v4si_ftype_v16qi_v16qi - = build_function_type_list (V4SI_type_node, - V16QI_type_node, V16QI_type_node, NULL_TREE); - tree v4si_ftype_v8hi_v4si - = build_function_type_list (V4SI_type_node, - V8HI_type_node, V4SI_type_node, NULL_TREE); - tree v4si_ftype_v8hi - = build_function_type_list (V4SI_type_node, V8HI_type_node, NULL_TREE); - tree int_ftype_v4si_v4si - = build_function_type_list (integer_type_node, - V4SI_type_node, V4SI_type_node, NULL_TREE); - tree int_ftype_v4sf_v4sf - = build_function_type_list (integer_type_node, - V4SF_type_node, V4SF_type_node, NULL_TREE); - tree int_ftype_v16qi_v16qi - = build_function_type_list (integer_type_node, - V16QI_type_node, V16QI_type_node, NULL_TREE); - tree int_ftype_v8hi_v8hi - = build_function_type_list (integer_type_node, - V8HI_type_node, V8HI_type_node, NULL_TREE); - - /* Add the simple ternary operators. */ + /* Add the ternary operators. 
*/ d = bdesc_3arg; for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) { - enum machine_mode mode0, mode1, mode2, mode3; tree type; - bool is_overloaded = d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST - && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST; + int mask = d->mask; - if (is_overloaded) + if ((mask != 0 && (mask & target_flags) == 0) + || (mask == 0 && !TARGET_PAIRED_FLOAT)) + continue; + + if (d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST + && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST) { - mode0 = VOIDmode; - mode1 = VOIDmode; - mode2 = VOIDmode; - mode3 = VOIDmode; + if (! (type = opaque_ftype_opaque_opaque_opaque)) + type = opaque_ftype_opaque_opaque_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, + opaque_V4SI_type_node, + opaque_V4SI_type_node, + NULL_TREE); } else { - if (d->name == 0 || d->icode == CODE_FOR_nothing) + enum insn_code icode = d->icode; + if (d->name == 0 || icode == CODE_FOR_nothing) continue; - mode0 = insn_data[d->icode].operand[0].mode; - mode1 = insn_data[d->icode].operand[1].mode; - mode2 = insn_data[d->icode].operand[2].mode; - mode3 = insn_data[d->icode].operand[3].mode; + type = builtin_function_type (insn_data[icode].operand[0].mode, + insn_data[icode].operand[1].mode, + insn_data[icode].operand[2].mode, + insn_data[icode].operand[3].mode, + d->code, d->name); } - /* When all four are of the same mode. */ - if (mode0 == mode1 && mode1 == mode2 && mode2 == mode3) - { - switch (mode0) - { - case VOIDmode: - type = opaque_ftype_opaque_opaque_opaque; - break; - case V4SImode: - type = v4si_ftype_v4si_v4si_v4si; - break; - case V4SFmode: - type = v4sf_ftype_v4sf_v4sf_v4sf; - break; - case V8HImode: - type = v8hi_ftype_v8hi_v8hi_v8hi; - break; - case V16QImode: - type = v16qi_ftype_v16qi_v16qi_v16qi; - break; - case V2SFmode: - type = v2sf_ftype_v2sf_v2sf_v2sf; - break; - default: - gcc_unreachable (); - } - } - else if (mode0 == mode1 && mode1 == mode2 && mode3 == V16QImode) - { - switch (mode0) - { - case V4SImode: - type = v4si_ftype_v4si_v4si_v16qi; - break; - case V4SFmode: - type = v4sf_ftype_v4sf_v4sf_v16qi; - break; - case V8HImode: - type = v8hi_ftype_v8hi_v8hi_v16qi; - break; - case V16QImode: - type = v16qi_ftype_v16qi_v16qi_v16qi; - break; - default: - gcc_unreachable (); - } - } - else if (mode0 == V4SImode && mode1 == V16QImode && mode2 == V16QImode - && mode3 == V4SImode) - type = v4si_ftype_v16qi_v16qi_v4si; - else if (mode0 == V4SImode && mode1 == V8HImode && mode2 == V8HImode - && mode3 == V4SImode) - type = v4si_ftype_v8hi_v8hi_v4si; - else if (mode0 == V4SFmode && mode1 == V4SFmode && mode2 == V4SFmode - && mode3 == V4SImode) - type = v4sf_ftype_v4sf_v4sf_v4si; - - /* vchar, vchar, vchar, 4-bit literal. */ - else if (mode0 == V16QImode && mode1 == mode0 && mode2 == mode0 - && mode3 == QImode) - type = v16qi_ftype_v16qi_v16qi_int; - - /* vshort, vshort, vshort, 4-bit literal. */ - else if (mode0 == V8HImode && mode1 == mode0 && mode2 == mode0 - && mode3 == QImode) - type = v8hi_ftype_v8hi_v8hi_int; - - /* vint, vint, vint, 4-bit literal. */ - else if (mode0 == V4SImode && mode1 == mode0 && mode2 == mode0 - && mode3 == QImode) - type = v4si_ftype_v4si_v4si_int; - - /* vfloat, vfloat, vfloat, 4-bit literal. */ - else if (mode0 == V4SFmode && mode1 == mode0 && mode2 == mode0 - && mode3 == QImode) - type = v4sf_ftype_v4sf_v4sf_int; - - else - gcc_unreachable (); - def_builtin (d->mask, d->name, type, d->code); } - /* Add the simple binary operators. 
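The opaque_ftype_* variables above use an assign-and-test idiom so the shared function type is built only once, the first time any overloaded builtin in the loop needs it. A self-contained illustration of that lazy cache, with the type node modeled as a string:

#include <stdio.h>

static const char *opaque_ftype_opaque_opaque;   /* starts NULL, like the tree */

static const char *
build_type (const char *desc)
{
  printf ("building %s\n", desc);                /* runs only once */
  return desc;
}

static const char *
get_binary_opaque_type (void)
{
  const char *type;
  /* Assign-and-test, exactly as in the loops above.  */
  if (! (type = opaque_ftype_opaque_opaque))
    type = opaque_ftype_opaque_opaque
      = build_type ("opaque (*)(opaque, opaque)");
  return type;
}

int
main (void)
{
  get_binary_opaque_type ();
  get_binary_opaque_type ();                     /* second call hits the cache */
  return 0;
}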
*/ - d = (struct builtin_description *) bdesc_2arg; + /* Add the binary operators. */ + d = bdesc_2arg; for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) { enum machine_mode mode0, mode1, mode2; tree type; - bool is_overloaded = d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST - && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST; + int mask = d->mask; - if (is_overloaded) + if ((mask != 0 && (mask & target_flags) == 0) + || (mask == 0 && !TARGET_PAIRED_FLOAT)) + continue; + + if (d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST + && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST) { - mode0 = VOIDmode; - mode1 = VOIDmode; - mode2 = VOIDmode; + if (! (type = opaque_ftype_opaque_opaque)) + type = opaque_ftype_opaque_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, + opaque_V4SI_type_node, + NULL_TREE); } else { - if (d->name == 0 || d->icode == CODE_FOR_nothing) + enum insn_code icode = d->icode; + if (d->name == 0 || icode == CODE_FOR_nothing) continue; - mode0 = insn_data[d->icode].operand[0].mode; - mode1 = insn_data[d->icode].operand[1].mode; - mode2 = insn_data[d->icode].operand[2].mode; - } + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; + mode2 = insn_data[icode].operand[2].mode; - /* When all three operands are of the same mode. */ - if (mode0 == mode1 && mode1 == mode2) - { - switch (mode0) + if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode) { - case VOIDmode: - type = opaque_ftype_opaque_opaque; - break; - case V4SFmode: - type = v4sf_ftype_v4sf_v4sf; - break; - case V4SImode: - type = v4si_ftype_v4si_v4si; - break; - case V16QImode: - type = v16qi_ftype_v16qi_v16qi; - break; - case V8HImode: - type = v8hi_ftype_v8hi_v8hi; - break; - case V2SImode: - type = v2si_ftype_v2si_v2si; - break; - case V2SFmode: - if (TARGET_PAIRED_FLOAT) - type = v2sf_ftype_v2sf_v2sf; - else - type = v2sf_ftype_v2sf_v2sf_spe; - break; - case SImode: - type = int_ftype_int_int; - break; - default: - gcc_unreachable (); + if (! (type = v2si_ftype_v2si_qi)) + type = v2si_ftype_v2si_qi + = build_function_type_list (opaque_V2SI_type_node, + opaque_V2SI_type_node, + char_type_node, + NULL_TREE); } - } - /* A few other combos we really don't want to do manually. */ - - /* vint, vfloat, vfloat. */ - else if (mode0 == V4SImode && mode1 == V4SFmode && mode2 == V4SFmode) - type = v4si_ftype_v4sf_v4sf; - - /* vshort, vchar, vchar. */ - else if (mode0 == V8HImode && mode1 == V16QImode && mode2 == V16QImode) - type = v8hi_ftype_v16qi_v16qi; - - /* vint, vshort, vshort. */ - else if (mode0 == V4SImode && mode1 == V8HImode && mode2 == V8HImode) - type = v4si_ftype_v8hi_v8hi; - - /* vshort, vint, vint. */ - else if (mode0 == V8HImode && mode1 == V4SImode && mode2 == V4SImode) - type = v8hi_ftype_v4si_v4si; - - /* vchar, vshort, vshort. */ - else if (mode0 == V16QImode && mode1 == V8HImode && mode2 == V8HImode) - type = v16qi_ftype_v8hi_v8hi; - - /* vint, vchar, vint. */ - else if (mode0 == V4SImode && mode1 == V16QImode && mode2 == V4SImode) - type = v4si_ftype_v16qi_v4si; - - /* vint, vchar, vchar. */ - else if (mode0 == V4SImode && mode1 == V16QImode && mode2 == V16QImode) - type = v4si_ftype_v16qi_v16qi; - - /* vint, vshort, vint. */ - else if (mode0 == V4SImode && mode1 == V8HImode && mode2 == V4SImode) - type = v4si_ftype_v8hi_v4si; - - /* vint, vint, 5-bit literal. */ - else if (mode0 == V4SImode && mode1 == V4SImode && mode2 == QImode) - type = v4si_ftype_v4si_int; - - /* vshort, vshort, 5-bit literal. 
*/ - else if (mode0 == V8HImode && mode1 == V8HImode && mode2 == QImode) - type = v8hi_ftype_v8hi_int; - - /* vchar, vchar, 5-bit literal. */ - else if (mode0 == V16QImode && mode1 == V16QImode && mode2 == QImode) - type = v16qi_ftype_v16qi_int; - - /* vfloat, vint, 5-bit literal. */ - else if (mode0 == V4SFmode && mode1 == V4SImode && mode2 == QImode) - type = v4sf_ftype_v4si_int; - - /* vint, vfloat, 5-bit literal. */ - else if (mode0 == V4SImode && mode1 == V4SFmode && mode2 == QImode) - type = v4si_ftype_v4sf_int; - - else if (mode0 == V2SImode && mode1 == SImode && mode2 == SImode) - type = v2si_ftype_int_int; - - else if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode) - type = v2si_ftype_v2si_char; - - else if (mode0 == V2SImode && mode1 == SImode && mode2 == QImode) - type = v2si_ftype_int_char; - - else - { - /* int, x, x. */ - gcc_assert (mode0 == SImode); - switch (mode1) + else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT + && mode2 == QImode) { - case V4SImode: - type = int_ftype_v4si_v4si; - break; - case V4SFmode: - type = int_ftype_v4sf_v4sf; - break; - case V16QImode: - type = int_ftype_v16qi_v16qi; - break; - case V8HImode: - type = int_ftype_v8hi_v8hi; - break; - default: - gcc_unreachable (); + if (! (type = v2si_ftype_int_qi)) + type = v2si_ftype_int_qi + = build_function_type_list (opaque_V2SI_type_node, + integer_type_node, + char_type_node, + NULL_TREE); } + + else + type = builtin_function_type (mode0, mode1, mode2, VOIDmode, + d->code, d->name); } def_builtin (d->mask, d->name, type, d->code); @@ -11366,54 +11536,44 @@ rs6000_common_init_builtins (void) { enum machine_mode mode0, mode1; tree type; + int mask = d->mask; bool is_overloaded = d->code >= ALTIVEC_BUILTIN_OVERLOADED_FIRST && d->code <= ALTIVEC_BUILTIN_OVERLOADED_LAST; + if ((mask != 0 && (mask & target_flags) == 0) + || (mask == 0 && !TARGET_PAIRED_FLOAT)) + continue; + if (is_overloaded) - { - mode0 = VOIDmode; - mode1 = VOIDmode; - } + { + if (! 
(type = opaque_ftype_opaque)) + type = opaque_ftype_opaque + = build_function_type_list (opaque_V4SI_type_node, + opaque_V4SI_type_node, + NULL_TREE); + } else { - if (d->name == 0 || d->icode == CODE_FOR_nothing) + enum insn_code icode = d->icode; + if (d->name == 0 || icode == CODE_FOR_nothing) continue; - mode0 = insn_data[d->icode].operand[0].mode; - mode1 = insn_data[d->icode].operand[1].mode; - } + mode0 = insn_data[icode].operand[0].mode; + mode1 = insn_data[icode].operand[1].mode; - if (mode0 == V4SImode && mode1 == QImode) - type = v4si_ftype_int; - else if (mode0 == V8HImode && mode1 == QImode) - type = v8hi_ftype_int; - else if (mode0 == V16QImode && mode1 == QImode) - type = v16qi_ftype_int; - else if (mode0 == VOIDmode && mode1 == VOIDmode) - type = opaque_ftype_opaque; - else if (mode0 == V4SFmode && mode1 == V4SFmode) - type = v4sf_ftype_v4sf; - else if (mode0 == V8HImode && mode1 == V16QImode) - type = v8hi_ftype_v16qi; - else if (mode0 == V4SImode && mode1 == V8HImode) - type = v4si_ftype_v8hi; - else if (mode0 == V2SImode && mode1 == V2SImode) - type = v2si_ftype_v2si; - else if (mode0 == V2SFmode && mode1 == V2SFmode) - { - if (TARGET_PAIRED_FLOAT) - type = v2sf_ftype_v2sf; - else - type = v2sf_ftype_v2sf_spe; - } - else if (mode0 == V2SFmode && mode1 == V2SImode) - type = v2sf_ftype_v2si; - else if (mode0 == V2SImode && mode1 == V2SFmode) - type = v2si_ftype_v2sf; - else if (mode0 == V2SImode && mode1 == QImode) - type = v2si_ftype_char; - else - gcc_unreachable (); + if (mode0 == V2SImode && mode1 == QImode) + { + if (! (type = v2si_ftype_qi)) + type = v2si_ftype_qi + = build_function_type_list (opaque_V2SI_type_node, + char_type_node, + NULL_TREE); + } + + else + type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode, + d->code, d->name); + } def_builtin (d->mask, d->name, type, d->code); } @@ -12257,6 +12417,446 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) return NULL_TREE; } +enum reload_reg_type { + GPR_REGISTER_TYPE, + VECTOR_REGISTER_TYPE, + OTHER_REGISTER_TYPE +}; + +static enum reload_reg_type +rs6000_reload_register_type (enum reg_class rclass) +{ + switch (rclass) + { + case GENERAL_REGS: + case BASE_REGS: + return GPR_REGISTER_TYPE; + + case FLOAT_REGS: + case ALTIVEC_REGS: + case VSX_REGS: + return VECTOR_REGISTER_TYPE; + + default: + return OTHER_REGISTER_TYPE; + } +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. + + For VSX and Altivec, we may need a register to convert sp+offset into + reg+sp. */ + +static enum reg_class +rs6000_secondary_reload (bool in_p, + rtx x, + enum reg_class rclass, + enum machine_mode mode, + secondary_reload_info *sri) +{ + enum reg_class ret = ALL_REGS; + enum insn_code icode; + bool default_p = false; + + sri->icode = CODE_FOR_nothing; + + /* Convert vector loads and stores into gprs to use an additional base + register. */ + icode = rs6000_vector_reload[mode][in_p != false]; + if (icode != CODE_FOR_nothing) + { + ret = NO_REGS; + sri->icode = CODE_FOR_nothing; + sri->extra_cost = 0; + + if (GET_CODE (x) == MEM) + { + rtx addr = XEXP (x, 0); + + /* Loads to and stores from gprs can do reg+offset, and wouldn't need + an extra register in that case, but it would need an extra + register if the addressing is reg+reg or (reg+reg)&(-16). 
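Conceptually, the secondary-reload hook answers "does this (register class, address shape) pair need a scratch register, and at what extra cost?". A simplified, standalone model of that decision follows; the address kinds and cost numbers are invented stand-ins, not the actual rs6000 costs.

#include <stdio.h>

enum addr_kind { ADDR_REG, ADDR_REG_OFFSET, ADDR_REG_REG, ADDR_AND_M16 };
enum rclass    { CLASS_GPR, CLASS_VECTOR };

struct reload_info { int need_scratch; int extra_cost; };

static struct reload_info
classify (enum rclass rclass, enum addr_kind addr)
{
  struct reload_info ri = { 0, 0 };

  if (rclass == CLASS_GPR)
    {
      /* GPR loads/stores handle reg and reg+offset natively; reg+reg and
         (reg+reg)&-16 must be rewritten through a scratch register.  */
      if (addr == ADDR_REG_REG || addr == ADDR_AND_M16)
        {
          ri.need_scratch = 1;
          ri.extra_cost = (addr == ADDR_AND_M16) ? 2 : 1;
        }
    }
  else
    {
      /* Vector registers only do reg and reg+reg (Altivec also accepts the
         AND -16 form); reg+offset needs the offset moved to a register.  */
      if (addr == ADDR_REG_OFFSET)
        {
          ri.need_scratch = 1;
          ri.extra_cost = 1;
        }
    }
  return ri;
}

int
main (void)
{
  struct reload_info ri = classify (CLASS_VECTOR, ADDR_REG_OFFSET);
  printf ("scratch=%d cost=%d\n", ri.need_scratch, ri.extra_cost);
  return 0;
}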
*/ + if (rclass == GENERAL_REGS || rclass == BASE_REGS) + { + if (!legitimate_indirect_address_p (addr, false) + && !rs6000_legitimate_offset_address_p (TImode, addr, false)) + { + sri->icode = icode; + /* account for splitting the loads, and converting the + address from reg+reg to reg. */ + sri->extra_cost = (((TARGET_64BIT) ? 3 : 5) + + ((GET_CODE (addr) == AND) ? 1 : 0)); + } + } + /* Loads to and stores from vector registers can only do reg+reg + addressing. Altivec registers can also do (reg+reg)&(-16). */ + else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS + || rclass == FLOAT_REGS || rclass == NO_REGS) + { + if (!VECTOR_MEM_ALTIVEC_P (mode) + && GET_CODE (addr) == AND + && GET_CODE (XEXP (addr, 1)) == CONST_INT + && INTVAL (XEXP (addr, 1)) == -16 + && (legitimate_indirect_address_p (XEXP (addr, 0), false) + || legitimate_indexed_address_p (XEXP (addr, 0), false))) + { + sri->icode = icode; + sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS) + ? 2 : 1); + } + else if (!legitimate_indirect_address_p (addr, false) + && (rclass == NO_REGS + || !legitimate_indexed_address_p (addr, false))) + { + sri->icode = icode; + sri->extra_cost = 1; + } + else + icode = CODE_FOR_nothing; + } + /* Any other loads, including to pseudo registers which haven't been + assigned to a register yet, default to require a scratch + register. */ + else + { + sri->icode = icode; + sri->extra_cost = 2; + } + } + else if (REG_P (x)) + { + int regno = true_regnum (x); + + icode = CODE_FOR_nothing; + if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) + default_p = true; + else + { + enum reg_class xclass = REGNO_REG_CLASS (regno); + enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass); + enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass); + + /* If memory is needed, use default_secondary_reload to create the + stack slot. */ + if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE) + default_p = true; + else + ret = NO_REGS; + } + } + else + default_p = true; + } + else + default_p = true; + + if (default_p) + ret = default_secondary_reload (in_p, x, rclass, mode, sri); + + gcc_assert (ret != ALL_REGS); + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, " + "mode = %s", + reg_class_names[ret], + in_p ? "true" : "false", + reg_class_names[rclass], + GET_MODE_NAME (mode)); + + if (default_p) + fprintf (stderr, ", default secondary reload"); + + if (sri->icode != CODE_FOR_nothing) + fprintf (stderr, ", reload func = %s, extra cost = %d\n", + insn_data[sri->icode].name, sri->extra_cost); + else + fprintf (stderr, "\n"); + + debug_rtx (x); + } + + return ret; +} + +/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset + to SP+reg addressing. */ + +void +rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p) +{ + int regno = true_regnum (reg); + enum machine_mode mode = GET_MODE (reg); + enum reg_class rclass; + rtx addr; + rtx and_op2 = NULL_RTX; + rtx addr_op1; + rtx addr_op2; + rtx scratch_or_premodify = scratch; + rtx and_rtx; + rtx cc_clobber; + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nrs6000_secondary_reload_inner, type = %s\n", + store_p ? 
"store" : "load"); + fprintf (stderr, "reg:\n"); + debug_rtx (reg); + fprintf (stderr, "mem:\n"); + debug_rtx (mem); + fprintf (stderr, "scratch:\n"); + debug_rtx (scratch); + } + + gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER); + gcc_assert (GET_CODE (mem) == MEM); + rclass = REGNO_REG_CLASS (regno); + addr = XEXP (mem, 0); + + switch (rclass) + { + /* GPRs can handle reg + small constant, all other addresses need to use + the scratch register. */ + case GENERAL_REGS: + case BASE_REGS: + if (GET_CODE (addr) == AND) + { + and_op2 = XEXP (addr, 1); + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) == PRE_MODIFY) + { + scratch_or_premodify = XEXP (addr, 0); + gcc_assert (REG_P (scratch_or_premodify)); + gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS); + addr = XEXP (addr, 1); + } + + if (GET_CODE (addr) == PLUS + && (!rs6000_legitimate_offset_address_p (TImode, addr, false) + || and_op2 != NULL_RTX)) + { + addr_op1 = XEXP (addr, 0); + addr_op2 = XEXP (addr, 1); + gcc_assert (legitimate_indirect_address_p (addr_op1, false)); + + if (!REG_P (addr_op2) + && (GET_CODE (addr_op2) != CONST_INT + || !satisfies_constraint_I (addr_op2))) + { + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, + "\nMove plus addr to register %s, mode = %s: ", + rs6000_reg_names[REGNO (scratch)], + GET_MODE_NAME (mode)); + debug_rtx (addr_op2); + } + rs6000_emit_move (scratch, addr_op2, Pmode); + addr_op2 = scratch; + } + + emit_insn (gen_rtx_SET (VOIDmode, + scratch_or_premodify, + gen_rtx_PLUS (Pmode, + addr_op1, + addr_op2))); + + addr = scratch_or_premodify; + scratch_or_premodify = scratch; + } + else if (!legitimate_indirect_address_p (addr, false) + && !rs6000_legitimate_offset_address_p (TImode, addr, false)) + { + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nMove addr to register %s, mode = %s: ", + rs6000_reg_names[REGNO (scratch_or_premodify)], + GET_MODE_NAME (mode)); + debug_rtx (addr); + } + rs6000_emit_move (scratch_or_premodify, addr, Pmode); + addr = scratch_or_premodify; + scratch_or_premodify = scratch; + } + break; + + /* Float/Altivec registers can only handle reg+reg addressing. Move + other addresses into a scratch register. */ + case FLOAT_REGS: + case VSX_REGS: + case ALTIVEC_REGS: + + /* With float regs, we need to handle the AND ourselves, since we can't + use the Altivec instruction with an implicit AND -16. Allow scalar + loads to float registers to use reg+offset even if VSX. */ + if (GET_CODE (addr) == AND + && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16 + || GET_CODE (XEXP (addr, 1)) != CONST_INT + || INTVAL (XEXP (addr, 1)) != -16 + || !VECTOR_MEM_ALTIVEC_P (mode))) + { + and_op2 = XEXP (addr, 1); + addr = XEXP (addr, 0); + } + + /* If we aren't using a VSX load, save the PRE_MODIFY register and use it + as the address later. 
*/ + if (GET_CODE (addr) == PRE_MODIFY + && (!VECTOR_MEM_VSX_P (mode) + || and_op2 != NULL_RTX + || !legitimate_indexed_address_p (XEXP (addr, 1), false))) + { + scratch_or_premodify = XEXP (addr, 0); + gcc_assert (legitimate_indirect_address_p (scratch_or_premodify, + false)); + gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS); + addr = XEXP (addr, 1); + } + + if (legitimate_indirect_address_p (addr, false) /* reg */ + || legitimate_indexed_address_p (addr, false) /* reg+reg */ + || GET_CODE (addr) == PRE_MODIFY /* VSX pre-modify */ + || (GET_CODE (addr) == AND /* Altivec memory */ + && GET_CODE (XEXP (addr, 1)) == CONST_INT + && INTVAL (XEXP (addr, 1)) == -16 + && VECTOR_MEM_ALTIVEC_P (mode)) + || (rclass == FLOAT_REGS /* legacy float mem */ + && GET_MODE_SIZE (mode) == 8 + && and_op2 == NULL_RTX + && scratch_or_premodify == scratch + && rs6000_legitimate_offset_address_p (mode, addr, false))) + ; + + else if (GET_CODE (addr) == PLUS) + { + addr_op1 = XEXP (addr, 0); + addr_op2 = XEXP (addr, 1); + gcc_assert (REG_P (addr_op1)); + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ", + rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode)); + debug_rtx (addr_op2); + } + rs6000_emit_move (scratch, addr_op2, Pmode); + emit_insn (gen_rtx_SET (VOIDmode, + scratch_or_premodify, + gen_rtx_PLUS (Pmode, + addr_op1, + scratch))); + addr = scratch_or_premodify; + scratch_or_premodify = scratch; + } + + else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST + || GET_CODE (addr) == CONST_INT || REG_P (addr)) + { + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nMove addr to register %s, mode = %s: ", + rs6000_reg_names[REGNO (scratch_or_premodify)], + GET_MODE_NAME (mode)); + debug_rtx (addr); + } + + rs6000_emit_move (scratch_or_premodify, addr, Pmode); + addr = scratch_or_premodify; + scratch_or_premodify = scratch; + } + + else + gcc_unreachable (); + + break; + + default: + gcc_unreachable (); + } + + /* If the original address involved a pre-modify that we couldn't use the VSX + memory instruction with update, and we haven't taken care of already, + store the address in the pre-modify register and use that as the + address. */ + if (scratch_or_premodify != scratch && scratch_or_premodify != addr) + { + emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr)); + addr = scratch_or_premodify; + } + + /* If the original address involved an AND -16 and we couldn't use an ALTIVEC + memory instruction, recreate the AND now, including the clobber which is + generated by the general ANDSI3/ANDDI3 patterns for the + andi. instruction. */ + if (and_op2 != NULL_RTX) + { + if (! legitimate_indirect_address_p (addr, false)) + { + emit_insn (gen_rtx_SET (VOIDmode, scratch, addr)); + addr = scratch; + } + + if (TARGET_DEBUG_ADDR) + { + fprintf (stderr, "\nAnd addr to register %s, mode = %s: ", + rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode)); + debug_rtx (and_op2); + } + + and_rtx = gen_rtx_SET (VOIDmode, + scratch, + gen_rtx_AND (Pmode, + addr, + and_op2)); + + cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, + gen_rtvec (2, and_rtx, cc_clobber))); + addr = scratch; + } + + /* Adjust the address if it changed. */ + if (addr != XEXP (mem, 0)) + { + mem = change_address (mem, mode, addr); + if (TARGET_DEBUG_ADDR) + fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n"); + } + + /* Now create the move. 
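The effect of rs6000_secondary_reload_inner can be pictured as rewriting a base+offset address that the vector unit cannot encode into reg+reg form, by first moving the offset into the scratch register. A toy model where registers are plain variables; the function names are hypothetical:

#include <stdio.h>
#include <stdint.h>

static int32_t
vector_style_load (const int32_t *base, intptr_t index_reg)
{
  /* Models an lvx/lxvw4x-class instruction: reg+reg addressing only.  */
  return base[index_reg / (intptr_t) sizeof (int32_t)];
}

static int32_t
load_with_offset (const int32_t *base, intptr_t byte_offset)
{
  /* The insn cannot take base+offset directly, so "reload" materializes
     the offset in a scratch register and uses base+scratch instead.  */
  intptr_t scratch = byte_offset;
  return vector_style_load (base, scratch);
}

int
main (void)
{
  int32_t buf[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  printf ("%d\n", (int) load_with_offset (buf, 3 * sizeof (int32_t))); /* 13 */
  return 0;
}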
*/ + if (store_p) + emit_insn (gen_rtx_SET (VOIDmode, mem, reg)); + else + emit_insn (gen_rtx_SET (VOIDmode, reg, mem)); + + return; +} + +/* Target hook to return the cover classes for Integrated Register Allocator. + Cover classes is a set of non-intersected register classes covering all hard + registers used for register allocation purpose. Any move between two + registers of a cover class should be cheaper than load or store of the + registers. The value is array of register classes with LIM_REG_CLASSES used + as the end marker. + + We need two IRA_COVER_CLASSES, one for pre-VSX, and the other for VSX to + account for the Altivec and Floating registers being subsets of the VSX + register set under VSX, but distinct register sets on pre-VSX machines. */ + +static const enum reg_class * +rs6000_ira_cover_classes (void) +{ + static const enum reg_class cover_pre_vsx[] = IRA_COVER_CLASSES_PRE_VSX; + static const enum reg_class cover_vsx[] = IRA_COVER_CLASSES_VSX; + + return (TARGET_VSX) ? cover_vsx : cover_pre_vsx; +} + /* Allocate a 64-bit stack slot to be used for copying SDmode values through if this function has any SDmode references. */ @@ -12518,6 +13118,13 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode, && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS)) return (mode != SDmode) ? NO_REGS : GENERAL_REGS; + /* Memory, and FP/altivec registers can go into fp/altivec registers under + VSX. */ + if (TARGET_VSX + && (regno == -1 || VSX_REGNO_P (regno)) + && VSX_REG_CLASS_P (rclass)) + return NO_REGS; + /* Memory, and AltiVec registers can go into AltiVec registers. */ if ((regno == -1 || ALTIVEC_REGNO_P (regno)) && rclass == ALTIVEC_REGS) @@ -14225,55 +14832,34 @@ output_e500_flip_gt_bit (rtx dst, rtx src) return string; } -/* Return insn index for the vector compare instruction for given CODE, - and DEST_MODE, OP_MODE. Return INSN_NOT_AVAILABLE if valid insn is - not available. */ +/* Return insn for VSX or Altivec comparisons. 
*/ -static int -get_vec_cmp_insn (enum rtx_code code, - enum machine_mode dest_mode, - enum machine_mode op_mode) +static rtx +rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1) { - if (!TARGET_ALTIVEC) - return INSN_NOT_AVAILABLE; + rtx mask; + enum machine_mode mode = GET_MODE (op0); switch (code) { - case EQ: - if (dest_mode == V16QImode && op_mode == V16QImode) - return UNSPEC_VCMPEQUB; - if (dest_mode == V8HImode && op_mode == V8HImode) - return UNSPEC_VCMPEQUH; - if (dest_mode == V4SImode && op_mode == V4SImode) - return UNSPEC_VCMPEQUW; - if (dest_mode == V4SImode && op_mode == V4SFmode) - return UNSPEC_VCMPEQFP; - break; - case GE: - if (dest_mode == V4SImode && op_mode == V4SFmode) - return UNSPEC_VCMPGEFP; - case GT: - if (dest_mode == V16QImode && op_mode == V16QImode) - return UNSPEC_VCMPGTSB; - if (dest_mode == V8HImode && op_mode == V8HImode) - return UNSPEC_VCMPGTSH; - if (dest_mode == V4SImode && op_mode == V4SImode) - return UNSPEC_VCMPGTSW; - if (dest_mode == V4SImode && op_mode == V4SFmode) - return UNSPEC_VCMPGTFP; - break; - case GTU: - if (dest_mode == V16QImode && op_mode == V16QImode) - return UNSPEC_VCMPGTUB; - if (dest_mode == V8HImode && op_mode == V8HImode) - return UNSPEC_VCMPGTUH; - if (dest_mode == V4SImode && op_mode == V4SImode) - return UNSPEC_VCMPGTUW; - break; default: break; + + case GE: + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + return NULL_RTX; + + case EQ: + case GT: + case GTU: + mask = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (VOIDmode, + mask, + gen_rtx_fmt_ee (code, mode, op0, op1))); + return mask; } - return INSN_NOT_AVAILABLE; + + return NULL_RTX; } /* Emit vector compare for operands OP0 and OP1 using code RCODE. @@ -14284,129 +14870,114 @@ rs6000_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1, enum machine_mode dmode) { - int vec_cmp_insn; rtx mask; - enum machine_mode dest_mode; - enum machine_mode op_mode = GET_MODE (op1); + bool swap_operands = false; + bool try_again = false; - gcc_assert (TARGET_ALTIVEC); + gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode)); gcc_assert (GET_MODE (op0) == GET_MODE (op1)); - /* Floating point vector compare instructions uses destination V4SImode. - Move destination to appropriate mode later. */ - if (dmode == V4SFmode) - dest_mode = V4SImode; - else - dest_mode = dmode; + /* See if the comparison works as is. */ + mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); + if (mask) + return mask; - mask = gen_reg_rtx (dest_mode); - vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); - - if (vec_cmp_insn == INSN_NOT_AVAILABLE) + switch (rcode) { - bool swap_operands = false; - bool try_again = false; - switch (rcode) - { - case LT: - rcode = GT; - swap_operands = true; - try_again = true; - break; - case LTU: - rcode = GTU; - swap_operands = true; - try_again = true; - break; - case NE: - case UNLE: - case UNLT: - case UNGE: - case UNGT: - /* Invert condition and try again. - e.g., A != B becomes ~(A==B). */ + case LT: + rcode = GT; + swap_operands = true; + try_again = true; + break; + case LTU: + rcode = GTU; + swap_operands = true; + try_again = true; + break; + case NE: + case UNLE: + case UNLT: + case UNGE: + case UNGT: + /* Invert condition and try again. + e.g., A != B becomes ~(A==B). 
*/ + { + enum rtx_code rev_code; + enum insn_code nor_code; + rtx mask2; + + rev_code = reverse_condition_maybe_unordered (rcode); + if (rev_code == UNKNOWN) + return NULL_RTX; + + nor_code = optab_handler (one_cmpl_optab, (int)dmode)->insn_code; + if (nor_code == CODE_FOR_nothing) + return NULL_RTX; + + mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode); + if (!mask2) + return NULL_RTX; + + mask = gen_reg_rtx (dmode); + emit_insn (GEN_FCN (nor_code) (mask, mask2)); + return mask; + } + break; + case GE: + case GEU: + case LE: + case LEU: + /* Try GT/GTU/LT/LTU OR EQ */ + { + rtx c_rtx, eq_rtx; + enum insn_code ior_code; + enum rtx_code new_code; + + switch (rcode) { - enum rtx_code rev_code; - enum insn_code nor_code; - rtx eq_rtx; + case GE: + new_code = GT; + break; - rev_code = reverse_condition_maybe_unordered (rcode); - eq_rtx = rs6000_emit_vector_compare (rev_code, op0, op1, - dest_mode); + case GEU: + new_code = GTU; + break; - nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code; - gcc_assert (nor_code != CODE_FOR_nothing); - emit_insn (GEN_FCN (nor_code) (mask, eq_rtx)); + case LE: + new_code = LT; + break; - if (dmode != dest_mode) - { - rtx temp = gen_reg_rtx (dest_mode); - convert_move (temp, mask, 0); - return temp; - } - return mask; + case LEU: + new_code = LTU; + break; + + default: + gcc_unreachable (); } - break; - case GE: - case GEU: - case LE: - case LEU: - /* Try GT/GTU/LT/LTU OR EQ */ - { - rtx c_rtx, eq_rtx; - enum insn_code ior_code; - enum rtx_code new_code; - switch (rcode) - { - case GE: - new_code = GT; - break; + ior_code = optab_handler (ior_optab, (int)dmode)->insn_code; + if (ior_code == CODE_FOR_nothing) + return NULL_RTX; - case GEU: - new_code = GTU; - break; + c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode); + if (!c_rtx) + return NULL_RTX; - case LE: - new_code = LT; - break; + eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode); + if (!eq_rtx) + return NULL_RTX; - case LEU: - new_code = LTU; - break; - - default: - gcc_unreachable (); - } - - c_rtx = rs6000_emit_vector_compare (new_code, - op0, op1, dest_mode); - eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, - dest_mode); - - ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code; - gcc_assert (ior_code != CODE_FOR_nothing); - emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); - if (dmode != dest_mode) - { - rtx temp = gen_reg_rtx (dest_mode); - convert_move (temp, mask, 0); - return temp; - } - return mask; - } - break; - default: - gcc_unreachable (); - } - - if (try_again) - { - vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode); - /* You only get two chances. */ - gcc_assert (vec_cmp_insn != INSN_NOT_AVAILABLE); - } + mask = gen_reg_rtx (dmode); + emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx)); + return mask; + } + break; + default: + return NULL_RTX; + } + if (try_again) + { if (swap_operands) { rtx tmp; @@ -14414,91 +14985,84 @@ rs6000_emit_vector_compare (enum rtx_code rcode, op0 = op1; op1 = tmp; } + + mask = rs6000_emit_vector_compare_inner (rcode, op0, op1); + if (mask) + return mask; } - emit_insn (gen_rtx_SET (VOIDmode, mask, - gen_rtx_UNSPEC (dest_mode, - gen_rtvec (2, op0, op1), - vec_cmp_insn))); - if (dmode != dest_mode) - { - rtx temp = gen_reg_rtx (dest_mode); - convert_move (temp, mask, 0); - return temp; - } - return mask; + /* You only get two chances. */ + return NULL_RTX; } -/* Return vector select instruction for MODE. Return INSN_NOT_AVAILABLE, if - valid insn doesn exist for given mode. 
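The fallback strategy above reduces every comparison to the EQ/GT/GTU forms the hardware provides directly: swap operands for LT/LTU, invert EQ for NE, and OR a GT-style test with EQ for GE/LE. A runnable one-lane model, using an all-ones/all-zeros unsigned word as the comparison mask:

#include <stdio.h>
#include <stdint.h>

enum cmp { EQ, GT, GTU, LT, LTU, NE, GE, LE };

static uint32_t
direct_cmp (enum cmp code, int32_t a, int32_t b)
{
  switch (code)
    {
    case EQ:  return a == b ? ~0u : 0;
    case GT:  return a > b ? ~0u : 0;
    case GTU: return (uint32_t) a > (uint32_t) b ? ~0u : 0;
    default:  return 0;                        /* not directly available */
    }
}

static uint32_t
emit_compare (enum cmp code, int32_t a, int32_t b)
{
  switch (code)
    {
    case EQ: case GT: case GTU:
      return direct_cmp (code, a, b);
    case LT:  return emit_compare (GT, b, a);  /* swap operands */
    case LTU: return emit_compare (GTU, b, a);
    case NE:  return ~emit_compare (EQ, a, b); /* A != B becomes ~(A==B) */
    case GE:  return emit_compare (GT, a, b) | emit_compare (EQ, a, b);
    case LE:  return emit_compare (LT, a, b) | emit_compare (EQ, a, b);
    }
  return 0;
}

int
main (void)
{
  printf ("3 <= 3 -> %08x\n", emit_compare (LE, 3, 3));  /* ffffffff */
  printf ("4 != 4 -> %08x\n", emit_compare (NE, 4, 4));  /* 00000000 */
  return 0;
}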
*/ - -static int -get_vsel_insn (enum machine_mode mode) -{ - switch (mode) - { - case V4SImode: - return UNSPEC_VSEL4SI; - break; - case V4SFmode: - return UNSPEC_VSEL4SF; - break; - case V8HImode: - return UNSPEC_VSEL8HI; - break; - case V16QImode: - return UNSPEC_VSEL16QI; - break; - default: - return INSN_NOT_AVAILABLE; - break; - } - return INSN_NOT_AVAILABLE; -} - -/* Emit vector select insn where DEST is destination using - operands OP1, OP2 and MASK. */ - -static void -rs6000_emit_vector_select (rtx dest, rtx op1, rtx op2, rtx mask) -{ - rtx t, temp; - enum machine_mode dest_mode = GET_MODE (dest); - int vsel_insn_index = get_vsel_insn (GET_MODE (dest)); - - temp = gen_reg_rtx (dest_mode); - - /* For each vector element, select op1 when mask is 1 otherwise - select op2. */ - t = gen_rtx_SET (VOIDmode, temp, - gen_rtx_UNSPEC (dest_mode, - gen_rtvec (3, op2, op1, mask), - vsel_insn_index)); - emit_insn (t); - emit_move_insn (dest, temp); - return; -} - -/* Emit vector conditional expression. - DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands. - CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */ +/* Emit vector conditional expression. DEST is destination. OP_TRUE and + OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two + operands for the relation operation COND. */ int -rs6000_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2, +rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false, rtx cond, rtx cc_op0, rtx cc_op1) { enum machine_mode dest_mode = GET_MODE (dest); enum rtx_code rcode = GET_CODE (cond); + enum machine_mode cc_mode = CCmode; rtx mask; + rtx cond2; + rtx tmp; + bool invert_move = false; - if (!TARGET_ALTIVEC) + if (VECTOR_UNIT_NONE_P (dest_mode)) return 0; + switch (rcode) + { + /* Swap operands if we can, and fall back to doing the operation as + specified, and doing a NOR to invert the test. */ + case NE: + case UNLE: + case UNLT: + case UNGE: + case UNGT: + /* Invert condition and try again. + e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */ + invert_move = true; + rcode = reverse_condition_maybe_unordered (rcode); + if (rcode == UNKNOWN) + return 0; + break; + + /* Mark unsigned tests with CCUNSmode. */ + case GTU: + case GEU: + case LTU: + case LEU: + cc_mode = CCUNSmode; + break; + + default: + break; + } + /* Get the vector mask for the given relational operations. */ mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode); - rs6000_emit_vector_select (dest, op1, op2, mask); + if (!mask) + return 0; + if (invert_move) + { + tmp = op_true; + op_true = op_false; + op_false = tmp; + } + + cond2 = gen_rtx_fmt_ee (NE, cc_mode, mask, const0_rtx); + emit_insn (gen_rtx_SET (VOIDmode, + dest, + gen_rtx_IF_THEN_ELSE (dest_mode, + cond2, + op_true, + op_false))); return 1; } @@ -14749,6 +15313,15 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) enum rtx_code c; rtx target; + /* VSX/altivec have direct min/max insns. 
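The IF_THEN_ELSE the new code emits selects, per lane, op_true where the comparison mask is all ones and op_false where it is all zeros; the classic (t & m) | (f & ~m) formulation behind vsel/xxsel. A small sketch over plain arrays:

#include <stdio.h>
#include <stdint.h>

static void
vec_select (uint32_t *dest, const uint32_t *t, const uint32_t *f,
            const uint32_t *mask, int n)
{
  for (int i = 0; i < n; i++)
    dest[i] = (t[i] & mask[i]) | (f[i] & ~mask[i]);  /* per-lane select */
}

int
main (void)
{
  uint32_t t[4] = { 1, 2, 3, 4 }, f[4] = { 9, 9, 9, 9 };
  uint32_t m[4] = { ~0u, 0, ~0u, 0 }, d[4];
  vec_select (d, t, f, m, 4);
  printf ("%u %u %u %u\n", d[0], d[1], d[2], d[3]);    /* 1 9 3 9 */
  return 0;
}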
*/ + if ((code == SMAX || code == SMIN) && VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)) + { + emit_insn (gen_rtx_SET (VOIDmode, + dest, + gen_rtx_fmt_ee (code, mode, op0, op1))); + return; + } + if (code == SMAX || code == SMIN) c = GE; else @@ -16414,6 +16987,19 @@ uses_TOC (void) rtx create_TOC_reference (rtx symbol) { + if (TARGET_DEBUG_ADDR) + { + if (GET_CODE (symbol) == SYMBOL_REF) + fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n", + XSTR (symbol, 0)); + else + { + fprintf (stderr, "\ncreate_TOC_reference, code %s:\n", + GET_RTX_NAME (GET_CODE (symbol))); + debug_rtx (symbol); + } + } + if (!can_create_pseudo_p ()) df_set_regs_ever_live (TOC_REGISTER, true); return gen_rtx_PLUS (Pmode, @@ -16722,6 +17308,7 @@ emit_frame_save (rtx frame_reg, rtx frame_ptr, enum machine_mode mode, /* Some cases that need register indexed addressing. */ if ((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) + || (TARGET_VSX && VSX_VECTOR_MODE (mode)) || (TARGET_E500_DOUBLE && mode == DFmode) || (TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 1c686c2bb17..3153243b30d 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1280,12 +1280,24 @@ enum reg_class purpose. Any move between two registers of a cover class should be cheaper than load or store of the registers. The macro value is array of register classes with LIM_REG_CLASSES used as the end - marker. */ + marker. -#define IRA_COVER_CLASSES \ + We need two IRA_COVER_CLASSES, one for pre-VSX, and the other for VSX to + account for the Altivec and Floating registers being subsets of the VSX + register set. */ + +#define IRA_COVER_CLASSES_PRE_VSX \ { \ - GENERAL_REGS, SPECIAL_REGS, FLOAT_REGS, ALTIVEC_REGS, \ - /*VRSAVE_REGS,*/ VSCR_REGS, SPE_ACC_REGS, SPEFSCR_REGS, \ + GENERAL_REGS, SPECIAL_REGS, FLOAT_REGS, ALTIVEC_REGS, /* VSX_REGS, */ \ + /* VRSAVE_REGS,*/ VSCR_REGS, SPE_ACC_REGS, SPEFSCR_REGS, \ + /* MQ_REGS, LINK_REGS, CTR_REGS, */ \ + CR_REGS, XER_REGS, LIM_REG_CLASSES \ +} + +#define IRA_COVER_CLASSES_VSX \ +{ \ + GENERAL_REGS, SPECIAL_REGS, /* FLOAT_REGS, ALTIVEC_REGS, */ VSX_REGS, \ + /* VRSAVE_REGS,*/ VSCR_REGS, SPE_ACC_REGS, SPEFSCR_REGS, \ /* MQ_REGS, LINK_REGS, CTR_REGS, */ \ CR_REGS, XER_REGS, LIM_REG_CLASSES \ } @@ -1306,9 +1318,20 @@ extern enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; #define REGNO_REG_CLASS(REGNO) rs6000_regno_regclass[(REGNO)] #endif -/* Register classes for altivec registers (and eventually other vector - units). */ -extern enum reg_class rs6000_vector_reg_class[]; +/* Register classes for various constraints that are based on the target + switches. */ +enum r6000_reg_class_enum { + RS6000_CONSTRAINT_d, /* fpr registers for double values */ + RS6000_CONSTRAINT_f, /* fpr registers for single values */ + RS6000_CONSTRAINT_v, /* Altivec registers */ + RS6000_CONSTRAINT_wa, /* Any VSX register */ + RS6000_CONSTRAINT_wd, /* VSX register for V2DF */ + RS6000_CONSTRAINT_wf, /* VSX register for V4SF */ + RS6000_CONSTRAINT_ws, /* VSX register for DF */ + RS6000_CONSTRAINT_MAX +}; + +extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; /* The class value for index registers, and the one for base regs. 
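The rs6000_constraints array declared above lets a constraint letter resolve to a register class chosen at option-processing time rather than being hard-coded. A minimal sketch of the scheme; the flag handling and class names here are invented for illustration:

#include <stdio.h>

enum reg_class { NO_REGS, FLOAT_REGS, VSX_REGS };
enum rs_constraint { CONSTRAINT_ws, CONSTRAINT_wa, CONSTRAINT_MAX };

static enum reg_class rs_constraints[CONSTRAINT_MAX];

static void
init_constraint_classes (int target_vsx)
{
  /* With VSX enabled, "wa" (any VSX reg) and "ws" (DFmode in a VSX reg)
     become usable; otherwise they resolve to NO_REGS, which effectively
     kills the insn alternatives that mention them.  */
  rs_constraints[CONSTRAINT_wa] = target_vsx ? VSX_REGS : NO_REGS;
  rs_constraints[CONSTRAINT_ws] = target_vsx ? VSX_REGS : NO_REGS;
}

int
main (void)
{
  init_constraint_classes (1);
  printf ("wa -> %d\n", (int) rs_constraints[CONSTRAINT_wa]);
  return 0;
}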
*/ #define INDEX_REG_CLASS GENERAL_REGS @@ -2493,24 +2516,40 @@ enum rs6000_builtins ALTIVEC_BUILTIN_VMINSW, ALTIVEC_BUILTIN_VMINFP, ALTIVEC_BUILTIN_VMULEUB, + ALTIVEC_BUILTIN_VMULEUB_UNS, ALTIVEC_BUILTIN_VMULESB, ALTIVEC_BUILTIN_VMULEUH, + ALTIVEC_BUILTIN_VMULEUH_UNS, ALTIVEC_BUILTIN_VMULESH, ALTIVEC_BUILTIN_VMULOUB, + ALTIVEC_BUILTIN_VMULOUB_UNS, ALTIVEC_BUILTIN_VMULOSB, ALTIVEC_BUILTIN_VMULOUH, + ALTIVEC_BUILTIN_VMULOUH_UNS, ALTIVEC_BUILTIN_VMULOSH, ALTIVEC_BUILTIN_VNMSUBFP, ALTIVEC_BUILTIN_VNOR, ALTIVEC_BUILTIN_VOR, + ALTIVEC_BUILTIN_VSEL_2DF, /* needed for VSX */ + ALTIVEC_BUILTIN_VSEL_2DI, /* needed for VSX */ ALTIVEC_BUILTIN_VSEL_4SI, ALTIVEC_BUILTIN_VSEL_4SF, ALTIVEC_BUILTIN_VSEL_8HI, ALTIVEC_BUILTIN_VSEL_16QI, + ALTIVEC_BUILTIN_VSEL_2DI_UNS, + ALTIVEC_BUILTIN_VSEL_4SI_UNS, + ALTIVEC_BUILTIN_VSEL_8HI_UNS, + ALTIVEC_BUILTIN_VSEL_16QI_UNS, + ALTIVEC_BUILTIN_VPERM_2DF, /* needed for VSX */ + ALTIVEC_BUILTIN_VPERM_2DI, /* needed for VSX */ ALTIVEC_BUILTIN_VPERM_4SI, ALTIVEC_BUILTIN_VPERM_4SF, ALTIVEC_BUILTIN_VPERM_8HI, ALTIVEC_BUILTIN_VPERM_16QI, + ALTIVEC_BUILTIN_VPERM_2DI_UNS, + ALTIVEC_BUILTIN_VPERM_4SI_UNS, + ALTIVEC_BUILTIN_VPERM_8HI_UNS, + ALTIVEC_BUILTIN_VPERM_16QI_UNS, ALTIVEC_BUILTIN_VPKUHUM, ALTIVEC_BUILTIN_VPKUWUM, ALTIVEC_BUILTIN_VPKPX, @@ -3138,6 +3177,219 @@ enum rs6000_builtins RS6000_BUILTIN_RSQRTF, RS6000_BUILTIN_BSWAP_HI, + /* VSX builtins. */ + VSX_BUILTIN_LXSDUX, + VSX_BUILTIN_LXSDX, + VSX_BUILTIN_LXVD2UX, + VSX_BUILTIN_LXVD2X, + VSX_BUILTIN_LXVDSX, + VSX_BUILTIN_LXVW4UX, + VSX_BUILTIN_LXVW4X, + VSX_BUILTIN_STXSDUX, + VSX_BUILTIN_STXSDX, + VSX_BUILTIN_STXVD2UX, + VSX_BUILTIN_STXVD2X, + VSX_BUILTIN_STXVW4UX, + VSX_BUILTIN_STXVW4X, + VSX_BUILTIN_XSABSDP, + VSX_BUILTIN_XSADDDP, + VSX_BUILTIN_XSCMPODP, + VSX_BUILTIN_XSCMPUDP, + VSX_BUILTIN_XSCPSGNDP, + VSX_BUILTIN_XSCVDPSP, + VSX_BUILTIN_XSCVDPSXDS, + VSX_BUILTIN_XSCVDPSXWS, + VSX_BUILTIN_XSCVDPUXDS, + VSX_BUILTIN_XSCVDPUXWS, + VSX_BUILTIN_XSCVSPDP, + VSX_BUILTIN_XSCVSXDDP, + VSX_BUILTIN_XSCVUXDDP, + VSX_BUILTIN_XSDIVDP, + VSX_BUILTIN_XSMADDADP, + VSX_BUILTIN_XSMADDMDP, + VSX_BUILTIN_XSMAXDP, + VSX_BUILTIN_XSMINDP, + VSX_BUILTIN_XSMOVDP, + VSX_BUILTIN_XSMSUBADP, + VSX_BUILTIN_XSMSUBMDP, + VSX_BUILTIN_XSMULDP, + VSX_BUILTIN_XSNABSDP, + VSX_BUILTIN_XSNEGDP, + VSX_BUILTIN_XSNMADDADP, + VSX_BUILTIN_XSNMADDMDP, + VSX_BUILTIN_XSNMSUBADP, + VSX_BUILTIN_XSNMSUBMDP, + VSX_BUILTIN_XSRDPI, + VSX_BUILTIN_XSRDPIC, + VSX_BUILTIN_XSRDPIM, + VSX_BUILTIN_XSRDPIP, + VSX_BUILTIN_XSRDPIZ, + VSX_BUILTIN_XSREDP, + VSX_BUILTIN_XSRSQRTEDP, + VSX_BUILTIN_XSSQRTDP, + VSX_BUILTIN_XSSUBDP, + VSX_BUILTIN_XSTDIVDP_FE, + VSX_BUILTIN_XSTDIVDP_FG, + VSX_BUILTIN_XSTSQRTDP_FE, + VSX_BUILTIN_XSTSQRTDP_FG, + VSX_BUILTIN_XVABSDP, + VSX_BUILTIN_XVABSSP, + VSX_BUILTIN_XVADDDP, + VSX_BUILTIN_XVADDSP, + VSX_BUILTIN_XVCMPEQDP, + VSX_BUILTIN_XVCMPEQSP, + VSX_BUILTIN_XVCMPGEDP, + VSX_BUILTIN_XVCMPGESP, + VSX_BUILTIN_XVCMPGTDP, + VSX_BUILTIN_XVCMPGTSP, + VSX_BUILTIN_XVCMPEQDP_P, + VSX_BUILTIN_XVCMPEQSP_P, + VSX_BUILTIN_XVCMPGEDP_P, + VSX_BUILTIN_XVCMPGESP_P, + VSX_BUILTIN_XVCMPGTDP_P, + VSX_BUILTIN_XVCMPGTSP_P, + VSX_BUILTIN_XVCPSGNDP, + VSX_BUILTIN_XVCPSGNSP, + VSX_BUILTIN_XVCVDPSP, + VSX_BUILTIN_XVCVDPSXDS, + VSX_BUILTIN_XVCVDPSXWS, + VSX_BUILTIN_XVCVDPUXDS, + VSX_BUILTIN_XVCVDPUXDS_UNS, + VSX_BUILTIN_XVCVDPUXWS, + VSX_BUILTIN_XVCVSPDP, + VSX_BUILTIN_XVCVSPSXDS, + VSX_BUILTIN_XVCVSPSXWS, + VSX_BUILTIN_XVCVSPUXDS, + VSX_BUILTIN_XVCVSPUXWS, + VSX_BUILTIN_XVCVSXDDP, + VSX_BUILTIN_XVCVSXDSP, + VSX_BUILTIN_XVCVSXWDP, + VSX_BUILTIN_XVCVSXWSP, + VSX_BUILTIN_XVCVUXDDP, + 
+  VSX_BUILTIN_XVCVUXDDP_UNS,
+  VSX_BUILTIN_XVCVUXDSP,
+  VSX_BUILTIN_XVCVUXWDP,
+  VSX_BUILTIN_XVCVUXWSP,
+  VSX_BUILTIN_XVDIVDP,
+  VSX_BUILTIN_XVDIVSP,
+  VSX_BUILTIN_XVMADDDP,
+  VSX_BUILTIN_XVMADDSP,
+  VSX_BUILTIN_XVMAXDP,
+  VSX_BUILTIN_XVMAXSP,
+  VSX_BUILTIN_XVMINDP,
+  VSX_BUILTIN_XVMINSP,
+  VSX_BUILTIN_XVMSUBDP,
+  VSX_BUILTIN_XVMSUBSP,
+  VSX_BUILTIN_XVMULDP,
+  VSX_BUILTIN_XVMULSP,
+  VSX_BUILTIN_XVNABSDP,
+  VSX_BUILTIN_XVNABSSP,
+  VSX_BUILTIN_XVNEGDP,
+  VSX_BUILTIN_XVNEGSP,
+  VSX_BUILTIN_XVNMADDDP,
+  VSX_BUILTIN_XVNMADDSP,
+  VSX_BUILTIN_XVNMSUBDP,
+  VSX_BUILTIN_XVNMSUBSP,
+  VSX_BUILTIN_XVRDPI,
+  VSX_BUILTIN_XVRDPIC,
+  VSX_BUILTIN_XVRDPIM,
+  VSX_BUILTIN_XVRDPIP,
+  VSX_BUILTIN_XVRDPIZ,
+  VSX_BUILTIN_XVREDP,
+  VSX_BUILTIN_XVRESP,
+  VSX_BUILTIN_XVRSPI,
+  VSX_BUILTIN_XVRSPIC,
+  VSX_BUILTIN_XVRSPIM,
+  VSX_BUILTIN_XVRSPIP,
+  VSX_BUILTIN_XVRSPIZ,
+  VSX_BUILTIN_XVRSQRTEDP,
+  VSX_BUILTIN_XVRSQRTESP,
+  VSX_BUILTIN_XVSQRTDP,
+  VSX_BUILTIN_XVSQRTSP,
+  VSX_BUILTIN_XVSUBDP,
+  VSX_BUILTIN_XVSUBSP,
+  VSX_BUILTIN_XVTDIVDP_FE,
+  VSX_BUILTIN_XVTDIVDP_FG,
+  VSX_BUILTIN_XVTDIVSP_FE,
+  VSX_BUILTIN_XVTDIVSP_FG,
+  VSX_BUILTIN_XVTSQRTDP_FE,
+  VSX_BUILTIN_XVTSQRTDP_FG,
+  VSX_BUILTIN_XVTSQRTSP_FE,
+  VSX_BUILTIN_XVTSQRTSP_FG,
+  VSX_BUILTIN_XXSEL_2DI,
+  VSX_BUILTIN_XXSEL_2DF,
+  VSX_BUILTIN_XXSEL_4SI,
+  VSX_BUILTIN_XXSEL_4SF,
+  VSX_BUILTIN_XXSEL_8HI,
+  VSX_BUILTIN_XXSEL_16QI,
+  VSX_BUILTIN_XXSEL_2DI_UNS,
+  VSX_BUILTIN_XXSEL_4SI_UNS,
+  VSX_BUILTIN_XXSEL_8HI_UNS,
+  VSX_BUILTIN_XXSEL_16QI_UNS,
+  VSX_BUILTIN_VPERM_2DI,
+  VSX_BUILTIN_VPERM_2DF,
+  VSX_BUILTIN_VPERM_4SI,
+  VSX_BUILTIN_VPERM_4SF,
+  VSX_BUILTIN_VPERM_8HI,
+  VSX_BUILTIN_VPERM_16QI,
+  VSX_BUILTIN_VPERM_2DI_UNS,
+  VSX_BUILTIN_VPERM_4SI_UNS,
+  VSX_BUILTIN_VPERM_8HI_UNS,
+  VSX_BUILTIN_VPERM_16QI_UNS,
+  VSX_BUILTIN_XXPERMDI_2DF,
+  VSX_BUILTIN_XXPERMDI_2DI,
+  VSX_BUILTIN_XXPERMDI_4SF,
+  VSX_BUILTIN_XXPERMDI_4SI,
+  VSX_BUILTIN_XXPERMDI_8HI,
+  VSX_BUILTIN_XXPERMDI_16QI,
+  VSX_BUILTIN_CONCAT_2DF,
+  VSX_BUILTIN_CONCAT_2DI,
+  VSX_BUILTIN_SET_2DF,
+  VSX_BUILTIN_SET_2DI,
+  VSX_BUILTIN_SPLAT_2DF,
+  VSX_BUILTIN_SPLAT_2DI,
+  VSX_BUILTIN_XXMRGHW_4SF,
+  VSX_BUILTIN_XXMRGHW_4SI,
+  VSX_BUILTIN_XXMRGLW_4SF,
+  VSX_BUILTIN_XXMRGLW_4SI,
+  VSX_BUILTIN_XXSLDWI_16QI,
+  VSX_BUILTIN_XXSLDWI_8HI,
+  VSX_BUILTIN_XXSLDWI_4SI,
+  VSX_BUILTIN_XXSLDWI_4SF,
+  VSX_BUILTIN_XXSLDWI_2DI,
+  VSX_BUILTIN_XXSLDWI_2DF,
+  VSX_BUILTIN_VEC_INIT_V2DF,
+  VSX_BUILTIN_VEC_INIT_V2DI,
+  VSX_BUILTIN_VEC_SET_V2DF,
+  VSX_BUILTIN_VEC_SET_V2DI,
+  VSX_BUILTIN_VEC_EXT_V2DF,
+  VSX_BUILTIN_VEC_EXT_V2DI,
+
+  /* VSX overloaded builtins: add the overloaded functions not present in
+     Altivec.  */
+  VSX_BUILTIN_VEC_MUL,
+  VSX_BUILTIN_OVERLOADED_FIRST = VSX_BUILTIN_VEC_MUL,
+  VSX_BUILTIN_VEC_MSUB,
+  VSX_BUILTIN_VEC_NMADD,
+  VSX_BUILTIN_VEC_NMSUB,
+  VSX_BUILTIN_VEC_DIV,
+  VSX_BUILTIN_VEC_XXMRGHW,
+  VSX_BUILTIN_VEC_XXMRGLW,
+  VSX_BUILTIN_VEC_XXPERMDI,
+  VSX_BUILTIN_VEC_XXSLDWI,
+  VSX_BUILTIN_VEC_XXSPLTD,
+  VSX_BUILTIN_VEC_XXSPLTW,
+  VSX_BUILTIN_OVERLOADED_LAST = VSX_BUILTIN_VEC_XXSPLTW,
+
+  /* Combined VSX/Altivec builtins.  */
+  VECTOR_BUILTIN_FLOAT_V4SI_V4SF,
+  VECTOR_BUILTIN_UNSFLOAT_V4SI_V4SF,
+  VECTOR_BUILTIN_FIX_V4SF_V4SI,
+  VECTOR_BUILTIN_FIXUNS_V4SF_V4SI,
+
+  /* Power7 builtins that aren't VSX instructions.  */
+  POWER7_BUILTIN_BPERMD,
+
   RS6000_BUILTIN_COUNT
 };
@@ -3151,6 +3403,8 @@ enum rs6000_builtin_type_index
   RS6000_BTI_V16QI,
   RS6000_BTI_V2SI,
   RS6000_BTI_V2SF,
+  RS6000_BTI_V2DI,
+  RS6000_BTI_V2DF,
   RS6000_BTI_V4HI,
   RS6000_BTI_V4SI,
   RS6000_BTI_V4SF,
@@ -3158,13 +3412,16 @@ enum rs6000_builtin_type_index
   RS6000_BTI_unsigned_V16QI,
   RS6000_BTI_unsigned_V8HI,
   RS6000_BTI_unsigned_V4SI,
+  RS6000_BTI_unsigned_V2DI,
   RS6000_BTI_bool_char,		/* __bool char */
   RS6000_BTI_bool_short,	/* __bool short */
   RS6000_BTI_bool_int,		/* __bool int */
+  RS6000_BTI_bool_long,		/* __bool long */
   RS6000_BTI_pixel,		/* __pixel */
   RS6000_BTI_bool_V16QI,	/* __vector __bool char */
   RS6000_BTI_bool_V8HI,		/* __vector __bool short */
   RS6000_BTI_bool_V4SI,		/* __vector __bool int */
+  RS6000_BTI_bool_V2DI,		/* __vector __bool long */
   RS6000_BTI_pixel_V8HI,	/* __vector __pixel */
   RS6000_BTI_long,		/* long_integer_type_node */
   RS6000_BTI_unsigned_long,	/* long_unsigned_type_node */
@@ -3174,7 +3431,10 @@ enum rs6000_builtin_type_index
   RS6000_BTI_UINTHI,		/* unsigned_intHI_type_node */
   RS6000_BTI_INTSI,		/* intSI_type_node */
   RS6000_BTI_UINTSI,		/* unsigned_intSI_type_node */
+  RS6000_BTI_INTDI,		/* intDI_type_node */
+  RS6000_BTI_UINTDI,		/* unsigned_intDI_type_node */
   RS6000_BTI_float,		/* float_type_node */
+  RS6000_BTI_double,		/* double_type_node */
   RS6000_BTI_void,		/* void_type_node */
   RS6000_BTI_MAX
 };
@@ -3185,6 +3445,8 @@ enum rs6000_builtin_type_index
 #define opaque_p_V2SI_type_node	(rs6000_builtin_types[RS6000_BTI_opaque_p_V2SI])
 #define opaque_V4SI_type_node	(rs6000_builtin_types[RS6000_BTI_opaque_V4SI])
 #define V16QI_type_node		(rs6000_builtin_types[RS6000_BTI_V16QI])
+#define V2DI_type_node		(rs6000_builtin_types[RS6000_BTI_V2DI])
+#define V2DF_type_node		(rs6000_builtin_types[RS6000_BTI_V2DF])
 #define V2SI_type_node		(rs6000_builtin_types[RS6000_BTI_V2SI])
 #define V2SF_type_node		(rs6000_builtin_types[RS6000_BTI_V2SF])
 #define V4HI_type_node		(rs6000_builtin_types[RS6000_BTI_V4HI])
@@ -3194,13 +3456,16 @@ enum rs6000_builtin_type_index
 #define unsigned_V16QI_type_node	(rs6000_builtin_types[RS6000_BTI_unsigned_V16QI])
 #define unsigned_V8HI_type_node	(rs6000_builtin_types[RS6000_BTI_unsigned_V8HI])
 #define unsigned_V4SI_type_node	(rs6000_builtin_types[RS6000_BTI_unsigned_V4SI])
+#define unsigned_V2DI_type_node	(rs6000_builtin_types[RS6000_BTI_unsigned_V2DI])
 #define bool_char_type_node	(rs6000_builtin_types[RS6000_BTI_bool_char])
 #define bool_short_type_node	(rs6000_builtin_types[RS6000_BTI_bool_short])
 #define bool_int_type_node	(rs6000_builtin_types[RS6000_BTI_bool_int])
+#define bool_long_type_node	(rs6000_builtin_types[RS6000_BTI_bool_long])
 #define pixel_type_node		(rs6000_builtin_types[RS6000_BTI_pixel])
 #define bool_V16QI_type_node	(rs6000_builtin_types[RS6000_BTI_bool_V16QI])
 #define bool_V8HI_type_node	(rs6000_builtin_types[RS6000_BTI_bool_V8HI])
 #define bool_V4SI_type_node	(rs6000_builtin_types[RS6000_BTI_bool_V4SI])
+#define bool_V2DI_type_node	(rs6000_builtin_types[RS6000_BTI_bool_V2DI])
 #define pixel_V8HI_type_node	(rs6000_builtin_types[RS6000_BTI_pixel_V8HI])
 
 #define long_integer_type_internal_node	(rs6000_builtin_types[RS6000_BTI_long])
@@ -3211,7 +3476,10 @@ enum rs6000_builtin_type_index
 #define uintHI_type_internal_node	(rs6000_builtin_types[RS6000_BTI_UINTHI])
 #define intSI_type_internal_node	(rs6000_builtin_types[RS6000_BTI_INTSI])
 #define uintSI_type_internal_node	(rs6000_builtin_types[RS6000_BTI_UINTSI])
+#define intDI_type_internal_node	(rs6000_builtin_types[RS6000_BTI_INTDI])
+#define uintDI_type_internal_node	(rs6000_builtin_types[RS6000_BTI_UINTDI])
 #define float_type_internal_node	(rs6000_builtin_types[RS6000_BTI_float])
+#define double_type_internal_node	(rs6000_builtin_types[RS6000_BTI_double])
 #define void_type_internal_node	(rs6000_builtin_types[RS6000_BTI_void])
 
 extern GTY(()) tree rs6000_builtin_types[RS6000_BTI_MAX];
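The new overloaded entries (VSX_BUILTIN_VEC_MUL through VSX_BUILTIN_VEC_XXSPLTW) extend the Altivec overload machinery rather than the instruction set proper. A hedged sketch of how they are expected to surface to user code, assuming the usual <altivec.h> vec_* mapping; the vec_mul/vec_div spellings are an assumption here, not part of this hunk:

    /* Compile with -mvsx: vec_mul and vec_div on vector double resolve to
       the overloaded VSX builtins enumerated above, ultimately emitting
       xvmuldp and xvdivdp.  */
    #include <altivec.h>

    vector double
    scale (vector double num, vector double den)
    {
      return vec_div (vec_mul (num, num), den);
    }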
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a2fdc34342c..ae1ea99d0a3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -15322,6 +15322,7 @@
 
 (include "sync.md")
+(include "vector.md")
 (include "altivec.md")
 (include "spe.md")
 (include "dfp.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 00bd1b0e7a0..ac61ffc582e 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -119,6 +119,38 @@ mvsx
 Target Report Mask(VSX)
 Use vector/scalar (VSX) instructions
 
+mvsx-scalar-double
+Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(-1)
+; If -mvsx, use VSX arithmetic instructions for scalar double (on by default)
+
+mvsx-scalar-memory
+Target Undocumented Report Var(TARGET_VSX_SCALAR_MEMORY)
+; If -mvsx, use VSX scalar memory reference instructions for scalar double (off by default)
+
+mvsx-align-128
+Target Undocumented Report Var(TARGET_VSX_ALIGN_128)
+; If -mvsx, set alignment to 128 bits instead of 32/64
+
+mallow-movmisalign
+Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1)
+; Allow/disallow the movmisalign in DF/DI vectors
+
+mallow-df-permute
+Target Undocumented Var(TARGET_ALLOW_DF_PERMUTE)
+; Allow/disallow permutation of DF/DI vectors
+
+msched-groups
+Target Undocumented Report Var(TARGET_SCHED_GROUPS) Init(-1)
+; Explicitly set/unset whether rs6000_sched_groups is set
+
+malways-hint
+Target Undocumented Report Var(TARGET_ALWAYS_HINT) Init(-1)
+; Explicitly set/unset whether rs6000_always_hint is set
+
+malign-branch-targets
+Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1)
+; Explicitly set/unset whether rs6000_align_branch_targets is set
+
 mupdate
 Target Report Var(TARGET_UPDATE) Init(1)
 Generate load/store with update instructions
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 695f5799f11..0b8e311078d 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -59,6 +59,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rios1.md \
 	$(srcdir)/config/rs6000/constraints.md \
 	$(srcdir)/config/rs6000/darwin.md \
 	$(srcdir)/config/rs6000/sync.md \
+	$(srcdir)/config/rs6000/vector.md \
 	$(srcdir)/config/rs6000/altivec.md \
 	$(srcdir)/config/rs6000/spe.md \
 	$(srcdir)/config/rs6000/dfp.md \
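Several of the new rs6000.opt entries use Init(-1) to make the switch tri-state: -1 means the option was not given on the command line, so the override code can distinguish an explicit -mfoo or -mno-foo from "use the CPU default". A minimal sketch of the idiom, assuming the rs6000_override_options handling described in the ChangeLog (the exact statements are not in this hunk):

    /* Only override the CPU-derived default when the user passed
       -msched-groups or -mno-sched-groups on the command line.  */
    if (TARGET_SCHED_GROUPS >= 0)
      rs6000_sched_groups = TARGET_SCHED_GROUPS;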
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
new file mode 100644
index 00000000000..1546db7a74f
--- /dev/null
+++ b/gcc/config/rs6000/vector.md
@@ -0,0 +1,700 @@
+;; Expander definitions for vector support.  No instructions are in this
+;; file; this file provides the generic vector expanders, and the actual
+;; vector instructions will be in altivec.md.
+
+;; Copyright (C) 2009
+;; Free Software Foundation, Inc.
+;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; Vector int modes
+(define_mode_iterator VEC_I [V16QI V8HI V4SI])
+
+;; Vector float modes
+(define_mode_iterator VEC_F [V4SF])
+
+;; Vector arithmetic modes
+(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF])
+
+;; Vector modes that need alignment via permutes
+(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
+
+;; Vector logical modes
+(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+
+;; Vector modes for moves.  Don't do TImode here.
+(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Vector comparison modes
+(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF])
+
+;; Vector init/extract modes
+(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+;; Vector reload iterator
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI])
+
+;; Base type from vector mode
+(define_mode_attr VEC_base [(V16QI "QI")
+			    (V8HI  "HI")
+			    (V4SI  "SI")
+			    (V2DI  "DI")
+			    (V4SF  "SF")
+			    (V2DF  "DF")
+			    (TI    "TI")])
+
+;; Same size integer type for floating point data
+(define_mode_attr VEC_int [(V4SF "v4si")
+			   (V2DF "v2di")])
+
+(define_mode_attr VEC_INT [(V4SF "V4SI")
+			   (V2DF "V2DI")])
+
+;; constants for unspec
+(define_constants
+  [(UNSPEC_PREDICATE	400)])
+
+
+;; Vector move instructions.
+(define_expand "mov<mode>"
+  [(set (match_operand:VEC_M 0 "nonimmediate_operand" "")
+	(match_operand:VEC_M 1 "any_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+{
+  if (can_create_pseudo_p ())
+    {
+      if (CONSTANT_P (operands[1])
+	  && !easy_vector_constant (operands[1], <MODE>mode))
+	operands[1] = force_const_mem (<MODE>mode, operands[1]);
+
+      else if (!vlogical_operand (operands[0], <MODE>mode)
+	       && !vlogical_operand (operands[1], <MODE>mode))
+	operands[1] = force_reg (<MODE>mode, operands[1]);
+    }
+})
+
+;; Generic vector floating point load/store instructions.
+(define_expand "vector_load_<mode>"
+  [(set (match_operand:VEC_M 0 "vfloat_operand" "")
+	(match_operand:VEC_M 1 "memory_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+  "")
+
+(define_expand "vector_store_<mode>"
+  [(set (match_operand:VEC_M 0 "memory_operand" "")
+	(match_operand:VEC_M 1 "vfloat_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
+  "")
+
+;; Splits if a GPR register was chosen for the move
+(define_split
+  [(set (match_operand:VEC_L 0 "nonimmediate_operand" "")
+	(match_operand:VEC_L 1 "input_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)
+   && reload_completed
+   && gpr_or_gpr_p (operands[0], operands[1])"
+  [(pc)]
+{
+  rs6000_split_multireg_move (operands[0], operands[1]);
+  DONE;
+})
+
+
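When register allocation leaves one of these 128-bit values in GPRs, the post-reload define_split above defers to rs6000_split_multireg_move, which breaks the access into word-sized moves. A simplified model of that decomposition follows; the real function also handles overlapping hard registers, update addressing, and move ordering, and the parameter names here are illustrative:

    /* Sketch: move an nregs-word value one word at a time.  */
    static void
    multireg_move_sketch (rtx dst, rtx src, enum machine_mode mode,
                          enum machine_mode reg_mode, int nregs, int reg_size)
    {
      int i;
      for (i = 0; i < nregs; i++)
        emit_move_insn (simplify_gen_subreg (reg_mode, dst, mode, i * reg_size),
                        simplify_gen_subreg (reg_mode, src, mode, i * reg_size));
    }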
+(define_expand "reload___store" + [(parallel [(match_operand:VEC_R 0 "memory_operand" "m") + (match_operand:VEC_R 1 "gpc_reg_operand" "r") + (match_operand:P 2 "register_operand" "=&b")])] + "" +{ + rs6000_secondary_reload_inner (operands[1], operands[0], operands[2], true); + DONE; +}) + +(define_expand "reload___load" + [(parallel [(match_operand:VEC_R 0 "gpc_reg_operand" "=&r") + (match_operand:VEC_R 1 "memory_operand" "m") + (match_operand:P 2 "register_operand" "=&b")])] + "" +{ + rs6000_secondary_reload_inner (operands[0], operands[1], operands[2], false); + DONE; +}) + +;; Reload sometimes tries to move the address to a GPR, and can generate +;; invalid RTL for addresses involving AND -16. Allow addresses involving +;; reg+reg, reg+small constant, or just reg, all wrapped in an AND -16. + +(define_insn_and_split "*vec_reload_and_plus_" + [(set (match_operand:P 0 "gpc_reg_operand" "=b") + (and:P (plus:P (match_operand:P 1 "gpc_reg_operand" "r") + (match_operand:P 2 "reg_or_cint_operand" "rI")) + (const_int -16)))] + "TARGET_ALTIVEC && (reload_in_progress || reload_completed)" + "#" + "&& reload_completed" + [(set (match_dup 0) + (plus:P (match_dup 1) + (match_dup 2))) + (parallel [(set (match_dup 0) + (and:P (match_dup 0) + (const_int -16))) + (clobber:CC (scratch:CC))])]) + +;; The normal ANDSI3/ANDDI3 won't match if reload decides to move an AND -16 +;; address to a register because there is no clobber of a (scratch), so we add +;; it here. +(define_insn_and_split "*vec_reload_and_reg_" + [(set (match_operand:P 0 "gpc_reg_operand" "=b") + (and:P (match_operand:P 1 "gpc_reg_operand" "r") + (const_int -16)))] + "TARGET_ALTIVEC && (reload_in_progress || reload_completed)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (and:P (match_dup 1) + (const_int -16))) + (clobber:CC (scratch:CC))])]) + +;; Generic floating point vector arithmetic support +(define_expand "add3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (plus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "sub3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (minus:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "mul3" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (mult:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode) && TARGET_FUSED_MADD" + " +{ + emit_insn (gen_altivec_mulv4sf3 (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_expand "neg2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (neg:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + emit_insn (gen_altivec_negv4sf2 (operands[0], operands[1])); + DONE; +}") + +(define_expand "abs2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + emit_insn (gen_altivec_absv4sf2 (operands[0], operands[1])); + DONE; +}") + +(define_expand "smin3" + [(set (match_operand:VEC_F 0 "register_operand" "") + (smin:VEC_F (match_operand:VEC_F 1 "register_operand" "") + (match_operand:VEC_F 2 "register_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "smax3" + [(set (match_operand:VEC_F 0 "register_operand" "") + (smax:VEC_F (match_operand:VEC_F 1 "register_operand" "") 
+ (match_operand:VEC_F 2 "register_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + + +(define_expand "ftrunc2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (fix:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + + +;; Vector comparisons +(define_expand "vcond" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (if_then_else:VEC_F + (match_operator 3 "comparison_operator" + [(match_operand:VEC_F 4 "vfloat_operand" "") + (match_operand:VEC_F 5 "vfloat_operand" "")]) + (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcond" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (if_then_else:VEC_I + (match_operator 3 "comparison_operator" + [(match_operand:VEC_I 4 "vint_operand" "") + (match_operand:VEC_I 5 "vint_operand" "")]) + (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vcondu" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (if_then_else:VEC_I + (match_operator 3 "comparison_operator" + [(match_operand:VEC_I 4 "vint_operand" "") + (match_operand:VEC_I 5 "vint_operand" "")]) + (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2], + operands[3], operands[4], operands[5])) + DONE; + else + FAIL; +}") + +(define_expand "vector_eq" + [(set (match_operand:VEC_C 0 "vlogical_operand" "") + (eq:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") + (match_operand:VEC_C 2 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_gt" + [(set (match_operand:VEC_C 0 "vlogical_operand" "") + (gt:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") + (match_operand:VEC_C 2 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_ge" + [(set (match_operand:VEC_C 0 "vlogical_operand" "") + (ge:VEC_C (match_operand:VEC_C 1 "vlogical_operand" "") + (match_operand:VEC_C 2 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_gtu" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (gtu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_geu" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (geu:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +;; Note the arguments for __builtin_altivec_vsel are op2, op1, mask +;; which is in the reverse order that we want +(define_expand "vector_select_" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (if_then_else:VEC_L + (ne:CC (match_operand:VEC_L 3 "vlogical_operand" "") + (const_int 0)) + (match_operand:VEC_L 2 "vlogical_operand" "") + (match_operand:VEC_L 1 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_select__uns" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (if_then_else:VEC_L + (ne:CCUNS (match_operand:VEC_L 3 
"vlogical_operand" "") + (const_int 0)) + (match_operand:VEC_L 2 "vlogical_operand" "") + (match_operand:VEC_L 1 "vlogical_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +;; Expansions that compare vectors producing a vector result and a predicate, +;; setting CR6 to indicate a combined status +(define_expand "vector_eq__p" + [(parallel + [(set (reg:CC 74) + (unspec:CC [(eq:CC (match_operand:VEC_A 1 "vlogical_operand" "") + (match_operand:VEC_A 2 "vlogical_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_A 0 "vlogical_operand" "") + (eq:VEC_A (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_gt__p" + [(parallel + [(set (reg:CC 74) + (unspec:CC [(gt:CC (match_operand:VEC_A 1 "vlogical_operand" "") + (match_operand:VEC_A 2 "vlogical_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_A 0 "vlogical_operand" "") + (gt:VEC_A (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_ge__p" + [(parallel + [(set (reg:CC 74) + (unspec:CC [(ge:CC (match_operand:VEC_F 1 "vfloat_operand" "") + (match_operand:VEC_F 2 "vfloat_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_F 0 "vfloat_operand" "") + (ge:VEC_F (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +(define_expand "vector_gtu__p" + [(parallel + [(set (reg:CC 74) + (unspec:CC [(gtu:CC (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" ""))] + UNSPEC_PREDICATE)) + (set (match_operand:VEC_I 0 "vlogical_operand" "") + (gtu:VEC_I (match_dup 1) + (match_dup 2)))])] + "VECTOR_UNIT_ALTIVEC_P (mode)" + "") + +;; AltiVec predicates. + +(define_expand "cr6_test_for_zero" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC 74) + (const_int 0)))] + "TARGET_ALTIVEC" + "") + +(define_expand "cr6_test_for_zero_reverse" + [(set (match_operand:SI 0 "register_operand" "=r") + (eq:SI (reg:CC 74) + (const_int 0))) + (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] + "TARGET_ALTIVEC" + "") + +(define_expand "cr6_test_for_lt" + [(set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC 74) + (const_int 0)))] + "TARGET_ALTIVEC" + "") + +(define_expand "cr6_test_for_lt_reverse" + [(set (match_operand:SI 0 "register_operand" "=r") + (lt:SI (reg:CC 74) + (const_int 0))) + (set (match_dup 0) (minus:SI (const_int 1) (match_dup 0)))] + "TARGET_ALTIVEC" + "") + + +;; Vector logical instructions +(define_expand "xor3" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (xor:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:VEC_L 2 "vlogical_operand" "")))] + "VECTOR_MEM_ALTIVEC_P (mode)" + "") + +(define_expand "ior3" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:VEC_L 2 "vlogical_operand" "")))] + "VECTOR_MEM_ALTIVEC_P (mode)" + "") + +(define_expand "and3" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:VEC_L 2 "vlogical_operand" "")))] + "VECTOR_MEM_ALTIVEC_P (mode)" + "") + +(define_expand "one_cmpl2" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")))] + "VECTOR_MEM_ALTIVEC_P (mode)" + "") + +(define_expand "nor3" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:VEC_L 2 "vlogical_operand" ""))))] + 
"VECTOR_MEM_ALTIVEC_P (mode)" + "") + +(define_expand "andc3" + [(set (match_operand:VEC_L 0 "vlogical_operand" "") + (and:VEC_L (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" "")) + (match_operand:VEC_L 1 "vlogical_operand" "")))] + "VECTOR_MEM_ALTIVEC_P (mode)" + "") + +;; Same size conversions +(define_expand "float2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (float:VEC_F (match_operand: 1 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + emit_insn (gen_altivec_vcfsx (operands[0], operands[1], const0_rtx)); + DONE; +}") + +(define_expand "unsigned_float2" + [(set (match_operand:VEC_F 0 "vfloat_operand" "") + (unsigned_float:VEC_F (match_operand: 1 "vint_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + emit_insn (gen_altivec_vcfux (operands[0], operands[1], const0_rtx)); + DONE; +}") + +(define_expand "fix_trunc2" + [(set (match_operand: 0 "vint_operand" "") + (fix: (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + emit_insn (gen_altivec_vctsxs (operands[0], operands[1], const0_rtx)); + DONE; +}") + +(define_expand "fixuns_trunc2" + [(set (match_operand: 0 "vint_operand" "") + (unsigned_fix: (match_operand:VEC_F 1 "vfloat_operand" "")))] + "VECTOR_UNIT_ALTIVEC_P (mode)" + " +{ + emit_insn (gen_altivec_vctuxs (operands[0], operands[1], const0_rtx)); + DONE; +}") + + +;; Vector initialization, set, extract +(define_expand "vec_init" + [(match_operand:VEC_E 0 "vlogical_operand" "") + (match_operand:VEC_E 1 "" "")] + "VECTOR_MEM_ALTIVEC_P (mode)" +{ + rs6000_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +(define_expand "vec_set" + [(match_operand:VEC_E 0 "vlogical_operand" "") + (match_operand: 1 "register_operand" "") + (match_operand 2 "const_int_operand" "")] + "VECTOR_MEM_ALTIVEC_P (mode)" +{ + rs6000_expand_vector_set (operands[0], operands[1], INTVAL (operands[2])); + DONE; +}) + +(define_expand "vec_extract" + [(match_operand: 0 "register_operand" "") + (match_operand:VEC_E 1 "vlogical_operand" "") + (match_operand 2 "const_int_operand" "")] + "VECTOR_MEM_ALTIVEC_P (mode)" +{ + rs6000_expand_vector_extract (operands[0], operands[1], + INTVAL (operands[2])); + DONE; +}) + +;; Interleave patterns +(define_expand "vec_interleave_highv4sf" + [(set (match_operand:V4SF 0 "vfloat_operand" "") + (vec_merge:V4SF + (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "") + +(define_expand "vec_interleave_lowv4sf" + [(set (match_operand:V4SF 0 "vfloat_operand" "") + (vec_merge:V4SF + (vec_select:V4SF (match_operand:V4SF 1 "vfloat_operand" "") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V4SF (match_operand:V4SF 2 "vfloat_operand" "") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" + "") + + +;; Align vector loads with a permute. 
+(define_expand "vec_realign_load_" + [(match_operand:VEC_K 0 "vlogical_operand" "") + (match_operand:VEC_K 1 "vlogical_operand" "") + (match_operand:VEC_K 2 "vlogical_operand" "") + (match_operand:V16QI 3 "vlogical_operand" "")] + "VECTOR_MEM_ALTIVEC_P (mode)" +{ + emit_insn (gen_altivec_vperm_ (operands[0], operands[1], operands[2], + operands[3])); + DONE; +}) + + +;; Vector shift left in bits. Currently supported ony for shift +;; amounts that can be expressed as byte shifts (divisible by 8). +;; General shift amounts can be supported using vslo + vsl. We're +;; not expecting to see these yet (the vectorizer currently +;; generates only shifts divisible by byte_size). +(define_expand "vec_shl_" + [(match_operand:VEC_L 0 "vlogical_operand" "") + (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:QI 2 "reg_or_short_operand" "")] + "TARGET_ALTIVEC" + " +{ + rtx bitshift = operands[2]; + rtx shift; + rtx insn; + HOST_WIDE_INT bitshift_val; + HOST_WIDE_INT byteshift_val; + + if (! CONSTANT_P (bitshift)) + FAIL; + bitshift_val = INTVAL (bitshift); + if (bitshift_val & 0x7) + FAIL; + byteshift_val = bitshift_val >> 3; + shift = gen_rtx_CONST_INT (QImode, byteshift_val); + insn = gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], + shift); + + emit_insn (insn); + DONE; +}") + +;; Vector shift right in bits. Currently supported ony for shift +;; amounts that can be expressed as byte shifts (divisible by 8). +;; General shift amounts can be supported using vsro + vsr. We're +;; not expecting to see these yet (the vectorizer currently +;; generates only shifts divisible by byte_size). +(define_expand "vec_shr_" + [(match_operand:VEC_L 0 "vlogical_operand" "") + (match_operand:VEC_L 1 "vlogical_operand" "") + (match_operand:QI 2 "reg_or_short_operand" "")] + "TARGET_ALTIVEC" + " +{ + rtx bitshift = operands[2]; + rtx shift; + rtx insn; + HOST_WIDE_INT bitshift_val; + HOST_WIDE_INT byteshift_val; + + if (! CONSTANT_P (bitshift)) + FAIL; + bitshift_val = INTVAL (bitshift); + if (bitshift_val & 0x7) + FAIL; + byteshift_val = 16 - (bitshift_val >> 3); + shift = gen_rtx_CONST_INT (QImode, byteshift_val); + insn = gen_altivec_vsldoi_ (operands[0], operands[1], operands[1], + shift); + + emit_insn (insn); + DONE; +}") + +;; Expanders for rotate each element in a vector +(define_expand "vrotl3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "TARGET_ALTIVEC" + "") + +;; Expanders for arithmetic shift left on each vector element +(define_expand "vashl3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "TARGET_ALTIVEC" + "") + +;; Expanders for logical shift right on each vector element +(define_expand "vlshr3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "TARGET_ALTIVEC" + "") + +;; Expanders for arithmetic shift right on each vector element +(define_expand "vashr3" + [(set (match_operand:VEC_I 0 "vint_operand" "") + (ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "") + (match_operand:VEC_I 2 "vint_operand" "")))] + "TARGET_ALTIVEC" + "")