i386.c (override_options): If SSE, enable sse prefetch.

* config/i386/i386.c (override_options): If SSE, enable sse prefetch.
        (ix86_expand_vector_move): New.
        (bdesc_2arg): Remove andps, andnps, orps, xorps.
        (ix86_init_mmx_sse_builtins): Make static.  Remove composite builtins.
        Remove old prefetch builtins.  Special case the logicals removed above.
        (ix86_expand_builtin): Likewise.
        (safe_vector_operand): Use V4SFmode, not TImode.
        (ix86_expand_store_builtin): Remove shuffle arg.  Update callers.
        (ix86_expand_timode_binop_builtin): New.
        * config/i386/i386-protos.h: Update.
        * config/i386/i386.h (enum ix86_builtins): Update.
        * config/i386/i386.md: Correct predicates on MMX/SSE patterns.
        Use ix86_expand_vector_move in vector move expanders.
        (movti_internal, movti_rex64): Add xorps alternative.
        (sse_clrv4sf): Rename and adjust from sse_clrti.
        (prefetch): Don't work so hard.
        (prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC.
        * config/i386/xmmintrin.h (__m128): Use V4SFmode.
        (_mm_getcsr, _mm_setcsr): Fix typo in builtin name.

From-SVN: r48796
Richard Henderson, 2002-01-12 02:05:28 -08:00 (committed by Richard Henderson)
commit e37af218ee, parent b0d723da36
6 changed files with 335 additions and 558 deletions
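For orientation, here is a user-level sketch of the intrinsics this commit reworks (the wrapper function is illustrative, not part of the commit), assuming a GCC of this vintage invoked with -msse:

#include <xmmintrin.h>

/* andps/andnps/orps were TImode entries in bdesc_2arg; after this
   commit they are V4SF-signature builtins special-cased in
   ix86_expand_builtin and punned back to TImode internally.  */
__m128
blend_by_mask (__m128 mask, __m128 a, __m128 b)
{
  return _mm_or_ps (_mm_and_ps (mask, a), _mm_andnot_ps (mask, b));
}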

File: ChangeLog

@@ -1,3 +1,25 @@
2002-01-12  Richard Henderson  <rth@redhat.com>

        * config/i386/i386.c (override_options): If SSE, enable sse prefetch.
        (ix86_expand_vector_move): New.
        (bdesc_2arg): Remove andps, andnps, orps, xorps.
        (ix86_init_mmx_sse_builtins): Make static.  Remove composite builtins.
        Remove old prefetch builtins.  Special case the logicals removed above.
        (ix86_expand_builtin): Likewise.
        (safe_vector_operand): Use V4SFmode, not TImode.
        (ix86_expand_store_builtin): Remove shuffle arg.  Update callers.
        (ix86_expand_timode_binop_builtin): New.
        * config/i386/i386-protos.h: Update.
        * config/i386/i386.h (enum ix86_builtins): Update.
        * config/i386/i386.md: Correct predicates on MMX/SSE patterns.
        Use ix86_expand_vector_move in vector move expanders.
        (movti_internal, movti_rex64): Add xorps alternative.
        (sse_clrv4sf): Rename and adjust from sse_clrti.
        (prefetch): Don't work so hard.
        (prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC.
        * config/i386/xmmintrin.h (__m128): Use V4SFmode.
        (_mm_getcsr, _mm_setcsr): Fix typo in builtin name.

2002-01-11  Richard Henderson  <rth@redhat.com>

        * config/i386/mmintrin.h: New file.

File: config/i386/i386-protos.h

@@ -108,6 +108,7 @@ extern rtx i386_simplify_dwarf_addr PARAMS ((rtx));
extern void ix86_expand_clear PARAMS ((rtx));
extern void ix86_expand_move PARAMS ((enum machine_mode, rtx[]));
extern void ix86_expand_vector_move PARAMS ((enum machine_mode, rtx[]));
extern void ix86_expand_binary_operator PARAMS ((enum rtx_code,
enum machine_mode, rtx[]));
extern int ix86_binary_operator_ok PARAMS ((enum rtx_code, enum machine_mode,
@@ -177,7 +178,6 @@ extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode,
tree, int));
extern rtx ix86_function_value PARAMS ((tree));
extern void ix86_init_builtins PARAMS ((void));
extern void ix86_init_mmx_sse_builtins PARAMS ((void));
extern rtx ix86_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
#endif

File: config/i386/i386.c

@@ -684,6 +684,7 @@ static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static void ix86_init_mmx_sse_builtins PARAMS ((void));
struct ix86_address
{
@@ -701,7 +702,9 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
@@ -1164,7 +1167,10 @@ override_options ()
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
if (TARGET_SSE)
target_flags |= MASK_MMX;
{
target_flags |= MASK_MMX;
x86_prefetch_sse = true;
}
/* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
if (TARGET_3DNOW)
@@ -6661,6 +6667,38 @@ ix86_expand_move (mode, operands)
emit_insn (insn);
}
void
ix86_expand_vector_move (mode, operands)
enum machine_mode mode;
rtx operands[];
{
/* Force constants other than zero into memory. We do not know how
the instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], mode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
operands[1] = gen_rtx_MEM (mode, addr);
}
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], mode)
&& !register_operand (operands[1], mode)
&& operands[1] != CONST0_RTX (mode))
{
rtx temp = force_reg (TImode, operands[1]);
emit_move_insn (operands[0], temp);
return;
}
emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}
/* Attempt to expand a binary operator. Make the expansion closer to the
actual machine, than just general_operand, which will allow 3 separate
memory references (one output, two input) in a single insn. */
@@ -10748,11 +10786,6 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
{ MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
{ MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
@@ -10865,7 +10898,7 @@ ix86_init_builtins ()
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins. */
void
static void
ix86_init_mmx_sse_builtins ()
{
const struct builtin_description * d;
@@ -10899,14 +10932,6 @@ ix86_init_mmx_sse_builtins ()
= build_function_type (integer_type_node,
tree_cons (NULL_TREE, V8QI_type_node,
endlink));
tree int_ftype_v2si
= build_function_type (integer_type_node,
tree_cons (NULL_TREE, V2SI_type_node,
endlink));
tree v2si_ftype_int
= build_function_type (V2SI_type_node,
tree_cons (NULL_TREE, integer_type_node,
endlink));
tree v4sf_ftype_v4sf_int
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
@@ -10976,11 +11001,6 @@ ix86_init_mmx_sse_builtins ()
endlink)));
tree void_ftype_void
= build_function_type (void_type_node, endlink);
tree void_ftype_pchar_int
= build_function_type (void_type_node,
tree_cons (NULL_TREE, pchar_type_node,
tree_cons (NULL_TREE, integer_type_node,
endlink)));
tree void_ftype_unsigned
= build_function_type (void_type_node,
tree_cons (NULL_TREE, unsigned_type_node,
@@ -10989,8 +11009,8 @@ ix86_init_mmx_sse_builtins ()
= build_function_type (unsigned_type_node, endlink);
tree di_ftype_void
= build_function_type (long_long_unsigned_type_node, endlink);
tree ti_ftype_void
= build_function_type (intTI_type_node, endlink);
tree v4sf_ftype_void
= build_function_type (V4SF_type_node, endlink);
tree v2si_ftype_v4sf
= build_function_type (V2SI_type_node,
tree_cons (NULL_TREE, V4SF_type_node,
@@ -11007,19 +11027,6 @@ ix86_init_mmx_sse_builtins ()
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, pfloat_type_node,
endlink));
tree v4sf_ftype_float
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, float_type_node,
endlink));
tree v4sf_ftype_float_float_float_float
= build_function_type (V4SF_type_node,
tree_cons (NULL_TREE, float_type_node,
tree_cons (NULL_TREE, float_type_node,
tree_cons (NULL_TREE,
float_type_node,
tree_cons (NULL_TREE,
float_type_node,
endlink)))));
/* @@@ the type is bogus */
tree v4sf_ftype_v4sf_pv2si
= build_function_type (V4SF_type_node,
@@ -11069,11 +11076,6 @@ ix86_init_mmx_sse_builtins ()
tree_cons (NULL_TREE, V2SI_type_node,
tree_cons (NULL_TREE, V2SI_type_node,
endlink)));
tree ti_ftype_ti_ti
= build_function_type (intTI_type_node,
tree_cons (NULL_TREE, intTI_type_node,
tree_cons (NULL_TREE, intTI_type_node,
endlink)));
tree di_ftype_di_di
= build_function_type (long_long_unsigned_type_node,
tree_cons (NULL_TREE, long_long_unsigned_type_node,
@@ -11110,11 +11112,6 @@ ix86_init_mmx_sse_builtins ()
V2SF_type_node,
endlink)));
tree void_ftype_pchar
= build_function_type (void_type_node,
tree_cons (NULL_TREE, pchar_type_node,
endlink));
/* Add all builtins that are more or less simple operations on two
operands. */
for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
@@ -11142,9 +11139,6 @@ ix86_init_mmx_sse_builtins ()
case V2SImode:
type = v2si_ftype_v2si_v2si;
break;
case TImode:
type = ti_ftype_ti_ti;
break;
case DImode:
type = di_ftype_di_di;
break;
@@ -11164,8 +11158,6 @@ ix86_init_mmx_sse_builtins ()
}
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
@@ -11199,6 +11191,11 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
@@ -11222,7 +11219,6 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
@@ -11256,8 +11252,6 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
/* 3DNow! extension as used in the Athlon CPU. */
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
@@ -11267,14 +11261,7 @@ ix86_init_mmx_sse_builtins ()
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
/* Composite intrinsics. */
def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
@@ -11293,8 +11280,8 @@ safe_vector_operand (x, mode)
emit_insn (gen_mmx_clrdi (mode == DImode ? x
: gen_rtx_SUBREG (DImode, x, 0)));
else
emit_insn (gen_sse_clrti (mode == TImode ? x
: gen_rtx_SUBREG (TImode, x, 0)));
emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
: gen_rtx_SUBREG (V4SFmode, x, 0)));
return x;
}
@@ -11342,13 +11329,45 @@ ix86_expand_binop_builtin (icode, arglist, target)
return target;
}
/* In type_for_mode we restrict the ability to create TImode types
to hosts with 64-bit H_W_I. So we've defined the SSE logicals
to have a V4SFmode signature. Convert them in-place to TImode. */
static rtx
ix86_expand_timode_binop_builtin (icode, arglist, target)
enum insn_code icode;
tree arglist;
rtx target;
{
rtx pat;
tree arg0 = TREE_VALUE (arglist);
tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
op0 = gen_lowpart (TImode, op0);
op1 = gen_lowpart (TImode, op1);
target = gen_reg_rtx (TImode);
if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
op0 = copy_to_mode_reg (TImode, op0);
if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
op1 = copy_to_mode_reg (TImode, op1);
pat = GEN_FCN (icode) (target, op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return gen_lowpart (V4SFmode, target);
}
/* Subroutine of ix86_expand_builtin to take care of stores. */
static rtx
ix86_expand_store_builtin (icode, arglist, shuffle)
ix86_expand_store_builtin (icode, arglist)
enum insn_code icode;
tree arglist;
int shuffle;
{
rtx pat;
tree arg0 = TREE_VALUE (arglist);
@@ -11362,10 +11381,6 @@ ix86_expand_store_builtin (icode, arglist, shuffle)
op1 = safe_vector_operand (op1, mode1);
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
if (shuffle >= 0)
emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
pat = GEN_FCN (icode) (op0, op1);
if (pat)
emit_insn (pat);
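The TImode punning above is invisible at the source level; a minimal sketch of how one of these logicals is reached from user code (the wrapper name is hypothetical), again assuming -msse:

typedef int __v4sf __attribute__ ((mode (V4SF)));

/* Operands carry the V4SFmode signature registered for the builtin;
   ix86_expand_timode_binop_builtin puns them to TImode so the existing
   sse_nandti3 pattern can be reused.  */
__v4sf
andnot_v4sf (__v4sf a, __v4sf b)
{
  return __builtin_ia32_andnps (a, b);
}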
@@ -11568,7 +11583,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
enum insn_code icode;
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
tree arglist = TREE_OPERAND (exp, 1);
tree arg0, arg1, arg2, arg3;
tree arg0, arg1, arg2;
rtx op0, op1, op2, pat;
enum machine_mode tmode, mode0, mode1, mode2;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
@@ -11583,19 +11598,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
emit_insn (gen_sfence ());
return 0;
case IX86_BUILTIN_M_FROM_INT:
target = gen_reg_rtx (DImode);
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
return target;
case IX86_BUILTIN_M_TO_INT:
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
op0 = copy_to_mode_reg (DImode, op0);
target = gen_reg_rtx (SImode);
emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
return target;
case IX86_BUILTIN_PEXTRW:
icode = CODE_FOR_mmx_pextrw;
arg0 = TREE_VALUE (arglist);
@@ -11689,6 +11691,19 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_RCPSS:
return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
case IX86_BUILTIN_ANDPS:
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
arglist, target);
case IX86_BUILTIN_ANDNPS:
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
arglist, target);
case IX86_BUILTIN_ORPS:
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
arglist, target);
case IX86_BUILTIN_XORPS:
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
arglist, target);
case IX86_BUILTIN_LOADAPS:
return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
@@ -11696,15 +11711,15 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
case IX86_BUILTIN_STOREAPS:
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
case IX86_BUILTIN_STOREUPS:
return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
case IX86_BUILTIN_LOADSS:
return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
case IX86_BUILTIN_STORESS:
return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
case IX86_BUILTIN_LOADHPS:
case IX86_BUILTIN_LOADLPS:
@@ -11753,9 +11768,9 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
return 0;
case IX86_BUILTIN_MOVNTPS:
return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
case IX86_BUILTIN_MOVNTQ:
return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
case IX86_BUILTIN_LDMXCSR:
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
@@ -11769,29 +11784,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
emit_insn (gen_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
case IX86_BUILTIN_PREFETCH:
icode = CODE_FOR_prefetch_sse;
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
{
/* @@@ better error message */
error ("selector must be an immediate");
return const0_rtx;
}
op0 = copy_to_mode_reg (Pmode, op0);
pat = GEN_FCN (icode) (op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return target;
case IX86_BUILTIN_SHUFPS:
icode = CODE_FOR_sse_shufps;
arg0 = TREE_VALUE (arglist);
@@ -11914,19 +11906,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_PMULHRW:
return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
case IX86_BUILTIN_PREFETCH_3DNOW:
case IX86_BUILTIN_PREFETCHW:
icode = CODE_FOR_prefetch_3dnow;
arg0 = TREE_VALUE (arglist);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
mode0 = insn_data[icode].operand[0].mode;
pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
if (! pat)
return NULL_RTX;
emit_insn (pat);
return NULL_RTX;
case IX86_BUILTIN_PF2IW:
return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
@@ -11945,57 +11924,11 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_PSWAPDSF:
return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
/* Composite intrinsics. */
case IX86_BUILTIN_SETPS1:
target = assign_386_stack_local (SFmode, 0);
arg0 = TREE_VALUE (arglist);
emit_move_insn (adjust_address (target, SFmode, 0),
expand_expr (arg0, NULL_RTX, VOIDmode, 0));
op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
return op0;
case IX86_BUILTIN_SETPS:
target = assign_386_stack_local (V4SFmode, 0);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
emit_move_insn (adjust_address (target, SFmode, 0),
expand_expr (arg0, NULL_RTX, VOIDmode, 0));
emit_move_insn (adjust_address (target, SFmode, 4),
expand_expr (arg1, NULL_RTX, VOIDmode, 0));
emit_move_insn (adjust_address (target, SFmode, 8),
expand_expr (arg2, NULL_RTX, VOIDmode, 0));
emit_move_insn (adjust_address (target, SFmode, 12),
expand_expr (arg3, NULL_RTX, VOIDmode, 0));
op0 = gen_reg_rtx (V4SFmode);
emit_insn (gen_sse_movaps (op0, target));
return op0;
case IX86_BUILTIN_CLRPS:
target = gen_reg_rtx (TImode);
emit_insn (gen_sse_clrti (target));
case IX86_BUILTIN_SSE_ZERO:
target = gen_reg_rtx (V4SFmode);
emit_insn (gen_sse_clrv4sf (target));
return target;
case IX86_BUILTIN_LOADRPS:
target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
gen_reg_rtx (V4SFmode), 1);
emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
return target;
case IX86_BUILTIN_LOADPS1:
target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
gen_reg_rtx (V4SFmode), 1);
emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
return target;
case IX86_BUILTIN_STOREPS1:
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
case IX86_BUILTIN_STORERPS:
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
case IX86_BUILTIN_MMX_ZERO:
target = gen_reg_rtx (DImode);
emit_insn (gen_mmx_clrdi (target));

File: config/i386/i386.h

@@ -2089,8 +2089,6 @@ enum ix86_builtins
IX86_BUILTIN_CVTSS2SI,
IX86_BUILTIN_CVTTPS2PI,
IX86_BUILTIN_CVTTSS2SI,
IX86_BUILTIN_M_FROM_INT,
IX86_BUILTIN_M_TO_INT,
IX86_BUILTIN_MAXPS,
IX86_BUILTIN_MAXSS,
@@ -2215,7 +2213,6 @@ enum ix86_builtins
IX86_BUILTIN_LDMXCSR,
IX86_BUILTIN_STMXCSR,
IX86_BUILTIN_SFENCE,
IX86_BUILTIN_PREFETCH,
/* 3DNow! Original */
IX86_BUILTIN_FEMMS,
@@ -2238,8 +2235,6 @@ enum ix86_builtins
IX86_BUILTIN_PFSUBR,
IX86_BUILTIN_PI2FD,
IX86_BUILTIN_PMULHRW,
IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
IX86_BUILTIN_PREFETCHW,
/* 3DNow! Athlon Extensions */
IX86_BUILTIN_PF2IW,
@@ -2249,16 +2244,7 @@ enum ix86_builtins
IX86_BUILTIN_PSWAPDSI,
IX86_BUILTIN_PSWAPDSF,
/* Composite builtins, expand to more than one insn. */
IX86_BUILTIN_SETPS1,
IX86_BUILTIN_SETPS,
IX86_BUILTIN_CLRPS,
IX86_BUILTIN_SETRPS,
IX86_BUILTIN_LOADPS1,
IX86_BUILTIN_LOADRPS,
IX86_BUILTIN_STOREPS1,
IX86_BUILTIN_STORERPS,
IX86_BUILTIN_SSE_ZERO,
IX86_BUILTIN_MMX_ZERO,
IX86_BUILTIN_MAX

File: config/i386/i386.md

@@ -81,7 +81,6 @@
;; 32 This is a `maskmov' operation.
;; 33 This is a `movmsk' operation.
;; 34 This is a `non-temporal' move.
;; 35 This is a `prefetch' (SSE) operation.
;; 36 This is used to distinguish COMISS from UCOMISS.
;; 37 This is a `ldmxcsr' operation.
;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
@@ -17686,7 +17685,7 @@
(define_insn "movv4sf_internal"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SF 1 "general_operand" "xm,x"))]
(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
@@ -17694,7 +17693,7 @@
(define_insn "movv4si_internal"
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
(match_operand:V4SI 1 "general_operand" "xm,x"))]
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movaps\t{%1, %0|%0, %1}"
@@ -17702,28 +17701,28 @@
(define_insn "movv8qi_internal"
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
(match_operand:V8QI 1 "general_operand" "ym,y"))]
(match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv4hi_internal"
[(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
(match_operand:V4HI 1 "general_operand" "ym,y"))]
(match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2si_internal"
[(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
(match_operand:V2SI 1 "general_operand" "ym,y"))]
(match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
"TARGET_MMX"
"movq\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_insn "movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
(match_operand:V2SF 1 "general_operand" "ym,y"))]
(match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
"TARGET_3DNOW"
"movq\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
@@ -17734,34 +17733,10 @@
"TARGET_SSE || TARGET_64BIT"
{
if (TARGET_64BIT)
{
ix86_expand_move (TImode, operands);
DONE;
}
/* For constants other than zero into memory. We do not know how the
instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], TImode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
operands[1] = gen_rtx_MEM (TImode, addr);
}
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], TImode)
&& !register_operand (operands[1], TImode)
&& operands[1] != CONST0_RTX (TImode))
{
rtx temp = force_reg (TImode, operands[1]);
emit_move_insn (operands[0], temp);
DONE;
}
ix86_expand_move (TImode, operands);
else
ix86_expand_vector_move (TImode, operands);
DONE;
})
(define_expand "movv4sf"
@@ -17769,30 +17744,8 @@
(match_operand:V4SF 1 "general_operand" ""))]
"TARGET_SSE"
{
/* For constants other than zero into memory. We do not know how the
instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], V4SFmode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
operands[1] = gen_rtx_MEM (V4SFmode, addr);
}
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], V4SFmode)
&& !register_operand (operands[1], V4SFmode)
&& operands[1] != CONST0_RTX (V4SFmode))
{
rtx temp = force_reg (V4SFmode, operands[1]);
emit_move_insn (operands[0], temp);
DONE;
}
ix86_expand_vector_move (V4SFmode, operands);
DONE;
})
(define_expand "movv4si"
@@ -17800,30 +17753,8 @@
(match_operand:V4SI 1 "general_operand" ""))]
"TARGET_MMX"
{
/* For constants other than zero into memory. We do not know how the
instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], V4SImode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
operands[1] = gen_rtx_MEM (V4SImode, addr);
}
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], V4SImode)
&& !register_operand (operands[1], V4SImode)
&& operands[1] != CONST0_RTX (V4SImode))
{
rtx temp = force_reg (V4SImode, operands[1]);
emit_move_insn (operands[0], temp);
DONE;
}
ix86_expand_vector_move (V4SImode, operands);
DONE;
})
(define_expand "movv2si"
@@ -17831,30 +17762,8 @@
(match_operand:V2SI 1 "general_operand" ""))]
"TARGET_MMX"
{
/* For constants other than zero into memory. We do not know how the
instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], V2SImode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
operands[1] = gen_rtx_MEM (V2SImode, addr);
}
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], V2SImode)
&& !register_operand (operands[1], V2SImode)
&& operands[1] != CONST0_RTX (V2SImode))
{
rtx temp = force_reg (V2SImode, operands[1]);
emit_move_insn (operands[0], temp);
DONE;
}
ix86_expand_vector_move (V2SImode, operands);
DONE;
})
(define_expand "movv4hi"
@@ -17862,30 +17771,8 @@
(match_operand:V4HI 1 "general_operand" ""))]
"TARGET_MMX"
{
/* For constants other than zero into memory. We do not know how the
instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], V4HImode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
operands[1] = gen_rtx_MEM (V4HImode, addr);
}
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], V4HImode)
&& !register_operand (operands[1], V4HImode)
&& operands[1] != CONST0_RTX (V4HImode))
{
rtx temp = force_reg (V4HImode, operands[1]);
emit_move_insn (operands[0], temp);
DONE;
}
ix86_expand_vector_move (V4HImode, operands);
DONE;
})
(define_expand "movv8qi"
@@ -17893,65 +17780,18 @@
(match_operand:V8QI 1 "general_operand" ""))]
"TARGET_MMX"
{
/* For constants other than zero into memory. We do not know how the
instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], V8QImode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
operands[1] = gen_rtx_MEM (V8QImode, addr);
}
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], V8QImode)
&& !register_operand (operands[1], V8QImode)
&& operands[1] != CONST0_RTX (V8QImode))
{
rtx temp = force_reg (V8QImode, operands[1]);
emit_move_insn (operands[0], temp);
DONE;
}
ix86_expand_vector_move (V8QImode, operands);
DONE;
})
(define_expand "movv2sf"
[(set (match_operand:V2SF 0 "general_operand" "")
(match_operand:V2SF 1 "general_operand" ""))]
"TARGET_3DNOW"
"
{
/* For constants other than zero into memory. We do not know how the
instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], V2SFmode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr,
XEXP (force_const_mem (V2SFmode, operands[1]), 0));
operands[1] = gen_rtx_MEM (V2SFmode, addr);
}
/* Make operand1 a register is it isn't already. */
if ((reload_in_progress | reload_completed) == 0
&& !register_operand (operands[0], V2SFmode)
&& !register_operand (operands[1], V2SFmode)
&& (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
&& operands[1] != CONST0_RTX (V2SFmode))
{
rtx temp = force_reg (V2SFmode, operands[1]);
emit_move_insn (operands[0], temp);
DONE;
}
}")
ix86_expand_vector_move (V2SFmode, operands);
DONE;
})
(define_insn_and_split "*pushti"
[(set (match_operand:TI 0 "push_operand" "=<")
@@ -18031,25 +17871,27 @@
[(set_attr "type" "mmx")])
(define_insn "movti_internal"
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
(match_operand:TI 1 "general_operand" "xm,x"))]
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
(match_operand:TI 1 "general_operand" "O,xm,x"))]
"TARGET_SSE && !TARGET_64BIT"
"@
xorps\t%0, %0
movaps\t{%1, %0|%0, %1}
movaps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "*movti_rex64"
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x")
(match_operand:TI 1 "general_operand" "riFo,riF,x,m"))]
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
"TARGET_64BIT
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
"@
#
#
xorps\t%0, %0
movaps\\t{%1, %0|%0, %1}
movaps\\t{%1, %0|%0, %1}"
[(set_attr "type" "*,*,sse,sse")
[(set_attr "type" "*,*,sse,sse,sse")
(set_attr "mode" "TI")])
(define_split
@@ -18064,7 +17906,8 @@
;; movaps or movups
(define_insn "sse_movaps"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
"TARGET_SSE"
"@
movaps\t{%1, %0|%0, %1}
@@ -18073,7 +17916,8 @@
(define_insn "sse_movups"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
"TARGET_SSE"
"@
movups\t{%1, %0|%0, %1}
@@ -18154,7 +17998,8 @@
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 12)))]
"TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18164,7 +18009,8 @@
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
(const_int 3)))]
"TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"TARGET_SSE
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movlps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18220,10 +18066,11 @@
(define_insn "vmaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(vec_merge:V4SF
(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"addss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18231,17 +18078,18 @@
(define_insn "subv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"subps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "vmsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(vec_merge:V4SF
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"subss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18256,10 +18104,11 @@
(define_insn "vmmulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(vec_merge:V4SF
(mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"mulss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18274,10 +18123,11 @@
(define_insn "vmdivv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(vec_merge:V4SF
(div:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"divss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18287,53 +18137,57 @@
(define_insn "rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
"TARGET_SSE"
"rcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
"TARGET_SSE"
"rsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE"
"rsqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"sqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "vmsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
(vec_merge:V4SF
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE"
"sqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
;; SSE logical operations.
;; These are not called andti3 etc. because we really really don't want
@@ -18519,9 +18373,9 @@
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
(define_insn "sse_clrti"
[(set (match_operand:TI 0 "register_operand" "=x")
(unspec:TI [(const_int 0)] 45))]
(define_insn "sse_clrv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF [(const_int 0)] 45))]
"TARGET_SSE"
"xorps\t{%0, %0|%0, %0}"
[(set_attr "type" "sse")
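With the rename, zeroing a vector register still costs a single dependency-free xorps; a sketch of the usual entry point (assuming this commit's xmmintrin.h and -msse):

#include <xmmintrin.h>

/* _mm_setzero_ps expands through IX86_BUILTIN_SSE_ZERO to
   gen_sse_clrv4sf, i.e. "xorps %xmm0, %xmm0" with no input operand.  */
__m128
zero_ps (void)
{
  return _mm_setzero_ps ();
}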
@@ -18532,8 +18386,8 @@
(define_insn "maskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(match_operator:V4SI 3 "sse_comparison_operator"
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "x")]))]
"TARGET_SSE"
"cmp%D3ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18542,24 +18396,23 @@
[(set (match_operand:V4SI 0 "register_operand" "=x")
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "x")])))]
"TARGET_SSE"
"*
{
if (GET_CODE (operands[3]) == UNORDERED)
return \"cmpordps\t{%2, %0|%0, %2}\";
return \"cmpn%D3ps\t{%2, %0|%0, %2}\";
}"
return "cmpordps\t{%2, %0|%0, %2}";
else
return "cmpn%D3ps\t{%2, %0|%0, %2}";
}
[(set_attr "type" "sse")])
(define_insn "vmmaskcmpv4sf3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_merge:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "x")])
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "x")])
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
@@ -18571,18 +18424,17 @@
(vec_merge:V4SI
(not:V4SI
(match_operator:V4SI 3 "sse_comparison_operator"
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "x")]))
[(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "register_operand" "x")]))
(subreg:V4SI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE"
"*
{
if (GET_CODE (operands[3]) == UNORDERED)
return \"cmpordss\t{%2, %0|%0, %2}\";
return \"cmpn%D3ss\t{%2, %0|%0, %2}\";
}"
return "cmpordss\t{%2, %0|%0, %2}";
else
return "cmpn%D3ss\t{%2, %0|%0, %2}";
}
[(set_attr "type" "sse")])
(define_insn "sse_comi"
@@ -18663,10 +18515,11 @@
(define_insn "vmsmaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(vec_merge:V4SF
(smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"maxss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18681,10 +18534,11 @@
(define_insn "vmsminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(vec_merge:V4SF
(smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"minss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
@@ -18694,56 +18548,58 @@
(define_insn "cvtpi2ps"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
(const_int 12)))]
(vec_merge:V4SF
(match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
(const_int 12)))]
"TARGET_SSE"
"cvtpi2ps\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
(parallel
[(const_int 0)
(const_int 1)])))]
(vec_select:V2SI
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvtps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttps2pi"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
(parallel
[(const_int 0)
(const_int 1)])))]
(vec_select:V2SI
(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
(parallel [(const_int 0) (const_int 1)])))]
"TARGET_SSE"
"cvttps2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvtsi2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float:SF (match_operand:SI 2 "register_operand" "rm")))
(const_int 14)))]
(vec_merge:V4SF
(match_operand:V4SF 1 "register_operand" "0")
(vec_duplicate:V4SF
(float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
(const_int 14)))]
"TARGET_SSE"
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sse")])
(define_insn "cvtss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
(parallel [(const_int 0)])))]
(vec_select:SI
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
(parallel [(const_int 0)])))]
"TARGET_SSE"
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
(define_insn "cvttss2si"
[(set (match_operand:SI 0 "register_operand" "=r")
(vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
(parallel [(const_int 0)])))]
(vec_select:SI
(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
(parallel [(const_int 0)])))]
"TARGET_SSE"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")])
@@ -18877,8 +18733,10 @@
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
(sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(mult:V4SI (sign_extend:V4SI
(match_operand:V4HI 1 "register_operand" "0"))
(sign_extend:V4SI
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_MMX"
"pmulhw\t{%2, %0|%0, %2}"
@@ -18888,8 +18746,10 @@
[(set (match_operand:V4HI 0 "register_operand" "=y")
(truncate:V4HI
(lshiftrt:V4SI
(mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
(zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(mult:V4SI (zero_extend:V4SI
(match_operand:V4HI 1 "register_operand" "0"))
(zero_extend:V4SI
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
(const_int 16))))]
"TARGET_SSE || TARGET_3DNOW_A"
"pmulhuw\t{%2, %0|%0, %2}"
@@ -18899,12 +18759,12 @@
[(set (match_operand:V2SI 0 "register_operand" "=y")
(plus:V2SI
(mult:V2SI
(sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
(parallel [(const_int 0)
(const_int 2)])))
(sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0)
(const_int 2)]))))
(sign_extend:V2SI
(vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
(vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
(parallel [(const_int 0) (const_int 2)]))))
(mult:V2SI
(sign_extend:V2SI (vec_select:V2HI (match_dup 1)
(parallel [(const_int 1)
@@ -19404,75 +19264,6 @@
[(set_attr "type" "sse")
(set_attr "memory" "unknown")])
(define_expand "prefetch"
[(prefetch (match_operand:SI 0 "address_operand" "p")
(match_operand:SI 1 "const_int_operand" "n")
(match_operand:SI 2 "const_int_operand" "n"))]
"TARGET_PREFETCH_SSE || TARGET_3DNOW"
"
{
int rw = INTVAL (operands[1]);
int locality = INTVAL (operands[2]);
if (rw != 0 && rw != 1)
abort ();
if (locality < 0 || locality > 3)
abort ();
/* Use 3dNOW prefetch in case we are asking for write prefetch not
suported by SSE counterpart or the SSE prefetch is not available
(K6 machines). Otherwise use SSE prefetch as it allows specifying
of locality. */
if (TARGET_3DNOW
&& (!TARGET_PREFETCH_SSE || rw))
{
emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
}
else
{
int i;
switch (locality)
{
case 0: /* No temporal locality. */
i = 0;
break;
case 1: /* Lowest level of temporal locality. */
i = 3;
break;
case 2: /* Moderate level of temporal locality. */
i = 2;
break;
case 3: /* Highest level of temporal locality. */
i = 1;
break;
default:
abort (); /* We already checked for valid values above. */
break;
}
emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
}
DONE;
}")
(define_insn "prefetch_sse"
[(unspec [(match_operand:SI 0 "address_operand" "p")
(match_operand:SI 1 "immediate_operand" "n")] 35)]
"TARGET_PREFETCH_SSE"
{
switch (INTVAL (operands[1]))
{
case 0:
return "prefetchnta\t%a0";
case 1:
return "prefetcht0\t%a0";
case 2:
return "prefetcht1\t%a0";
case 3:
return "prefetcht2\t%a0";
default:
abort ();
}
}
[(set_attr "type" "sse")])
(define_expand "sse_prologue_save"
[(parallel [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(reg:DI 21)
@@ -19630,19 +19421,6 @@
"femms"
[(set_attr "type" "mmx")])
(define_insn "prefetch_3dnow"
[(prefetch (match_operand:SI 0 "address_operand" "p")
(match_operand:SI 1 "const_int_operand" "n")
(const_int 0))]
"TARGET_3DNOW"
{
if (INTVAL (operands[1]) == 0)
return "prefetch\t%a0";
else
return "prefetchw\t%a0";
}
[(set_attr "type" "mmx")])
(define_insn "pf2id"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
@@ -19820,3 +19598,61 @@
"TARGET_3DNOW_A"
"pswapd\\t{%1, %0|%0, %1}"
[(set_attr "type" "mmx")])
(define_expand "prefetch"
[(prefetch (match_operand:SI 0 "address_operand" "")
(match_operand:SI 1 "const_int_operand" "")
(match_operand:SI 2 "const_int_operand" ""))]
"TARGET_PREFETCH_SSE || TARGET_3DNOW"
{
int rw = INTVAL (operands[1]);
int locality = INTVAL (operands[2]);
if (rw != 0 && rw != 1)
abort ();
if (locality < 0 || locality > 3)
abort ();
/* Use 3dNOW prefetch in case we are asking for write prefetch not
supported by SSE counterpart or the SSE prefetch is not available
(K6 machines). Otherwise use SSE prefetch as it allows specifying
of locality. */
if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
{
operands[2] = GEN_INT (3);
}
else
{
operands[1] = const0_rtx;
}
})
(define_insn "*prefetch_sse"
[(prefetch (match_operand:SI 0 "address_operand" "")
(const_int 0)
(match_operand:SI 1 "const_int_operand" ""))]
"TARGET_PREFETCH_SSE"
{
static const char * const patterns[4] = {
"prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
};
int locality = INTVAL (operands[1]);
if (locality < 0 || locality > 3)
abort ();
return patterns[locality];
}
[(set_attr "type" "sse")])
(define_insn "*prefetch_3dnow"
[(prefetch (match_operand:SI 0 "address_operand" "p")
(match_operand:SI 1 "const_int_operand" "n")
(const_int 3))]
"TARGET_3DNOW"
{
if (INTVAL (operands[1]) == 0)
return "prefetch\t%a0";
else
return "prefetchw\t%a0";
}
[(set_attr "type" "mmx")])
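The simplified expander above only canonicalizes the operands of the generic prefetch rtx and lets the two insns match. A sketch of the user-facing entry point, assuming -msse or -m3dnow (__builtin_prefetch takes an address, a read/write flag, and a locality hint 0-3):

/* Each call expands through the "prefetch" pattern above.  */
void
warm_cache (const char *p)
{
  __builtin_prefetch (p, 0, 3);       /* read, max locality -> prefetcht0   */
  __builtin_prefetch (p + 64, 0, 0);  /* read, no locality  -> prefetchnta  */
  __builtin_prefetch (p, 1, 0);       /* write; with 3DNow! -> prefetchw    */
}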

File: config/i386/xmmintrin.h

@@ -34,11 +34,11 @@
#include <mmintrin.h>
/* The data type intended for user use. */
typedef int __m128 __attribute__ ((mode (TI)));
typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
/* Internal data types for implementing the intrinsics. */
typedef int __v4sf __attribute__ ((mode (V4SF)));
typedef int __v4si __attribute__ ((mode (V4SI)));
typedef int __v4sf __attribute__ ((__mode__(__V4SF__)));
typedef int __v4si __attribute__ ((__mode__(__V4SI__)));
/* Create a selector for use with the SHUFPS instruction. */
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
@@ -680,7 +680,7 @@ _mm_movemask_ps (__m128 __A)
static __inline unsigned int
_mm_getcsr (void)
{
return __builtin_ia32_getmxcsr ();
return __builtin_ia32_stmxcsr ();
}
/* Read exception bits from the control register. */
@@ -712,7 +712,7 @@ _MM_GET_FLUSH_ZERO_MODE (void)
static __inline void
_mm_setcsr (unsigned int __I)
{
__builtin_ia32_setmxcsr (__I);
__builtin_ia32_ldmxcsr (__I);
}
/* Set exception bits in the control register. */
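With the builtin names corrected, the MXCSR accessors work again; a closing sketch using the header's own helpers (assuming -msse, and that this header provides _MM_SET_FLUSH_ZERO_MODE and _MM_FLUSH_ZERO_ON as later versions do):

#include <xmmintrin.h>

/* _MM_SET_FLUSH_ZERO_MODE is implemented on top of _mm_setcsr, which
   now reaches __builtin_ia32_ldmxcsr.  */
void
enable_flush_to_zero (void)
{
  _MM_SET_FLUSH_ZERO_MODE (_MM_FLUSH_ZERO_ON);
}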