i386.c (override_options): If SSE, enable sse prefetch.
* config/i386/i386.c (override_options): If SSE, enable sse prefetch. (ix86_expand_vector_move): New. (bdesc_2arg): Remove andps, andnps, orps, xorps. (ix86_init_mmx_sse_builtins): Make static. Remove composite builtins. Remove old prefetch builtins. Special case the logicals removed above. (ix86_expand_builtin): Likewise. (safe_vector_operand): Use V4SFmode, not TImode. (ix86_expand_store_builtin): Remove shuffle arg. Update callers. (ix86_expand_timode_binop_builtin): New. * config/i386/i386-protos.h: Update. * config/i386/i386.h (enum ix86_builtins): Update. * config/i386/i386.md: Correct predicates on MMX/SSE patterns. Use ix86_expand_vector_move in vector move expanders. (movti_internal, movti_rex64): Add xorps alternative. (sse_clrv4sf): Rename and adjust from sse_clrti. (prefetch): Don't work so hard. (prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC. * config/i386/xmmintrin.h (__m128): Use V4SFmode. (_mm_getcsr, _mm_setcsr): Fix typo in builtin name. From-SVN: r48796
This commit is contained in:
parent
b0d723da36
commit
e37af218ee
@ -1,3 +1,25 @@
|
||||
2002-01-12 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/i386.c (override_options): If SSE, enable sse prefetch.
|
||||
(ix86_expand_vector_move): New.
|
||||
(bdesc_2arg): Remove andps, andnps, orps, xorps.
|
||||
(ix86_init_mmx_sse_builtins): Make static. Remove composite builtins.
|
||||
Remove old prefetch builtins. Special case the logicals removed above.
|
||||
(ix86_expand_builtin): Likewise.
|
||||
(safe_vector_operand): Use V4SFmode, not TImode.
|
||||
(ix86_expand_store_builtin): Remove shuffle arg. Update callers.
|
||||
(ix86_expand_timode_binop_builtin): New.
|
||||
* config/i386/i386-protos.h: Update.
|
||||
* config/i386/i386.h (enum ix86_builtins): Update.
|
||||
* config/i386/i386.md: Correct predicates on MMX/SSE patterns.
|
||||
Use ix86_expand_vector_move in vector move expanders.
|
||||
(movti_internal, movti_rex64): Add xorps alternative.
|
||||
(sse_clrv4sf): Rename and adjust from sse_clrti.
|
||||
(prefetch): Don't work so hard.
|
||||
(prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC.
|
||||
* config/i386/xmmintrin.h (__m128): Use V4SFmode.
|
||||
(_mm_getcsr, _mm_setcsr): Fix typo in builtin name.
|
||||
|
||||
2002-01-11 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/mmintrin.h: New file.
|
||||
|
@ -108,6 +108,7 @@ extern rtx i386_simplify_dwarf_addr PARAMS ((rtx));
|
||||
|
||||
extern void ix86_expand_clear PARAMS ((rtx));
|
||||
extern void ix86_expand_move PARAMS ((enum machine_mode, rtx[]));
|
||||
extern void ix86_expand_vector_move PARAMS ((enum machine_mode, rtx[]));
|
||||
extern void ix86_expand_binary_operator PARAMS ((enum rtx_code,
|
||||
enum machine_mode, rtx[]));
|
||||
extern int ix86_binary_operator_ok PARAMS ((enum rtx_code, enum machine_mode,
|
||||
@ -177,7 +178,6 @@ extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode,
|
||||
tree, int));
|
||||
extern rtx ix86_function_value PARAMS ((tree));
|
||||
extern void ix86_init_builtins PARAMS ((void));
|
||||
extern void ix86_init_mmx_sse_builtins PARAMS ((void));
|
||||
extern rtx ix86_expand_builtin PARAMS ((tree, rtx, rtx, enum machine_mode, int));
|
||||
#endif
|
||||
|
||||
|
@ -684,6 +684,7 @@ static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
|
||||
static void ix86_sched_init PARAMS ((FILE *, int, int));
|
||||
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
|
||||
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
|
||||
static void ix86_init_mmx_sse_builtins PARAMS ((void));
|
||||
|
||||
struct ix86_address
|
||||
{
|
||||
@ -701,7 +702,9 @@ static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
|
||||
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
|
||||
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
|
||||
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
|
||||
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree, int));
|
||||
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
|
||||
tree, rtx));
|
||||
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
|
||||
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
|
||||
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
|
||||
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
|
||||
@ -1164,7 +1167,10 @@ override_options ()
|
||||
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
|
||||
on by -msse. */
|
||||
if (TARGET_SSE)
|
||||
target_flags |= MASK_MMX;
|
||||
{
|
||||
target_flags |= MASK_MMX;
|
||||
x86_prefetch_sse = true;
|
||||
}
|
||||
|
||||
/* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
|
||||
if (TARGET_3DNOW)
|
||||
@ -6661,6 +6667,38 @@ ix86_expand_move (mode, operands)
|
||||
emit_insn (insn);
|
||||
}
|
||||
|
||||
void
|
||||
ix86_expand_vector_move (mode, operands)
|
||||
enum machine_mode mode;
|
||||
rtx operands[];
|
||||
{
|
||||
/* Force constants other than zero into memory. We do not know how
|
||||
the instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], mode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (mode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register if it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], mode)
|
||||
&& !register_operand (operands[1], mode)
|
||||
&& operands[1] != CONST0_RTX (mode))
|
||||
{
|
||||
rtx temp = force_reg (TImode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
return;
|
||||
}
|
||||
|
||||
emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
|
||||
}
|
||||
|
||||
/* Attempt to expand a binary operator. Make the expansion closer to the
|
||||
actual machine, than just general_operand, which will allow 3 separate
|
||||
memory references (one output, two input) in a single insn. */
|
||||
@ -10748,11 +10786,6 @@ static const struct builtin_description bdesc_2arg[] =
|
||||
{ MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
|
||||
{ MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
|
||||
|
||||
{ MASK_SSE, CODE_FOR_sse_andti3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
|
||||
{ MASK_SSE, CODE_FOR_sse_nandti3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
|
||||
{ MASK_SSE, CODE_FOR_sse_iorti3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
|
||||
{ MASK_SSE, CODE_FOR_sse_xorti3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
|
||||
|
||||
{ MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
|
||||
{ MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
|
||||
{ MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
|
||||
@ -10865,7 +10898,7 @@ ix86_init_builtins ()
|
||||
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
|
||||
is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
|
||||
builtins. */
|
||||
void
|
||||
static void
|
||||
ix86_init_mmx_sse_builtins ()
|
||||
{
|
||||
const struct builtin_description * d;
|
||||
@ -10899,14 +10932,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
= build_function_type (integer_type_node,
|
||||
tree_cons (NULL_TREE, V8QI_type_node,
|
||||
endlink));
|
||||
tree int_ftype_v2si
|
||||
= build_function_type (integer_type_node,
|
||||
tree_cons (NULL_TREE, V2SI_type_node,
|
||||
endlink));
|
||||
tree v2si_ftype_int
|
||||
= build_function_type (V2SI_type_node,
|
||||
tree_cons (NULL_TREE, integer_type_node,
|
||||
endlink));
|
||||
tree v4sf_ftype_v4sf_int
|
||||
= build_function_type (V4SF_type_node,
|
||||
tree_cons (NULL_TREE, V4SF_type_node,
|
||||
@ -10976,11 +11001,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
endlink)));
|
||||
tree void_ftype_void
|
||||
= build_function_type (void_type_node, endlink);
|
||||
tree void_ftype_pchar_int
|
||||
= build_function_type (void_type_node,
|
||||
tree_cons (NULL_TREE, pchar_type_node,
|
||||
tree_cons (NULL_TREE, integer_type_node,
|
||||
endlink)));
|
||||
tree void_ftype_unsigned
|
||||
= build_function_type (void_type_node,
|
||||
tree_cons (NULL_TREE, unsigned_type_node,
|
||||
@ -10989,8 +11009,8 @@ ix86_init_mmx_sse_builtins ()
|
||||
= build_function_type (unsigned_type_node, endlink);
|
||||
tree di_ftype_void
|
||||
= build_function_type (long_long_unsigned_type_node, endlink);
|
||||
tree ti_ftype_void
|
||||
= build_function_type (intTI_type_node, endlink);
|
||||
tree v4sf_ftype_void
|
||||
= build_function_type (V4SF_type_node, endlink);
|
||||
tree v2si_ftype_v4sf
|
||||
= build_function_type (V2SI_type_node,
|
||||
tree_cons (NULL_TREE, V4SF_type_node,
|
||||
@ -11007,19 +11027,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
= build_function_type (V4SF_type_node,
|
||||
tree_cons (NULL_TREE, pfloat_type_node,
|
||||
endlink));
|
||||
tree v4sf_ftype_float
|
||||
= build_function_type (V4SF_type_node,
|
||||
tree_cons (NULL_TREE, float_type_node,
|
||||
endlink));
|
||||
tree v4sf_ftype_float_float_float_float
|
||||
= build_function_type (V4SF_type_node,
|
||||
tree_cons (NULL_TREE, float_type_node,
|
||||
tree_cons (NULL_TREE, float_type_node,
|
||||
tree_cons (NULL_TREE,
|
||||
float_type_node,
|
||||
tree_cons (NULL_TREE,
|
||||
float_type_node,
|
||||
endlink)))));
|
||||
/* @@@ the type is bogus */
|
||||
tree v4sf_ftype_v4sf_pv2si
|
||||
= build_function_type (V4SF_type_node,
|
||||
@ -11069,11 +11076,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
tree_cons (NULL_TREE, V2SI_type_node,
|
||||
tree_cons (NULL_TREE, V2SI_type_node,
|
||||
endlink)));
|
||||
tree ti_ftype_ti_ti
|
||||
= build_function_type (intTI_type_node,
|
||||
tree_cons (NULL_TREE, intTI_type_node,
|
||||
tree_cons (NULL_TREE, intTI_type_node,
|
||||
endlink)));
|
||||
tree di_ftype_di_di
|
||||
= build_function_type (long_long_unsigned_type_node,
|
||||
tree_cons (NULL_TREE, long_long_unsigned_type_node,
|
||||
@ -11110,11 +11112,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
V2SF_type_node,
|
||||
endlink)));
|
||||
|
||||
tree void_ftype_pchar
|
||||
= build_function_type (void_type_node,
|
||||
tree_cons (NULL_TREE, pchar_type_node,
|
||||
endlink));
|
||||
|
||||
/* Add all builtins that are more or less simple operations on two
|
||||
operands. */
|
||||
for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
|
||||
@ -11142,9 +11139,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
case V2SImode:
|
||||
type = v2si_ftype_v2si_v2si;
|
||||
break;
|
||||
case TImode:
|
||||
type = ti_ftype_ti_ti;
|
||||
break;
|
||||
case DImode:
|
||||
type = di_ftype_di_di;
|
||||
break;
|
||||
@ -11164,8 +11158,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
}
|
||||
|
||||
/* Add the remaining MMX insns with somewhat more complicated types. */
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_m_from_int", v2si_ftype_int, IX86_BUILTIN_M_FROM_INT);
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_m_to_int", int_ftype_v2si, IX86_BUILTIN_M_TO_INT);
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
|
||||
@ -11199,6 +11191,11 @@ ix86_init_mmx_sse_builtins ()
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
|
||||
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
|
||||
|
||||
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
|
||||
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
|
||||
|
||||
@ -11222,7 +11219,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
|
||||
|
||||
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
|
||||
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_prefetch", void_ftype_pchar_int, IX86_BUILTIN_PREFETCH);
|
||||
|
||||
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", v4hi_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
|
||||
|
||||
@ -11256,8 +11252,6 @@ ix86_init_mmx_sse_builtins ()
|
||||
def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
|
||||
def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
|
||||
def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
|
||||
def_builtin (MASK_3DNOW, "__builtin_ia32_prefetch_3dnow", void_ftype_pchar, IX86_BUILTIN_PREFETCH_3DNOW);
|
||||
def_builtin (MASK_3DNOW, "__builtin_ia32_prefetchw", void_ftype_pchar, IX86_BUILTIN_PREFETCHW);
|
||||
|
||||
/* 3DNow! extension as used in the Athlon CPU. */
|
||||
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
|
||||
@ -11267,14 +11261,7 @@ ix86_init_mmx_sse_builtins ()
|
||||
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
|
||||
def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
|
||||
|
||||
/* Composite intrinsics. */
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_setps1", v4sf_ftype_float, IX86_BUILTIN_SETPS1);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_setps", v4sf_ftype_float_float_float_float, IX86_BUILTIN_SETPS);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_setzerops", ti_ftype_void, IX86_BUILTIN_CLRPS);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_loadps1", v4sf_ftype_pfloat, IX86_BUILTIN_LOADPS1);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_loadrps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADRPS);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_storeps1", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREPS1);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_storerps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORERPS);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
|
||||
}
|
||||
|
||||
/* Errors in the source file can cause expand_expr to return const0_rtx
|
||||
@ -11293,8 +11280,8 @@ safe_vector_operand (x, mode)
|
||||
emit_insn (gen_mmx_clrdi (mode == DImode ? x
|
||||
: gen_rtx_SUBREG (DImode, x, 0)));
|
||||
else
|
||||
emit_insn (gen_sse_clrti (mode == TImode ? x
|
||||
: gen_rtx_SUBREG (TImode, x, 0)));
|
||||
emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
|
||||
: gen_rtx_SUBREG (V4SFmode, x, 0)));
|
||||
return x;
|
||||
}
|
||||
|
||||
@ -11342,13 +11329,45 @@ ix86_expand_binop_builtin (icode, arglist, target)
|
||||
return target;
|
||||
}
|
||||
|
||||
/* In type_for_mode we restrict the ability to create TImode types
|
||||
to hosts with 64-bit H_W_I. So we've defined the SSE logicals
|
||||
to have a V4SFmode signature. Convert them in-place to TImode. */
|
||||
|
||||
static rtx
|
||||
ix86_expand_timode_binop_builtin (icode, arglist, target)
|
||||
enum insn_code icode;
|
||||
tree arglist;
|
||||
rtx target;
|
||||
{
|
||||
rtx pat;
|
||||
tree arg0 = TREE_VALUE (arglist);
|
||||
tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
|
||||
rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
|
||||
|
||||
op0 = gen_lowpart (TImode, op0);
|
||||
op1 = gen_lowpart (TImode, op1);
|
||||
target = gen_reg_rtx (TImode);
|
||||
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
|
||||
op0 = copy_to_mode_reg (TImode, op0);
|
||||
if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
|
||||
op1 = copy_to_mode_reg (TImode, op1);
|
||||
|
||||
pat = GEN_FCN (icode) (target, op0, op1);
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
|
||||
return gen_lowpart (V4SFmode, target);
|
||||
}
|
||||
|
||||
/* Subroutine of ix86_expand_builtin to take care of stores. */
|
||||
|
||||
static rtx
|
||||
ix86_expand_store_builtin (icode, arglist, shuffle)
|
||||
ix86_expand_store_builtin (icode, arglist)
|
||||
enum insn_code icode;
|
||||
tree arglist;
|
||||
int shuffle;
|
||||
{
|
||||
rtx pat;
|
||||
tree arg0 = TREE_VALUE (arglist);
|
||||
@ -11362,10 +11381,6 @@ ix86_expand_store_builtin (icode, arglist, shuffle)
|
||||
op1 = safe_vector_operand (op1, mode1);
|
||||
|
||||
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
|
||||
if (shuffle >= 0 || ! (*insn_data[icode].operand[1].predicate) (op1, mode1))
|
||||
op1 = copy_to_mode_reg (mode1, op1);
|
||||
if (shuffle >= 0)
|
||||
emit_insn (gen_sse_shufps (op1, op1, op1, GEN_INT (shuffle)));
|
||||
pat = GEN_FCN (icode) (op0, op1);
|
||||
if (pat)
|
||||
emit_insn (pat);
|
||||
@ -11568,7 +11583,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
enum insn_code icode;
|
||||
tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
|
||||
tree arglist = TREE_OPERAND (exp, 1);
|
||||
tree arg0, arg1, arg2, arg3;
|
||||
tree arg0, arg1, arg2;
|
||||
rtx op0, op1, op2, pat;
|
||||
enum machine_mode tmode, mode0, mode1, mode2;
|
||||
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
|
||||
@ -11583,19 +11598,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
emit_insn (gen_sfence ());
|
||||
return 0;
|
||||
|
||||
case IX86_BUILTIN_M_FROM_INT:
|
||||
target = gen_reg_rtx (DImode);
|
||||
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
|
||||
emit_move_insn (gen_rtx_SUBREG (SImode, target, 0), op0);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_M_TO_INT:
|
||||
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
|
||||
op0 = copy_to_mode_reg (DImode, op0);
|
||||
target = gen_reg_rtx (SImode);
|
||||
emit_move_insn (target, gen_rtx_SUBREG (SImode, op0, 0));
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_PEXTRW:
|
||||
icode = CODE_FOR_mmx_pextrw;
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
@ -11689,6 +11691,19 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
case IX86_BUILTIN_RCPSS:
|
||||
return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
|
||||
|
||||
case IX86_BUILTIN_ANDPS:
|
||||
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
|
||||
arglist, target);
|
||||
case IX86_BUILTIN_ANDNPS:
|
||||
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
|
||||
arglist, target);
|
||||
case IX86_BUILTIN_ORPS:
|
||||
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
|
||||
arglist, target);
|
||||
case IX86_BUILTIN_XORPS:
|
||||
return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
|
||||
arglist, target);
|
||||
|
||||
case IX86_BUILTIN_LOADAPS:
|
||||
return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
|
||||
|
||||
@ -11696,15 +11711,15 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
|
||||
|
||||
case IX86_BUILTIN_STOREAPS:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, -1);
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
|
||||
case IX86_BUILTIN_STOREUPS:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist, -1);
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
|
||||
|
||||
case IX86_BUILTIN_LOADSS:
|
||||
return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
|
||||
|
||||
case IX86_BUILTIN_STORESS:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist, -1);
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
|
||||
|
||||
case IX86_BUILTIN_LOADHPS:
|
||||
case IX86_BUILTIN_LOADLPS:
|
||||
@ -11753,9 +11768,9 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
return 0;
|
||||
|
||||
case IX86_BUILTIN_MOVNTPS:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist, -1);
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
|
||||
case IX86_BUILTIN_MOVNTQ:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist, -1);
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
|
||||
|
||||
case IX86_BUILTIN_LDMXCSR:
|
||||
op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
|
||||
@ -11769,29 +11784,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
emit_insn (gen_stmxcsr (target));
|
||||
return copy_to_mode_reg (SImode, target);
|
||||
|
||||
case IX86_BUILTIN_PREFETCH:
|
||||
icode = CODE_FOR_prefetch_sse;
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
|
||||
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
|
||||
mode0 = insn_data[icode].operand[0].mode;
|
||||
mode1 = insn_data[icode].operand[1].mode;
|
||||
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
|
||||
{
|
||||
/* @@@ better error message */
|
||||
error ("selector must be an immediate");
|
||||
return const0_rtx;
|
||||
}
|
||||
|
||||
op0 = copy_to_mode_reg (Pmode, op0);
|
||||
pat = GEN_FCN (icode) (op0, op1);
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_SHUFPS:
|
||||
icode = CODE_FOR_sse_shufps;
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
@ -11914,19 +11906,6 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
case IX86_BUILTIN_PMULHRW:
|
||||
return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
|
||||
|
||||
case IX86_BUILTIN_PREFETCH_3DNOW:
|
||||
case IX86_BUILTIN_PREFETCHW:
|
||||
icode = CODE_FOR_prefetch_3dnow;
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
|
||||
op1 = (fcode == IX86_BUILTIN_PREFETCH_3DNOW ? const0_rtx : const1_rtx);
|
||||
mode0 = insn_data[icode].operand[0].mode;
|
||||
pat = GEN_FCN (icode) (copy_to_mode_reg (Pmode, op0), op1);
|
||||
if (! pat)
|
||||
return NULL_RTX;
|
||||
emit_insn (pat);
|
||||
return NULL_RTX;
|
||||
|
||||
case IX86_BUILTIN_PF2IW:
|
||||
return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
|
||||
|
||||
@ -11945,57 +11924,11 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
|
||||
case IX86_BUILTIN_PSWAPDSF:
|
||||
return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
|
||||
|
||||
/* Composite intrinsics. */
|
||||
case IX86_BUILTIN_SETPS1:
|
||||
target = assign_386_stack_local (SFmode, 0);
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
emit_move_insn (adjust_address (target, SFmode, 0),
|
||||
expand_expr (arg0, NULL_RTX, VOIDmode, 0));
|
||||
op0 = gen_reg_rtx (V4SFmode);
|
||||
emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
|
||||
emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
|
||||
return op0;
|
||||
|
||||
case IX86_BUILTIN_SETPS:
|
||||
target = assign_386_stack_local (V4SFmode, 0);
|
||||
arg0 = TREE_VALUE (arglist);
|
||||
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
|
||||
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
|
||||
arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
|
||||
emit_move_insn (adjust_address (target, SFmode, 0),
|
||||
expand_expr (arg0, NULL_RTX, VOIDmode, 0));
|
||||
emit_move_insn (adjust_address (target, SFmode, 4),
|
||||
expand_expr (arg1, NULL_RTX, VOIDmode, 0));
|
||||
emit_move_insn (adjust_address (target, SFmode, 8),
|
||||
expand_expr (arg2, NULL_RTX, VOIDmode, 0));
|
||||
emit_move_insn (adjust_address (target, SFmode, 12),
|
||||
expand_expr (arg3, NULL_RTX, VOIDmode, 0));
|
||||
op0 = gen_reg_rtx (V4SFmode);
|
||||
emit_insn (gen_sse_movaps (op0, target));
|
||||
return op0;
|
||||
|
||||
case IX86_BUILTIN_CLRPS:
|
||||
target = gen_reg_rtx (TImode);
|
||||
emit_insn (gen_sse_clrti (target));
|
||||
case IX86_BUILTIN_SSE_ZERO:
|
||||
target = gen_reg_rtx (V4SFmode);
|
||||
emit_insn (gen_sse_clrv4sf (target));
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_LOADRPS:
|
||||
target = ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist,
|
||||
gen_reg_rtx (V4SFmode), 1);
|
||||
emit_insn (gen_sse_shufps (target, target, target, GEN_INT (0x1b)));
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_LOADPS1:
|
||||
target = ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist,
|
||||
gen_reg_rtx (V4SFmode), 1);
|
||||
emit_insn (gen_sse_shufps (target, target, target, const0_rtx));
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_STOREPS1:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0);
|
||||
case IX86_BUILTIN_STORERPS:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist, 0x1B);
|
||||
|
||||
case IX86_BUILTIN_MMX_ZERO:
|
||||
target = gen_reg_rtx (DImode);
|
||||
emit_insn (gen_mmx_clrdi (target));
|
||||
|
@ -2089,8 +2089,6 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_CVTSS2SI,
|
||||
IX86_BUILTIN_CVTTPS2PI,
|
||||
IX86_BUILTIN_CVTTSS2SI,
|
||||
IX86_BUILTIN_M_FROM_INT,
|
||||
IX86_BUILTIN_M_TO_INT,
|
||||
|
||||
IX86_BUILTIN_MAXPS,
|
||||
IX86_BUILTIN_MAXSS,
|
||||
@ -2215,7 +2213,6 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_LDMXCSR,
|
||||
IX86_BUILTIN_STMXCSR,
|
||||
IX86_BUILTIN_SFENCE,
|
||||
IX86_BUILTIN_PREFETCH,
|
||||
|
||||
/* 3DNow! Original */
|
||||
IX86_BUILTIN_FEMMS,
|
||||
@ -2238,8 +2235,6 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_PFSUBR,
|
||||
IX86_BUILTIN_PI2FD,
|
||||
IX86_BUILTIN_PMULHRW,
|
||||
IX86_BUILTIN_PREFETCH_3DNOW, /* PREFETCH already used */
|
||||
IX86_BUILTIN_PREFETCHW,
|
||||
|
||||
/* 3DNow! Athlon Extensions */
|
||||
IX86_BUILTIN_PF2IW,
|
||||
@ -2249,16 +2244,7 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_PSWAPDSI,
|
||||
IX86_BUILTIN_PSWAPDSF,
|
||||
|
||||
/* Composite builtins, expand to more than one insn. */
|
||||
IX86_BUILTIN_SETPS1,
|
||||
IX86_BUILTIN_SETPS,
|
||||
IX86_BUILTIN_CLRPS,
|
||||
IX86_BUILTIN_SETRPS,
|
||||
IX86_BUILTIN_LOADPS1,
|
||||
IX86_BUILTIN_LOADRPS,
|
||||
IX86_BUILTIN_STOREPS1,
|
||||
IX86_BUILTIN_STORERPS,
|
||||
|
||||
IX86_BUILTIN_SSE_ZERO,
|
||||
IX86_BUILTIN_MMX_ZERO,
|
||||
|
||||
IX86_BUILTIN_MAX
|
||||
|
@ -81,7 +81,6 @@
|
||||
;; 32 This is a `maskmov' operation.
|
||||
;; 33 This is a `movmsk' operation.
|
||||
;; 34 This is a `non-temporal' move.
|
||||
;; 35 This is a `prefetch' (SSE) operation.
|
||||
;; 36 This is used to distinguish COMISS from UCOMISS.
|
||||
;; 37 This is a `ldmxcsr' operation.
|
||||
;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
|
||||
@ -17686,7 +17685,7 @@
|
||||
|
||||
(define_insn "movv4sf_internal"
|
||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
|
||||
(match_operand:V4SF 1 "general_operand" "xm,x"))]
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
|
||||
"TARGET_SSE"
|
||||
;; @@@ let's try to use movaps here.
|
||||
"movaps\t{%1, %0|%0, %1}"
|
||||
@ -17694,7 +17693,7 @@
|
||||
|
||||
(define_insn "movv4si_internal"
|
||||
[(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
|
||||
(match_operand:V4SI 1 "general_operand" "xm,x"))]
|
||||
(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
|
||||
"TARGET_SSE"
|
||||
;; @@@ let's try to use movaps here.
|
||||
"movaps\t{%1, %0|%0, %1}"
|
||||
@ -17702,28 +17701,28 @@
|
||||
|
||||
(define_insn "movv8qi_internal"
|
||||
[(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
|
||||
(match_operand:V8QI 1 "general_operand" "ym,y"))]
|
||||
(match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
|
||||
"TARGET_MMX"
|
||||
"movq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "mmx")])
|
||||
|
||||
(define_insn "movv4hi_internal"
|
||||
[(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
|
||||
(match_operand:V4HI 1 "general_operand" "ym,y"))]
|
||||
(match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
|
||||
"TARGET_MMX"
|
||||
"movq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "mmx")])
|
||||
|
||||
(define_insn "movv2si_internal"
|
||||
[(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
|
||||
(match_operand:V2SI 1 "general_operand" "ym,y"))]
|
||||
(match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
|
||||
"TARGET_MMX"
|
||||
"movq\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "mmx")])
|
||||
|
||||
(define_insn "movv2sf_internal"
|
||||
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
|
||||
(match_operand:V2SF 1 "general_operand" "ym,y"))]
|
||||
(match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
|
||||
"TARGET_3DNOW"
|
||||
"movq\\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "mmx")])
|
||||
@ -17734,34 +17733,10 @@
|
||||
"TARGET_SSE || TARGET_64BIT"
|
||||
{
|
||||
if (TARGET_64BIT)
|
||||
{
|
||||
ix86_expand_move (TImode, operands);
|
||||
DONE;
|
||||
}
|
||||
/* For constants other than zero into memory. We do not know how the
|
||||
instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], TImode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
|
||||
emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (TImode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register if it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], TImode)
|
||||
&& !register_operand (operands[1], TImode)
|
||||
&& operands[1] != CONST0_RTX (TImode))
|
||||
{
|
||||
rtx temp = force_reg (TImode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
DONE;
|
||||
}
|
||||
ix86_expand_move (TImode, operands);
|
||||
else
|
||||
ix86_expand_vector_move (TImode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "movv4sf"
|
||||
@ -17769,30 +17744,8 @@
|
||||
(match_operand:V4SF 1 "general_operand" ""))]
|
||||
"TARGET_SSE"
|
||||
{
|
||||
/* For constants other than zero into memory. We do not know how the
|
||||
instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], V4SFmode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
|
||||
emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (V4SFmode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register if it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], V4SFmode)
|
||||
&& !register_operand (operands[1], V4SFmode)
|
||||
&& operands[1] != CONST0_RTX (V4SFmode))
|
||||
{
|
||||
rtx temp = force_reg (V4SFmode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
DONE;
|
||||
}
|
||||
ix86_expand_vector_move (V4SFmode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "movv4si"
|
||||
@ -17800,30 +17753,8 @@
|
||||
(match_operand:V4SI 1 "general_operand" ""))]
|
||||
"TARGET_MMX"
|
||||
{
|
||||
/* For constants other than zero into memory. We do not know how the
|
||||
instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], V4SImode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
|
||||
emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (V4SImode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register if it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], V4SImode)
|
||||
&& !register_operand (operands[1], V4SImode)
|
||||
&& operands[1] != CONST0_RTX (V4SImode))
|
||||
{
|
||||
rtx temp = force_reg (V4SImode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
DONE;
|
||||
}
|
||||
ix86_expand_vector_move (V4SImode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "movv2si"
|
||||
@ -17831,30 +17762,8 @@
|
||||
(match_operand:V2SI 1 "general_operand" ""))]
|
||||
"TARGET_MMX"
|
||||
{
|
||||
/* For constants other than zero into memory. We do not know how the
|
||||
instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], V2SImode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
|
||||
emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (V2SImode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register if it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], V2SImode)
|
||||
&& !register_operand (operands[1], V2SImode)
|
||||
&& operands[1] != CONST0_RTX (V2SImode))
|
||||
{
|
||||
rtx temp = force_reg (V2SImode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
DONE;
|
||||
}
|
||||
ix86_expand_vector_move (V2SImode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "movv4hi"
|
||||
@ -17862,30 +17771,8 @@
|
||||
(match_operand:V4HI 1 "general_operand" ""))]
|
||||
"TARGET_MMX"
|
||||
{
|
||||
/* For constants other than zero into memory. We do not know how the
|
||||
instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], V4HImode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
|
||||
emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (V4HImode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register if it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], V4HImode)
|
||||
&& !register_operand (operands[1], V4HImode)
|
||||
&& operands[1] != CONST0_RTX (V4HImode))
|
||||
{
|
||||
rtx temp = force_reg (V4HImode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
DONE;
|
||||
}
|
||||
ix86_expand_vector_move (V4HImode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "movv8qi"
|
||||
@ -17893,65 +17780,18 @@
|
||||
(match_operand:V8QI 1 "general_operand" ""))]
|
||||
"TARGET_MMX"
|
||||
{
|
||||
/* For constants other than zero into memory. We do not know how the
|
||||
instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], V8QImode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
|
||||
emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (V8QImode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register if it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], V8QImode)
|
||||
&& !register_operand (operands[1], V8QImode)
|
||||
&& operands[1] != CONST0_RTX (V8QImode))
|
||||
{
|
||||
rtx temp = force_reg (V8QImode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
DONE;
|
||||
}
|
||||
ix86_expand_vector_move (V8QImode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "movv2sf"
|
||||
[(set (match_operand:V2SF 0 "general_operand" "")
|
||||
(match_operand:V2SF 1 "general_operand" ""))]
|
||||
"TARGET_3DNOW"
|
||||
"
|
||||
{
|
||||
/* For constants other than zero into memory. We do not know how the
|
||||
instructions used to build constants modify the upper 64 bits
|
||||
of the register, once we have that information we may be able
|
||||
to handle some of them more efficiently. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& register_operand (operands[0], V2SFmode)
|
||||
&& CONSTANT_P (operands[1]))
|
||||
{
|
||||
rtx addr = gen_reg_rtx (Pmode);
|
||||
|
||||
emit_move_insn (addr,
|
||||
XEXP (force_const_mem (V2SFmode, operands[1]), 0));
|
||||
operands[1] = gen_rtx_MEM (V2SFmode, addr);
|
||||
}
|
||||
|
||||
/* Make operand1 a register is it isn't already. */
|
||||
if ((reload_in_progress | reload_completed) == 0
|
||||
&& !register_operand (operands[0], V2SFmode)
|
||||
&& !register_operand (operands[1], V2SFmode)
|
||||
&& (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
|
||||
&& operands[1] != CONST0_RTX (V2SFmode))
|
||||
{
|
||||
rtx temp = force_reg (V2SFmode, operands[1]);
|
||||
emit_move_insn (operands[0], temp);
|
||||
DONE;
|
||||
}
|
||||
}")
|
||||
ix86_expand_vector_move (V2SFmode, operands);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn_and_split "*pushti"
|
||||
[(set (match_operand:TI 0 "push_operand" "=<")
|
||||
@ -18031,25 +17871,27 @@
|
||||
[(set_attr "type" "mmx")])
|
||||
|
||||
(define_insn "movti_internal"
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
|
||||
(match_operand:TI 1 "general_operand" "xm,x"))]
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
|
||||
(match_operand:TI 1 "general_operand" "O,xm,x"))]
|
||||
"TARGET_SSE && !TARGET_64BIT"
|
||||
"@
|
||||
xorps\t%0, %0
|
||||
movaps\t{%1, %0|%0, %1}
|
||||
movaps\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "*movti_rex64"
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x")
|
||||
(match_operand:TI 1 "general_operand" "riFo,riF,x,m"))]
|
||||
[(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
|
||||
(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
|
||||
"TARGET_64BIT
|
||||
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
|
||||
"@
|
||||
#
|
||||
#
|
||||
xorps\t%0, %0
|
||||
movaps\\t{%1, %0|%0, %1}
|
||||
movaps\\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "*,*,sse,sse")
|
||||
[(set_attr "type" "*,*,sse,sse,sse")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_split
|
||||
@ -18064,7 +17906,8 @@
|
||||
;; movaps or movups
|
||||
(define_insn "sse_movaps"
|
||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
|
||||
"TARGET_SSE"
|
||||
"@
|
||||
movaps\t{%1, %0|%0, %1}
|
||||
@ -18073,7 +17916,8 @@
|
||||
|
||||
(define_insn "sse_movups"
|
||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
|
||||
"TARGET_SSE"
|
||||
"@
|
||||
movups\t{%1, %0|%0, %1}
|
||||
@ -18154,7 +17998,8 @@
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
|
||||
(const_int 12)))]
|
||||
"TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
|
||||
"TARGET_SSE
|
||||
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
|
||||
"movhps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
@ -18164,7 +18009,8 @@
|
||||
(match_operand:V4SF 1 "nonimmediate_operand" "0,0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "m,x")
|
||||
(const_int 3)))]
|
||||
"TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
|
||||
"TARGET_SSE
|
||||
&& (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
|
||||
"movlps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
@ -18220,10 +18066,11 @@
|
||||
|
||||
(define_insn "vmaddv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"addss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18231,17 +18078,18 @@
|
||||
(define_insn "subv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
|
||||
"TARGET_SSE"
|
||||
"subps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "vmsubv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"subss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18256,10 +18104,11 @@
|
||||
|
||||
(define_insn "vmmulv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"mulss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18274,10 +18123,11 @@
|
||||
|
||||
(define_insn "vmdivv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(div:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"divss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18287,53 +18137,57 @@
|
||||
|
||||
(define_insn "rcpv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
|
||||
"TARGET_SSE"
|
||||
"rcpps\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "vmrcpv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
|
||||
(match_operand:V4SF 2 "register_operand" "0")
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
|
||||
(match_operand:V4SF 2 "register_operand" "0")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"rcpss\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "rsqrtv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
|
||||
(unspec:V4SF
|
||||
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
|
||||
"TARGET_SSE"
|
||||
"rsqrtps\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "vmrsqrtv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
|
||||
(match_operand:V4SF 2 "register_operand" "0")
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
|
||||
(match_operand:V4SF 2 "register_operand" "0")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"rsqrtss\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "sqrtv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
|
||||
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
|
||||
"TARGET_SSE"
|
||||
"sqrtps\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "vmsqrtv4sf2"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
|
||||
(match_operand:V4SF 2 "register_operand" "0")
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
|
||||
(match_operand:V4SF 2 "register_operand" "0")
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"sqrtss\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
|
||||
;; SSE logical operations.
|
||||
|
||||
;; These are not called andti3 etc. because we really really don't want
|
||||
@ -18519,9 +18373,9 @@
|
||||
|
||||
;; Use xor, but don't show input operands so they aren't live before
|
||||
;; this insn.
|
||||
(define_insn "sse_clrti"
|
||||
[(set (match_operand:TI 0 "register_operand" "=x")
|
||||
(unspec:TI [(const_int 0)] 45))]
|
||||
(define_insn "sse_clrv4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(unspec:V4SF [(const_int 0)] 45))]
|
||||
"TARGET_SSE"
|
||||
"xorps\t{%0, %0|%0, %0}"
|
||||
[(set_attr "type" "sse")
|
||||
@ -18532,8 +18386,8 @@
|
||||
(define_insn "maskcmpv4sf3"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x")
|
||||
(match_operator:V4SI 3 "sse_comparison_operator"
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "register_operand" "x")]))]
|
||||
"TARGET_SSE"
|
||||
"cmp%D3ps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18542,24 +18396,23 @@
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x")
|
||||
(not:V4SI
|
||||
(match_operator:V4SI 3 "sse_comparison_operator"
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "register_operand" "x")])))]
|
||||
"TARGET_SSE"
|
||||
"*
|
||||
{
|
||||
if (GET_CODE (operands[3]) == UNORDERED)
|
||||
return \"cmpordps\t{%2, %0|%0, %2}\";
|
||||
|
||||
return \"cmpn%D3ps\t{%2, %0|%0, %2}\";
|
||||
}"
|
||||
return "cmpordps\t{%2, %0|%0, %2}";
|
||||
else
|
||||
return "cmpn%D3ps\t{%2, %0|%0, %2}";
|
||||
}
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "vmmaskcmpv4sf3"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x")
|
||||
(vec_merge:V4SI
|
||||
(match_operator:V4SI 3 "sse_comparison_operator"
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "x")])
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "register_operand" "x")])
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
@ -18571,18 +18424,17 @@
|
||||
(vec_merge:V4SI
|
||||
(not:V4SI
|
||||
(match_operator:V4SI 3 "sse_comparison_operator"
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "x")]))
|
||||
[(match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "register_operand" "x")]))
|
||||
(subreg:V4SI (match_dup 1) 0)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"*
|
||||
{
|
||||
if (GET_CODE (operands[3]) == UNORDERED)
|
||||
return \"cmpordss\t{%2, %0|%0, %2}\";
|
||||
|
||||
return \"cmpn%D3ss\t{%2, %0|%0, %2}\";
|
||||
}"
|
||||
return "cmpordss\t{%2, %0|%0, %2}";
|
||||
else
|
||||
return "cmpn%D3ss\t{%2, %0|%0, %2}";
|
||||
}
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "sse_comi"
|
||||
@ -18663,10 +18515,11 @@
|
||||
|
||||
(define_insn "vmsmaxv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"maxss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18681,10 +18534,11 @@
|
||||
|
||||
(define_insn "vmsminv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
(vec_merge:V4SF
|
||||
(smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
|
||||
(match_dup 1)
|
||||
(const_int 1)))]
|
||||
"TARGET_SSE"
|
||||
"minss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18694,56 +18548,58 @@
|
||||
|
||||
(define_insn "cvtpi2ps"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(vec_duplicate:V4SF
|
||||
(float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
|
||||
(const_int 12)))]
|
||||
(vec_merge:V4SF
|
||||
(match_operand:V4SF 1 "register_operand" "0")
|
||||
(vec_duplicate:V4SF
|
||||
(float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
|
||||
(const_int 12)))]
|
||||
"TARGET_SSE"
|
||||
"cvtpi2ps\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "cvtps2pi"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=y")
|
||||
(vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
|
||||
(parallel
|
||||
[(const_int 0)
|
||||
(const_int 1)])))]
|
||||
(vec_select:V2SI
|
||||
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 0) (const_int 1)])))]
|
||||
"TARGET_SSE"
|
||||
"cvtps2pi\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "cvttps2pi"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=y")
|
||||
(vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
|
||||
(parallel
|
||||
[(const_int 0)
|
||||
(const_int 1)])))]
|
||||
(vec_select:V2SI
|
||||
(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
|
||||
(parallel [(const_int 0) (const_int 1)])))]
|
||||
"TARGET_SSE"
|
||||
"cvttps2pi\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "cvtsi2ss"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=x")
|
||||
(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
|
||||
(vec_duplicate:V4SF
|
||||
(float:SF (match_operand:SI 2 "register_operand" "rm")))
|
||||
(const_int 14)))]
|
||||
(vec_merge:V4SF
|
||||
(match_operand:V4SF 1 "register_operand" "0")
|
||||
(vec_duplicate:V4SF
|
||||
(float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
|
||||
(const_int 14)))]
|
||||
"TARGET_SSE"
|
||||
"cvtsi2ss\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "cvtss2si"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
|
||||
(parallel [(const_int 0)])))]
|
||||
(vec_select:SI
|
||||
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 0)])))]
|
||||
"TARGET_SSE"
|
||||
"cvtss2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_insn "cvttss2si"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
|
||||
(parallel [(const_int 0)])))]
|
||||
(vec_select:SI
|
||||
(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
|
||||
(parallel [(const_int 0)])))]
|
||||
"TARGET_SSE"
|
||||
"cvttss2si\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sse")])
|
||||
@ -18877,8 +18733,10 @@
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=y")
|
||||
(truncate:V4HI
|
||||
(lshiftrt:V4SI
|
||||
(mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
|
||||
(sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
|
||||
(mult:V4SI (sign_extend:V4SI
|
||||
(match_operand:V4HI 1 "register_operand" "0"))
|
||||
(sign_extend:V4SI
|
||||
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
|
||||
(const_int 16))))]
|
||||
"TARGET_MMX"
|
||||
"pmulhw\t{%2, %0|%0, %2}"
|
||||
@ -18888,8 +18746,10 @@
|
||||
[(set (match_operand:V4HI 0 "register_operand" "=y")
|
||||
(truncate:V4HI
|
||||
(lshiftrt:V4SI
|
||||
(mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
|
||||
(zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
|
||||
(mult:V4SI (zero_extend:V4SI
|
||||
(match_operand:V4HI 1 "register_operand" "0"))
|
||||
(zero_extend:V4SI
|
||||
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))
|
||||
(const_int 16))))]
|
||||
"TARGET_SSE || TARGET_3DNOW_A"
|
||||
"pmulhuw\t{%2, %0|%0, %2}"
|
||||
@ -18899,12 +18759,12 @@
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=y")
|
||||
(plus:V2SI
|
||||
(mult:V2SI
|
||||
(sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
|
||||
(parallel [(const_int 0)
|
||||
(const_int 2)])))
|
||||
(sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
|
||||
(parallel [(const_int 0)
|
||||
(const_int 2)]))))
|
||||
(sign_extend:V2SI
|
||||
(vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
|
||||
(parallel [(const_int 0) (const_int 2)])))
|
||||
(sign_extend:V2SI
|
||||
(vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
|
||||
(parallel [(const_int 0) (const_int 2)]))))
|
||||
(mult:V2SI
|
||||
(sign_extend:V2SI (vec_select:V2HI (match_dup 1)
|
||||
(parallel [(const_int 1)
|
||||
@ -19404,75 +19264,6 @@
|
||||
[(set_attr "type" "sse")
|
||||
(set_attr "memory" "unknown")])
|
||||
|
||||
(define_expand "prefetch"
|
||||
[(prefetch (match_operand:SI 0 "address_operand" "p")
|
||||
(match_operand:SI 1 "const_int_operand" "n")
|
||||
(match_operand:SI 2 "const_int_operand" "n"))]
|
||||
"TARGET_PREFETCH_SSE || TARGET_3DNOW"
|
||||
"
|
||||
{
|
||||
int rw = INTVAL (operands[1]);
|
||||
int locality = INTVAL (operands[2]);
|
||||
if (rw != 0 && rw != 1)
|
||||
abort ();
|
||||
if (locality < 0 || locality > 3)
|
||||
abort ();
|
||||
/* Use 3dNOW prefetch in case we are asking for write prefetch not
|
||||
suported by SSE counterpart or the SSE prefetch is not available
|
||||
(K6 machines). Otherwise use SSE prefetch as it allows specifying
|
||||
of locality. */
|
||||
if (TARGET_3DNOW
|
||||
&& (!TARGET_PREFETCH_SSE || rw))
|
||||
{
|
||||
emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
switch (locality)
|
||||
{
|
||||
case 0: /* No temporal locality. */
|
||||
i = 0;
|
||||
break;
|
||||
case 1: /* Lowest level of temporal locality. */
|
||||
i = 3;
|
||||
break;
|
||||
case 2: /* Moderate level of temporal locality. */
|
||||
i = 2;
|
||||
break;
|
||||
case 3: /* Highest level of temporal locality. */
|
||||
i = 1;
|
||||
break;
|
||||
default:
|
||||
abort (); /* We already checked for valid values above. */
|
||||
break;
|
||||
}
|
||||
emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
|
||||
}
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_insn "prefetch_sse"
|
||||
[(unspec [(match_operand:SI 0 "address_operand" "p")
|
||||
(match_operand:SI 1 "immediate_operand" "n")] 35)]
|
||||
"TARGET_PREFETCH_SSE"
|
||||
{
|
||||
switch (INTVAL (operands[1]))
|
||||
{
|
||||
case 0:
|
||||
return "prefetchnta\t%a0";
|
||||
case 1:
|
||||
return "prefetcht0\t%a0";
|
||||
case 2:
|
||||
return "prefetcht1\t%a0";
|
||||
case 3:
|
||||
return "prefetcht2\t%a0";
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
[(set_attr "type" "sse")])
|
||||
|
||||
(define_expand "sse_prologue_save"
|
||||
[(parallel [(set (match_operand:BLK 0 "" "")
|
||||
(unspec:BLK [(reg:DI 21)
|
||||
@ -19630,19 +19421,6 @@
|
||||
"femms"
|
||||
[(set_attr "type" "mmx")])
|
||||
|
||||
(define_insn "prefetch_3dnow"
|
||||
[(prefetch (match_operand:SI 0 "address_operand" "p")
|
||||
(match_operand:SI 1 "const_int_operand" "n")
|
||||
(const_int 0))]
|
||||
"TARGET_3DNOW"
|
||||
{
|
||||
if (INTVAL (operands[1]) == 0)
|
||||
return "prefetch\t%a0";
|
||||
else
|
||||
return "prefetchw\t%a0";
|
||||
}
|
||||
[(set_attr "type" "mmx")])
|
||||
|
||||
(define_insn "pf2id"
|
||||
[(set (match_operand:V2SI 0 "register_operand" "=y")
|
||||
(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
|
||||
@ -19820,3 +19598,61 @@
|
||||
"TARGET_3DNOW_A"
|
||||
"pswapd\\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "mmx")])
|
||||
|
||||
;; Expander for the standard "prefetch" pattern.  Operand 0 is the address,
;; operand 1 the read/write flag (0 or 1) and operand 2 the locality hint
;; (0..3).  The preparation code only adjusts operands; since it does not
;; finish with DONE, the PREFETCH rtx from the template is what is emitted.
(define_expand "prefetch"
  [(prefetch (match_operand:SI 0 "address_operand" "")
             (match_operand:SI 1 "const_int_operand" "")
             (match_operand:SI 2 "const_int_operand" ""))]
  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
{
  const int rw = INTVAL (operands[1]);
  const int locality = INTVAL (operands[2]);

  /* Both hints must be within their documented ranges.  */
  if (rw < 0 || rw > 1 || locality < 0 || locality > 3)
    abort ();

  /* Use the 3DNow! prefetch when a write prefetch is requested, which the
     SSE counterpart does not support, or when SSE prefetch is not
     available (K6 machines); the locality operand is then forced to 3.
     Otherwise use the SSE prefetch, which allows specifying the locality
     but not the read/write flag, so that flag is forced to 0.  */
  if (TARGET_3DNOW && (rw || !TARGET_PREFETCH_SSE))
    operands[2] = GEN_INT (3);
  else
    operands[1] = const0_rtx;
})
|
||||
|
||||
;; SSE prefetch.  Matches a PREFETCH whose read/write flag is the constant
;; 0; operand 1 is the locality hint and selects the instruction:
;; 0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0.
;; Operand 0 carries the "p" constraint so that the address is processed
;; as an address operand, as in the *prefetch_3dnow pattern.
(define_insn "*prefetch_sse"
  [(prefetch (match_operand:SI 0 "address_operand" "p")
             (const_int 0)
             (match_operand:SI 1 "const_int_operand" ""))]
  "TARGET_PREFETCH_SSE"
{
  static const char * const patterns[4] = {
   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
  };

  int locality = INTVAL (operands[1]);

  /* The table above has exactly four entries; refuse anything else.  */
  if (locality < 0 || locality > 3)
    abort ();

  return patterns[locality];
}
  [(set_attr "type" "sse")])
|
||||
|
||||
;; 3DNow! prefetch.  Operand 1 is the read/write flag: 0 emits "prefetch",
;; anything else emits "prefetchw".  The locality operand is matched as the
;; constant 3 because that is the value the "prefetch" expander stores in
;; operands[2] when it selects the 3DNow! form; matching (const_int 0) here
;; would leave the expander's output without any recognizing insn.
(define_insn "*prefetch_3dnow"
  [(prefetch (match_operand:SI 0 "address_operand" "p")
             (match_operand:SI 1 "const_int_operand" "n")
             (const_int 3))]
  "TARGET_3DNOW"
{
  if (INTVAL (operands[1]) == 0)
    return "prefetch\t%a0";
  else
    return "prefetchw\t%a0";
}
  [(set_attr "type" "mmx")])
|
||||
|
@ -34,11 +34,11 @@
|
||||
#include <mmintrin.h>
|
||||
|
||||
/* The data type intended for user use. */
|
||||
typedef int __m128 __attribute__ ((mode (TI)));
|
||||
typedef int __m128 __attribute__ ((__mode__(__V4SF__)));
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef int __v4sf __attribute__ ((mode (V4SF)));
|
||||
typedef int __v4si __attribute__ ((mode (V4SI)));
|
||||
typedef int __v4sf __attribute__ ((__mode__(__V4SF__)));
|
||||
typedef int __v4si __attribute__ ((__mode__(__V4SI__)));
|
||||
|
||||
/* Create a selector for use with the SHUFPS instruction. */
|
||||
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
|
||||
@ -680,7 +680,7 @@ _mm_movemask_ps (__m128 __A)
|
||||
static __inline unsigned int
|
||||
_mm_getcsr (void)
|
||||
{
|
||||
return __builtin_ia32_getmxcsr ();
|
||||
return __builtin_ia32_stmxcsr ();
|
||||
}
|
||||
|
||||
/* Read exception bits from the control register. */
|
||||
@ -712,7 +712,7 @@ _MM_GET_FLUSH_ZERO_MODE (void)
|
||||
static __inline void
|
||||
_mm_setcsr (unsigned int __I)
|
||||
{
|
||||
__builtin_ia32_setmxcsr (__I);
|
||||
__builtin_ia32_ldmxcsr (__I);
|
||||
}
|
||||
|
||||
/* Set exception bits in the control register. */
|
||||
|
Loading…
Reference in New Issue
Block a user