i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.

* gcc/config/i386/i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
	* gcc/config/i386/darwin.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
	* gcc/config/i386/i386.md (fixuns_trunc<mode>si2, fixuns_truncsfhi2,
	fixuns_truncdfhi2): New.
	(fix_truncsfdi_sse): Call ix86_expand_convert_sign_didf_sse.
	(floatunsdidf2): Call ix86_expand_convert_uns_didf_sse.
	(floatunssisf2): Add call to ix86_expand_convert_uns_sisf_sse.
	(floatunssidf2): Allow nonimmediate source.
	* gcc/config/i386/sse.md (movdi_to_sse): New.  (vec_concatv2di): Drop '*'.
	* gcc/config/i386/i386-protos.h (ix86_expand_convert_uns_si_sse,
	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse): New.
	* gcc/config/i386/i386.c (ix86_expand_convert_uns_si_sse,
	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse,
	ix86_build_const_vector, ix86_expand_vector_init_one_nonzero): New.
	(ix86_build_signbit_mask): Fix decl of v, refactor to call ix86_build_const_vector.
	(x86_emit_floatuns): Rewrite.

Co-Authored-By: Richard Henderson <rth@redhat.com>

From-SVN: r121790
This commit is contained in:
Stuart Hastings 2007-02-10 01:17:03 +00:00 committed by Stuart Hastings
parent 692b647c4f
commit ebff937c19
7 changed files with 387 additions and 36 deletions

View File

@ -1,3 +1,25 @@
2007-02-09 Stuart Hastings <stuart@apple.com>
Richard Henderson <rth@redhat.com>
* gcc/config/i386/i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
* gcc/config/i386/darwin.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
* gcc/config/i386/i386.md (fixuns_trunc<mode>si2, fixuns_truncsfhi2,
fixuns_truncdfhi2): New.
(fix_truncsfdi_sse): Call ix86_expand_convert_sign_didf_sse.
(floatunsdidf2): Call ix86_expand_convert_uns_didf_sse.
(floatunssisf2): Add call to ix86_expand_convert_uns_sisf_sse.
(floatunssidf2): Allow nonimmediate source.
* gcc/config/i386/sse.md (movdi_to_sse): New. (vec_concatv2di): Drop '*'.
* gcc/config/i386/i386-protos.h (ix86_expand_convert_uns_si_sse,
ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse): New.
* gcc/config/i386/i386.c (ix86_expand_convert_uns_si_sse,
ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse,
ix86_build_const_vector, ix86_expand_vector_init_one_nonzero): New.
(ix86_build_signbit_mask): Fix decl of v, refactor to call ix86_build_const_vector.
(x86_emit_floatuns): Rewrite.
2007-02-10 Manuel Lopez-Ibanez <manu@gcc.gnu.org> 2007-02-10 Manuel Lopez-Ibanez <manu@gcc.gnu.org>
* genautomata.c (longest_path_length): Delete unused function. * genautomata.c (longest_path_length): Delete unused function.

View File

@ -66,6 +66,9 @@ Boston, MA 02110-1301, USA. */
#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN #undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0) #define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
#undef TARGET_KEEPS_VECTOR_ALIGNED_STACK
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK 1
/* We want -fPIC by default, unless we're using -static to compile for /* We want -fPIC by default, unless we're using -static to compile for
the kernel or some such. */ the kernel or some such. */

View File

@ -89,6 +89,11 @@ extern void ix86_expand_binary_operator (enum rtx_code,
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode, extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]); rtx[]);
extern void ix86_expand_convert_uns_si_sse (rtx, rtx);
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool); extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode, extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
rtx[]); rtx[]);

View File

@ -1518,6 +1518,9 @@ static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void); static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void); static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int); static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
rtx, rtx, int);
/* This function is only used on Solaris. */ /* This function is only used on Solaris. */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree) static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
@ -9858,6 +9861,233 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
return TRUE; return TRUE;
} }
/* Convert an SF or DFmode value in an SSE register into an unsigned SImode.
When -fpmath=387, this is done with an x87 st(0)_FP->signed-int-64
conversion, and ignoring the upper 32 bits of the result. On x86_64,
there is an equivalent SSE %xmm->signed-int-64 conversion.
On x86_32, we don't have the instruction, nor the 64-bit destination
register it requires. Do the conversion inline in the SSE registers.
Requires SSE2. For x86_32, -mfpmath=sse, !optimize_size only. */
void
ix86_expand_convert_uns_si_sse (rtx target, rtx input)
{
REAL_VALUE_TYPE TWO31r;
enum machine_mode mode, vecmode;
rtx two31, value, large, sign, result_vec, zero_or_two31, x;
mode = GET_MODE (input);
vecmode = mode == SFmode ? V4SFmode : V2DFmode;
/* Build a vector constant with 2.0**31 replicated in every element.  */
real_ldexp (&TWO31r, &dconst1, 31);
two31 = const_double_from_real_value (TWO31r, mode);
two31 = ix86_build_const_vector (mode, true, two31);
two31 = force_reg (vecmode, two31);
/* Put INPUT into element 0 of a fresh vector register.  */
value = gen_reg_rtx (vecmode);
ix86_expand_vector_init_one_nonzero (false, vecmode, value, input, 0);
/* LARGE = per-element mask of (2**31 <= VALUE), i.e. the elements
   that would overflow a signed SImode conversion.  */
large = gen_reg_rtx (vecmode);
x = gen_rtx_fmt_ee (LE, vecmode, two31, value);
emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* Subtract 2**31 from exactly those elements: AND-ing the mask with
   the 2**31 vector yields 0 or 2**31 per element.  */
zero_or_two31 = gen_reg_rtx (vecmode);
x = gen_rtx_AND (vecmode, large, two31);
emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Now every element is in signed SImode range; do the signed
   truncating conversion.  */
result_vec = gen_reg_rtx (V4SImode);
if (mode == SFmode)
x = gen_sse2_cvttps2dq (result_vec, value);
else
x = gen_sse2_cvttpd2dq (result_vec, value);
emit_insn (x);
/* Re-add the 2**31 we subtracted: shifting the all-ones mask left by
   31 leaves only bit 31 set, and XOR-ing that into the converted
   result flips the sign bit, i.e. adds 2**31 back.  */
sign = gen_reg_rtx (V4SImode);
emit_insn (gen_ashlv4si3 (sign, gen_lowpart (V4SImode, large),
GEN_INT (31)));
emit_insn (gen_xorv4si3 (result_vec, result_vec, sign));
/* Element 0 holds the scalar answer.  */
ix86_expand_vector_extract (false, target, result_vec, 0);
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
Expects the 64-bit DImode to be supplied in a pair of integral
registers. Requires SSE2; will use SSE3 if available. For x86_32,
-mfpmath=sse, !optimize_size only. */
void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
rtx int_xmm, fp_xmm;
rtx biases, exponents;
rtx x;
/* Get the 64-bit INPUT into the low half of an xmm register, by
   whichever route the target tuning prefers.  */
int_xmm = gen_reg_rtx (V4SImode);
if (TARGET_INTER_UNIT_MOVES)
emit_insn (gen_movdi_to_sse (int_xmm, input));
else if (TARGET_SSE_SPLIT_REGS)
{
/* The explicit clobber tells the allocator the old vector contents
   are dead, so writing just the low DImode half is sufficient.  */
emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
emit_move_insn (gen_lowpart (DImode, int_xmm), input);
}
else
{
x = gen_reg_rtx (V2DImode);
ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
}
/* Exponent words that, interleaved with the two 32-bit halves of
   INPUT below, form the IEEE doubles described in the next comment.  */
x = gen_rtx_CONST_VECTOR (V4SImode,
gen_rtvec (4, GEN_INT (0x43300000UL),
GEN_INT (0x45300000UL),
const0_rtx, const0_rtx));
exponents = validize_mem (force_const_mem (V4SImode, x));
/* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
/* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
Similarly (0x45300000UL ## fp_value_hi_xmm) yields
(0x1.0p84 + double(fp_value_hi_xmm)).
Note these exponents differ by 32. */
fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
/* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
real_ldexp (&bias_lo_rvt, &dconst1, 52);
real_ldexp (&bias_hi_rvt, &dconst1, 84);
biases = const_double_from_real_value (bias_lo_rvt, DFmode);
x = const_double_from_real_value (bias_hi_rvt, DFmode);
biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
biases = validize_mem (force_const_mem (V2DFmode, biases));
emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
/* Add the upper and lower DFmode values together. */
if (TARGET_SSE3)
emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
else
{
/* No horizontal add: copy, swap the halves into place with unpckhpd,
   then use an ordinary vector add.  */
x = copy_to_mode_reg (V2DFmode, fp_xmm);
emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
}
/* The sum in element 0 is the final DFmode result.  */
ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
/* Convert an unsigned SImode value into a DFmode. Only currently used
for SSE, but applicable anywhere. */
void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
REAL_VALUE_TYPE TWO31r;
rtx x, fp;
/* Bias INPUT by -2**31 (the constant is INT_MIN, spelled this way so
   the literal itself does not overflow), mapping [0, 2**32) onto the
   signed SImode range.  */
x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
NULL, 1, OPTAB_DIRECT);
/* The signed SImode -> DFmode conversion exists directly.  */
fp = gen_reg_rtx (DFmode);
emit_insn (gen_floatsidf2 (fp, x));
/* Add 2**31 back; DFmode represents every 32-bit integer exactly,
   so no rounding occurs.  */
real_ldexp (&TWO31r, &dconst1, 31);
x = const_double_from_real_value (TWO31r, DFmode);
x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
if (x != target)
emit_move_insn (target, x);
}
/* Convert a signed DImode value into a DFmode. Only used for SSE in
32-bit mode; otherwise we have a direct convert instruction. */
void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
REAL_VALUE_TYPE TWO32r;
rtx fp_lo, fp_hi, x;
fp_lo = gen_reg_rtx (DFmode);
fp_hi = gen_reg_rtx (DFmode);
/* Convert the (signed) high word, then scale it by 2**32.  */
emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)))
;
real_ldexp (&TWO32r, &dconst1, 32);
x = const_double_from_real_value (TWO32r, DFmode);
fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Convert the low word as an unsigned 32-bit value.  */
ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
/* result = hi * 2**32 + lo.  */
x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
0, OPTAB_DIRECT);
if (x != target)
emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
For x86_32, -mfpmath=sse, !optimize_size only. */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
REAL_VALUE_TYPE ONE16r;
rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* X = 2.0**16, the scale factor for the high half.  */
real_ldexp (&ONE16r, &dconst1, 16);
x = const_double_from_real_value (ONE16r, SFmode);
/* Split INPUT into 16-bit halves; each half is exactly representable
   in SFmode, so both conversions below are exact.  */
int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
NULL, 0, OPTAB_DIRECT);
int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
NULL, 0, OPTAB_DIRECT);
fp_hi = gen_reg_rtx (SFmode);
fp_lo = gen_reg_rtx (SFmode);
emit_insn (gen_floatsisf2 (fp_hi, int_hi));
emit_insn (gen_floatsisf2 (fp_lo, int_lo));
/* result = hi * 2**16 + lo, rounded once at the final add.  */
fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
0, OPTAB_DIRECT);
fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
0, OPTAB_DIRECT);
if (!rtx_equal_p (target, fp_hi))
emit_move_insn (target, fp_hi);
}
/* A subroutine of ix86_build_signbit_mask.  Build a CONST_VECTOR of
   mode V4SF (for an SFmode VALUE) or V2DF (for a DFmode VALUE).
   If VECT is true, replicate VALUE in every element of the vector;
   otherwise place VALUE in element 0 and zero in the rest.  */
static rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
rtvec v;
switch (mode)
{
case SFmode:
if (vect)
v = gen_rtvec (4, value, value, value, value);
else
v = gen_rtvec (4, value, CONST0_RTX (SFmode),
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
return gen_rtx_CONST_VECTOR (V4SFmode, v);
case DFmode:
if (vect)
v = gen_rtvec (2, value, value);
else
v = gen_rtvec (2, value, CONST0_RTX (DFmode));
return gen_rtx_CONST_VECTOR (V2DFmode, v);
default:
/* Only SFmode and DFmode are supported.  */
gcc_unreachable ();
}
}
/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders. /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
Create a mask for the sign bit in MODE for an SSE register. If VECT is Create a mask for the sign bit in MODE for an SSE register. If VECT is
true, then replicate the mask for all elements of the vector register. true, then replicate the mask for all elements of the vector register.
@ -9869,7 +10099,7 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
enum machine_mode vec_mode; enum machine_mode vec_mode;
HOST_WIDE_INT hi, lo; HOST_WIDE_INT hi, lo;
int shift = 63; int shift = 63;
rtvec v; rtx v;
rtx mask; rtx mask;
/* Find the sign bit, sign extended to 2*HWI. */ /* Find the sign bit, sign extended to 2*HWI. */
@ -9887,25 +10117,9 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode); mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
mask = gen_lowpart (mode, mask); mask = gen_lowpart (mode, mask);
if (mode == SFmode) v = ix86_build_const_vector (mode, vect, mask);
{ vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
if (vect) return force_reg (vec_mode, v);
v = gen_rtvec (4, mask, mask, mask, mask);
else
v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
vec_mode = V4SFmode;
}
else
{
if (vect)
v = gen_rtvec (2, mask, mask);
else
v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
vec_mode = V2DFmode;
}
return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
} }
/* Generate code for floating point ABS or NEG. */ /* Generate code for floating point ABS or NEG. */
@ -19573,21 +19787,25 @@ x86_emit_floatuns (rtx operands[2])
mode = GET_MODE (out); mode = GET_MODE (out);
neglab = gen_label_rtx (); neglab = gen_label_rtx ();
donelab = gen_label_rtx (); donelab = gen_label_rtx ();
i1 = gen_reg_rtx (Pmode);
f0 = gen_reg_rtx (mode); f0 = gen_reg_rtx (mode);
emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab); emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
expand_float (out, in, 0);
emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
emit_jump_insn (gen_jump (donelab)); emit_jump_insn (gen_jump (donelab));
emit_barrier (); emit_barrier ();
emit_label (neglab); emit_label (neglab);
i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT); i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT); 1, OPTAB_DIRECT);
i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
1, OPTAB_DIRECT);
i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
expand_float (f0, i0, 0); expand_float (f0, i0, 0);
emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
emit_label (donelab); emit_label (donelab);

View File

@ -658,6 +658,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \ #define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
(ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT) (ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. This is
mandatory for the 64-bit ABI, and may or may not be true for other
operating systems. */
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK TARGET_64BIT
/* Minimum allocation boundary for the code of a function. */ /* Minimum allocation boundary for the code of a function. */
#define FUNCTION_BOUNDARY 8 #define FUNCTION_BOUNDARY 8

View File

@ -4331,6 +4331,38 @@
} }
}) })
;; Unsigned conversion to SImode.
;; All the real work is done by ix86_expand_convert_uns_si_sse.  The
;; condition limits this to 32-bit SSE2 math (64-bit targets have a
;; direct %xmm -> signed-int-64 conversion instead), and skips it when
;; optimizing for size since the expansion is long.
;; NOTE(review): TARGET_KEEPS_VECTOR_ALIGNED_STACK presumably guards
;; the aligned vector constants/temporaries the expander emits --
;; confirm against the expander's requirements.
(define_expand "fixuns_trunc<mode>si2"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
(fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))]
"!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_KEEPS_VECTOR_ALIGNED_STACK && !optimize_size"
{
ix86_expand_convert_uns_si_sse (operands[0], operands[1]);
DONE;
})
;; Unsigned conversion to HImode.
;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.
;; Any unsigned HImode value fits in signed SImode, so a signed SI
;; truncation followed by taking the low 16 bits is always correct.
(define_expand "fixuns_truncsfhi2"
[(set (match_dup 2)
(fix:SI (match_operand:SF 1 "nonimmediate_operand" "")))
(set (match_operand:HI 0 "nonimmediate_operand" "")
(subreg:HI (match_dup 2) 0))]
"TARGET_SSE_MATH"
"operands[2] = gen_reg_rtx (SImode);")
;; DFmode counterpart of fixuns_truncsfhi2: convert via a signed SImode
;; temporary and take the low 16 bits.
(define_expand "fixuns_truncdfhi2"
[(set (match_dup 2)
(fix:SI (match_operand:DF 1 "nonimmediate_operand" "")))
(set (match_operand:HI 0 "nonimmediate_operand" "")
(subreg:HI (match_dup 2) 0))]
"TARGET_SSE_MATH"
"operands[2] = gen_reg_rtx (SImode);")
;; When SSE is available, it is always faster to use it! ;; When SSE is available, it is always faster to use it!
(define_insn "fix_truncsfdi_sse" (define_insn "fix_truncsfdi_sse"
[(set (match_operand:DI 0 "register_operand" "=r,r") [(set (match_operand:DI 0 "register_operand" "=r,r")
@ -4848,8 +4880,14 @@
(define_expand "floatdidf2" (define_expand "floatdidf2"
[(set (match_operand:DF 0 "register_operand" "") [(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))] (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)" "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
"") {
if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
{
ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
DONE;
}
})
(define_insn "*floatdidf2_mixed" (define_insn "*floatdidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x") [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
@ -4944,21 +4982,40 @@
(define_expand "floatunssisf2" (define_expand "floatunssisf2"
[(use (match_operand:SF 0 "register_operand" "")) [(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:SI 1 "register_operand" ""))] (use (match_operand:SI 1 "nonimmediate_operand" ""))]
"!TARGET_64BIT && TARGET_SSE_MATH" "!TARGET_64BIT"
"x86_emit_floatuns (operands); DONE;") {
if (TARGET_SSE_MATH && TARGET_SSE2)
ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]);
else
x86_emit_floatuns (operands);
DONE;
})
;; Unsigned SImode -> DFmode, 32-bit SSE2 only; expanded entirely by
;; ix86_expand_convert_uns_sidf_sse (bias to signed, convert, un-bias).
(define_expand "floatunssidf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:SI 1 "nonimmediate_operand" ""))]
"!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2"
"ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;")
(define_expand "floatunsdisf2" (define_expand "floatunsdisf2"
[(use (match_operand:SF 0 "register_operand" "")) [(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:DI 1 "register_operand" ""))] (use (match_operand:DI 1 "nonimmediate_operand" ""))]
"TARGET_64BIT && TARGET_SSE_MATH" "TARGET_64BIT && TARGET_SSE_MATH"
"x86_emit_floatuns (operands); DONE;") "x86_emit_floatuns (operands); DONE;")
(define_expand "floatunsdidf2" (define_expand "floatunsdidf2"
[(use (match_operand:DF 0 "register_operand" "")) [(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DI 1 "register_operand" ""))] (use (match_operand:DI 1 "nonimmediate_operand" ""))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH" "TARGET_SSE_MATH && TARGET_SSE2
"x86_emit_floatuns (operands); DONE;") && (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)"
{
if (TARGET_64BIT)
x86_emit_floatuns (operands);
else
ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
DONE;
})
;; SSE extract/set expanders ;; SSE extract/set expanders

View File

@ -87,6 +87,47 @@
(const_string "V4SF") (const_string "V4SF")
(const_string "TI")))]) (const_string "TI")))])
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register. To facilitate this happy circumstance, this pattern won't
;; split until after register allocation. If the 64-bit value didn't
;; come from memory, this is the best we can do. This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
(define_insn_and_split "movdi_to_sse"
[(parallel
[(set (match_operand:V4SI 0 "register_operand" "=?x,x")
(subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
(clobber (match_scratch:V4SI 2 "=&x,X"))])]
"!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
"#"
"&& reload_completed"
[(const_int 0)]
{
switch (which_alternative)
{
case 0:
/* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
Assemble the 64-bit DImode value in an xmm register: load each
32-bit half into the low element of a zeroed vector, then
interleave them with punpckldq.  */
emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 0)));
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
break;
case 1:
/* Memory operand: vec_concat with zero loads the DImode value
directly into the low half of the xmm.  */
emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
break;
default:
gcc_unreachable ();
}
DONE;
})
(define_expand "movv4sf" (define_expand "movv4sf"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "") [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
(match_operand:V4SF 1 "nonimmediate_operand" ""))] (match_operand:V4SF 1 "nonimmediate_operand" ""))]
@ -4118,7 +4159,7 @@
[(set_attr "type" "sselog,ssemov,ssemov") [(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "TI,V4SF,V2SF")]) (set_attr "mode" "TI,V4SF,V2SF")])
(define_insn "*vec_concatv2di" (define_insn "vec_concatv2di"
[(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x") [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
(vec_concat:V2DI (vec_concat:V2DI
(match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m") (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")