i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
* gcc/config/i386/i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New. * gcc/config/i386/darwin.h: (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New. * gcc/config/i386/i386.md (fixuns_trunc<mode>si2, fixuns_truncsfhi2, fixuns_truncdfhi2): New. (fix_truncsfdi_sse): Call ix86_expand_convert_sign_didf_sse. (floatunsdidf2): Call ix86_expand_convert_uns_didf_sse. (floatunssisf2): Add call to ix86_expand_convert_uns_sisf_sse. (floatunssidf2): Allow nonimmediate source. * gcc/config/i386/sse.md (movdi_to_sse): New. (vec_concatv2di): Drop '*'. * gcc/config/i386/i386-protos.h (ix86_expand_convert_uns_si_sse, ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse, ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse): New. * gcc/config/i386/i386.c (ix86_expand_convert_uns_si_sse, ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse, ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse, ix86_build_const_vector, ix86_expand_vector_init_one_nonzero): New. (ix86_build_signbit_mask): Fix decl of v, refactor to call ix86_build_const_vector. (x86_emit_floatuns): Rewrite. Co-Authored-By: Richard Henderson <rth@redhat.com> From-SVN: r121790
This commit is contained in:
parent
692b647c4f
commit
ebff937c19
|
@ -1,3 +1,25 @@
|
||||||
|
2007-02-09 Stuart Hastings <stuart@apple.com>
|
||||||
|
Richard Henderson <rth@redhat.com>
|
||||||
|
|
||||||
|
* gcc/config/i386/i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
|
||||||
|
* gcc/config/i386/darwin.h: (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
|
||||||
|
* gcc/config/i386/i386.md (fixuns_trunc<mode>si2, fixuns_truncsfhi2,
|
||||||
|
fixuns_truncdfhi2): New.
|
||||||
|
(fix_truncsfdi_sse): Call ix86_expand_convert_sign_didf_sse.
|
||||||
|
(floatunsdidf2): Call ix86_expand_convert_uns_didf_sse.
|
||||||
|
(floatunssisf2): Add call to ix86_expand_convert_uns_sisf_sse.
|
||||||
|
(floatunssidf2): Allow nonimmediate source.
|
||||||
|
* gcc/config/i386/sse.md (movdi_to_sse): New. (vec_concatv2di): Drop '*'.
|
||||||
|
* gcc/config/i386/i386-protos.h (ix86_expand_convert_uns_si_sse,
|
||||||
|
ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
|
||||||
|
ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse): New.
|
||||||
|
* gcc/config/i386/i386.c (ix86_expand_convert_uns_si_sse,
|
||||||
|
ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
|
||||||
|
ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse,
|
||||||
|
ix86_build_const_vector, ix86_expand_vector_init_one_nonzero): New.
|
||||||
|
(ix86_build_signbit_mask): Fix decl of v, refactor to call ix86_build_const_vector.
|
||||||
|
(x86_emit_floatuns): Rewrite.
|
||||||
|
|
||||||
2007-02-10 Manuel Lopez-Ibanez <manu@gcc.gnu.org>
|
2007-02-10 Manuel Lopez-Ibanez <manu@gcc.gnu.org>
|
||||||
|
|
||||||
* genautomata.c (longest_path_length): Delete unused function.
|
* genautomata.c (longest_path_length): Delete unused function.
|
||||||
|
|
|
@ -66,6 +66,9 @@ Boston, MA 02110-1301, USA. */
|
||||||
#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
|
#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
|
||||||
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
|
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
|
||||||
|
|
||||||
|
#undef TARGET_KEEPS_VECTOR_ALIGNED_STACK
|
||||||
|
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK 1
|
||||||
|
|
||||||
/* We want -fPIC by default, unless we're using -static to compile for
|
/* We want -fPIC by default, unless we're using -static to compile for
|
||||||
the kernel or some such. */
|
the kernel or some such. */
|
||||||
|
|
||||||
|
|
|
@ -89,6 +89,11 @@ extern void ix86_expand_binary_operator (enum rtx_code,
|
||||||
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
|
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
|
||||||
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
|
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
|
||||||
rtx[]);
|
rtx[]);
|
||||||
|
extern void ix86_expand_convert_uns_si_sse (rtx, rtx);
|
||||||
|
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
|
||||||
|
extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
|
||||||
|
extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
|
||||||
|
extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
|
||||||
extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
|
extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
|
||||||
extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
|
extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
|
||||||
rtx[]);
|
rtx[]);
|
||||||
|
|
|
@ -1518,6 +1518,9 @@ static const char *ix86_mangle_fundamental_type (tree);
|
||||||
static tree ix86_stack_protect_fail (void);
|
static tree ix86_stack_protect_fail (void);
|
||||||
static rtx ix86_internal_arg_pointer (void);
|
static rtx ix86_internal_arg_pointer (void);
|
||||||
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
|
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
|
||||||
|
static rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
|
||||||
|
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
|
||||||
|
rtx, rtx, int);
|
||||||
|
|
||||||
/* This function is only used on Solaris. */
|
/* This function is only used on Solaris. */
|
||||||
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
|
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
|
||||||
|
@ -9858,6 +9861,233 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convert an SF or DFmode value in an SSE register into an unsigned SImode.
|
||||||
|
When -mfpmath=387, this is done with an x87 st(0)_FP->signed-int-64
|
||||||
|
conversion, and ignoring the upper 32 bits of the result. On x86_64,
|
||||||
|
there is an equivalent SSE %xmm->signed-int-64 conversion.
|
||||||
|
|
||||||
|
On x86_32, we don't have the instruction, nor the 64-bit destination
|
||||||
|
register it requires. Do the conversion inline in the SSE registers.
|
||||||
|
Requires SSE2. For x86_32, -mfpmath=sse, !optimize_size only. */
|
||||||
|
|
||||||
|
void
|
||||||
|
ix86_expand_convert_uns_si_sse (rtx target, rtx input)
|
||||||
|
{
|
||||||
|
REAL_VALUE_TYPE TWO31r;
|
||||||
|
enum machine_mode mode, vecmode;
|
||||||
|
rtx two31, value, large, sign, result_vec, zero_or_two31, x;
|
||||||
|
|
||||||
|
mode = GET_MODE (input);
|
||||||
|
vecmode = mode == SFmode ? V4SFmode : V2DFmode;
|
||||||
|
|
||||||
|
real_ldexp (&TWO31r, &dconst1, 31);
|
||||||
|
two31 = const_double_from_real_value (TWO31r, mode);
|
||||||
|
two31 = ix86_build_const_vector (mode, true, two31);
|
||||||
|
two31 = force_reg (vecmode, two31);
|
||||||
|
|
||||||
|
value = gen_reg_rtx (vecmode);
|
||||||
|
ix86_expand_vector_init_one_nonzero (false, vecmode, value, input, 0);
|
||||||
|
|
||||||
|
large = gen_reg_rtx (vecmode);
|
||||||
|
x = gen_rtx_fmt_ee (LE, vecmode, two31, value);
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, large, x));
|
||||||
|
|
||||||
|
zero_or_two31 = gen_reg_rtx (vecmode);
|
||||||
|
x = gen_rtx_AND (vecmode, large, two31);
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
|
||||||
|
|
||||||
|
x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
|
||||||
|
emit_insn (gen_rtx_SET (VOIDmode, value, x));
|
||||||
|
|
||||||
|
result_vec = gen_reg_rtx (V4SImode);
|
||||||
|
if (mode == SFmode)
|
||||||
|
x = gen_sse2_cvttps2dq (result_vec, value);
|
||||||
|
else
|
||||||
|
x = gen_sse2_cvttpd2dq (result_vec, value);
|
||||||
|
emit_insn (x);
|
||||||
|
|
||||||
|
sign = gen_reg_rtx (V4SImode);
|
||||||
|
emit_insn (gen_ashlv4si3 (sign, gen_lowpart (V4SImode, large),
|
||||||
|
GEN_INT (31)));
|
||||||
|
|
||||||
|
emit_insn (gen_xorv4si3 (result_vec, result_vec, sign));
|
||||||
|
|
||||||
|
ix86_expand_vector_extract (false, target, result_vec, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert an unsigned DImode value into a DFmode, using only SSE.
|
||||||
|
Expects the 64-bit DImode to be supplied in a pair of integral
|
||||||
|
registers. Requires SSE2; will use SSE3 if available. For x86_32,
|
||||||
|
-mfpmath=sse, !optimize_size only. */
|
||||||
|
|
||||||
|
void
|
||||||
|
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
|
||||||
|
{
|
||||||
|
REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
|
||||||
|
rtx int_xmm, fp_xmm;
|
||||||
|
rtx biases, exponents;
|
||||||
|
rtx x;
|
||||||
|
|
||||||
|
int_xmm = gen_reg_rtx (V4SImode);
|
||||||
|
if (TARGET_INTER_UNIT_MOVES)
|
||||||
|
emit_insn (gen_movdi_to_sse (int_xmm, input));
|
||||||
|
else if (TARGET_SSE_SPLIT_REGS)
|
||||||
|
{
|
||||||
|
emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
|
||||||
|
emit_move_insn (gen_lowpart (DImode, int_xmm), input);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
x = gen_reg_rtx (V2DImode);
|
||||||
|
ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
|
||||||
|
emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
|
||||||
|
}
|
||||||
|
|
||||||
|
x = gen_rtx_CONST_VECTOR (V4SImode,
|
||||||
|
gen_rtvec (4, GEN_INT (0x43300000UL),
|
||||||
|
GEN_INT (0x45300000UL),
|
||||||
|
const0_rtx, const0_rtx));
|
||||||
|
exponents = validize_mem (force_const_mem (V4SImode, x));
|
||||||
|
|
||||||
|
/* int_xmm = { 0x45300000UL, input/hi, 0x43300000UL, input/lo } */
|
||||||
|
emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
|
||||||
|
|
||||||
|
/* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
|
||||||
|
yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
|
||||||
|
Similarly (0x45300000UL ## fp_value_hi_xmm) yields
|
||||||
|
(0x1.0p84 + double(fp_value_hi_xmm)).
|
||||||
|
Note these exponents differ by 32. */
|
||||||
|
|
||||||
|
fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
|
||||||
|
|
||||||
|
/* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
|
||||||
|
in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
|
||||||
|
real_ldexp (&bias_lo_rvt, &dconst1, 52);
|
||||||
|
real_ldexp (&bias_hi_rvt, &dconst1, 84);
|
||||||
|
biases = const_double_from_real_value (bias_lo_rvt, DFmode);
|
||||||
|
x = const_double_from_real_value (bias_hi_rvt, DFmode);
|
||||||
|
biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
|
||||||
|
biases = validize_mem (force_const_mem (V2DFmode, biases));
|
||||||
|
emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
|
||||||
|
|
||||||
|
/* Add the upper and lower DFmode values together. */
|
||||||
|
if (TARGET_SSE3)
|
||||||
|
emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
x = copy_to_mode_reg (V2DFmode, fp_xmm);
|
||||||
|
emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
|
||||||
|
emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
|
||||||
|
}
|
||||||
|
|
||||||
|
ix86_expand_vector_extract (false, target, fp_xmm, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert an unsigned SImode value into a DFmode. Only currently used
|
||||||
|
for SSE, but applicable anywhere. */
|
||||||
|
|
||||||
|
void
|
||||||
|
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
|
||||||
|
{
|
||||||
|
REAL_VALUE_TYPE TWO31r;
|
||||||
|
rtx x, fp;
|
||||||
|
|
||||||
|
x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
|
||||||
|
NULL, 1, OPTAB_DIRECT);
|
||||||
|
|
||||||
|
fp = gen_reg_rtx (DFmode);
|
||||||
|
emit_insn (gen_floatsidf2 (fp, x));
|
||||||
|
|
||||||
|
real_ldexp (&TWO31r, &dconst1, 31);
|
||||||
|
x = const_double_from_real_value (TWO31r, DFmode);
|
||||||
|
|
||||||
|
x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
|
||||||
|
if (x != target)
|
||||||
|
emit_move_insn (target, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert a signed DImode value into a DFmode. Only used for SSE in
|
||||||
|
32-bit mode; otherwise we have a direct convert instruction. */
|
||||||
|
|
||||||
|
void
|
||||||
|
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
|
||||||
|
{
|
||||||
|
REAL_VALUE_TYPE TWO32r;
|
||||||
|
rtx fp_lo, fp_hi, x;
|
||||||
|
|
||||||
|
fp_lo = gen_reg_rtx (DFmode);
|
||||||
|
fp_hi = gen_reg_rtx (DFmode);
|
||||||
|
|
||||||
|
emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
|
||||||
|
|
||||||
|
real_ldexp (&TWO32r, &dconst1, 32);
|
||||||
|
x = const_double_from_real_value (TWO32r, DFmode);
|
||||||
|
fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
|
||||||
|
|
||||||
|
ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
|
||||||
|
|
||||||
|
x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
|
||||||
|
0, OPTAB_DIRECT);
|
||||||
|
if (x != target)
|
||||||
|
emit_move_insn (target, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert an unsigned SImode value into a SFmode, using only SSE.
|
||||||
|
For x86_32, -mfpmath=sse, !optimize_size only. */
|
||||||
|
void
|
||||||
|
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
|
||||||
|
{
|
||||||
|
REAL_VALUE_TYPE ONE16r;
|
||||||
|
rtx fp_hi, fp_lo, int_hi, int_lo, x;
|
||||||
|
|
||||||
|
real_ldexp (&ONE16r, &dconst1, 16);
|
||||||
|
x = const_double_from_real_value (ONE16r, SFmode);
|
||||||
|
int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
|
||||||
|
NULL, 0, OPTAB_DIRECT);
|
||||||
|
int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
|
||||||
|
NULL, 0, OPTAB_DIRECT);
|
||||||
|
fp_hi = gen_reg_rtx (SFmode);
|
||||||
|
fp_lo = gen_reg_rtx (SFmode);
|
||||||
|
emit_insn (gen_floatsisf2 (fp_hi, int_hi));
|
||||||
|
emit_insn (gen_floatsisf2 (fp_lo, int_lo));
|
||||||
|
fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
|
||||||
|
0, OPTAB_DIRECT);
|
||||||
|
fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
|
||||||
|
0, OPTAB_DIRECT);
|
||||||
|
if (!rtx_equal_p (target, fp_hi))
|
||||||
|
emit_move_insn (target, fp_hi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A subroutine of ix86_build_signbit_mask. If VECT is true,
|
||||||
|
then replicate the value for all elements of the vector
|
||||||
|
register. */
|
||||||
|
|
||||||
|
static rtx
|
||||||
|
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
|
||||||
|
{
|
||||||
|
rtvec v;
|
||||||
|
switch (mode)
|
||||||
|
{
|
||||||
|
case SFmode:
|
||||||
|
if (vect)
|
||||||
|
v = gen_rtvec (4, value, value, value, value);
|
||||||
|
else
|
||||||
|
v = gen_rtvec (4, value, CONST0_RTX (SFmode),
|
||||||
|
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
|
||||||
|
return gen_rtx_CONST_VECTOR (V4SFmode, v);
|
||||||
|
|
||||||
|
case DFmode:
|
||||||
|
if (vect)
|
||||||
|
v = gen_rtvec (2, value, value);
|
||||||
|
else
|
||||||
|
v = gen_rtvec (2, value, CONST0_RTX (DFmode));
|
||||||
|
return gen_rtx_CONST_VECTOR (V2DFmode, v);
|
||||||
|
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
|
/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
|
||||||
Create a mask for the sign bit in MODE for an SSE register. If VECT is
|
Create a mask for the sign bit in MODE for an SSE register. If VECT is
|
||||||
true, then replicate the mask for all elements of the vector register.
|
true, then replicate the mask for all elements of the vector register.
|
||||||
|
@ -9869,7 +10099,7 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
|
||||||
enum machine_mode vec_mode;
|
enum machine_mode vec_mode;
|
||||||
HOST_WIDE_INT hi, lo;
|
HOST_WIDE_INT hi, lo;
|
||||||
int shift = 63;
|
int shift = 63;
|
||||||
rtvec v;
|
rtx v;
|
||||||
rtx mask;
|
rtx mask;
|
||||||
|
|
||||||
/* Find the sign bit, sign extended to 2*HWI. */
|
/* Find the sign bit, sign extended to 2*HWI. */
|
||||||
|
@ -9887,25 +10117,9 @@ ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
|
||||||
mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
|
mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
|
||||||
mask = gen_lowpart (mode, mask);
|
mask = gen_lowpart (mode, mask);
|
||||||
|
|
||||||
if (mode == SFmode)
|
v = ix86_build_const_vector (mode, vect, mask);
|
||||||
{
|
vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
|
||||||
if (vect)
|
return force_reg (vec_mode, v);
|
||||||
v = gen_rtvec (4, mask, mask, mask, mask);
|
|
||||||
else
|
|
||||||
v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
|
|
||||||
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
|
|
||||||
vec_mode = V4SFmode;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (vect)
|
|
||||||
v = gen_rtvec (2, mask, mask);
|
|
||||||
else
|
|
||||||
v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
|
|
||||||
vec_mode = V2DFmode;
|
|
||||||
}
|
|
||||||
|
|
||||||
return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Generate code for floating point ABS or NEG. */
|
/* Generate code for floating point ABS or NEG. */
|
||||||
|
@ -19573,21 +19787,25 @@ x86_emit_floatuns (rtx operands[2])
|
||||||
mode = GET_MODE (out);
|
mode = GET_MODE (out);
|
||||||
neglab = gen_label_rtx ();
|
neglab = gen_label_rtx ();
|
||||||
donelab = gen_label_rtx ();
|
donelab = gen_label_rtx ();
|
||||||
i1 = gen_reg_rtx (Pmode);
|
|
||||||
f0 = gen_reg_rtx (mode);
|
f0 = gen_reg_rtx (mode);
|
||||||
|
|
||||||
emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
|
emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
|
||||||
|
|
||||||
|
expand_float (out, in, 0);
|
||||||
|
|
||||||
emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
|
|
||||||
emit_jump_insn (gen_jump (donelab));
|
emit_jump_insn (gen_jump (donelab));
|
||||||
emit_barrier ();
|
emit_barrier ();
|
||||||
|
|
||||||
emit_label (neglab);
|
emit_label (neglab);
|
||||||
|
|
||||||
i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
|
i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
|
||||||
i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
|
1, OPTAB_DIRECT);
|
||||||
i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
|
i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
|
||||||
|
1, OPTAB_DIRECT);
|
||||||
|
i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
|
||||||
|
|
||||||
expand_float (f0, i0, 0);
|
expand_float (f0, i0, 0);
|
||||||
|
|
||||||
emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
|
emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
|
||||||
|
|
||||||
emit_label (donelab);
|
emit_label (donelab);
|
||||||
|
|
|
@ -658,6 +658,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||||
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
|
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
|
||||||
(ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
|
(ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
|
||||||
|
|
||||||
|
/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. This is
|
||||||
|
mandatory for the 64-bit ABI, and may or may not be true for other
|
||||||
|
operating systems. */
|
||||||
|
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK TARGET_64BIT
|
||||||
|
|
||||||
/* Minimum allocation boundary for the code of a function. */
|
/* Minimum allocation boundary for the code of a function. */
|
||||||
#define FUNCTION_BOUNDARY 8
|
#define FUNCTION_BOUNDARY 8
|
||||||
|
|
||||||
|
|
|
@ -4331,6 +4331,38 @@
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
;; Unsigned conversion to SImode.
|
||||||
|
|
||||||
|
(define_expand "fixuns_trunc<mode>si2"
|
||||||
|
[(set (match_operand:SI 0 "nonimmediate_operand" "")
|
||||||
|
(fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))]
|
||||||
|
"!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
|
||||||
|
&& TARGET_KEEPS_VECTOR_ALIGNED_STACK && !optimize_size"
|
||||||
|
{
|
||||||
|
ix86_expand_convert_uns_si_sse (operands[0], operands[1]);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
;; Unsigned conversion to HImode.
|
||||||
|
;; Without these patterns, we'll try the unsigned SI conversion which
|
||||||
|
;; is complex for SSE, rather than the signed SI conversion, which isn't.
|
||||||
|
|
||||||
|
(define_expand "fixuns_truncsfhi2"
|
||||||
|
[(set (match_dup 2)
|
||||||
|
(fix:SI (match_operand:SF 1 "nonimmediate_operand" "")))
|
||||||
|
(set (match_operand:HI 0 "nonimmediate_operand" "")
|
||||||
|
(subreg:HI (match_dup 2) 0))]
|
||||||
|
"TARGET_SSE_MATH"
|
||||||
|
"operands[2] = gen_reg_rtx (SImode);")
|
||||||
|
|
||||||
|
(define_expand "fixuns_truncdfhi2"
|
||||||
|
[(set (match_dup 2)
|
||||||
|
(fix:SI (match_operand:DF 1 "nonimmediate_operand" "")))
|
||||||
|
(set (match_operand:HI 0 "nonimmediate_operand" "")
|
||||||
|
(subreg:HI (match_dup 2) 0))]
|
||||||
|
"TARGET_SSE_MATH"
|
||||||
|
"operands[2] = gen_reg_rtx (SImode);")
|
||||||
|
|
||||||
;; When SSE is available, it is always faster to use it!
|
;; When SSE is available, it is always faster to use it!
|
||||||
(define_insn "fix_truncsfdi_sse"
|
(define_insn "fix_truncsfdi_sse"
|
||||||
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
[(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||||
|
@ -4848,8 +4880,14 @@
|
||||||
(define_expand "floatdidf2"
|
(define_expand "floatdidf2"
|
||||||
[(set (match_operand:DF 0 "register_operand" "")
|
[(set (match_operand:DF 0 "register_operand" "")
|
||||||
(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
|
(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
|
||||||
"TARGET_80387 || (TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)"
|
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
|
||||||
"")
|
{
|
||||||
|
if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
|
||||||
|
{
|
||||||
|
ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
|
||||||
|
DONE;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
(define_insn "*floatdidf2_mixed"
|
(define_insn "*floatdidf2_mixed"
|
||||||
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
|
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
|
||||||
|
@ -4944,21 +4982,40 @@
|
||||||
|
|
||||||
(define_expand "floatunssisf2"
|
(define_expand "floatunssisf2"
|
||||||
[(use (match_operand:SF 0 "register_operand" ""))
|
[(use (match_operand:SF 0 "register_operand" ""))
|
||||||
(use (match_operand:SI 1 "register_operand" ""))]
|
(use (match_operand:SI 1 "nonimmediate_operand" ""))]
|
||||||
"!TARGET_64BIT && TARGET_SSE_MATH"
|
"!TARGET_64BIT"
|
||||||
"x86_emit_floatuns (operands); DONE;")
|
{
|
||||||
|
if (TARGET_SSE_MATH && TARGET_SSE2)
|
||||||
|
ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]);
|
||||||
|
else
|
||||||
|
x86_emit_floatuns (operands);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
(define_expand "floatunssidf2"
|
||||||
|
[(use (match_operand:DF 0 "register_operand" ""))
|
||||||
|
(use (match_operand:SI 1 "nonimmediate_operand" ""))]
|
||||||
|
"!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2"
|
||||||
|
"ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;")
|
||||||
|
|
||||||
(define_expand "floatunsdisf2"
|
(define_expand "floatunsdisf2"
|
||||||
[(use (match_operand:SF 0 "register_operand" ""))
|
[(use (match_operand:SF 0 "register_operand" ""))
|
||||||
(use (match_operand:DI 1 "register_operand" ""))]
|
(use (match_operand:DI 1 "nonimmediate_operand" ""))]
|
||||||
"TARGET_64BIT && TARGET_SSE_MATH"
|
"TARGET_64BIT && TARGET_SSE_MATH"
|
||||||
"x86_emit_floatuns (operands); DONE;")
|
"x86_emit_floatuns (operands); DONE;")
|
||||||
|
|
||||||
(define_expand "floatunsdidf2"
|
(define_expand "floatunsdidf2"
|
||||||
[(use (match_operand:DF 0 "register_operand" ""))
|
[(use (match_operand:DF 0 "register_operand" ""))
|
||||||
(use (match_operand:DI 1 "register_operand" ""))]
|
(use (match_operand:DI 1 "nonimmediate_operand" ""))]
|
||||||
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
|
"TARGET_SSE_MATH && TARGET_SSE2
|
||||||
"x86_emit_floatuns (operands); DONE;")
|
&& (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)"
|
||||||
|
{
|
||||||
|
if (TARGET_64BIT)
|
||||||
|
x86_emit_floatuns (operands);
|
||||||
|
else
|
||||||
|
ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
;; SSE extract/set expanders
|
;; SSE extract/set expanders
|
||||||
|
|
||||||
|
|
|
@ -87,6 +87,47 @@
|
||||||
(const_string "V4SF")
|
(const_string "V4SF")
|
||||||
(const_string "TI")))])
|
(const_string "TI")))])
|
||||||
|
|
||||||
|
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
|
||||||
|
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
|
||||||
|
;; from memory, we'd prefer to load the memory directly into the %xmm
|
||||||
|
;; register. To facilitate this happy circumstance, this pattern won't
|
||||||
|
;; split until after register allocation. If the 64-bit value didn't
|
||||||
|
;; come from memory, this is the best we can do. This is much better
|
||||||
|
;; than storing %edx:%eax into a stack temporary and loading an %xmm
|
||||||
|
;; from there.
|
||||||
|
|
||||||
|
(define_insn_and_split "movdi_to_sse"
|
||||||
|
[(parallel
|
||||||
|
[(set (match_operand:V4SI 0 "register_operand" "=?x,x")
|
||||||
|
(subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
|
||||||
|
(clobber (match_scratch:V4SI 2 "=&x,X"))])]
|
||||||
|
"!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
|
||||||
|
"#"
|
||||||
|
"&& reload_completed"
|
||||||
|
[(const_int 0)]
|
||||||
|
{
|
||||||
|
switch (which_alternative)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
/* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
|
||||||
|
Assemble the 64-bit DImode value in an xmm register. */
|
||||||
|
emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
|
||||||
|
gen_rtx_SUBREG (SImode, operands[1], 0)));
|
||||||
|
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
|
||||||
|
gen_rtx_SUBREG (SImode, operands[1], 4)));
|
||||||
|
emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
(define_expand "movv4sf"
|
(define_expand "movv4sf"
|
||||||
[(set (match_operand:V4SF 0 "nonimmediate_operand" "")
|
[(set (match_operand:V4SF 0 "nonimmediate_operand" "")
|
||||||
(match_operand:V4SF 1 "nonimmediate_operand" ""))]
|
(match_operand:V4SF 1 "nonimmediate_operand" ""))]
|
||||||
|
@ -4118,7 +4159,7 @@
|
||||||
[(set_attr "type" "sselog,ssemov,ssemov")
|
[(set_attr "type" "sselog,ssemov,ssemov")
|
||||||
(set_attr "mode" "TI,V4SF,V2SF")])
|
(set_attr "mode" "TI,V4SF,V2SF")])
|
||||||
|
|
||||||
(define_insn "*vec_concatv2di"
|
(define_insn "vec_concatv2di"
|
||||||
[(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
|
[(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
|
||||||
(vec_concat:V2DI
|
(vec_concat:V2DI
|
||||||
(match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
|
(match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
|
||||||
|
|
Loading…
Reference in New Issue