i386.h (X86_TUNE_PROMOTE_HIMODE_IMUL): New tuning option.
* config/i386/i386.h (X86_TUNE_PROMOTE_HIMODE_IMUL): New tuning option.
(TARGET_TUNE_PROMOTE_HIMODE_IMUL): New define. Use new tuning option.
* config/i386/i386.c (ix86_tune_features): Initialize new tuning option.
* config/i386/predicates.md (promotable_binary_operator): Use TARGET_TUNE_PROMOTE_HIMODE_IMUL.

From-SVN: r123301
This commit is contained in:
parent
9b7dd5e868
commit
a646adedf0
|
@ -1,3 +1,13 @@
|
|||
2007-03-28 Uros Bizjak <ubizjak@gmail.com>
|
||||
|
||||
* config/i386/i386.h (X86_TUNE_PROMOTE_HIMODE_IMUL): New tuning
|
||||
option.
|
||||
(TARGET_TUNE_PROMOTE_HIMODE_IMUL): New define. Use new tuning option.
|
||||
* config/i386/i386.c (ix86_tune_features): Initialize new
|
||||
tuning option.
|
||||
* config/i386/predicates.md (promotable_binary_operator): Use
|
||||
TARGET_TUNE_PROMOTE_HIMODE_IMUL.
|
||||
|
||||
2007-03-28 Grigory Zagorodnev <grigory_zagorodnev@linux.intel.com>
|
||||
|
||||
PR target/31380
|
||||
|
@ -85,22 +95,22 @@
|
|||
|
||||
2007-03-26 Richard Henderson <rth@redhat.com>
|
||||
|
||||
PR target/31361
|
||||
* config/i386/i386.c (IX86_BUILTIN_PSLLDQ128, IX86_BUILTIN_PSLLW128,
|
||||
IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128, IX86_BUILTIN_PSRAW128,
|
||||
IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128,
|
||||
IX86_BUILTIN_PSRLQ128): New.
|
||||
(ix86_init_mmx_sse_builtins): Add them.
|
||||
(ix86_expand_builtin): Expand them.
|
||||
* config/i386/sse.md (ashr<mode>3, lshr<mode>3, ashl<mode>3): Make
|
||||
operand 2 be TImode.
|
||||
* config/i386/emmintrin.h (_mm_slli_epi64, _mm_srai_epi16,
|
||||
_mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32,
|
||||
_mm_srli_epi64): Mark __B const.
|
||||
(_mm_srli_si128, _mm_srli_si128): Fix disabled inline versions.
|
||||
(_mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64, _mm_sra_epi16,
|
||||
_mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32, _mm_srl_epi64): Use
|
||||
new two-vector shift builtins.
|
||||
PR target/31361
|
||||
* config/i386/i386.c (IX86_BUILTIN_PSLLDQ128, IX86_BUILTIN_PSLLW128,
|
||||
IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128, IX86_BUILTIN_PSRAW128,
|
||||
IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128,
|
||||
IX86_BUILTIN_PSRLQ128): New.
|
||||
(ix86_init_mmx_sse_builtins): Add them.
|
||||
(ix86_expand_builtin): Expand them.
|
||||
* config/i386/sse.md (ashr<mode>3, lshr<mode>3, ashl<mode>3): Make
|
||||
operand 2 be TImode.
|
||||
* config/i386/emmintrin.h (_mm_slli_epi64, _mm_srai_epi16,
|
||||
_mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32,
|
||||
_mm_srli_epi64): Mark __B const.
|
||||
(_mm_srli_si128, _mm_srli_si128): Fix disabled inline versions.
|
||||
(_mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64, _mm_sra_epi16,
|
||||
_mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32, _mm_srl_epi64): Use
|
||||
new two-vector shift builtins.
|
||||
|
||||
2007-03-26 Ian Lance Taylor <iant@google.com>
|
||||
|
||||
|
@ -288,7 +298,7 @@
|
|||
IX86_BUILTIN_PSRAW128, IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128,
|
||||
IX86_BUILTIN_PSRLD128, IX86_BUILTIN_PSRLQ128): Remove.
|
||||
(ix86_init_mmx_sse_builtins): Remove v8hi_ftype_v8hi_v2di and
|
||||
v4si_ftype_v4si_v2di. Remove __builtin_ia32_psllw128,
|
||||
v4si_ftype_v4si_v2di. Remove __builtin_ia32_psllw128,
|
||||
__builtin_ia32_pslld128, __builtin_ia32_psllq128,
|
||||
__builtin_ia32_psrlw128, __builtin_ia32_psrld128,
|
||||
__builtin_ia32_psrlq128, __builtin_ia32_psraw128 and
|
||||
|
|
|
@ -1200,25 +1200,29 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = {
|
|||
/* X86_TUNE_AVOID_VECTOR_DECODE */
|
||||
m_K8 | m_GENERIC64,
|
||||
|
||||
/* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is vector
|
||||
path on AMD machines) */
|
||||
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
|
||||
and SImode multiply, but 386 and 486 do HImode multiply faster. */
|
||||
~(m_386 | m_486),
|
||||
|
||||
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
|
||||
vector path on AMD machines. */
|
||||
m_K8 | m_GENERIC64 | m_AMDFAM10,
|
||||
|
||||
/* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
|
||||
machines) */
|
||||
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
|
||||
machines. */
|
||||
m_K8 | m_GENERIC64 | m_AMDFAM10,
|
||||
|
||||
/* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR than
|
||||
a MOV) */
|
||||
/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
|
||||
than a MOV. */
|
||||
m_PENT,
|
||||
|
||||
/* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is, but
|
||||
one byte longer). */
|
||||
/* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
|
||||
but one byte longer. */
|
||||
m_PENT,
|
||||
|
||||
/* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
|
||||
/* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
|
||||
operand that cannot be represented using a modRM byte. The XOR
|
||||
replacement is long decoded, so this split helps here as well). */
|
||||
replacement is long decoded, so this split helps here as well. */
|
||||
m_K6,
|
||||
};
|
||||
|
||||
|
|
|
@ -235,6 +235,7 @@ enum ix86_tune_indices {
|
|||
X86_TUNE_EXT_80387_CONSTANTS,
|
||||
X86_TUNE_SHORTEN_X87_SSE,
|
||||
X86_TUNE_AVOID_VECTOR_DECODE,
|
||||
X86_TUNE_PROMOTE_HIMODE_IMUL,
|
||||
X86_TUNE_SLOW_IMUL_IMM32_MEM,
|
||||
X86_TUNE_SLOW_IMUL_IMM8,
|
||||
X86_TUNE_MOVE_M1_VIA_OR,
|
||||
|
@ -312,6 +313,8 @@ extern unsigned int ix86_tune_features[X86_TUNE_LAST];
|
|||
#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
|
||||
#define TARGET_AVOID_VECTOR_DECODE \
|
||||
ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE]
|
||||
#define TARGET_TUNE_PROMOTE_HIMODE_IMUL \
|
||||
ix86_tune_features[X86_TUNE_PROMOTE_HIMODE_IMUL]
|
||||
#define TARGET_SLOW_IMUL_IMM32_MEM \
|
||||
ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM32_MEM]
|
||||
#define TARGET_SLOW_IMUL_IMM8 ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM8]
|
||||
|
|
|
@ -963,12 +963,10 @@
|
|||
mod,udiv,umod,ashift,rotate,ashiftrt,lshiftrt,rotatert"))
|
||||
|
||||
;; Return 1 if OP is a binary operator that can be promoted to wider mode.
|
||||
;; Modern CPUs have same latency for HImode and SImode multiply,
|
||||
;; but 386 and 486 do HImode multiply faster. */
|
||||
(define_predicate "promotable_binary_operator"
|
||||
(ior (match_code "plus,and,ior,xor,ashift")
|
||||
(and (match_code "mult")
|
||||
(match_test "ix86_tune > PROCESSOR_I486"))))
|
||||
(match_test "TARGET_TUNE_PROMOTE_HIMODE_IMUL"))))
|
||||
|
||||
;; To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
|
||||
;; re-recognize the operand to avoid a copy_to_mode_reg that will fail.
|
||||
|
|
Loading…
Reference in New Issue