i386.h (TARGET_SAHF): New define.

* config/i386/i386.h (TARGET_SAHF): New define.
        * config/i386/i386.c (ix86_tune_features) [X86_TUNE_USE_SAHF]:
        Also enable for m_K8, m_AMDFAM10 and m_CORE2.
        (x86_sahf): New global variable.
        (override_options): Add PTA_NO_SAHF to pta_flags enum.  Recode
        pta_flags masks using shifts.  Add PTA_NO_SAHF to x86_64 and
        nocona processor flags.  Set x86_sahf when PTA_NO_SAHF is not set
        in processor flags.  Do not unconditionally disable TARGET_USE_SAHF
        for 64-bit.
        (ix86_fp_comparison_sahf_cost): Return high value for !TARGET_SAHF.
        (ix86_expand_fp_compare): Check for TARGET_CMOVE or TARGET_SAHF
        when expanding fcomi/sahf based tests.
        (ix86_emit_fp_unordered_jump): Check for TARGET_SAHF when
        expanding sahf based alternative. Emit sahf based sequence when
        optimizing for code size.
        * config/i386/i386.md (x86_sahf_1): Do not disable for
        TARGET_64BIT, enable for TARGET_SAHF.

From-SVN: r122705
This commit is contained in:
Uros Bizjak 2007-03-08 18:21:40 +01:00
parent 0c705abc64
commit 3c2d980c94
4 changed files with 62 additions and 32 deletions

View File

@ -1,3 +1,23 @@
2007-03-08 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.h (TARGET_SAHF): New define.
* config/i386/i386.c (ix86_tune_features) [X86_TUNE_USE_SAHF]:
Also enable for m_K8, m_AMDFAM10 and m_CORE2.
(x86_sahf): New global variable.
(override_options): Add PTA_NO_SAHF to pta_flags enum. Recode
pta_flags masks using shifts. Add PTA_NO_SAHF to x86_64 and
nocona processor flags. Set x86_sahf when PTA_NO_SAHF is not set
in processor flags. Do not unconditionally disable TARGET_USE_SAHF
for 64-bit.
(ix86_fp_comparison_sahf_cost): Return high value for !TARGET_SAHF.
(ix86_expand_fp_compare): Check for TARGET_CMOVE or TARGET_SAHF
when expanding fcomi/sahf based tests.
(ix86_emit_fp_unordered_jump): Check for TARGET_SAHF when
expanding sahf based alternative. Emit sahf based sequence when
optimizing for code size.
* config/i386/i386.md (x86_sahf_1): Do not disable for
TARGET_64BIT, enable for TARGET_SAHF.
2007-03-08 Martin Michlmayr <tbm@cyrius.com>
* tree-ssa-coalesce.c (fail_abnormal_edge_coalesce): Remove
@ -505,7 +525,7 @@
of comparison of non-null ADDR_EXPR against null.
2007-03-05 Richard Guenther <rguenther@suse.de>
Dorit Nuzman <dorit@il.ibm.com>
Dorit Nuzman <dorit@il.ibm.com>
PR tree-optimization/26420
* tree-vectorizer.c (vectorize_loops): Bail out early if there
@ -515,9 +535,9 @@
2007-03-05 Revital Eres <eres@il.ibm.com>
* gcc.dg/var-expand1.c: New test.
* loop-unroll.c (analyze_insn_to_expand_var): Add dump info
when an accumulator is expanded.
* gcc.dg/var-expand1.c: New test.
* loop-unroll.c (analyze_insn_to_expand_var): Add dump info
when an accumulator is expanded.
2007-03-04 Manuel Lopez-Ibanez <manu@gcc.gnu.org>

View File

@ -1039,11 +1039,11 @@ unsigned int ix86_tune_features[X86_TUNE_LAST] = {
~m_386,
/* X86_TUNE_USE_SAHF */
m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
/* | m_GENERIC | m_ATHLON_K8 ? */
m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC32,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies */
partial dependencies. */
m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
@ -1441,6 +1441,11 @@ int x86_prefetch_sse;
/* true if cmpxchg16b is supported. */
int x86_cmpxchg16b;
/* true if sahf is supported. Early Intel CPUs with Intel 64
lacked the LAHF and SAHF instructions supported by AMD64 until the
introduction of the Pentium 4 G1 step in December 2005. */
int x86_sahf;
/* ix86_regparm_string as a number */
static int ix86_regparm;
@ -1884,19 +1889,20 @@ override_options (void)
const enum processor_type processor;
const enum pta_flags
{
PTA_SSE = 1,
PTA_SSE2 = 2,
PTA_SSE3 = 4,
PTA_MMX = 8,
PTA_PREFETCH_SSE = 16,
PTA_3DNOW = 32,
PTA_3DNOW_A = 64,
PTA_64BIT = 128,
PTA_SSSE3 = 256,
PTA_CX16 = 512,
PTA_POPCNT = 1024,
PTA_ABM = 2048,
PTA_SSE4A = 4096
PTA_SSE = 1 << 0,
PTA_SSE2 = 1 << 1,
PTA_SSE3 = 1 << 2,
PTA_MMX = 1 << 3,
PTA_PREFETCH_SSE = 1 << 4,
PTA_3DNOW = 1 << 5,
PTA_3DNOW_A = 1 << 6,
PTA_64BIT = 1 << 7,
PTA_SSSE3 = 1 << 8,
PTA_CX16 = 1 << 9,
PTA_POPCNT = 1 << 10,
PTA_ABM = 1 << 11,
PTA_SSE4A = 1 << 12,
PTA_NO_SAHF = 1 << 13
} flags;
}
const processor_alias_table[] =
@ -1923,7 +1929,8 @@ override_options (void)
{"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_MMX | PTA_PREFETCH_SSE},
{"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
| PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
| PTA_MMX | PTA_PREFETCH_SSE
| PTA_CX16 | PTA_NO_SAHF},
{"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
| PTA_64BIT | PTA_MMX
| PTA_PREFETCH_SSE | PTA_CX16},
@ -1943,7 +1950,7 @@ override_options (void)
{"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
{"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
| PTA_SSE | PTA_SSE2 },
| PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
{"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
{"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
@ -2146,6 +2153,8 @@ override_options (void)
if (processor_alias_table[i].flags & PTA_SSE4A
&& !(target_flags_explicit & MASK_SSE4A))
target_flags |= MASK_SSE4A;
if (!(processor_alias_table[i].flags & PTA_NO_SAHF))
x86_sahf = true;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
error ("CPU you selected does not support x86-64 "
"instruction set");
@ -2465,10 +2474,6 @@ override_options (void)
if (TARGET_SSE)
TARGET_CMOVE = 1;
/* ??? Any idea why this is unconditionally disabled for 64-bit? */
if (TARGET_64BIT)
TARGET_USE_SAHF = 0;
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
{
char *p;
@ -10976,7 +10981,7 @@ ix86_fp_comparison_sahf_cost (enum rtx_code code)
enum rtx_code bypass_code, first_code, second_code;
/* Return arbitrarily high cost when instruction is not preferred - this
prevents gcc from using it. */
if (!TARGET_USE_SAHF && !optimize_size)
if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
return 1024;
ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
@ -11023,7 +11028,8 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
/* Do fcomi/sahf based test when profitable. */
if ((bypass_code == UNKNOWN || bypass_test)
if ((TARGET_CMOVE || TARGET_SAHF)
&& (bypass_code == UNKNOWN || bypass_test)
&& (second_code == UNKNOWN || second_test)
&& ix86_fp_comparison_arithmetics_cost (code) > cost)
{
@ -21007,7 +21013,7 @@ ix86_emit_fp_unordered_jump (rtx label)
emit_insn (gen_x86_fnstsw_1 (reg));
if (TARGET_USE_SAHF)
if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
{
emit_insn (gen_x86_sahf_1 (reg));

View File

@ -191,7 +191,7 @@ enum ix86_tune_indices {
X86_TUNE_DEEP_BRANCH_PREDICTION,
X86_TUNE_BRANCH_PREDICTION_HINTS,
X86_TUNE_DOUBLE_WITH_ADD,
X86_TUNE_USE_SAHF, /* && !TARGET_64BIT */
X86_TUNE_USE_SAHF,
X86_TUNE_MOVX,
X86_TUNE_PARTIAL_REG_STALL,
X86_TUNE_PARTIAL_FLAG_REG_STALL,
@ -330,6 +330,9 @@ extern int x86_prefetch_sse;
extern int x86_cmpxchg16b;
#define TARGET_CMPXCHG16B x86_cmpxchg16b
extern int x86_sahf;
#define TARGET_SAHF x86_sahf
#define ASSEMBLER_DIALECT (ix86_asm_dialect)
#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)

View File

@ -983,8 +983,9 @@
(define_insn "x86_sahf_1"
[(set (reg:CC FLAGS_REG)
(unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))]
"!TARGET_64BIT"
(unspec:CC [(match_operand:HI 0 "register_operand" "a")]
UNSPEC_SAHF))]
"TARGET_SAHF"
"sahf"
[(set_attr "length" "1")
(set_attr "athlon_decode" "vector")