i386.c (builtin_description): Add __builtin_ia32_paddq and __builtin_ia32_psubq.

* i386.c (builtin_description): Add __builtin_ia32_paddq and
	__builtin_ia32_psubq. Fix __builtin_ia32_paddq128
	 and __builtin_ia32_psubq128.
	* i386.h (IX86_BUILTIN_PADDQ, IX86_BUILTIN_PSUBQ): New.
	* i386.md (addv*, mmx_ior*, mmx_xoe*, mmx_and*): Add missing '%'.
	(mmx_adddi3, mmx_subdi3): New.
	* mmintrin.h (_mm_add_si64, _mm_sub_si64): New.
	* xmmintrin.h (_mm_movepi64_pi64): New.
	(_mm_add_epi64, _mm_sub_epi64): fix.
	(_mm_mul_pu16): Rename to...
	(_mm_mul_su32): ... this one.

	* builtins.c (expand_builtin_expect):  Do not predict
	flag_guess_branch_prob is not set.
	* c-semantics.c (expand_stmt): Likewise.
	* predict.c (predict_insn): Likewise.
	* stmt.c (expand_continue_loop): Likewise.
	* toplev.c (rest_of_compilation): Do not call
	note_prediction_to_br_prob and note_prediction_to_br_prob
	when not optimizing.

From-SVN: r63263
This commit is contained in:
Jan Hubicka 2003-02-22 01:32:09 +01:00 committed by Jan Hubicka
parent 6a6d417ebd
commit d50672efa7
11 changed files with 112 additions and 32 deletions

View File

@ -1,3 +1,26 @@
Sat Feb 22 00:48:22 CET 2003 Jan Hubicka <jh@suse.cz>
* i386.c (builtin_description): Add __builtin_ia32_paddq and
__builtin_ia32_psubq. Fix __builtin_ia32_paddq128
and __builtin_ia32_psubq128.
* i386.h (IX86_BUILTIN_PADDQ, IX86_BUILTIN_PSUBQ): New.
* i386.md (addv*, mmx_ior*, mmx_xoe*, mmx_and*): Add missing '%'.
(mmx_adddi3, mmx_subdi3): New.
* mmintrin.h (_mm_add_si64, _mm_sub_si64): New.
* xmmintrin.h (_mm_movepi64_pi64): New.
(_mm_add_epi64, _mm_sub_epi64): fix.
(_mm_mul_pu16): Rename to...
(_mm_mul_su32): ... this one.
* builtins.c (expand_builtin_expect): Do not predict
flag_guess_branch_prob is not set.
* c-semantics.c (expand_stmt): Likewise.
* predict.c (predict_insn): Likewise.
* stmt.c (expand_continue_loop): Likewise.
* toplev.c (rest_of_compilation): Do not call
note_prediction_to_br_prob and note_prediction_to_br_prob
when not optimizing.
Fri Feb 21 23:10:13 CET 2003 Jan Hubicka <jh@suse.cz>
* cfgrtl.c (commit_edge_insertions): Call

View File

@ -3840,7 +3840,7 @@ expand_builtin_expect (arglist, target)
target = expand_expr (exp, target, VOIDmode, EXPAND_NORMAL);
/* Don't bother with expected value notes for integral constants. */
if (GET_CODE (target) != CONST_INT)
if (flag_guess_branch_prob && GET_CODE (target) != CONST_INT)
{
/* We do need to force this into a register so that we can be
moderately sure to be able to correctly interpret the branch

View File

@ -864,7 +864,8 @@ expand_stmt (t)
case GOTO_STMT:
/* Emit information for branch prediction. */
if (!GOTO_FAKE_P (t)
&& TREE_CODE (GOTO_DESTINATION (t)) == LABEL_DECL)
&& TREE_CODE (GOTO_DESTINATION (t)) == LABEL_DECL
&& flag_guess_branch_prob)
{
rtx note = emit_note (NULL, NOTE_INSN_PREDICTION);

View File

@ -12787,9 +12787,11 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
{ MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
{ MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
{ MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
{ MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
{ MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
{ MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
@ -12910,11 +12912,11 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
{ MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },

View File

@ -2163,6 +2163,7 @@ enum ix86_builtins
IX86_BUILTIN_PADDB,
IX86_BUILTIN_PADDW,
IX86_BUILTIN_PADDD,
IX86_BUILTIN_PADDQ,
IX86_BUILTIN_PADDSB,
IX86_BUILTIN_PADDSW,
IX86_BUILTIN_PADDUSB,
@ -2170,6 +2171,7 @@ enum ix86_builtins
IX86_BUILTIN_PSUBB,
IX86_BUILTIN_PSUBW,
IX86_BUILTIN_PSUBD,
IX86_BUILTIN_PSUBQ,
IX86_BUILTIN_PSUBSB,
IX86_BUILTIN_PSUBSW,
IX86_BUILTIN_PSUBUSB,

View File

@ -20242,7 +20242,7 @@
(define_insn "addv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
(plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddb\t{%2, %0|%0, %2}"
@ -20251,7 +20251,7 @@
(define_insn "addv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
(plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddw\t{%2, %0|%0, %2}"
@ -20260,16 +20260,27 @@
(define_insn "addv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
(plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
(match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddd\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
(define_insn "mmx_adddi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(plus:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
"paddq\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
(define_insn "ssaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
(ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsb\t{%2, %0|%0, %2}"
@ -20278,7 +20289,7 @@
(define_insn "ssaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
(ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddsw\t{%2, %0|%0, %2}"
@ -20287,7 +20298,7 @@
(define_insn "usaddv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
(us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusb\t{%2, %0|%0, %2}"
@ -20296,7 +20307,7 @@
(define_insn "usaddv4hi3"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
(us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
"TARGET_MMX"
"paddusw\t{%2, %0|%0, %2}"
@ -20330,6 +20341,17 @@
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
(define_insn "mmx_subdi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(minus:DI (match_operand:DI 1 "register_operand" "0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
"psubq\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxadd")
(set_attr "mode" "DI")])
(define_insn "sssubv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
@ -20433,7 +20455,7 @@
(define_insn "mmx_iordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(ior:DI (match_operand:DI 1 "register_operand" "0")
[(ior:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@ -20444,7 +20466,7 @@
(define_insn "mmx_xordi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(xor:DI (match_operand:DI 1 "register_operand" "0")
[(xor:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@ -20467,7 +20489,7 @@
(define_insn "mmx_anddi3"
[(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[(and:DI (match_operand:DI 1 "register_operand" "0")
[(and:DI (match_operand:DI 1 "register_operand" "%0")
(match_operand:DI 2 "nonimmediate_operand" "ym"))]
UNSPEC_NOP))]
"TARGET_MMX"
@ -21894,7 +21916,7 @@
(define_insn "addv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
(plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddb\t{%2, %0|%0, %2}"
@ -21903,7 +21925,7 @@
(define_insn "addv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
(plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddw\t{%2, %0|%0, %2}"
@ -21912,7 +21934,7 @@
(define_insn "addv4si3"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(plus:V4SI (match_operand:V4SI 1 "register_operand" "0")
(plus:V4SI (match_operand:V4SI 1 "register_operand" "%0")
(match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddd\t{%2, %0|%0, %2}"
@ -21921,7 +21943,7 @@
(define_insn "addv2di3"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(plus:V2DI (match_operand:V2DI 1 "register_operand" "0")
(plus:V2DI (match_operand:V2DI 1 "register_operand" "%0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddq\t{%2, %0|%0, %2}"
@ -21930,7 +21952,7 @@
(define_insn "ssaddv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
(ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddsb\t{%2, %0|%0, %2}"
@ -21939,7 +21961,7 @@
(define_insn "ssaddv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
(ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddsw\t{%2, %0|%0, %2}"
@ -21948,7 +21970,7 @@
(define_insn "usaddv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
(us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddusb\t{%2, %0|%0, %2}"
@ -21957,7 +21979,7 @@
(define_insn "usaddv8hi3"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
(us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
(match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"paddusw\t{%2, %0|%0, %2}"

View File

@ -160,6 +160,13 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2)
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
}
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
static __inline __m64
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
}
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
saturated arithmetic. */
static __inline __m64
@ -213,6 +220,13 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2)
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
static __inline __m64
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
}
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
saturating arithmetic. */
static __inline __m64

View File

@ -1619,6 +1619,12 @@ _mm_storel_epi64 (__m128i *__P, __m128i __B)
*(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
}
static __inline __m64
_mm_movepi64_pi64 (__m128i __B)
{
return (__m64) __builtin_ia32_movdq2q ((__v2di)__B);
}
static __inline __m128i
_mm_move_epi64 (__m128i __A)
{
@ -2048,7 +2054,7 @@ _mm_add_epi32 (__m128i __A, __m128i __B)
static __inline __m128i
_mm_add_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_paddq128 ((__v4si)__A, (__v4si)__B);
return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}
static __inline __m128i
@ -2096,7 +2102,7 @@ _mm_sub_epi32 (__m128i __A, __m128i __B)
static __inline __m128i
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_psubq128 ((__v4si)__A, (__v4si)__B);
return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}
static __inline __m128i
@ -2142,7 +2148,7 @@ _mm_mullo_epi16 (__m128i __A, __m128i __B)
}
static __inline __m64
_mm_mul_pu16 (__m64 __A, __m64 __B)
_mm_mul_su32 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}

View File

@ -186,6 +186,8 @@ predict_insn (insn, predictor, probability)
{
if (!any_condjump_p (insn))
abort ();
if (!flag_guess_branch_prob)
return;
REG_NOTES (insn)
= gen_rtx_EXPR_LIST (REG_BR_PRED,

View File

@ -2781,8 +2781,11 @@ expand_continue_loop (whichloop)
/* Emit information for branch prediction. */
rtx note;
note = emit_note (NULL, NOTE_INSN_PREDICTION);
NOTE_PREDICTION (note) = NOTE_PREDICT (PRED_CONTINUE, IS_TAKEN);
if (flag_guess_branch_prob)
{
note = emit_note (NULL, NOTE_INSN_PREDICTION);
NOTE_PREDICTION (note) = NOTE_PREDICT (PRED_CONTINUE, IS_TAKEN);
}
clear_last_expr ();
if (whichloop == 0)
whichloop = loop_stack;
@ -2974,7 +2977,8 @@ expand_value_return (val)
rtx return_reg;
enum br_predictor pred;
if ((pred = return_prediction (val)) != PRED_NO_PREDICTION)
if (flag_guess_branch_prob
&& (pred = return_prediction (val)) != PRED_NO_PREDICTION)
{
/* Emit information for branch prediction. */
rtx note;

View File

@ -2605,9 +2605,12 @@ rest_of_compilation (decl)
delete_unreachable_blocks ();
/* Turn NOTE_INSN_PREDICTIONs into branch predictions. */
timevar_push (TV_BRANCH_PROB);
note_prediction_to_br_prob ();
timevar_pop (TV_BRANCH_PROB);
if (flag_guess_branch_prob)
{
timevar_push (TV_BRANCH_PROB);
note_prediction_to_br_prob ();
timevar_pop (TV_BRANCH_PROB);
}
/* We may have potential sibling or tail recursion sites. Select one
(of possibly multiple) methods of performing the call. */
@ -2688,7 +2691,8 @@ rest_of_compilation (decl)
timevar_push (TV_JUMP);
/* Turn NOTE_INSN_EXPECTED_VALUE into REG_BR_PROB. Do this
before jump optimization switches branch directions. */
expected_value_to_br_prob ();
if (flag_guess_branch_prob)
expected_value_to_br_prob ();
reg_scan (insns, max_reg_num (), 0);
rebuild_jump_labels (insns);