re PR target/78037 (Incorrect code generated at optimization level -O2 for tzcnt and binary and)

PR target/78037
	* config/i386/bmiintrin.h (__tzcnt_u16): Call __builtin_ia32_tzcnt_u16.
	(__tzcnt_u32, _tzcnt_u32): Call __builtin_ia32_tzcnt_u32.
	(__tzcnt_u64, _tzcnt_u64): Call __builtin_ia32_tzcnt_u64.
	* config/i386/lzcntintrin.h (__lzcnt_u16): Call
	__builtin_ia32_lzcnt_u16.
	(__lzcnt_u32, _lzcnt_u32): Call __builtin_ia32_lzcnt_u32.
	(__lzcnt_u64, _lzcnt_u64): Call __builtin_ia32_lzcnt_u64.
	* config/i386/i386.md (UNSPEC_LZCNT, UNSPEC_TZCNT): New unspecs.
	(ctz<mode>2, *ctz<mode>2): Use SWI48 mode iterator.
	(bmi_tzcnt_<mode>): New expander.
	(*bmi_tzcnt_<mode>_falsedep_1): New define_insn_and_split pattern.
	(*bmi_tzcnt_<mode>_falsedep, *bmi_tzcnt_<mode>): New insn patterns.
	(clz<mode>2_lzcnt, *clz<mode>2_lzcnt): Use SWI48 mode iterator.
	(lzcnt_<mode>): New expander.
	(*lzcnt_<mode>_falsedep_1): New define_insn_and_split pattern.
	(*lzcnt_<mode>_falsedep, *lzcnt_<mode>): New insn patterns.
	* config/i386/i386-builtin-types.def (UINT_FTYPE_UINT): New.
	(UINT64_FTYPE_UINT64): New.
	* config/i386/i386-builtin.def (__builtin_clzs): Remove description.
	(__builtin_ia32_lzcnt_u16): New description.
	(__builtin_ia32_lzcnt_u32): Ditto.
	(__builtin_ia32_lzcnt_u64): Ditto.
	(__builtin_ctzs): Remove description.
	(__builtin_ia32_tzcnt_u16): New description.
	(__builtin_ia32_tzcnt_u32): Ditto.
	(__builtin_ia32_tzcnt_u64): Ditto.
	* config/i386/i386.c (ix86_expand_args_builtin): Handle
	UINT_FTYPE_UINT and UINT64_FTYPE_UINT64.

testsuite/ChangeLog:

	PR target/78037
	* gcc.target/i386/pr78037.c: New test.

From-SVN: r241381
This commit is contained in:
Uros Bizjak 2016-10-20 19:53:10 +02:00 committed by Uros Bizjak
parent 3e23d1bcc4
commit f56ea76ecb
9 changed files with 226 additions and 28 deletions

View File

@ -1,3 +1,35 @@
2016-10-20 Uros Bizjak <ubizjak@gmail.com>
PR target/78037
* config/i386/bmiintrin.h (__tzcnt_u16): Call __builtin_ia32_tzcnt_u16.
(__tzcnt_u32, _tzcnt_u32): Call __builtin_ia32_tzcnt_u32.
(__tzcnt_u64, _tzcnt_u64): Call __builtin_ia32_tzcnt_u64.
* config/i386/lzcntintrin.h (__lzcnt_u16): Call
__builtin_ia32_lzcnt_u16.
(__lzcnt_u32, _lzcnt_u32): Call __builtin_ia32_lzcnt_u32.
(__lzcnt_u64, _lzcnt_u64): Call __builtin_ia32_lzcnt_u64.
* config/i386/i386.md (UNSPEC_LZCNT, UNSPEC_TZCNT): New unspecs.
(ctz<mode>2, *ctz<mode>2): Use SWI48 mode iterator.
(bmi_tzcnt_<mode>): New expander.
(*bmi_tzcnt_<mode>_falsedep_1): New define_insn_and_split pattern.
(*bmi_tzcnt_<mode>_falsedep, *bmi_tzcnt_<mode>): New insn patterns.
(clz<mode>2_lzcnt, *clz<mode>2_lzcnt): Use SWI48 mode iterator.
(lzcnt_<mode>): New expander.
(*lzcnt_<mode>_falsedep_1): New define_insn_and_split pattern.
(*lzcnt_<mode>_falsedep, *lzcnt_<mode>): New insn patterns.
* config/i386/i386-builtin-types.def (UINT_FTYPE_UINT): New.
(UINT64_FTYPE_UINT64): New.
* config/i386/i386-builtin.def (__builtin_clzs): Remove description.
(__builtin_ia32_lzcnt_u16): New description.
(__builtin_ia32_lzcnt_u32): Ditto.
(__builtin_ia32_lzcnt_u64): Ditto.
(__builtin_ctzs): Remove description.
(__builtin_ia32_tzcnt_u16): New description.
(__builtin_ia32_tzcnt_u32): Ditto.
(__builtin_ia32_tzcnt_u64): Ditto.
* config/i386/i386.c (ix86_expand_args_builtin): Handle
UINT_FTYPE_UINT and UINT64_FTYPE_UINT64.
2016-10-20 Martin Liska <mliska@suse.cz>
PR lto/78049

View File

@ -37,7 +37,7 @@
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u16 (unsigned short __X)
{
return __builtin_ctzs (__X);
return __builtin_ia32_tzcnt_u16 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@ -97,13 +97,13 @@ _blsr_u32 (unsigned int __X)
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u32 (unsigned int __X)
{
return __builtin_ctz (__X);
return __builtin_ia32_tzcnt_u32 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u32 (unsigned int __X)
{
return __builtin_ctz (__X);
return __builtin_ia32_tzcnt_u32 (__X);
}
@ -165,13 +165,13 @@ _blsr_u64 (unsigned long long __X)
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u64 (unsigned long long __X)
{
return __builtin_ctzll (__X);
return __builtin_ia32_tzcnt_u64 (__X);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u64 (unsigned long long __X)
{
return __builtin_ctzll (__X);
return __builtin_ia32_tzcnt_u64 (__X);
}
#endif /* __x86_64__ */

View File

@ -205,9 +205,11 @@ DEF_FUNCTION_TYPE (INT, PCCHAR)
DEF_FUNCTION_TYPE (INT64, INT64)
DEF_FUNCTION_TYPE (INT64, V2DF)
DEF_FUNCTION_TYPE (INT64, V4SF)
DEF_FUNCTION_TYPE (UINT64, INT)
DEF_FUNCTION_TYPE (UINT, UINT)
DEF_FUNCTION_TYPE (UINT16, UINT16)
DEF_FUNCTION_TYPE (UINT64, INT)
DEF_FUNCTION_TYPE (UINT64, PUNSIGNED)
DEF_FUNCTION_TYPE (UINT64, UINT64)
DEF_FUNCTION_TYPE (V16QI, PCCHAR)
DEF_FUNCTION_TYPE (V16QI, V16QI)
DEF_FUNCTION_TYPE (V2DF, PCDOUBLE)

View File

@ -1186,12 +1186,18 @@ BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di",
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16)
/* LZCNT */
BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_hi, "__builtin_ia32_lzcnt_u16", IX86_BUILTIN_LZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16)
BDESC (OPTION_MASK_ISA_LZCNT, CODE_FOR_lzcnt_si, "__builtin_ia32_lzcnt_u32", IX86_BUILTIN_LZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT)
BDESC (OPTION_MASK_ISA_LZCNT | OPTION_MASK_ISA_64BIT, CODE_FOR_lzcnt_di, "__builtin_ia32_lzcnt_u64", IX86_BUILTIN_LZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64)
/* BMI */
BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT)
BDESC (OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64)
BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16)
BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_hi, "__builtin_ia32_tzcnt_u16", IX86_BUILTIN_TZCNT16, UNKNOWN, (int) UINT16_FTYPE_UINT16)
BDESC (OPTION_MASK_ISA_BMI, CODE_FOR_bmi_tzcnt_si, "__builtin_ia32_tzcnt_u32", IX86_BUILTIN_TZCNT32, UNKNOWN, (int) UINT_FTYPE_UINT)
BDESC (OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_tzcnt_di, "__builtin_ia32_tzcnt_u64", IX86_BUILTIN_TZCNT64, UNKNOWN, (int) UINT64_FTYPE_UINT64)
/* TBM */
BDESC (OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT)

View File

@ -34317,8 +34317,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case FLOAT128_FTYPE_FLOAT128:
case FLOAT_FTYPE_FLOAT:
case INT_FTYPE_INT:
case UINT64_FTYPE_INT:
case UINT_FTYPE_UINT:
case UINT16_FTYPE_UINT16:
case UINT64_FTYPE_INT:
case UINT64_FTYPE_UINT64:
case INT64_FTYPE_INT64:
case INT64_FTYPE_V4SF:
case INT64_FTYPE_V2DF:

View File

@ -175,7 +175,11 @@
;; For CRC32 support
UNSPEC_CRC32
;; For LZCNT suppoprt
UNSPEC_LZCNT
;; For BMI support
UNSPEC_TZCNT
UNSPEC_BEXTR
;; For BMI2 support
@ -12850,9 +12854,9 @@
(define_expand "ctz<mode>2"
[(parallel
[(set (match_operand:SWI248 0 "register_operand")
(ctz:SWI248
(match_operand:SWI248 1 "nonimmediate_operand")))
[(set (match_operand:SWI48 0 "register_operand")
(ctz:SWI48
(match_operand:SWI48 1 "nonimmediate_operand")))
(clobber (reg:CC FLAGS_REG))])])
; False dependency happens when destination is only updated by tzcnt,
@ -12900,8 +12904,8 @@
(set_attr "mode" "<MODE>")])
(define_insn "*ctz<mode>2"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
[(set (match_operand:SWI48 0 "register_operand" "=r")
(ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
""
{
@ -12926,15 +12930,78 @@
(const_string "0")))
(set_attr "mode" "<MODE>")])
;; Version of tzcnt that is expanded from intrinsics. This version provides
;; operand size as output when source operand is zero.
(define_expand "bmi_tzcnt_<mode>"
[(parallel
[(set (match_operand:SWI248 0 "register_operand")
(unspec:SWI248
[(match_operand:SWI248 1 "nonimmediate_operand")]
UNSPEC_TZCNT))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_BMI")
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
(define_insn_and_split "*bmi_tzcnt_<mode>_falsedep_1"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
[(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
UNSPEC_TZCNT))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI
&& TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
"#"
"&& reload_completed"
[(parallel
[(set (match_dup 0)
(unspec:SWI48 [(match_dup 1)] UNSPEC_TZCNT))
(unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
(clobber (reg:CC FLAGS_REG))])]
{
if (!reg_mentioned_p (operands[0], operands[1]))
ix86_expand_clear (operands[0]);
})
(define_insn "*bmi_tzcnt_<mode>_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
[(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
UNSPEC_TZCNT))
(unspec [(match_operand:SWI48 2 "register_operand" "0")]
UNSPEC_INSN_FALSE_DEP)
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI"
"tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*bmi_tzcnt_<mode>"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(unspec:SWI248
[(match_operand:SWI248 1 "nonimmediate_operand" "rm")]
UNSPEC_TZCNT))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI"
"tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
(set_attr "mode" "<MODE>")])
(define_expand "clz<mode>2"
[(parallel
[(set (match_operand:SWI248 0 "register_operand")
(minus:SWI248
[(set (match_operand:SWI48 0 "register_operand")
(minus:SWI48
(match_dup 2)
(clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand"))))
(clz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand"))))
(clobber (reg:CC FLAGS_REG))])
(parallel
[(set (match_dup 0) (xor:SWI248 (match_dup 0) (match_dup 2)))
[(set (match_dup 0) (xor:SWI48 (match_dup 0) (match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
""
{
@ -12948,9 +13015,9 @@
(define_expand "clz<mode>2_lzcnt"
[(parallel
[(set (match_operand:SWI248 0 "register_operand")
(clz:SWI248
(match_operand:SWI248 1 "nonimmediate_operand")))
[(set (match_operand:SWI48 0 "register_operand")
(clz:SWI48
(match_operand:SWI48 1 "nonimmediate_operand")))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_LZCNT")
@ -12987,8 +13054,8 @@
(set_attr "mode" "<MODE>")])
(define_insn "*clz<mode>2_lzcnt"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
[(set (match_operand:SWI48 0 "register_operand" "=r")
(clz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_LZCNT"
"lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
@ -12996,6 +13063,69 @@
(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
;; Version of lzcnt that is expanded from intrinsics. This version provides
;; operand size as output when source operand is zero.
(define_expand "lzcnt_<mode>"
[(parallel
[(set (match_operand:SWI248 0 "register_operand")
(unspec:SWI248
[(match_operand:SWI248 1 "nonimmediate_operand")]
UNSPEC_LZCNT))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_LZCNT")
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
(define_insn_and_split "*lzcnt_<mode>_falsedep_1"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
[(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
UNSPEC_LZCNT))
(clobber (reg:CC FLAGS_REG))]
"TARGET_LZCNT
&& TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)"
"#"
"&& reload_completed"
[(parallel
[(set (match_dup 0)
(unspec:SWI48 [(match_dup 1)] UNSPEC_LZCNT))
(unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
(clobber (reg:CC FLAGS_REG))])]
{
if (!reg_mentioned_p (operands[0], operands[1]))
ix86_expand_clear (operands[0]);
})
(define_insn "*lzcnt_<mode>_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
[(match_operand:SWI48 1 "nonimmediate_operand" "rm")]
UNSPEC_LZCNT))
(unspec [(match_operand:SWI48 2 "register_operand" "0")]
UNSPEC_INSN_FALSE_DEP)
(clobber (reg:CC FLAGS_REG))]
"TARGET_LZCNT"
"lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
(set_attr "mode" "<MODE>")])
(define_insn "*lzcnt_<mode>"
[(set (match_operand:SWI248 0 "register_operand" "=r")
(unspec:SWI248
[(match_operand:SWI248 1 "nonimmediate_operand" "rm")]
UNSPEC_LZCNT))
(clobber (reg:CC FLAGS_REG))]
"TARGET_LZCNT"
"lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
[(set_attr "type" "alu1")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
(set_attr "mode" "<MODE>")])
;; BMI instructions.
(define_insn "*bmi_andn_<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r,r")

View File

@ -38,32 +38,32 @@
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt16 (unsigned short __X)
{
return __builtin_clzs (__X);
return __builtin_ia32_lzcnt_u16 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt32 (unsigned int __X)
{
return __builtin_clz (__X);
return __builtin_ia32_lzcnt_u32 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u32 (unsigned int __X)
{
return __builtin_clz (__X);
return __builtin_ia32_lzcnt_u32 (__X);
}
#ifdef __x86_64__
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt64 (unsigned long long __X)
{
return __builtin_clzll (__X);
return __builtin_ia32_lzcnt_u64 (__X);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u64 (unsigned long long __X)
{
return __builtin_clzll (__X);
return __builtin_ia32_lzcnt_u64 (__X);
}
#endif

View File

@ -1,3 +1,8 @@
2016-10-20 Uros Bizjak <ubizjak@gmail.com>
PR target/78037
* gcc.target/i386/pr78037.c: New test.
2016-10-20 Eric Botcazou <ebotcazou@adacore.com>
* gcc.target/visium/overflow8.c: New.

View File

@ -0,0 +1,21 @@
/* { dg-do run } */
/* { dg-require-effective-target bmi } */
/* { dg-options "-O2 -mbmi" } */
#include <x86intrin.h>
#include "bmi-check.h"
int
__attribute__((noinline, noclone))
foo (int x)
{
return __tzcnt_u32 (x) & 0x1f;
}
static void
bmi_test ()
{
if (foo (0) != 0)
abort ();
}