Currently, LLVM lowers a cttz8 on x86_64 to these instructions:
```asm
movzbl  %dil, %eax
bsfl    %eax, %eax
movl    $32, %ecx
cmovnel %eax, %ecx
cmpl    $32, %ecx
movl    $8, %eax
cmovnel %ecx, %eax
```
which has some unnecessary overhead in the form of two conditional moves. To improve the codegen, we can zero-extend the 8-bit integer, set bit 8, and perform the cttz operation on the extended value. That way, no conditional operation is involved at all.
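For comparison, the branch-free lowering we'd expect after the change looks roughly like this (an illustrative sketch, not verified compiler output):

```asm
movzbl %dil, %eax      # zero-extend the u8 argument
orl    $256, %eax      # set bit 8 so the value can never be zero
bsfl   %eax, %eax      # for input 0, the sentinel bit makes bsf return 8
```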
commit 36dccec2f3 (parent 8f991d1fc2)
```diff
@@ -745,7 +745,20 @@ macro_rules! uint_impl {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn trailing_zeros(self) -> u32 {
-        unsafe { $cttz(self as $ActualT) as u32 }
+        // As of LLVM 3.6 the codegen for the zero-safe cttz8 intrinsic
+        // emits two conditional moves on x86_64. By promoting the value to
+        // u16 and setting bit 8, we get better code without any conditional
+        // operations.
+        // FIXME: There's a LLVM patch (http://reviews.llvm.org/D9284)
+        // pending, remove this workaround once LLVM generates better code
+        // for cttz8.
+        unsafe {
+            if $BITS == 8 {
+                intrinsics::cttz16(self as u16 | 0x100) as u32
+            } else {
+                $cttz(self as $ActualT) as u32
+            }
+        }
     }

     /// Shifts the bits to the left by a specified amount, `n`,
```
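As a quick sanity check on the trick itself, here is a minimal standalone sketch (hypothetical test code, not part of the commit) showing that the sentinel bit reproduces the `trailing_zeros` semantics for `u8`, including the zero case:

```rust
fn cttz8_via_u16(x: u8) -> u32 {
    // Widening to u16 and setting bit 8 makes the value non-zero,
    // so no zero check (and hence no conditional move) is needed.
    (x as u16 | 0x100).trailing_zeros()
}

fn main() {
    assert_eq!(cttz8_via_u16(0), 8);       // sentinel bit caps the result at 8
    assert_eq!(cttz8_via_u16(0b0100), 2);
    assert_eq!(cttz8_via_u16(0xFF), 0);
    assert_eq!(0u8.trailing_zeros(), 8);   // matches the standard semantics
}
```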