Implement llvm.x86.avx2.pmovmskb llvm intrinsic

This commit is contained in:
bjorn3 2019-07-29 12:50:20 +02:00
parent 48a6b581b5
commit 63646b1956
2 changed files with 31 additions and 5 deletions

View File

@ -65,6 +65,8 @@ unsafe fn test_simd() {
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]); assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]);
test_mm_slli_si128(); test_mm_slli_si128();
test_mm_movemask_epi8();
test_mm256_movemask_epi8();
} }
#[target_feature(enable = "sse2")] #[target_feature(enable = "sse2")]
@ -109,6 +111,31 @@ unsafe fn test_mm_slli_si128() {
assert_eq_m128i(r, _mm_set1_epi8(0)); assert_eq_m128i(r, _mm_set1_epi8(0));
} }
#[target_feature(enable = "sse2")]
unsafe fn test_mm_movemask_epi8() {
use std::arch::x86_64::*;
#[rustfmt::skip]
let a = _mm_setr_epi8(
0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
0b0101, 0b1111_0000u8 as i8, 0, 0,
0, 0, 0b1111_0000u8 as i8, 0b0101,
0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
);
let r = _mm_movemask_epi8(a);
assert_eq!(r, 0b10100100_00100101);
}
#[target_feature(enable = "avx2")]
unsafe fn test_mm256_movemask_epi8() {
use std::arch::x86_64::*;
let a = _mm256_set1_epi8(-1);
let r = _mm256_movemask_epi8(a);
let e = -1;
assert_eq!(r, e);
}
fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) { fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
unsafe { unsafe {
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x)); assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x));

View File

@ -33,15 +33,15 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>(
crate::trap::trap_unimplemented(fx, intrinsic); crate::trap::trap_unimplemented(fx, intrinsic);
}; };
// Used by _mm_movemask_epi8 // Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
llvm.x86.sse2.pmovmskb.128, (c a) { llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb, (c a) {
let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic); let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic);
assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty); assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty);
assert_eq!(lane_count, 16); assert!(lane_count == 16 || lane_count == 32);
let mut res = fx.bcx.ins().iconst(types::I32, 0); let mut res = fx.bcx.ins().iconst(types::I32, 0);
for lane in 0..16 { for lane in 0..lane_count {
let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx); let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx);
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int
let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign); let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign);
@ -65,6 +65,5 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>(
// llvm.x86.avx2.vperm2i128 // llvm.x86.avx2.vperm2i128
// llvm.x86.ssse3.pshuf.b.128 // llvm.x86.ssse3.pshuf.b.128
// llvm.x86.avx2.pshuf.b // llvm.x86.avx2.pshuf.b
// llvm.x86.avx2.pmovmskb
// llvm.x86.avx2.psrli.w // llvm.x86.avx2.psrli.w
// llvm.x86.sse2.psrli.w // llvm.x86.sse2.psrli.w