Implement llvm.x86.avx2.pmovmskb llvm intrinsic
This commit is contained in:
parent
48a6b581b5
commit
63646b1956
@ -65,6 +65,8 @@ unsafe fn test_simd() {
|
|||||||
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]);
|
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]);
|
||||||
|
|
||||||
test_mm_slli_si128();
|
test_mm_slli_si128();
|
||||||
|
test_mm_movemask_epi8();
|
||||||
|
test_mm256_movemask_epi8();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[target_feature(enable = "sse2")]
|
#[target_feature(enable = "sse2")]
|
||||||
@ -109,6 +111,31 @@ unsafe fn test_mm_slli_si128() {
|
|||||||
assert_eq_m128i(r, _mm_set1_epi8(0));
|
assert_eq_m128i(r, _mm_set1_epi8(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[target_feature(enable = "sse2")]
|
||||||
|
unsafe fn test_mm_movemask_epi8() {
|
||||||
|
use std::arch::x86_64::*;
|
||||||
|
|
||||||
|
#[rustfmt::skip]
|
||||||
|
let a = _mm_setr_epi8(
|
||||||
|
0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
|
||||||
|
0b0101, 0b1111_0000u8 as i8, 0, 0,
|
||||||
|
0, 0, 0b1111_0000u8 as i8, 0b0101,
|
||||||
|
0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
|
||||||
|
);
|
||||||
|
let r = _mm_movemask_epi8(a);
|
||||||
|
assert_eq!(r, 0b10100100_00100101);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[target_feature(enable = "avx2")]
|
||||||
|
unsafe fn test_mm256_movemask_epi8() {
|
||||||
|
use std::arch::x86_64::*;
|
||||||
|
|
||||||
|
let a = _mm256_set1_epi8(-1);
|
||||||
|
let r = _mm256_movemask_epi8(a);
|
||||||
|
let e = -1;
|
||||||
|
assert_eq!(r, e);
|
||||||
|
}
|
||||||
|
|
||||||
fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
|
fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
|
||||||
unsafe {
|
unsafe {
|
||||||
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x));
|
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x));
|
||||||
|
@ -33,15 +33,15 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>(
|
|||||||
crate::trap::trap_unimplemented(fx, intrinsic);
|
crate::trap::trap_unimplemented(fx, intrinsic);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Used by _mm_movemask_epi8
|
// Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
|
||||||
llvm.x86.sse2.pmovmskb.128, (c a) {
|
llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb, (c a) {
|
||||||
let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic);
|
let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic);
|
||||||
assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty);
|
assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty);
|
||||||
assert_eq!(lane_count, 16);
|
assert!(lane_count == 16 || lane_count == 32);
|
||||||
|
|
||||||
let mut res = fx.bcx.ins().iconst(types::I32, 0);
|
let mut res = fx.bcx.ins().iconst(types::I32, 0);
|
||||||
|
|
||||||
for lane in 0..16 {
|
for lane in 0..lane_count {
|
||||||
let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx);
|
let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx);
|
||||||
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int
|
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int
|
||||||
let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign);
|
let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign);
|
||||||
@ -65,6 +65,5 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>(
|
|||||||
// llvm.x86.avx2.vperm2i128
|
// llvm.x86.avx2.vperm2i128
|
||||||
// llvm.x86.ssse3.pshuf.b.128
|
// llvm.x86.ssse3.pshuf.b.128
|
||||||
// llvm.x86.avx2.pshuf.b
|
// llvm.x86.avx2.pshuf.b
|
||||||
// llvm.x86.avx2.pmovmskb
|
|
||||||
// llvm.x86.avx2.psrli.w
|
// llvm.x86.avx2.psrli.w
|
||||||
// llvm.x86.sse2.psrli.w
|
// llvm.x86.sse2.psrli.w
|
||||||
|
Loading…
x
Reference in New Issue
Block a user