diff --git a/example/std_example.rs b/example/std_example.rs index e3b3edd86af..8a43af5bd80 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -67,6 +67,9 @@ unsafe fn test_simd() { test_mm_slli_si128(); test_mm_movemask_epi8(); test_mm256_movemask_epi8(); + + let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))); + assert_eq!(mask1, 1); } #[target_feature(enable = "sse2")] diff --git a/src/llvm_intrinsics.rs b/src/llvm_intrinsics.rs index 32aa8b5d3df..b93fa1bdbdf 100644 --- a/src/llvm_intrinsics.rs +++ b/src/llvm_intrinsics.rs @@ -41,7 +41,7 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>( let mut res = fx.bcx.ins().iconst(types::I32, 0); - for lane in 0..lane_count { + for lane in (0..lane_count).rev() { let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx); let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign);