add intrinsics for portable packed simd vector reductions

This commit is contained in:
gnzlbg 2018-03-13 16:46:55 +01:00
parent e5acb0c8f6
commit 01cc5b3e19
6 changed files with 525 additions and 3 deletions

View File

@ -1201,6 +1201,46 @@ extern "C" {
Name: *const c_char)
-> ValueRef;
// Floating-point vector reductions that take an accumulator operand (`Acc`) alongside the
// vector operand (`Src`). The C++ wrappers of the same name forward both operands to the
// IRBuilder's `CreateFAddReduce` / `CreateFMulReduce`.
pub fn LLVMRustBuildVectorReduceFAdd(B: BuilderRef,
Acc: ValueRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceFMul(B: BuilderRef,
Acc: ValueRef,
Src: ValueRef)
-> ValueRef;
// Vector reductions that only take the vector operand (`Src`). The C++ wrappers forward to
// the IRBuilder's `CreateAddReduce`, `CreateMulReduce`, `CreateAndReduce`, `CreateOrReduce`
// and `CreateXorReduce` respectively.
pub fn LLVMRustBuildVectorReduceAdd(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceMul(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceAnd(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceOr(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceXor(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
// Integer min/max reductions. `IsSigned` is handed unchanged to the IRBuilder's
// `CreateIntMinReduce` / `CreateIntMaxReduce` by the C++ wrappers; callers pass `true`
// for signed element types and `false` for unsigned ones.
pub fn LLVMRustBuildVectorReduceMin(B: BuilderRef,
Src: ValueRef,
IsSigned: bool)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceMax(B: BuilderRef,
Src: ValueRef,
IsSigned: bool)
-> ValueRef;
// Floating-point min/max reductions. The flag is named `NoNaN` to match the C++
// definitions, which pass it straight to the IRBuilder's `CreateFPMinReduce` /
// `CreateFPMaxReduce`: `true` means the reduction may assume the input holds no NaNs.
// (The previous name `IsNaN` suggested the opposite meaning.)
pub fn LLVMRustBuildVectorReduceFMin(B: BuilderRef,
                                     Src: ValueRef,
                                     NoNaN: bool)
                                     -> ValueRef;
pub fn LLVMRustBuildVectorReduceFMax(B: BuilderRef,
                                     Src: ValueRef,
                                     NoNaN: bool)
                                     -> ValueRef;
// Null / non-null test builders: each takes the builder, the value to test and a
// C-string name, and returns the resulting value handle.
// NOTE(review): `Name` is presumably the name given to the produced value — confirm
// against the LLVM-C documentation.
pub fn LLVMBuildIsNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildIsNotNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildPtrDiff(B: BuilderRef,

View File

@ -955,6 +955,81 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
}
}
/// Builds a floating-point add reduction of the vector `src` with accumulator `acc`
/// and marks the resulting instruction with `LLVMRustSetHasUnsafeAlgebra`.
///
/// Returns the handle of the reduction instruction.
pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fadd_fast");
    // SAFETY: assumes `self.llbuilder`, `acc` and `src` are valid LLVM handles; that
    // cannot be checked from here.
    let reduction = unsafe { llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src) };
    // SAFETY: `reduction` was just produced by the builder call above.
    unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(reduction) };
    reduction
}
/// Builds a floating-point multiply reduction of the vector `src` with accumulator
/// `acc` and marks the resulting instruction with `LLVMRustSetHasUnsafeAlgebra`.
///
/// Returns the handle of the reduction instruction.
pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmul_fast");
    // SAFETY: assumes `self.llbuilder`, `acc` and `src` are valid LLVM handles; that
    // cannot be checked from here.
    let reduction = unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) };
    // SAFETY: `reduction` was just produced by the builder call above.
    unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(reduction) };
    reduction
}
/// Builds an integer add reduction over the vector `src` and returns the resulting value.
pub fn vector_reduce_add(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.add");
    let builder = self.llbuilder;
    // SAFETY: assumes `builder` and `src` are valid LLVM handles; not checkable here.
    unsafe { llvm::LLVMRustBuildVectorReduceAdd(builder, src) }
}
/// Builds an integer multiply reduction over the vector `src` and returns the resulting value.
pub fn vector_reduce_mul(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.mul");
    let builder = self.llbuilder;
    // SAFETY: assumes `builder` and `src` are valid LLVM handles; not checkable here.
    unsafe { llvm::LLVMRustBuildVectorReduceMul(builder, src) }
}
/// Builds a bitwise-and reduction over the vector `src` and returns the resulting value.
pub fn vector_reduce_and(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.and");
    let builder = self.llbuilder;
    // SAFETY: assumes `builder` and `src` are valid LLVM handles; not checkable here.
    unsafe { llvm::LLVMRustBuildVectorReduceAnd(builder, src) }
}
/// Builds a bitwise-or reduction over the vector `src` and returns the resulting value.
pub fn vector_reduce_or(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.or");
    let builder = self.llbuilder;
    // SAFETY: assumes `builder` and `src` are valid LLVM handles; not checkable here.
    unsafe { llvm::LLVMRustBuildVectorReduceOr(builder, src) }
}
/// Builds a bitwise-xor reduction over the vector `src` and returns the resulting value.
pub fn vector_reduce_xor(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.xor");
    let builder = self.llbuilder;
    // SAFETY: assumes `builder` and `src` are valid LLVM handles; not checkable here.
    unsafe { llvm::LLVMRustBuildVectorReduceXor(builder, src) }
}
/// Builds a fast-math floating-point minimum reduction over the vector `src`.
///
/// The third argument of `LLVMRustBuildVectorReduceFMin` is the `NoNaN` flag of the
/// C++ wrapper. This is the fast-math variant, so the NaN-free form is requested
/// explicitly instead of passing `false` and relying only on the flags set afterwards.
///
/// Returns the handle of the reduction instruction.
pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmin_fast");
    // SAFETY: assumes `self.llbuilder` and `src` are valid LLVM handles; `instr` is the
    // value the builder call just returned.
    unsafe {
        let instr = llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, /*NoNaN:*/ true);
        llvm::LLVMRustSetHasUnsafeAlgebra(instr);
        instr
    }
}
/// Builds a fast-math floating-point maximum reduction over the vector `src`.
///
/// The third argument of `LLVMRustBuildVectorReduceFMax` is the `NoNaN` flag of the
/// C++ wrapper. This is the fast-math variant, so the NaN-free form is requested
/// explicitly instead of passing `false` and relying only on the flags set afterwards.
///
/// Returns the handle of the reduction instruction.
pub fn vector_reduce_fmax_fast(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmax_fast");
    // SAFETY: assumes `self.llbuilder` and `src` are valid LLVM handles; `instr` is the
    // value the builder call just returned.
    unsafe {
        let instr = llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /*NoNaN:*/ true);
        llvm::LLVMRustSetHasUnsafeAlgebra(instr);
        instr
    }
}
/// Builds an integer minimum reduction over the vector `src`.
///
/// `is_signed` is forwarded unchanged to the C++ wrapper and selects between the
/// signed and unsigned comparison.
pub fn vector_reduce_min(&self, src: ValueRef, is_signed: bool) -> ValueRef {
    self.count_insn("vector.reduce.min");
    let builder = self.llbuilder;
    // SAFETY: assumes `builder` and `src` are valid LLVM handles; not checkable here.
    unsafe { llvm::LLVMRustBuildVectorReduceMin(builder, src, is_signed) }
}
/// Builds an integer maximum reduction over the vector `src`.
///
/// `is_signed` is forwarded unchanged to the C++ wrapper and selects between the
/// signed and unsigned comparison.
pub fn vector_reduce_max(&self, src: ValueRef, is_signed: bool) -> ValueRef {
    self.count_insn("vector.reduce.max");
    let builder = self.llbuilder;
    // SAFETY: assumes `builder` and `src` are valid LLVM handles; not checkable here.
    unsafe { llvm::LLVMRustBuildVectorReduceMax(builder, src, is_signed) }
}
pub fn extract_value(&self, agg_val: ValueRef, idx: u64) -> ValueRef {
self.count_insn("extractvalue");
assert_eq!(idx as c_uint as u64, idx);

View File

@ -1018,14 +1018,22 @@ fn generic_simd_intrinsic<'a, 'tcx>(
name, $($fmt)*));
}
}
// Reports the formatted error through `emit_error!` and then returns `Err(())` from the
// enclosing function. (The dangling, never-closed header of the old `require!` that
// preceded this macro has been dropped; the complete `require!` is defined right after.)
macro_rules! return_error {
    ($($fmt: tt)*) => {
        {
            emit_error!($($fmt)*);
            return Err(());
        }
    }
}
// Guard macro: when `$cond` evaluates to false, forwards the remaining format arguments
// to `return_error!`, which reports the error and returns `Err(())` from the enclosing
// function. When `$cond` is true nothing happens.
macro_rules! require {
($cond: expr, $($fmt: tt)*) => {
if !$cond {
return_error!($($fmt)*);
}
};
}
macro_rules! require_simd {
($ty: expr, $position: expr) => {
require!($ty.is_simd(), "expected SIMD {} type, found non-SIMD `{}`", $position, $ty)
@ -1142,6 +1150,211 @@ fn generic_simd_intrinsic<'a, 'tcx>(
return Ok(bx.extract_element(args[0].immediate(), args[1].immediate()))
}
// `simd_reduce_add`: the return type must be the element type of the input vector.
if name == "simd_reduce_add" {
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signed and unsigned integers share the same integer reduction.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_add(args[0].immediate())),
        ty::TyFloat(float_ty) => {
            // undef as accumulator makes the reduction unordered:
            let acc_ty = match float_ty.bit_width() {
                32 => Type::f32(bx.cx),
                64 => Type::f64(bx.cx),
                width => {
                    return_error!(
                        "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
                        "simd_reduce_add", in_ty, in_elem, width, ret_ty)
                }
            };
            Ok(bx.vector_reduce_fadd_fast(C_undef(acc_ty), args[0].immediate()))
        }
        // Any other element type is rejected.
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_add", in_ty, in_elem, ret_ty)
        }
    };
}
// `simd_reduce_mul`: the return type must be the element type of the input vector.
if name == "simd_reduce_mul" {
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signed and unsigned integers share the same integer reduction.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_mul(args[0].immediate())),
        ty::TyFloat(float_ty) => {
            // undef as accumulator makes the reduction unordered:
            let acc_ty = match float_ty.bit_width() {
                32 => Type::f32(bx.cx),
                64 => Type::f64(bx.cx),
                width => {
                    return_error!(
                        "unsupported {} from `{}` with element `{}` of size `{}` to `{}`",
                        "simd_reduce_mul", in_ty, in_elem, width, ret_ty)
                }
            };
            Ok(bx.vector_reduce_fmul_fast(C_undef(acc_ty), args[0].immediate()))
        }
        // Any other element type is rejected.
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_mul", in_ty, in_elem, ret_ty)
        }
    };
}
// `simd_reduce_min`: the return type must be the element type of the input vector.
if name == "simd_reduce_min" {
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // The boolean selects a signed (`true`) or unsigned (`false`) comparison.
        ty::TyInt(_) => Ok(bx.vector_reduce_min(args[0].immediate(), true)),
        ty::TyUint(_) => Ok(bx.vector_reduce_min(args[0].immediate(), false)),
        ty::TyFloat(_) => Ok(bx.vector_reduce_fmin_fast(args[0].immediate())),
        // Any other element type is rejected.
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_min", in_ty, in_elem, ret_ty)
        }
    };
}
// `simd_reduce_max`: the return type must be the element type of the input vector.
if name == "simd_reduce_max" {
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // The boolean selects a signed (`true`) or unsigned (`false`) comparison.
        ty::TyInt(_) => Ok(bx.vector_reduce_max(args[0].immediate(), true)),
        ty::TyUint(_) => Ok(bx.vector_reduce_max(args[0].immediate(), false)),
        ty::TyFloat(_) => Ok(bx.vector_reduce_fmax_fast(args[0].immediate())),
        // Any other element type is rejected.
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_max", in_ty, in_elem, ret_ty)
        }
    };
}
// `simd_reduce_and`: integer-only; the return type must be the element type of the input.
if name == "simd_reduce_and" {
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signed and unsigned integers are handled identically.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_and(args[0].immediate())),
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_and", in_ty, in_elem, ret_ty)
        }
    };
}
// `simd_reduce_or`: integer-only; the return type must be the element type of the input.
if name == "simd_reduce_or" {
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signed and unsigned integers are handled identically.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_or(args[0].immediate())),
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_or", in_ty, in_elem, ret_ty)
        }
    };
}
// `simd_reduce_xor`: integer-only; the return type must be the element type of the input.
if name == "simd_reduce_xor" {
    require!(ret_ty == in_elem,
             "expected return type `{}` (element of input `{}`), found `{}`",
             in_elem, in_ty, ret_ty);
    return match in_elem.sty {
        // Signed and unsigned integers are handled identically.
        ty::TyInt(_) | ty::TyUint(_) => Ok(bx.vector_reduce_xor(args[0].immediate())),
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_xor", in_ty, in_elem, ret_ty)
        }
    };
}
// `simd_reduce_all`: boolean reduction that and-reduces the lanes of an integer vector.
if name == "simd_reduce_all" {
    // Validate the element type *before* emitting any IR, so an unsupported input
    // (e.g. a float vector) is reported as an error instead of first being fed to an
    // integer `trunc`. The message names this intrinsic, not `simd_reduce_and`.
    match in_elem.sty {
        ty::TyInt(_) | ty::TyUint(_) => {}
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_all", in_ty, in_elem, ret_ty)
        }
    }
    // The reduction operates on a vector of i1s: truncate every lane to one bit.
    let i1 = Type::i1(bx.cx);
    let i1xn = Type::vector(&i1, in_len as u64);
    let v = bx.trunc(args[0].immediate(), i1xn);
    let red = bx.vector_reduce_and(v);
    // Widen the i1 result to the backend representation of `bool`.
    return Ok(bx.zext(red, Type::bool(bx.cx)));
}
// `simd_reduce_any`: boolean reduction that or-reduces the lanes of an integer vector.
if name == "simd_reduce_any" {
    // Validate the element type *before* emitting any IR, so an unsupported input
    // (e.g. a float vector) is reported as an error instead of first being fed to an
    // integer `trunc`. The message names this intrinsic, not `simd_reduce_and`.
    match in_elem.sty {
        ty::TyInt(_) | ty::TyUint(_) => {}
        _ => {
            return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                          "simd_reduce_any", in_ty, in_elem, ret_ty)
        }
    }
    // The reduction operates on a vector of i1s: truncate every lane to one bit.
    let i1 = Type::i1(bx.cx);
    let i1xn = Type::vector(&i1, in_len as u64);
    let v = bx.trunc(args[0].immediate(), i1xn);
    let red = bx.vector_reduce_or(v);
    // Widen the i1 result to the backend representation of `bool`.
    return Ok(bx.zext(red, Type::bool(bx.cx)));
}
if name == "simd_cast" {
require_simd!(ret_ty, "return");
let out_len = ret_ty.simd_size(tcx);

View File

@ -361,6 +361,11 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// Each arm yields a tuple that appears to be (number of generic type parameters,
// input types, output type); `param(i)` presumably refers to the i-th generic
// parameter — confirm against the enclosing function.
"simd_insert" => (2, vec![param(0), tcx.types.u32, param(1)], param(0)),
"simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)),
"simd_cast" => (2, vec![param(0)], param(1)),
"simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool),
"simd_reduce_add" | "simd_reduce_mul" |
"simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" |
"simd_reduce_min" | "simd_reduce_max"
=> (2, vec![param(0)], param(1)),
name if name.starts_with("simd_shuffle") => {
match name["simd_shuffle".len()..].parse() {
Ok(n) => {

View File

@ -1395,3 +1395,49 @@ LLVMRustModuleCost(LLVMModuleRef M) {
auto f = unwrap(M)->functions();
return std::distance(std::begin(f), std::end(f));
}
// Vector reductions:
// Forwards to IRBuilder's CreateFAddReduce with accumulator `Acc` and vector `Src`.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateFAddReduce(unwrap(Acc), unwrap(Src));
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateFMulReduce with accumulator `Acc` and vector `Src`.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMul(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateFMulReduce(unwrap(Acc), unwrap(Src));
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateAddReduce on the vector `Src`.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAdd(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateAddReduce(unwrap(Src));
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateMulReduce on the vector `Src`.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMul(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateMulReduce(unwrap(Src));
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateAndReduce on the vector `Src`.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAnd(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateAndReduce(unwrap(Src));
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateOrReduce on the vector `Src`.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceOr(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateOrReduce(unwrap(Src));
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateXorReduce on the vector `Src`.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceXor(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateXorReduce(unwrap(Src));
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateIntMinReduce; `IsSigned` is passed through unchanged.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMin(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateIntMinReduce(unwrap(Src), IsSigned);
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateIntMaxReduce; `IsSigned` is passed through unchanged.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMax(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateIntMaxReduce(unwrap(Src), IsSigned);
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateFPMinReduce; `NoNaN` is passed through unchanged.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMin(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateFPMinReduce(unwrap(Src), NoNaN);
  return wrap(Reduction);
}
// Forwards to IRBuilder's CreateFPMaxReduce; `NoNaN` is passed through unchanged.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
  auto *Builder = unwrap(B);
  auto *Reduction = Builder->CreateFPMaxReduce(unwrap(Src), NoNaN);
  return wrap(Reduction);
}

View File

@ -0,0 +1,143 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Test that the simd_reduce_{op} intrinsics produce the correct results.
#![feature(repr_simd, platform_intrinsics)]
// Four-lane SIMD vector of `i32`; `main` uses it for the signed integer reductions.
// The `allow` below is an outer attribute, so it covers this declaration only.
#[allow(non_camel_case_types)]
#[repr(simd)]
#[derive(Copy, Clone)]
struct i32x4(pub i32, pub i32, pub i32, pub i32);
#[repr(simd)]
#[derive(Copy, Clone)]
struct u32x4(pub u32, pub u32, pub u32, pub u32);
#[repr(simd)]
#[derive(Copy, Clone)]
struct f32x4(pub f32, pub f32, pub f32, pub f32);
#[repr(simd)]
#[derive(Copy, Clone)]
struct b8x4(pub i8, pub i8, pub i8, pub i8);
#[repr(simd)]
#[derive(Copy, Clone)]
struct b8x16(
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8
);
// Declarations of the reduction intrinsics under test.
// For the two-parameter intrinsics `T` is the vector type and `U` the type of the reduced
// value; `main` always instantiates `U` with the lane type of `T`.
// `simd_reduce_all` / `simd_reduce_any` take a vector and return `bool`.
extern "platform-intrinsic" {
fn simd_reduce_add<T, U>(x: T) -> U;
fn simd_reduce_mul<T, U>(x: T) -> U;
fn simd_reduce_min<T, U>(x: T) -> U;
fn simd_reduce_max<T, U>(x: T) -> U;
fn simd_reduce_and<T, U>(x: T) -> U;
fn simd_reduce_or<T, U>(x: T) -> U;
fn simd_reduce_xor<T, U>(x: T) -> U;
fn simd_reduce_all<T>(x: T) -> bool;
fn simd_reduce_any<T>(x: T) -> bool;
}
// Exercises every reduction intrinsic on small hand-computable inputs: signed integers,
// unsigned integers, floats, and mask vectors for the boolean reductions.
fn main() {
    // Signed integer lanes.
    unsafe {
        let x = i32x4(1, -2, 3, 4);
        let r: i32 = simd_reduce_add(x);
        assert_eq!(r, 6_i32);
        let r: i32 = simd_reduce_mul(x);
        assert_eq!(r, -24_i32);
        // The smallest lane of (1, -2, 3, 4) is -2.
        let r: i32 = simd_reduce_min(x);
        assert_eq!(r, -2_i32);
        let r: i32 = simd_reduce_max(x);
        assert_eq!(r, 4_i32);

        // All bits set in every lane.
        let x = i32x4(-1, -1, -1, -1);
        let r: i32 = simd_reduce_and(x);
        assert_eq!(r, -1_i32);
        let r: i32 = simd_reduce_or(x);
        assert_eq!(r, -1_i32);
        // An even number of identical lanes cancels out under xor.
        let r: i32 = simd_reduce_xor(x);
        assert_eq!(r, 0_i32);

        // One cleared lane.
        let x = i32x4(-1, -1, 0, -1);
        let r: i32 = simd_reduce_and(x);
        assert_eq!(r, 0_i32);
        let r: i32 = simd_reduce_or(x);
        assert_eq!(r, -1_i32);
        let r: i32 = simd_reduce_xor(x);
        assert_eq!(r, -1_i32);
    }

    // Unsigned integer lanes.
    unsafe {
        let x = u32x4(1, 2, 3, 4);
        let r: u32 = simd_reduce_add(x);
        assert_eq!(r, 10_u32);
        let r: u32 = simd_reduce_mul(x);
        assert_eq!(r, 24_u32);
        let r: u32 = simd_reduce_min(x);
        assert_eq!(r, 1_u32);
        let r: u32 = simd_reduce_max(x);
        assert_eq!(r, 4_u32);

        let t = u32::max_value();
        let x = u32x4(t, t, t, t);
        let r: u32 = simd_reduce_and(x);
        assert_eq!(r, t);
        let r: u32 = simd_reduce_or(x);
        assert_eq!(r, t);
        let r: u32 = simd_reduce_xor(x);
        assert_eq!(r, 0_u32);

        let x = u32x4(t, t, 0, t);
        let r: u32 = simd_reduce_and(x);
        assert_eq!(r, 0_u32);
        let r: u32 = simd_reduce_or(x);
        assert_eq!(r, t);
        let r: u32 = simd_reduce_xor(x);
        assert_eq!(r, t);
    }

    // Floating-point lanes; the chosen values are exactly representable, so exact
    // comparison is fine.
    unsafe {
        let x = f32x4(1., -2., 3., 4.);
        let r: f32 = simd_reduce_add(x);
        assert_eq!(r, 6_f32);
        let r: f32 = simd_reduce_mul(x);
        assert_eq!(r, -24_f32);
        let r: f32 = simd_reduce_min(x);
        assert_eq!(r, -2_f32);
        let r: f32 = simd_reduce_max(x);
        assert_eq!(r, 4_f32);
    }

    // Boolean reductions over mask vectors (`!0` = set lane, `0` = cleared lane).
    unsafe {
        let x = b8x4(!0, !0, !0, !0);
        let r: bool = simd_reduce_all(x);
        assert!(r);
        let r: bool = simd_reduce_any(x);
        assert!(r);

        let x = b8x4(!0, !0, 0, !0);
        let r: bool = simd_reduce_all(x);
        assert!(!r);
        let r: bool = simd_reduce_any(x);
        assert!(r);

        let x = b8x4(0, 0, 0, 0);
        let r: bool = simd_reduce_all(x);
        assert!(!r);
        let r: bool = simd_reduce_any(x);
        assert!(!r);
    }
}