Rollup merge of #48983 - gnzlbg:red, r=alexcrichton

add intrinsics for portable packed simd vector reductions

Adds the following portable vector reduction intrinsics:

* fn simd_reduce_add<T, U>(x: T) -> U;
* fn simd_reduce_mul<T, U>(x: T) -> U;
* fn simd_reduce_min<T, U>(x: T) -> U;
* fn simd_reduce_max<T, U>(x: T) -> U;
* fn simd_reduce_and<T, U>(x: T) -> U;
* fn simd_reduce_or<T, U>(x: T) -> U;
* fn simd_reduce_xor<T, U>(x: T) -> U;

I've also added:

* fn simd_reduce_all<T>(x: T) -> bool;
* fn simd_reduce_any<T>(x: T) -> bool;

These produce better code than what we are currently producing in `stdsimd`, but the code is still not optimal due to this LLVM bug: https://bugs.llvm.org/show_bug.cgi?id=36702

r? @alexcrichton
This commit is contained in:
kennytm 2018-03-17 17:20:43 +08:00 committed by GitHub
commit b724c69374
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 728 additions and 3 deletions

View File

@ -621,6 +621,7 @@ extern "C" {
pub fn LLVMConstIntGetSExtValue(ConstantVal: ValueRef) -> c_longlong;
pub fn LLVMRustConstInt128Get(ConstantVal: ValueRef, SExt: bool,
high: *mut u64, low: *mut u64) -> bool;
// LLVM-C accessor for floating-point constants: returns the constant as an
// `f64`. `losesInfo` is an out-parameter; callers in this crate treat a
// value of 1 as "the conversion to `f64` lost information".
pub fn LLVMConstRealGetDouble (ConstantVal: ValueRef, losesInfo: *mut Bool) -> f64;
// Operations on composite constants
@ -1201,6 +1202,46 @@ extern "C" {
Name: *const c_char)
-> ValueRef;
// Floating-point add / multiply reductions of the vector `Src`, with an
// additional accumulator operand `Acc`. The C++ side forwards to
// `IRBuilder::CreateFAddReduce` / `CreateFMulReduce` and returns a null
// `ValueRef` when rustc is built against LLVM < 5.0.
pub fn LLVMRustBuildVectorReduceFAdd(B: BuilderRef,
Acc: ValueRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceFMul(B: BuilderRef,
Acc: ValueRef,
Src: ValueRef)
-> ValueRef;
// Integer arithmetic (add, mul) and bitwise (and, or, xor) reductions of the
// vector `Src`. None of these take an accumulator. Each returns a null
// `ValueRef` when rustc is built against LLVM < 5.0.
pub fn LLVMRustBuildVectorReduceAdd(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceMul(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceAnd(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceOr(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceXor(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
// Integer minimum / maximum reductions of the vector `Src`. `IsSigned` is
// forwarded to `IRBuilder::CreateIntMinReduce` / `CreateIntMaxReduce` and
// selects signed versus unsigned comparison. Each returns a null `ValueRef`
// when rustc is built against LLVM < 5.0.
pub fn LLVMRustBuildVectorReduceMin(B: BuilderRef,
Src: ValueRef,
IsSigned: bool)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceMax(B: BuilderRef,
Src: ValueRef,
IsSigned: bool)
-> ValueRef;
// Floating-point minimum / maximum reductions of the vector `Src`.
//
// The flag is forwarded unchanged to `IRBuilder::CreateFPMinReduce` /
// `CreateFPMaxReduce`, whose parameter is `NoNaN`: pass `true` only when the
// input is known to contain no NaNs. It is named `NoNaN` here to match the
// C++ side; the previous name `IsNaN` suggested the opposite meaning.
// Each returns a null `ValueRef` when rustc is built against LLVM < 5.0.
pub fn LLVMRustBuildVectorReduceFMin(B: BuilderRef,
                                     Src: ValueRef,
                                     NoNaN: bool)
                                     -> ValueRef;
pub fn LLVMRustBuildVectorReduceFMax(B: BuilderRef,
                                     Src: ValueRef,
                                     NoNaN: bool)
                                     -> ValueRef;
pub fn LLVMBuildIsNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildIsNotNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildPtrDiff(B: BuilderRef,
@ -1567,6 +1608,7 @@ extern "C" {
pub fn LLVMRustWriteValueToString(value_ref: ValueRef, s: RustStringRef);
pub fn LLVMIsAConstantInt(value_ref: ValueRef) -> ValueRef;
// LLVM-C checked cast: callers test the result for null to decide whether
// `value_ref` is a floating-point constant.
pub fn LLVMIsAConstantFP(value_ref: ValueRef) -> ValueRef;
pub fn LLVMRustPassKind(Pass: PassRef) -> PassKind;
pub fn LLVMRustFindAndCreatePass(Pass: *const c_char) -> PassRef;

View File

@ -955,6 +955,147 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
}
}
/// Builds a floating-point add reduction of `src` with accumulator `acc`
/// and calls `LLVMRustSetHasUnsafeAlgebra` on the resulting instruction.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fadd_fast");
    // FIXME: add a non-fast math version once
    // https://bugs.llvm.org/show_bug.cgi?id=36732
    // is fixed.
    let reduced = unsafe {
        llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src)
    };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceFAdd is not available in LLVM version < 5.0");
    }
    unsafe {
        llvm::LLVMRustSetHasUnsafeAlgebra(reduced);
    }
    reduced
}
/// Builds a floating-point multiply reduction of `src` with accumulator
/// `acc` and calls `LLVMRustSetHasUnsafeAlgebra` on the resulting
/// instruction.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmul_fast");
    // FIXME: add a non-fast math version once
    // https://bugs.llvm.org/show_bug.cgi?id=36732
    // is fixed.
    let reduced = unsafe {
        llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src)
    };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceFMul is not available in LLVM version < 5.0");
    }
    unsafe {
        llvm::LLVMRustSetHasUnsafeAlgebra(reduced);
    }
    reduced
}
/// Builds an integer add reduction over the lanes of `src`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_add(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.add");
    let reduced = unsafe { llvm::LLVMRustBuildVectorReduceAdd(self.llbuilder, src) };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceAdd is not available in LLVM version < 5.0");
    }
    reduced
}
/// Builds an integer multiply reduction over the lanes of `src`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_mul(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.mul");
    let reduced = unsafe { llvm::LLVMRustBuildVectorReduceMul(self.llbuilder, src) };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceMul is not available in LLVM version < 5.0");
    }
    reduced
}
/// Builds a bitwise-and reduction over the lanes of `src`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_and(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.and");
    let reduced = unsafe { llvm::LLVMRustBuildVectorReduceAnd(self.llbuilder, src) };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceAnd is not available in LLVM version < 5.0");
    }
    reduced
}
/// Builds a bitwise-or reduction over the lanes of `src`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_or(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.or");
    let reduced = unsafe { llvm::LLVMRustBuildVectorReduceOr(self.llbuilder, src) };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceOr is not available in LLVM version < 5.0");
    }
    reduced
}
/// Builds a bitwise-xor reduction over the lanes of `src`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_xor(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.xor");
    let reduced = unsafe { llvm::LLVMRustBuildVectorReduceXor(self.llbuilder, src) };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceXor is not available in LLVM version < 5.0");
    }
    reduced
}
/// Builds a floating-point minimum reduction over the lanes of `src`.
///
/// The last FFI argument is forwarded to LLVM's `CreateFPMinReduce` as its
/// `NoNaN` flag. This is the variant that makes no promise about NaNs, so
/// it must pass `false`; it previously passed `true`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_fmin(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmin");
    unsafe {
        let instr = llvm::LLVMRustBuildVectorReduceFMin(
            self.llbuilder, src, /* NoNaN: */ false);
        if instr.is_null() {
            bug!("LLVMRustBuildVectorReduceFMin is not available in LLVM version < 5.0");
        }
        instr
    }
}
/// Builds a floating-point maximum reduction over the lanes of `src`.
///
/// The last FFI argument is forwarded to LLVM's `CreateFPMaxReduce` as its
/// `NoNaN` flag. This is the variant that makes no promise about NaNs, so
/// it must pass `false`; it previously passed `true`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_fmax(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmax");
    unsafe {
        let instr = llvm::LLVMRustBuildVectorReduceFMax(
            self.llbuilder, src, /* NoNaN: */ false);
        if instr.is_null() {
            bug!("LLVMRustBuildVectorReduceFMax is not available in LLVM version < 5.0");
        }
        instr
    }
}
/// Builds a floating-point minimum reduction over the lanes of `src` for
/// callers that guarantee the input contains no NaNs, and calls
/// `LLVMRustSetHasUnsafeAlgebra` on the resulting instruction.
///
/// The last FFI argument is forwarded to LLVM's `CreateFPMinReduce` as its
/// `NoNaN` flag, so the NaN-free variant must pass `true`; it previously
/// passed `false`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmin_fast");
    unsafe {
        let instr = llvm::LLVMRustBuildVectorReduceFMin(
            self.llbuilder, src, /* NoNaN: */ true);
        if instr.is_null() {
            bug!("LLVMRustBuildVectorReduceFMin is not available in LLVM version < 5.0");
        }
        llvm::LLVMRustSetHasUnsafeAlgebra(instr);
        instr
    }
}
/// Builds a floating-point maximum reduction over the lanes of `src` for
/// callers that guarantee the input contains no NaNs, and calls
/// `LLVMRustSetHasUnsafeAlgebra` on the resulting instruction.
///
/// The last FFI argument is forwarded to LLVM's `CreateFPMaxReduce` as its
/// `NoNaN` flag, so the NaN-free variant must pass `true`; it previously
/// passed `false`.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_fmax_fast(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmax_fast");
    unsafe {
        let instr = llvm::LLVMRustBuildVectorReduceFMax(
            self.llbuilder, src, /* NoNaN: */ true);
        if instr.is_null() {
            bug!("LLVMRustBuildVectorReduceFMax is not available in LLVM version < 5.0");
        }
        llvm::LLVMRustSetHasUnsafeAlgebra(instr);
        instr
    }
}
/// Builds an integer minimum reduction over the lanes of `src`;
/// `is_signed` is passed through to the LLVM wrapper.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_min(&self, src: ValueRef, is_signed: bool) -> ValueRef {
    self.count_insn("vector.reduce.min");
    let reduced = unsafe {
        llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed)
    };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceMin is not available in LLVM version < 5.0");
    }
    reduced
}
/// Builds an integer maximum reduction over the lanes of `src`;
/// `is_signed` is passed through to the LLVM wrapper.
///
/// Raises a compiler bug if the LLVM wrapper returns null.
pub fn vector_reduce_max(&self, src: ValueRef, is_signed: bool) -> ValueRef {
    self.count_insn("vector.reduce.max");
    let reduced = unsafe {
        llvm::LLVMRustBuildVectorReduceMax(self.llbuilder, src, is_signed)
    };
    if reduced.is_null() {
        bug!("LLVMRustBuildVectorReduceMax is not available in LLVM version < 5.0");
    }
    reduced
}
pub fn extract_value(&self, agg_val: ValueRef, idx: u64) -> ValueRef {
self.count_insn("extractvalue");
assert_eq!(idx as c_uint as u64, idx);

View File

@ -269,6 +269,19 @@ pub fn const_get_elt(v: ValueRef, idx: u64) -> ValueRef {
}
}
pub fn const_get_real(v: ValueRef) -> Option<(f64, bool)> {
unsafe {
if is_const_real(v) {
let mut loses_info: llvm::Bool = ::std::mem::uninitialized();
let r = llvm::LLVMConstRealGetDouble(v, &mut loses_info as *mut llvm::Bool);
let loses_info = if loses_info == 1 { true } else { false };
Some((r, loses_info))
} else {
None
}
}
}
pub fn const_to_uint(v: ValueRef) -> u64 {
unsafe {
llvm::LLVMConstIntGetZExtValue(v)
@ -281,6 +294,13 @@ pub fn is_const_integral(v: ValueRef) -> bool {
}
}
/// Reports whether `v` is a floating-point constant, i.e. whether
/// `LLVMIsAConstantFP` yields a non-null value for it.
pub fn is_const_real(v: ValueRef) -> bool {
    let as_const_fp = unsafe { llvm::LLVMIsAConstantFP(v) };
    !as_const_fp.is_null()
}
#[inline]
fn hi_lo_to_u128(lo: u64, hi: u64) -> u128 {
((hi as u128) << 64) | (lo as u128)

View File

@ -1018,14 +1018,22 @@ fn generic_simd_intrinsic<'a, 'tcx>(
name, $($fmt)*));
}
}
macro_rules! require {
($cond: expr, $($fmt: tt)*) => {
if !$cond {
// Emits the formatted diagnostic via `emit_error!` and then returns
// `Err(())` from the enclosing function. The expansion is a block, so the
// macro can be used both as a statement and in expression position (for
// example as a match arm).
macro_rules! return_error {
($($fmt: tt)*) => {
{
emit_error!($($fmt)*);
return Err(());
}
}
}
// Guard helper: when `$cond` is false, reports the formatted diagnostic and
// returns `Err(())` from the enclosing function (via `return_error!`).
macro_rules! require {
($cond: expr, $($fmt: tt)*) => {
if !$cond {
return_error!($($fmt)*);
}
};
}
macro_rules! require_simd {
($ty: expr, $position: expr) => {
require!($ty.is_simd(), "expected SIMD {} type, found non-SIMD `{}`", $position, $ty)
@ -1145,6 +1153,161 @@ fn generic_simd_intrinsic<'a, 'tcx>(
return Ok(bx.extract_element(args[0].immediate(), args[1].immediate()))
}
// Arithmetic (`add` / `mul`) reductions across the lanes of the input vector.
//
// * `$integer_reduce` / `$float_reduce` name the `Builder` methods used for
//   integer and floating-point element types respectively.
// * `$ordered` reductions receive an accumulator in `args[1]`; unordered
//   reductions only receive the vector in `args[0]`.
//
// The return type must equal the vector's element type; anything other than
// integer or float elements is rejected with an error.
macro_rules! arith_red {
    ($name:tt : $integer_reduce:ident, $float_reduce:ident, $ordered:expr) => {
        if name == $name {
            require!(ret_ty == in_elem,
                     "expected return type `{}` (element of input `{}`), found `{}`",
                     in_elem, in_ty, ret_ty);
            return match in_elem.sty {
                ty::TyInt(_) | ty::TyUint(_) => {
                    // Build the vector reduction exactly once. The unordered
                    // path used to build a second, identical reduction and
                    // leave this one unused.
                    let r = bx.$integer_reduce(args[0].immediate());
                    if $ordered {
                        // if overflow occurs, the result is the
                        // mathematical result modulo 2^n:
                        if name.contains("mul") {
                            Ok(bx.mul(args[1].immediate(), r))
                        } else {
                            Ok(bx.add(args[1].immediate(), r))
                        }
                    } else {
                        Ok(r)
                    }
                },
                ty::TyFloat(f) => {
                    // ordered arithmetic reductions take an accumulator
                    let acc = if $ordered {
                        let acc = args[1].immediate();
                        // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36734
                        // * if the accumulator of the fadd isn't 0, incorrect
                        //   code is generated
                        // * if the accumulator of the fmul isn't 1, incorrect
                        //   code is generated
                        match const_get_real(acc) {
                            None => return_error!("accumulator of {} is not a constant", $name),
                            Some((v, loses_info)) => {
                                if $name.contains("mul") && v != 1.0_f64 {
                                    return_error!("accumulator of {} is not 1.0", $name);
                                } else if $name.contains("add") && v != 0.0_f64 {
                                    return_error!("accumulator of {} is not 0.0", $name);
                                } else if loses_info {
                                    return_error!("accumulator of {} loses information", $name);
                                }
                            }
                        }
                        acc
                    } else {
                        // unordered arithmetic reductions do not:
                        match f.bit_width() {
                            32 => C_undef(Type::f32(bx.cx)),
                            64 => C_undef(Type::f64(bx.cx)),
                            v => {
                                return_error!(r#"
unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
                                    $name, in_ty, in_elem, v, ret_ty
                                )
                            }
                        }
                    };
                    Ok(bx.$float_reduce(acc, args[0].immediate()))
                }
                _ => {
                    return_error!(
                        "unsupported {} from `{}` with element `{}` to `{}`",
                        $name, in_ty, in_elem, ret_ty
                    )
                },
            }
        }
    }
}
arith_red!("simd_reduce_add_ordered": vector_reduce_add, vector_reduce_fadd_fast, true);
arith_red!("simd_reduce_mul_ordered": vector_reduce_mul, vector_reduce_fmul_fast, true);
arith_red!("simd_reduce_add_unordered": vector_reduce_add, vector_reduce_fadd_fast, false);
arith_red!("simd_reduce_mul_unordered": vector_reduce_mul, vector_reduce_fmul_fast, false);
// Minimum / maximum reductions across the lanes of the input vector.
//
// `$int_red` is the `Builder` method used for integer elements (called with
// `true` for signed and `false` for unsigned element types); `$float_red`
// is the one used for floating-point elements. The return type must equal
// the vector's element type.
macro_rules! minmax_red {
    ($name:tt: $int_red:ident, $float_red:ident) => {
        if name == $name {
            require!(ret_ty == in_elem,
                     "expected return type `{}` (element of input `{}`), found `{}`",
                     in_elem, in_ty, ret_ty);
            let reduced = match in_elem.sty {
                ty::TyInt(_) => bx.$int_red(args[0].immediate(), true),
                ty::TyUint(_) => bx.$int_red(args[0].immediate(), false),
                ty::TyFloat(_) => bx.$float_red(args[0].immediate()),
                _ => {
                    return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                                  $name, in_ty, in_elem, ret_ty)
                }
            };
            return Ok(reduced);
        }
    }
}
minmax_red!("simd_reduce_min": vector_reduce_min, vector_reduce_fmin);
minmax_red!("simd_reduce_max": vector_reduce_max, vector_reduce_fmax);
minmax_red!("simd_reduce_min_nanless": vector_reduce_min, vector_reduce_fmin_fast);
minmax_red!("simd_reduce_max_nanless": vector_reduce_max, vector_reduce_fmax_fast);
// Bitwise reductions across the lanes of the input vector.
//
// With `$boolean == false` the reduction runs on the vector as-is and the
// return type must equal the element type. With `$boolean == true` the
// vector is first truncated to a vector of `i1`, reduced, and the result is
// zero-extended to `bool`. Only integer element types are accepted.
macro_rules! bitwise_red {
    ($name:tt : $red:ident, $boolean:expr) => {
        if name == $name {
            let input = if $boolean {
                match in_elem.sty {
                    ty::TyInt(_) | ty::TyUint(_) => {},
                    _ => {
                        return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                                      $name, in_ty, in_elem, ret_ty)
                    }
                }
                // boolean reductions operate on vectors of i1s:
                let lane_ty = Type::i1(bx.cx);
                let mask_ty = Type::vector(&lane_ty, in_len as u64);
                bx.trunc(args[0].immediate(), mask_ty)
            } else {
                require!(ret_ty == in_elem,
                         "expected return type `{}` (element of input `{}`), found `{}`",
                         in_elem, in_ty, ret_ty);
                args[0].immediate()
            };
            match in_elem.sty {
                ty::TyInt(_) | ty::TyUint(_) => {},
                _ => {
                    return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                                  $name, in_ty, in_elem, ret_ty)
                }
            }
            let r = bx.$red(input);
            return Ok(if $boolean { bx.zext(r, Type::bool(bx.cx)) } else { r });
        }
    }
}
bitwise_red!("simd_reduce_and": vector_reduce_and, false);
bitwise_red!("simd_reduce_or": vector_reduce_or, false);
bitwise_red!("simd_reduce_xor": vector_reduce_xor, false);
bitwise_red!("simd_reduce_all": vector_reduce_and, true);
bitwise_red!("simd_reduce_any": vector_reduce_or, true);
if name == "simd_cast" {
require_simd!(ret_ty, "return");
let out_len = ret_ty.simd_size(tcx);

View File

@ -361,6 +361,14 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
"simd_insert" => (2, vec![param(0), tcx.types.u32, param(1)], param(0)),
"simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)),
"simd_cast" => (2, vec![param(0)], param(1)),
// Boolean reductions: one generic parameter (the vector), result is `bool`.
"simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool),
// Ordered arithmetic reductions: two generic parameters; the inputs are the
// vector and an accumulator, and the result has the accumulator's type.
"simd_reduce_add_ordered" | "simd_reduce_mul_ordered"
=> (2, vec![param(0), param(1)], param(1)),
// All other reductions: two generic parameters; the only input is the
// vector and the result is the second parameter.
"simd_reduce_add_unordered" | "simd_reduce_mul_unordered" |
"simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" |
"simd_reduce_min" | "simd_reduce_max" |
"simd_reduce_min_nanless" | "simd_reduce_max_nanless"
=> (2, vec![param(0)], param(1)),
name if name.starts_with("simd_shuffle") => {
match name["simd_shuffle".len()..].parse() {
Ok(n) => {

View File

@ -21,6 +21,8 @@
#if LLVM_VERSION_GE(5, 0)
#include "llvm/ADT/Optional.h"
#else
#include <cstdlib>
#endif
//===----------------------------------------------------------------------===
@ -1395,3 +1397,98 @@ LLVMRustModuleCost(LLVMModuleRef M) {
auto f = unwrap(M)->functions();
return std::distance(std::begin(f), std::end(f));
}
// Vector reductions:
#if LLVM_VERSION_GE(5, 0)
// C entry points for the IRBuilder vector-reduction helpers. Each function
// unwraps its handles, builds the reduction through the IRBuilder, and hands
// the resulting instruction back as an LLVMValueRef.

// Floating-point add reduction of Src with accumulator Acc.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
  auto *Reduction = unwrap(B)->CreateFAddReduce(unwrap(Acc), unwrap(Src));
  return wrap(Reduction);
}
// Floating-point multiply reduction of Src with accumulator Acc.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMul(LLVMBuilderRef B, LLVMValueRef Acc, LLVMValueRef Src) {
  auto *Reduction = unwrap(B)->CreateFMulReduce(unwrap(Acc), unwrap(Src));
  return wrap(Reduction);
}
// Integer add reduction of Src.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAdd(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Reduction = unwrap(B)->CreateAddReduce(unwrap(Src));
  return wrap(Reduction);
}
// Integer multiply reduction of Src.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMul(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Reduction = unwrap(B)->CreateMulReduce(unwrap(Src));
  return wrap(Reduction);
}
// Bitwise-and reduction of Src.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAnd(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Reduction = unwrap(B)->CreateAndReduce(unwrap(Src));
  return wrap(Reduction);
}
// Bitwise-or reduction of Src.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceOr(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Reduction = unwrap(B)->CreateOrReduce(unwrap(Src));
  return wrap(Reduction);
}
// Bitwise-xor reduction of Src.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceXor(LLVMBuilderRef B, LLVMValueRef Src) {
  auto *Reduction = unwrap(B)->CreateXorReduce(unwrap(Src));
  return wrap(Reduction);
}
// Integer minimum reduction of Src; IsSigned is passed through.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMin(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
  auto *Reduction = unwrap(B)->CreateIntMinReduce(unwrap(Src), IsSigned);
  return wrap(Reduction);
}
// Integer maximum reduction of Src; IsSigned is passed through.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMax(LLVMBuilderRef B, LLVMValueRef Src, bool IsSigned) {
  auto *Reduction = unwrap(B)->CreateIntMaxReduce(unwrap(Src), IsSigned);
  return wrap(Reduction);
}
// Floating-point minimum reduction of Src; NoNaN is passed through.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMin(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
  auto *Reduction = unwrap(B)->CreateFPMinReduce(unwrap(Src), NoNaN);
  return wrap(Reduction);
}
// Floating-point maximum reduction of Src; NoNaN is passed through.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMax(LLVMBuilderRef B, LLVMValueRef Src, bool NoNaN) {
  auto *Reduction = unwrap(B)->CreateFPMaxReduce(unwrap(Src), NoNaN);
  return wrap(Reduction);
}
#else
// Fallback definitions for LLVM versions older than 5.0. Every entry point
// keeps the same signature as the real implementation but returns nullptr;
// the Rust callers check for a null result and report a compiler bug.
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFAdd(LLVMBuilderRef, LLVMValueRef, LLVMValueRef) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMul(LLVMBuilderRef, LLVMValueRef, LLVMValueRef) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAdd(LLVMBuilderRef, LLVMValueRef) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMul(LLVMBuilderRef, LLVMValueRef) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceAnd(LLVMBuilderRef, LLVMValueRef) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceOr(LLVMBuilderRef, LLVMValueRef) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceXor(LLVMBuilderRef, LLVMValueRef) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMin(LLVMBuilderRef, LLVMValueRef, bool) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceMax(LLVMBuilderRef, LLVMValueRef, bool) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMin(LLVMBuilderRef, LLVMValueRef, bool) {
return nullptr;
}
extern "C" LLVMValueRef
LLVMRustBuildVectorReduceFMax(LLVMBuilderRef, LLVMValueRef, bool) {
return nullptr;
}
#endif

View File

@ -0,0 +1,82 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// min-llvm-version 5.0
// ignore-emscripten
// Test that the simd_reduce_{op} intrinsics produce ok-ish error
// messages when misused.
#![feature(repr_simd, platform_intrinsics)]
#![allow(non_camel_case_types)]
// Four-lane `f32` SIMD vector; used below as the input that bitwise and
// boolean reductions must reject.
#[repr(simd)]
#[derive(Copy, Clone)]
pub struct f32x4(pub f32, pub f32, pub f32, pub f32);
// Four-lane `u32` SIMD vector; used below to check return-type mismatches.
#[repr(simd)]
#[derive(Copy, Clone)]
pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
// Declarations of the reduction intrinsics exercised by this test. `T` is
// the vector type and `U` the result type; the ordered variants take an
// accumulator `y` as a second argument.
extern "platform-intrinsic" {
fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
fn simd_reduce_and<T, U>(x: T) -> U;
fn simd_reduce_or<T, U>(x: T) -> U;
fn simd_reduce_xor<T, U>(x: T) -> U;
fn simd_reduce_all<T>(x: T) -> bool;
fn simd_reduce_any<T>(x: T) -> bool;
}
// Drives every misuse the test expects a diagnostic for. Each `//~^ ERROR`
// annotation refers to the statement on the line directly above it, so the
// statement/annotation pairs must stay adjacent.
fn main() {
let x = u32x4(0, 0, 0, 0);
let z = f32x4(0.0, 0.0, 0.0, 0.0);
unsafe {
// Accepted: constant accumulators of 0.0 (add) and 1.0 (mul).
simd_reduce_add_ordered(z, 0_f32);
simd_reduce_mul_ordered(z, 1_f32);
// Rejected: constant accumulators with any other value.
simd_reduce_add_ordered(z, 2_f32);
//~^ ERROR accumulator of simd_reduce_add_ordered is not 0.0
simd_reduce_mul_ordered(z, 3_f32);
//~^ ERROR accumulator of simd_reduce_mul_ordered is not 1.0
// Rejected: return type differs from the vector's element type.
let _: f32 = simd_reduce_and(x);
//~^ ERROR expected return type `u32` (element of input `u32x4`), found `f32`
let _: f32 = simd_reduce_or(x);
//~^ ERROR expected return type `u32` (element of input `u32x4`), found `f32`
let _: f32 = simd_reduce_xor(x);
//~^ ERROR expected return type `u32` (element of input `u32x4`), found `f32`
// Rejected: bitwise reductions on floating-point elements.
let _: f32 = simd_reduce_and(z);
//~^ ERROR unsupported simd_reduce_and from `f32x4` with element `f32` to `f32`
let _: f32 = simd_reduce_or(z);
//~^ ERROR unsupported simd_reduce_or from `f32x4` with element `f32` to `f32`
let _: f32 = simd_reduce_xor(z);
//~^ ERROR unsupported simd_reduce_xor from `f32x4` with element `f32` to `f32`
// Rejected: boolean reductions on floating-point elements.
let _: bool = simd_reduce_all(z);
//~^ ERROR unsupported simd_reduce_all from `f32x4` with element `f32` to `bool`
let _: bool = simd_reduce_any(z);
//~^ ERROR unsupported simd_reduce_any from `f32x4` with element `f32` to `bool`
foo(0_f32);
}
}
// Passes the accumulator in as a function argument so that it is not a
// constant at the intrinsic call; both ordered reductions are expected to
// be rejected for that reason. `#[inline(never)]` is presumably there to
// keep `x` from being folded to the caller's literal — TODO confirm.
#[inline(never)]
unsafe fn foo(x: f32) {
let z = f32x4(0.0, 0.0, 0.0, 0.0);
simd_reduce_add_ordered(z, x);
//~^ ERROR accumulator of simd_reduce_add_ordered is not a constant
simd_reduce_mul_ordered(z, x);
//~^ ERROR accumulator of simd_reduce_mul_ordered is not a constant
}

View File

@ -0,0 +1,172 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// min-llvm-version 5.0
// ignore-emscripten
// Test that the simd_reduce_{op} intrinsics produce the correct results.
#![feature(repr_simd, platform_intrinsics)]
// Four-lane signed 32-bit SIMD vector used for the signed-integer checks.
// NOTE(review): this is an outer attribute, so the lint allowance covers
// only this struct, not the other lowercase-named types in the file.
#[allow(non_camel_case_types)]
#[repr(simd)]
#[derive(Copy, Clone)]
struct i32x4(pub i32, pub i32, pub i32, pub i32);
#[repr(simd)]
#[derive(Copy, Clone)]
struct u32x4(pub u32, pub u32, pub u32, pub u32);
#[repr(simd)]
#[derive(Copy, Clone)]
struct f32x4(pub f32, pub f32, pub f32, pub f32);
#[repr(simd)]
#[derive(Copy, Clone)]
struct b8x4(pub i8, pub i8, pub i8, pub i8);
#[repr(simd)]
#[derive(Copy, Clone)]
struct b8x16(
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8,
pub i8, pub i8, pub i8, pub i8
);
// Declarations of all reduction intrinsics exercised by this test. `T` is
// the vector type and `U` the result type; the ordered variants take an
// accumulator `acc` as a second argument.
extern "platform-intrinsic" {
fn simd_reduce_add_unordered<T, U>(x: T) -> U;
fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
fn simd_reduce_min<T, U>(x: T) -> U;
fn simd_reduce_max<T, U>(x: T) -> U;
fn simd_reduce_min_nanless<T, U>(x: T) -> U;
fn simd_reduce_max_nanless<T, U>(x: T) -> U;
fn simd_reduce_and<T, U>(x: T) -> U;
fn simd_reduce_or<T, U>(x: T) -> U;
fn simd_reduce_xor<T, U>(x: T) -> U;
fn simd_reduce_all<T>(x: T) -> bool;
fn simd_reduce_any<T>(x: T) -> bool;
}
// Checks the result of every reduction intrinsic on small fixed inputs,
// one `unsafe` section per element type (signed, unsigned, float, mask).
fn main() {
// Signed 32-bit lanes.
unsafe {
let x = i32x4(1, -2, 3, 4);
let r: i32 = simd_reduce_add_unordered(x);
assert_eq!(r, 6_i32);
let r: i32 = simd_reduce_mul_unordered(x);
assert_eq!(r, -24_i32);
// Ordered integer reductions fold the accumulator into the result.
let r: i32 = simd_reduce_add_ordered(x, -1);
assert_eq!(r, 5_i32);
let r: i32 = simd_reduce_mul_ordered(x, -1);
assert_eq!(r, 24_i32);
let r: i32 = simd_reduce_min(x);
assert_eq!(r, -2_i32);
let r: i32 = simd_reduce_max(x);
assert_eq!(r, 4_i32);
// All lanes have every bit set.
let x = i32x4(-1, -1, -1, -1);
let r: i32 = simd_reduce_and(x);
assert_eq!(r, -1_i32);
let r: i32 = simd_reduce_or(x);
assert_eq!(r, -1_i32);
let r: i32 = simd_reduce_xor(x);
assert_eq!(r, 0_i32);
// One lane is zero.
let x = i32x4(-1, -1, 0, -1);
let r: i32 = simd_reduce_and(x);
assert_eq!(r, 0_i32);
let r: i32 = simd_reduce_or(x);
assert_eq!(r, -1_i32);
let r: i32 = simd_reduce_xor(x);
assert_eq!(r, -1_i32);
}
// Unsigned 32-bit lanes.
unsafe {
let x = u32x4(1, 2, 3, 4);
let r: u32 = simd_reduce_add_unordered(x);
assert_eq!(r, 10_u32);
let r: u32 = simd_reduce_mul_unordered(x);
assert_eq!(r, 24_u32);
let r: u32 = simd_reduce_add_ordered(x, 1);
assert_eq!(r, 11_u32);
let r: u32 = simd_reduce_mul_ordered(x, 2);
assert_eq!(r, 48_u32);
let r: u32 = simd_reduce_min(x);
assert_eq!(r, 1_u32);
let r: u32 = simd_reduce_max(x);
assert_eq!(r, 4_u32);
// All lanes have every bit set.
let t = u32::max_value();
let x = u32x4(t, t, t, t);
let r: u32 = simd_reduce_and(x);
assert_eq!(r, t);
let r: u32 = simd_reduce_or(x);
assert_eq!(r, t);
let r: u32 = simd_reduce_xor(x);
assert_eq!(r, 0_u32);
// One lane is zero.
let x = u32x4(t, t, 0, t);
let r: u32 = simd_reduce_and(x);
assert_eq!(r, 0_u32);
let r: u32 = simd_reduce_or(x);
assert_eq!(r, t);
let r: u32 = simd_reduce_xor(x);
assert_eq!(r, t);
}
// 32-bit floating-point lanes.
unsafe {
let x = f32x4(1., -2., 3., 4.);
let r: f32 = simd_reduce_add_unordered(x);
assert_eq!(r, 6_f32);
let r: f32 = simd_reduce_mul_unordered(x);
assert_eq!(r, -24_f32);
// FIXME: only works correctly for accumulator, 0:
// https://bugs.llvm.org/show_bug.cgi?id=36734
let r: f32 = simd_reduce_add_ordered(x, 0.);
assert_eq!(r, 6_f32);
// FIXME: only works correctly for accumulator, 1:
// https://bugs.llvm.org/show_bug.cgi?id=36734
let r: f32 = simd_reduce_mul_ordered(x, 1.);
assert_eq!(r, -24_f32);
let r: f32 = simd_reduce_min(x);
assert_eq!(r, -2_f32);
let r: f32 = simd_reduce_max(x);
assert_eq!(r, 4_f32);
let r: f32 = simd_reduce_min_nanless(x);
assert_eq!(r, -2_f32);
let r: f32 = simd_reduce_max_nanless(x);
assert_eq!(r, 4_f32);
}
// Mask lanes: `!0` (all bits set) versus `0`.
unsafe {
let x = b8x4(!0, !0, !0, !0);
let r: bool = simd_reduce_all(x);
assert_eq!(r, true);
let r: bool = simd_reduce_any(x);
assert_eq!(r, true);
let x = b8x4(!0, !0, 0, !0);
let r: bool = simd_reduce_all(x);
assert_eq!(r, false);
let r: bool = simd_reduce_any(x);
assert_eq!(r, true);
let x = b8x4(0, 0, 0, 0);
let r: bool = simd_reduce_all(x);
assert_eq!(r, false);
let r: bool = simd_reduce_any(x);
assert_eq!(r, false);
}
}