Auto merge of #28221 - huonw:simd, r=alexcrichton

The ARM equivalents of the AArch64 are annoyingly more complicated (and some of the AArch64 ones are too).

I think I've now exposed all the x86 intrinsics from SSE to AVX2 (at least, the ones that LLVM implements as callable intrinsics).
This commit is contained in:
bors 2015-09-05 02:15:41 +00:00
commit 7ee876cb8e
13 changed files with 1418 additions and 112 deletions

View File

@ -336,6 +336,48 @@
"ret": "i8",
"args": ["0"]
},
{
"intrinsic": "ld2{0[0].width}_{0[0].data_type}",
"width": [64, 128],
"llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}",
"ret": ["[i(8-64);2]","[f(32-64);2]"],
"args": ["0.0SPc/0.0"]
},
{
"intrinsic": "ld3{0[0].width}_{0[0].data_type}",
"width": [64, 128],
"llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}",
"ret": ["[i(8-64);3]","[f(32-64);3]"],
"args": ["0.0SPc/0.0"]
},
{
"intrinsic": "ld4{0[0].width}_{0[0].data_type}",
"width": [64, 128],
"llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}",
"ret": ["[i(8-64);4]","[f(32-64);4]"],
"args": ["0.0SPc/0.0"]
},
{
"intrinsic": "ld2{0[0].width}_dup_{0[0].data_type}",
"width": [64, 128],
"llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}",
"ret": ["[i(8-64);2]","[f(32-64);2]"],
"args": ["0.0SPc"]
},
{
"intrinsic": "ld3{0[0].width}_dup_{0[0].data_type}",
"width": [64, 128],
"llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}",
"ret": ["[i(8-64);3]","[f(32-64);3]"],
"args": ["0.0SPc"]
},
{
"intrinsic": "ld4{0[0].width}_dup_{0[0].data_type}",
"width": [64, 128],
"llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}",
"ret": ["[i(8-64);4]","[f(32-64);4]"],
"args": ["0.0SPc"]
},
{
"intrinsic": "padd{0.width}_{0.data_type}",
"width": [64, 128],

View File

@ -14,11 +14,13 @@ import argparse
import sys
import re
import textwrap
import itertools
SPEC = re.compile(
r'^(?:(?P<id>[iusfIUSF])(?:\((?P<start>\d+)-(?P<end>\d+)\)|'
r'^(?:(?P<void>V)|(?P<id>[iusfIUSF])(?:\((?P<start>\d+)-(?P<end>\d+)\)|'
r'(?P<width>\d+)(:?/(?P<llvm_width>\d+))?)'
r'|(?P<reference>\d+)(?P<modifiers>[vShdnwus]*)(?P<force_width>x\d+)?)$'
r'|(?P<reference>\d+))(?P<index>\.\d+)?(?P<modifiers>[vShdnwusfDMC]*)(?P<force_width>x\d+)?'
r'(?:(?P<pointer>Pm|Pc)(?P<llvm_pointer>/.*)?|(?P<bitcast>->.*))?$'
)
class PlatformInfo(object):
@ -68,18 +70,35 @@ class IntrinsicSet(object):
{k: lookup(v) for k, v in data.items()})
class PlatformTypeInfo(object):
def __init__(self, llvm_name, properties):
self.properties = properties
self.llvm_name = llvm_name
def __init__(self, llvm_name, properties, elems = None):
if elems is None:
self.properties = properties
self.llvm_name = llvm_name
else:
assert properties is None and llvm_name is None
self.properties = {}
self.elems = elems
def __repr__(self):
return '<PlatformTypeInfo {}, {}>'.format(self.llvm_name, self.properties)
def __getattr__(self, name):
return self.properties[name]
def __getitem__(self, idx):
return self.elems[idx]
def vectorize(self, length, width_info):
props = self.properties.copy()
props.update(width_info)
return PlatformTypeInfo('v{}{}'.format(length, self.llvm_name), props)
def pointer(self, llvm_elem):
name = self.llvm_name if llvm_elem is None else llvm_elem.llvm_name
return PlatformTypeInfo('p0{}'.format(name), self.properties)
BITWIDTH_POINTER = '<pointer>'
class Type(object):
def __init__(self, bitwidth):
self._bitwidth = bitwidth
@ -87,18 +106,39 @@ class Type(object):
def bitwidth(self):
return self._bitwidth
def modify(self, spec, width):
def modify(self, spec, width, previous):
raise NotImplementedError()
def __ne__(self, other):
return not (self == other)
class Void(Type):
    """The `V` type specifier: `void` in LLVM, `()` in Rust."""

    def __init__(self):
        # A void value carries no data, so it is registered with bitwidth 0.
        super(Void, self).__init__(0)

    def compiler_ctor(self):
        """Return the constructor expression used for this type in generated code."""
        return 'void()'

    def rust_name(self):
        """Return the Rust spelling of this type (the unit type)."""
        return '()'

    def type_info(self, platform_info):
        """Void has no platform-specific type information; always returns None."""
        return None

    def __eq__(self, other):
        """Any two Void instances are equal; nothing else equals a Void."""
        return isinstance(other, Void)
class Number(Type):
def __init__(self, bitwidth):
Type.__init__(self, bitwidth)
def modify(self, spec, width):
def modify(self, spec, width, previous):
if spec == 'u':
return Unsigned(self.bitwidth())
elif spec == 's':
return Signed(self.bitwidth())
elif spec == 'f':
return Float(self.bitwidth())
elif spec == 'w':
return self.__class__(self.bitwidth() * 2)
elif spec == 'n':
@ -111,11 +151,16 @@ class Number(Type):
def type_info(self, platform_info):
return platform_info.number_type_info(self)
def __eq__(self, other):
# print(self, other)
return self.__class__ == other.__class__ and self.bitwidth() == other.bitwidth()
class Signed(Number):
def __init__(self, bitwidth, llvm_bitwidth = None):
Number.__init__(self, bitwidth)
self._llvm_bitwidth = llvm_bitwidth
def compiler_ctor(self):
if self._llvm_bitwidth is None:
return 'i({})'.format(self.bitwidth())
@ -164,26 +209,47 @@ class Float(Number):
return 'f{}'.format(self.bitwidth())
class Vector(Type):
def __init__(self, elem, length):
def __init__(self, elem, length, bitcast = None):
assert isinstance(elem, Type) and not isinstance(elem, Vector)
Type.__init__(self,
elem.bitwidth() * length)
self._length = length
self._elem = elem
assert bitcast is None or (isinstance(bitcast, Vector) and
bitcast._bitcast is None and
bitcast._elem.bitwidth() == elem.bitwidth())
if bitcast is not None and bitcast._elem != elem:
self._bitcast = bitcast._elem
else:
self._bitcast = None
def modify(self, spec, width):
if spec == 'h':
def modify(self, spec, width, previous):
if spec == 'S':
return self._elem
elif spec == 'h':
return Vector(self._elem, self._length // 2)
elif spec == 'd':
return Vector(self._elem, self._length * 2)
elif spec.startswith('x'):
new_bitwidth = int(spec[1:])
return Vector(self._elem, new_bitwidth // self._elem.bitwidth())
elif spec.startswith('->'):
bitcast_to = TypeSpec(spec[2:])
choices = list(bitcast_to.enumerate(width, previous))
assert len(choices) == 1
bitcast_to = choices[0]
return Vector(self._elem, self._length, bitcast_to)
else:
return Vector(self._elem.modify(spec, width), self._length)
return Vector(self._elem.modify(spec, width, previous), self._length)
def compiler_ctor(self):
return 'v({}, {})'.format(self._elem.compiler_ctor(), self._length)
if self._bitcast is None:
return 'v({}, {})'.format(self._elem.compiler_ctor(),
self._length)
else:
return 'v_({}, {}, {})'.format(self._elem.compiler_ctor(),
self._bitcast.compiler_ctor(),
self._length)
def rust_name(self):
return '{}x{}'.format(self._elem.rust_name(), self._length)
@ -193,6 +259,51 @@ class Vector(Type):
return elem_info.vectorize(self._length,
platform_info.width_info(self.bitwidth()))
def __eq__(self, other):
return isinstance(other, Vector) and self._length == other._length and \
self._elem == other._elem and self._bitcast == other._bitcast
class Pointer(Type):
    """A raw pointer type.

    `elem` is the pointee type as exposed in Rust.  `llvm_elem`, when not
    None, is the pointee type used for the LLVM-level representation
    instead.  `const` selects `*const` (True) versus `*mut` (False).
    """

    def __init__(self, elem, llvm_elem, const):
        self._elem = elem
        self._llvm_elem = llvm_elem
        self._const = const
        # Pointers have no fixed bit width; a sentinel value is stored instead.
        Type.__init__(self, BITWIDTH_POINTER)

    def modify(self, spec, width, previous):
        """Apply a single modifier to this pointer and return the new type.

        'D' dereferences (yields the pointee), 'M' / 'C' yield a mut / const
        pointer to the same pointee, and any other modifier is forwarded to
        the pointee while the pointer itself is kept as-is.
        """
        if spec == 'D':
            return self._elem
        if spec == 'M':
            return Pointer(self._elem, self._llvm_elem, False)
        if spec == 'C':
            return Pointer(self._elem, self._llvm_elem, True)
        modified_elem = self._elem.modify(spec, width, previous)
        return Pointer(modified_elem, self._llvm_elem, self._const)

    def compiler_ctor(self):
        """Return the `p(const, elem, llvm_elem)` constructor expression."""
        if self._llvm_elem is None:
            llvm_elem = 'None'
        else:
            llvm_elem = 'Some({})'.format(self._llvm_elem.compiler_ctor())
        const_flag = 'true' if self._const else 'false'
        return 'p({}, {}, {})'.format(const_flag,
                                      self._elem.compiler_ctor(),
                                      llvm_elem)

    def rust_name(self):
        """Return the Rust spelling, e.g. `*const <elem>` or `*mut <elem>`."""
        qualifier = 'const' if self._const else 'mut'
        return '*{} {}'.format(qualifier, self._elem.rust_name())

    def type_info(self, platform_info):
        """Return the pointee's platform type info wrapped as a pointer.

        The LLVM-level pointee override (if any) is resolved first and passed
        through so the pointee's `pointer()` can use its LLVM name.
        """
        if self._llvm_elem is None:
            llvm_elem = None
        else:
            llvm_elem = self._llvm_elem.type_info(platform_info)
        return self._elem.type_info(platform_info).pointer(llvm_elem)

    def __eq__(self, other):
        """Pointers are equal when constness, pointee and LLVM pointee all match."""
        return isinstance(other, Pointer) and self._const == other._const \
            and self._elem == other._elem and self._llvm_elem == other._llvm_elem
class Aggregate(Type):
def __init__(self, flatten, elems):
self._flatten = flatten
@ -202,6 +313,14 @@ class Aggregate(Type):
def __repr__(self):
return '<Aggregate {}>'.format(self._elems)
def modify(self, spec, width, previous):
    """Apply a single modifier to this aggregate.

    Only the `.N` modifier is supported: it selects the N-th element of
    the aggregate.  `width` and `previous` are accepted for signature
    compatibility with the other types' `modify` and are unused here.

    Raises NotImplementedError (naming the offending modifier) for any
    other modifier.
    """
    if spec.startswith('.'):
        return self._elems[int(spec[1:])]
    # Report the bad modifier in the exception itself instead of printing
    # it to stdout just before raising.
    raise NotImplementedError(
        'unsupported modifier `{}` for an aggregate'.format(spec))
def compiler_ctor(self):
return 'agg({}, vec![{}])'.format('true' if self._flatten else 'false',
', '.join(elem.compiler_ctor() for elem in self._elems))
@ -210,8 +329,11 @@ class Aggregate(Type):
return '({})'.format(', '.join(elem.rust_name() for elem in self._elems))
def type_info(self, platform_info):
#return PlatformTypeInfo(None, None, self._llvm_name)
return None
return PlatformTypeInfo(None, None, [elem.type_info(platform_info) for elem in self._elems])
def __eq__(self, other):
return isinstance(other, Aggregate) and self._flatten == other._flatten and \
self._elems == other._elems
TYPE_ID_LOOKUP = {'i': [Signed, Unsigned],
@ -219,6 +341,22 @@ TYPE_ID_LOOKUP = {'i': [Signed, Unsigned],
'u': [Unsigned],
'f': [Float]}
def ptrify(match, elem, width, previous):
    """Wrap `elem` in a Pointer if the matched spec carries a pointer suffix.

    `match` is a SPEC regex match.  Without a `pointer` group, `elem` is
    returned unchanged.  Otherwise the optional `/type` part (the
    `llvm_pointer` group) is parsed as a type spec that must enumerate to
    exactly one type, which becomes the pointer's LLVM-level pointee.
    `width` and `previous` are forwarded to that nested enumeration.
    """
    kind = match.group('pointer')
    if kind is None:
        return elem

    llvm_elem = None
    raw_llvm = match.group('llvm_pointer')
    if raw_llvm is not None:
        assert raw_llvm.startswith('/')
        # Strip the leading '/' and resolve the remainder as its own type spec.
        candidates = list(TypeSpec(raw_llvm[1:]).enumerate(width, previous))
        assert len(candidates) == 1
        llvm_elem = candidates[0]

    assert kind in ('Pc', 'Pm')
    return Pointer(elem, llvm_elem, kind == 'Pc')
class TypeSpec(object):
def __init__(self, spec):
if not isinstance(spec, list):
@ -226,71 +364,103 @@ class TypeSpec(object):
self.spec = spec
def enumerate(self, width):
def enumerate(self, width, previous):
for spec in self.spec:
match = SPEC.match(spec)
if match:
if match is not None:
id = match.group('id')
is_vector = id.islower()
type_ctors = TYPE_ID_LOOKUP[id.lower()]
reference = match.group('reference')
start = match.group('start')
if start is not None:
end = match.group('end')
llvm_width = None
modifiers = []
index = match.group('index')
if index is not None:
modifiers.append(index)
modifiers += list(match.group('modifiers') or '')
force = match.group('force_width')
if force is not None:
modifiers.append(force)
bitcast = match.group('bitcast')
if bitcast is not None:
modifiers.append(bitcast)
if match.group('void') is not None:
assert spec == 'V'
yield Void()
elif id is not None:
is_vector = id.islower()
type_ctors = TYPE_ID_LOOKUP[id.lower()]
start = match.group('start')
if start is not None:
end = match.group('end')
llvm_width = None
else:
start = end = match.group('width')
llvm_width = match.group('llvm_width')
start = int(start)
end = int(end)
bitwidth = start
while bitwidth <= end:
for ctor in type_ctors:
if llvm_width is not None:
assert not is_vector
llvm_width = int(llvm_width)
assert llvm_width < bitwidth
scalar = ctor(bitwidth, llvm_width)
else:
scalar = ctor(bitwidth)
if is_vector:
elem = Vector(scalar, width // bitwidth)
else:
assert bitcast is None
elem = scalar
for x in modifiers:
elem = elem.modify(x, width, previous)
yield ptrify(match, elem, width, previous)
bitwidth *= 2
elif reference is not None:
reference = int(reference)
assert reference < len(previous), \
'referring to argument {}, but only {} are known'.format(reference,
len(previous))
ret = previous[reference]
for x in modifiers:
ret = ret.modify(x, width, previous)
yield ptrify(match, ret, width, previous)
else:
start = end = match.group('width')
llvm_width = match.group('llvm_width')
start = int(start)
end = int(end)
assert False, 'matched `{}`, but didn\'t understand it?'.format(spec)
elif spec.startswith('('):
if spec.endswith(')'):
true_spec = spec[1:-1]
flatten = False
elif spec.endswith(')f'):
true_spec = spec[1:-2]
flatten = True
else:
assert False, 'found unclosed aggregate `{}`'.format(spec)
bitwidth = start
while bitwidth <= end:
for ctor in type_ctors:
if llvm_width is not None:
assert not is_vector
llvm_width = int(llvm_width)
assert llvm_width < bitwidth
scalar = ctor(bitwidth, llvm_width)
else:
scalar = ctor(bitwidth)
for elems in itertools.product(*(TypeSpec(subspec).enumerate(width, previous)
for subspec in true_spec.split(','))):
yield Aggregate(flatten, elems)
elif spec.startswith('['):
if spec.endswith(']'):
true_spec = spec[1:-1]
flatten = False
elif spec.endswith(']f'):
true_spec = spec[1:-2]
flatten = True
else:
assert False, 'found unclosed aggregate `{}`'.format(spec)
elem_spec, count = true_spec.split(';')
if is_vector:
yield Vector(scalar, width // bitwidth)
else:
yield scalar
bitwidth *= 2
count = int(count)
for elem in TypeSpec(elem_spec).enumerate(width, previous):
yield Aggregate(flatten, [elem] * count)
else:
print('Failed to parse: `{}`'.format(spec), file=sys.stderr)
def resolve(self, width, zero):
assert len(self.spec) == 1
spec = self.spec[0]
match = SPEC.match(spec)
if match:
id = match.group('id')
if id is not None:
options = list(self.enumerate(width))
assert len(options) == 1
return options[0]
reference = match.group('reference')
if reference != '0':
raise NotImplementedError('only argument 0 (return value) references are supported')
ret = zero
for x in match.group('modifiers') or []:
ret = ret.modify(x, width)
force = match.group('force_width')
if force is not None:
ret = ret.modify(force, width)
return ret
elif spec.startswith('('):
if spec.endswith(')'):
raise NotImplementedError()
elif spec.endswith(')f'):
true_spec = spec[1:-2]
flatten = True
elems = [TypeSpec(subspec).resolve(width, zero) for subspec in true_spec.split(',')]
return Aggregate(flatten, elems)
assert False, 'Failed to parse `{}`'.format(spec)
class GenericIntrinsic(object):
def __init__(self, platform, intrinsic, widths, llvm_name, ret, args):
@ -305,10 +475,22 @@ class GenericIntrinsic(object):
for width in self.widths:
# must be a power of two
assert width & (width - 1) == 0
for ret in self.ret.enumerate(width):
args = [arg.resolve(width, ret) for arg in self.args]
yield MonomorphicIntrinsic(self._platform, self.intrinsic, width, self.llvm_name,
ret, args)
def recur(processed, untouched):
if untouched == []:
ret = processed[0]
args = processed[1:]
yield MonomorphicIntrinsic(self._platform, self.intrinsic, width,
self.llvm_name,
ret, args)
else:
raw_arg = untouched[0]
rest = untouched[1:]
for arg in raw_arg.enumerate(width, processed):
for intr in recur(processed + [arg], rest):
yield intr
for x in recur([], [self.ret] + self.args):
yield x
class MonomorphicIntrinsic(object):
def __init__(self, platform, intrinsic, width, llvm_name, ret, args):
@ -369,7 +551,18 @@ def parse_args():
## Type specifier grammar
```
type := vector | scalar | aggregate | reference
type := core_type modifier* suffix?
core_type := void | vector | scalar | aggregate | reference
modifier := 'v' | 'S' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' | 'f' |
'D' | 'M' | 'C' | 'x' number | '.' number
suffix := pointer | bitcast
pointer := 'Pm' llvm_pointer? | 'Pc' llvm_pointer?
llvm_pointer := '/' type
bitcast := '->' type
void := 'V'
vector := vector_elem width |
vector_elem := 'i' | 'u' | 's' | 'f'
@ -378,18 +571,20 @@ def parse_args():
scalar_type := 'U' | 'S' | 'F'
llvm_width := '/' number
aggregate := '(' (type),* ')' 'f'?
reference := number modifiers*
modifiers := 'v' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' |
'x' number
aggregate := '(' (type),* ')' 'f'? | '[' type ';' number ']' 'f'?
reference := number
width = number | '(' number '-' number ')'
number = [0-9]+
```
## Void
The `V` type corresponds to `void` in LLVM (`()` in
Rust). It's likely to only work in return position.
## Vectors
The vector grammar is a pattern describing many possibilities
@ -433,6 +628,12 @@ def parse_args():
- no `f` corresponds to `declare ... @llvm.foo({float, i32})`.
- having an `f` corresponds to `declare ... @llvm.foo(float, i32)`.
The `[type;number]` form is just a shorter way to write
`(...)`, except that it avoids taking the cartesian product of
generic types, e.g. `[S32;2]` is the same as `(S32,S32)`, while
`[I32;2]` describes just the two types `(S32,S32)` and
`(U32,U32)` (i.e. it doesn't include `(S32,U32)` or `(U32,S32)`,
as `(I32,I32)` would).
(Currently aggregates can not contain other aggregates.)
@ -441,19 +642,49 @@ def parse_args():
A reference uses the type of another argument, with possible
modifications. The number refers to the type to use, starting
with 0 == return value, 1 == first argument, 2 == second
argument, etc. (Currently only referencing 0, the return
value, is supported.)
argument, etc.
## Affixes
The `modifier` and `suffix` adaptors change the precise
representation.
### Modifiers
- 'v': put a scalar into a vector of the current width (u32 -> u32x4, when width == 128)
- 'S': get the scalar element of a vector (u32x4 -> u32)
- 'h': half the length of the vector (u32x4 -> u32x2)
- 'd': double the length of the vector (u32x2 -> u32x4)
- 'n': narrow the element of the vector (u32x4 -> u16x4)
- 'w': widen the element of the vector (u16x4 -> u32x4)
- 'u': force an integer (vector or scalar) to be unsigned (i32x4 -> u32x4)
- 's': force an integer (vector or scalar) to be signed (u32x4 -> i32x4)
- 'u': force a number (vector or scalar) to be unsigned int (f32x4 -> u32x4)
- 's': force a number (vector or scalar) to be signed int (u32x4 -> i32x4)
- 'f': force a number (vector or scalar) to be float (u32x4 -> f32x4)
- 'x' number: force the type to be a vector of bitwidth `number`.
- '.' number: get the `number`th element of an aggregate
- 'D': dereference a pointer (*mut u32 -> u32)
- 'C': make a pointer const (*mut u32 -> *const u32)
- 'M': make a pointer mut (*const u32 -> *mut u32)
### Pointers
A pointer to any type can be created by appending a `P*`
suffix. The `m` vs. `c` chooses mut vs. const, e.g. `S32Pm`
corresponds to `*mut i32`, and `i32Pc` corresponds (with width
128) to `*const i32x4` and `*const u32x4`.
The (optional) type after the `/` represents the type used
internally by LLVM, e.g. `S32Pm/S8` is exposed as `*mut i32`
in Rust, but is `i8*` in LLVM. (This defaults to the main
type.)
### Bitcast
The `'->' type` bitcast suffix will cause the value to be
bitcast to the right-hand type when calling the intrinsic,
e.g. `s32->f32` will expose the intrinsic as `i32x4` at the
Rust level, but will cast that vector to `f32x4` when calling
the LLVM intrinsic.
'''))
parser.add_argument('--format', choices=FORMATS, required=True,
help = 'Output format.')
@ -502,7 +733,7 @@ class CompilerDefs(object):
#![allow(unused_imports)]
use {{Intrinsic, i, i_, u, u_, f, v, agg}};
use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}};
use IntrinsicDef::Named;
use rustc::middle::ty;

View File

@ -36,6 +36,20 @@
"ret": "f(32-64)",
"args": ["0", "0"]
},
{
"intrinsic": "{0.width_mm}_maskload_{0.data_type}",
"width": [128, 256],
"llvm": "maskload.{0.data_type_short}{0.width_suffix}",
"ret": ["f(32-64)"],
"args": ["0SPc/S8", "0s->0"]
},
{
"intrinsic": "{3.width_mm}_maskstore_{3.data_type}",
"width": [128, 256],
"llvm": "maskstore.{3.data_type_short}{3.width_suffix}",
"ret": "V",
"args": ["F(32-64)Pm/S8", "1Dsv->1Dv", "1Dv"]
},
{
"intrinsic": "256_min_{0.data_type}",
"width": [256],
@ -78,6 +92,20 @@
"ret": "f32",
"args": ["f32"]
},
{
"intrinsic": "256_storeu_{2.data_type}",
"width": [256],
"llvm": "storeu.ps.256",
"ret": "V",
"args": ["f(32-64)Pm/U8", "1D"]
},
{
"intrinsic": "256_storeu_si256",
"width": [256],
"llvm": "storeu.dq.256",
"ret": "V",
"args": ["u8Pm/U8", "1D"]
},
{
"intrinsic": "256_sqrt_{0.data_type}",
"width": [256],
@ -147,6 +175,20 @@
"llvm": "ptestz.256",
"ret": "S32",
"args": ["u64", "u64"]
},
{
"intrinsic": "256_zeroall",
"width": [256],
"llvm": "vzeroall",
"ret": "V",
"args": []
},
{
"intrinsic": "256_zeroupper",
"width": [256],
"llvm": "vzeroupper",
"ret": "V",
"args": []
}
]
}

View File

@ -4,21 +4,21 @@
{
"intrinsic": "256_abs_{0.data_type}",
"width": [256],
"llvm": "avx2.pabs.{0.data_type_short}",
"llvm": "pabs.{0.data_type_short}",
"ret": "s(8-32)",
"args": ["0"]
},
{
"intrinsic": "256_adds_{0.data_type}",
"width": [256],
"llvm": "avx2.padd{0.kind_short}s.{0.data_type_short}",
"llvm": "padd{0.kind_short}s.{0.data_type_short}",
"ret": "i(8-16)",
"args": ["0", "0"]
},
{
"intrinsic": "256_avg_{0.data_type}",
"width": [256],
"llvm": "avx2.pavg.{0.data_type_short}",
"llvm": "pavg.{0.data_type_short}",
"ret": "u(8-16)",
"args": ["0", "0"]
},
@ -64,6 +64,48 @@
"ret": "s16",
"args": ["s8", "s8"]
},
{
"intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.d.{0.data_type_short}{0.width_suffix}",
"ret": ["s32", "f32"],
"args": ["0", "0SPc/S8", "s32", "0s->0", "S32/8"]
},
{
"intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.d.{0.data_type_short}{0.width_suffix}",
"ret": ["s64", "f64"],
"args": ["0", "0SPc/S8", "s32x128", "0s->0", "S32/8"]
},
{
"intrinsic": "{3.width_mm}_mask_i64gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.q.{0.data_type_short}{0.width_suffix}",
"ret": ["s32x128", "f32x128"],
"args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"]
},
{
"intrinsic": "{0.width_mm}_mask_i64gather_{0.data_type}",
"width": [128, 256],
"llvm": "gather.q.{0.data_type_short}{0.width_suffix}",
"ret": ["s64", "f64"],
"args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"]
},
{
"intrinsic": "{0.width_mm}_maskload_{0.data_type}",
"width": [128, 256],
"llvm": "maskload.{0.data_type_short}{0.width_suffix}",
"ret": ["s(32-64)"],
"args": ["0Pc/S8", "0"]
},
{
"intrinsic": "{2.width_mm}_maskstore_{2.data_type}",
"width": [128, 256],
"llvm": "maskstore.{2.data_type_short}{2.width_suffix}",
"ret": "V",
"args": ["S(32-64)Pm/S8", "1Dv", "2"]
},
{
"intrinsic": "256_max_{0.data_type}",
"width": [256],

View File

@ -42,6 +42,13 @@
"llvm": "!llvm.sqrt.v4f32",
"ret": "f32",
"args": ["0"]
},
{
"intrinsic": "_storeu_ps",
"width": [128],
"llvm": "storeu.ps",
"ret": "V",
"args": ["F32Pm/S8", "f32"]
}
]
}

View File

@ -15,6 +15,13 @@
"ret": "u(8-16)",
"args": ["0", "0"]
},
{
"intrinsic": "_lfence",
"width": [128],
"llvm": "lfence",
"ret": "V",
"args": []
},
{
"intrinsic": "_madd_epi16",
"width": [128],
@ -22,6 +29,13 @@
"ret": "s32",
"args": ["s16", "s16"]
},
{
"intrinsic": "_maskmoveu_si128",
"width": [128],
"llvm": "maskmov.dqu",
"ret": "V",
"args": ["u8", "u8", "U8Pm"]
},
{
"intrinsic": "_max_{0.data_type}",
"width": [128],
@ -36,6 +50,13 @@
"ret": "f64",
"args": ["0", "0"]
},
{
"intrinsic": "_mfence",
"width": [128],
"llvm": "fence",
"ret": "V",
"args": []
},
{
"intrinsic": "_min_{0.data_type}",
"width": [128],
@ -99,6 +120,13 @@
"ret": "u64",
"args": ["u8", "u8"]
},
{
"intrinsic": "_sfence",
"width": [128],
"llvm": "sfence",
"ret": "V",
"args": []
},
{
"intrinsic": "_sqrt_pd",
"width": [128],
@ -106,6 +134,20 @@
"ret": "f64",
"args": ["0"]
},
{
"intrinsic": "_storeu_pd",
"width": [128],
"llvm": "storeu.pd",
"ret": "V",
"args": ["F64Pm/U8", "f64"]
},
{
"intrinsic": "_storeu_si128",
"width": [128],
"llvm": "storeu.dq",
"ret": "V",
"args": ["u8Pm/U8", "u8"]
},
{
"intrinsic": "_subs_{0.data_type}",
"width": [128],

View File

@ -21,6 +21,13 @@
"llvm": "hsub.{0.data_type}",
"ret": "f(32-64)",
"args": ["0", "0"]
},
{
"intrinsic": "_lddqu_si128",
"width": [128],
"llvm": "ldu.dq",
"ret": "u8",
"args": ["0Pc/S8"]
}
]
}

View File

@ -13,7 +13,7 @@
#![allow(unused_imports)]
use {Intrinsic, i, u, f, v, agg};
use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void};
use IntrinsicDef::Named;
use rustc::middle::ty;
@ -1910,6 +1910,606 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(u(8), 16),
definition: Named("llvm.aarch64.neon.rbit.v16i8")
},
"ld2_s8" => Intrinsic {
inputs: vec![p(true, i(8), Some(v(i(8), 8)))],
output: agg(false, vec![v(i(8), 8), v(i(8), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8")
},
"ld2_u8" => Intrinsic {
inputs: vec![p(true, u(8), Some(v(u(8), 8)))],
output: agg(false, vec![v(u(8), 8), v(u(8), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8")
},
"ld2_s16" => Intrinsic {
inputs: vec![p(true, i(16), Some(v(i(16), 4)))],
output: agg(false, vec![v(i(16), 4), v(i(16), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16")
},
"ld2_u16" => Intrinsic {
inputs: vec![p(true, u(16), Some(v(u(16), 4)))],
output: agg(false, vec![v(u(16), 4), v(u(16), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16")
},
"ld2_s32" => Intrinsic {
inputs: vec![p(true, i(32), Some(v(i(32), 2)))],
output: agg(false, vec![v(i(32), 2), v(i(32), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32")
},
"ld2_u32" => Intrinsic {
inputs: vec![p(true, u(32), Some(v(u(32), 2)))],
output: agg(false, vec![v(u(32), 2), v(u(32), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32")
},
"ld2_s64" => Intrinsic {
inputs: vec![p(true, i(64), Some(v(i(64), 1)))],
output: agg(false, vec![v(i(64), 1), v(i(64), 1)]),
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64")
},
"ld2_u64" => Intrinsic {
inputs: vec![p(true, u(64), Some(v(u(64), 1)))],
output: agg(false, vec![v(u(64), 1), v(u(64), 1)]),
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64")
},
"ld2_f32" => Intrinsic {
inputs: vec![p(true, f(32), Some(v(f(32), 2)))],
output: agg(false, vec![v(f(32), 2), v(f(32), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2f32.p0v2f32")
},
"ld2_f64" => Intrinsic {
inputs: vec![p(true, f(64), Some(v(f(64), 1)))],
output: agg(false, vec![v(f(64), 1), v(f(64), 1)]),
definition: Named("llvm.aarch64.neon.ld2.v1f64.p0v1f64")
},
"ld2q_s8" => Intrinsic {
inputs: vec![p(true, i(8), Some(v(i(8), 16)))],
output: agg(false, vec![v(i(8), 16), v(i(8), 16)]),
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8")
},
"ld2q_u8" => Intrinsic {
inputs: vec![p(true, u(8), Some(v(u(8), 16)))],
output: agg(false, vec![v(u(8), 16), v(u(8), 16)]),
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8")
},
"ld2q_s16" => Intrinsic {
inputs: vec![p(true, i(16), Some(v(i(16), 8)))],
output: agg(false, vec![v(i(16), 8), v(i(16), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16")
},
"ld2q_u16" => Intrinsic {
inputs: vec![p(true, u(16), Some(v(u(16), 8)))],
output: agg(false, vec![v(u(16), 8), v(u(16), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16")
},
"ld2q_s32" => Intrinsic {
inputs: vec![p(true, i(32), Some(v(i(32), 4)))],
output: agg(false, vec![v(i(32), 4), v(i(32), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32")
},
"ld2q_u32" => Intrinsic {
inputs: vec![p(true, u(32), Some(v(u(32), 4)))],
output: agg(false, vec![v(u(32), 4), v(u(32), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32")
},
"ld2q_s64" => Intrinsic {
inputs: vec![p(true, i(64), Some(v(i(64), 2)))],
output: agg(false, vec![v(i(64), 2), v(i(64), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64")
},
"ld2q_u64" => Intrinsic {
inputs: vec![p(true, u(64), Some(v(u(64), 2)))],
output: agg(false, vec![v(u(64), 2), v(u(64), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64")
},
"ld2q_f32" => Intrinsic {
inputs: vec![p(true, f(32), Some(v(f(32), 4)))],
output: agg(false, vec![v(f(32), 4), v(f(32), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4f32.p0v4f32")
},
"ld2q_f64" => Intrinsic {
inputs: vec![p(true, f(64), Some(v(f(64), 2)))],
output: agg(false, vec![v(f(64), 2), v(f(64), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2f64.p0v2f64")
},
"ld3_s8" => Intrinsic {
inputs: vec![p(true, i(8), Some(v(i(8), 8)))],
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8")
},
"ld3_u8" => Intrinsic {
inputs: vec![p(true, u(8), Some(v(u(8), 8)))],
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8")
},
"ld3_s16" => Intrinsic {
inputs: vec![p(true, i(16), Some(v(i(16), 4)))],
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16")
},
"ld3_u16" => Intrinsic {
inputs: vec![p(true, u(16), Some(v(u(16), 4)))],
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16")
},
"ld3_s32" => Intrinsic {
inputs: vec![p(true, i(32), Some(v(i(32), 2)))],
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32")
},
"ld3_u32" => Intrinsic {
inputs: vec![p(true, u(32), Some(v(u(32), 2)))],
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32")
},
"ld3_s64" => Intrinsic {
inputs: vec![p(true, i(64), Some(v(i(64), 1)))],
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64")
},
"ld3_u64" => Intrinsic {
inputs: vec![p(true, u(64), Some(v(u(64), 1)))],
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64")
},
"ld3_f32" => Intrinsic {
inputs: vec![p(true, f(32), Some(v(f(32), 2)))],
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2f32.p0v2f32")
},
"ld3_f64" => Intrinsic {
inputs: vec![p(true, f(64), Some(v(f(64), 1)))],
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
definition: Named("llvm.aarch64.neon.ld3.v1f64.p0v1f64")
},
"ld3q_s8" => Intrinsic {
inputs: vec![p(true, i(8), Some(v(i(8), 16)))],
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8")
},
"ld3q_u8" => Intrinsic {
inputs: vec![p(true, u(8), Some(v(u(8), 16)))],
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8")
},
"ld3q_s16" => Intrinsic {
inputs: vec![p(true, i(16), Some(v(i(16), 8)))],
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16")
},
"ld3q_u16" => Intrinsic {
inputs: vec![p(true, u(16), Some(v(u(16), 8)))],
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16")
},
"ld3q_s32" => Intrinsic {
inputs: vec![p(true, i(32), Some(v(i(32), 4)))],
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32")
},
"ld3q_u32" => Intrinsic {
inputs: vec![p(true, u(32), Some(v(u(32), 4)))],
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32")
},
"ld3q_s64" => Intrinsic {
inputs: vec![p(true, i(64), Some(v(i(64), 2)))],
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64")
},
"ld3q_u64" => Intrinsic {
inputs: vec![p(true, u(64), Some(v(u(64), 2)))],
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64")
},
"ld3q_f32" => Intrinsic {
inputs: vec![p(true, f(32), Some(v(f(32), 4)))],
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4f32.p0v4f32")
},
"ld3q_f64" => Intrinsic {
inputs: vec![p(true, f(64), Some(v(f(64), 2)))],
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2f64.p0v2f64")
},
"ld4_s8" => Intrinsic {
inputs: vec![p(true, i(8), Some(v(i(8), 8)))],
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8")
},
"ld4_u8" => Intrinsic {
inputs: vec![p(true, u(8), Some(v(u(8), 8)))],
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8")
},
"ld4_s16" => Intrinsic {
inputs: vec![p(true, i(16), Some(v(i(16), 4)))],
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16")
},
"ld4_u16" => Intrinsic {
inputs: vec![p(true, u(16), Some(v(u(16), 4)))],
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16")
},
"ld4_s32" => Intrinsic {
inputs: vec![p(true, i(32), Some(v(i(32), 2)))],
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32")
},
"ld4_u32" => Intrinsic {
inputs: vec![p(true, u(32), Some(v(u(32), 2)))],
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32")
},
"ld4_s64" => Intrinsic {
inputs: vec![p(true, i(64), Some(v(i(64), 1)))],
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64")
},
"ld4_u64" => Intrinsic {
inputs: vec![p(true, u(64), Some(v(u(64), 1)))],
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64")
},
"ld4_f32" => Intrinsic {
inputs: vec![p(true, f(32), Some(v(f(32), 2)))],
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2f32.p0v2f32")
},
"ld4_f64" => Intrinsic {
inputs: vec![p(true, f(64), Some(v(f(64), 1)))],
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
definition: Named("llvm.aarch64.neon.ld4.v1f64.p0v1f64")
},
"ld4q_s8" => Intrinsic {
inputs: vec![p(true, i(8), Some(v(i(8), 16)))],
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8")
},
"ld4q_u8" => Intrinsic {
inputs: vec![p(true, u(8), Some(v(u(8), 16)))],
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8")
},
"ld4q_s16" => Intrinsic {
inputs: vec![p(true, i(16), Some(v(i(16), 8)))],
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16")
},
"ld4q_u16" => Intrinsic {
inputs: vec![p(true, u(16), Some(v(u(16), 8)))],
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16")
},
"ld4q_s32" => Intrinsic {
inputs: vec![p(true, i(32), Some(v(i(32), 4)))],
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32")
},
"ld4q_u32" => Intrinsic {
inputs: vec![p(true, u(32), Some(v(u(32), 4)))],
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32")
},
"ld4q_s64" => Intrinsic {
inputs: vec![p(true, i(64), Some(v(i(64), 2)))],
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64")
},
"ld4q_u64" => Intrinsic {
inputs: vec![p(true, u(64), Some(v(u(64), 2)))],
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64")
},
"ld4q_f32" => Intrinsic {
inputs: vec![p(true, f(32), Some(v(f(32), 4)))],
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4f32.p0v4f32")
},
"ld4q_f64" => Intrinsic {
inputs: vec![p(true, f(64), Some(v(f(64), 2)))],
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2f64.p0v2f64")
},
"ld2_dup_s8" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: agg(false, vec![v(i(8), 8), v(i(8), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8")
},
"ld2_dup_u8" => Intrinsic {
inputs: vec![p(true, u(8), None)],
output: agg(false, vec![v(u(8), 8), v(u(8), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8")
},
"ld2_dup_s16" => Intrinsic {
inputs: vec![p(true, i(16), None)],
output: agg(false, vec![v(i(16), 4), v(i(16), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16")
},
"ld2_dup_u16" => Intrinsic {
inputs: vec![p(true, u(16), None)],
output: agg(false, vec![v(u(16), 4), v(u(16), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16")
},
"ld2_dup_s32" => Intrinsic {
inputs: vec![p(true, i(32), None)],
output: agg(false, vec![v(i(32), 2), v(i(32), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32")
},
"ld2_dup_u32" => Intrinsic {
inputs: vec![p(true, u(32), None)],
output: agg(false, vec![v(u(32), 2), v(u(32), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32")
},
"ld2_dup_s64" => Intrinsic {
inputs: vec![p(true, i(64), None)],
output: agg(false, vec![v(i(64), 1), v(i(64), 1)]),
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64")
},
"ld2_dup_u64" => Intrinsic {
inputs: vec![p(true, u(64), None)],
output: agg(false, vec![v(u(64), 1), v(u(64), 1)]),
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64")
},
"ld2_dup_f32" => Intrinsic {
inputs: vec![p(true, f(32), None)],
output: agg(false, vec![v(f(32), 2), v(f(32), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2f32.p0f32")
},
"ld2_dup_f64" => Intrinsic {
inputs: vec![p(true, f(64), None)],
output: agg(false, vec![v(f(64), 1), v(f(64), 1)]),
definition: Named("llvm.aarch64.neon.ld2.v1f64.p0f64")
},
"ld2q_dup_s8" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: agg(false, vec![v(i(8), 16), v(i(8), 16)]),
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8")
},
"ld2q_dup_u8" => Intrinsic {
inputs: vec![p(true, u(8), None)],
output: agg(false, vec![v(u(8), 16), v(u(8), 16)]),
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8")
},
"ld2q_dup_s16" => Intrinsic {
inputs: vec![p(true, i(16), None)],
output: agg(false, vec![v(i(16), 8), v(i(16), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16")
},
"ld2q_dup_u16" => Intrinsic {
inputs: vec![p(true, u(16), None)],
output: agg(false, vec![v(u(16), 8), v(u(16), 8)]),
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16")
},
"ld2q_dup_s32" => Intrinsic {
inputs: vec![p(true, i(32), None)],
output: agg(false, vec![v(i(32), 4), v(i(32), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32")
},
"ld2q_dup_u32" => Intrinsic {
inputs: vec![p(true, u(32), None)],
output: agg(false, vec![v(u(32), 4), v(u(32), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32")
},
"ld2q_dup_s64" => Intrinsic {
inputs: vec![p(true, i(64), None)],
output: agg(false, vec![v(i(64), 2), v(i(64), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64")
},
"ld2q_dup_u64" => Intrinsic {
inputs: vec![p(true, u(64), None)],
output: agg(false, vec![v(u(64), 2), v(u(64), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64")
},
"ld2q_dup_f32" => Intrinsic {
inputs: vec![p(true, f(32), None)],
output: agg(false, vec![v(f(32), 4), v(f(32), 4)]),
definition: Named("llvm.aarch64.neon.ld2.v4f32.p0f32")
},
"ld2q_dup_f64" => Intrinsic {
inputs: vec![p(true, f(64), None)],
output: agg(false, vec![v(f(64), 2), v(f(64), 2)]),
definition: Named("llvm.aarch64.neon.ld2.v2f64.p0f64")
},
"ld3_dup_s8" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8")
},
"ld3_dup_u8" => Intrinsic {
inputs: vec![p(true, u(8), None)],
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8")
},
"ld3_dup_s16" => Intrinsic {
inputs: vec![p(true, i(16), None)],
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16")
},
"ld3_dup_u16" => Intrinsic {
inputs: vec![p(true, u(16), None)],
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16")
},
"ld3_dup_s32" => Intrinsic {
inputs: vec![p(true, i(32), None)],
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32")
},
"ld3_dup_u32" => Intrinsic {
inputs: vec![p(true, u(32), None)],
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32")
},
"ld3_dup_s64" => Intrinsic {
inputs: vec![p(true, i(64), None)],
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64")
},
"ld3_dup_u64" => Intrinsic {
inputs: vec![p(true, u(64), None)],
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64")
},
"ld3_dup_f32" => Intrinsic {
inputs: vec![p(true, f(32), None)],
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2f32.p0f32")
},
"ld3_dup_f64" => Intrinsic {
inputs: vec![p(true, f(64), None)],
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
definition: Named("llvm.aarch64.neon.ld3.v1f64.p0f64")
},
"ld3q_dup_s8" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8")
},
"ld3q_dup_u8" => Intrinsic {
inputs: vec![p(true, u(8), None)],
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8")
},
"ld3q_dup_s16" => Intrinsic {
inputs: vec![p(true, i(16), None)],
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16")
},
"ld3q_dup_u16" => Intrinsic {
inputs: vec![p(true, u(16), None)],
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16")
},
"ld3q_dup_s32" => Intrinsic {
inputs: vec![p(true, i(32), None)],
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32")
},
"ld3q_dup_u32" => Intrinsic {
inputs: vec![p(true, u(32), None)],
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32")
},
"ld3q_dup_s64" => Intrinsic {
inputs: vec![p(true, i(64), None)],
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64")
},
"ld3q_dup_u64" => Intrinsic {
inputs: vec![p(true, u(64), None)],
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64")
},
"ld3q_dup_f32" => Intrinsic {
inputs: vec![p(true, f(32), None)],
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
definition: Named("llvm.aarch64.neon.ld3.v4f32.p0f32")
},
"ld3q_dup_f64" => Intrinsic {
inputs: vec![p(true, f(64), None)],
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
definition: Named("llvm.aarch64.neon.ld3.v2f64.p0f64")
},
"ld4_dup_s8" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8")
},
"ld4_dup_u8" => Intrinsic {
inputs: vec![p(true, u(8), None)],
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8")
},
"ld4_dup_s16" => Intrinsic {
inputs: vec![p(true, i(16), None)],
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16")
},
"ld4_dup_u16" => Intrinsic {
inputs: vec![p(true, u(16), None)],
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16")
},
"ld4_dup_s32" => Intrinsic {
inputs: vec![p(true, i(32), None)],
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32")
},
"ld4_dup_u32" => Intrinsic {
inputs: vec![p(true, u(32), None)],
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32")
},
"ld4_dup_s64" => Intrinsic {
inputs: vec![p(true, i(64), None)],
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64")
},
"ld4_dup_u64" => Intrinsic {
inputs: vec![p(true, u(64), None)],
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64")
},
"ld4_dup_f32" => Intrinsic {
inputs: vec![p(true, f(32), None)],
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2f32.p0f32")
},
"ld4_dup_f64" => Intrinsic {
inputs: vec![p(true, f(64), None)],
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
definition: Named("llvm.aarch64.neon.ld4.v1f64.p0f64")
},
"ld4q_dup_s8" => Intrinsic {
inputs: vec![p(true, i(8), None)],
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8")
},
"ld4q_dup_u8" => Intrinsic {
inputs: vec![p(true, u(8), None)],
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8")
},
"ld4q_dup_s16" => Intrinsic {
inputs: vec![p(true, i(16), None)],
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16")
},
"ld4q_dup_u16" => Intrinsic {
inputs: vec![p(true, u(16), None)],
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16")
},
"ld4q_dup_s32" => Intrinsic {
inputs: vec![p(true, i(32), None)],
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32")
},
"ld4q_dup_u32" => Intrinsic {
inputs: vec![p(true, u(32), None)],
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32")
},
"ld4q_dup_s64" => Intrinsic {
inputs: vec![p(true, i(64), None)],
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64")
},
"ld4q_dup_u64" => Intrinsic {
inputs: vec![p(true, u(64), None)],
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64")
},
"ld4q_dup_f32" => Intrinsic {
inputs: vec![p(true, f(32), None)],
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
definition: Named("llvm.aarch64.neon.ld4.v4f32.p0f32")
},
"ld4q_dup_f64" => Intrinsic {
inputs: vec![p(true, f(64), None)],
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
definition: Named("llvm.aarch64.neon.ld4.v2f64.p0f64")
},
"padd_s8" => Intrinsic {
inputs: vec![v(i(8), 8), v(i(8), 8)],
output: v(i(8), 8),

View File

@ -13,7 +13,7 @@
#![allow(unused_imports)]
use {Intrinsic, i, u, f, v, agg};
use {Intrinsic, i, i_, u, u_, f, v, agg, p};
use IntrinsicDef::Named;
use rustc::middle::ty;

View File

@ -30,10 +30,11 @@ pub struct Intrinsic {
/// Description of a type that can appear in a platform intrinsic's
/// signature (as an input or as the output).
#[derive(Clone, Hash, Eq, PartialEq)]
pub enum Type {
Void,
Integer(/* signed */ bool, u8, /* llvm width */ u8),
Float(u8),
Pointer(Box<Type>),
Vector(Box<Type>, u8),
// Fields: pointee type, optional pointee type to use in the LLVM signature, constness.
Pointer(Box<Type>, Option<Box<Type>>, /* const */ bool),
// Fields: element type, optional element type to use in the LLVM signature, lane count.
Vector(Box<Type>, Option<Box<Type>>, u8),
// Fields: whether the members are flattened, and the member types.
Aggregate(bool, Vec<Type>),
}
@ -47,10 +48,19 @@ fn u(width: u8) -> Type { Type::Integer(false, width, width) }
/// Builds an unsigned integer type that is `width` bits wide in the
/// intrinsic's Rust-facing signature but is represented as an integer of
/// `llvm_width` bits in the LLVM signature.
#[allow(dead_code)]
fn u_(width: u8, llvm_width: u8) -> Type { Type::Integer(false, width, llvm_width) }
/// Builds a floating-point type of `width` bits.
fn f(width: u8) -> Type { Type::Float(width) }
fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), length) }
/// Builds a vector of `length` lanes of `x` with no separate LLVM element
/// type (the override field of `Type::Vector` is `None`).
fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), None, length) }
/// Builds a vector of `length` lanes whose element type is `x` in the
/// intrinsic's Rust-facing signature, while `bitcast` is recorded as the
/// element type to use for the LLVM signature.
fn v_(x: Type, bitcast: Type, length: u8) -> Type {
Type::Vector(Box::new(x), Some(Box::new(bitcast)), length)
}
/// Builds an aggregate of the given member `types`; `flatten` is stored as
/// the first field of `Type::Aggregate`.
fn agg(flatten: bool, types: Vec<Type>) -> Type {
Type::Aggregate(flatten, types)
}
/// Builds a pointer type.
///
/// * `const_` - stored as the constness flag of `Type::Pointer`.
/// * `elem` - the pointee type.
/// * `llvm_elem` - optional pointee type to use in the LLVM signature
///   instead of `elem`; `None` records no override.
fn p(const_: bool, elem: Type, llvm_elem: Option<Type>) -> Type {
Type::Pointer(Box::new(elem), llvm_elem.map(Box::new), const_)
}
/// Builds the `Type::Void` marker, used as the output of intrinsics that
/// return nothing.
fn void() -> Type {
Type::Void
}
mod x86;
mod arm;

View File

@ -13,7 +13,7 @@
#![allow(unused_imports)]
use {Intrinsic, i, i_, u, u_, f, v, agg};
use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void};
use IntrinsicDef::Named;
use rustc::middle::ty;
@ -50,6 +50,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(f(32), 4),
definition: Named("llvm.sqrt.v4f32")
},
"_storeu_ps" => Intrinsic {
inputs: vec![p(false, f(32), Some(i(8))), v(f(32), 4)],
output: void(),
definition: Named("llvm.x86.sse.storeu.ps")
},
"_adds_epi8" => Intrinsic {
inputs: vec![v(i(8), 16), v(i(8), 16)],
output: v(i(8), 16),
@ -80,11 +85,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(u(16), 8),
definition: Named("llvm.x86.sse2.pavg.w")
},
"_lfence" => Intrinsic {
inputs: vec![],
output: void(),
definition: Named("llvm.x86.sse2.lfence")
},
"_madd_epi16" => Intrinsic {
inputs: vec![v(i(16), 8), v(i(16), 8)],
output: v(i(32), 4),
definition: Named("llvm.x86.sse2.pmadd.wd")
},
"_maskmoveu_si128" => Intrinsic {
inputs: vec![v(u(8), 16), v(u(8), 16), p(false, u(8), None)],
output: void(),
definition: Named("llvm.x86.sse2.maskmov.dqu")
},
"_max_epi16" => Intrinsic {
inputs: vec![v(i(16), 8), v(i(16), 8)],
output: v(i(16), 8),
@ -100,6 +115,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(f(64), 2),
definition: Named("llvm.x86.sse2.max.pd")
},
"_mfence" => Intrinsic {
inputs: vec![],
output: void(),
definition: Named("llvm.x86.sse2.fence")
},
"_min_epi16" => Intrinsic {
inputs: vec![v(i(16), 8), v(i(16), 8)],
output: v(i(16), 8),
@ -160,11 +180,26 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(u(64), 2),
definition: Named("llvm.x86.sse2.psad.bw")
},
"_sfence" => Intrinsic {
inputs: vec![],
output: void(),
definition: Named("llvm.x86.sse2.sfence")
},
"_sqrt_pd" => Intrinsic {
inputs: vec![v(f(64), 2)],
output: v(f(64), 2),
definition: Named("llvm.sqrt.v2f64")
},
"_storeu_pd" => Intrinsic {
inputs: vec![p(false, f(64), Some(u(8))), v(f(64), 2)],
output: void(),
definition: Named("llvm.x86.sse2.storeu.pd")
},
"_storeu_si128" => Intrinsic {
inputs: vec![p(false, v(u(8), 16), Some(u(8))), v(u(8), 16)],
output: void(),
definition: Named("llvm.x86.sse2.storeu.dq")
},
"_subs_epi8" => Intrinsic {
inputs: vec![v(i(8), 16), v(i(8), 16)],
output: v(i(8), 16),
@ -215,6 +250,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(f(64), 2),
definition: Named("llvm.x86.sse3.hsub.pd")
},
"_lddqu_si128" => Intrinsic {
inputs: vec![p(true, v(u(8), 16), Some(i(8)))],
output: v(u(8), 16),
definition: Named("llvm.x86.sse3.ldu.dq")
},
"_abs_epi8" => Intrinsic {
inputs: vec![v(i(8), 16)],
output: v(i(8), 16),
@ -490,6 +530,46 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(f(64), 4),
definition: Named("llvm.x86.avx.max.pd.256")
},
"_maskload_ps" => Intrinsic {
inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 4)],
output: v(f(32), 4),
definition: Named("llvm.x86.avx.maskload.ps")
},
"_maskload_pd" => Intrinsic {
inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 2)],
output: v(f(64), 2),
definition: Named("llvm.x86.avx.maskload.pd")
},
"256_maskload_ps" => Intrinsic {
inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 8)],
output: v(f(32), 8),
definition: Named("llvm.x86.avx.maskload.ps.256")
},
"256_maskload_pd" => Intrinsic {
inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 4)],
output: v(f(64), 4),
definition: Named("llvm.x86.avx.maskload.pd.256")
},
"_maskstore_ps" => Intrinsic {
inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 4), v(f(32), 4)],
output: void(),
definition: Named("llvm.x86.avx.maskstore.ps")
},
"_maskstore_pd" => Intrinsic {
inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 2), v(f(64), 2)],
output: void(),
definition: Named("llvm.x86.avx.maskstore.pd")
},
"256_maskstore_ps" => Intrinsic {
inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 8), v(f(32), 8)],
output: void(),
definition: Named("llvm.x86.avx.maskstore.ps.256")
},
"256_maskstore_pd" => Intrinsic {
inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 4), v(f(64), 4)],
output: void(),
definition: Named("llvm.x86.avx.maskstore.pd.256")
},
"256_min_ps" => Intrinsic {
inputs: vec![v(f(32), 8), v(f(32), 8)],
output: v(f(32), 8),
@ -540,6 +620,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(f(32), 8),
definition: Named("llvm.x86.avx.rsqrt.ps.256")
},
"256_storeu_ps" => Intrinsic {
inputs: vec![p(false, v(f(32), 8), Some(u(8))), v(f(32), 8)],
output: void(),
definition: Named("llvm.x86.avx.storeu.ps.256")
},
"256_storeu_pd" => Intrinsic {
inputs: vec![p(false, v(f(64), 4), Some(u(8))), v(f(64), 4)],
output: void(),
definition: Named("llvm.x86.avx.storeu.ps.256")
},
"256_storeu_si256" => Intrinsic {
inputs: vec![p(false, v(u(8), 32), Some(u(8))), v(u(8), 32)],
output: void(),
definition: Named("llvm.x86.avx.storeu.dq.256")
},
"256_sqrt_ps" => Intrinsic {
inputs: vec![v(f(32), 8)],
output: v(f(32), 8),
@ -625,50 +720,60 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: i(32),
definition: Named("llvm.x86.avx.ptestz.256")
},
"256_zeroall" => Intrinsic {
inputs: vec![],
output: void(),
definition: Named("llvm.x86.avx.vzeroall")
},
"256_zeroupper" => Intrinsic {
inputs: vec![],
output: void(),
definition: Named("llvm.x86.avx.vzeroupper")
},
"256_abs_epi8" => Intrinsic {
inputs: vec![v(i(8), 32)],
output: v(i(8), 32),
definition: Named("llvm.x86.avx2.avx2.pabs.b")
definition: Named("llvm.x86.avx2.pabs.b")
},
"256_abs_epi16" => Intrinsic {
inputs: vec![v(i(16), 16)],
output: v(i(16), 16),
definition: Named("llvm.x86.avx2.avx2.pabs.w")
definition: Named("llvm.x86.avx2.pabs.w")
},
"256_abs_epi32" => Intrinsic {
inputs: vec![v(i(32), 8)],
output: v(i(32), 8),
definition: Named("llvm.x86.avx2.avx2.pabs.d")
definition: Named("llvm.x86.avx2.pabs.d")
},
"256_adds_epi8" => Intrinsic {
inputs: vec![v(i(8), 32), v(i(8), 32)],
output: v(i(8), 32),
definition: Named("llvm.x86.avx2.avx2.padds.b")
definition: Named("llvm.x86.avx2.padds.b")
},
"256_adds_epu8" => Intrinsic {
inputs: vec![v(u(8), 32), v(u(8), 32)],
output: v(u(8), 32),
definition: Named("llvm.x86.avx2.avx2.paddus.b")
definition: Named("llvm.x86.avx2.paddus.b")
},
"256_adds_epi16" => Intrinsic {
inputs: vec![v(i(16), 16), v(i(16), 16)],
output: v(i(16), 16),
definition: Named("llvm.x86.avx2.avx2.padds.w")
definition: Named("llvm.x86.avx2.padds.w")
},
"256_adds_epu16" => Intrinsic {
inputs: vec![v(u(16), 16), v(u(16), 16)],
output: v(u(16), 16),
definition: Named("llvm.x86.avx2.avx2.paddus.w")
definition: Named("llvm.x86.avx2.paddus.w")
},
"256_avg_epu8" => Intrinsic {
inputs: vec![v(u(8), 32), v(u(8), 32)],
output: v(u(8), 32),
definition: Named("llvm.x86.avx2.avx2.pavg.b")
definition: Named("llvm.x86.avx2.pavg.b")
},
"256_avg_epu16" => Intrinsic {
inputs: vec![v(u(16), 16), v(u(16), 16)],
output: v(u(16), 16),
definition: Named("llvm.x86.avx2.avx2.pavg.w")
definition: Named("llvm.x86.avx2.pavg.w")
},
"256_hadd_epi16" => Intrinsic {
inputs: vec![v(i(16), 16), v(i(16), 16)],
@ -710,6 +815,126 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(i(16), 16),
definition: Named("llvm.x86.avx2.pmadd.ub.sw")
},
"_mask_i32gather_epi32" => Intrinsic {
inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4), i_(32, 8)],
output: v(i(32), 4),
definition: Named("llvm.x86.avx2.gather.d.d")
},
"_mask_i32gather_ps" => Intrinsic {
inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(32), 4), v_(i(32), f(32), 4), i_(32, 8)],
output: v(f(32), 4),
definition: Named("llvm.x86.avx2.gather.d.ps")
},
"256_mask_i32gather_epi32" => Intrinsic {
inputs: vec![v(i(32), 8), p(true, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8), i_(32, 8)],
output: v(i(32), 8),
definition: Named("llvm.x86.avx2.gather.d.d.256")
},
"256_mask_i32gather_ps" => Intrinsic {
inputs: vec![v(f(32), 8), p(true, f(32), Some(i(8))), v(i(32), 8), v_(i(32), f(32), 8), i_(32, 8)],
output: v(f(32), 8),
definition: Named("llvm.x86.avx2.gather.d.ps.256")
},
"_mask_i32gather_epi64" => Intrinsic {
inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 2), i_(32, 8)],
output: v(i(64), 2),
definition: Named("llvm.x86.avx2.gather.d.q")
},
"_mask_i32gather_pd" => Intrinsic {
inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 2), i_(32, 8)],
output: v(f(64), 2),
definition: Named("llvm.x86.avx2.gather.d.pd")
},
"256_mask_i32gather_epi64" => Intrinsic {
inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 4), i_(32, 8)],
output: v(i(64), 4),
definition: Named("llvm.x86.avx2.gather.d.q.256")
},
"256_mask_i32gather_pd" => Intrinsic {
inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 4), i_(32, 8)],
output: v(f(64), 4),
definition: Named("llvm.x86.avx2.gather.d.pd.256")
},
"_mask_i64gather_epi32" => Intrinsic {
inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 2), v(i(32), 4), i_(32, 8)],
output: v(i(32), 4),
definition: Named("llvm.x86.avx2.gather.q.d")
},
"_mask_i64gather_ps" => Intrinsic {
inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(64), 2), v_(i(32), f(32), 4), i_(32, 8)],
output: v(f(32), 4),
definition: Named("llvm.x86.avx2.gather.q.ps")
},
"256_mask_i64gather_epi32" => Intrinsic {
inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 4), v(i(32), 4), i_(32, 8)],
output: v(i(32), 4),
definition: Named("llvm.x86.avx2.gather.q.d")
},
"256_mask_i64gather_ps" => Intrinsic {
inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(64), 4), v_(i(32), f(32), 4), i_(32, 8)],
output: v(f(32), 4),
definition: Named("llvm.x86.avx2.gather.q.ps")
},
"_mask_i64gather_epi64" => Intrinsic {
inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2), i_(32, 8)],
output: v(i(64), 2),
definition: Named("llvm.x86.avx2.gather.q.q")
},
"_mask_i64gather_pd" => Intrinsic {
inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(64), 2), v_(i(64), f(64), 2), i_(32, 8)],
output: v(f(64), 2),
definition: Named("llvm.x86.avx2.gather.q.pd")
},
"256_mask_i64gather_epi64" => Intrinsic {
inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4), i_(32, 8)],
output: v(i(64), 4),
definition: Named("llvm.x86.avx2.gather.q.q.256")
},
"256_mask_i64gather_pd" => Intrinsic {
inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(64), 4), v_(i(64), f(64), 4), i_(32, 8)],
output: v(f(64), 4),
definition: Named("llvm.x86.avx2.gather.q.pd.256")
},
"_maskload_epi32" => Intrinsic {
inputs: vec![p(true, v(i(32), 4), Some(i(8))), v(i(32), 4)],
output: v(i(32), 4),
definition: Named("llvm.x86.avx2.maskload.d")
},
"_maskload_epi64" => Intrinsic {
inputs: vec![p(true, v(i(64), 2), Some(i(8))), v(i(64), 2)],
output: v(i(64), 2),
definition: Named("llvm.x86.avx2.maskload.q")
},
"256_maskload_epi32" => Intrinsic {
inputs: vec![p(true, v(i(32), 8), Some(i(8))), v(i(32), 8)],
output: v(i(32), 8),
definition: Named("llvm.x86.avx2.maskload.d.256")
},
"256_maskload_epi64" => Intrinsic {
inputs: vec![p(true, v(i(64), 4), Some(i(8))), v(i(64), 4)],
output: v(i(64), 4),
definition: Named("llvm.x86.avx2.maskload.q.256")
},
"_maskstore_epi32" => Intrinsic {
inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4)],
output: void(),
definition: Named("llvm.x86.avx2.maskstore.d")
},
"_maskstore_epi64" => Intrinsic {
inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2)],
output: void(),
definition: Named("llvm.x86.avx2.maskstore.q")
},
"256_maskstore_epi32" => Intrinsic {
inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8)],
output: void(),
definition: Named("llvm.x86.avx2.maskstore.d.256")
},
"256_maskstore_epi64" => Intrinsic {
inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4)],
output: void(),
definition: Named("llvm.x86.avx2.maskstore.q.256")
},
"256_max_epi8" => Intrinsic {
inputs: vec![v(i(8), 32), v(i(8), 32)],
output: v(i(8), 32),

View File

@ -936,6 +936,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
any_changes_needed: &mut bool) -> Vec<Type> {
use intrinsics::Type::*;
match *t {
Void => vec![Type::void(ccx)],
Integer(_signed, width, llvm_width) => {
*any_changes_needed |= width != llvm_width;
vec![Type::ix(ccx, llvm_width as u64)]
@ -947,14 +948,29 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
_ => unreachable!()
}
}
Pointer(_) => unimplemented!(),
Vector(ref t, length) => {
Pointer(ref t, ref llvm_elem, _const) => {
*any_changes_needed |= llvm_elem.is_some();
let t = llvm_elem.as_ref().unwrap_or(t);
let elem = one(ty_to_type(ccx, t,
any_changes_needed));
vec![elem.ptr_to()]
}
Vector(ref t, ref llvm_elem, length) => {
*any_changes_needed |= llvm_elem.is_some();
let t = llvm_elem.as_ref().unwrap_or(t);
let elem = one(ty_to_type(ccx, t,
any_changes_needed));
vec![Type::vector(&elem,
length as u64)]
}
Aggregate(false, _) => unimplemented!(),
Aggregate(false, ref contents) => {
let elems = contents.iter()
.map(|t| one(ty_to_type(ccx, t, any_changes_needed)))
.collect::<Vec<_>>();
vec![Type::struct_(ccx, &elems, false)]
}
Aggregate(true, ref contents) => {
*any_changes_needed = true;
contents.iter()
@ -965,8 +981,9 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
}
// This allows an argument list like `foo, (bar, baz),
// qux` to be converted into `foo, bar, baz, qux`, and
// integer arguments to be truncated as needed.
// qux` to be converted into `foo, bar, baz, qux`, integer
// arguments to be truncated as needed and pointers to be
// cast.
fn modify_as_needed<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
t: &intrinsics::Type,
arg_type: Ty<'tcx>,
@ -991,6 +1008,16 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
})
.collect()
}
intrinsics::Type::Pointer(_, Some(ref llvm_elem), _) => {
let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false));
vec![PointerCast(bcx, llarg,
llvm_elem.ptr_to())]
}
intrinsics::Type::Vector(_, Some(ref llvm_elem), length) => {
let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false));
vec![BitCast(bcx, llarg,
Type::vector(&llvm_elem, length as u64))]
}
intrinsics::Type::Integer(_, width, llvm_width) if width != llvm_width => {
// the LLVM intrinsic uses a smaller integer
// size than the C intrinsic's signature, so
@ -1027,7 +1054,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
};
assert_eq!(inputs.len(), llargs.len());
match intr.definition {
let val = match intr.definition {
intrinsics::IntrinsicDef::Named(name) => {
let f = declare::declare_cfn(ccx,
name,
@ -1035,6 +1062,20 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
tcx.mk_nil());
Call(bcx, f, &llargs, None, call_debug_location)
}
};
match intr.output {
intrinsics::Type::Aggregate(flatten, ref elems) => {
// the output is a tuple so we need to munge it properly
assert!(!flatten);
for i in 0..elems.len() {
let val = ExtractValue(bcx, val, i);
Store(bcx, val, StructGEP(bcx, llresult, i));
}
C_nil(ccx)
}
_ => val,
}
}
};

View File

@ -464,6 +464,10 @@ fn match_intrinsic_type_to_type<'tcx, 'a>(
};
match *expected {
Void => match t.sty {
ty::TyTuple(ref v) if v.is_empty() => {},
_ => simple_error(&format!("`{}`", t), "()"),
},
// (The width we pass to LLVM doesn't concern the type checker.)
Integer(signed, bits, _llvm_width) => match (signed, bits, &t.sty) {
(true, 8, &ty::TyInt(hir::IntTy::TyI8)) |
@ -485,8 +489,21 @@ fn match_intrinsic_type_to_type<'tcx, 'a>(
_ => simple_error(&format!("`{}`", t),
&format!("`f{n}`", n = bits)),
},
Pointer(_) => unimplemented!(),
Vector(ref inner_expected, len) => {
// A pointer in the intrinsic's signature must be matched by a raw pointer
// whose mutability agrees with `const_` and whose pointee matches
// `inner_expected`. The LLVM-level pointee override (`_llvm_type`) plays
// no part in type checking.
Pointer(ref inner_expected, ref _llvm_type, const_) => {
    match t.sty {
        ty::TyRawPtr(ty::TypeAndMut { ty, mutbl }) => {
            if (mutbl == hir::MutImmutable) != const_ {
                simple_error(&format!("`{}`", t),
                             if const_ {"const pointer"} else {"mut pointer"})
            }
            // The pointee is still checked after a mutability mismatch.
            match_intrinsic_type_to_type(tcx, position, span, structural_to_nominal,
                                         inner_expected, ty)
        }
        // The expected-type text is constant, so no `format!` is needed.
        _ => simple_error(&format!("`{}`", t), "raw pointer"),
    }
}
Vector(ref inner_expected, ref _llvm_type, len) => {
if !t.is_simd() {
simple_error(&format!("non-simd type `{}`", t),
"simd type");