Auto merge of #28221 - huonw:simd, r=alexcrichton
The ARM equivalents of the AArch64 intrinsics are annoyingly more complicated (and some of the AArch64 ones are too). I think I've now exposed all the x86 intrinsics from SSE to AVX2 (at least, the ones that LLVM implements as callable intrinsics).
This commit is contained in:
commit
7ee876cb8e
|
@ -336,6 +336,48 @@
|
|||
"ret": "i8",
|
||||
"args": ["0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "ld2{0[0].width}_{0[0].data_type}",
|
||||
"width": [64, 128],
|
||||
"llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}",
|
||||
"ret": ["[i(8-64);2]","[f(32-64);2]"],
|
||||
"args": ["0.0SPc/0.0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "ld3{0[0].width}_{0[0].data_type}",
|
||||
"width": [64, 128],
|
||||
"llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}",
|
||||
"ret": ["[i(8-64);3]","[f(32-64);3]"],
|
||||
"args": ["0.0SPc/0.0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "ld4{0[0].width}_{0[0].data_type}",
|
||||
"width": [64, 128],
|
||||
"llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}",
|
||||
"ret": ["[i(8-64);4]","[f(32-64);4]"],
|
||||
"args": ["0.0SPc/0.0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "ld2{0[0].width}_dup_{0[0].data_type}",
|
||||
"width": [64, 128],
|
||||
"llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}",
|
||||
"ret": ["[i(8-64);2]","[f(32-64);2]"],
|
||||
"args": ["0.0SPc"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "ld3{0[0].width}_dup_{0[0].data_type}",
|
||||
"width": [64, 128],
|
||||
"llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}",
|
||||
"ret": ["[i(8-64);3]","[f(32-64);3]"],
|
||||
"args": ["0.0SPc"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "ld4{0[0].width}_dup_{0[0].data_type}",
|
||||
"width": [64, 128],
|
||||
"llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}",
|
||||
"ret": ["[i(8-64);4]","[f(32-64);4]"],
|
||||
"args": ["0.0SPc"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "padd{0.width}_{0.data_type}",
|
||||
"width": [64, 128],
|
||||
|
|
|
@ -14,11 +14,13 @@ import argparse
|
|||
import sys
|
||||
import re
|
||||
import textwrap
|
||||
import itertools
|
||||
|
||||
SPEC = re.compile(
|
||||
r'^(?:(?P<id>[iusfIUSF])(?:\((?P<start>\d+)-(?P<end>\d+)\)|'
|
||||
r'^(?:(?P<void>V)|(?P<id>[iusfIUSF])(?:\((?P<start>\d+)-(?P<end>\d+)\)|'
|
||||
r'(?P<width>\d+)(:?/(?P<llvm_width>\d+))?)'
|
||||
r'|(?P<reference>\d+)(?P<modifiers>[vShdnwus]*)(?P<force_width>x\d+)?)$'
|
||||
r'|(?P<reference>\d+))(?P<index>\.\d+)?(?P<modifiers>[vShdnwusfDMC]*)(?P<force_width>x\d+)?'
|
||||
r'(?:(?P<pointer>Pm|Pc)(?P<llvm_pointer>/.*)?|(?P<bitcast>->.*))?$'
|
||||
)
|
||||
|
||||
class PlatformInfo(object):
|
||||
|
@ -68,18 +70,35 @@ class IntrinsicSet(object):
|
|||
{k: lookup(v) for k, v in data.items()})
|
||||
|
||||
class PlatformTypeInfo(object):
    """Platform-specific naming information for one type.

    An instance is either a "plain" info, holding an LLVM type name plus a
    dict of properties that are readable as attributes, or an "aggregate"
    info, holding only a list of per-element infos that are readable by
    index.
    """

    def __init__(self, llvm_name, properties, elems = None):
        """Build a plain info (`elems` is None) or an aggregate info.

        For an aggregate, `llvm_name` and `properties` must both be None;
        the element infos are stored and the property dict is left empty.
        """
        if elems is None:
            self.properties = properties
            self.llvm_name = llvm_name
        else:
            assert properties is None and llvm_name is None
            self.properties = {}
            self.elems = elems

    def __repr__(self):
        # Aggregate infos have no `llvm_name`, so describe them by their
        # elements instead of failing on the missing attribute.
        elems = self.__dict__.get('elems')
        if elems is not None:
            return '<PlatformTypeInfo {}>'.format(elems)
        return '<PlatformTypeInfo {}, {}>'.format(self.llvm_name, self.properties)

    def __getattr__(self, name):
        """Expose entries of `properties` as attributes.

        Only invoked when normal attribute lookup fails. Raises
        AttributeError (not KeyError) for unknown names so that `hasattr`,
        `getattr(..., default)` and `copy` behave as they do for any other
        object.
        """
        # Go through __dict__ directly: reading `self.properties` here would
        # re-enter __getattr__ forever on an instance whose __init__ has not
        # run yet (e.g. while being copied).
        properties = self.__dict__.get('properties')
        if properties is None:
            raise AttributeError(name)
        try:
            return properties[name]
        except KeyError:
            raise AttributeError(name)

    def __getitem__(self, idx):
        """Return the info of the `idx`th element of an aggregate info."""
        return self.elems[idx]

    def vectorize(self, length, width_info):
        """Return the info for a `length`-element vector of this type.

        The new info gets a copy of this info's properties updated with
        `width_info`, and the LLVM name `v<length><llvm_name>`.
        """
        props = self.properties.copy()
        props.update(width_info)
        return PlatformTypeInfo('v{}{}'.format(length, self.llvm_name), props)

    def pointer(self, llvm_elem):
        """Return the info for a pointer to this type.

        The LLVM name is `p0` followed by the pointee name, which is taken
        from `llvm_elem` when one is given and from this info otherwise.
        The properties dict is shared with this info, not copied.
        """
        name = self.llvm_name if llvm_elem is None else llvm_elem.llvm_name
        return PlatformTypeInfo('p0{}'.format(name), self.properties)
|
||||
|
||||
BITWIDTH_POINTER = '<pointer>'
|
||||
|
||||
class Type(object):
|
||||
def __init__(self, bitwidth):
|
||||
self._bitwidth = bitwidth
|
||||
|
@ -87,18 +106,39 @@ class Type(object):
|
|||
def bitwidth(self):
|
||||
return self._bitwidth
|
||||
|
||||
def modify(self, spec, width):
|
||||
def modify(self, spec, width, previous):
|
||||
raise NotImplementedError()
|
||||
|
||||
def __ne__(self, other):
|
||||
return not (self == other)
|
||||
|
||||
class Void(Type):
    """The `V` type: `void` on the compiler side, `()` on the Rust side."""

    def __init__(self):
        # Registered with a bitwidth of zero.
        Type.__init__(self, 0)

    def __eq__(self, other):
        # Every Void instance is equal to every other Void instance.
        return isinstance(other, Void)

    def rust_name(self):
        """Name of this type in Rust source."""
        return '()'

    def compiler_ctor(self):
        """Expression that constructs this type in the generated compiler tables."""
        return 'void()'

    def type_info(self, platform_info):
        """Void has no platform type info; always returns None."""
        return None
|
||||
|
||||
class Number(Type):
|
||||
def __init__(self, bitwidth):
|
||||
Type.__init__(self, bitwidth)
|
||||
|
||||
def modify(self, spec, width):
|
||||
def modify(self, spec, width, previous):
|
||||
if spec == 'u':
|
||||
return Unsigned(self.bitwidth())
|
||||
elif spec == 's':
|
||||
return Signed(self.bitwidth())
|
||||
elif spec == 'f':
|
||||
return Float(self.bitwidth())
|
||||
elif spec == 'w':
|
||||
return self.__class__(self.bitwidth() * 2)
|
||||
elif spec == 'n':
|
||||
|
@ -111,11 +151,16 @@ class Number(Type):
|
|||
def type_info(self, platform_info):
|
||||
return platform_info.number_type_info(self)
|
||||
|
||||
def __eq__(self, other):
|
||||
# print(self, other)
|
||||
return self.__class__ == other.__class__ and self.bitwidth() == other.bitwidth()
|
||||
|
||||
class Signed(Number):
|
||||
def __init__(self, bitwidth, llvm_bitwidth = None):
|
||||
Number.__init__(self, bitwidth)
|
||||
self._llvm_bitwidth = llvm_bitwidth
|
||||
|
||||
|
||||
def compiler_ctor(self):
|
||||
if self._llvm_bitwidth is None:
|
||||
return 'i({})'.format(self.bitwidth())
|
||||
|
@ -164,26 +209,47 @@ class Float(Number):
|
|||
return 'f{}'.format(self.bitwidth())
|
||||
|
||||
class Vector(Type):
|
||||
def __init__(self, elem, length):
|
||||
def __init__(self, elem, length, bitcast = None):
|
||||
assert isinstance(elem, Type) and not isinstance(elem, Vector)
|
||||
Type.__init__(self,
|
||||
elem.bitwidth() * length)
|
||||
self._length = length
|
||||
self._elem = elem
|
||||
assert bitcast is None or (isinstance(bitcast, Vector) and
|
||||
bitcast._bitcast is None and
|
||||
bitcast._elem.bitwidth() == elem.bitwidth())
|
||||
if bitcast is not None and bitcast._elem != elem:
|
||||
self._bitcast = bitcast._elem
|
||||
else:
|
||||
self._bitcast = None
|
||||
|
||||
def modify(self, spec, width):
|
||||
if spec == 'h':
|
||||
def modify(self, spec, width, previous):
|
||||
if spec == 'S':
|
||||
return self._elem
|
||||
elif spec == 'h':
|
||||
return Vector(self._elem, self._length // 2)
|
||||
elif spec == 'd':
|
||||
return Vector(self._elem, self._length * 2)
|
||||
elif spec.startswith('x'):
|
||||
new_bitwidth = int(spec[1:])
|
||||
return Vector(self._elem, new_bitwidth // self._elem.bitwidth())
|
||||
elif spec.startswith('->'):
|
||||
bitcast_to = TypeSpec(spec[2:])
|
||||
choices = list(bitcast_to.enumerate(width, previous))
|
||||
assert len(choices) == 1
|
||||
bitcast_to = choices[0]
|
||||
return Vector(self._elem, self._length, bitcast_to)
|
||||
else:
|
||||
return Vector(self._elem.modify(spec, width), self._length)
|
||||
return Vector(self._elem.modify(spec, width, previous), self._length)
|
||||
|
||||
def compiler_ctor(self):
|
||||
return 'v({}, {})'.format(self._elem.compiler_ctor(), self._length)
|
||||
if self._bitcast is None:
|
||||
return 'v({}, {})'.format(self._elem.compiler_ctor(),
|
||||
self._length)
|
||||
else:
|
||||
return 'v_({}, {}, {})'.format(self._elem.compiler_ctor(),
|
||||
self._bitcast.compiler_ctor(),
|
||||
self._length)
|
||||
|
||||
def rust_name(self):
|
||||
return '{}x{}'.format(self._elem.rust_name(), self._length)
|
||||
|
@ -193,6 +259,51 @@ class Vector(Type):
|
|||
return elem_info.vectorize(self._length,
|
||||
platform_info.width_info(self.bitwidth()))
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, Vector) and self._length == other._length and \
|
||||
self._elem == other._elem and self._bitcast == other._bitcast
|
||||
|
||||
class Pointer(Type):
    """A const or mut raw pointer to `elem`.

    `llvm_elem`, when not None, is the pointee type to use on the LLVM side
    in place of `elem`.
    """

    def __init__(self, elem, llvm_elem, const):
        Type.__init__(self, BITWIDTH_POINTER)
        self._elem = elem
        self._llvm_elem = llvm_elem
        self._const = const

    def modify(self, spec, width, previous):
        """Apply one modifier to this pointer.

        'D' dereferences (yields the pointee), 'M' / 'C' yield a mut / const
        pointer to the same pointee, and any other modifier is forwarded to
        the pointee while the pointer itself is kept as is.
        """
        if spec == 'D':
            return self._elem
        if spec in ('M', 'C'):
            return Pointer(self._elem, self._llvm_elem, spec == 'C')
        pointee = self._elem.modify(spec, width, previous)
        return Pointer(pointee, self._llvm_elem, self._const)

    def compiler_ctor(self):
        """Expression that constructs this type in the generated compiler tables."""
        if self._llvm_elem is None:
            llvm_ctor = 'None'
        else:
            llvm_ctor = 'Some({})'.format(self._llvm_elem.compiler_ctor())
        const_flag = 'true' if self._const else 'false'
        return 'p({}, {}, {})'.format(const_flag, self._elem.compiler_ctor(), llvm_ctor)

    def rust_name(self):
        """Name of this type in Rust source, e.g. `*const <elem>`."""
        qualifier = 'const' if self._const else 'mut'
        return '*{} {}'.format(qualifier, self._elem.rust_name())

    def type_info(self, platform_info):
        """Platform info of the pointee, wrapped as a pointer."""
        llvm_info = None
        if self._llvm_elem is not None:
            llvm_info = self._llvm_elem.type_info(platform_info)
        return self._elem.type_info(platform_info).pointer(llvm_info)

    def __eq__(self, other):
        if not isinstance(other, Pointer):
            return False
        return (self._const == other._const
                and self._elem == other._elem
                and self._llvm_elem == other._llvm_elem)
|
||||
|
||||
class Aggregate(Type):
|
||||
def __init__(self, flatten, elems):
|
||||
self._flatten = flatten
|
||||
|
@ -202,6 +313,14 @@ class Aggregate(Type):
|
|||
def __repr__(self):
|
||||
return '<Aggregate {}>'.format(self._elems)
|
||||
|
||||
def modify(self, spec, width, previous):
|
||||
if spec.startswith('.'):
|
||||
num = int(spec[1:])
|
||||
return self._elems[num]
|
||||
else:
|
||||
print(spec)
|
||||
raise NotImplementedError()
|
||||
|
||||
def compiler_ctor(self):
|
||||
return 'agg({}, vec![{}])'.format('true' if self._flatten else 'false',
|
||||
', '.join(elem.compiler_ctor() for elem in self._elems))
|
||||
|
@ -210,8 +329,11 @@ class Aggregate(Type):
|
|||
return '({})'.format(', '.join(elem.rust_name() for elem in self._elems))
|
||||
|
||||
def type_info(self, platform_info):
|
||||
#return PlatformTypeInfo(None, None, self._llvm_name)
|
||||
return None
|
||||
return PlatformTypeInfo(None, None, [elem.type_info(platform_info) for elem in self._elems])
|
||||
|
||||
def __eq__(self, other):
|
||||
return isinstance(other, Aggregate) and self._flatten == other._flatten and \
|
||||
self._elems == other._elems
|
||||
|
||||
|
||||
TYPE_ID_LOOKUP = {'i': [Signed, Unsigned],
|
||||
|
@ -219,6 +341,22 @@ TYPE_ID_LOOKUP = {'i': [Signed, Unsigned],
|
|||
'u': [Unsigned],
|
||||
'f': [Float]}
|
||||
|
||||
def ptrify(match, elem, width, previous):
    """Wrap `elem` in a Pointer if the matched spec carries a pointer suffix.

    `match` is a SPEC match object. Without a `pointer` group, `elem` is
    returned untouched. Otherwise the optional `llvm_pointer` group
    (`/<type>`) is parsed and must enumerate to exactly one type, which
    becomes the pointer's LLVM-side pointee.
    """
    kind = match.group('pointer')
    if kind is None:
        return elem

    llvm_elem = None
    llvm_spec = match.group('llvm_pointer')
    if llvm_spec is not None:
        assert llvm_spec.startswith('/')
        # Strip the leading '/' and resolve what is left as a type spec.
        candidates = list(TypeSpec(llvm_spec[1:]).enumerate(width, previous))
        assert len(candidates) == 1
        llvm_elem = candidates[0]

    assert kind in ('Pc', 'Pm')
    return Pointer(elem, llvm_elem, kind == 'Pc')
|
||||
|
||||
class TypeSpec(object):
|
||||
def __init__(self, spec):
|
||||
if not isinstance(spec, list):
|
||||
|
@ -226,71 +364,103 @@ class TypeSpec(object):
|
|||
|
||||
self.spec = spec
|
||||
|
||||
def enumerate(self, width):
|
||||
def enumerate(self, width, previous):
|
||||
for spec in self.spec:
|
||||
match = SPEC.match(spec)
|
||||
if match:
|
||||
if match is not None:
|
||||
id = match.group('id')
|
||||
is_vector = id.islower()
|
||||
type_ctors = TYPE_ID_LOOKUP[id.lower()]
|
||||
reference = match.group('reference')
|
||||
|
||||
start = match.group('start')
|
||||
if start is not None:
|
||||
end = match.group('end')
|
||||
llvm_width = None
|
||||
modifiers = []
|
||||
index = match.group('index')
|
||||
if index is not None:
|
||||
modifiers.append(index)
|
||||
modifiers += list(match.group('modifiers') or '')
|
||||
force = match.group('force_width')
|
||||
if force is not None:
|
||||
modifiers.append(force)
|
||||
bitcast = match.group('bitcast')
|
||||
if bitcast is not None:
|
||||
modifiers.append(bitcast)
|
||||
|
||||
if match.group('void') is not None:
|
||||
assert spec == 'V'
|
||||
yield Void()
|
||||
elif id is not None:
|
||||
is_vector = id.islower()
|
||||
type_ctors = TYPE_ID_LOOKUP[id.lower()]
|
||||
|
||||
start = match.group('start')
|
||||
if start is not None:
|
||||
end = match.group('end')
|
||||
llvm_width = None
|
||||
else:
|
||||
start = end = match.group('width')
|
||||
llvm_width = match.group('llvm_width')
|
||||
start = int(start)
|
||||
end = int(end)
|
||||
|
||||
bitwidth = start
|
||||
while bitwidth <= end:
|
||||
for ctor in type_ctors:
|
||||
if llvm_width is not None:
|
||||
assert not is_vector
|
||||
llvm_width = int(llvm_width)
|
||||
assert llvm_width < bitwidth
|
||||
scalar = ctor(bitwidth, llvm_width)
|
||||
else:
|
||||
scalar = ctor(bitwidth)
|
||||
|
||||
if is_vector:
|
||||
elem = Vector(scalar, width // bitwidth)
|
||||
else:
|
||||
assert bitcast is None
|
||||
elem = scalar
|
||||
|
||||
for x in modifiers:
|
||||
elem = elem.modify(x, width, previous)
|
||||
yield ptrify(match, elem, width, previous)
|
||||
bitwidth *= 2
|
||||
elif reference is not None:
|
||||
reference = int(reference)
|
||||
assert reference < len(previous), \
|
||||
'referring to argument {}, but only {} are known'.format(reference,
|
||||
len(previous))
|
||||
ret = previous[reference]
|
||||
for x in modifiers:
|
||||
ret = ret.modify(x, width, previous)
|
||||
yield ptrify(match, ret, width, previous)
|
||||
else:
|
||||
start = end = match.group('width')
|
||||
llvm_width = match.group('llvm_width')
|
||||
start = int(start)
|
||||
end = int(end)
|
||||
assert False, 'matched `{}`, but didn\'t understand it?'.format(spec)
|
||||
elif spec.startswith('('):
|
||||
if spec.endswith(')'):
|
||||
true_spec = spec[1:-1]
|
||||
flatten = False
|
||||
elif spec.endswith(')f'):
|
||||
true_spec = spec[1:-2]
|
||||
flatten = True
|
||||
else:
|
||||
assert False, 'found unclosed aggregate `{}`'.format(spec)
|
||||
|
||||
bitwidth = start
|
||||
while bitwidth <= end:
|
||||
for ctor in type_ctors:
|
||||
if llvm_width is not None:
|
||||
assert not is_vector
|
||||
llvm_width = int(llvm_width)
|
||||
assert llvm_width < bitwidth
|
||||
scalar = ctor(bitwidth, llvm_width)
|
||||
else:
|
||||
scalar = ctor(bitwidth)
|
||||
for elems in itertools.product(*(TypeSpec(subspec).enumerate(width, previous)
|
||||
for subspec in true_spec.split(','))):
|
||||
yield Aggregate(flatten, elems)
|
||||
elif spec.startswith('['):
|
||||
if spec.endswith(']'):
|
||||
true_spec = spec[1:-1]
|
||||
flatten = False
|
||||
elif spec.endswith(']f'):
|
||||
true_spec = spec[1:-2]
|
||||
flatten = True
|
||||
else:
|
||||
assert False, 'found unclosed aggregate `{}`'.format(spec)
|
||||
elem_spec, count = true_spec.split(';')
|
||||
|
||||
if is_vector:
|
||||
yield Vector(scalar, width // bitwidth)
|
||||
else:
|
||||
yield scalar
|
||||
bitwidth *= 2
|
||||
count = int(count)
|
||||
for elem in TypeSpec(elem_spec).enumerate(width, previous):
|
||||
yield Aggregate(flatten, [elem] * count)
|
||||
else:
|
||||
print('Failed to parse: `{}`'.format(spec), file=sys.stderr)
|
||||
|
||||
def resolve(self, width, zero):
|
||||
assert len(self.spec) == 1
|
||||
spec = self.spec[0]
|
||||
match = SPEC.match(spec)
|
||||
if match:
|
||||
id = match.group('id')
|
||||
if id is not None:
|
||||
options = list(self.enumerate(width))
|
||||
assert len(options) == 1
|
||||
return options[0]
|
||||
reference = match.group('reference')
|
||||
if reference != '0':
|
||||
raise NotImplementedError('only argument 0 (return value) references are supported')
|
||||
ret = zero
|
||||
for x in match.group('modifiers') or []:
|
||||
ret = ret.modify(x, width)
|
||||
force = match.group('force_width')
|
||||
if force is not None:
|
||||
ret = ret.modify(force, width)
|
||||
return ret
|
||||
elif spec.startswith('('):
|
||||
if spec.endswith(')'):
|
||||
raise NotImplementedError()
|
||||
elif spec.endswith(')f'):
|
||||
true_spec = spec[1:-2]
|
||||
flatten = True
|
||||
elems = [TypeSpec(subspec).resolve(width, zero) for subspec in true_spec.split(',')]
|
||||
return Aggregate(flatten, elems)
|
||||
assert False, 'Failed to parse `{}`'.format(spec)
|
||||
|
||||
class GenericIntrinsic(object):
|
||||
def __init__(self, platform, intrinsic, widths, llvm_name, ret, args):
|
||||
|
@ -305,10 +475,22 @@ class GenericIntrinsic(object):
|
|||
for width in self.widths:
|
||||
# must be a power of two
|
||||
assert width & (width - 1) == 0
|
||||
for ret in self.ret.enumerate(width):
|
||||
args = [arg.resolve(width, ret) for arg in self.args]
|
||||
yield MonomorphicIntrinsic(self._platform, self.intrinsic, width, self.llvm_name,
|
||||
ret, args)
|
||||
def recur(processed, untouched):
|
||||
if untouched == []:
|
||||
ret = processed[0]
|
||||
args = processed[1:]
|
||||
yield MonomorphicIntrinsic(self._platform, self.intrinsic, width,
|
||||
self.llvm_name,
|
||||
ret, args)
|
||||
else:
|
||||
raw_arg = untouched[0]
|
||||
rest = untouched[1:]
|
||||
for arg in raw_arg.enumerate(width, processed):
|
||||
for intr in recur(processed + [arg], rest):
|
||||
yield intr
|
||||
|
||||
for x in recur([], [self.ret] + self.args):
|
||||
yield x
|
||||
|
||||
class MonomorphicIntrinsic(object):
|
||||
def __init__(self, platform, intrinsic, width, llvm_name, ret, args):
|
||||
|
@ -369,7 +551,18 @@ def parse_args():
|
|||
## Type specifier grammar
|
||||
|
||||
```
|
||||
type := vector | scalar | aggregate | reference
|
||||
type := core_type modifier* suffix?
|
||||
|
||||
core_type := void | vector | scalar | aggregate | reference
|
||||
|
||||
modifier := 'v' | 'S' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' | 'f' | 'D' | 'M' | 'C' |
|
||||
'x' number | '.' number
|
||||
suffix := pointer | bitcast
|
||||
pointer := 'Pm' llvm_pointer? | 'Pc' llvm_pointer?
|
||||
llvm_pointer := '/' type
|
||||
bitcast := '->' type
|
||||
|
||||
void := 'V'
|
||||
|
||||
vector := vector_elem width |
|
||||
vector_elem := 'i' | 'u' | 's' | 'f'
|
||||
|
@ -378,18 +571,20 @@ def parse_args():
|
|||
scalar_type := 'U' | 'S' | 'F'
|
||||
llvm_width := '/' number
|
||||
|
||||
aggregate := '(' (type),* ')' 'f'?
|
||||
|
||||
reference := number modifiers*
|
||||
modifiers := 'v' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' |
|
||||
'x' number
|
||||
aggregate := '(' (type),* ')' 'f'? | '[' type ';' number ']' 'f'?
|
||||
|
||||
reference := number
|
||||
|
||||
width = number | '(' number '-' number ')'
|
||||
|
||||
number = [0-9]+
|
||||
```
|
||||
|
||||
## Void
|
||||
|
||||
The `V` type corresponds to `void` in LLVM (`()` in
|
||||
Rust). It's likely to only work in return position.
|
||||
|
||||
## Vectors
|
||||
|
||||
The vector grammar is a pattern describing many possibilities
|
||||
|
@ -433,6 +628,12 @@ def parse_args():
|
|||
- no `f` corresponds to `declare ... @llvm.foo({float, i32})`.
|
||||
- having an `f` corresponds to `declare ... @llvm.foo(float, i32)`.
|
||||
|
||||
The `[type;number]` form is just a shorter way to write
|
||||
`(...)`, except that it avoids doing a cartesian product of generic
|
||||
types, e.g. `[S32;2]` is the same as `(S32, S32)`, while
|
||||
`[I32;2]` is describing just the two types `(S32,S32)` and
|
||||
`(U32,U32)` (i.e. doesn't include `(S32,U32)`, `(U32,S32)` as
|
||||
`(I32,I32)` would).
|
||||
|
||||
(Currently aggregates can not contain other aggregates.)
|
||||
|
||||
|
@ -441,19 +642,49 @@ def parse_args():
|
|||
A reference uses the type of another argument, with possible
|
||||
modifications. The number refers to the type to use, starting
|
||||
with 0 == return value, 1 == first argument, 2 == second
|
||||
argument, etc. (Currently only referencing 0, the return
|
||||
value, is supported.)
|
||||
argument, etc.
|
||||
|
||||
## Affixes
|
||||
|
||||
The `modifier` and `suffix` adaptors change the precise
|
||||
representation.
|
||||
|
||||
### Modifiers
|
||||
|
||||
- 'v': put a scalar into a vector of the current width (u32 -> u32x4, when width == 128)
|
||||
- 'S': get the scalar element of a vector (u32x4 -> u32)
|
||||
- 'h': halve the length of the vector (u32x4 -> u32x2)
|
||||
- 'd': double the length of the vector (u32x2 -> u32x4)
|
||||
- 'n': narrow the element of the vector (u32x4 -> u16x4)
|
||||
- 'w': widen the element of the vector (u16x4 -> u32x4)
|
||||
- 'u': force an integer (vector or scalar) to be unsigned (i32x4 -> u32x4)
|
||||
- 's': force an integer (vector or scalar) to be signed (u32x4 -> i32x4)
|
||||
- 'u': force a number (vector or scalar) to be unsigned int (f32x4 -> u32x4)
|
||||
- 's': force a number (vector or scalar) to be signed int (u32x4 -> i32x4)
|
||||
- 'f': force a number (vector or scalar) to be float (u32x4 -> f32x4)
|
||||
- 'x' number: force the type to be a vector of bitwidth `number`.
|
||||
- '.' number: get the `number`th element of an aggregate
|
||||
- 'D': dereference a pointer (*mut u32 -> u32)
|
||||
- 'C': make a pointer const (*mut u32 -> *const u32)
|
||||
- 'M': make a pointer mut (*const u32 -> *mut u32)
|
||||
|
||||
### Pointers
|
||||
|
||||
Pointers can be created of any type by appending a `P*`
|
||||
suffix. The `m` vs. `c` chooses mut vs. const. e.g. `S32Pm`
|
||||
corresponds to `*mut i32`, and `i(8-64)Pc` corresponds (with width
|
||||
128) to `*const i8x16`, `*const u32x4`, etc.
|
||||
|
||||
The type after the `/` (optional) represents the type used
|
||||
internally to LLVM, e.g. `S32Pm/S8` is exposed as `*mut i32`
|
||||
in Rust, but is `i8*` in LLVM. (This defaults to the main
|
||||
type).
|
||||
|
||||
### Bitcast
|
||||
|
||||
The `'->' type` bitcast suffix will cause the value to be
|
||||
bitcast to the right-hand type when calling the intrinsic,
|
||||
e.g. `s32->f32` will expose the intrinsic as `i32x4` at the
|
||||
Rust level, but will cast that vector to `f32x4` when calling
|
||||
the LLVM intrinsic.
|
||||
'''))
|
||||
parser.add_argument('--format', choices=FORMATS, required=True,
|
||||
help = 'Output format.')
|
||||
|
@ -502,7 +733,7 @@ class CompilerDefs(object):
|
|||
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use {{Intrinsic, i, i_, u, u_, f, v, agg}};
|
||||
use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}};
|
||||
use IntrinsicDef::Named;
|
||||
use rustc::middle::ty;
|
||||
|
||||
|
|
|
@ -36,6 +36,20 @@
|
|||
"ret": "f(32-64)",
|
||||
"args": ["0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_maskload_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "maskload.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": ["f(32-64)"],
|
||||
"args": ["0SPc/S8", "0s->0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{3.width_mm}_maskstore_{3.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "maskstore.{3.data_type_short}{3.width_suffix}",
|
||||
"ret": "V",
|
||||
"args": ["F(32-64)Pm/S8", "1Dsv->1Dv", "1Dv"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_min_{0.data_type}",
|
||||
"width": [256],
|
||||
|
@ -78,6 +92,20 @@
|
|||
"ret": "f32",
|
||||
"args": ["f32"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_storeu_{2.data_type}",
|
||||
"width": [256],
|
||||
"llvm": "storeu.ps.256",
|
||||
"ret": "V",
|
||||
"args": ["f(32-64)Pm/U8", "1D"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_storeu_si256",
|
||||
"width": [256],
|
||||
"llvm": "storeu.dq.256",
|
||||
"ret": "V",
|
||||
"args": ["u8Pm/U8", "1D"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_sqrt_{0.data_type}",
|
||||
"width": [256],
|
||||
|
@ -147,6 +175,20 @@
|
|||
"llvm": "ptestz.256",
|
||||
"ret": "S32",
|
||||
"args": ["u64", "u64"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_zeroall",
|
||||
"width": [256],
|
||||
"llvm": "vzeroall",
|
||||
"ret": "V",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_zeroupper",
|
||||
"width": [256],
|
||||
"llvm": "vzeroupper",
|
||||
"ret": "V",
|
||||
"args": []
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -4,21 +4,21 @@
|
|||
{
|
||||
"intrinsic": "256_abs_{0.data_type}",
|
||||
"width": [256],
|
||||
"llvm": "avx2.pabs.{0.data_type_short}",
|
||||
"llvm": "pabs.{0.data_type_short}",
|
||||
"ret": "s(8-32)",
|
||||
"args": ["0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_adds_{0.data_type}",
|
||||
"width": [256],
|
||||
"llvm": "avx2.padd{0.kind_short}s.{0.data_type_short}",
|
||||
"llvm": "padd{0.kind_short}s.{0.data_type_short}",
|
||||
"ret": "i(8-16)",
|
||||
"args": ["0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_avg_{0.data_type}",
|
||||
"width": [256],
|
||||
"llvm": "avx2.pavg.{0.data_type_short}",
|
||||
"llvm": "pavg.{0.data_type_short}",
|
||||
"ret": "u(8-16)",
|
||||
"args": ["0", "0"]
|
||||
},
|
||||
|
@ -64,6 +64,48 @@
|
|||
"ret": "s16",
|
||||
"args": ["s8", "s8"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "gather.d.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": ["s32", "f32"],
|
||||
"args": ["0", "0SPc/S8", "s32", "0s->0", "S32/8"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "gather.d.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": ["s64", "f64"],
|
||||
"args": ["0", "0SPc/S8", "s32x128", "0s->0", "S32/8"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{3.width_mm}_mask_i64gather_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "gather.q.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": ["s32x128", "f32x128"],
|
||||
"args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_mask_i64gather_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "gather.q.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": ["s64", "f64"],
|
||||
"args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{0.width_mm}_maskload_{0.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "maskload.{0.data_type_short}{0.width_suffix}",
|
||||
"ret": ["s(32-64)"],
|
||||
"args": ["0Pc/S8", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "{2.width_mm}_maskstore_{2.data_type}",
|
||||
"width": [128, 256],
|
||||
"llvm": "maskstore.{2.data_type_short}{2.width_suffix}",
|
||||
"ret": "V",
|
||||
"args": ["S(32-64)Pm/S8", "1Dv", "2"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "256_max_{0.data_type}",
|
||||
"width": [256],
|
||||
|
|
|
@ -42,6 +42,13 @@
|
|||
"llvm": "!llvm.sqrt.v4f32",
|
||||
"ret": "f32",
|
||||
"args": ["0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_storeu_ps",
|
||||
"width": [128],
|
||||
"llvm": "storeu.ps",
|
||||
"ret": "V",
|
||||
"args": ["F32Pm/S8", "f32"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -15,6 +15,13 @@
|
|||
"ret": "u(8-16)",
|
||||
"args": ["0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_lfence",
|
||||
"width": [128],
|
||||
"llvm": "lfence",
|
||||
"ret": "V",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "_madd_epi16",
|
||||
"width": [128],
|
||||
|
@ -22,6 +29,13 @@
|
|||
"ret": "s32",
|
||||
"args": ["s16", "s16"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_maskmoveu_si128",
|
||||
"width": [128],
|
||||
"llvm": "maskmov.dqu",
|
||||
"ret": "V",
|
||||
"args": ["u8", "u8", "U8Pm"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_max_{0.data_type}",
|
||||
"width": [128],
|
||||
|
@ -36,6 +50,13 @@
|
|||
"ret": "f64",
|
||||
"args": ["0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_mfence",
|
||||
"width": [128],
|
||||
"llvm": "mfence",
|
||||
"ret": "V",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "_min_{0.data_type}",
|
||||
"width": [128],
|
||||
|
@ -99,6 +120,13 @@
|
|||
"ret": "u64",
|
||||
"args": ["u8", "u8"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_sfence",
|
||||
"width": [128],
|
||||
"llvm": "sfence",
|
||||
"ret": "V",
|
||||
"args": []
|
||||
},
|
||||
{
|
||||
"intrinsic": "_sqrt_pd",
|
||||
"width": [128],
|
||||
|
@ -106,6 +134,20 @@
|
|||
"ret": "f64",
|
||||
"args": ["0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_storeu_pd",
|
||||
"width": [128],
|
||||
"llvm": "storeu.pd",
|
||||
"ret": "V",
|
||||
"args": ["F64Pm/U8", "f64"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_storeu_si128",
|
||||
"width": [128],
|
||||
"llvm": "storeu.dq",
|
||||
"ret": "V",
|
||||
"args": ["u8Pm/U8", "u8"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_subs_{0.data_type}",
|
||||
"width": [128],
|
||||
|
|
|
@ -21,6 +21,13 @@
|
|||
"llvm": "hsub.{0.data_type}",
|
||||
"ret": "f(32-64)",
|
||||
"args": ["0", "0"]
|
||||
},
|
||||
{
|
||||
"intrinsic": "_lddqu_si128",
|
||||
"width": [128],
|
||||
"llvm": "ldu.dq",
|
||||
"ret": "u8",
|
||||
"args": ["0Pc/S8"]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use {Intrinsic, i, u, f, v, agg};
|
||||
use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void};
|
||||
use IntrinsicDef::Named;
|
||||
use rustc::middle::ty;
|
||||
|
||||
|
@ -1910,6 +1910,606 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(u(8), 16),
|
||||
definition: Named("llvm.aarch64.neon.rbit.v16i8")
|
||||
},
|
||||
"ld2_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), Some(v(i(8), 8)))],
|
||||
output: agg(false, vec![v(i(8), 8), v(i(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8")
|
||||
},
|
||||
"ld2_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), Some(v(u(8), 8)))],
|
||||
output: agg(false, vec![v(u(8), 8), v(u(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8")
|
||||
},
|
||||
"ld2_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), Some(v(i(16), 4)))],
|
||||
output: agg(false, vec![v(i(16), 4), v(i(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16")
|
||||
},
|
||||
"ld2_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), Some(v(u(16), 4)))],
|
||||
output: agg(false, vec![v(u(16), 4), v(u(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16")
|
||||
},
|
||||
"ld2_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), Some(v(i(32), 2)))],
|
||||
output: agg(false, vec![v(i(32), 2), v(i(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32")
|
||||
},
|
||||
"ld2_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), Some(v(u(32), 2)))],
|
||||
output: agg(false, vec![v(u(32), 2), v(u(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32")
|
||||
},
|
||||
"ld2_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), Some(v(i(64), 1)))],
|
||||
output: agg(false, vec![v(i(64), 1), v(i(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64")
|
||||
},
|
||||
"ld2_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), Some(v(u(64), 1)))],
|
||||
output: agg(false, vec![v(u(64), 1), v(u(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64")
|
||||
},
|
||||
"ld2_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(v(f(32), 2)))],
|
||||
output: agg(false, vec![v(f(32), 2), v(f(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2f32.p0v2f32")
|
||||
},
|
||||
"ld2_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(v(f(64), 1)))],
|
||||
output: agg(false, vec![v(f(64), 1), v(f(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v1f64.p0v1f64")
|
||||
},
|
||||
"ld2q_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), Some(v(i(8), 16)))],
|
||||
output: agg(false, vec![v(i(8), 16), v(i(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8")
|
||||
},
|
||||
"ld2q_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), Some(v(u(8), 16)))],
|
||||
output: agg(false, vec![v(u(8), 16), v(u(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8")
|
||||
},
|
||||
"ld2q_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), Some(v(i(16), 8)))],
|
||||
output: agg(false, vec![v(i(16), 8), v(i(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16")
|
||||
},
|
||||
"ld2q_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), Some(v(u(16), 8)))],
|
||||
output: agg(false, vec![v(u(16), 8), v(u(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16")
|
||||
},
|
||||
"ld2q_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), Some(v(i(32), 4)))],
|
||||
output: agg(false, vec![v(i(32), 4), v(i(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32")
|
||||
},
|
||||
"ld2q_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), Some(v(u(32), 4)))],
|
||||
output: agg(false, vec![v(u(32), 4), v(u(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32")
|
||||
},
|
||||
"ld2q_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), Some(v(i(64), 2)))],
|
||||
output: agg(false, vec![v(i(64), 2), v(i(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64")
|
||||
},
|
||||
"ld2q_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), Some(v(u(64), 2)))],
|
||||
output: agg(false, vec![v(u(64), 2), v(u(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64")
|
||||
},
|
||||
"ld2q_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(v(f(32), 4)))],
|
||||
output: agg(false, vec![v(f(32), 4), v(f(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4f32.p0v4f32")
|
||||
},
|
||||
"ld2q_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(v(f(64), 2)))],
|
||||
output: agg(false, vec![v(f(64), 2), v(f(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2f64.p0v2f64")
|
||||
},
|
||||
"ld3_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), Some(v(i(8), 8)))],
|
||||
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8")
|
||||
},
|
||||
"ld3_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), Some(v(u(8), 8)))],
|
||||
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8")
|
||||
},
|
||||
"ld3_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), Some(v(i(16), 4)))],
|
||||
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16")
|
||||
},
|
||||
"ld3_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), Some(v(u(16), 4)))],
|
||||
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16")
|
||||
},
|
||||
"ld3_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), Some(v(i(32), 2)))],
|
||||
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32")
|
||||
},
|
||||
"ld3_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), Some(v(u(32), 2)))],
|
||||
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32")
|
||||
},
|
||||
"ld3_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), Some(v(i(64), 1)))],
|
||||
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64")
|
||||
},
|
||||
"ld3_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), Some(v(u(64), 1)))],
|
||||
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64")
|
||||
},
|
||||
"ld3_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(v(f(32), 2)))],
|
||||
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2f32.p0v2f32")
|
||||
},
|
||||
"ld3_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(v(f(64), 1)))],
|
||||
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v1f64.p0v1f64")
|
||||
},
|
||||
"ld3q_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), Some(v(i(8), 16)))],
|
||||
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8")
|
||||
},
|
||||
"ld3q_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), Some(v(u(8), 16)))],
|
||||
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8")
|
||||
},
|
||||
"ld3q_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), Some(v(i(16), 8)))],
|
||||
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16")
|
||||
},
|
||||
"ld3q_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), Some(v(u(16), 8)))],
|
||||
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16")
|
||||
},
|
||||
"ld3q_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), Some(v(i(32), 4)))],
|
||||
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32")
|
||||
},
|
||||
"ld3q_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), Some(v(u(32), 4)))],
|
||||
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32")
|
||||
},
|
||||
"ld3q_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), Some(v(i(64), 2)))],
|
||||
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64")
|
||||
},
|
||||
"ld3q_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), Some(v(u(64), 2)))],
|
||||
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64")
|
||||
},
|
||||
"ld3q_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(v(f(32), 4)))],
|
||||
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4f32.p0v4f32")
|
||||
},
|
||||
"ld3q_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(v(f(64), 2)))],
|
||||
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2f64.p0v2f64")
|
||||
},
|
||||
"ld4_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), Some(v(i(8), 8)))],
|
||||
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8")
|
||||
},
|
||||
"ld4_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), Some(v(u(8), 8)))],
|
||||
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8")
|
||||
},
|
||||
"ld4_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), Some(v(i(16), 4)))],
|
||||
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16")
|
||||
},
|
||||
"ld4_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), Some(v(u(16), 4)))],
|
||||
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16")
|
||||
},
|
||||
"ld4_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), Some(v(i(32), 2)))],
|
||||
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32")
|
||||
},
|
||||
"ld4_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), Some(v(u(32), 2)))],
|
||||
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32")
|
||||
},
|
||||
"ld4_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), Some(v(i(64), 1)))],
|
||||
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64")
|
||||
},
|
||||
"ld4_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), Some(v(u(64), 1)))],
|
||||
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64")
|
||||
},
|
||||
"ld4_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(v(f(32), 2)))],
|
||||
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2f32.p0v2f32")
|
||||
},
|
||||
"ld4_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(v(f(64), 1)))],
|
||||
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v1f64.p0v1f64")
|
||||
},
|
||||
"ld4q_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), Some(v(i(8), 16)))],
|
||||
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8")
|
||||
},
|
||||
"ld4q_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), Some(v(u(8), 16)))],
|
||||
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8")
|
||||
},
|
||||
"ld4q_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), Some(v(i(16), 8)))],
|
||||
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16")
|
||||
},
|
||||
"ld4q_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), Some(v(u(16), 8)))],
|
||||
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16")
|
||||
},
|
||||
"ld4q_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), Some(v(i(32), 4)))],
|
||||
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32")
|
||||
},
|
||||
"ld4q_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), Some(v(u(32), 4)))],
|
||||
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32")
|
||||
},
|
||||
"ld4q_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), Some(v(i(64), 2)))],
|
||||
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64")
|
||||
},
|
||||
"ld4q_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), Some(v(u(64), 2)))],
|
||||
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64")
|
||||
},
|
||||
"ld4q_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(v(f(32), 4)))],
|
||||
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4f32.p0v4f32")
|
||||
},
|
||||
"ld4q_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(v(f(64), 2)))],
|
||||
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2f64.p0v2f64")
|
||||
},
|
||||
"ld2_dup_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: agg(false, vec![v(i(8), 8), v(i(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8")
|
||||
},
|
||||
"ld2_dup_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), None)],
|
||||
output: agg(false, vec![v(u(8), 8), v(u(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8")
|
||||
},
|
||||
"ld2_dup_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), None)],
|
||||
output: agg(false, vec![v(i(16), 4), v(i(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16")
|
||||
},
|
||||
"ld2_dup_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), None)],
|
||||
output: agg(false, vec![v(u(16), 4), v(u(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16")
|
||||
},
|
||||
"ld2_dup_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), None)],
|
||||
output: agg(false, vec![v(i(32), 2), v(i(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32")
|
||||
},
|
||||
"ld2_dup_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), None)],
|
||||
output: agg(false, vec![v(u(32), 2), v(u(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32")
|
||||
},
|
||||
"ld2_dup_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), None)],
|
||||
output: agg(false, vec![v(i(64), 1), v(i(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64")
|
||||
},
|
||||
"ld2_dup_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), None)],
|
||||
output: agg(false, vec![v(u(64), 1), v(u(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64")
|
||||
},
|
||||
"ld2_dup_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), None)],
|
||||
output: agg(false, vec![v(f(32), 2), v(f(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2f32.p0f32")
|
||||
},
|
||||
"ld2_dup_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), None)],
|
||||
output: agg(false, vec![v(f(64), 1), v(f(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v1f64.p0f64")
|
||||
},
|
||||
"ld2q_dup_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: agg(false, vec![v(i(8), 16), v(i(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8")
|
||||
},
|
||||
"ld2q_dup_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), None)],
|
||||
output: agg(false, vec![v(u(8), 16), v(u(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8")
|
||||
},
|
||||
"ld2q_dup_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), None)],
|
||||
output: agg(false, vec![v(i(16), 8), v(i(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16")
|
||||
},
|
||||
"ld2q_dup_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), None)],
|
||||
output: agg(false, vec![v(u(16), 8), v(u(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16")
|
||||
},
|
||||
"ld2q_dup_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), None)],
|
||||
output: agg(false, vec![v(i(32), 4), v(i(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32")
|
||||
},
|
||||
"ld2q_dup_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), None)],
|
||||
output: agg(false, vec![v(u(32), 4), v(u(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32")
|
||||
},
|
||||
"ld2q_dup_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), None)],
|
||||
output: agg(false, vec![v(i(64), 2), v(i(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64")
|
||||
},
|
||||
"ld2q_dup_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), None)],
|
||||
output: agg(false, vec![v(u(64), 2), v(u(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64")
|
||||
},
|
||||
"ld2q_dup_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), None)],
|
||||
output: agg(false, vec![v(f(32), 4), v(f(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v4f32.p0f32")
|
||||
},
|
||||
"ld2q_dup_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), None)],
|
||||
output: agg(false, vec![v(f(64), 2), v(f(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld2.v2f64.p0f64")
|
||||
},
|
||||
"ld3_dup_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8")
|
||||
},
|
||||
"ld3_dup_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), None)],
|
||||
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8")
|
||||
},
|
||||
"ld3_dup_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), None)],
|
||||
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16")
|
||||
},
|
||||
"ld3_dup_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), None)],
|
||||
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16")
|
||||
},
|
||||
"ld3_dup_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), None)],
|
||||
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32")
|
||||
},
|
||||
"ld3_dup_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), None)],
|
||||
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32")
|
||||
},
|
||||
"ld3_dup_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), None)],
|
||||
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64")
|
||||
},
|
||||
"ld3_dup_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), None)],
|
||||
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64")
|
||||
},
|
||||
"ld3_dup_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), None)],
|
||||
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2f32.p0f32")
|
||||
},
|
||||
"ld3_dup_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), None)],
|
||||
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v1f64.p0f64")
|
||||
},
|
||||
"ld3q_dup_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8")
|
||||
},
|
||||
"ld3q_dup_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), None)],
|
||||
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8")
|
||||
},
|
||||
"ld3q_dup_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), None)],
|
||||
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16")
|
||||
},
|
||||
"ld3q_dup_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), None)],
|
||||
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16")
|
||||
},
|
||||
"ld3q_dup_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), None)],
|
||||
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32")
|
||||
},
|
||||
"ld3q_dup_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), None)],
|
||||
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32")
|
||||
},
|
||||
"ld3q_dup_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), None)],
|
||||
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64")
|
||||
},
|
||||
"ld3q_dup_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), None)],
|
||||
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64")
|
||||
},
|
||||
"ld3q_dup_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), None)],
|
||||
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v4f32.p0f32")
|
||||
},
|
||||
"ld3q_dup_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), None)],
|
||||
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld3.v2f64.p0f64")
|
||||
},
|
||||
"ld4_dup_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8")
|
||||
},
|
||||
"ld4_dup_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), None)],
|
||||
output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8")
|
||||
},
|
||||
"ld4_dup_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), None)],
|
||||
output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16")
|
||||
},
|
||||
"ld4_dup_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), None)],
|
||||
output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16")
|
||||
},
|
||||
"ld4_dup_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), None)],
|
||||
output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32")
|
||||
},
|
||||
"ld4_dup_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), None)],
|
||||
output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32")
|
||||
},
|
||||
"ld4_dup_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), None)],
|
||||
output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64")
|
||||
},
|
||||
"ld4_dup_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), None)],
|
||||
output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64")
|
||||
},
|
||||
"ld4_dup_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), None)],
|
||||
output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2), v(f(32), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2f32.p0f32")
|
||||
},
|
||||
"ld4_dup_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), None)],
|
||||
output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v1f64.p0f64")
|
||||
},
|
||||
"ld4q_dup_s8" => Intrinsic {
|
||||
inputs: vec![p(true, i(8), None)],
|
||||
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8")
|
||||
},
|
||||
"ld4q_dup_u8" => Intrinsic {
|
||||
inputs: vec![p(true, u(8), None)],
|
||||
output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8")
|
||||
},
|
||||
"ld4q_dup_s16" => Intrinsic {
|
||||
inputs: vec![p(true, i(16), None)],
|
||||
output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16")
|
||||
},
|
||||
"ld4q_dup_u16" => Intrinsic {
|
||||
inputs: vec![p(true, u(16), None)],
|
||||
output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16")
|
||||
},
|
||||
"ld4q_dup_s32" => Intrinsic {
|
||||
inputs: vec![p(true, i(32), None)],
|
||||
output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32")
|
||||
},
|
||||
"ld4q_dup_u32" => Intrinsic {
|
||||
inputs: vec![p(true, u(32), None)],
|
||||
output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32")
|
||||
},
|
||||
"ld4q_dup_s64" => Intrinsic {
|
||||
inputs: vec![p(true, i(64), None)],
|
||||
output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64")
|
||||
},
|
||||
"ld4q_dup_u64" => Intrinsic {
|
||||
inputs: vec![p(true, u(64), None)],
|
||||
output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64")
|
||||
},
|
||||
"ld4q_dup_f32" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), None)],
|
||||
output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v4f32.p0f32")
|
||||
},
|
||||
"ld4q_dup_f64" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), None)],
|
||||
output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]),
|
||||
definition: Named("llvm.aarch64.neon.ld4.v2f64.p0f64")
|
||||
},
|
||||
"padd_s8" => Intrinsic {
|
||||
inputs: vec![v(i(8), 8), v(i(8), 8)],
|
||||
output: v(i(8), 8),
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use {Intrinsic, i, u, f, v, agg};
|
||||
use {Intrinsic, i, i_, u, u_, f, v, agg, p};
|
||||
use IntrinsicDef::Named;
|
||||
use rustc::middle::ty;
|
||||
|
||||
|
|
|
@ -30,10 +30,11 @@ pub struct Intrinsic {
|
|||
|
||||
#[derive(Clone, Hash, Eq, PartialEq)]
|
||||
pub enum Type {
|
||||
Void,
|
||||
Integer(/* signed */ bool, u8, /* llvm width */ u8),
|
||||
Float(u8),
|
||||
Pointer(Box<Type>),
|
||||
Vector(Box<Type>, u8),
|
||||
Pointer(Box<Type>, Option<Box<Type>>, /* const */ bool),
|
||||
Vector(Box<Type>, Option<Box<Type>>, u8),
|
||||
Aggregate(bool, Vec<Type>),
|
||||
}
|
||||
|
||||
|
@ -47,10 +48,19 @@ fn u(width: u8) -> Type { Type::Integer(false, width, width) }
|
|||
#[allow(dead_code)]
|
||||
fn u_(width: u8, llvm_width: u8) -> Type { Type::Integer(false, width, llvm_width) }
|
||||
/// Builds a floating-point type of the given width (e.g. `f(32)`, `f(64)`).
fn f(width: u8) -> Type {
    Type::Float(width)
}
|
||||
fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), length) }
|
||||
fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), None, length) }
|
||||
fn v_(x: Type, bitcast: Type, length: u8) -> Type {
|
||||
Type::Vector(Box::new(x), Some(Box::new(bitcast)), length)
|
||||
}
|
||||
fn agg(flatten: bool, types: Vec<Type>) -> Type {
|
||||
Type::Aggregate(flatten, types)
|
||||
}
|
||||
fn p(const_: bool, elem: Type, llvm_elem: Option<Type>) -> Type {
|
||||
Type::Pointer(Box::new(elem), llvm_elem.map(Box::new), const_)
|
||||
}
|
||||
fn void() -> Type {
|
||||
Type::Void
|
||||
}
|
||||
|
||||
mod x86;
|
||||
mod arm;
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use {Intrinsic, i, i_, u, u_, f, v, agg};
|
||||
use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void};
|
||||
use IntrinsicDef::Named;
|
||||
use rustc::middle::ty;
|
||||
|
||||
|
@ -50,6 +50,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(f(32), 4),
|
||||
definition: Named("llvm.sqrt.v4f32")
|
||||
},
|
||||
"_storeu_ps" => Intrinsic {
|
||||
inputs: vec![p(false, f(32), Some(i(8))), v(f(32), 4)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.sse.storeu.ps")
|
||||
},
|
||||
"_adds_epi8" => Intrinsic {
|
||||
inputs: vec![v(i(8), 16), v(i(8), 16)],
|
||||
output: v(i(8), 16),
|
||||
|
@ -80,11 +85,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(u(16), 8),
|
||||
definition: Named("llvm.x86.sse2.pavg.w")
|
||||
},
|
||||
"_lfence" => Intrinsic {
|
||||
inputs: vec![],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.sse2.lfence")
|
||||
},
|
||||
"_madd_epi16" => Intrinsic {
|
||||
inputs: vec![v(i(16), 8), v(i(16), 8)],
|
||||
output: v(i(32), 4),
|
||||
definition: Named("llvm.x86.sse2.pmadd.wd")
|
||||
},
|
||||
"_maskmoveu_si128" => Intrinsic {
|
||||
inputs: vec![v(u(8), 16), v(u(8), 16), p(false, u(8), None)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.sse2.maskmov.dqu")
|
||||
},
|
||||
"_max_epi16" => Intrinsic {
|
||||
inputs: vec![v(i(16), 8), v(i(16), 8)],
|
||||
output: v(i(16), 8),
|
||||
|
@ -100,6 +115,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.sse2.max.pd")
|
||||
},
|
||||
// Full memory fence: takes no arguments and returns nothing.
// LLVM declares this intrinsic as `llvm.x86.sse2.mfence`; the previous
// `llvm.x86.sse2.fence` is not a declared LLVM intrinsic.
"_mfence" => Intrinsic {
    inputs: vec![],
    output: void(),
    definition: Named("llvm.x86.sse2.mfence")
},
|
||||
"_min_epi16" => Intrinsic {
|
||||
inputs: vec![v(i(16), 8), v(i(16), 8)],
|
||||
output: v(i(16), 8),
|
||||
|
@ -160,11 +180,26 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(u(64), 2),
|
||||
definition: Named("llvm.x86.sse2.psad.bw")
|
||||
},
|
||||
// Store fence: takes no arguments and returns nothing.
// LLVM declares the store fence under the SSE (not SSE2) namespace, so the
// intrinsic name is `llvm.x86.sse.sfence`; `llvm.x86.sse2.sfence` does not
// exist.
"_sfence" => Intrinsic {
    inputs: vec![],
    output: void(),
    definition: Named("llvm.x86.sse.sfence")
},
|
||||
"_sqrt_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.sqrt.v2f64")
|
||||
},
|
||||
"_storeu_pd" => Intrinsic {
|
||||
inputs: vec![p(false, f(64), Some(u(8))), v(f(64), 2)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.sse2.storeu.pd")
|
||||
},
|
||||
"_storeu_si128" => Intrinsic {
|
||||
inputs: vec![p(false, v(u(8), 16), Some(u(8))), v(u(8), 16)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.sse2.storeu.dq")
|
||||
},
|
||||
"_subs_epi8" => Intrinsic {
|
||||
inputs: vec![v(i(8), 16), v(i(8), 16)],
|
||||
output: v(i(8), 16),
|
||||
|
@ -215,6 +250,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.sse3.hsub.pd")
|
||||
},
|
||||
"_lddqu_si128" => Intrinsic {
|
||||
inputs: vec![p(true, v(u(8), 16), Some(i(8)))],
|
||||
output: v(u(8), 16),
|
||||
definition: Named("llvm.x86.sse3.ldu.dq")
|
||||
},
|
||||
"_abs_epi8" => Intrinsic {
|
||||
inputs: vec![v(i(8), 16)],
|
||||
output: v(i(8), 16),
|
||||
|
@ -490,6 +530,46 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx.max.pd.256")
|
||||
},
|
||||
"_maskload_ps" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 4)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.avx.maskload.ps")
|
||||
},
|
||||
"_maskload_pd" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 2)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.avx.maskload.pd")
|
||||
},
|
||||
"256_maskload_ps" => Intrinsic {
|
||||
inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.avx.maskload.ps.256")
|
||||
},
|
||||
"256_maskload_pd" => Intrinsic {
|
||||
inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 4)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx.maskload.pd.256")
|
||||
},
|
||||
"_maskstore_ps" => Intrinsic {
|
||||
inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 4), v(f(32), 4)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.maskstore.ps")
|
||||
},
|
||||
"_maskstore_pd" => Intrinsic {
|
||||
inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 2), v(f(64), 2)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.maskstore.pd")
|
||||
},
|
||||
"256_maskstore_ps" => Intrinsic {
|
||||
inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 8), v(f(32), 8)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.maskstore.ps.256")
|
||||
},
|
||||
"256_maskstore_pd" => Intrinsic {
|
||||
inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 4), v(f(64), 4)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.maskstore.pd.256")
|
||||
},
|
||||
"256_min_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
|
@ -540,6 +620,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.avx.rsqrt.ps.256")
|
||||
},
|
||||
"256_storeu_ps" => Intrinsic {
|
||||
inputs: vec![p(false, v(f(32), 8), Some(u(8))), v(f(32), 8)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.storeu.ps.256")
|
||||
},
|
||||
"256_storeu_pd" => Intrinsic {
|
||||
inputs: vec![p(false, v(f(64), 4), Some(u(8))), v(f(64), 4)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.storeu.ps.256")
|
||||
},
|
||||
"256_storeu_si256" => Intrinsic {
|
||||
inputs: vec![p(false, v(u(8), 32), Some(u(8))), v(u(8), 32)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.storeu.dq.256")
|
||||
},
|
||||
"256_sqrt_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8)],
|
||||
output: v(f(32), 8),
|
||||
|
@ -625,50 +720,60 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: i(32),
|
||||
definition: Named("llvm.x86.avx.ptestz.256")
|
||||
},
|
||||
"256_zeroall" => Intrinsic {
|
||||
inputs: vec![],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.vzeroall")
|
||||
},
|
||||
"256_zeroupper" => Intrinsic {
|
||||
inputs: vec![],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx.vzeroupper")
|
||||
},
|
||||
"256_abs_epi8" => Intrinsic {
|
||||
inputs: vec![v(i(8), 32)],
|
||||
output: v(i(8), 32),
|
||||
definition: Named("llvm.x86.avx2.avx2.pabs.b")
|
||||
definition: Named("llvm.x86.avx2.pabs.b")
|
||||
},
|
||||
"256_abs_epi16" => Intrinsic {
|
||||
inputs: vec![v(i(16), 16)],
|
||||
output: v(i(16), 16),
|
||||
definition: Named("llvm.x86.avx2.avx2.pabs.w")
|
||||
definition: Named("llvm.x86.avx2.pabs.w")
|
||||
},
|
||||
"256_abs_epi32" => Intrinsic {
|
||||
inputs: vec![v(i(32), 8)],
|
||||
output: v(i(32), 8),
|
||||
definition: Named("llvm.x86.avx2.avx2.pabs.d")
|
||||
definition: Named("llvm.x86.avx2.pabs.d")
|
||||
},
|
||||
"256_adds_epi8" => Intrinsic {
|
||||
inputs: vec![v(i(8), 32), v(i(8), 32)],
|
||||
output: v(i(8), 32),
|
||||
definition: Named("llvm.x86.avx2.avx2.padds.b")
|
||||
definition: Named("llvm.x86.avx2.padds.b")
|
||||
},
|
||||
"256_adds_epu8" => Intrinsic {
|
||||
inputs: vec![v(u(8), 32), v(u(8), 32)],
|
||||
output: v(u(8), 32),
|
||||
definition: Named("llvm.x86.avx2.avx2.paddus.b")
|
||||
definition: Named("llvm.x86.avx2.paddus.b")
|
||||
},
|
||||
"256_adds_epi16" => Intrinsic {
|
||||
inputs: vec![v(i(16), 16), v(i(16), 16)],
|
||||
output: v(i(16), 16),
|
||||
definition: Named("llvm.x86.avx2.avx2.padds.w")
|
||||
definition: Named("llvm.x86.avx2.padds.w")
|
||||
},
|
||||
"256_adds_epu16" => Intrinsic {
|
||||
inputs: vec![v(u(16), 16), v(u(16), 16)],
|
||||
output: v(u(16), 16),
|
||||
definition: Named("llvm.x86.avx2.avx2.paddus.w")
|
||||
definition: Named("llvm.x86.avx2.paddus.w")
|
||||
},
|
||||
"256_avg_epu8" => Intrinsic {
|
||||
inputs: vec![v(u(8), 32), v(u(8), 32)],
|
||||
output: v(u(8), 32),
|
||||
definition: Named("llvm.x86.avx2.avx2.pavg.b")
|
||||
definition: Named("llvm.x86.avx2.pavg.b")
|
||||
},
|
||||
"256_avg_epu16" => Intrinsic {
|
||||
inputs: vec![v(u(16), 16), v(u(16), 16)],
|
||||
output: v(u(16), 16),
|
||||
definition: Named("llvm.x86.avx2.avx2.pavg.w")
|
||||
definition: Named("llvm.x86.avx2.pavg.w")
|
||||
},
|
||||
"256_hadd_epi16" => Intrinsic {
|
||||
inputs: vec![v(i(16), 16), v(i(16), 16)],
|
||||
|
@ -710,6 +815,126 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
|
|||
output: v(i(16), 16),
|
||||
definition: Named("llvm.x86.avx2.pmadd.ub.sw")
|
||||
},
|
||||
"_mask_i32gather_epi32" => Intrinsic {
|
||||
inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4), i_(32, 8)],
|
||||
output: v(i(32), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.d.d")
|
||||
},
|
||||
"_mask_i32gather_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(32), 4), v_(i(32), f(32), 4), i_(32, 8)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.d.ps")
|
||||
},
|
||||
"256_mask_i32gather_epi32" => Intrinsic {
|
||||
inputs: vec![v(i(32), 8), p(true, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8), i_(32, 8)],
|
||||
output: v(i(32), 8),
|
||||
definition: Named("llvm.x86.avx2.gather.d.d.256")
|
||||
},
|
||||
"256_mask_i32gather_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 8), p(true, f(32), Some(i(8))), v(i(32), 8), v_(i(32), f(32), 8), i_(32, 8)],
|
||||
output: v(f(32), 8),
|
||||
definition: Named("llvm.x86.avx2.gather.d.ps.256")
|
||||
},
|
||||
"_mask_i32gather_epi64" => Intrinsic {
|
||||
inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 2), i_(32, 8)],
|
||||
output: v(i(64), 2),
|
||||
definition: Named("llvm.x86.avx2.gather.d.q")
|
||||
},
|
||||
"_mask_i32gather_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 2), i_(32, 8)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.avx2.gather.d.pd")
|
||||
},
|
||||
"256_mask_i32gather_epi64" => Intrinsic {
|
||||
inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 4), i_(32, 8)],
|
||||
output: v(i(64), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.d.q.256")
|
||||
},
|
||||
"256_mask_i32gather_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 4), i_(32, 8)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.d.pd.256")
|
||||
},
|
||||
"_mask_i64gather_epi32" => Intrinsic {
|
||||
inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 2), v(i(32), 4), i_(32, 8)],
|
||||
output: v(i(32), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.q.d")
|
||||
},
|
||||
"_mask_i64gather_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(64), 2), v_(i(32), f(32), 4), i_(32, 8)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.q.ps")
|
||||
},
|
||||
"256_mask_i64gather_epi32" => Intrinsic {
|
||||
inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 4), v(i(32), 4), i_(32, 8)],
|
||||
output: v(i(32), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.q.d")
|
||||
},
|
||||
"256_mask_i64gather_ps" => Intrinsic {
|
||||
inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(64), 4), v_(i(32), f(32), 4), i_(32, 8)],
|
||||
output: v(f(32), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.q.ps")
|
||||
},
|
||||
"_mask_i64gather_epi64" => Intrinsic {
|
||||
inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2), i_(32, 8)],
|
||||
output: v(i(64), 2),
|
||||
definition: Named("llvm.x86.avx2.gather.q.q")
|
||||
},
|
||||
"_mask_i64gather_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(64), 2), v_(i(64), f(64), 2), i_(32, 8)],
|
||||
output: v(f(64), 2),
|
||||
definition: Named("llvm.x86.avx2.gather.q.pd")
|
||||
},
|
||||
"256_mask_i64gather_epi64" => Intrinsic {
|
||||
inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4), i_(32, 8)],
|
||||
output: v(i(64), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.q.q.256")
|
||||
},
|
||||
"256_mask_i64gather_pd" => Intrinsic {
|
||||
inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(64), 4), v_(i(64), f(64), 4), i_(32, 8)],
|
||||
output: v(f(64), 4),
|
||||
definition: Named("llvm.x86.avx2.gather.q.pd.256")
|
||||
},
|
||||
"_maskload_epi32" => Intrinsic {
|
||||
inputs: vec![p(true, v(i(32), 4), Some(i(8))), v(i(32), 4)],
|
||||
output: v(i(32), 4),
|
||||
definition: Named("llvm.x86.avx2.maskload.d")
|
||||
},
|
||||
"_maskload_epi64" => Intrinsic {
|
||||
inputs: vec![p(true, v(i(64), 2), Some(i(8))), v(i(64), 2)],
|
||||
output: v(i(64), 2),
|
||||
definition: Named("llvm.x86.avx2.maskload.q")
|
||||
},
|
||||
"256_maskload_epi32" => Intrinsic {
|
||||
inputs: vec![p(true, v(i(32), 8), Some(i(8))), v(i(32), 8)],
|
||||
output: v(i(32), 8),
|
||||
definition: Named("llvm.x86.avx2.maskload.d.256")
|
||||
},
|
||||
"256_maskload_epi64" => Intrinsic {
|
||||
inputs: vec![p(true, v(i(64), 4), Some(i(8))), v(i(64), 4)],
|
||||
output: v(i(64), 4),
|
||||
definition: Named("llvm.x86.avx2.maskload.q.256")
|
||||
},
|
||||
"_maskstore_epi32" => Intrinsic {
|
||||
inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx2.maskstore.d")
|
||||
},
|
||||
"_maskstore_epi64" => Intrinsic {
|
||||
inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx2.maskstore.q")
|
||||
},
|
||||
"256_maskstore_epi32" => Intrinsic {
|
||||
inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx2.maskstore.d.256")
|
||||
},
|
||||
"256_maskstore_epi64" => Intrinsic {
|
||||
inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4)],
|
||||
output: void(),
|
||||
definition: Named("llvm.x86.avx2.maskstore.q.256")
|
||||
},
|
||||
"256_max_epi8" => Intrinsic {
|
||||
inputs: vec![v(i(8), 32), v(i(8), 32)],
|
||||
output: v(i(8), 32),
|
||||
|
|
|
@ -936,6 +936,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
|
|||
any_changes_needed: &mut bool) -> Vec<Type> {
|
||||
use intrinsics::Type::*;
|
||||
match *t {
|
||||
Void => vec![Type::void(ccx)],
|
||||
Integer(_signed, width, llvm_width) => {
|
||||
*any_changes_needed |= width != llvm_width;
|
||||
vec![Type::ix(ccx, llvm_width as u64)]
|
||||
|
@ -947,14 +948,29 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
|
|||
_ => unreachable!()
|
||||
}
|
||||
}
|
||||
Pointer(_) => unimplemented!(),
|
||||
Vector(ref t, length) => {
|
||||
Pointer(ref t, ref llvm_elem, _const) => {
|
||||
*any_changes_needed |= llvm_elem.is_some();
|
||||
|
||||
let t = llvm_elem.as_ref().unwrap_or(t);
|
||||
let elem = one(ty_to_type(ccx, t,
|
||||
any_changes_needed));
|
||||
vec![elem.ptr_to()]
|
||||
}
|
||||
Vector(ref t, ref llvm_elem, length) => {
|
||||
*any_changes_needed |= llvm_elem.is_some();
|
||||
|
||||
let t = llvm_elem.as_ref().unwrap_or(t);
|
||||
let elem = one(ty_to_type(ccx, t,
|
||||
any_changes_needed));
|
||||
vec![Type::vector(&elem,
|
||||
length as u64)]
|
||||
}
|
||||
Aggregate(false, _) => unimplemented!(),
|
||||
Aggregate(false, ref contents) => {
|
||||
let elems = contents.iter()
|
||||
.map(|t| one(ty_to_type(ccx, t, any_changes_needed)))
|
||||
.collect::<Vec<_>>();
|
||||
vec![Type::struct_(ccx, &elems, false)]
|
||||
}
|
||||
Aggregate(true, ref contents) => {
|
||||
*any_changes_needed = true;
|
||||
contents.iter()
|
||||
|
@ -965,8 +981,9 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
|
|||
}
|
||||
|
||||
// This allows an argument list like `foo, (bar, baz),
|
||||
// qux` to be converted into `foo, bar, baz, qux`, and
|
||||
// integer arguments to be truncated as needed.
|
||||
// qux` to be converted into `foo, bar, baz, qux`, integer
|
||||
// arguments to be truncated as needed and pointers to be
|
||||
// cast.
|
||||
fn modify_as_needed<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
|
||||
t: &intrinsics::Type,
|
||||
arg_type: Ty<'tcx>,
|
||||
|
@ -991,6 +1008,16 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
|
|||
})
|
||||
.collect()
|
||||
}
|
||||
intrinsics::Type::Pointer(_, Some(ref llvm_elem), _) => {
|
||||
let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false));
|
||||
vec![PointerCast(bcx, llarg,
|
||||
llvm_elem.ptr_to())]
|
||||
}
|
||||
intrinsics::Type::Vector(_, Some(ref llvm_elem), length) => {
|
||||
let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false));
|
||||
vec![BitCast(bcx, llarg,
|
||||
Type::vector(&llvm_elem, length as u64))]
|
||||
}
|
||||
intrinsics::Type::Integer(_, width, llvm_width) if width != llvm_width => {
|
||||
// the LLVM intrinsic uses a smaller integer
|
||||
// size than the C intrinsic's signature, so
|
||||
|
@ -1027,7 +1054,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
|
|||
};
|
||||
assert_eq!(inputs.len(), llargs.len());
|
||||
|
||||
match intr.definition {
|
||||
let val = match intr.definition {
|
||||
intrinsics::IntrinsicDef::Named(name) => {
|
||||
let f = declare::declare_cfn(ccx,
|
||||
name,
|
||||
|
@ -1035,6 +1062,20 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
|
|||
tcx.mk_nil());
|
||||
Call(bcx, f, &llargs, None, call_debug_location)
|
||||
}
|
||||
};
|
||||
|
||||
match intr.output {
|
||||
intrinsics::Type::Aggregate(flatten, ref elems) => {
|
||||
// the output is a tuple so we need to munge it properly
|
||||
assert!(!flatten);
|
||||
|
||||
for i in 0..elems.len() {
|
||||
let val = ExtractValue(bcx, val, i);
|
||||
Store(bcx, val, StructGEP(bcx, llresult, i));
|
||||
}
|
||||
C_nil(ccx)
|
||||
}
|
||||
_ => val,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -464,6 +464,10 @@ fn match_intrinsic_type_to_type<'tcx, 'a>(
|
|||
};
|
||||
|
||||
match *expected {
|
||||
Void => match t.sty {
|
||||
ty::TyTuple(ref v) if v.is_empty() => {},
|
||||
_ => simple_error(&format!("`{}`", t), "()"),
|
||||
},
|
||||
// (The width we pass to LLVM doesn't concern the type checker.)
|
||||
Integer(signed, bits, _llvm_width) => match (signed, bits, &t.sty) {
|
||||
(true, 8, &ty::TyInt(hir::IntTy::TyI8)) |
|
||||
|
@ -485,8 +489,21 @@ fn match_intrinsic_type_to_type<'tcx, 'a>(
|
|||
_ => simple_error(&format!("`{}`", t),
|
||||
&format!("`f{n}`", n = bits)),
|
||||
},
|
||||
Pointer(_) => unimplemented!(),
|
||||
Vector(ref inner_expected, len) => {
|
||||
Pointer(ref inner_expected, ref _llvm_type, const_) => {
|
||||
match t.sty {
|
||||
ty::TyRawPtr(ty::TypeAndMut { ty, mutbl }) => {
|
||||
if (mutbl == hir::MutImmutable) != const_ {
|
||||
simple_error(&format!("`{}`", t),
|
||||
if const_ {"const pointer"} else {"mut pointer"})
|
||||
}
|
||||
match_intrinsic_type_to_type(tcx, position, span, structural_to_nominal,
|
||||
inner_expected, ty)
|
||||
}
|
||||
_ => simple_error(&format!("`{}`", t),
|
||||
&format!("raw pointer")),
|
||||
}
|
||||
}
|
||||
Vector(ref inner_expected, ref _llvm_type, len) => {
|
||||
if !t.is_simd() {
|
||||
simple_error(&format!("non-simd type `{}`", t),
|
||||
"simd type");
|
||||
|
|
Loading…
Reference in New Issue