diff --git a/src/etc/platform-intrinsics/aarch64.json b/src/etc/platform-intrinsics/aarch64.json index dbccdc37d3f40..79fd769942889 100644 --- a/src/etc/platform-intrinsics/aarch64.json +++ b/src/etc/platform-intrinsics/aarch64.json @@ -336,6 +336,48 @@ "ret": "i8", "args": ["0"] }, + { + "intrinsic": "ld2{0[0].width}_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);2]","[f(32-64);2]"], + "args": ["0.0SPc/0.0"] + }, + { + "intrinsic": "ld3{0[0].width}_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);3]","[f(32-64);3]"], + "args": ["0.0SPc/0.0"] + }, + { + "intrinsic": "ld4{0[0].width}_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);4]","[f(32-64);4]"], + "args": ["0.0SPc/0.0"] + }, + { + "intrinsic": "ld2{0[0].width}_dup_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);2]","[f(32-64);2]"], + "args": ["0.0SPc"] + }, + { + "intrinsic": "ld3{0[0].width}_dup_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);3]","[f(32-64);3]"], + "args": ["0.0SPc"] + }, + { + "intrinsic": "ld4{0[0].width}_dup_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);4]","[f(32-64);4]"], + "args": ["0.0SPc"] + }, { "intrinsic": "padd{0.width}_{0.data_type}", "width": [64, 128], diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py index 97b2f57010b97..d1217c1fb2b4a 100644 --- a/src/etc/platform-intrinsics/generator.py +++ b/src/etc/platform-intrinsics/generator.py @@ -14,11 +14,13 @@ import sys import re import textwrap +import itertools SPEC = re.compile( - r'^(?:(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' + r'^(?:(?PV)|(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' r'(?P\d+)(:?/(?P\d+))?)' - 
r'|(?P\d+)(?P[vShdnwus]*)(?Px\d+)?)$' + r'|(?P\d+))(?P\.\d+)?(?P[vShdnwusfDMC]*)(?Px\d+)?' + r'(?:(?PPm|Pc)(?P/.*)?|(?P->.*))?$' ) class PlatformInfo(object): @@ -68,18 +70,35 @@ def lookup(raw): {k: lookup(v) for k, v in data.items()}) class PlatformTypeInfo(object): - def __init__(self, llvm_name, properties): - self.properties = properties - self.llvm_name = llvm_name + def __init__(self, llvm_name, properties, elems = None): + if elems is None: + self.properties = properties + self.llvm_name = llvm_name + else: + assert properties is None and llvm_name is None + self.properties = {} + self.elems = elems + + def __repr__(self): + return ''.format(self.llvm_name, self.properties) def __getattr__(self, name): return self.properties[name] + def __getitem__(self, idx): + return self.elems[idx] + def vectorize(self, length, width_info): props = self.properties.copy() props.update(width_info) return PlatformTypeInfo('v{}{}'.format(length, self.llvm_name), props) + def pointer(self, llvm_elem): + name = self.llvm_name if llvm_elem is None else llvm_elem.llvm_name + return PlatformTypeInfo('p0{}'.format(name), self.properties) + +BITWIDTH_POINTER = '' + class Type(object): def __init__(self, bitwidth): self._bitwidth = bitwidth @@ -87,18 +106,39 @@ def __init__(self, bitwidth): def bitwidth(self): return self._bitwidth - def modify(self, spec, width): + def modify(self, spec, width, previous): raise NotImplementedError() + def __ne__(self, other): + return not (self == other) + +class Void(Type): + def __init__(self): + Type.__init__(self, 0) + + def compiler_ctor(self): + return 'void()' + + def rust_name(self): + return '()' + + def type_info(self, platform_info): + return None + + def __eq__(self, other): + return isinstance(other, Void) + class Number(Type): def __init__(self, bitwidth): Type.__init__(self, bitwidth) - def modify(self, spec, width): + def modify(self, spec, width, previous): if spec == 'u': return Unsigned(self.bitwidth()) elif spec == 's': return 
Signed(self.bitwidth()) + elif spec == 'f': + return Float(self.bitwidth()) elif spec == 'w': return self.__class__(self.bitwidth() * 2) elif spec == 'n': @@ -111,11 +151,16 @@ def modify(self, spec, width): def type_info(self, platform_info): return platform_info.number_type_info(self) + def __eq__(self, other): + # print(self, other) + return self.__class__ == other.__class__ and self.bitwidth() == other.bitwidth() + class Signed(Number): def __init__(self, bitwidth, llvm_bitwidth = None): Number.__init__(self, bitwidth) self._llvm_bitwidth = llvm_bitwidth + def compiler_ctor(self): if self._llvm_bitwidth is None: return 'i({})'.format(self.bitwidth()) @@ -164,26 +209,47 @@ def rust_name(self): return 'f{}'.format(self.bitwidth()) class Vector(Type): - def __init__(self, elem, length): + def __init__(self, elem, length, bitcast = None): assert isinstance(elem, Type) and not isinstance(elem, Vector) Type.__init__(self, elem.bitwidth() * length) self._length = length self._elem = elem + assert bitcast is None or (isinstance(bitcast, Vector) and + bitcast._bitcast is None and + bitcast._elem.bitwidth() == elem.bitwidth()) + if bitcast is not None and bitcast._elem != elem: + self._bitcast = bitcast._elem + else: + self._bitcast = None - def modify(self, spec, width): - if spec == 'h': + def modify(self, spec, width, previous): + if spec == 'S': + return self._elem + elif spec == 'h': return Vector(self._elem, self._length // 2) elif spec == 'd': return Vector(self._elem, self._length * 2) elif spec.startswith('x'): new_bitwidth = int(spec[1:]) return Vector(self._elem, new_bitwidth // self._elem.bitwidth()) + elif spec.startswith('->'): + bitcast_to = TypeSpec(spec[2:]) + choices = list(bitcast_to.enumerate(width, previous)) + assert len(choices) == 1 + bitcast_to = choices[0] + return Vector(self._elem, self._length, bitcast_to) else: - return Vector(self._elem.modify(spec, width), self._length) + return Vector(self._elem.modify(spec, width, previous), 
self._length) def compiler_ctor(self): - return 'v({}, {})'.format(self._elem.compiler_ctor(), self._length) + if self._bitcast is None: + return 'v({}, {})'.format(self._elem.compiler_ctor(), + self._length) + else: + return 'v_({}, {}, {})'.format(self._elem.compiler_ctor(), + self._bitcast.compiler_ctor(), + self._length) def rust_name(self): return '{}x{}'.format(self._elem.rust_name(), self._length) @@ -193,6 +259,51 @@ def type_info(self, platform_info): return elem_info.vectorize(self._length, platform_info.width_info(self.bitwidth())) + def __eq__(self, other): + return isinstance(other, Vector) and self._length == other._length and \ + self._elem == other._elem and self._bitcast == other._bitcast + +class Pointer(Type): + def __init__(self, elem, llvm_elem, const): + self._elem = elem; + self._llvm_elem = llvm_elem + self._const = const + Type.__init__(self, BITWIDTH_POINTER) + + def modify(self, spec, width, previous): + if spec == 'D': + return self._elem + elif spec == 'M': + return Pointer(self._elem, self._llvm_elem, False) + elif spec == 'C': + return Pointer(self._elem, self._llvm_elem, True) + else: + return Pointer(self._elem.modify(spec, width, previous), self._llvm_elem, self._const) + + def compiler_ctor(self): + if self._llvm_elem is None: + llvm_elem = 'None' + else: + llvm_elem = 'Some({})'.format(self._llvm_elem.compiler_ctor()) + return 'p({}, {}, {})'.format('true' if self._const else 'false', + self._elem.compiler_ctor(), + llvm_elem) + + def rust_name(self): + return '*{} {}'.format('const' if self._const else 'mut', + self._elem.rust_name()) + + def type_info(self, platform_info): + if self._llvm_elem is None: + llvm_elem = None + else: + llvm_elem = self._llvm_elem.type_info(platform_info) + return self._elem.type_info(platform_info).pointer(llvm_elem) + + def __eq__(self, other): + return isinstance(other, Pointer) and self._const == other._const \ + and self._elem == other._elem and self._llvm_elem == other._llvm_elem + class 
Aggregate(Type): def __init__(self, flatten, elems): self._flatten = flatten @@ -202,6 +313,14 @@ def __init__(self, flatten, elems): def __repr__(self): return ''.format(self._elems) + def modify(self, spec, width, previous): + if spec.startswith('.'): + num = int(spec[1:]) + return self._elems[num] + else: + print(spec) + raise NotImplementedError() + def compiler_ctor(self): return 'agg({}, vec![{}])'.format('true' if self._flatten else 'false', ', '.join(elem.compiler_ctor() for elem in self._elems)) @@ -210,8 +329,11 @@ def rust_name(self): return '({})'.format(', '.join(elem.rust_name() for elem in self._elems)) def type_info(self, platform_info): - #return PlatformTypeInfo(None, None, self._llvm_name) - return None + return PlatformTypeInfo(None, None, [elem.type_info(platform_info) for elem in self._elems]) + + def __eq__(self, other): + return isinstance(other, Aggregate) and self._flatten == other._flatten and \ + self._elems == other._elems TYPE_ID_LOOKUP = {'i': [Signed, Unsigned], @@ -219,6 +341,22 @@ def type_info(self, platform_info): 'u': [Unsigned], 'f': [Float]} +def ptrify(match, elem, width, previous): + ptr = match.group('pointer') + if ptr is None: + return elem + else: + llvm_ptr = match.group('llvm_pointer') + if llvm_ptr is None: + llvm_elem = None + else: + assert llvm_ptr.startswith('/') + options = list(TypeSpec(llvm_ptr[1:]).enumerate(width, previous)) + assert len(options) == 1 + llvm_elem = options[0] + assert ptr in ('Pc', 'Pm') + return Pointer(elem, llvm_elem, ptr == 'Pc') + class TypeSpec(object): def __init__(self, spec): if not isinstance(spec, list): @@ -226,71 +364,103 @@ def __init__(self, spec): self.spec = spec - def enumerate(self, width): + def enumerate(self, width, previous): for spec in self.spec: match = SPEC.match(spec) - if match: + if match is not None: id = match.group('id') - is_vector = id.islower() - type_ctors = TYPE_ID_LOOKUP[id.lower()] - - start = match.group('start') - if start is not None: - end = 
match.group('end') - llvm_width = None + reference = match.group('reference') + + modifiers = [] + index = match.group('index') + if index is not None: + modifiers.append(index) + modifiers += list(match.group('modifiers') or '') + force = match.group('force_width') + if force is not None: + modifiers.append(force) + bitcast = match.group('bitcast') + if bitcast is not None: + modifiers.append(bitcast) + + if match.group('void') is not None: + assert spec == 'V' + yield Void() + elif id is not None: + is_vector = id.islower() + type_ctors = TYPE_ID_LOOKUP[id.lower()] + + start = match.group('start') + if start is not None: + end = match.group('end') + llvm_width = None + else: + start = end = match.group('width') + llvm_width = match.group('llvm_width') + start = int(start) + end = int(end) + + bitwidth = start + while bitwidth <= end: + for ctor in type_ctors: + if llvm_width is not None: + assert not is_vector + llvm_width = int(llvm_width) + assert llvm_width < bitwidth + scalar = ctor(bitwidth, llvm_width) + else: + scalar = ctor(bitwidth) + + if is_vector: + elem = Vector(scalar, width // bitwidth) + else: + assert bitcast is None + elem = scalar + + for x in modifiers: + elem = elem.modify(x, width, previous) + yield ptrify(match, elem, width, previous) + bitwidth *= 2 + elif reference is not None: + reference = int(reference) + assert reference < len(previous), \ + 'referring to argument {}, but only {} are known'.format(reference, + len(previous)) + ret = previous[reference] + for x in modifiers: + ret = ret.modify(x, width, previous) + yield ptrify(match, ret, width, previous) else: - start = end = match.group('width') - llvm_width = match.group('llvm_width') - start = int(start) - end = int(end) - - bitwidth = start - while bitwidth <= end: - for ctor in type_ctors: - if llvm_width is not None: - assert not is_vector - llvm_width = int(llvm_width) - assert llvm_width < bitwidth - scalar = ctor(bitwidth, llvm_width) - else: - scalar = ctor(bitwidth) - - if 
is_vector: - yield Vector(scalar, width // bitwidth) - else: - yield scalar - bitwidth *= 2 + assert False, 'matched `{}`, but didn\'t understand it?'.format(spec) + elif spec.startswith('('): + if spec.endswith(')'): + true_spec = spec[1:-1] + flatten = False + elif spec.endswith(')f'): + true_spec = spec[1:-2] + flatten = True + else: + assert False, 'found unclosed aggregate `{}`'.format(spec) + + for elems in itertools.product(*(TypeSpec(subspec).enumerate(width, previous) + for subspec in true_spec.split(','))): + yield Aggregate(flatten, elems) + elif spec.startswith('['): + if spec.endswith(']'): + true_spec = spec[1:-1] + flatten = False + elif spec.endswith(']f'): + true_spec = spec[1:-2] + flatten = True + else: + assert False, 'found unclosed aggregate `{}`'.format(spec) + elem_spec, count = true_spec.split(';') + + count = int(count) + for elem in TypeSpec(elem_spec).enumerate(width, previous): + yield Aggregate(flatten, [elem] * count) else: - print('Failed to parse: `{}`'.format(spec), file=sys.stderr) - - def resolve(self, width, zero): - assert len(self.spec) == 1 - spec = self.spec[0] - match = SPEC.match(spec) - if match: - id = match.group('id') - if id is not None: - options = list(self.enumerate(width)) - assert len(options) == 1 - return options[0] - reference = match.group('reference') - if reference != '0': - raise NotImplementedError('only argument 0 (return value) references are supported') - ret = zero - for x in match.group('modifiers') or []: - ret = ret.modify(x, width) - force = match.group('force_width') - if force is not None: - ret = ret.modify(force, width) - return ret - elif spec.startswith('('): - if spec.endswith(')'): - raise NotImplementedError() - elif spec.endswith(')f'): - true_spec = spec[1:-2] - flatten = True - elems = [TypeSpec(subspec).resolve(width, zero) for subspec in true_spec.split(',')] - return Aggregate(flatten, elems) + assert False, 'Failed to parse `{}`'.format(spec) class GenericIntrinsic(object): def 
__init__(self, platform, intrinsic, widths, llvm_name, ret, args): @@ -305,10 +475,22 @@ def monomorphise(self): for width in self.widths: # must be a power of two assert width & (width - 1) == 0 - for ret in self.ret.enumerate(width): - args = [arg.resolve(width, ret) for arg in self.args] - yield MonomorphicIntrinsic(self._platform, self.intrinsic, width, self.llvm_name, - ret, args) + def recur(processed, untouched): + if untouched == []: + ret = processed[0] + args = processed[1:] + yield MonomorphicIntrinsic(self._platform, self.intrinsic, width, + self.llvm_name, + ret, args) + else: + raw_arg = untouched[0] + rest = untouched[1:] + for arg in raw_arg.enumerate(width, processed): + for intr in recur(processed + [arg], rest): + yield intr + + for x in recur([], [self.ret] + self.args): + yield x class MonomorphicIntrinsic(object): def __init__(self, platform, intrinsic, width, llvm_name, ret, args): @@ -369,7 +551,18 @@ def parse_args(): ## Type specifier grammar ``` - type := vector | scalar | aggregate | reference + type := core_type modifier* suffix? + + core_type := void | vector | scalar | aggregate | reference + + modifier := 'v' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' | + 'x' number | '.' number + suffix := pointer | bitcast + pointer := 'Pm' llvm_pointer? | 'Pc' llvm_pointer? + llvm_pointer := '/' type + bitcast := '->' type + + void := 'V' vector := vector_elem width | vector_elem := 'i' | 'u' | 's' | 'f' @@ -378,18 +571,20 @@ def parse_args(): scalar_type := 'U' | 'S' | 'F' llvm_width := '/' number - aggregate := '(' (type),* ')' 'f'? - - reference := number modifiers* - modifiers := 'v' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' | - 'x' number + aggregate := '(' (type),* ')' 'f'? | '[' type ';' number ']' 'f'? + reference := number width = number | '(' number '-' number ')' number = [0-9]+ ``` + ## Void + + The `V` type corresponds to `void` in LLVM (`()` in + Rust). It's likely to only work in return position. 
+ ## Vectors The vector grammar is a pattern describing many possibilities @@ -433,6 +628,12 @@ def parse_args(): - no `f` corresponds to `declare ... @llvm.foo({float, i32})`. - having an `f` corresponds to `declare ... @llvm.foo(float, i32)`. + The `[type;number]` form is a just shorter way to write + `(...)`, except avoids doing a cartesian product of generic + types, e.g. `[S32;2]` is the same as `(S32, S32)`, while + `[I32;2]` is describing just the two types `(S32,S32)` and + `(U32,U32)` (i.e. doesn't include `(S32,U32)`, `(U32,S32)` as + `(I32,I32)` would). (Currently aggregates can not contain other aggregates.) @@ -441,19 +642,49 @@ def parse_args(): A reference uses the type of another argument, with possible modifications. The number refers to the type to use, starting with 0 == return value, 1 == first argument, 2 == second - argument, etc. (Currently only referencing 0, the return - value, is supported.) + argument, etc. + + ## Affixes + + The `modifier` and `suffix` adaptors change the precise + representation. ### Modifiers - 'v': put a scalar into a vector of the current width (u32 -> u32x4, when width == 128) + - 'S': get the scalar element of a vector (u32x4 -> u32) - 'h': half the length of the vector (u32x4 -> u32x2) - 'd': double the length of the vector (u32x2 -> u32x4) - 'n': narrow the element of the vector (u32x4 -> u16x4) - 'w': widen the element of the vector (u16x4 -> u32x4) - - 'u': force an integer (vector or scalar) to be unsigned (i32x4 -> u32x4) - - 's': force an integer (vector or scalar) to be signed (u32x4 -> i32x4) + - 'u': force a number (vector or scalar) to be unsigned int (f32x4 -> u32x4) + - 's': force a number (vector or scalar) to be signed int (u32x4 -> i32x4) + - 'f': force a number (vector or scalar) to be float (u32x4 -> f32x4) - 'x' number: force the type to be a vector of bitwidth `number`. + - '.' 
number: get the `number`th element of an aggregate + - 'D': dereference a pointer (*mut u32 -> u32) + - 'C': make a pointer const (*mut u32 -> *const u32) + - 'M': make a pointer mut (*const u32 -> *mut u32) + + ### Pointers + + Pointers can be created of any type by appending a `P*` + suffix. The `m` vs. `c` chooses mut vs. const. e.g. `S32Pm` + corresponds to `*mut i32`, and `i32Pc` corresponds (with width + 128) to `*const i8x16`, `*const u32x4`, etc. + + The type after the `/` (optional) represents the type used + internally to LLVM, e.g. `S32pm/S8` is exposed as `*mut i32` + in Rust, but is `i8*` in LLVM. (This defaults to the main + type). + + ### Bitcast + + The `'->' type` bitcast suffix will cause the value to be + bitcast to the right-hand type when calling the intrinsic, + e.g. `s32->f32` will expose the intrinsic as `i32x4` at the + Rust level, but will cast that vector to `f32x4` when calling + the LLVM intrinsic. ''')) parser.add_argument('--format', choices=FORMATS, required=True, help = 'Output format.') @@ -502,7 +733,7 @@ def open(self, platform): #![allow(unused_imports)] -use {{Intrinsic, i, i_, u, u_, f, v, agg}}; +use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json index 4ac82fb90e900..2c1492c2954c8 100644 --- a/src/etc/platform-intrinsics/x86/avx.json +++ b/src/etc/platform-intrinsics/x86/avx.json @@ -36,6 +36,20 @@ "ret": "f(32-64)", "args": ["0", "0"] }, + { + "intrinsic": "{0.width_mm}_maskload_{0.data_type}", + "width": [128, 256], + "llvm": "maskload.{0.data_type_short}{0.width_suffix}", + "ret": ["f(32-64)"], + "args": ["0SPc/S8", "0s->0"] + }, + { + "intrinsic": "{3.width_mm}_maskstore_{3.data_type}", + "width": [128, 256], + "llvm": "maskstore.{3.data_type_short}{3.width_suffix}", + "ret": "V", + "args": ["F(32-64)Pm/S8", "1Dsv->1Dv", "1Dv"] + }, { "intrinsic": "256_min_{0.data_type}", 
"width": [256], @@ -78,6 +92,20 @@ "ret": "f32", "args": ["f32"] }, + { + "intrinsic": "256_storeu_{2.data_type}", + "width": [256], + "llvm": "storeu.ps.256", + "ret": "V", + "args": ["f(32-64)Pm/U8", "1D"] + }, + { + "intrinsic": "256_storeu_si256", + "width": [256], + "llvm": "storeu.dq.256", + "ret": "V", + "args": ["u8Pm/U8", "1D"] + }, { "intrinsic": "256_sqrt_{0.data_type}", "width": [256], @@ -147,6 +175,20 @@ "llvm": "ptestz.256", "ret": "S32", "args": ["u64", "u64"] + }, + { + "intrinsic": "256_zeroall", + "width": [256], + "llvm": "vzeroall", + "ret": "V", + "args": [] + }, + { + "intrinsic": "256_zeroupper", + "width": [256], + "llvm": "vzeroupper", + "ret": "V", + "args": [] } ] } diff --git a/src/etc/platform-intrinsics/x86/avx2.json b/src/etc/platform-intrinsics/x86/avx2.json index bd260ec02e930..e88ff3d2b806d 100644 --- a/src/etc/platform-intrinsics/x86/avx2.json +++ b/src/etc/platform-intrinsics/x86/avx2.json @@ -4,21 +4,21 @@ { "intrinsic": "256_abs_{0.data_type}", "width": [256], - "llvm": "avx2.pabs.{0.data_type_short}", + "llvm": "pabs.{0.data_type_short}", "ret": "s(8-32)", "args": ["0"] }, { "intrinsic": "256_adds_{0.data_type}", "width": [256], - "llvm": "avx2.padd{0.kind_short}s.{0.data_type_short}", + "llvm": "padd{0.kind_short}s.{0.data_type_short}", "ret": "i(8-16)", "args": ["0", "0"] }, { "intrinsic": "256_avg_{0.data_type}", "width": [256], - "llvm": "avx2.pavg.{0.data_type_short}", + "llvm": "pavg.{0.data_type_short}", "ret": "u(8-16)", "args": ["0", "0"] }, @@ -64,6 +64,48 @@ "ret": "s16", "args": ["s8", "s8"] }, + { + "intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.d.{0.data_type_short}{0.width_suffix}", + "ret": ["s32", "f32"], + "args": ["0", "0SPc/S8", "s32", "0s->0", "S32/8"] + }, + { + "intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.d.{0.data_type_short}{0.width_suffix}", + "ret": ["s64", "f64"], + "args": ["0", "0SPc/S8", 
"s32x128", "0s->0", "S32/8"] + }, + { + "intrinsic": "{3.width_mm}_mask_i64gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.q.{0.data_type_short}{0.width_suffix}", + "ret": ["s32x128", "f32x128"], + "args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"] + }, + { + "intrinsic": "{0.width_mm}_mask_i64gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.q.{0.data_type_short}{0.width_suffix}", + "ret": ["s64", "f64"], + "args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"] + }, + { + "intrinsic": "{0.width_mm}_maskload_{0.data_type}", + "width": [128, 256], + "llvm": "maskload.{0.data_type_short}{0.width_suffix}", + "ret": ["s(32-64)"], + "args": ["0Pc/S8", "0"] + }, + { + "intrinsic": "{2.width_mm}_maskstore_{2.data_type}", + "width": [128, 256], + "llvm": "maskstore.{2.data_type_short}{2.width_suffix}", + "ret": "V", + "args": ["S(32-64)Pm/S8", "1Dv", "2"] + }, { "intrinsic": "256_max_{0.data_type}", "width": [256], diff --git a/src/etc/platform-intrinsics/x86/sse.json b/src/etc/platform-intrinsics/x86/sse.json index 27da842934c0c..adff0dc41b2af 100644 --- a/src/etc/platform-intrinsics/x86/sse.json +++ b/src/etc/platform-intrinsics/x86/sse.json @@ -42,6 +42,13 @@ "llvm": "!llvm.sqrt.v4f32", "ret": "f32", "args": ["0"] + }, + { + "intrinsic": "_storeu_ps", + "width": [128], + "llvm": "storeu.ps", + "ret": "V", + "args": ["F32Pm/S8", "f32"] } ] } diff --git a/src/etc/platform-intrinsics/x86/sse2.json b/src/etc/platform-intrinsics/x86/sse2.json index abd0b369573a0..d09980d95f31b 100644 --- a/src/etc/platform-intrinsics/x86/sse2.json +++ b/src/etc/platform-intrinsics/x86/sse2.json @@ -15,6 +15,13 @@ "ret": "u(8-16)", "args": ["0", "0"] }, + { + "intrinsic": "_lfence", + "width": [128], + "llvm": "lfence", + "ret": "V", + "args": [] + }, { "intrinsic": "_madd_epi16", "width": [128], @@ -22,6 +29,13 @@ "ret": "s32", "args": ["s16", "s16"] }, + { + "intrinsic": "_maskmoveu_si128", + "width": [128], + "llvm": "maskmov.dqu", + "ret": "V", + "args": ["u8", 
"u8", "U8Pm"] + }, { "intrinsic": "_max_{0.data_type}", "width": [128], @@ -36,6 +50,13 @@ "ret": "f64", "args": ["0", "0"] }, + { + "intrinsic": "_mfence", + "width": [128], + "llvm": "fence", + "ret": "V", + "args": [] + }, { "intrinsic": "_min_{0.data_type}", "width": [128], @@ -99,6 +120,13 @@ "ret": "u64", "args": ["u8", "u8"] }, + { + "intrinsic": "_sfence", + "width": [128], + "llvm": "sfence", + "ret": "V", + "args": [] + }, { "intrinsic": "_sqrt_pd", "width": [128], @@ -106,6 +134,20 @@ "ret": "f64", "args": ["0"] }, + { + "intrinsic": "_storeu_pd", + "width": [128], + "llvm": "storeu.pd", + "ret": "V", + "args": ["F64Pm/U8", "f64"] + }, + { + "intrinsic": "_storeu_si128", + "width": [128], + "llvm": "storeu.dq", + "ret": "V", + "args": ["u8Pm/U8", "u8"] + }, { "intrinsic": "_subs_{0.data_type}", "width": [128], diff --git a/src/etc/platform-intrinsics/x86/sse3.json b/src/etc/platform-intrinsics/x86/sse3.json index 376e32fa91568..ed13595929d1b 100644 --- a/src/etc/platform-intrinsics/x86/sse3.json +++ b/src/etc/platform-intrinsics/x86/sse3.json @@ -21,6 +21,13 @@ "llvm": "hsub.{0.data_type}", "ret": "f(32-64)", "args": ["0", "0"] + }, + { + "intrinsic": "_lddqu_si128", + "width": [128], + "llvm": "ldu.dq", + "ret": "u8", + "args": ["0Pc/S8"] } ] } diff --git a/src/librustc_platform_intrinsics/aarch64.rs b/src/librustc_platform_intrinsics/aarch64.rs index 1f581d8ce855a..a3084d903e27f 100644 --- a/src/librustc_platform_intrinsics/aarch64.rs +++ b/src/librustc_platform_intrinsics/aarch64.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, u, f, v, agg}; +use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}; use IntrinsicDef::Named; use rustc::middle::ty; @@ -1910,6 +1910,606 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(u(8), 16), definition: Named("llvm.aarch64.neon.rbit.v16i8") }, + "ld2_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 8)))], + output: agg(false, vec![v(i(8), 8), v(i(8), 8)]), + 
definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8") + }, + "ld2_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 8)))], + output: agg(false, vec![v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8") + }, + "ld2_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 4)))], + output: agg(false, vec![v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16") + }, + "ld2_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 4)))], + output: agg(false, vec![v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16") + }, + "ld2_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 2)))], + output: agg(false, vec![v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32") + }, + "ld2_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 2)))], + output: agg(false, vec![v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32") + }, + "ld2_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 1)))], + output: agg(false, vec![v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64") + }, + "ld2_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 1)))], + output: agg(false, vec![v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64") + }, + "ld2_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 2)))], + output: agg(false, vec![v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f32.p0v2f32") + }, + "ld2_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 1)))], + output: agg(false, vec![v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1f64.p0v1f64") + }, + "ld2q_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 16)))], + output: agg(false, vec![v(i(8), 16), v(i(8), 16)]), + definition: 
Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8") + }, + "ld2q_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 16)))], + output: agg(false, vec![v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8") + }, + "ld2q_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 8)))], + output: agg(false, vec![v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16") + }, + "ld2q_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 8)))], + output: agg(false, vec![v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16") + }, + "ld2q_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 4)))], + output: agg(false, vec![v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32") + }, + "ld2q_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 4)))], + output: agg(false, vec![v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32") + }, + "ld2q_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 2)))], + output: agg(false, vec![v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64") + }, + "ld2q_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 2)))], + output: agg(false, vec![v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64") + }, + "ld2q_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 4)))], + output: agg(false, vec![v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4f32.p0v4f32") + }, + "ld2q_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 2)))], + output: agg(false, vec![v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f64.p0v2f64") + }, + "ld3_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 8)))], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: 
Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8") + }, + "ld3_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 8)))], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8") + }, + "ld3_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 4)))], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16") + }, + "ld3_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 4)))], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16") + }, + "ld3_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 2)))], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32") + }, + "ld3_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 2)))], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32") + }, + "ld3_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 1)))], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64") + }, + "ld3_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 1)))], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64") + }, + "ld3_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 2)))], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2f32.p0v2f32") + }, + "ld3_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 1)))], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1f64.p0v1f64") + }, + "ld3q_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 16)))], + 
output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8") + }, + "ld3q_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 16)))], + output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8") + }, + "ld3q_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 8)))], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16") + }, + "ld3q_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 8)))], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16") + }, + "ld3q_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 4)))], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32") + }, + "ld3q_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 4)))], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32") + }, + "ld3q_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 2)))], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64") + }, + "ld3q_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 2)))], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64") + }, + "ld3q_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 4)))], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4f32.p0v4f32") + }, + "ld3q_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 2)))], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: 
Named("llvm.aarch64.neon.ld3.v2f64.p0v2f64") + }, + "ld4_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 8)))], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8") + }, + "ld4_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 8)))], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8") + }, + "ld4_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 4)))], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16") + }, + "ld4_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 4)))], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16") + }, + "ld4_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 2)))], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32") + }, + "ld4_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 2)))], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32") + }, + "ld4_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 1)))], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64") + }, + "ld4_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 1)))], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64") + }, + "ld4_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 2)))], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2), v(f(32), 2)]), + definition: 
Named("llvm.aarch64.neon.ld4.v2f32.p0v2f32") + }, + "ld4_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 1)))], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1f64.p0v1f64") + }, + "ld4q_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 16)))], + output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8") + }, + "ld4q_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 16)))], + output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8") + }, + "ld4q_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 8)))], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16") + }, + "ld4q_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 8)))], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16") + }, + "ld4q_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 4)))], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32") + }, + "ld4q_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 4)))], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32") + }, + "ld4q_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 2)))], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64") + }, + "ld4q_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 2)))], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + 
definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64") + }, + "ld4q_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 4)))], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4f32.p0v4f32") + }, + "ld4q_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 2)))], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2f64.p0v2f64") + }, + "ld2_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8") + }, + "ld2_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8") + }, + "ld2_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16") + }, + "ld2_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16") + }, + "ld2_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32") + }, + "ld2_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32") + }, + "ld2_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64") + }, + "ld2_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64") + }, + "ld2_dup_f32" => 
Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f32.p0f32") + }, + "ld2_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1f64.p0f64") + }, + "ld2q_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8") + }, + "ld2q_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8") + }, + "ld2q_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16") + }, + "ld2q_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16") + }, + "ld2q_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32") + }, + "ld2q_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32") + }, + "ld2q_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64") + }, + "ld2q_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64") + }, + "ld2q_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 4), v(f(32), 4)]), + definition: 
Named("llvm.aarch64.neon.ld2.v4f32.p0f32") + }, + "ld2q_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f64.p0f64") + }, + "ld3_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8") + }, + "ld3_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8") + }, + "ld3_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16") + }, + "ld3_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16") + }, + "ld3_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32") + }, + "ld3_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32") + }, + "ld3_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64") + }, + "ld3_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64") + }, + "ld3_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2f32.p0f32") + 
}, + "ld3_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1f64.p0f64") + }, + "ld3q_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8") + }, + "ld3q_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8") + }, + "ld3q_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16") + }, + "ld3q_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16") + }, + "ld3q_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32") + }, + "ld3q_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32") + }, + "ld3q_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64") + }, + "ld3q_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64") + }, + "ld3q_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4f32.p0f32") + }, + 
"ld3q_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2f64.p0f64") + }, + "ld4_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8") + }, + "ld4_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8") + }, + "ld4_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16") + }, + "ld4_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16") + }, + "ld4_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32") + }, + "ld4_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32") + }, + "ld4_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64") + }, + "ld4_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64") + }, + "ld4_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 
2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2f32.p0f32") + }, + "ld4_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1f64.p0f64") + }, + "ld4q_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8") + }, + "ld4q_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8") + }, + "ld4q_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16") + }, + "ld4q_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16") + }, + "ld4q_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32") + }, + "ld4q_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32") + }, + "ld4q_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64") + }, + "ld4q_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64") + }, + "ld4q_dup_f32" 
=> Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4f32.p0f32") + }, + "ld4q_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2f64.p0f64") + }, "padd_s8" => Intrinsic { inputs: vec![v(i(8), 8), v(i(8), 8)], output: v(i(8), 8), diff --git a/src/librustc_platform_intrinsics/arm.rs b/src/librustc_platform_intrinsics/arm.rs index 8ea725ee95df8..89b147027b5e8 100644 --- a/src/librustc_platform_intrinsics/arm.rs +++ b/src/librustc_platform_intrinsics/arm.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, u, f, v, agg}; +use {Intrinsic, i, i_, u, u_, f, v, agg, p}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_platform_intrinsics/lib.rs b/src/librustc_platform_intrinsics/lib.rs index 1727347ef7bd5..9aee15b05df4c 100755 --- a/src/librustc_platform_intrinsics/lib.rs +++ b/src/librustc_platform_intrinsics/lib.rs @@ -30,10 +30,11 @@ pub struct Intrinsic { #[derive(Clone, Hash, Eq, PartialEq)] pub enum Type { + Void, Integer(/* signed */ bool, u8, /* llvm width */ u8), Float(u8), - Pointer(Box), - Vector(Box, u8), + Pointer(Box, Option>, /* const */ bool), + Vector(Box, Option>, u8), Aggregate(bool, Vec), } @@ -47,10 +48,19 @@ fn u(width: u8) -> Type { Type::Integer(false, width, width) } #[allow(dead_code)] fn u_(width: u8, llvm_width: u8) -> Type { Type::Integer(false, width, llvm_width) } fn f(width: u8) -> Type { Type::Float(width) } -fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), length) } +fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), None, length) } +fn v_(x: Type, bitcast: Type, length: u8) -> Type { + Type::Vector(Box::new(x), Some(Box::new(bitcast)), length) +} fn agg(flatten: bool, types: Vec) -> Type { Type::Aggregate(flatten, types) 
} +fn p(const_: bool, elem: Type, llvm_elem: Option) -> Type { + Type::Pointer(Box::new(elem), llvm_elem.map(Box::new), const_) +} +fn void() -> Type { + Type::Void +} mod x86; mod arm; diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs index 64c31ccb50d4a..2dfd00e9ce3bf 100644 --- a/src/librustc_platform_intrinsics/x86.rs +++ b/src/librustc_platform_intrinsics/x86.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, i_, u, u_, f, v, agg}; +use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}; use IntrinsicDef::Named; use rustc::middle::ty; @@ -50,6 +50,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(32), 4), definition: Named("llvm.sqrt.v4f32") }, + "_storeu_ps" => Intrinsic { + inputs: vec![p(false, f(32), Some(i(8))), v(f(32), 4)], + output: void(), + definition: Named("llvm.x86.sse.storeu.ps") + }, "_adds_epi8" => Intrinsic { inputs: vec![v(i(8), 16), v(i(8), 16)], output: v(i(8), 16), @@ -80,11 +85,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(u(16), 8), definition: Named("llvm.x86.sse2.pavg.w") }, + "_lfence" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.sse2.lfence") + }, "_madd_epi16" => Intrinsic { inputs: vec![v(i(16), 8), v(i(16), 8)], output: v(i(32), 4), definition: Named("llvm.x86.sse2.pmadd.wd") }, + "_maskmoveu_si128" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16), p(false, u(8), None)], + output: void(), + definition: Named("llvm.x86.sse2.maskmov.dqu") + }, "_max_epi16" => Intrinsic { inputs: vec![v(i(16), 8), v(i(16), 8)], output: v(i(16), 8), @@ -100,6 +115,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(64), 2), definition: Named("llvm.x86.sse2.max.pd") }, + "_mfence" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.sse2.mfence") + }, "_min_epi16" => Intrinsic { inputs: vec![v(i(16), 8), v(i(16), 8)], output:
v(i(16), 8), @@ -160,11 +180,26 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(u(64), 2), definition: Named("llvm.x86.sse2.psad.bw") }, + "_sfence" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.sse.sfence") + }, "_sqrt_pd" => Intrinsic { inputs: vec![v(f(64), 2)], output: v(f(64), 2), definition: Named("llvm.sqrt.v2f64") }, + "_storeu_pd" => Intrinsic { + inputs: vec![p(false, f(64), Some(u(8))), v(f(64), 2)], + output: void(), + definition: Named("llvm.x86.sse2.storeu.pd") + }, + "_storeu_si128" => Intrinsic { + inputs: vec![p(false, v(u(8), 16), Some(u(8))), v(u(8), 16)], + output: void(), + definition: Named("llvm.x86.sse2.storeu.dq") + }, "_subs_epi8" => Intrinsic { inputs: vec![v(i(8), 16), v(i(8), 16)], output: v(i(8), 16), @@ -215,6 +250,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(64), 2), definition: Named("llvm.x86.sse3.hsub.pd") }, + "_lddqu_si128" => Intrinsic { + inputs: vec![p(true, v(u(8), 16), Some(i(8)))], + output: v(u(8), 16), + definition: Named("llvm.x86.sse3.ldu.dq") + }, "_abs_epi8" => Intrinsic { inputs: vec![v(i(8), 16)], output: v(i(8), 16), @@ -490,6 +530,46 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(64), 4), definition: Named("llvm.x86.avx.max.pd.256") }, + "_maskload_ps" => Intrinsic { + inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx.maskload.ps") + }, + "_maskload_pd" => Intrinsic { + inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.avx.maskload.pd") + }, + "256_maskload_ps" => Intrinsic { + inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.maskload.ps.256") + }, + "256_maskload_pd" => Intrinsic { + inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 4)], + output: v(f(64), 4), + 
definition: Named("llvm.x86.avx.maskload.pd.256") + }, + "_maskstore_ps" => Intrinsic { + inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 4), v(f(32), 4)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.ps") + }, + "_maskstore_pd" => Intrinsic { + inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 2), v(f(64), 2)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.pd") + }, + "256_maskstore_ps" => Intrinsic { + inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 8), v(f(32), 8)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.ps.256") + }, + "256_maskstore_pd" => Intrinsic { + inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 4), v(f(64), 4)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.pd.256") + }, "256_min_ps" => Intrinsic { inputs: vec![v(f(32), 8), v(f(32), 8)], output: v(f(32), 8), @@ -540,6 +620,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(32), 8), definition: Named("llvm.x86.avx.rsqrt.ps.256") }, + "256_storeu_ps" => Intrinsic { + inputs: vec![p(false, v(f(32), 8), Some(u(8))), v(f(32), 8)], + output: void(), + definition: Named("llvm.x86.avx.storeu.ps.256") + }, + "256_storeu_pd" => Intrinsic { + inputs: vec![p(false, v(f(64), 4), Some(u(8))), v(f(64), 4)], + output: void(), + definition: Named("llvm.x86.avx.storeu.pd.256") + }, + "256_storeu_si256" => Intrinsic { + inputs: vec![p(false, v(u(8), 32), Some(u(8))), v(u(8), 32)], + output: void(), + definition: Named("llvm.x86.avx.storeu.dq.256") + }, "256_sqrt_ps" => Intrinsic { inputs: vec![v(f(32), 8)], output: v(f(32), 8), @@ -625,50 +720,60 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: i(32), definition: Named("llvm.x86.avx.ptestz.256") }, + "256_zeroall" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.avx.vzeroall") + }, + "256_zeroupper" => Intrinsic { + inputs: vec![], + output: void(), + definition: 
Named("llvm.x86.avx.vzeroupper") + }, "256_abs_epi8" => Intrinsic { inputs: vec![v(i(8), 32)], output: v(i(8), 32), - definition: Named("llvm.x86.avx2.avx2.pabs.b") + definition: Named("llvm.x86.avx2.pabs.b") }, "256_abs_epi16" => Intrinsic { inputs: vec![v(i(16), 16)], output: v(i(16), 16), - definition: Named("llvm.x86.avx2.avx2.pabs.w") + definition: Named("llvm.x86.avx2.pabs.w") }, "256_abs_epi32" => Intrinsic { inputs: vec![v(i(32), 8)], output: v(i(32), 8), - definition: Named("llvm.x86.avx2.avx2.pabs.d") + definition: Named("llvm.x86.avx2.pabs.d") }, "256_adds_epi8" => Intrinsic { inputs: vec![v(i(8), 32), v(i(8), 32)], output: v(i(8), 32), - definition: Named("llvm.x86.avx2.avx2.padds.b") + definition: Named("llvm.x86.avx2.padds.b") }, "256_adds_epu8" => Intrinsic { inputs: vec![v(u(8), 32), v(u(8), 32)], output: v(u(8), 32), - definition: Named("llvm.x86.avx2.avx2.paddus.b") + definition: Named("llvm.x86.avx2.paddus.b") }, "256_adds_epi16" => Intrinsic { inputs: vec![v(i(16), 16), v(i(16), 16)], output: v(i(16), 16), - definition: Named("llvm.x86.avx2.avx2.padds.w") + definition: Named("llvm.x86.avx2.padds.w") }, "256_adds_epu16" => Intrinsic { inputs: vec![v(u(16), 16), v(u(16), 16)], output: v(u(16), 16), - definition: Named("llvm.x86.avx2.avx2.paddus.w") + definition: Named("llvm.x86.avx2.paddus.w") }, "256_avg_epu8" => Intrinsic { inputs: vec![v(u(8), 32), v(u(8), 32)], output: v(u(8), 32), - definition: Named("llvm.x86.avx2.avx2.pavg.b") + definition: Named("llvm.x86.avx2.pavg.b") }, "256_avg_epu16" => Intrinsic { inputs: vec![v(u(16), 16), v(u(16), 16)], output: v(u(16), 16), - definition: Named("llvm.x86.avx2.avx2.pavg.w") + definition: Named("llvm.x86.avx2.pavg.w") }, "256_hadd_epi16" => Intrinsic { inputs: vec![v(i(16), 16), v(i(16), 16)], @@ -710,6 +815,126 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(i(16), 16), definition: Named("llvm.x86.avx2.pmadd.ub.sw") }, + "_mask_i32gather_epi32" => Intrinsic { + inputs: 
vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4), i_(32, 8)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.gather.d.d") + }, + "_mask_i32gather_ps" => Intrinsic { + inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(32), 4), v_(i(32), f(32), 4), i_(32, 8)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx2.gather.d.ps") + }, + "256_mask_i32gather_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), p(true, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8), i_(32, 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.gather.d.d.256") + }, + "256_mask_i32gather_ps" => Intrinsic { + inputs: vec![v(f(32), 8), p(true, f(32), Some(i(8))), v(i(32), 8), v_(i(32), f(32), 8), i_(32, 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx2.gather.d.ps.256") + }, + "_mask_i32gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 2), i_(32, 8)], + output: v(i(64), 2), + definition: Named("llvm.x86.avx2.gather.d.q") + }, + "_mask_i32gather_pd" => Intrinsic { + inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 2), i_(32, 8)], + output: v(f(64), 2), + definition: Named("llvm.x86.avx2.gather.d.pd") + }, + "256_mask_i32gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 4), i_(32, 8)], + output: v(i(64), 4), + definition: Named("llvm.x86.avx2.gather.d.q.256") + }, + "256_mask_i32gather_pd" => Intrinsic { + inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 4), i_(32, 8)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx2.gather.d.pd.256") + }, + "_mask_i64gather_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 2), v(i(32), 4), i_(32, 8)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.gather.q.d") + }, + "_mask_i64gather_ps" => Intrinsic { + inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), 
v(i(64), 2), v_(i(32), f(32), 4), i_(32, 8)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx2.gather.q.ps") + }, + "256_mask_i64gather_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 4), v(i(32), 4), i_(32, 8)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.gather.q.d.256") + }, + "256_mask_i64gather_ps" => Intrinsic { + inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(64), 4), v_(i(32), f(32), 4), i_(32, 8)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx2.gather.q.ps.256") + }, + "_mask_i64gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2), i_(32, 8)], + output: v(i(64), 2), + definition: Named("llvm.x86.avx2.gather.q.q") + }, + "_mask_i64gather_pd" => Intrinsic { + inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(64), 2), v_(i(64), f(64), 2), i_(32, 8)], + output: v(f(64), 2), + definition: Named("llvm.x86.avx2.gather.q.pd") + }, + "256_mask_i64gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4), i_(32, 8)], + output: v(i(64), 4), + definition: Named("llvm.x86.avx2.gather.q.q.256") + }, + "256_mask_i64gather_pd" => Intrinsic { + inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(64), 4), v_(i(64), f(64), 4), i_(32, 8)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx2.gather.q.pd.256") + }, + "_maskload_epi32" => Intrinsic { + inputs: vec![p(true, v(i(32), 4), Some(i(8))), v(i(32), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.maskload.d") + }, + "_maskload_epi64" => Intrinsic { + inputs: vec![p(true, v(i(64), 2), Some(i(8))), v(i(64), 2)], + output: v(i(64), 2), + definition: Named("llvm.x86.avx2.maskload.q") + }, + "256_maskload_epi32" => Intrinsic { + inputs: vec![p(true, v(i(32), 8), Some(i(8))), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.maskload.d.256") + }, + "256_maskload_epi64" => Intrinsic { + 
inputs: vec![p(true, v(i(64), 4), Some(i(8))), v(i(64), 4)], + output: v(i(64), 4), + definition: Named("llvm.x86.avx2.maskload.q.256") + }, + "_maskstore_epi32" => Intrinsic { + inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.d") + }, + "_maskstore_epi64" => Intrinsic { + inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.q") + }, + "256_maskstore_epi32" => Intrinsic { + inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.d.256") + }, + "256_maskstore_epi64" => Intrinsic { + inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.q.256") + }, "256_max_epi8" => Intrinsic { inputs: vec![v(i(8), 32), v(i(8), 32)], output: v(i(8), 32), diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs index abe72aed323f2..bcfd44d8835d7 100644 --- a/src/librustc_trans/trans/intrinsic.rs +++ b/src/librustc_trans/trans/intrinsic.rs @@ -936,6 +936,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, any_changes_needed: &mut bool) -> Vec { use intrinsics::Type::*; match *t { + Void => vec![Type::void(ccx)], Integer(_signed, width, llvm_width) => { *any_changes_needed |= width != llvm_width; vec![Type::ix(ccx, llvm_width as u64)] @@ -947,14 +948,29 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, _ => unreachable!() } } - Pointer(_) => unimplemented!(), - Vector(ref t, length) => { + Pointer(ref t, ref llvm_elem, _const) => { + *any_changes_needed |= llvm_elem.is_some(); + + let t = llvm_elem.as_ref().unwrap_or(t); + let elem = one(ty_to_type(ccx, t, + any_changes_needed)); + vec![elem.ptr_to()] + } + Vector(ref t, ref llvm_elem, length) => { + *any_changes_needed |= 
llvm_elem.is_some(); + + let t = llvm_elem.as_ref().unwrap_or(t); let elem = one(ty_to_type(ccx, t, any_changes_needed)); vec![Type::vector(&elem, length as u64)] } - Aggregate(false, _) => unimplemented!(), + Aggregate(false, ref contents) => { + let elems = contents.iter() + .map(|t| one(ty_to_type(ccx, t, any_changes_needed))) + .collect::>(); + vec![Type::struct_(ccx, &elems, false)] + } Aggregate(true, ref contents) => { *any_changes_needed = true; contents.iter() @@ -965,8 +981,9 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, } // This allows an argument list like `foo, (bar, baz), - // qux` to be converted into `foo, bar, baz, qux`, and - // integer arguments to be truncated as needed. + // qux` to be converted into `foo, bar, baz, qux`, integer + // arguments to be truncated as needed and pointers to be + // cast. fn modify_as_needed<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, t: &intrinsics::Type, arg_type: Ty<'tcx>, @@ -991,6 +1008,16 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, }) .collect() } + intrinsics::Type::Pointer(_, Some(ref llvm_elem), _) => { + let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false)); + vec![PointerCast(bcx, llarg, + llvm_elem.ptr_to())] + } + intrinsics::Type::Vector(_, Some(ref llvm_elem), length) => { + let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false)); + vec![BitCast(bcx, llarg, + Type::vector(&llvm_elem, length as u64))] + } intrinsics::Type::Integer(_, width, llvm_width) if width != llvm_width => { // the LLVM intrinsic uses a smaller integer // size than the C intrinsic's signature, so @@ -1027,7 +1054,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, }; assert_eq!(inputs.len(), llargs.len()); - match intr.definition { + let val = match intr.definition { intrinsics::IntrinsicDef::Named(name) => { let f = declare::declare_cfn(ccx, name, @@ -1035,6 +1062,20 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: 
Block<'blk, 'tcx>, tcx.mk_nil()); Call(bcx, f, &llargs, None, call_debug_location) } + }; + + match intr.output { + intrinsics::Type::Aggregate(flatten, ref elems) => { + // the output is a tuple so we need to munge it properly + assert!(!flatten); + + for i in 0..elems.len() { + let val = ExtractValue(bcx, val, i); + Store(bcx, val, StructGEP(bcx, llresult, i)); + } + C_nil(ccx) + } + _ => val, } } }; diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index 0c8bdc0ee04a3..d1f898d82fdd3 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -464,6 +464,10 @@ fn match_intrinsic_type_to_type<'tcx, 'a>( }; match *expected { + Void => match t.sty { + ty::TyTuple(ref v) if v.is_empty() => {}, + _ => simple_error(&format!("`{}`", t), "()"), + }, // (The width we pass to LLVM doesn't concern the type checker.) Integer(signed, bits, _llvm_width) => match (signed, bits, &t.sty) { (true, 8, &ty::TyInt(hir::IntTy::TyI8)) | @@ -485,8 +489,21 @@ fn match_intrinsic_type_to_type<'tcx, 'a>( _ => simple_error(&format!("`{}`", t), &format!("`f{n}`", n = bits)), }, - Pointer(_) => unimplemented!(), - Vector(ref inner_expected, len) => { + Pointer(ref inner_expected, ref _llvm_type, const_) => { + match t.sty { + ty::TyRawPtr(ty::TypeAndMut { ty, mutbl }) => { + if (mutbl == hir::MutImmutable) != const_ { + simple_error(&format!("`{}`", t), + if const_ {"const pointer"} else {"mut pointer"}) + } + match_intrinsic_type_to_type(tcx, position, span, structural_to_nominal, + inner_expected, ty) + } + _ => simple_error(&format!("`{}`", t), + &format!("raw pointer")), + } + } + Vector(ref inner_expected, ref _llvm_type, len) => { if !t.is_simd() { simple_error(&format!("non-simd type `{}`", t), "simd type");