From d12135a70de99e1cf86e3147379f4eb0678cd97c Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 2 Sep 2015 13:59:35 -0700 Subject: [PATCH 1/7] Add support for pointers to generator.py. --- src/etc/platform-intrinsics/generator.py | 97 +++++++++++++++++++-- src/librustc_platform_intrinsics/aarch64.rs | 2 +- src/librustc_platform_intrinsics/arm.rs | 2 +- src/librustc_platform_intrinsics/lib.rs | 5 +- src/librustc_platform_intrinsics/x86.rs | 2 +- src/librustc_trans/trans/intrinsic.rs | 19 +++- src/librustc_typeck/check/intrinsic.rs | 15 +++- 7 files changed, 125 insertions(+), 17 deletions(-) diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py index 97b2f57010b97..bc1d428fee856 100644 --- a/src/etc/platform-intrinsics/generator.py +++ b/src/etc/platform-intrinsics/generator.py @@ -18,7 +18,8 @@ SPEC = re.compile( r'^(?:(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' r'(?P\d+)(:?/(?P\d+))?)' - r'|(?P\d+)(?P[vShdnwus]*)(?Px\d+)?)$' + r'|(?P\d+)(?P[vShdnwusDMC]*)(?Px\d+)?)' + r'(?:(?PPm|Pc)(?P/.*)?)?$' ) class PlatformInfo(object): @@ -80,6 +81,11 @@ def vectorize(self, length, width_info): props.update(width_info) return PlatformTypeInfo('v{}{}'.format(length, self.llvm_name), props) + def pointer(self): + return PlatformTypeInfo('p0{}'.format(self.llvm_name), self.properties) + +BITWIDTH_POINTER = '' + class Type(object): def __init__(self, bitwidth): self._bitwidth = bitwidth @@ -193,6 +199,39 @@ def type_info(self, platform_info): return elem_info.vectorize(self._length, platform_info.width_info(self.bitwidth())) +class Pointer(Type): + def __init__(self, elem, llvm_elem, const): + self._elem = elem; + self._llvm_elem = llvm_elem + self._const = const + Type.__init__(self, BITWIDTH_POINTER) + + def modify(self, spec, width): + if spec == 'D': + return self._elem + elif spec == 'M': + return Pointer(self._elem, self._llvm_elem, False) + elif spec == 'C': + return Pointer(self._elem, self._llvm_elem, True) + else: + return Pointer(self._elem.modify(spec, width), self._llvm_elem, self._const) + + def compiler_ctor(self): + if self._llvm_elem is None: + llvm_elem = 'None' + else: + llvm_elem = 'Some({})'.format(self._llvm_elem.compiler_ctor()) + return 'p({}, {}, {})'.format('true' if self._const else 'false', + self._elem.compiler_ctor(), + llvm_elem) + + def rust_name(self): + return '*{} {}'.format('const' if self._const else 'mut', + self._elem.rust_name()) + + def type_info(self, platform_info): + return self._elem.type_info(platform_info).pointer() + class Aggregate(Type): def __init__(self, flatten, elems): self._flatten = flatten @@ -219,6 +258,22 @@ def type_info(self, platform_info): 'u': [Unsigned], 'f': [Float]} +def ptrify(match, elem, width): + ptr = match.group('pointer') + if ptr is None: + return elem + else: + llvm_ptr = match.group('llvm_pointer') + if llvm_ptr is None: + llvm_elem = None + else: + assert llvm_ptr.startswith('/') + options = list(TypeSpec(llvm_ptr[1:]).enumerate(width)) + assert len(options) == 1 + llvm_elem = options[0] + assert ptr in ('Pc', 'Pm') + return Pointer(elem, llvm_elem, ptr == 'Pc') + class TypeSpec(object): def __init__(self, spec): if not isinstance(spec, list): @@ -229,8 +284,10 @@ def __init__(self, spec): def enumerate(self, width): for spec in self.spec: match = SPEC.match(spec) - if match: + assert match is not None + if True: id = match.group('id') + assert id is not None is_vector = id.islower() type_ctors = TYPE_ID_LOOKUP[id.lower()] @@ -256,19 +313,21 @@ def enumerate(self, width): scalar = 
ctor(bitwidth) if is_vector: - yield Vector(scalar, width // bitwidth) + elem = Vector(scalar, width // bitwidth) else: - yield scalar + elem = scalar + yield ptrify(match, elem, width) bitwidth *= 2 else: - print('Failed to parse: `{}`'.format(spec), file=sys.stderr) + pass + #print('Failed to parse: `{}`'.format(spec), file=sys.stderr) def resolve(self, width, zero): assert len(self.spec) == 1 spec = self.spec[0] match = SPEC.match(spec) if match: - id = match.group('id') + id = match.group('id') if id is not None: options = list(self.enumerate(width)) assert len(options) == 1 @@ -282,7 +341,7 @@ def resolve(self, width, zero): force = match.group('force_width') if force is not None: ret = ret.modify(force, width) - return ret + return ptrify(match, ret, width) elif spec.startswith('('): if spec.endswith(')'): raise NotImplementedError() @@ -291,6 +350,8 @@ def resolve(self, width, zero): flatten = True elems = [TypeSpec(subspec).resolve(width, zero) for subspec in true_spec.split(',')] return Aggregate(flatten, elems) + else: + assert False, 'Failed to resolve: {}'.format(spec) class GenericIntrinsic(object): def __init__(self, platform, intrinsic, widths, llvm_name, ret, args): @@ -369,7 +430,10 @@ def parse_args(): ## Type specifier grammar ``` - type := vector | scalar | aggregate | reference + type := ( vector | scalar | aggregate | reference ) pointer? + + pointer := 'Pm' llvm_pointer? | 'Pc' llvm_pointer? + llvm_pointer := '/' type vector := vector_elem width | vector_elem := 'i' | 'u' | 's' | 'f' @@ -390,6 +454,18 @@ def parse_args(): number = [0-9]+ ``` + ## Pointers + + Pointers can be created to any type. The `m` vs. `c` chooses + mut vs. const. e.g. `S32Pm` corresponds to `*mut i32`, and + `i32Pc` corresponds (with width 128) to `*const i8x16`, + `*const u32x4`, etc. + + The type after the `/` (optional) represents the type used + internally to LLVM, e.g. `S32pm/S8` is exposed as `*mut i32` + in Rust, but is `i8*` in LLVM. (This defaults to the main + type). + ## Vectors The vector grammar is a pattern describing many possibilities @@ -454,6 +530,9 @@ def parse_args(): - 'u': force an integer (vector or scalar) to be unsigned (i32x4 -> u32x4) - 's': force an integer (vector or scalar) to be signed (u32x4 -> i32x4) - 'x' number: force the type to be a vector of bitwidth `number`. 
+ - 'D': dereference a pointer (*mut u32 -> u32) + - 'C': make a pointer const (*mut u32 -> *const u32) + - 'M': make a pointer mut (*const u32 -> *mut u32) ''')) parser.add_argument('--format', choices=FORMATS, required=True, help = 'Output format.') @@ -502,7 +581,7 @@ def open(self, platform): #![allow(unused_imports)] -use {{Intrinsic, i, i_, u, u_, f, v, agg}}; +use {{Intrinsic, i, i_, u, u_, f, v, agg, p}}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_platform_intrinsics/aarch64.rs b/src/librustc_platform_intrinsics/aarch64.rs index 1f581d8ce855a..c90d6b3816ae3 100644 --- a/src/librustc_platform_intrinsics/aarch64.rs +++ b/src/librustc_platform_intrinsics/aarch64.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, u, f, v, agg}; +use {Intrinsic, i, i_, u, u_, f, v, agg, p}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_platform_intrinsics/arm.rs b/src/librustc_platform_intrinsics/arm.rs index 8ea725ee95df8..89b147027b5e8 100644 --- a/src/librustc_platform_intrinsics/arm.rs +++ b/src/librustc_platform_intrinsics/arm.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, u, f, v, agg}; +use {Intrinsic, i, i_, u, u_, f, v, agg, p}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_platform_intrinsics/lib.rs b/src/librustc_platform_intrinsics/lib.rs index 1727347ef7bd5..8c8beb031ebad 100755 --- a/src/librustc_platform_intrinsics/lib.rs +++ b/src/librustc_platform_intrinsics/lib.rs @@ -32,7 +32,7 @@ pub struct Intrinsic { pub enum Type { Integer(/* signed */ bool, u8, /* llvm width */ u8), Float(u8), - Pointer(Box), + Pointer(Box, Option>, /* const */ bool), Vector(Box, u8), Aggregate(bool, Vec), } @@ -51,6 +51,9 @@ fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), length) } fn agg(flatten: bool, types: Vec) -> Type { Type::Aggregate(flatten, types) } +fn p(const_: bool, elem: Type, llvm_elem: Option) -> Type { + Type::Pointer(Box::new(elem), llvm_elem.map(Box::new), const_) +} mod x86; mod arm; diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs index 64c31ccb50d4a..661603866ae02 100644 --- a/src/librustc_platform_intrinsics/x86.rs +++ b/src/librustc_platform_intrinsics/x86.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, i_, u, u_, f, v, agg}; +use {Intrinsic, i, i_, u, u_, f, v, agg, p}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs index abe72aed323f2..c2dee20b3bb93 100644 --- a/src/librustc_trans/trans/intrinsic.rs +++ b/src/librustc_trans/trans/intrinsic.rs @@ -947,7 +947,14 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, _ => unreachable!() } } - Pointer(_) => unimplemented!(), + Pointer(ref t, ref llvm_elem, _const) => { + *any_changes_needed |= llvm_elem.is_some(); + + let t = llvm_elem.as_ref().unwrap_or(t); + let elem = one(ty_to_type(ccx, t, + any_changes_needed)); + vec![elem.ptr_to()] + } Vector(ref t, length) => { let elem = one(ty_to_type(ccx, t, any_changes_needed)); @@ -965,8 +972,9 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, } // This allows an argument list like `foo, (bar, baz), - // qux` to be converted into `foo, bar, baz, qux`, and - // integer arguments to be truncated as needed. + // qux` to be converted into `foo, bar, baz, qux`, integer + // arguments to be truncated as needed and pointers to be + // cast. 
fn modify_as_needed<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, t: &intrinsics::Type, arg_type: Ty<'tcx>, @@ -991,6 +999,11 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, }) .collect() } + intrinsics::Type::Pointer(_, Some(ref llvm_elem), _) => { + let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false)); + vec![PointerCast(bcx, llarg, + llvm_elem.ptr_to())] + } intrinsics::Type::Integer(_, width, llvm_width) if width != llvm_width => { // the LLVM intrinsic uses a smaller integer // size than the C intrinsic's signature, so diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index 0c8bdc0ee04a3..54f6ec0f0eda4 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -485,7 +485,20 @@ fn match_intrinsic_type_to_type<'tcx, 'a>( _ => simple_error(&format!("`{}`", t), &format!("`f{n}`", n = bits)), }, - Pointer(_) => unimplemented!(), + Pointer(ref inner_expected, ref _llvm_type, const_) => { + match t.sty { + ty::TyRawPtr(ty::TypeAndMut { ty, mutbl }) => { + if (mutbl == hir::MutImmutable) != const_ { + simple_error(&format!("`{}`", t), + if const_ {"const pointer"} else {"mut pointer"}) + } + match_intrinsic_type_to_type(tcx, position, span, structural_to_nominal, + inner_expected, ty) + } + _ => simple_error(&format!("`{}`", t), + &format!("raw pointer")), + } + } Vector(ref inner_expected, len) => { if !t.is_simd() { simple_error(&format!("non-simd type `{}`", t), From add04307f9b627992914b31dca82530f7886ef9a Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 2 Sep 2015 16:46:41 -0700 Subject: [PATCH 2/7] Support non-return value references in platform intrinsic generator. --- src/etc/platform-intrinsics/generator.py | 153 ++++++++++++----------- 1 file changed, 79 insertions(+), 74 deletions(-) diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py index bc1d428fee856..2102bd9c488ba 100644 --- a/src/etc/platform-intrinsics/generator.py +++ b/src/etc/platform-intrinsics/generator.py @@ -14,6 +14,7 @@ import sys import re import textwrap +import itertools SPEC = re.compile( r'^(?:(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' @@ -258,7 +259,7 @@ def type_info(self, platform_info): 'u': [Unsigned], 'f': [Float]} -def ptrify(match, elem, width): +def ptrify(match, elem, width, previous): ptr = match.group('pointer') if ptr is None: return elem @@ -268,7 +269,7 @@ def ptrify(match, elem, width): llvm_elem = None else: assert llvm_ptr.startswith('/') - options = list(TypeSpec(llvm_ptr[1:]).enumerate(width)) + options = list(TypeSpec(llvm_ptr[1:]).enumerate(width, previous)) assert len(options) == 1 llvm_elem = options[0] assert ptr in ('Pc', 'Pm') @@ -281,77 +282,70 @@ def __init__(self, spec): self.spec = spec - def enumerate(self, width): + def enumerate(self, width, previous): for spec in self.spec: match = SPEC.match(spec) - assert match is not None - if True: + if match is not None: id = match.group('id') - assert id is not None - is_vector = id.islower() - type_ctors = TYPE_ID_LOOKUP[id.lower()] - - start = match.group('start') - if start is not None: - end = match.group('end') - llvm_width = None + reference = match.group('reference') + + if id is not None: + is_vector = id.islower() + type_ctors = TYPE_ID_LOOKUP[id.lower()] + + start = match.group('start') + if start is not None: + end = match.group('end') + llvm_width = None + else: + start = end = match.group('width') + llvm_width = match.group('llvm_width') + start = int(start) + end = 
int(end) + + bitwidth = start + while bitwidth <= end: + for ctor in type_ctors: + if llvm_width is not None: + assert not is_vector + llvm_width = int(llvm_width) + assert llvm_width < bitwidth + scalar = ctor(bitwidth, llvm_width) + else: + scalar = ctor(bitwidth) + + if is_vector: + elem = Vector(scalar, width // bitwidth) + else: + elem = scalar + yield ptrify(match, elem, width, previous) + bitwidth *= 2 + elif reference is not None: + reference = int(reference) + assert reference < len(previous), \ + 'referring to argument {}, but only {} are known'.format(reference, + len(previous)) + ret = previous[reference] + for x in match.group('modifiers') or []: + ret = ret.modify(x, width) + force = match.group('force_width') + if force is not None: + ret = ret.modify(force, width) + yield ptrify(match, ret, width, previous) else: - start = end = match.group('width') - llvm_width = match.group('llvm_width') - start = int(start) - end = int(end) - - bitwidth = start - while bitwidth <= end: - for ctor in type_ctors: - if llvm_width is not None: - assert not is_vector - llvm_width = int(llvm_width) - assert llvm_width < bitwidth - scalar = ctor(bitwidth, llvm_width) - else: - scalar = ctor(bitwidth) - - if is_vector: - elem = Vector(scalar, width // bitwidth) - else: - elem = scalar - yield ptrify(match, elem, width) - bitwidth *= 2 + assert False, 'matched `{}`, but didn\'t understand it?'.format(spec) + elif spec.startswith('('): + if spec.endswith(')'): + raise NotImplementedError() + elif spec.endswith(')f'): + true_spec = spec[1:-2] + flatten = True + + for elems in itertools.product(*(TypeSpec(subspec).enumerate(width, previous) + for subspec in true_spec.split(','))): + yield Aggregate(flatten, elems) else: - pass - #print('Failed to parse: `{}`'.format(spec), file=sys.stderr) - - def resolve(self, width, zero): - assert len(self.spec) == 1 - spec = self.spec[0] - match = SPEC.match(spec) - if match: - id = match.group('id') - if id is not None: - options = list(self.enumerate(width)) - assert len(options) == 1 - return options[0] - reference = match.group('reference') - if reference != '0': - raise NotImplementedError('only argument 0 (return value) references are supported') - ret = zero - for x in match.group('modifiers') or []: - ret = ret.modify(x, width) - force = match.group('force_width') - if force is not None: - ret = ret.modify(force, width) - return ptrify(match, ret, width) - elif spec.startswith('('): - if spec.endswith(')'): - raise NotImplementedError() - elif spec.endswith(')f'): - true_spec = spec[1:-2] - flatten = True - elems = [TypeSpec(subspec).resolve(width, zero) for subspec in true_spec.split(',')] - return Aggregate(flatten, elems) - else: - assert False, 'Failed to resolve: {}'.format(spec) + assert False, 'Failed to parse `{}`'.format(spec) class GenericIntrinsic(object): def __init__(self, platform, intrinsic, widths, llvm_name, ret, args): @@ -366,10 +360,22 @@ def monomorphise(self): for width in self.widths: # must be a power of two assert width & (width - 1) == 0 - for ret in self.ret.enumerate(width): - args = [arg.resolve(width, ret) for arg in self.args] - yield MonomorphicIntrinsic(self._platform, self.intrinsic, width, self.llvm_name, - ret, args) + def recur(processed, untouched): + if untouched == []: + ret = processed[0] + args = processed[1:] + yield MonomorphicIntrinsic(self._platform, self.intrinsic, width, + self.llvm_name, + ret, args) + else: + raw_arg = untouched[0] + rest = untouched[1:] + for arg in raw_arg.enumerate(width, processed): + 
for intr in recur(processed + [arg], rest): + yield intr + + for x in recur([], [self.ret] + self.args): + yield x class MonomorphicIntrinsic(object): def __init__(self, platform, intrinsic, width, llvm_name, ret, args): @@ -517,8 +523,7 @@ def parse_args(): A reference uses the type of another argument, with possible modifications. The number refers to the type to use, starting with 0 == return value, 1 == first argument, 2 == second - argument, etc. (Currently only referencing 0, the return - value, is supported.) + argument, etc. ### Modifiers From 62e346af4b7a1aac43db627f19d2511d5649e5d7 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 2 Sep 2015 16:55:28 -0700 Subject: [PATCH 3/7] Support void in platform intrinsic generator. --- src/etc/platform-intrinsics/generator.py | 33 +++++++++++++++++++++--- src/librustc_platform_intrinsics/lib.rs | 4 +++ src/librustc_platform_intrinsics/x86.rs | 2 +- src/librustc_trans/trans/intrinsic.rs | 1 + src/librustc_typeck/check/intrinsic.rs | 4 +++ 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py index 2102bd9c488ba..b62c35246cab8 100644 --- a/src/etc/platform-intrinsics/generator.py +++ b/src/etc/platform-intrinsics/generator.py @@ -17,7 +17,7 @@ import itertools SPEC = re.compile( - r'^(?:(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' + r'^(?:(?PV)|(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' r'(?P\d+)(:?/(?P\d+))?)' r'|(?P\d+)(?P[vShdnwusDMC]*)(?Px\d+)?)' r'(?:(?PPm|Pc)(?P/.*)?)?$' @@ -97,6 +97,19 @@ def bitwidth(self): def modify(self, spec, width): raise NotImplementedError() +class Void(Type): + def __init__(self): + Type.__init__(self, 0) + + def compiler_ctor(self): + return 'void()' + + def rust_name(self): + return '()' + + def type_info(self, platform_info): + return None + class Number(Type): def __init__(self, bitwidth): Type.__init__(self, bitwidth) @@ -289,7 +302,10 @@ def enumerate(self, width, previous): id = match.group('id') reference = match.group('reference') - if id is not None: + if match.group('void') is not None: + assert spec == 'V' + yield Void() + elif id is not None: is_vector = id.islower() type_ctors = TYPE_ID_LOOKUP[id.lower()] @@ -436,11 +452,15 @@ def parse_args(): ## Type specifier grammar ``` - type := ( vector | scalar | aggregate | reference ) pointer? + type := core_type pointer? + + core_type := void | vector | scalar | aggregate | reference pointer := 'Pm' llvm_pointer? | 'Pc' llvm_pointer? llvm_pointer := '/' type + void := 'V' + vector := vector_elem width | vector_elem := 'i' | 'u' | 's' | 'f' @@ -472,6 +492,11 @@ def parse_args(): in Rust, but is `i8*` in LLVM. (This defaults to the main type). + ## Void + + The `V` type corresponds to `void` in LLVM (`()` in + Rust). It's likely to only work in return position. 
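    As a concrete illustration of a `V` return type (this exact entry appears
    in the sse2 definitions later in this series), a fence intrinsic that
    takes no arguments and returns nothing is declared as:

        {
            "intrinsic": "_lfence",
            "width": [128],
            "llvm": "lfence",
            "ret": "V",
            "args": []
        }

    and is exposed to Rust as a function returning `()`.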
+ ## Vectors The vector grammar is a pattern describing many possibilities @@ -586,7 +611,7 @@ def open(self, platform): #![allow(unused_imports)] -use {{Intrinsic, i, i_, u, u_, f, v, agg, p}}; +use {{Intrinsic, i, i_, u, u_, f, v, agg, p, void}}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_platform_intrinsics/lib.rs b/src/librustc_platform_intrinsics/lib.rs index 8c8beb031ebad..95da12a237829 100755 --- a/src/librustc_platform_intrinsics/lib.rs +++ b/src/librustc_platform_intrinsics/lib.rs @@ -30,6 +30,7 @@ pub struct Intrinsic { #[derive(Clone, Hash, Eq, PartialEq)] pub enum Type { + Void, Integer(/* signed */ bool, u8, /* llvm width */ u8), Float(u8), Pointer(Box, Option>, /* const */ bool), @@ -54,6 +55,9 @@ fn agg(flatten: bool, types: Vec) -> Type { fn p(const_: bool, elem: Type, llvm_elem: Option) -> Type { Type::Pointer(Box::new(elem), llvm_elem.map(Box::new), const_) } +fn void() -> Type { + Type::Void +} mod x86; mod arm; diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs index 661603866ae02..26421cb3e80ee 100644 --- a/src/librustc_platform_intrinsics/x86.rs +++ b/src/librustc_platform_intrinsics/x86.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, i_, u, u_, f, v, agg, p}; +use {Intrinsic, i, i_, u, u_, f, v, agg, p, void}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs index c2dee20b3bb93..a6816a99d28ae 100644 --- a/src/librustc_trans/trans/intrinsic.rs +++ b/src/librustc_trans/trans/intrinsic.rs @@ -936,6 +936,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, any_changes_needed: &mut bool) -> Vec { use intrinsics::Type::*; match *t { + Void => vec![Type::void(ccx)], Integer(_signed, width, llvm_width) => { *any_changes_needed |= width != llvm_width; vec![Type::ix(ccx, llvm_width as u64)] diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index 54f6ec0f0eda4..4501d1c618a72 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -464,6 +464,10 @@ fn match_intrinsic_type_to_type<'tcx, 'a>( }; match *expected { + Void => match t.sty { + ty::TyTuple(ref v) if v.is_empty() => {}, + _ => simple_error(&format!("`{}`", t), "()"), + }, // (The width we pass to LLVM doesn't concern the type checker.) Integer(signed, bits, _llvm_width) => match (signed, bits, &t.sty) { (true, 8, &ty::TyInt(hir::IntTy::TyI8)) | From 2b45a9ab54fbf593c6df84e8852fc1b3f1f0810a Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Thu, 3 Sep 2015 11:25:52 -0700 Subject: [PATCH 4/7] Support bitcasts in platform intrinsic generator. --- src/etc/platform-intrinsics/generator.py | 139 +++++++++++++++++------ src/librustc_platform_intrinsics/lib.rs | 7 +- src/librustc_trans/trans/intrinsic.rs | 10 +- src/librustc_typeck/check/intrinsic.rs | 2 +- 4 files changed, 120 insertions(+), 38 deletions(-) diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py index b62c35246cab8..8708e7c2f0f8b 100644 --- a/src/etc/platform-intrinsics/generator.py +++ b/src/etc/platform-intrinsics/generator.py @@ -19,8 +19,8 @@ SPEC = re.compile( r'^(?:(?PV)|(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' r'(?P\d+)(:?/(?P\d+))?)' - r'|(?P\d+)(?P[vShdnwusDMC]*)(?Px\d+)?)' - r'(?:(?PPm|Pc)(?P/.*)?)?$' + r'|(?P\d+))(?P[vShdnwusDMC]*)(?Px\d+)?' 
+ r'(?:(?PPm|Pc)(?P/.*)?|(?P->.*))?$' ) class PlatformInfo(object): @@ -74,6 +74,9 @@ def __init__(self, llvm_name, properties): self.properties = properties self.llvm_name = llvm_name + def __repr__(self): + return ''.format(self.llvm_name, self.properties) + def __getattr__(self, name): return self.properties[name] @@ -94,9 +97,12 @@ def __init__(self, bitwidth): def bitwidth(self): return self._bitwidth - def modify(self, spec, width): + def modify(self, spec, width, previous): raise NotImplementedError() + def __ne__(self, other): + return not (self == other) + class Void(Type): def __init__(self): Type.__init__(self, 0) @@ -110,11 +116,14 @@ def rust_name(self): def type_info(self, platform_info): return None + def __eq__(self, other): + return isinstance(other, Void) + class Number(Type): def __init__(self, bitwidth): Type.__init__(self, bitwidth) - def modify(self, spec, width): + def modify(self, spec, width, previous): if spec == 'u': return Unsigned(self.bitwidth()) elif spec == 's': @@ -131,11 +140,16 @@ def modify(self, spec, width): def type_info(self, platform_info): return platform_info.number_type_info(self) + def __eq__(self, other): + # print(self, other) + return self.__class__ == other.__class__ and self.bitwidth() == other.bitwidth() + class Signed(Number): def __init__(self, bitwidth, llvm_bitwidth = None): Number.__init__(self, bitwidth) self._llvm_bitwidth = llvm_bitwidth + def compiler_ctor(self): if self._llvm_bitwidth is None: return 'i({})'.format(self.bitwidth()) @@ -184,26 +198,47 @@ def rust_name(self): return 'f{}'.format(self.bitwidth()) class Vector(Type): - def __init__(self, elem, length): + def __init__(self, elem, length, bitcast = None): assert isinstance(elem, Type) and not isinstance(elem, Vector) Type.__init__(self, elem.bitwidth() * length) self._length = length self._elem = elem + assert bitcast is None or (isinstance(bitcast, Vector) and + bitcast._bitcast is None and + bitcast._elem.bitwidth() == elem.bitwidth()) + if bitcast is not None and bitcast._elem != elem: + self._bitcast = bitcast._elem + else: + self._bitcast = None - def modify(self, spec, width): - if spec == 'h': + def modify(self, spec, width, previous): + if spec == 'S': + return self._elem + elif spec == 'h': return Vector(self._elem, self._length // 2) elif spec == 'd': return Vector(self._elem, self._length * 2) elif spec.startswith('x'): new_bitwidth = int(spec[1:]) return Vector(self._elem, new_bitwidth // self._elem.bitwidth()) + elif spec.startswith('->'): + bitcast_to = TypeSpec(spec[2:]) + choices = list(bitcast_to.enumerate(width, previous)) + assert len(choices) == 1 + bitcast_to = choices[0] + return Vector(self._elem, self._length, bitcast_to) else: - return Vector(self._elem.modify(spec, width), self._length) + return Vector(self._elem.modify(spec, width, previous), self._length) def compiler_ctor(self): - return 'v({}, {})'.format(self._elem.compiler_ctor(), self._length) + if self._bitcast is None: + return 'v({}, {})'.format(self._elem.compiler_ctor(), + self._length) + else: + return 'v_({}, {}, {})'.format(self._elem.compiler_ctor(), + self._bitcast.compiler_ctor(), + self._length) def rust_name(self): return '{}x{}'.format(self._elem.rust_name(), self._length) @@ -213,6 +248,10 @@ def type_info(self, platform_info): return elem_info.vectorize(self._length, platform_info.width_info(self.bitwidth())) + def __eq__(self, other): + return isinstance(other, Vector) and self._length == other._length and \ + self._elem == other._elem and self._bitcast == 
other._bitcast + class Pointer(Type): def __init__(self, elem, llvm_elem, const): self._elem = elem; @@ -220,7 +259,7 @@ def __init__(self, elem, llvm_elem, const): self._const = const Type.__init__(self, BITWIDTH_POINTER) - def modify(self, spec, width): + def modify(self, spec, width, previous): if spec == 'D': return self._elem elif spec == 'M': @@ -228,7 +267,7 @@ def modify(self, spec, width): elif spec == 'C': return Pointer(self._elem, self._llvm_elem, True) else: - return Pointer(self._elem.modify(spec, width), self._llvm_elem, self._const) + return Pointer(self._elem.modify(spec, width, previous), self._llvm_elem, self._const) def compiler_ctor(self): if self._llvm_elem is None: @@ -246,6 +285,10 @@ def rust_name(self): def type_info(self, platform_info): return self._elem.type_info(platform_info).pointer() + def __eq__(self, other): + return isinstance(other, Pointer) and self._const == other._const \ + and self._elem == other._elem and self._llvm_elem == other._llvm_elem + class Aggregate(Type): def __init__(self, flatten, elems): self._flatten = flatten @@ -266,6 +309,10 @@ def type_info(self, platform_info): #return PlatformTypeInfo(None, None, self._llvm_name) return None + def __eq__(self, other): + return isinstance(other, Aggregate) and self._flatten == other._flatten and \ + self._elems == other._elems + TYPE_ID_LOOKUP = {'i': [Signed, Unsigned], 's': [Signed], @@ -302,6 +349,14 @@ def enumerate(self, width, previous): id = match.group('id') reference = match.group('reference') + modifiers = list(match.group('modifiers') or '') + force = match.group('force_width') + if force is not None: + modifiers.append(force) + bitcast = match.group('bitcast') + if bitcast is not None: + modifiers.append(bitcast) + if match.group('void') is not None: assert spec == 'V' yield Void() @@ -333,7 +388,11 @@ def enumerate(self, width, previous): if is_vector: elem = Vector(scalar, width // bitwidth) else: + assert bitcast is None elem = scalar + + for x in modifiers: + elem = elem.modify(x, width, previous) yield ptrify(match, elem, width, previous) bitwidth *= 2 elif reference is not None: @@ -342,15 +401,13 @@ def enumerate(self, width, previous): 'referring to argument {}, but only {} are known'.format(reference, len(previous)) ret = previous[reference] - for x in match.group('modifiers') or []: - ret = ret.modify(x, width) - force = match.group('force_width') - if force is not None: - ret = ret.modify(force, width) + for x in modifiers: + ret = ret.modify(x, width, previous) yield ptrify(match, ret, width, previous) else: assert False, 'matched `{}`, but didn\'t understand it?'.format(spec) elif spec.startswith('('): + assert bitcast is None if spec.endswith(')'): raise NotImplementedError() elif spec.endswith(')f'): @@ -452,12 +509,16 @@ def parse_args(): ## Type specifier grammar ``` - type := core_type pointer? + type := core_type modifier* suffix? core_type := void | vector | scalar | aggregate | reference + modifier := 'v' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' | + 'x' number + suffix := pointer | bitcast pointer := 'Pm' llvm_pointer? | 'Pc' llvm_pointer? llvm_pointer := '/' type + bitcast := '->' type void := 'V' @@ -470,28 +531,13 @@ def parse_args(): aggregate := '(' (type),* ')' 'f'? - reference := number modifiers* - modifiers := 'v' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' | - 'x' number - + reference := number width = number | '(' number '-' number ')' number = [0-9]+ ``` - ## Pointers - - Pointers can be created to any type. The `m` vs. `c` chooses - mut vs. const. e.g. 
`S32Pm` corresponds to `*mut i32`, and - `i32Pc` corresponds (with width 128) to `*const i8x16`, - `*const u32x4`, etc. - - The type after the `/` (optional) represents the type used - internally to LLVM, e.g. `S32pm/S8` is exposed as `*mut i32` - in Rust, but is `i8*` in LLVM. (This defaults to the main - type). - ## Void The `V` type corresponds to `void` in LLVM (`()` in @@ -550,6 +596,11 @@ def parse_args(): with 0 == return value, 1 == first argument, 2 == second argument, etc. + ## Affixes + + The `modifier` and `suffix` adaptors change the precise + representation. + ### Modifiers - 'v': put a scalar into a vector of the current width (u32 -> u32x4, when width == 128) @@ -563,6 +614,26 @@ def parse_args(): - 'D': dereference a pointer (*mut u32 -> u32) - 'C': make a pointer const (*mut u32 -> *const u32) - 'M': make a pointer mut (*const u32 -> *mut u32) + + ### Pointers + + Pointers can be created of any type by appending a `P*` + suffix. The `m` vs. `c` chooses mut vs. const. e.g. `S32Pm` + corresponds to `*mut i32`, and `i32Pc` corresponds (with width + 128) to `*const i8x16`, `*const u32x4`, etc. + + The type after the `/` (optional) represents the type used + internally to LLVM, e.g. `S32pm/S8` is exposed as `*mut i32` + in Rust, but is `i8*` in LLVM. (This defaults to the main + type). + + ### Bitcast + + The `'->' type` bitcast suffix will cause the value to be + bitcast to the right-hand type when calling the intrinsic, + e.g. `s32->f32` will expose the intrinsic as `i32x4` at the + Rust level, but will cast that vector to `f32x4` when calling + the LLVM intrinsic. ''')) parser.add_argument('--format', choices=FORMATS, required=True, help = 'Output format.') @@ -611,7 +682,7 @@ def open(self, platform): #![allow(unused_imports)] -use {{Intrinsic, i, i_, u, u_, f, v, agg, p, void}}; +use {{Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}}; use IntrinsicDef::Named; use rustc::middle::ty; diff --git a/src/librustc_platform_intrinsics/lib.rs b/src/librustc_platform_intrinsics/lib.rs index 95da12a237829..9aee15b05df4c 100755 --- a/src/librustc_platform_intrinsics/lib.rs +++ b/src/librustc_platform_intrinsics/lib.rs @@ -34,7 +34,7 @@ pub enum Type { Integer(/* signed */ bool, u8, /* llvm width */ u8), Float(u8), Pointer(Box, Option>, /* const */ bool), - Vector(Box, u8), + Vector(Box, Option>, u8), Aggregate(bool, Vec), } @@ -48,7 +48,10 @@ fn u(width: u8) -> Type { Type::Integer(false, width, width) } #[allow(dead_code)] fn u_(width: u8, llvm_width: u8) -> Type { Type::Integer(false, width, llvm_width) } fn f(width: u8) -> Type { Type::Float(width) } -fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), length) } +fn v(x: Type, length: u8) -> Type { Type::Vector(Box::new(x), None, length) } +fn v_(x: Type, bitcast: Type, length: u8) -> Type { + Type::Vector(Box::new(x), Some(Box::new(bitcast)), length) +} fn agg(flatten: bool, types: Vec) -> Type { Type::Aggregate(flatten, types) } diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs index a6816a99d28ae..becdd71f1d37f 100644 --- a/src/librustc_trans/trans/intrinsic.rs +++ b/src/librustc_trans/trans/intrinsic.rs @@ -956,7 +956,10 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, any_changes_needed)); vec![elem.ptr_to()] } - Vector(ref t, length) => { + Vector(ref t, ref llvm_elem, length) => { + *any_changes_needed |= llvm_elem.is_some(); + + let t = llvm_elem.as_ref().unwrap_or(t); let elem = one(ty_to_type(ccx, t, any_changes_needed)); 
vec![Type::vector(&elem, @@ -1005,6 +1008,11 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, vec![PointerCast(bcx, llarg, llvm_elem.ptr_to())] } + intrinsics::Type::Vector(_, Some(ref llvm_elem), length) => { + let llvm_elem = one(ty_to_type(bcx.ccx(), llvm_elem, &mut false)); + vec![BitCast(bcx, llarg, + Type::vector(&llvm_elem, length as u64))] + } intrinsics::Type::Integer(_, width, llvm_width) if width != llvm_width => { // the LLVM intrinsic uses a smaller integer // size than the C intrinsic's signature, so diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index 4501d1c618a72..d1f898d82fdd3 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -503,7 +503,7 @@ fn match_intrinsic_type_to_type<'tcx, 'a>( &format!("raw pointer")), } } - Vector(ref inner_expected, len) => { + Vector(ref inner_expected, ref _llvm_type, len) => { if !t.is_simd() { simple_error(&format!("non-simd type `{}`", t), "simd type"); From c19e7b629b42fc2e153893762397a336423e6ec3 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Thu, 3 Sep 2015 14:50:20 -0700 Subject: [PATCH 5/7] Add various pointer & void-using x86 intrinsics. --- src/etc/platform-intrinsics/x86/avx.json | 42 ++++ src/etc/platform-intrinsics/x86/avx2.json | 48 ++++- src/etc/platform-intrinsics/x86/sse.json | 7 + src/etc/platform-intrinsics/x86/sse2.json | 42 ++++ src/etc/platform-intrinsics/x86/sse3.json | 7 + src/librustc_platform_intrinsics/x86.rs | 245 +++++++++++++++++++++- 6 files changed, 378 insertions(+), 13 deletions(-) diff --git a/src/etc/platform-intrinsics/x86/avx.json b/src/etc/platform-intrinsics/x86/avx.json index 4ac82fb90e900..2c1492c2954c8 100644 --- a/src/etc/platform-intrinsics/x86/avx.json +++ b/src/etc/platform-intrinsics/x86/avx.json @@ -36,6 +36,20 @@ "ret": "f(32-64)", "args": ["0", "0"] }, + { + "intrinsic": "{0.width_mm}_maskload_{0.data_type}", + "width": [128, 256], + "llvm": "maskload.{0.data_type_short}{0.width_suffix}", + "ret": ["f(32-64)"], + "args": ["0SPc/S8", "0s->0"] + }, + { + "intrinsic": "{3.width_mm}_maskstore_{3.data_type}", + "width": [128, 256], + "llvm": "maskstore.{3.data_type_short}{3.width_suffix}", + "ret": "V", + "args": ["F(32-64)Pm/S8", "1Dsv->1Dv", "1Dv"] + }, { "intrinsic": "256_min_{0.data_type}", "width": [256], @@ -78,6 +92,20 @@ "ret": "f32", "args": ["f32"] }, + { + "intrinsic": "256_storeu_{2.data_type}", + "width": [256], + "llvm": "storeu.ps.256", + "ret": "V", + "args": ["f(32-64)Pm/U8", "1D"] + }, + { + "intrinsic": "256_storeu_si256", + "width": [256], + "llvm": "storeu.dq.256", + "ret": "V", + "args": ["u8Pm/U8", "1D"] + }, { "intrinsic": "256_sqrt_{0.data_type}", "width": [256], @@ -147,6 +175,20 @@ "llvm": "ptestz.256", "ret": "S32", "args": ["u64", "u64"] + }, + { + "intrinsic": "256_zeroall", + "width": [256], + "llvm": "vzeroall", + "ret": "V", + "args": [] + }, + { + "intrinsic": "256_zeroupper", + "width": [256], + "llvm": "vzeroupper", + "ret": "V", + "args": [] } ] } diff --git a/src/etc/platform-intrinsics/x86/avx2.json b/src/etc/platform-intrinsics/x86/avx2.json index bd260ec02e930..e88ff3d2b806d 100644 --- a/src/etc/platform-intrinsics/x86/avx2.json +++ b/src/etc/platform-intrinsics/x86/avx2.json @@ -4,21 +4,21 @@ { "intrinsic": "256_abs_{0.data_type}", "width": [256], - "llvm": "avx2.pabs.{0.data_type_short}", + "llvm": "pabs.{0.data_type_short}", "ret": "s(8-32)", "args": ["0"] }, { "intrinsic": "256_adds_{0.data_type}", "width": [256], - "llvm": 
"avx2.padd{0.kind_short}s.{0.data_type_short}", + "llvm": "padd{0.kind_short}s.{0.data_type_short}", "ret": "i(8-16)", "args": ["0", "0"] }, { "intrinsic": "256_avg_{0.data_type}", "width": [256], - "llvm": "avx2.pavg.{0.data_type_short}", + "llvm": "pavg.{0.data_type_short}", "ret": "u(8-16)", "args": ["0", "0"] }, @@ -64,6 +64,48 @@ "ret": "s16", "args": ["s8", "s8"] }, + { + "intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.d.{0.data_type_short}{0.width_suffix}", + "ret": ["s32", "f32"], + "args": ["0", "0SPc/S8", "s32", "0s->0", "S32/8"] + }, + { + "intrinsic": "{0.width_mm}_mask_i32gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.d.{0.data_type_short}{0.width_suffix}", + "ret": ["s64", "f64"], + "args": ["0", "0SPc/S8", "s32x128", "0s->0", "S32/8"] + }, + { + "intrinsic": "{3.width_mm}_mask_i64gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.q.{0.data_type_short}{0.width_suffix}", + "ret": ["s32x128", "f32x128"], + "args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"] + }, + { + "intrinsic": "{0.width_mm}_mask_i64gather_{0.data_type}", + "width": [128, 256], + "llvm": "gather.q.{0.data_type_short}{0.width_suffix}", + "ret": ["s64", "f64"], + "args": ["0", "0SPc/S8", "s64", "0s->0", "S32/8"] + }, + { + "intrinsic": "{0.width_mm}_maskload_{0.data_type}", + "width": [128, 256], + "llvm": "maskload.{0.data_type_short}{0.width_suffix}", + "ret": ["s(32-64)"], + "args": ["0Pc/S8", "0"] + }, + { + "intrinsic": "{2.width_mm}_maskstore_{2.data_type}", + "width": [128, 256], + "llvm": "maskstore.{2.data_type_short}{2.width_suffix}", + "ret": "V", + "args": ["S(32-64)Pm/S8", "1Dv", "2"] + }, { "intrinsic": "256_max_{0.data_type}", "width": [256], diff --git a/src/etc/platform-intrinsics/x86/sse.json b/src/etc/platform-intrinsics/x86/sse.json index 27da842934c0c..adff0dc41b2af 100644 --- a/src/etc/platform-intrinsics/x86/sse.json +++ b/src/etc/platform-intrinsics/x86/sse.json @@ -42,6 +42,13 @@ "llvm": "!llvm.sqrt.v4f32", "ret": "f32", "args": ["0"] + }, + { + "intrinsic": "_storeu_ps", + "width": [128], + "llvm": "storeu.ps", + "ret": "V", + "args": ["F32Pm/S8", "f32"] } ] } diff --git a/src/etc/platform-intrinsics/x86/sse2.json b/src/etc/platform-intrinsics/x86/sse2.json index abd0b369573a0..d09980d95f31b 100644 --- a/src/etc/platform-intrinsics/x86/sse2.json +++ b/src/etc/platform-intrinsics/x86/sse2.json @@ -15,6 +15,13 @@ "ret": "u(8-16)", "args": ["0", "0"] }, + { + "intrinsic": "_lfence", + "width": [128], + "llvm": "lfence", + "ret": "V", + "args": [] + }, { "intrinsic": "_madd_epi16", "width": [128], @@ -22,6 +29,13 @@ "ret": "s32", "args": ["s16", "s16"] }, + { + "intrinsic": "_maskmoveu_si128", + "width": [128], + "llvm": "maskmov.dqu", + "ret": "V", + "args": ["u8", "u8", "U8Pm"] + }, { "intrinsic": "_max_{0.data_type}", "width": [128], @@ -36,6 +50,13 @@ "ret": "f64", "args": ["0", "0"] }, + { + "intrinsic": "_mfence", + "width": [128], + "llvm": "fence", + "ret": "V", + "args": [] + }, { "intrinsic": "_min_{0.data_type}", "width": [128], @@ -99,6 +120,13 @@ "ret": "u64", "args": ["u8", "u8"] }, + { + "intrinsic": "_sfence", + "width": [128], + "llvm": "sfence", + "ret": "V", + "args": [] + }, { "intrinsic": "_sqrt_pd", "width": [128], @@ -106,6 +134,20 @@ "ret": "f64", "args": ["0"] }, + { + "intrinsic": "_storeu_pd", + "width": [128], + "llvm": "storeu.pd", + "ret": "V", + "args": ["F64Pm/U8", "f64"] + }, + { + "intrinsic": "_storeu_si128", + "width": [128], + "llvm": "storeu.dq", + "ret": "V", + "args": 
["u8Pm/U8", "u8"] + }, { "intrinsic": "_subs_{0.data_type}", "width": [128], diff --git a/src/etc/platform-intrinsics/x86/sse3.json b/src/etc/platform-intrinsics/x86/sse3.json index 376e32fa91568..ed13595929d1b 100644 --- a/src/etc/platform-intrinsics/x86/sse3.json +++ b/src/etc/platform-intrinsics/x86/sse3.json @@ -21,6 +21,13 @@ "llvm": "hsub.{0.data_type}", "ret": "f(32-64)", "args": ["0", "0"] + }, + { + "intrinsic": "_lddqu_si128", + "width": [128], + "llvm": "ldu.dq", + "ret": "u8", + "args": ["0Pc/S8"] } ] } diff --git a/src/librustc_platform_intrinsics/x86.rs b/src/librustc_platform_intrinsics/x86.rs index 26421cb3e80ee..2dfd00e9ce3bf 100644 --- a/src/librustc_platform_intrinsics/x86.rs +++ b/src/librustc_platform_intrinsics/x86.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, i_, u, u_, f, v, agg, p, void}; +use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}; use IntrinsicDef::Named; use rustc::middle::ty; @@ -50,6 +50,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(32), 4), definition: Named("llvm.sqrt.v4f32") }, + "_storeu_ps" => Intrinsic { + inputs: vec![p(false, f(32), Some(i(8))), v(f(32), 4)], + output: void(), + definition: Named("llvm.x86.sse.storeu.ps") + }, "_adds_epi8" => Intrinsic { inputs: vec![v(i(8), 16), v(i(8), 16)], output: v(i(8), 16), @@ -80,11 +85,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(u(16), 8), definition: Named("llvm.x86.sse2.pavg.w") }, + "_lfence" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.sse2.lfence") + }, "_madd_epi16" => Intrinsic { inputs: vec![v(i(16), 8), v(i(16), 8)], output: v(i(32), 4), definition: Named("llvm.x86.sse2.pmadd.wd") }, + "_maskmoveu_si128" => Intrinsic { + inputs: vec![v(u(8), 16), v(u(8), 16), p(false, u(8), None)], + output: void(), + definition: Named("llvm.x86.sse2.maskmov.dqu") + }, "_max_epi16" => Intrinsic { inputs: vec![v(i(16), 8), v(i(16), 8)], output: v(i(16), 8), @@ -100,6 +115,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(64), 2), definition: Named("llvm.x86.sse2.max.pd") }, + "_mfence" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.sse2.fence") + }, "_min_epi16" => Intrinsic { inputs: vec![v(i(16), 8), v(i(16), 8)], output: v(i(16), 8), @@ -160,11 +180,26 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(u(64), 2), definition: Named("llvm.x86.sse2.psad.bw") }, + "_sfence" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.sse2.sfence") + }, "_sqrt_pd" => Intrinsic { inputs: vec![v(f(64), 2)], output: v(f(64), 2), definition: Named("llvm.sqrt.v2f64") }, + "_storeu_pd" => Intrinsic { + inputs: vec![p(false, f(64), Some(u(8))), v(f(64), 2)], + output: void(), + definition: Named("llvm.x86.sse2.storeu.pd") + }, + "_storeu_si128" => Intrinsic { + inputs: vec![p(false, v(u(8), 16), Some(u(8))), v(u(8), 16)], + output: void(), + definition: Named("llvm.x86.sse2.storeu.dq") + }, "_subs_epi8" => Intrinsic { inputs: vec![v(i(8), 16), v(i(8), 16)], output: v(i(8), 16), @@ -215,6 +250,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(64), 2), definition: Named("llvm.x86.sse3.hsub.pd") }, + "_lddqu_si128" => Intrinsic { + inputs: vec![p(true, v(u(8), 16), Some(i(8)))], + output: v(u(8), 16), + definition: Named("llvm.x86.sse3.ldu.dq") + }, "_abs_epi8" => Intrinsic { inputs: vec![v(i(8), 16)], output: v(i(8), 16), @@ -490,6 +530,46 @@ pub 
fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(64), 4), definition: Named("llvm.x86.avx.max.pd.256") }, + "_maskload_ps" => Intrinsic { + inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 4)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx.maskload.ps") + }, + "_maskload_pd" => Intrinsic { + inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 2)], + output: v(f(64), 2), + definition: Named("llvm.x86.avx.maskload.pd") + }, + "256_maskload_ps" => Intrinsic { + inputs: vec![p(true, f(32), Some(i(8))), v_(i(32), f(32), 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx.maskload.ps.256") + }, + "256_maskload_pd" => Intrinsic { + inputs: vec![p(true, f(64), Some(i(8))), v_(i(64), f(64), 4)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx.maskload.pd.256") + }, + "_maskstore_ps" => Intrinsic { + inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 4), v(f(32), 4)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.ps") + }, + "_maskstore_pd" => Intrinsic { + inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 2), v(f(64), 2)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.pd") + }, + "256_maskstore_ps" => Intrinsic { + inputs: vec![p(false, f(32), Some(i(8))), v_(i(32), f(32), 8), v(f(32), 8)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.ps.256") + }, + "256_maskstore_pd" => Intrinsic { + inputs: vec![p(false, f(64), Some(i(8))), v_(i(64), f(64), 4), v(f(64), 4)], + output: void(), + definition: Named("llvm.x86.avx.maskstore.pd.256") + }, "256_min_ps" => Intrinsic { inputs: vec![v(f(32), 8), v(f(32), 8)], output: v(f(32), 8), @@ -540,6 +620,21 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(f(32), 8), definition: Named("llvm.x86.avx.rsqrt.ps.256") }, + "256_storeu_ps" => Intrinsic { + inputs: vec![p(false, v(f(32), 8), Some(u(8))), v(f(32), 8)], + output: void(), + definition: Named("llvm.x86.avx.storeu.ps.256") + }, + "256_storeu_pd" => Intrinsic { + inputs: vec![p(false, v(f(64), 4), Some(u(8))), v(f(64), 4)], + output: void(), + definition: Named("llvm.x86.avx.storeu.ps.256") + }, + "256_storeu_si256" => Intrinsic { + inputs: vec![p(false, v(u(8), 32), Some(u(8))), v(u(8), 32)], + output: void(), + definition: Named("llvm.x86.avx.storeu.dq.256") + }, "256_sqrt_ps" => Intrinsic { inputs: vec![v(f(32), 8)], output: v(f(32), 8), @@ -625,50 +720,60 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: i(32), definition: Named("llvm.x86.avx.ptestz.256") }, + "256_zeroall" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.avx.vzeroall") + }, + "256_zeroupper" => Intrinsic { + inputs: vec![], + output: void(), + definition: Named("llvm.x86.avx.vzeroupper") + }, "256_abs_epi8" => Intrinsic { inputs: vec![v(i(8), 32)], output: v(i(8), 32), - definition: Named("llvm.x86.avx2.avx2.pabs.b") + definition: Named("llvm.x86.avx2.pabs.b") }, "256_abs_epi16" => Intrinsic { inputs: vec![v(i(16), 16)], output: v(i(16), 16), - definition: Named("llvm.x86.avx2.avx2.pabs.w") + definition: Named("llvm.x86.avx2.pabs.w") }, "256_abs_epi32" => Intrinsic { inputs: vec![v(i(32), 8)], output: v(i(32), 8), - definition: Named("llvm.x86.avx2.avx2.pabs.d") + definition: Named("llvm.x86.avx2.pabs.d") }, "256_adds_epi8" => Intrinsic { inputs: vec![v(i(8), 32), v(i(8), 32)], output: v(i(8), 32), - definition: Named("llvm.x86.avx2.avx2.padds.b") + definition: Named("llvm.x86.avx2.padds.b") }, "256_adds_epu8" => Intrinsic 
{ inputs: vec![v(u(8), 32), v(u(8), 32)], output: v(u(8), 32), - definition: Named("llvm.x86.avx2.avx2.paddus.b") + definition: Named("llvm.x86.avx2.paddus.b") }, "256_adds_epi16" => Intrinsic { inputs: vec![v(i(16), 16), v(i(16), 16)], output: v(i(16), 16), - definition: Named("llvm.x86.avx2.avx2.padds.w") + definition: Named("llvm.x86.avx2.padds.w") }, "256_adds_epu16" => Intrinsic { inputs: vec![v(u(16), 16), v(u(16), 16)], output: v(u(16), 16), - definition: Named("llvm.x86.avx2.avx2.paddus.w") + definition: Named("llvm.x86.avx2.paddus.w") }, "256_avg_epu8" => Intrinsic { inputs: vec![v(u(8), 32), v(u(8), 32)], output: v(u(8), 32), - definition: Named("llvm.x86.avx2.avx2.pavg.b") + definition: Named("llvm.x86.avx2.pavg.b") }, "256_avg_epu16" => Intrinsic { inputs: vec![v(u(16), 16), v(u(16), 16)], output: v(u(16), 16), - definition: Named("llvm.x86.avx2.avx2.pavg.w") + definition: Named("llvm.x86.avx2.pavg.w") }, "256_hadd_epi16" => Intrinsic { inputs: vec![v(i(16), 16), v(i(16), 16)], @@ -710,6 +815,126 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(i(16), 16), definition: Named("llvm.x86.avx2.pmadd.ub.sw") }, + "_mask_i32gather_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4), i_(32, 8)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.gather.d.d") + }, + "_mask_i32gather_ps" => Intrinsic { + inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(32), 4), v_(i(32), f(32), 4), i_(32, 8)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx2.gather.d.ps") + }, + "256_mask_i32gather_epi32" => Intrinsic { + inputs: vec![v(i(32), 8), p(true, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8), i_(32, 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.gather.d.d.256") + }, + "256_mask_i32gather_ps" => Intrinsic { + inputs: vec![v(f(32), 8), p(true, f(32), Some(i(8))), v(i(32), 8), v_(i(32), f(32), 8), i_(32, 8)], + output: v(f(32), 8), + definition: Named("llvm.x86.avx2.gather.d.ps.256") + }, + "_mask_i32gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 2), i_(32, 8)], + output: v(i(64), 2), + definition: Named("llvm.x86.avx2.gather.d.q") + }, + "_mask_i32gather_pd" => Intrinsic { + inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 2), i_(32, 8)], + output: v(f(64), 2), + definition: Named("llvm.x86.avx2.gather.d.pd") + }, + "256_mask_i32gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(32), 4), v(i(64), 4), i_(32, 8)], + output: v(i(64), 4), + definition: Named("llvm.x86.avx2.gather.d.q.256") + }, + "256_mask_i32gather_pd" => Intrinsic { + inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(32), 4), v_(i(64), f(64), 4), i_(32, 8)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx2.gather.d.pd.256") + }, + "_mask_i64gather_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 2), v(i(32), 4), i_(32, 8)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.gather.q.d") + }, + "_mask_i64gather_ps" => Intrinsic { + inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(64), 2), v_(i(32), f(32), 4), i_(32, 8)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx2.gather.q.ps") + }, + "256_mask_i64gather_epi32" => Intrinsic { + inputs: vec![v(i(32), 4), p(true, i(32), Some(i(8))), v(i(64), 4), v(i(32), 4), i_(32, 8)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.gather.q.d") + }, + 
"256_mask_i64gather_ps" => Intrinsic { + inputs: vec![v(f(32), 4), p(true, f(32), Some(i(8))), v(i(64), 4), v_(i(32), f(32), 4), i_(32, 8)], + output: v(f(32), 4), + definition: Named("llvm.x86.avx2.gather.q.ps") + }, + "_mask_i64gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 2), p(true, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2), i_(32, 8)], + output: v(i(64), 2), + definition: Named("llvm.x86.avx2.gather.q.q") + }, + "_mask_i64gather_pd" => Intrinsic { + inputs: vec![v(f(64), 2), p(true, f(64), Some(i(8))), v(i(64), 2), v_(i(64), f(64), 2), i_(32, 8)], + output: v(f(64), 2), + definition: Named("llvm.x86.avx2.gather.q.pd") + }, + "256_mask_i64gather_epi64" => Intrinsic { + inputs: vec![v(i(64), 4), p(true, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4), i_(32, 8)], + output: v(i(64), 4), + definition: Named("llvm.x86.avx2.gather.q.q.256") + }, + "256_mask_i64gather_pd" => Intrinsic { + inputs: vec![v(f(64), 4), p(true, f(64), Some(i(8))), v(i(64), 4), v_(i(64), f(64), 4), i_(32, 8)], + output: v(f(64), 4), + definition: Named("llvm.x86.avx2.gather.q.pd.256") + }, + "_maskload_epi32" => Intrinsic { + inputs: vec![p(true, v(i(32), 4), Some(i(8))), v(i(32), 4)], + output: v(i(32), 4), + definition: Named("llvm.x86.avx2.maskload.d") + }, + "_maskload_epi64" => Intrinsic { + inputs: vec![p(true, v(i(64), 2), Some(i(8))), v(i(64), 2)], + output: v(i(64), 2), + definition: Named("llvm.x86.avx2.maskload.q") + }, + "256_maskload_epi32" => Intrinsic { + inputs: vec![p(true, v(i(32), 8), Some(i(8))), v(i(32), 8)], + output: v(i(32), 8), + definition: Named("llvm.x86.avx2.maskload.d.256") + }, + "256_maskload_epi64" => Intrinsic { + inputs: vec![p(true, v(i(64), 4), Some(i(8))), v(i(64), 4)], + output: v(i(64), 4), + definition: Named("llvm.x86.avx2.maskload.q.256") + }, + "_maskstore_epi32" => Intrinsic { + inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 4), v(i(32), 4)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.d") + }, + "_maskstore_epi64" => Intrinsic { + inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 2), v(i(64), 2)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.q") + }, + "256_maskstore_epi32" => Intrinsic { + inputs: vec![p(false, i(32), Some(i(8))), v(i(32), 8), v(i(32), 8)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.d.256") + }, + "256_maskstore_epi64" => Intrinsic { + inputs: vec![p(false, i(64), Some(i(8))), v(i(64), 4), v(i(64), 4)], + output: void(), + definition: Named("llvm.x86.avx2.maskstore.q.256") + }, "256_max_epi8" => Intrinsic { inputs: vec![v(i(8), 32), v(i(8), 32)], output: v(i(8), 32), From 7241ae9112703ec6ec252260af8a0789db179a1b Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Thu, 3 Sep 2015 17:00:11 -0700 Subject: [PATCH 6/7] Support return aggregates in platform intrinsics. This also involved adding `[TYPE;N]` syntax and aggregate indexing support to the generator script: it's the only way to be able to have a parameterised intrinsic that returns an aggregate, since one can't refer to previous elements of the current aggregate (and that was harder to implement). 
--- src/etc/platform-intrinsics/generator.py | 83 +++++++++++++++++++----- src/librustc_trans/trans/intrinsic.rs | 23 ++++++- 2 files changed, 88 insertions(+), 18 deletions(-) diff --git a/src/etc/platform-intrinsics/generator.py b/src/etc/platform-intrinsics/generator.py index 8708e7c2f0f8b..d1217c1fb2b4a 100644 --- a/src/etc/platform-intrinsics/generator.py +++ b/src/etc/platform-intrinsics/generator.py @@ -19,7 +19,7 @@ SPEC = re.compile( r'^(?:(?PV)|(?P[iusfIUSF])(?:\((?P\d+)-(?P\d+)\)|' r'(?P\d+)(:?/(?P\d+))?)' - r'|(?P\d+))(?P[vShdnwusDMC]*)(?Px\d+)?' + r'|(?P\d+))(?P\.\d+)?(?P[vShdnwusfDMC]*)(?Px\d+)?' r'(?:(?PPm|Pc)(?P/.*)?|(?P->.*))?$' ) @@ -70,9 +70,14 @@ def lookup(raw): {k: lookup(v) for k, v in data.items()}) class PlatformTypeInfo(object): - def __init__(self, llvm_name, properties): - self.properties = properties - self.llvm_name = llvm_name + def __init__(self, llvm_name, properties, elems = None): + if elems is None: + self.properties = properties + self.llvm_name = llvm_name + else: + assert properties is None and llvm_name is None + self.properties = {} + self.elems = elems def __repr__(self): return ''.format(self.llvm_name, self.properties) @@ -80,13 +85,17 @@ def __repr__(self): def __getattr__(self, name): return self.properties[name] + def __getitem__(self, idx): + return self.elems[idx] + def vectorize(self, length, width_info): props = self.properties.copy() props.update(width_info) return PlatformTypeInfo('v{}{}'.format(length, self.llvm_name), props) - def pointer(self): - return PlatformTypeInfo('p0{}'.format(self.llvm_name), self.properties) + def pointer(self, llvm_elem): + name = self.llvm_name if llvm_elem is None else llvm_elem.llvm_name + return PlatformTypeInfo('p0{}'.format(name), self.properties) BITWIDTH_POINTER = '' @@ -128,6 +137,8 @@ def modify(self, spec, width, previous): return Unsigned(self.bitwidth()) elif spec == 's': return Signed(self.bitwidth()) + elif spec == 'f': + return Float(self.bitwidth()) elif spec == 'w': return self.__class__(self.bitwidth() * 2) elif spec == 'n': @@ -283,7 +294,11 @@ def rust_name(self): self._elem.rust_name()) def type_info(self, platform_info): - return self._elem.type_info(platform_info).pointer() + if self._llvm_elem is None: + llvm_elem = None + else: + llvm_elem = self._llvm_elem.type_info(platform_info) + return self._elem.type_info(platform_info).pointer(llvm_elem) def __eq__(self, other): return isinstance(other, Pointer) and self._const == other._const \ @@ -298,6 +313,14 @@ def __init__(self, flatten, elems): def __repr__(self): return ''.format(self._elems) + def modify(self, spec, width, previous): + if spec.startswith('.'): + num = int(spec[1:]) + return self._elems[num] + else: + print(spec) + raise NotImplementedError() + def compiler_ctor(self): return 'agg({}, vec![{}])'.format('true' if self._flatten else 'false', ', '.join(elem.compiler_ctor() for elem in self._elems)) @@ -306,8 +329,7 @@ def rust_name(self): return '({})'.format(', '.join(elem.rust_name() for elem in self._elems)) def type_info(self, platform_info): - #return PlatformTypeInfo(None, None, self._llvm_name) - return None + return PlatformTypeInfo(None, None, [elem.type_info(platform_info) for elem in self._elems]) def __eq__(self, other): return isinstance(other, Aggregate) and self._flatten == other._flatten and \ @@ -349,7 +371,11 @@ def enumerate(self, width, previous): id = match.group('id') reference = match.group('reference') - modifiers = list(match.group('modifiers') or '') + modifiers = [] + index = 
match.group('index') + if index is not None: + modifiers.append(index) + modifiers += list(match.group('modifiers') or '') force = match.group('force_width') if force is not None: modifiers.append(force) @@ -407,16 +433,32 @@ def enumerate(self, width, previous): else: assert False, 'matched `{}`, but didn\'t understand it?'.format(spec) elif spec.startswith('('): - assert bitcast is None if spec.endswith(')'): - raise NotImplementedError() + true_spec = spec[1:-1] + flatten = False elif spec.endswith(')f'): true_spec = spec[1:-2] flatten = True + else: + assert False, 'found unclosed aggregate `{}`'.format(spec) for elems in itertools.product(*(TypeSpec(subspec).enumerate(width, previous) for subspec in true_spec.split(','))): yield Aggregate(flatten, elems) + elif spec.startswith('['): + if spec.endswith(']'): + true_spec = spec[1:-1] + flatten = False + elif spec.endswith(']f'): + true_spec = spec[1:-2] + flatten = True + else: + assert False, 'found unclosed aggregate `{}`'.format(spec) + elem_spec, count = true_spec.split(';') + + count = int(count) + for elem in TypeSpec(elem_spec).enumerate(width, previous): + yield Aggregate(flatten, [elem] * count) else: assert False, 'Failed to parse `{}`'.format(spec) @@ -514,7 +556,7 @@ def parse_args(): core_type := void | vector | scalar | aggregate | reference modifier := 'v' | 'h' | 'd' | 'n' | 'w' | 'u' | 's' | - 'x' number + 'x' number | '.' number suffix := pointer | bitcast pointer := 'Pm' llvm_pointer? | 'Pc' llvm_pointer? llvm_pointer := '/' type @@ -529,7 +571,7 @@ def parse_args(): scalar_type := 'U' | 'S' | 'F' llvm_width := '/' number - aggregate := '(' (type),* ')' 'f'? + aggregate := '(' (type),* ')' 'f'? | '[' type ';' number ']' 'f'? reference := number @@ -586,6 +628,12 @@ def parse_args(): - no `f` corresponds to `declare ... @llvm.foo({float, i32})`. - having an `f` corresponds to `declare ... @llvm.foo(float, i32)`. + The `[type;number]` form is a just shorter way to write + `(...)`, except avoids doing a cartesian product of generic + types, e.g. `[S32;2]` is the same as `(S32, S32)`, while + `[I32;2]` is describing just the two types `(S32,S32)` and + `(U32,U32)` (i.e. doesn't include `(S32,U32)`, `(U32,S32)` as + `(I32,I32)` would). (Currently aggregates can not contain other aggregates.) @@ -604,13 +652,16 @@ def parse_args(): ### Modifiers - 'v': put a scalar into a vector of the current width (u32 -> u32x4, when width == 128) + - 'S': get the scalar element of a vector (u32x4 -> u32) - 'h': half the length of the vector (u32x4 -> u32x2) - 'd': double the length of the vector (u32x2 -> u32x4) - 'n': narrow the element of the vector (u32x4 -> u16x4) - 'w': widen the element of the vector (u16x4 -> u32x4) - - 'u': force an integer (vector or scalar) to be unsigned (i32x4 -> u32x4) - - 's': force an integer (vector or scalar) to be signed (u32x4 -> i32x4) + - 'u': force a number (vector or scalar) to be unsigned int (f32x4 -> u32x4) + - 's': force a number (vector or scalar) to be signed int (u32x4 -> i32x4) + - 'f': force a number (vector or scalar) to be float (u32x4 -> f32x4) - 'x' number: force the type to be a vector of bitwidth `number`. + - '.' 
number: get the `number`th element of an aggregate - 'D': dereference a pointer (*mut u32 -> u32) - 'C': make a pointer const (*mut u32 -> *const u32) - 'M': make a pointer mut (*const u32 -> *mut u32) diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs index becdd71f1d37f..bcfd44d8835d7 100644 --- a/src/librustc_trans/trans/intrinsic.rs +++ b/src/librustc_trans/trans/intrinsic.rs @@ -965,7 +965,12 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, vec![Type::vector(&elem, length as u64)] } - Aggregate(false, _) => unimplemented!(), + Aggregate(false, ref contents) => { + let elems = contents.iter() + .map(|t| one(ty_to_type(ccx, t, any_changes_needed))) + .collect::>(); + vec![Type::struct_(ccx, &elems, false)] + } Aggregate(true, ref contents) => { *any_changes_needed = true; contents.iter() @@ -1049,7 +1054,7 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, }; assert_eq!(inputs.len(), llargs.len()); - match intr.definition { + let val = match intr.definition { intrinsics::IntrinsicDef::Named(name) => { let f = declare::declare_cfn(ccx, name, @@ -1057,6 +1062,20 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, tcx.mk_nil()); Call(bcx, f, &llargs, None, call_debug_location) } + }; + + match intr.output { + intrinsics::Type::Aggregate(flatten, ref elems) => { + // the output is a tuple so we need to munge it properly + assert!(!flatten); + + for i in 0..elems.len() { + let val = ExtractValue(bcx, val, i); + Store(bcx, val, StructGEP(bcx, llresult, i)); + } + C_nil(ccx) + } + _ => val, } } }; From 67aa4c775ac90342440bb5f2af3b023d3c0f3042 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Thu, 3 Sep 2015 17:06:34 -0700 Subject: [PATCH 7/7] Add some fancier AArch64 load/store instructions. 
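Where the two patches meet (an illustrative sketch, not part of the patch series): the load intrinsics added below all return non-flattened aggregates (`agg(false, ...)`), which correspond to tuple return types on the Rust side; patch 6/7's trans change lowers such a call by treating the output as an LLVM struct, then `ExtractValue`-ing each field and storing it through `StructGEP` into the result slot. A user-facing declaration would look roughly like the following, assuming the `aarch64_v` naming prefix used by the existing NEON bindings and a nightly-only `#[repr(simd)]` vector type; the exposed names themselves are not defined in this patch.

```
#![feature(repr_simd, platform_intrinsics)]

#[repr(simd)]
#[derive(Copy, Clone, Debug)]
struct i8x8(i8, i8, i8, i8, i8, i8, i8, i8);

extern "platform-intrinsic" {
    // Hypothetical binding for the "ld2_s8" entry below: takes a *const i8
    // and returns the two-element aggregate as a Rust tuple.
    fn aarch64_vld2_s8(ptr: *const i8) -> (i8x8, i8x8);
}
```

The `_dup_` variants later in the table take the same `*const i8` argument but omit the `/0.0` bitcast in their spec, so they are keyed to `p0i8` rather than `p0v8i8` in the LLVM intrinsic name.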
--- src/etc/platform-intrinsics/aarch64.json | 42 ++ src/librustc_platform_intrinsics/aarch64.rs | 602 +++++++++++++++++++- 2 files changed, 643 insertions(+), 1 deletion(-) diff --git a/src/etc/platform-intrinsics/aarch64.json b/src/etc/platform-intrinsics/aarch64.json index dbccdc37d3f40..79fd769942889 100644 --- a/src/etc/platform-intrinsics/aarch64.json +++ b/src/etc/platform-intrinsics/aarch64.json @@ -336,6 +336,48 @@ "ret": "i8", "args": ["0"] }, + { + "intrinsic": "ld2{0[0].width}_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);2]","[f(32-64);2]"], + "args": ["0.0SPc/0.0"] + }, + { + "intrinsic": "ld3{0[0].width}_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);3]","[f(32-64);3]"], + "args": ["0.0SPc/0.0"] + }, + { + "intrinsic": "ld4{0[0].width}_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);4]","[f(32-64);4]"], + "args": ["0.0SPc/0.0"] + }, + { + "intrinsic": "ld2{0[0].width}_dup_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld2.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);2]","[f(32-64);2]"], + "args": ["0.0SPc"] + }, + { + "intrinsic": "ld3{0[0].width}_dup_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld3.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);3]","[f(32-64);3]"], + "args": ["0.0SPc"] + }, + { + "intrinsic": "ld4{0[0].width}_dup_{0[0].data_type}", + "width": [64, 128], + "llvm": "ld4.{0[0].llvm_name}.{1.llvm_name}", + "ret": ["[i(8-64);4]","[f(32-64);4]"], + "args": ["0.0SPc"] + }, { "intrinsic": "padd{0.width}_{0.data_type}", "width": [64, 128], diff --git a/src/librustc_platform_intrinsics/aarch64.rs b/src/librustc_platform_intrinsics/aarch64.rs index c90d6b3816ae3..a3084d903e27f 100644 --- a/src/librustc_platform_intrinsics/aarch64.rs +++ b/src/librustc_platform_intrinsics/aarch64.rs @@ -13,7 +13,7 @@ #![allow(unused_imports)] -use {Intrinsic, i, i_, u, u_, f, v, agg, p}; +use {Intrinsic, i, i_, u, u_, f, v, v_, agg, p, void}; use IntrinsicDef::Named; use rustc::middle::ty; @@ -1910,6 +1910,606 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option { output: v(u(8), 16), definition: Named("llvm.aarch64.neon.rbit.v16i8") }, + "ld2_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 8)))], + output: agg(false, vec![v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8") + }, + "ld2_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 8)))], + output: agg(false, vec![v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i8.p0v8i8") + }, + "ld2_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 4)))], + output: agg(false, vec![v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16") + }, + "ld2_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 4)))], + output: agg(false, vec![v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0v4i16") + }, + "ld2_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 2)))], + output: agg(false, vec![v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32") + }, + "ld2_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 2)))], + output: agg(false, vec![v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0v2i32") + }, + "ld2_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 1)))], + output: 
agg(false, vec![v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64") + }, + "ld2_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 1)))], + output: agg(false, vec![v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0v1i64") + }, + "ld2_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 2)))], + output: agg(false, vec![v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f32.p0v2f32") + }, + "ld2_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 1)))], + output: agg(false, vec![v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1f64.p0v1f64") + }, + "ld2q_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 16)))], + output: agg(false, vec![v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8") + }, + "ld2q_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 16)))], + output: agg(false, vec![v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld2.v16i8.p0v16i8") + }, + "ld2q_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 8)))], + output: agg(false, vec![v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16") + }, + "ld2q_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 8)))], + output: agg(false, vec![v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0v8i16") + }, + "ld2q_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 4)))], + output: agg(false, vec![v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32") + }, + "ld2q_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 4)))], + output: agg(false, vec![v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0v4i32") + }, + "ld2q_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 2)))], + output: agg(false, vec![v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64") + }, + "ld2q_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 2)))], + output: agg(false, vec![v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0v2i64") + }, + "ld2q_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 4)))], + output: agg(false, vec![v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4f32.p0v4f32") + }, + "ld2q_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 2)))], + output: agg(false, vec![v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f64.p0v2f64") + }, + "ld3_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 8)))], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8") + }, + "ld3_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 8)))], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i8.p0v8i8") + }, + "ld3_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 4)))], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16") + }, + "ld3_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 4)))], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0v4i16") + }, + "ld3_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 
2)))], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32") + }, + "ld3_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 2)))], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0v2i32") + }, + "ld3_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 1)))], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64") + }, + "ld3_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 1)))], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0v1i64") + }, + "ld3_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 2)))], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2f32.p0v2f32") + }, + "ld3_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 1)))], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1f64.p0v1f64") + }, + "ld3q_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 16)))], + output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8") + }, + "ld3q_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 16)))], + output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0v16i8") + }, + "ld3q_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 8)))], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16") + }, + "ld3q_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 8)))], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0v8i16") + }, + "ld3q_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 4)))], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32") + }, + "ld3q_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 4)))], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0v4i32") + }, + "ld3q_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 2)))], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64") + }, + "ld3q_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 2)))], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0v2i64") + }, + "ld3q_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 4)))], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4f32.p0v4f32") + }, + "ld3q_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 2)))], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2f64.p0v2f64") + }, + "ld4_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 8)))], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8") + }, + "ld4_u8" => Intrinsic { + inputs: vec![p(true, u(8), 
Some(v(u(8), 8)))], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0v8i8") + }, + "ld4_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 4)))], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16") + }, + "ld4_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 4)))], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0v4i16") + }, + "ld4_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 2)))], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32") + }, + "ld4_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 2)))], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0v2i32") + }, + "ld4_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 1)))], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64") + }, + "ld4_u64" => Intrinsic { + inputs: vec![p(true, u(64), Some(v(u(64), 1)))], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0v1i64") + }, + "ld4_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 2)))], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2f32.p0v2f32") + }, + "ld4_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 1)))], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1f64.p0v1f64") + }, + "ld4q_s8" => Intrinsic { + inputs: vec![p(true, i(8), Some(v(i(8), 16)))], + output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8") + }, + "ld4q_u8" => Intrinsic { + inputs: vec![p(true, u(8), Some(v(u(8), 16)))], + output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0v16i8") + }, + "ld4q_s16" => Intrinsic { + inputs: vec![p(true, i(16), Some(v(i(16), 8)))], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16") + }, + "ld4q_u16" => Intrinsic { + inputs: vec![p(true, u(16), Some(v(u(16), 8)))], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0v8i16") + }, + "ld4q_s32" => Intrinsic { + inputs: vec![p(true, i(32), Some(v(i(32), 4)))], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32") + }, + "ld4q_u32" => Intrinsic { + inputs: vec![p(true, u(32), Some(v(u(32), 4)))], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0v4i32") + }, + "ld4q_s64" => Intrinsic { + inputs: vec![p(true, i(64), Some(v(i(64), 2)))], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64") + }, + "ld4q_u64" => Intrinsic { + inputs: vec![p(true, u(64), 
Some(v(u(64), 2)))], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i64.p0v2i64") + }, + "ld4q_f32" => Intrinsic { + inputs: vec![p(true, f(32), Some(v(f(32), 4)))], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4f32.p0v4f32") + }, + "ld4q_f64" => Intrinsic { + inputs: vec![p(true, f(64), Some(v(f(64), 2)))], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2f64.p0v2f64") + }, + "ld2_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8") + }, + "ld2_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i8.p0i8") + }, + "ld2_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16") + }, + "ld2_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i16.p0i16") + }, + "ld2_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32") + }, + "ld2_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i32.p0i32") + }, + "ld2_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64") + }, + "ld2_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1i64.p0i64") + }, + "ld2_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f32.p0f32") + }, + "ld2_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld2.v1f64.p0f64") + }, + "ld2q_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8") + }, + "ld2q_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld2.v16i8.p0i8") + }, + "ld2q_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16") + }, + "ld2q_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld2.v8i16.p0i16") + }, + "ld2q_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32") + }, + "ld2q_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4i32.p0i32") 
+ }, + "ld2q_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64") + }, + "ld2q_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2i64.p0i64") + }, + "ld2q_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld2.v4f32.p0f32") + }, + "ld2q_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld2.v2f64.p0f64") + }, + "ld3_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8") + }, + "ld3_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i8.p0i8") + }, + "ld3_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16") + }, + "ld3_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i16.p0i16") + }, + "ld3_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32") + }, + "ld3_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i32.p0i32") + }, + "ld3_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64") + }, + "ld3_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1i64.p0i64") + }, + "ld3_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2f32.p0f32") + }, + "ld3_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld3.v1f64.p0f64") + }, + "ld3q_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8") + }, + "ld3q_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld3.v16i8.p0i8") + }, + "ld3q_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16") + }, + "ld3q_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld3.v8i16.p0i16") + }, + "ld3q_dup_s32" => Intrinsic { + inputs: 
vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32") + }, + "ld3q_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4i32.p0i32") + }, + "ld3q_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64") + }, + "ld3q_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2i64.p0i64") + }, + "ld3q_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld3.v4f32.p0f32") + }, + "ld3q_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld3.v2f64.p0f64") + }, + "ld4_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 8), v(i(8), 8), v(i(8), 8), v(i(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8") + }, + "ld4_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, vec![v(u(8), 8), v(u(8), 8), v(u(8), 8), v(u(8), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i8.p0i8") + }, + "ld4_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 4), v(i(16), 4), v(i(16), 4), v(i(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16") + }, + "ld4_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 4), v(u(16), 4), v(u(16), 4), v(u(16), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i16.p0i16") + }, + "ld4_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 2), v(i(32), 2), v(i(32), 2), v(i(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32") + }, + "ld4_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 2), v(u(32), 2), v(u(32), 2), v(u(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i32.p0i32") + }, + "ld4_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 1), v(i(64), 1), v(i(64), 1), v(i(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64") + }, + "ld4_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 1), v(u(64), 1), v(u(64), 1), v(u(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1i64.p0i64") + }, + "ld4_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 2), v(f(32), 2), v(f(32), 2), v(f(32), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2f32.p0f32") + }, + "ld4_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 1), v(f(64), 1), v(f(64), 1), v(f(64), 1)]), + definition: Named("llvm.aarch64.neon.ld4.v1f64.p0f64") + }, + "ld4q_dup_s8" => Intrinsic { + inputs: vec![p(true, i(8), None)], + output: agg(false, vec![v(i(8), 16), v(i(8), 16), v(i(8), 16), v(i(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8") + }, + "ld4q_dup_u8" => Intrinsic { + inputs: vec![p(true, u(8), None)], + output: agg(false, 
vec![v(u(8), 16), v(u(8), 16), v(u(8), 16), v(u(8), 16)]), + definition: Named("llvm.aarch64.neon.ld4.v16i8.p0i8") + }, + "ld4q_dup_s16" => Intrinsic { + inputs: vec![p(true, i(16), None)], + output: agg(false, vec![v(i(16), 8), v(i(16), 8), v(i(16), 8), v(i(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16") + }, + "ld4q_dup_u16" => Intrinsic { + inputs: vec![p(true, u(16), None)], + output: agg(false, vec![v(u(16), 8), v(u(16), 8), v(u(16), 8), v(u(16), 8)]), + definition: Named("llvm.aarch64.neon.ld4.v8i16.p0i16") + }, + "ld4q_dup_s32" => Intrinsic { + inputs: vec![p(true, i(32), None)], + output: agg(false, vec![v(i(32), 4), v(i(32), 4), v(i(32), 4), v(i(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32") + }, + "ld4q_dup_u32" => Intrinsic { + inputs: vec![p(true, u(32), None)], + output: agg(false, vec![v(u(32), 4), v(u(32), 4), v(u(32), 4), v(u(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4i32.p0i32") + }, + "ld4q_dup_s64" => Intrinsic { + inputs: vec![p(true, i(64), None)], + output: agg(false, vec![v(i(64), 2), v(i(64), 2), v(i(64), 2), v(i(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64") + }, + "ld4q_dup_u64" => Intrinsic { + inputs: vec![p(true, u(64), None)], + output: agg(false, vec![v(u(64), 2), v(u(64), 2), v(u(64), 2), v(u(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2i64.p0i64") + }, + "ld4q_dup_f32" => Intrinsic { + inputs: vec![p(true, f(32), None)], + output: agg(false, vec![v(f(32), 4), v(f(32), 4), v(f(32), 4), v(f(32), 4)]), + definition: Named("llvm.aarch64.neon.ld4.v4f32.p0f32") + }, + "ld4q_dup_f64" => Intrinsic { + inputs: vec![p(true, f(64), None)], + output: agg(false, vec![v(f(64), 2), v(f(64), 2), v(f(64), 2), v(f(64), 2)]), + definition: Named("llvm.aarch64.neon.ld4.v2f64.p0f64") + }, "padd_s8" => Intrinsic { inputs: vec![v(i(8), 8), v(i(8), 8)], output: v(i(8), 8),