From b4172e5151112b685f3109abbf4def9f754882b2 Mon Sep 17 00:00:00 2001
From: Benjamin Feng <benjamin.feng@glassdoor.com>
Date: Fri, 21 Jun 2019 08:13:03 -0500
Subject: [PATCH 01/24] Humanize tokenized symbol names

---
 std/zig/ast.zig       |  14 ++---
 std/zig/tokenizer.zig | 124 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 7 deletions(-)

diff --git a/std/zig/ast.zig b/std/zig/ast.zig
index 38bd94339f..aefbd9cbab 100644
--- a/std/zig/ast.zig
+++ b/std/zig/ast.zig
@@ -324,11 +324,11 @@ pub const Error = union(enum) {
                     return stream.print("`&&` is invalid. Note that `and` is boolean AND.");
                 },
                 .Invalid => {
-                    return stream.print("expected {}, found invalid bytes", @tagName(self.expected_id));
+                    return stream.print("expected {}, found invalid bytes", self.expected_id.symbol());
                 },
                 else => {
-                    const token_name = @tagName(found_token.id);
-                    return stream.print("expected {}, found {}", @tagName(self.expected_id), token_name);
+                    const token_name = found_token.id.symbol();
+                    return stream.print("expected {}, found {}", self.expected_id.symbol(), token_name);
                 },
             }
         }
@@ -339,8 +339,8 @@ pub const Error = union(enum) {
         end_id: Token.Id,
 
         pub fn render(self: *const ExpectedCommaOrEnd, tokens: *Tree.TokenList, stream: var) !void {
-            const token_name = @tagName(tokens.at(self.token).id);
-            return stream.print("expected ',' or {}, found {}", @tagName(self.end_id), token_name);
+            const actual_token = tokens.at(self.token);
+            return stream.print("expected ',' or {}, found {}", self.end_id.symbol(), actual_token.id.symbol());
         }
     };
 
@@ -351,8 +351,8 @@ pub const Error = union(enum) {
             token: TokenIndex,
 
             pub fn render(self: *const ThisError, tokens: *Tree.TokenList, stream: var) !void {
-                const token_name = @tagName(tokens.at(self.token).id);
-                return stream.print(msg, token_name);
+                const actual_token = tokens.at(self.token);
+                return stream.print(msg, actual_token.id.symbol());
             }
         };
     }
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index 4539e1e5b2..e406567976 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -194,6 +194,130 @@ pub const Token = struct {
         Keyword_var,
         Keyword_volatile,
         Keyword_while,
+
+        pub fn symbol(id: Id) []const u8 {
+            return switch (id) {
+                .Invalid => "[Invalid]",
+                .Invalid_ampersands => "&&",
+                .Identifier => "[Identifier]",
+                .StringLiteral => "[StringLiteral]",
+                .MultilineStringLiteralLine => "[MultilineStringLiteralLine]",
+                .CharLiteral => "[CharLiteral]",
+                .Eof => "[Eof]",
+                .Builtin => "[Builtin]",
+                .IntegerLiteral => "[IntegerLiteral]",
+                .FloatLiteral => "[FloatLiteral]",
+                .LineComment => "[LineComment]",
+                .DocComment => "[DocComment]",
+                .ShebangLine => "[ShebangLine]",
+
+                .Bang => "!",
+                .Pipe => "|",
+                .PipePipe => "||",
+                .PipeEqual => "|=",
+                .Equal => "=",
+                .EqualEqual => "==",
+                .EqualAngleBracketRight => "=>",
+                .BangEqual => "!=",
+                .LParen => "(",
+                .RParen => ")",
+                .Semicolon => ";",
+                .Percent => "%",
+                .PercentEqual => "%=",
+                .LBrace => "{",
+                .RBrace => "}",
+                .LBracket => "[",
+                .RBracket => "]",
+                .Period => ".",
+                .Ellipsis2 => "..",
+                .Ellipsis3 => "...",
+                .Caret => "^",
+                .CaretEqual => "^=",
+                .Plus => "+",
+                .PlusPlus => "++",
+                .PlusEqual => "+=",
+                .PlusPercent => "+%",
+                .PlusPercentEqual => "+%=",
+                .Minus => "-",
+                .MinusEqual => "-=",
+                .MinusPercent => "-%",
+                .MinusPercentEqual => "-%=",
+                .Asterisk => "*",
+                .AsteriskEqual => "*=",
+                .AsteriskAsterisk => "**",
+                .AsteriskPercent => "*%",
+                .AsteriskPercentEqual => "*%=",
+                .Arrow => "->",
+                .Colon => ":",
+                .Slash => "/",
+                .SlashEqual => "/=",
+                .Comma => ",",
+                .Ampersand => "&",
+                .AmpersandEqual => "&=",
+                .QuestionMark => "?",
+                .AngleBracketLeft => "<",
+                .AngleBracketLeftEqual => "<=",
+                .AngleBracketAngleBracketLeft => "<<",
+                .AngleBracketAngleBracketLeftEqual => "<<=",
+                .AngleBracketRight => ">",
+                .AngleBracketRightEqual => ">=",
+                .AngleBracketAngleBracketRight => ">>",
+                .AngleBracketAngleBracketRightEqual => ">>=",
+                .Tilde => "~",
+                .BracketStarBracket => "[*]",
+                .BracketStarCBracket => "[*c]",
+                .Keyword_align => "align",
+                .Keyword_allowzero => "allowzero",
+                .Keyword_and => "and",
+                .Keyword_asm => "asm",
+                .Keyword_async => "async",
+                .Keyword_await => "await",
+                .Keyword_break => "break",
+                .Keyword_cancel => "cancel",
+                .Keyword_catch => "catch",
+                .Keyword_comptime => "comptime",
+                .Keyword_const => "const",
+                .Keyword_continue => "continue",
+                .Keyword_defer => "defer",
+                .Keyword_else => "else",
+                .Keyword_enum => "enum",
+                .Keyword_errdefer => "errdefer",
+                .Keyword_error => "error",
+                .Keyword_export => "export",
+                .Keyword_extern => "extern",
+                .Keyword_false => "false",
+                .Keyword_fn => "fn",
+                .Keyword_for => "for",
+                .Keyword_if => "if",
+                .Keyword_inline => "inline",
+                .Keyword_nakedcc => "nakedcc",
+                .Keyword_noalias => "noalias",
+                .Keyword_null => "null",
+                .Keyword_or => "or",
+                .Keyword_orelse => "orelse",
+                .Keyword_packed => "packed",
+                .Keyword_promise => "promise",
+                .Keyword_pub => "pub",
+                .Keyword_resume => "resume",
+                .Keyword_return => "return",
+                .Keyword_linksection => "linksection",
+                .Keyword_stdcallcc => "stdcallcc",
+                .Keyword_struct => "struct",
+                .Keyword_suspend => "suspend",
+                .Keyword_switch => "switch",
+                .Keyword_test => "test",
+                .Keyword_threadlocal => "threadlocal",
+                .Keyword_true => "true",
+                .Keyword_try => "try",
+                .Keyword_undefined => "undefined",
+                .Keyword_union => "union",
+                .Keyword_unreachable => "unreachable",
+                .Keyword_usingnamespace => "usingnamespace",
+                .Keyword_var => "var",
+                .Keyword_volatile => "volatile",
+                .Keyword_while => "while",
+            };
+        }
     };
 };
 

From 23ee619fc5ae43a9ca116fa4fa956759dd74e977 Mon Sep 17 00:00:00 2001
From: Benjamin Feng <benjamin.feng@glassdoor.com>
Date: Mon, 5 Aug 2019 21:24:46 -0500
Subject: [PATCH 02/24] Output token symbols similar to stage1

---
 std/zig/ast.zig       | 72 +++++++++++++++++++++----------------------
 std/zig/tokenizer.zig | 24 +++++++--------
 2 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/std/zig/ast.zig b/std/zig/ast.zig
index aefbd9cbab..f63cc0d1bf 100644
--- a/std/zig/ast.zig
+++ b/std/zig/ast.zig
@@ -255,39 +255,39 @@ pub const Error = union(enum) {
         }
     }
 
-    pub const InvalidToken = SingleTokenError("Invalid token {}");
-    pub const ExpectedContainerMembers = SingleTokenError("Expected test, comptime, var decl, or container field, found {}");
-    pub const ExpectedStringLiteral = SingleTokenError("Expected string literal, found {}");
-    pub const ExpectedIntegerLiteral = SingleTokenError("Expected integer literal, found {}");
-    pub const ExpectedIdentifier = SingleTokenError("Expected identifier, found {}");
-    pub const ExpectedStatement = SingleTokenError("Expected statement, found {}");
-    pub const ExpectedVarDeclOrFn = SingleTokenError("Expected variable declaration or function, found {}");
-    pub const ExpectedVarDecl = SingleTokenError("Expected variable declaration, found {}");
-    pub const ExpectedReturnType = SingleTokenError("Expected 'var' or return type expression, found {}");
-    pub const ExpectedAggregateKw = SingleTokenError("Expected " ++ @tagName(Token.Id.Keyword_struct) ++ ", " ++ @tagName(Token.Id.Keyword_union) ++ ", or " ++ @tagName(Token.Id.Keyword_enum) ++ ", found {}");
-    pub const ExpectedEqOrSemi = SingleTokenError("Expected '=' or ';', found {}");
-    pub const ExpectedSemiOrLBrace = SingleTokenError("Expected ';' or '{{', found {}");
-    pub const ExpectedSemiOrElse = SingleTokenError("Expected ';' or 'else', found {}");
-    pub const ExpectedLBrace = SingleTokenError("Expected '{{', found {}");
-    pub const ExpectedLabelOrLBrace = SingleTokenError("Expected label or '{{', found {}");
-    pub const ExpectedColonOrRParen = SingleTokenError("Expected ':' or ')', found {}");
-    pub const ExpectedLabelable = SingleTokenError("Expected 'while', 'for', 'inline', 'suspend', or '{{', found {}");
-    pub const ExpectedInlinable = SingleTokenError("Expected 'while' or 'for', found {}");
-    pub const ExpectedAsmOutputReturnOrType = SingleTokenError("Expected '->' or " ++ @tagName(Token.Id.Identifier) ++ ", found {}");
-    pub const ExpectedSliceOrRBracket = SingleTokenError("Expected ']' or '..', found {}");
-    pub const ExpectedTypeExpr = SingleTokenError("Expected type expression, found {}");
-    pub const ExpectedPrimaryTypeExpr = SingleTokenError("Expected primary type expression, found {}");
-    pub const ExpectedExpr = SingleTokenError("Expected expression, found {}");
-    pub const ExpectedPrimaryExpr = SingleTokenError("Expected primary expression, found {}");
-    pub const ExpectedParamList = SingleTokenError("Expected parameter list, found {}");
-    pub const ExpectedPayload = SingleTokenError("Expected loop payload, found {}");
-    pub const ExpectedBlockOrAssignment = SingleTokenError("Expected block or assignment, found {}");
-    pub const ExpectedBlockOrExpression = SingleTokenError("Expected block or expression, found {}");
-    pub const ExpectedExprOrAssignment = SingleTokenError("Expected expression or assignment, found {}");
-    pub const ExpectedPrefixExpr = SingleTokenError("Expected prefix expression, found {}");
-    pub const ExpectedLoopExpr = SingleTokenError("Expected loop expression, found {}");
-    pub const ExpectedDerefOrUnwrap = SingleTokenError("Expected pointer dereference or optional unwrap, found {}");
-    pub const ExpectedSuffixOp = SingleTokenError("Expected pointer dereference, optional unwrap, or field access, found {}");
+    pub const InvalidToken = SingleTokenError("Invalid token '{}'");
+    pub const ExpectedContainerMembers = SingleTokenError("Expected test, comptime, var decl, or container field, found '{}'");
+    pub const ExpectedStringLiteral = SingleTokenError("Expected string literal, found '{}'");
+    pub const ExpectedIntegerLiteral = SingleTokenError("Expected integer literal, found '{}'");
+    pub const ExpectedIdentifier = SingleTokenError("Expected identifier, found '{}'");
+    pub const ExpectedStatement = SingleTokenError("Expected statement, found '{}'");
+    pub const ExpectedVarDeclOrFn = SingleTokenError("Expected variable declaration or function, found '{}'");
+    pub const ExpectedVarDecl = SingleTokenError("Expected variable declaration, found '{}'");
+    pub const ExpectedReturnType = SingleTokenError("Expected 'var' or return type expression, found '{}'");
+    pub const ExpectedAggregateKw = SingleTokenError("Expected '" ++ Token.Id.Keyword_struct.symbol() ++ "', '" ++ Token.Id.Keyword_union.symbol() ++ "', or '" ++ Token.Id.Keyword_enum.symbol() ++ "', found '{}'");
+    pub const ExpectedEqOrSemi = SingleTokenError("Expected '=' or ';', found '{}'");
+    pub const ExpectedSemiOrLBrace = SingleTokenError("Expected ';' or '{{', found '{}'");
+    pub const ExpectedSemiOrElse = SingleTokenError("Expected ';' or 'else', found '{}'");
+    pub const ExpectedLBrace = SingleTokenError("Expected '{{', found '{}'");
+    pub const ExpectedLabelOrLBrace = SingleTokenError("Expected label or '{{', found '{}'");
+    pub const ExpectedColonOrRParen = SingleTokenError("Expected ':' or ')', found '{}'");
+    pub const ExpectedLabelable = SingleTokenError("Expected 'while', 'for', 'inline', 'suspend', or '{{', found '{}'");
+    pub const ExpectedInlinable = SingleTokenError("Expected 'while' or 'for', found '{}'");
+    pub const ExpectedAsmOutputReturnOrType = SingleTokenError("Expected '->' or '" ++ Token.Id.Identifier.symbol() ++ "', found '{}'");
+    pub const ExpectedSliceOrRBracket = SingleTokenError("Expected ']' or '..', found '{}'");
+    pub const ExpectedTypeExpr = SingleTokenError("Expected type expression, found '{}'");
+    pub const ExpectedPrimaryTypeExpr = SingleTokenError("Expected primary type expression, found '{}'");
+    pub const ExpectedExpr = SingleTokenError("Expected expression, found '{}'");
+    pub const ExpectedPrimaryExpr = SingleTokenError("Expected primary expression, found '{}'");
+    pub const ExpectedParamList = SingleTokenError("Expected parameter list, found '{}'");
+    pub const ExpectedPayload = SingleTokenError("Expected loop payload, found '{}'");
+    pub const ExpectedBlockOrAssignment = SingleTokenError("Expected block or assignment, found '{}'");
+    pub const ExpectedBlockOrExpression = SingleTokenError("Expected block or expression, found '{}'");
+    pub const ExpectedExprOrAssignment = SingleTokenError("Expected expression or assignment, found '{}'");
+    pub const ExpectedPrefixExpr = SingleTokenError("Expected prefix expression, found '{}'");
+    pub const ExpectedLoopExpr = SingleTokenError("Expected loop expression, found '{}'");
+    pub const ExpectedDerefOrUnwrap = SingleTokenError("Expected pointer dereference or optional unwrap, found '{}'");
+    pub const ExpectedSuffixOp = SingleTokenError("Expected pointer dereference, optional unwrap, or field access, found '{}'");
 
     pub const ExpectedParamType = SimpleError("Expected parameter type");
     pub const ExpectedPubItem = SimpleError("Pub must be followed by fn decl, var decl, or container member");
@@ -324,11 +324,11 @@ pub const Error = union(enum) {
                     return stream.print("`&&` is invalid. Note that `and` is boolean AND.");
                 },
                 .Invalid => {
-                    return stream.print("expected {}, found invalid bytes", self.expected_id.symbol());
+                    return stream.print("expected '{}', found invalid bytes", self.expected_id.symbol());
                 },
                 else => {
                     const token_name = found_token.id.symbol();
-                    return stream.print("expected {}, found {}", self.expected_id.symbol(), token_name);
+                    return stream.print("expected '{}', found '{}'", self.expected_id.symbol(), token_name);
                 },
             }
         }
@@ -340,7 +340,7 @@ pub const Error = union(enum) {
 
         pub fn render(self: *const ExpectedCommaOrEnd, tokens: *Tree.TokenList, stream: var) !void {
             const actual_token = tokens.at(self.token);
-            return stream.print("expected ',' or {}, found {}", self.end_id.symbol(), actual_token.id.symbol());
+            return stream.print("expected ',' or '{}', found '{}'", self.end_id.symbol(), actual_token.id.symbol());
         }
     };
 
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index e406567976..4569a9f5a5 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -197,19 +197,19 @@ pub const Token = struct {
 
         pub fn symbol(id: Id) []const u8 {
             return switch (id) {
-                .Invalid => "[Invalid]",
+                .Invalid => "Invalid",
                 .Invalid_ampersands => "&&",
-                .Identifier => "[Identifier]",
-                .StringLiteral => "[StringLiteral]",
-                .MultilineStringLiteralLine => "[MultilineStringLiteralLine]",
-                .CharLiteral => "[CharLiteral]",
-                .Eof => "[Eof]",
-                .Builtin => "[Builtin]",
-                .IntegerLiteral => "[IntegerLiteral]",
-                .FloatLiteral => "[FloatLiteral]",
-                .LineComment => "[LineComment]",
-                .DocComment => "[DocComment]",
-                .ShebangLine => "[ShebangLine]",
+                .Identifier => "Identifier",
+                .StringLiteral => "StringLiteral",
+                .MultilineStringLiteralLine => "MultilineStringLiteralLine",
+                .CharLiteral => "CharLiteral",
+                .Eof => "Eof",
+                .Builtin => "Builtin",
+                .IntegerLiteral => "IntegerLiteral",
+                .FloatLiteral => "FloatLiteral",
+                .LineComment => "LineComment",
+                .DocComment => "DocComment",
+                .ShebangLine => "ShebangLine",
 
                 .Bang => "!",
                 .Pipe => "|",

From cddd6b46d8ad32f70e0b6c8c6f3192ae4bff9e79 Mon Sep 17 00:00:00 2001
From: Jay Weisskopf <jay@jayschwa.net>
Date: Sun, 15 Sep 2019 23:21:21 -0400
Subject: [PATCH 03/24] Fix typos: "seperate" to "separate"

Fixes #3236
---
 src/main.cpp         | 2 +-
 std/http/headers.zig | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main.cpp b/src/main.cpp
index 006d62dfa9..03709745f2 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -90,7 +90,7 @@ static int print_full_usage(const char *arg0, FILE *file, int return_code) {
         "  -mllvm [arg]                 (unsupported) forward an arg to LLVM's option processing\n"
         "  --override-std-dir [arg]     override path to Zig standard library\n"
         "  --override-lib-dir [arg]     override path to Zig lib library\n"
-        "  -ffunction-sections          places each function in a seperate section\n"
+        "  -ffunction-sections          places each function in a separate section\n"
         "\n"
         "Link Options:\n"
         "  --bundle-compiler-rt         for static libraries, include compiler-rt symbols\n"
diff --git a/std/http/headers.zig b/std/http/headers.zig
index 67624b5b11..a8dfa68629 100644
--- a/std/http/headers.zig
+++ b/std/http/headers.zig
@@ -299,7 +299,7 @@ pub const Headers = struct {
         return buf;
     }
 
-    /// Returns all headers with the given name as a comma seperated string.
+    /// Returns all headers with the given name as a comma separated string.
     ///
     /// Useful for HTTP headers that follow RFC-7230 section 3.2.2:
     ///   A recipient MAY combine multiple header fields with the same field

From 1cefe1442450961a7960fa3b4b488ae50d5abbef Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Mon, 16 Sep 2019 14:22:32 -0400
Subject: [PATCH 04/24] update clone on arm32 to latest musl implementation

See musl commit 05870abeaac0588fb9115cfd11f96880a0af2108
by Rich Felker.

Commit message from musl reproduced here:

fix code path where child function returns in arm __clone built as thumb

mov lr,pc is not a valid way to save the return address in thumb mode
since it omits the thumb bit. use a chain of bl and bx to emulate blx.
this could be avoided by converting to a .S file with preprocessor
conditions to use blx if available, but the time cost here is
dominated by the syscall anyway.

while making this change, also remove the remnants of support for
pre-bx ISA levels. commit 9f290a49bf9ee247d540d3c83875288a7991699c
removed the hack from the parent code paths, but left the unnecessary
code in the child. keeping it would require rewriting two code paths
rather than one, and is useless for reasons described in that commit.
---
 std/special/c.zig | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/std/special/c.zig b/std/special/c.zig
index 669a771c2e..cf5007326b 100644
--- a/std/special/c.zig
+++ b/std/special/c.zig
@@ -258,16 +258,11 @@ nakedcc fn clone() void {
             \\    bx lr
             \\
             \\1:  mov r0,r6
-            \\    tst r5,#1
-            \\    bne 1f
-            \\    mov lr,pc
-            \\    mov pc,r5
+            \\    bl 3f
             \\2:  mov r7,#1
             \\    svc 0
-            \\
-            \\1:  mov lr,pc
-            \\    bx r5
             \\    b 2b
+            \\3:  bx r5
         );
     } else {
         @compileError("Implement clone() for this arch.");

From c6e77f248d3771070162d80341f9aeef89a49924 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Mon, 16 Sep 2019 14:02:00 -0400
Subject: [PATCH 05/24] fix tripping llvm assert

```
Assertion `!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type"'
```

We've had this problem and solved it before; see #579.
---
 src/analyze.cpp                | 42 +++++++++++++++++++++-------------
 test/stage1/behavior/union.zig |  3 ++-
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/src/analyze.cpp b/src/analyze.cpp
index 58ec0cf133..d5d8745018 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -7864,6 +7864,26 @@ static void resolve_llvm_types_struct(CodeGen *g, ZigType *struct_type, ResolveS
     }
 }
 
+// This is to be used instead of void for debug info types, to avoid tripping
+// Assertion `!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type"'
+// when targeting CodeView (Windows).
+static ZigLLVMDIType *make_empty_namespace_llvm_di_type(CodeGen *g, ZigType *import, const char *name,
+        AstNode *decl_node)
+{
+    uint64_t debug_size_in_bits = 0;
+    uint64_t debug_align_in_bits = 0;
+    ZigLLVMDIType **di_element_types = nullptr;
+    size_t debug_field_count = 0;
+    return ZigLLVMCreateDebugStructType(g->dbuilder,
+        ZigLLVMFileToScope(import->data.structure.root_struct->di_file),
+        name,
+        import->data.structure.root_struct->di_file, (unsigned)(decl_node->line + 1),
+        debug_size_in_bits,
+        debug_align_in_bits,
+        ZigLLVM_DIFlags_Zero,
+        nullptr, di_element_types, (int)debug_field_count, 0, nullptr, "");
+}
+
 static void resolve_llvm_types_enum(CodeGen *g, ZigType *enum_type, ResolveStatus wanted_resolve_status) {
     assert(enum_type->data.enumeration.resolve_status >= ResolveStatusSizeKnown);
     if (enum_type->data.enumeration.resolve_status >= wanted_resolve_status) return;
@@ -7874,19 +7894,8 @@ static void resolve_llvm_types_enum(CodeGen *g, ZigType *enum_type, ResolveStatu
 
     if (!type_has_bits(enum_type)) {
         enum_type->llvm_type = g->builtin_types.entry_void->llvm_type;
-
-        uint64_t debug_size_in_bits = 0;
-        uint64_t debug_align_in_bits = 0;
-        ZigLLVMDIType **di_element_types = nullptr;
-        size_t debug_field_count = 0;
-        enum_type->llvm_di_type = ZigLLVMCreateDebugStructType(g->dbuilder,
-                ZigLLVMFileToScope(import->data.structure.root_struct->di_file),
-                buf_ptr(&enum_type->name),
-                import->data.structure.root_struct->di_file, (unsigned)(decl_node->line + 1),
-                debug_size_in_bits,
-                debug_align_in_bits,
-                ZigLLVM_DIFlags_Zero,
-                nullptr, di_element_types, (int)debug_field_count, 0, nullptr, "");
+        enum_type->llvm_di_type = make_empty_namespace_llvm_di_type(g, import, buf_ptr(&enum_type->name),
+                decl_node);
         enum_type->data.enumeration.resolve_status = ResolveStatusLLVMFull;
         return;
     }
@@ -7927,6 +7936,8 @@ static void resolve_llvm_types_union(CodeGen *g, ZigType *union_type, ResolveSta
     if (union_type->data.unionation.resolve_status >= wanted_resolve_status) return;
 
     bool packed = (union_type->data.unionation.layout == ContainerLayoutPacked);
+    Scope *scope = &union_type->data.unionation.decls_scope->base;
+    ZigType *import = get_scope_import(scope);
 
     TypeUnionField *most_aligned_union_member = union_type->data.unionation.most_aligned_union_member;
     ZigType *tag_type = union_type->data.unionation.tag_type;
@@ -7934,7 +7945,8 @@ static void resolve_llvm_types_union(CodeGen *g, ZigType *union_type, ResolveSta
     if (gen_field_count == 0) {
         if (tag_type == nullptr) {
             union_type->llvm_type = g->builtin_types.entry_void->llvm_type;
-            union_type->llvm_di_type = g->builtin_types.entry_void->llvm_di_type;
+            union_type->llvm_di_type = make_empty_namespace_llvm_di_type(g, import, buf_ptr(&union_type->name),
+                    union_type->data.unionation.decl_node);
         } else {
             union_type->llvm_type = get_llvm_type(g, tag_type);
             union_type->llvm_di_type = get_llvm_di_type(g, tag_type);
@@ -7943,8 +7955,6 @@ static void resolve_llvm_types_union(CodeGen *g, ZigType *union_type, ResolveSta
         return;
     }
 
-    Scope *scope = &union_type->data.unionation.decls_scope->base;
-    ZigType *import = get_scope_import(scope);
     AstNode *decl_node = union_type->data.unionation.decl_node;
 
     if (union_type->data.unionation.resolve_status < ResolveStatusLLVMFwdDecl) {
diff --git a/test/stage1/behavior/union.zig b/test/stage1/behavior/union.zig
index 75b01164f8..d340a52d1e 100644
--- a/test/stage1/behavior/union.zig
+++ b/test/stage1/behavior/union.zig
@@ -1,4 +1,5 @@
-const expect = @import("std").testing.expect;
+const std = @import("std");
+const expect = std.testing.expect;
 
 const Value = union(enum) {
     Int: u64,

From 914ad1ec2eff4ea9061804ad0da9cde7dd6543b6 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Tue, 17 Sep 2019 22:30:49 -0400
Subject: [PATCH 06/24] fix peer result location with typed parent, ...

...runtime condition, comptime prongs.

closes #3244
---
 src/ir.cpp                    |  2 +-
 test/stage1/behavior/misc.zig | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/ir.cpp b/src/ir.cpp
index f29afdcf7b..ea9039a1b6 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -15198,7 +15198,7 @@ static IrInstruction *ir_resolve_result_raw(IrAnalyze *ira, IrInstruction *suspe
                 }
                 peer_parent->skipped = true;
                 return ir_resolve_result(ira, suspend_source_instr, peer_parent->parent,
-                        value_type, value, force_runtime, true, true);
+                        value_type, value, force_runtime || !is_comptime, true, true);
             }
 
             if (peer_parent->resolved_type == nullptr) {
diff --git a/test/stage1/behavior/misc.zig b/test/stage1/behavior/misc.zig
index c122b18e0a..613bb9ac54 100644
--- a/test/stage1/behavior/misc.zig
+++ b/test/stage1/behavior/misc.zig
@@ -721,3 +721,23 @@ test "global variable assignment with optional unwrapping with var initialized t
     };
     expect(global_foo.* == 1234);
 }
+
+test "peer result location with typed parent, runtime condition, comptime prongs" {
+    const S = struct {
+        fn doTheTest(arg: i32) i32 {
+            const st = Structy{
+                .bleh = if (arg == 1) 1 else 1,
+            };
+
+            if (st.bleh == 1)
+                return 1234;
+            return 0;
+        }
+
+        const Structy = struct {
+            bleh: i32,
+        };
+    };
+    expect(S.doTheTest(0) == 1234);
+    expect(S.doTheTest(1) == 1234);
+}

From 0e3ca4c63ecb8e43af8261020d21bc6888d18fc0 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Thu, 25 Jul 2019 11:11:37 -0500
Subject: [PATCH 07/24] Fix array->vector and vector->array for many types.
 Allow vector of bool.

Vectors do not have the same packing as arrays, and just bitcasting
is not the correct way to convert them.
---
 src/analyze.cpp                 |  3 ++-
 src/codegen.cpp                 | 28 ++++++++++++++--------
 src/ir.cpp                      |  2 +-
 test/compile_errors.zig         |  2 +-
 test/stage1/behavior/vector.zig | 41 +++++++++++++++++++++++++++++++++
 5 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/src/analyze.cpp b/src/analyze.cpp
index d5d8745018..ac70d5646f 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -4708,6 +4708,7 @@ ZigType *get_int_type(CodeGen *g, bool is_signed, uint32_t size_in_bits) {
 bool is_valid_vector_elem_type(ZigType *elem_type) {
     return elem_type->id == ZigTypeIdInt ||
         elem_type->id == ZigTypeIdFloat ||
+        elem_type->id == ZigTypeIdBool ||
         get_codegen_ptr_type(elem_type) != nullptr;
 }
 
@@ -4727,7 +4728,7 @@ ZigType *get_vector_type(CodeGen *g, uint32_t len, ZigType *elem_type) {
 
     ZigType *entry = new_type_table_entry(ZigTypeIdVector);
     if ((len != 0) && type_has_bits(elem_type)) {
-        // Vectors can only be ints, floats, or pointers. ints and floats have trivially resolvable
+        // Vectors can only be ints, floats, bools, or pointers. ints (inc. bools) and floats have trivially resolvable
         // llvm type refs. pointers we will use usize instead.
         LLVMTypeRef example_vector_llvm_type;
         if (elem_type->id == ZigTypeIdPointer) {
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 4799c0a28f..1b86f95433 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -5549,10 +5549,14 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
     assert(handle_is_ptr(array_type));
     LLVMValueRef result_loc = ir_llvm_value(g, instruction->result_loc);
     LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
-    LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
-            LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
-    uint32_t alignment = get_ptr_align(g, instruction->result_loc->value.type);
-    gen_store_untyped(g, vector, casted_ptr, alignment, false);
+    LLVMValueRef array = LLVMGetUndef(get_llvm_type(g, array_type));
+    for (uintptr_t i = 0; i < instruction->vector->value.type->data.vector.len; i++) {
+        LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
+        LLVMValueRef elem = LLVMBuildExtractElement(g->builder, vector,
+            index, "vector_to_array");
+        array = LLVMBuildInsertValue(g->builder, array, elem, i, "");
+    }
+    LLVMBuildStore(g->builder, array, result_loc);
     return result_loc;
 }
 
@@ -5563,12 +5567,16 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
     assert(vector_type->id == ZigTypeIdVector);
     assert(!handle_is_ptr(vector_type));
     LLVMValueRef array_ptr = ir_llvm_value(g, instruction->array);
-    LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
-            LLVMPointerType(get_llvm_type(g, vector_type), 0), "");
-    ZigType *array_type = instruction->array->value.type;
-    assert(array_type->id == ZigTypeIdArray);
-    uint32_t alignment = get_abi_alignment(g, array_type->data.array.child_type);
-    return gen_load_untyped(g, casted_ptr, alignment, false, "");
+    LLVMValueRef array = LLVMBuildLoad2(g->builder, get_llvm_type(g, instruction->array->value.type),
+        array_ptr, "");
+    LLVMValueRef vector = LLVMGetUndef(get_llvm_type(g, vector_type));
+    for (uintptr_t i = 0; i < instruction->base.value.type->data.vector.len; i++) {
+        LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
+        LLVMValueRef elem = LLVMBuildExtractValue(g->builder, array,
+            i, "vector_to_array");
+        vector = LLVMBuildInsertElement(g->builder, vector, elem, index, "");
+    }
+    return vector;
 }
 
 static LLVMValueRef ir_render_assert_zero(CodeGen *g, IrExecutable *executable,
diff --git a/src/ir.cpp b/src/ir.cpp
index ea9039a1b6..56866340c4 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -22024,7 +22024,7 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
 
     if (!is_valid_vector_elem_type(elem_type)) {
         ir_add_error(ira, instruction->elem_type,
-            buf_sprintf("vector element type must be integer, float, or pointer; '%s' is invalid",
+            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
                 buf_ptr(&elem_type->name)));
         return ira->codegen->invalid_instruction;
     }
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 6365ca64cb..9d96d6f948 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6491,7 +6491,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
         \\    var v: V = undefined;
         \\}
     ,
-        "tmp.zig:2:26: error: vector element type must be integer, float, or pointer; '@Vector(4, u8)' is invalid",
+        "tmp.zig:2:26: error: vector element type must be integer, float, bool, or pointer; '@Vector(4, u8)' is invalid",
     );
 
     cases.add("compileLog of tagged enum doesn't crash the compiler",
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
index 431e3fe272..94d3aa1a45 100644
--- a/test/stage1/behavior/vector.zig
+++ b/test/stage1/behavior/vector.zig
@@ -2,6 +2,18 @@ const std = @import("std");
 const mem = std.mem;
 const expect = std.testing.expect;
 
+test "implicit cast vector to array - bool" {
+    const S = struct {
+        fn doTheTest() void {
+            const a: @Vector(4, bool) = [_]bool{ true, false, true, false };
+            const result_array: [4]bool = a;
+            expect(mem.eql(bool, result_array, [4]bool{ true, false, true, false }));
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}
+
 test "vector wrap operators" {
     const S = struct {
         fn doTheTest() void {
@@ -80,3 +92,32 @@ test "array to vector" {
     var arr = [4]f32{ foo, 1.5, 0.0, 0.0 };
     var vec: @Vector(4, f32) = arr;
 }
+
+test "vector casts of sizes not divisable by 8" {
+    const S = struct {
+        fn doTheTest() void {
+            {
+                var v: @Vector(4, u3) = [4]u3{ 5, 2,  3, 0};
+                var x: [4]u3 = v;
+                expect(mem.eql(u3, x, ([4]u3)(v)));
+            }
+            {
+                var v: @Vector(4, u2) = [4]u2{ 1, 2,  3, 0};
+                var x: [4]u2 = v;
+                expect(mem.eql(u2, x, ([4]u2)(v)));
+            }
+            {
+                var v: @Vector(4, u1) = [4]u1{ 1, 0,  1, 0};
+                var x: [4]u1 = v;
+                expect(mem.eql(u1, x, ([4]u1)(v)));
+            }
+            {
+                var v: @Vector(4, bool) = [4]bool{ false, false,  true, false};
+                var x: [4]bool = v;
+                expect(mem.eql(bool, x, ([4]bool)(v)));
+            }
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}

From 558b4ac1f0fd7123ebe25f3e59eef275b066c50a Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 18 Sep 2019 10:24:28 -0400
Subject: [PATCH 08/24] adjust codegen of casting between arrays and vectors

 * bitcasting is still better when the size_in_bits aligns with the ABI
   size of the element type. Logic is reworked to do bitcasting when
   possible
 * rather than using insertelement/extractelement to work with arrays,
   store/load elements directly. This matches codegen for arrays
   elsewhere.
---
 src/all_types.hpp |  2 +-
 src/codegen.cpp   | 65 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/src/all_types.hpp b/src/all_types.hpp
index 60b292662d..e682eb8de1 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1351,7 +1351,7 @@ struct ZigTypeBoundFn {
 };
 
 struct ZigTypeVector {
-    // The type must be a pointer, integer, or float
+    // The type must be a pointer, integer, bool, or float
     ZigType *elem_type;
     uint32_t len;
 };
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 1b86f95433..e4b47be8e5 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -5549,14 +5549,29 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
     assert(handle_is_ptr(array_type));
     LLVMValueRef result_loc = ir_llvm_value(g, instruction->result_loc);
     LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
-    LLVMValueRef array = LLVMGetUndef(get_llvm_type(g, array_type));
-    for (uintptr_t i = 0; i < instruction->vector->value.type->data.vector.len; i++) {
-        LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
-        LLVMValueRef elem = LLVMBuildExtractElement(g->builder, vector,
-            index, "vector_to_array");
-        array = LLVMBuildInsertValue(g->builder, array, elem, i, "");
+
+    ZigType *elem_type = array_type->data.array.child_type;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
+    if (bitcast_ok) {
+        LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
+                LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
+        uint32_t alignment = get_ptr_align(g, instruction->result_loc->value.type);
+        gen_store_untyped(g, vector, casted_ptr, alignment, false);
+    } else {
+        // If size_in_bits of the element type does not equal its ABI size in bits, a simple bitcast
+        // will not work, and we fall back to extractelement, storing each element individually.
+        LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+        LLVMTypeRef u32_type_ref = LLVMInt32Type();
+        LLVMValueRef zero = LLVMConstInt(usize_type_ref, 0, false);
+        for (uintptr_t i = 0; i < instruction->vector->value.type->data.vector.len; i++) {
+            LLVMValueRef index_usize = LLVMConstInt(usize_type_ref, i, false);
+            LLVMValueRef index_u32 = LLVMConstInt(u32_type_ref, i, false);
+            LLVMValueRef indexes[] = { zero, index_usize };
+            LLVMValueRef elem_ptr = LLVMBuildInBoundsGEP(g->builder, result_loc, indexes, 2, "");
+            LLVMValueRef elem = LLVMBuildExtractElement(g->builder, vector, index_u32, "");
+            LLVMBuildStore(g->builder, elem, elem_ptr);
+        }
     }
-    LLVMBuildStore(g->builder, array, result_loc);
     return result_loc;
 }
 
@@ -5567,16 +5582,34 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
     assert(vector_type->id == ZigTypeIdVector);
     assert(!handle_is_ptr(vector_type));
     LLVMValueRef array_ptr = ir_llvm_value(g, instruction->array);
-    LLVMValueRef array = LLVMBuildLoad2(g->builder, get_llvm_type(g, instruction->array->value.type),
-        array_ptr, "");
-    LLVMValueRef vector = LLVMGetUndef(get_llvm_type(g, vector_type));
-    for (uintptr_t i = 0; i < instruction->base.value.type->data.vector.len; i++) {
-        LLVMValueRef index = LLVMConstInt(g->builtin_types.entry_u32->llvm_type, i, false);
-        LLVMValueRef elem = LLVMBuildExtractValue(g->builder, array,
-            i, "vector_to_array");
-        vector = LLVMBuildInsertElement(g->builder, vector, elem, index, "");
+    LLVMTypeRef vector_type_ref = get_llvm_type(g, vector_type);
+
+    ZigType *elem_type = vector_type->data.vector.elem_type;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
+    if (bitcast_ok) {
+        LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
+                LLVMPointerType(vector_type_ref, 0), "");
+        ZigType *array_type = instruction->array->value.type;
+        assert(array_type->id == ZigTypeIdArray);
+        uint32_t alignment = get_abi_alignment(g, array_type->data.array.child_type);
+        return gen_load_untyped(g, casted_ptr, alignment, false, "");
+    } else {
+        // If size_in_bits of the element type does not equal its ABI size in bits, a simple bitcast
+        // will not work, and we fall back to insertelement, loading each element individually.
+        LLVMTypeRef usize_type_ref = g->builtin_types.entry_usize->llvm_type;
+        LLVMTypeRef u32_type_ref = LLVMInt32Type();
+        LLVMValueRef zero = LLVMConstInt(usize_type_ref, 0, false);
+        LLVMValueRef vector = LLVMGetUndef(vector_type_ref);
+        for (uintptr_t i = 0; i < instruction->base.value.type->data.vector.len; i++) {
+            LLVMValueRef index_usize = LLVMConstInt(usize_type_ref, i, false);
+            LLVMValueRef index_u32 = LLVMConstInt(u32_type_ref, i, false);
+            LLVMValueRef indexes[] = { zero, index_usize };
+            LLVMValueRef elem_ptr = LLVMBuildInBoundsGEP(g->builder, array_ptr, indexes, 2, "");
+            LLVMValueRef elem = LLVMBuildLoad(g->builder, elem_ptr, "");
+            vector = LLVMBuildInsertElement(g->builder, vector, elem, index_u32, "");
+        }
+        return vector;
     }
-    return vector;
 }
 
 static LLVMValueRef ir_render_assert_zero(CodeGen *g, IrExecutable *executable,

From 74ce5e9e13014d2657bf00b5893fd4687c7f0359 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Wed, 31 Jul 2019 10:55:53 -0500
Subject: [PATCH 09/24] stage1: proper return type on vector comparisons

---
 src/ir.cpp                      | 119 ++++++++++++++++++++------------
 test/stage1/behavior/vector.zig |  17 +++++
 2 files changed, 91 insertions(+), 45 deletions(-)

diff --git a/src/ir.cpp b/src/ir.cpp
index 56866340c4..b2a32c96d0 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -13092,6 +13092,59 @@ static bool optional_value_is_null(ConstExprValue *val) {
     }
 }
 
+static IrInstruction *ir_evaluate_bin_op_cmp(IrAnalyze *ira, ZigType *resolved_type,
+    ConstExprValue *op1_val, ConstExprValue *op2_val, IrInstructionBinOp *bin_op_instruction, IrBinOp op_id,
+    bool one_possible_value) {
+    if (op1_val->special == ConstValSpecialUndef ||
+        op2_val->special == ConstValSpecialUndef)
+        return ir_const_undef(ira, &bin_op_instruction->base, resolved_type);
+    if (resolved_type->id == ZigTypeIdComptimeFloat || resolved_type->id == ZigTypeIdFloat) {
+        if (float_is_nan(op1_val) || float_is_nan(op2_val)) {
+            return ir_const_bool(ira, &bin_op_instruction->base, op_id == IrBinOpCmpNotEq);
+        }
+        Cmp cmp_result = float_cmp(op1_val, op2_val);
+        bool answer = resolve_cmp_op_id(op_id, cmp_result);
+        return ir_const_bool(ira, &bin_op_instruction->base, answer);
+    } else if (resolved_type->id == ZigTypeIdComptimeInt || resolved_type->id == ZigTypeIdInt) {
+        Cmp cmp_result = bigint_cmp(&op1_val->data.x_bigint, &op2_val->data.x_bigint);
+        bool answer = resolve_cmp_op_id(op_id, cmp_result);
+        return ir_const_bool(ira, &bin_op_instruction->base, answer);
+    } else if (resolved_type->id == ZigTypeIdPointer && op_id != IrBinOpCmpEq && op_id != IrBinOpCmpNotEq) {
+        if ((op1_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
+                op1_val->data.x_ptr.special == ConstPtrSpecialNull) &&
+            (op2_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
+                op2_val->data.x_ptr.special == ConstPtrSpecialNull))
+        {
+            uint64_t op1_addr = op1_val->data.x_ptr.special == ConstPtrSpecialNull ?
+                0 : op1_val->data.x_ptr.data.hard_coded_addr.addr;
+            uint64_t op2_addr = op2_val->data.x_ptr.special == ConstPtrSpecialNull ?
+                0 : op2_val->data.x_ptr.data.hard_coded_addr.addr;
+            Cmp cmp_result;
+            if (op1_addr > op2_addr) {
+                cmp_result = CmpGT;
+            } else if (op1_addr < op2_addr) {
+                cmp_result = CmpLT;
+            } else {
+                cmp_result = CmpEQ;
+            }
+            bool answer = resolve_cmp_op_id(op_id, cmp_result);
+            return ir_const_bool(ira, &bin_op_instruction->base, answer);
+        }
+    } else {
+        bool are_equal = one_possible_value || const_values_equal(ira->codegen, op1_val, op2_val);
+        bool answer;
+        if (op_id == IrBinOpCmpEq) {
+            answer = are_equal;
+        } else if (op_id == IrBinOpCmpNotEq) {
+            answer = !are_equal;
+        } else {
+            zig_unreachable();
+        }
+        return ir_const_bool(ira, &bin_op_instruction->base, answer);
+    }
+    zig_unreachable();
+}
+
 // Returns ErrorNotLazy when the value cannot be determined
 static Error lazy_cmp_zero(AstNode *source_node, ConstExprValue *val, Cmp *result) {
     Error err;
@@ -13427,7 +13480,8 @@ static IrInstruction *ir_analyze_bin_op_cmp(IrAnalyze *ira, IrInstructionBinOp *
     }
 
     if (one_possible_value || (instr_is_comptime(casted_op1) && instr_is_comptime(casted_op2))) {
-        {
+        // TODO do we need lazy values on vector comparisons?
+        if (resolved_type->id != ZigTypeIdVector) {
             // Before resolving the values, we special case comparisons against zero. These can often be done
             // without resolving lazy values, preventing potential dependency loops.
             Cmp op1_cmp_zero;
@@ -13477,51 +13531,22 @@ never_mind_just_calculate_it_normally:
         ConstExprValue *op2_val = one_possible_value ? &casted_op2->value : ir_resolve_const(ira, casted_op2, UndefBad);
         if (op2_val == nullptr)
             return ira->codegen->invalid_instruction;
+        if (resolved_type->id != ZigTypeIdVector)
+            return ir_evaluate_bin_op_cmp(ira, resolved_type, op1_val, op2_val, bin_op_instruction, op_id, one_possible_value);
+        IrInstruction *result = ir_const(ira, &bin_op_instruction->base,
+            get_vector_type(ira->codegen, resolved_type->data.vector.len, ira->codegen->builtin_types.entry_bool));
+        result->value.data.x_array.data.s_none.elements =
+            create_const_vals(resolved_type->data.vector.len);
 
-        if (resolved_type->id == ZigTypeIdComptimeFloat || resolved_type->id == ZigTypeIdFloat) {
-            if (float_is_nan(op1_val) || float_is_nan(op2_val)) {
-                return ir_const_bool(ira, &bin_op_instruction->base, op_id == IrBinOpCmpNotEq);
-            }
-            Cmp cmp_result = float_cmp(op1_val, op2_val);
-            bool answer = resolve_cmp_op_id(op_id, cmp_result);
-            return ir_const_bool(ira, &bin_op_instruction->base, answer);
-        } else if (resolved_type->id == ZigTypeIdComptimeInt || resolved_type->id == ZigTypeIdInt) {
-            Cmp cmp_result = bigint_cmp(&op1_val->data.x_bigint, &op2_val->data.x_bigint);
-            bool answer = resolve_cmp_op_id(op_id, cmp_result);
-            return ir_const_bool(ira, &bin_op_instruction->base, answer);
-        } else if (resolved_type->id == ZigTypeIdPointer && op_id != IrBinOpCmpEq && op_id != IrBinOpCmpNotEq) {
-            if ((op1_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
-                    op1_val->data.x_ptr.special == ConstPtrSpecialNull) &&
-                (op2_val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr ||
-                    op2_val->data.x_ptr.special == ConstPtrSpecialNull))
-            {
-                uint64_t op1_addr = op1_val->data.x_ptr.special == ConstPtrSpecialNull ?
-                    0 : op1_val->data.x_ptr.data.hard_coded_addr.addr;
-                uint64_t op2_addr = op2_val->data.x_ptr.special == ConstPtrSpecialNull ?
-                    0 : op2_val->data.x_ptr.data.hard_coded_addr.addr;
-                Cmp cmp_result;
-                if (op1_addr > op2_addr) {
-                    cmp_result = CmpGT;
-                } else if (op1_addr < op2_addr) {
-                    cmp_result = CmpLT;
-                } else {
-                    cmp_result = CmpEQ;
-                }
-                bool answer = resolve_cmp_op_id(op_id, cmp_result);
-                return ir_const_bool(ira, &bin_op_instruction->base, answer);
-            }
-        } else {
-            bool are_equal = one_possible_value || const_values_equal(ira->codegen, op1_val, op2_val);
-            bool answer;
-            if (op_id == IrBinOpCmpEq) {
-                answer = are_equal;
-            } else if (op_id == IrBinOpCmpNotEq) {
-                answer = !are_equal;
-            } else {
-                zig_unreachable();
-            }
-            return ir_const_bool(ira, &bin_op_instruction->base, answer);
+        expand_undef_array(ira->codegen, &result->value);
+        for (size_t i = 0;i < resolved_type->data.vector.len;i++) {
+            IrInstruction *cur_res = ir_evaluate_bin_op_cmp(ira, resolved_type->data.vector.elem_type,
+                &op1_val->data.x_array.data.s_none.elements[i],
+                &op2_val->data.x_array.data.s_none.elements[i],
+                bin_op_instruction, op_id, one_possible_value);
+            copy_const_val(&result->value.data.x_array.data.s_none.elements[i], &cur_res->value, false);
         }
+        return result;
     }
 
     // some comparisons with unsigned numbers can be evaluated
@@ -13564,7 +13589,11 @@ never_mind_just_calculate_it_normally:
     IrInstruction *result = ir_build_bin_op(&ira->new_irb,
             bin_op_instruction->base.scope, bin_op_instruction->base.source_node,
             op_id, casted_op1, casted_op2, bin_op_instruction->safety_check_on);
-    result->value.type = ira->codegen->builtin_types.entry_bool;
+    if (resolved_type->id == ZigTypeIdVector)
+        result->value.type = get_vector_type(ira->codegen, resolved_type->data.vector.len,
+            ira->codegen->builtin_types.entry_bool);
+    else
+        result->value.type = ira->codegen->builtin_types.entry_bool;
     return result;
 }
 
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
index 94d3aa1a45..27277b5e52 100644
--- a/test/stage1/behavior/vector.zig
+++ b/test/stage1/behavior/vector.zig
@@ -30,6 +30,23 @@ test "vector wrap operators" {
     comptime S.doTheTest();
 }
 
+test "vector bin compares with mem.eql" {
+    const S = struct {
+        fn doTheTest() void {
+            var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
+            var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 30, 4 };
+            expect(mem.eql(bool, ([4]bool)(v == x), [4]bool{ false, false,  true, false}));
+            expect(mem.eql(bool, ([4]bool)(v != x), [4]bool{  true,  true, false,  true}));
+            expect(mem.eql(bool, ([4]bool)(v  < x), [4]bool{ false,  true, false, false}));
+            expect(mem.eql(bool, ([4]bool)(v  > x), [4]bool{  true, false, false,  true}));
+            expect(mem.eql(bool, ([4]bool)(v <= x), [4]bool{ false,  true,  true, false}));
+            expect(mem.eql(bool, ([4]bool)(v >= x), [4]bool{  true, false,  true,  true}));
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}
+
 test "vector int operators" {
     const S = struct {
         fn doTheTest() void {

From 9e4065fa738f040dd338c613409fc1089cc33580 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 18 Sep 2019 10:52:32 -0400
Subject: [PATCH 10/24] remove TODO regarding lazy values

The question was:

> // TODO do we need lazy values on vector comparisons?

Nope, in fact the existing code already was returning ErrorNotLazy
for that particular type, and would already goto
never_mind_just_calculate_it_normally. So the explicit check for
ZigTypeIdVector is not needed. I appreciate the caution though.
---
 src/ir.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/ir.cpp b/src/ir.cpp
index b2a32c96d0..6de08de913 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -13480,8 +13480,7 @@ static IrInstruction *ir_analyze_bin_op_cmp(IrAnalyze *ira, IrInstructionBinOp *
     }
 
     if (one_possible_value || (instr_is_comptime(casted_op1) && instr_is_comptime(casted_op2))) {
-        // TODO do we need lazy values on vector comparisons?
-        if (resolved_type->id != ZigTypeIdVector) {
+        {
             // Before resolving the values, we special case comparisons against zero. These can often be done
             // without resolving lazy values, preventing potential dependency loops.
             Cmp op1_cmp_zero;
@@ -13589,11 +13588,12 @@ never_mind_just_calculate_it_normally:
     IrInstruction *result = ir_build_bin_op(&ira->new_irb,
             bin_op_instruction->base.scope, bin_op_instruction->base.source_node,
             op_id, casted_op1, casted_op2, bin_op_instruction->safety_check_on);
-    if (resolved_type->id == ZigTypeIdVector)
+    if (resolved_type->id == ZigTypeIdVector) {
         result->value.type = get_vector_type(ira->codegen, resolved_type->data.vector.len,
             ira->codegen->builtin_types.entry_bool);
-    else
+    } else {
         result->value.type = ira->codegen->builtin_types.entry_bool;
+    }
     return result;
 }
 

From 193604c837df75ab0c3fa5860f8b234263fe5b50 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sat, 29 Jun 2019 11:32:26 -0500
Subject: [PATCH 11/24] stage1: add @shuffle() shufflevector support

I change the semantics of the mask operand, to make it a little more
flexible. There is no real danger in this because it is a compile-error
if you do it the LLVM way (and there is an appropriate error to tell you
this).

v2: avoid problems with double-free
---
 doc/langref.html.in              |  22 +++
 src/all_types.hpp                |  11 ++
 src/codegen.cpp                  |  32 ++++
 src/ir.cpp                       | 274 +++++++++++++++++++++++++++++++
 src/ir_print.cpp                 |  17 ++
 test/compile_errors.zig          |  13 ++
 test/stage1/behavior/shuffle.zig |  57 +++++++
 7 files changed, 426 insertions(+)
 create mode 100644 test/stage1/behavior/shuffle.zig

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 374fbfcde5..7ae0ee7c1c 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -8226,6 +8226,28 @@ fn foo(comptime T: type, ptr: *T) T {
       {#link|pointer|Pointers#}.
       </p>
       {#header_close#}
+
+      {#header_open|@shuffle#}
+      <pre>{#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, i32)) @Vector(mask.len, ElemType){#endsyntax#}</pre>
+      <p>
+      Performs the {#syntax#}shufflevector{#endsyntax#} instruction. Each element of the {#syntax#}comptime{#endsyntax#}
+      {#syntax#}mask{#endsyntax#} (always of type {#syntax#}i32{#endsyntax#}) selects an element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}.
+      Non-negative values select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select
+      from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#}
+      operator for indexes into {#syntax#}b{#endsyntax#} so that both indexes can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#}
+      value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects are {#syntax#}undefined{#endsyntax#}
+      then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and
+      the relevant <a href="https://llvm.org/docs/LangRef.html#i-shufflevector">LLVM Documentation on
+      {#syntax#}shufflevector{#endsyntax#}</a>, although note that the mask values are interpreted differently than in LLVM-IR.
+      Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} do not have to match.
+      The {#syntax#}undefined{#endsyntax#} identifier can be selected from up to the length of the other vector,
+      and yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, yields an
+      {#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with length of {#syntax#}mask{#endsyntax#}.</p>
+      <p>
+      {#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, or a
+      {#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its length determines the result length.
+      </p>
+      {#header_close#}
       {#header_close#}
 
       {#header_open|Build Mode#}
diff --git a/src/all_types.hpp b/src/all_types.hpp
index e682eb8de1..deb56cbb40 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1611,6 +1611,7 @@ enum BuiltinFnId {
     BuiltinFnIdIntToEnum,
     BuiltinFnIdIntType,
     BuiltinFnIdVectorType,
+    BuiltinFnIdShuffle,
     BuiltinFnIdSetCold,
     BuiltinFnIdSetRuntimeSafety,
     BuiltinFnIdSetFloatMode,
@@ -2428,6 +2429,7 @@ enum IrInstructionId {
     IrInstructionIdBoolToInt,
     IrInstructionIdIntType,
     IrInstructionIdVectorType,
+    IrInstructionIdShuffleVector,
     IrInstructionIdBoolNot,
     IrInstructionIdMemset,
     IrInstructionIdMemcpy,
@@ -3669,6 +3671,15 @@ struct IrInstructionVectorToArray {
     IrInstruction *result_loc;
 };
 
+struct IrInstructionShuffleVector {
+    IrInstruction base;
+
+    IrInstruction *scalar_type;
+    IrInstruction *a;
+    IrInstruction *b;
+    IrInstruction *mask; // This is in zig-format, not llvm format
+};
+
 struct IrInstructionAssertZero {
     IrInstruction base;
 
diff --git a/src/codegen.cpp b/src/codegen.cpp
index e4b47be8e5..2f1488635a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4581,6 +4581,35 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
     return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int);
 }
 
+static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) {
+    uint64_t len_a = instruction->a->value.type->data.vector.len;
+    uint64_t len_c = instruction->mask->value.type->data.vector.len;
+
+    // LLVM uses integers larger than the length of the first array to
+    // index into the second array. This was deemed unnecessarily fragile
+    // when changing code, so Zig uses negative numbers to index the
+    // second vector. These start at -1 and go down, and are easiest to use
+    // with the ~ operator. Here we convert between the two formats.
+    IrInstruction *mask = instruction->mask;
+    LLVMValueRef *values = allocate<LLVMValueRef>(len_c);
+    for (uint64_t i = 0;i < len_c;i++) {
+        if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) {
+            values[i] = LLVMGetUndef(LLVMInt32Type());
+        } else {
+            int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+            if (v < 0)
+                v = (uint32_t)~v + (uint32_t)len_a;
+            values[i] = LLVMConstInt(LLVMInt32Type(), v, false);
+        }
+    }
+
+    return LLVMBuildShuffleVector(g->builder,
+        ir_llvm_value(g, instruction->a),
+        ir_llvm_value(g, instruction->b),
+        LLVMConstVector(values, len_c),
+        "");
+}
+
 static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
     ZigType *int_type = instruction->op->value.type;
     LLVMValueRef fn_val = get_int_builtin_fn(g, int_type, BuiltinFnIdPopCount);
@@ -6095,6 +6124,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_spill_begin(g, executable, (IrInstructionSpillBegin *)instruction);
         case IrInstructionIdSpillEnd:
             return ir_render_spill_end(g, executable, (IrInstructionSpillEnd *)instruction);
+        case IrInstructionIdShuffleVector:
+            return ir_render_shuffle_vector(g, executable, (IrInstructionShuffleVector *) instruction);
     }
     zig_unreachable();
 }
@@ -7785,6 +7816,7 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdCompileLog, "compileLog", SIZE_MAX);
     create_builtin_fn(g, BuiltinFnIdIntType, "IntType", 2); // TODO rename to Int
     create_builtin_fn(g, BuiltinFnIdVectorType, "Vector", 2);
+    create_builtin_fn(g, BuiltinFnIdShuffle, "shuffle", 4);
     create_builtin_fn(g, BuiltinFnIdSetCold, "setCold", 1);
     create_builtin_fn(g, BuiltinFnIdSetRuntimeSafety, "setRuntimeSafety", 1);
     create_builtin_fn(g, BuiltinFnIdSetFloatMode, "setFloatMode", 1);
diff --git a/src/ir.cpp b/src/ir.cpp
index 6de08de913..f62a58e37e 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -717,6 +717,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionVectorType *) {
     return IrInstructionIdVectorType;
 }
 
+static constexpr IrInstructionId ir_instruction_id(IrInstructionShuffleVector *) {
+    return IrInstructionIdShuffleVector;
+}
+
 static constexpr IrInstructionId ir_instruction_id(IrInstructionBoolNot *) {
     return IrInstructionIdBoolNot;
 }
@@ -2277,6 +2281,25 @@ static IrInstruction *ir_build_vector_type(IrBuilder *irb, Scope *scope, AstNode
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_shuffle_vector(IrBuilder *irb, Scope *scope, AstNode *source_node,
+    IrInstruction *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{
+    IrInstructionShuffleVector *instruction = ir_build_instruction<IrInstructionShuffleVector>(irb, scope, source_node);
+    instruction->scalar_type = scalar_type;
+    instruction->a = a;
+    instruction->b = b;
+    instruction->mask = mask;
+
+    if (scalar_type != nullptr) {
+        ir_ref_instruction(scalar_type, irb->current_basic_block);
+    }
+    ir_ref_instruction(a, irb->current_basic_block);
+    ir_ref_instruction(b, irb->current_basic_block);
+    ir_ref_instruction(mask, irb->current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_bool_not(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *value) {
     IrInstructionBoolNot *instruction = ir_build_instruction<IrInstructionBoolNot>(irb, scope, source_node);
     instruction->value = value;
@@ -4936,6 +4959,32 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                 IrInstruction *vector_type = ir_build_vector_type(irb, scope, node, arg0_value, arg1_value);
                 return ir_lval_wrap(irb, scope, vector_type, lval, result_loc);
             }
+        case BuiltinFnIdShuffle:
+            {
+                AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
+                IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
+                if (arg0_value == irb->codegen->invalid_instruction)
+                    return arg0_value;
+
+                AstNode *arg1_node = node->data.fn_call_expr.params.at(1);
+                IrInstruction *arg1_value = ir_gen_node(irb, arg1_node, scope);
+                if (arg1_value == irb->codegen->invalid_instruction)
+                    return arg1_value;
+
+                AstNode *arg2_node = node->data.fn_call_expr.params.at(2);
+                IrInstruction *arg2_value = ir_gen_node(irb, arg2_node, scope);
+                if (arg2_value == irb->codegen->invalid_instruction)
+                    return arg2_value;
+
+                AstNode *arg3_node = node->data.fn_call_expr.params.at(3);
+                IrInstruction *arg3_value = ir_gen_node(irb, arg3_node, scope);
+                if (arg3_value == irb->codegen->invalid_instruction)
+                    return arg3_value;
+
+                IrInstruction *shuffle_vector = ir_build_shuffle_vector(irb, scope, node,
+                    arg0_value, arg1_value, arg2_value, arg3_value);
+                return ir_lval_wrap(irb, scope, shuffle_vector, lval, result_loc);
+            }
         case BuiltinFnIdMemcpy:
             {
                 AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
@@ -22063,6 +22112,228 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
     return ir_const_type(ira, &instruction->base, vector_type);
 }
 
+static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
+    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) {
+    assert(source_instr && scalar_type && a && b && mask);
+    assert(scalar_type->id == ZigTypeIdBool ||
+           scalar_type->id == ZigTypeIdInt ||
+           scalar_type->id == ZigTypeIdFloat ||
+           scalar_type->id == ZigTypeIdPointer);
+
+    ZigType *mask_type = mask->value.type;
+    if (type_is_invalid(mask_type))
+        return ira->codegen->invalid_instruction;
+
+    const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'";
+
+    if (mask_type->id == ZigTypeIdArray) {
+        ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type);
+        mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type);
+        if (!mask)
+            return ira->codegen->invalid_instruction;
+        mask_type = vector_type;
+    }
+
+    if (mask_type->id != ZigTypeIdVector) {
+        ir_add_error(ira, mask,
+            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    ZigType *mask_scalar_type = mask_type->data.array.child_type;
+    if (mask_scalar_type->id != ZigTypeIdInt) {
+        ir_add_error(ira, mask,
+            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    if (mask_scalar_type->data.integral.bit_count != 32 ||
+        mask_scalar_type->data.integral.is_signed == false) {
+        ir_add_error(ira, mask,
+            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len;
+    if (a->value.type->id != ZigTypeIdVector) {
+        if (a->value.type->id != ZigTypeIdUndefined) {
+            ir_add_error(ira, a,
+                buf_sprintf("expected vector of element type '%s' got '%s'",
+                    buf_ptr(&scalar_type->name),
+                    buf_ptr(&a->value.type->name)));
+            return ira->codegen->invalid_instruction;
+        }
+    } else {
+        len_a = a->value.type->data.vector.len;
+    }
+
+    if (b->value.type->id != ZigTypeIdVector) {
+        if (b->value.type->id != ZigTypeIdUndefined) {
+            ir_add_error(ira, b,
+                buf_sprintf("expected vector of element type '%s' got '%s'",
+                    buf_ptr(&scalar_type->name),
+                    buf_ptr(&b->value.type->name)));
+            return ira->codegen->invalid_instruction;
+        }
+    } else {
+        len_b = b->value.type->data.vector.len;
+    }
+
+    if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) {
+        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type));
+    }
+
+    // undefined is a vector up to length of the other vector.
+    if (a->value.type->id == ZigTypeIdUndefined) {
+        a = ir_const_undef(ira, a, b->value.type);
+        len_a = b->value.type->data.vector.len;
+    } else if (b->value.type->id == ZigTypeIdUndefined) {
+        b = ir_const_undef(ira, b, a->value.type);
+        len_b = a->value.type->data.vector.len;
+    }
+
+    // FIXME I think this needs to be more sophisticated
+    if (a->value.type->data.vector.elem_type != scalar_type) {
+        ir_add_error(ira, a,
+            buf_sprintf("element type '%s' does not match '%s'",
+                buf_ptr(&a->value.type->data.vector.elem_type->name),
+                buf_ptr(&scalar_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+    if (b->value.type->data.vector.elem_type != scalar_type) {
+        ir_add_error(ira, b,
+            buf_sprintf("element type '%s' does not match '%s'",
+                buf_ptr(&b->value.type->data.vector.elem_type->name),
+                buf_ptr(&scalar_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    if (a->value.type != b->value.type) {
+        assert(len_a != len_b);
+        uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b);
+        bool expand_b = len_b < len_a;
+        IrInstruction *expand_mask = ir_const(ira, mask,
+            get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
+        expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
+        uint32_t i = 0;
+        for (; i < len_min; i++)
+            bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
+        for (; i < len_max; i++)
+            bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
+        IrInstruction *undef = ir_const_undef(ira, source_instr,
+            get_vector_type(ira->codegen, len_min, scalar_type));
+        if (expand_b) {
+            if (instr_is_comptime(b)) {
+                ConstExprValue *old = b->value.data.x_array.data.s_none.elements;
+                b->value.data.x_array.data.s_none.elements =
+                    allocate<ConstExprValue>(len_a);
+                memcpy(b->value.data.x_array.data.s_none.elements, old,
+                    b->value.type->data.vector.len * sizeof(ConstExprValue));
+            } else {
+                b = ir_build_shuffle_vector(&ira->new_irb,
+                    source_instr->scope, source_instr->source_node,
+                    nullptr, b, undef, expand_mask);
+                b->value.special = ConstValSpecialRuntime;
+            }
+            b->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
+        } else {
+            if (instr_is_comptime(a)) {
+                ConstExprValue *old = a->value.data.x_array.data.s_none.elements;
+                a->value.data.x_array.data.s_none.elements =
+                    allocate<ConstExprValue>(len_b);
+                memcpy(a->value.data.x_array.data.s_none.elements, old,
+                    a->value.type->data.vector.len * sizeof(ConstExprValue));
+            } else {
+                a = ir_build_shuffle_vector(&ira->new_irb,
+                    source_instr->scope, source_instr->source_node,
+                    nullptr, a, undef, expand_mask);
+                a->value.special = ConstValSpecialRuntime;
+            }
+            a->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
+        }
+    }
+    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
+    if (!mask_val) {
+        ir_add_error(ira, mask,
+            buf_sprintf("mask must be comptime"));
+        return ira->codegen->invalid_instruction;
+    }
+    for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
+        if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
+            continue;
+        int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+        if (v >= 0 && (uint64_t)v + 1 > len_a) {
+            ErrorMsg *msg = ir_add_error(ira, mask,
+                buf_sprintf("mask index out of bounds"));
+            add_error_note(ira->codegen, msg, mask->source_node,
+                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
+            if ((uint64_t)v <= len_a + len_b)
+                add_error_note(ira->codegen, msg, mask->source_node,
+                    buf_sprintf("selections from the second vector are specified with negative numbers"));
+        } else if (v < 0 && (uint64_t)~v + 1 > len_b) {
+            ErrorMsg *msg = ir_add_error(ira, mask,
+                buf_sprintf("mask index out of bounds"));
+            add_error_note(ira->codegen, msg, mask->source_node,
+                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
+        }
+        else
+            continue;
+        return ira->codegen->invalid_instruction;
+    }
+
+    ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type);
+    if (instr_is_comptime(a) &&
+        instr_is_comptime(b)) {
+        IrInstruction *result = ir_const(ira, source_instr, result_type);
+        result->value.data.x_array.data.s_none.elements = create_const_vals(len_c);
+        for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
+            if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
+                result->value.data.x_array.data.s_none.elements[i].special =
+                    ConstValSpecialUndef;
+            int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+            if (v >= 0)
+                result->value.data.x_array.data.s_none.elements[i] =
+                    a->value.data.x_array.data.s_none.elements[v];
+            else if (v < 0)
+                result->value.data.x_array.data.s_none.elements[i] =
+                    b->value.data.x_array.data.s_none.elements[~v];
+            else
+                zig_unreachable();
+            result->value.data.x_array.data.s_none.elements[i].special =
+                ConstValSpecialStatic;
+        }
+        result->value.special = ConstValSpecialStatic;
+        return result;
+    }
+
+    // All static analysis passed, and not comptime
+    IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
+        source_instr->scope, source_instr->source_node,
+        nullptr, a, b, mask);
+    result->value.type = result_type;
+    result->value.special = ConstValSpecialRuntime;
+    return result;
+}
+
+static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
+    ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type);
+    assert(scalar_type);
+    if (type_is_invalid(scalar_type))
+        return ira->codegen->invalid_instruction;
+
+    if (scalar_type->id != ZigTypeIdBool &&
+        scalar_type->id != ZigTypeIdInt &&
+        scalar_type->id != ZigTypeIdFloat &&
+        scalar_type->id != ZigTypeIdPointer) {
+        ir_add_error(ira, instruction->scalar_type,
+            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+                buf_ptr(&scalar_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child);
+}
+
 static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
     IrInstruction *value = instruction->value->child;
     if (type_is_invalid(value->value.type))
@@ -25607,6 +25878,8 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
             return ir_analyze_instruction_int_type(ira, (IrInstructionIntType *)instruction);
         case IrInstructionIdVectorType:
             return ir_analyze_instruction_vector_type(ira, (IrInstructionVectorType *)instruction);
+        case IrInstructionIdShuffleVector:
+            return ir_analyze_instruction_shuffle_vector(ira, (IrInstructionShuffleVector *)instruction);
         case IrInstructionIdBoolNot:
             return ir_analyze_instruction_bool_not(ira, (IrInstructionBoolNot *)instruction);
         case IrInstructionIdMemset:
@@ -25942,6 +26215,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdTruncate:
         case IrInstructionIdIntType:
         case IrInstructionIdVectorType:
+        case IrInstructionIdShuffleVector:
         case IrInstructionIdBoolNot:
         case IrInstructionIdSliceSrc:
         case IrInstructionIdMemberCount:
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index f2877b46e6..8561ed4508 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -42,6 +42,8 @@ static const char* ir_instruction_type_str(IrInstruction* instruction) {
     switch (instruction->id) {
         case IrInstructionIdInvalid:
             return "Invalid";
+        case IrInstructionIdShuffleVector:
+            return "Shuffle";
         case IrInstructionIdDeclVarSrc:
             return "DeclVarSrc";
         case IrInstructionIdDeclVarGen:
@@ -1208,6 +1210,18 @@ static void ir_print_vector_type(IrPrint *irp, IrInstructionVectorType *instruct
     fprintf(irp->f, ")");
 }
 
+static void ir_print_shuffle_vector(IrPrint *irp, IrInstructionShuffleVector *instruction) {
+    fprintf(irp->f, "@shuffle(");
+    ir_print_other_instruction(irp, instruction->scalar_type);
+    fprintf(irp->f, ", ");
+    ir_print_other_instruction(irp, instruction->a);
+    fprintf(irp->f, ", ");
+    ir_print_other_instruction(irp, instruction->b);
+    fprintf(irp->f, ", ");
+    ir_print_other_instruction(irp, instruction->mask);
+    fprintf(irp->f, ")");
+}
+
 static void ir_print_bool_not(IrPrint *irp, IrInstructionBoolNot *instruction) {
     fprintf(irp->f, "! ");
     ir_print_other_instruction(irp, instruction->value);
@@ -2143,6 +2157,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction, bool
         case IrInstructionIdVectorType:
             ir_print_vector_type(irp, (IrInstructionVectorType *)instruction);
             break;
+        case IrInstructionIdShuffleVector:
+            ir_print_shuffle_vector(irp, (IrInstructionShuffleVector *)instruction);
+            break;
         case IrInstructionIdBoolNot:
             ir_print_bool_not(irp, (IrInstructionBoolNot *)instruction);
             break;
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 9d96d6f948..d9b4ee6a95 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6484,6 +6484,19 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
         "tmp.zig:7:23: error: unable to evaluate constant expression",
     );
 
+    cases.addTest(
+        "using LLVM syntax for @shuffle",
+        \\export fn entry() void {
+        \\    const v: @Vector(4, u32) = [4]u32{0, 1, 2, 3};
+        \\    const x: @Vector(4, u32) = [4]u32{4, 5, 6, 7};
+        \\    var z = @shuffle(u32, v, x, [8]i32{0, 1, 2, 3, 4, 5, 6, 7});
+        \\}
+    ,
+        "tmp.zig:4:39: error: mask index out of bounds",
+        "tmp.zig:4:39: note: when computing vector element at index 4",
+        "tmp.zig:4:39: note: selections from the second vector are specified with negative numbers",
+    );
+
     cases.addTest(
         "nested vectors",
         \\export fn entry() void {
diff --git a/test/stage1/behavior/shuffle.zig b/test/stage1/behavior/shuffle.zig
new file mode 100644
index 0000000000..70bff5991e
--- /dev/null
+++ b/test/stage1/behavior/shuffle.zig
@@ -0,0 +1,57 @@
+const std = @import("std");
+const mem = std.mem;
+const expect = std.testing.expect;
+
+test "@shuffle" {
+    const S = struct {
+        fn doTheTest() void {
+            var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
+            var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 3, 4 };
+            const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3)};
+            var res = @shuffle(i32, v, x, mask);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
+
+            // Implicit cast from array (of mask)
+            res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3)});
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
+
+            // Undefined
+            const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0};
+            res = @shuffle(i32, v, undefined, mask2);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647}));
+
+            // Upcasting of b
+            var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined};
+            const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3};
+            res = @shuffle(i32, x, v2, mask3);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 2147483647, 4 }));
+
+            // Upcasting of a
+            var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2};
+            const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3)};
+            res = @shuffle(i32, v3, x, mask4);
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, -2, 4 }));
+
+            // bool
+            {
+                var x2: @Vector(4, bool) = [4]bool{ false, true, false, true};
+                var v4: @Vector(2, bool) = [2]bool{ true, false};
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+                var res2 = @shuffle(bool, x2, v4, mask5);
+                expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
+            }
+
+            // FIXME re-enable when LLVM codegen is fixed
+            // https://bugs.llvm.org/show_bug.cgi?id=42803
+            if (false) {
+                var x2: @Vector(3, bool) = [3]bool{ false, true, false};
+                var v4: @Vector(2, bool) = [2]bool{ true, false};
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+                var res2 = @shuffle(bool, x2, v4, mask5);
+                expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
+            }
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}

From 2038f4d45a597cc672380c0a5fc8dd98e928d24c Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 18 Sep 2019 15:41:56 -0400
Subject: [PATCH 12/24] rework the implementation

 * update documentation
   - move `@shuffle` to be sorted alphabetically
   - remove mention of LLVM
   - minor clarifications & rewording
 * introduce ir_resolve_vector_elem_type to avoid duplicate compile
   error message and duplicate vector element checking logic
 * rework ir_analyze_shuffle_vector to solve various issues
 * improve `@shuffle` to allow implicit cast of arrays
 * the shuffle tests weren't being run
---
 doc/langref.html.in              |  59 +++--
 src/codegen.cpp                  |  19 +-
 src/ir.cpp                       | 375 +++++++++++++++----------------
 test/compile_errors.zig          |  14 +-
 test/stage1/behavior.zig         |   1 +
 test/stage1/behavior/shuffle.zig |  32 +--
 6 files changed, 250 insertions(+), 250 deletions(-)

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 7ae0ee7c1c..8a303640e6 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -7673,6 +7673,43 @@ test "@setRuntimeSafety" {
       {#see_also|@shlExact|@shlWithOverflow#}
       {#header_close#}
 
+      {#header_open|@shuffle#}
+      <pre>{#syntax#}@shuffle(comptime E: type, a: @Vector(a_len, E), b: @Vector(b_len, E), comptime mask: @Vector(mask_len, i32)) @Vector(mask_len, E){#endsyntax#}</pre>
+      <p>
+      Constructs a new {#link|vector|Vectors#} by selecting elements from {#syntax#}a{#endsyntax#} and
+      {#syntax#}b{#endsyntax#} based on {#syntax#}mask{#endsyntax#}.
+      </p>
+      <p>
+      Each element in {#syntax#}mask{#endsyntax#} selects an element from either {#syntax#}a{#endsyntax#} or
+      {#syntax#}b{#endsyntax#}. Non-negative numbers select from {#syntax#}a{#endsyntax#} starting at 0.
+      Negative values select from {#syntax#}b{#endsyntax#}, starting at {#syntax#}-1{#endsyntax#} and going down.
+      It is recommended to use the {#syntax#}~{#endsyntax#} operator for indexes from {#syntax#}b{#endsyntax#}
+      so that both indexes can start from {#syntax#}0{#endsyntax#} (i.e. {#syntax#}~i32(0){#endsyntax#} is
+      {#syntax#}-1{#endsyntax#}).
+      </p>
+      <p>
+      For each element of {#syntax#}mask{#endsyntax#}, if it or the selected value from
+      {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#},
+      then the resulting element is {#syntax#}undefined{#endsyntax#}.
+      </p>
+      <p>
+      {#syntax#}a_len{#endsyntax#} and {#syntax#}b_len{#endsyntax#} may differ in length. Out-of-bounds element
+      indexes in {#syntax#}mask{#endsyntax#} result in compile errors.
+      </p>
+      <p>
+      If {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#}, it
+      is equivalent to a vector of all {#syntax#}undefined{#endsyntax#} with the same length as the other vector.
+      If both vectors are {#syntax#}undefined{#endsyntax#}, {#syntax#}@shuffle{#endsyntax#} returns
+      a vector with all elements {#syntax#}undefined{#endsyntax#}.
+      </p>
+      <p>
+      {#syntax#}E{#endsyntax#} must be an {#link|integer|Integers#}, {#link|float|Floats#},
+      {#link|pointer|Pointers#}, or {#syntax#}bool{#endsyntax#}. The mask may be any vector length, and its
+      length determines the result length.
+      </p>
+      {#see_also|SIMD#}
+      {#header_close#}
+
       {#header_open|@sizeOf#}
       <pre>{#syntax#}@sizeOf(comptime T: type) comptime_int{#endsyntax#}</pre>
       <p>
@@ -8226,28 +8263,6 @@ fn foo(comptime T: type, ptr: *T) T {
       {#link|pointer|Pointers#}.
       </p>
       {#header_close#}
-
-      {#header_open|@shuffle#}
-      <pre>{#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, u32)) @Vector(mask.len, ElemType){#endsyntax#}</pre>
-      <p>
-      Does the {#syntax#}shufflevector{#endsyntax#} instruction. Each element in {#syntax#}comptime{#endsyntax#}
-      (and always {#syntax#}i32{#endsyntax#}) {#syntax#}mask{#endsyntax#} selects a element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}.
-      Positive numbers select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select
-      from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#}
-      operator from indexes from b so that both indexes can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#}
-      value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects are {#syntax#}undefined{#endsyntax#}
-      then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and
-      the relevent <a href="https://llvm.org/docs/LangRef.html#i-shufflevector">LLVM Documentation on
-      {#syntax#}shufflevector{#endsyntax#}</a>, although note that the mask values are interpreted differently than in LLVM-IR.
-      Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} do not have to match.
-      The {#syntax#}undefined{#endsyntax#} identifier can be selected from up to the length of the other vector,
-      and yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, yields an
-      {#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with length of {#syntax#}mask{#endsyntax#}.</p>
-      <p>
-      {#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, or a
-      {#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its' length determines the result length.
-      </p>
-      {#header_close#}
       {#header_close#}
 
       {#header_open|Build Mode#}
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 2f1488635a..7676b3bbd0 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4583,7 +4583,7 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
 
 static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) {
     uint64_t len_a = instruction->a->value.type->data.vector.len;
-    uint64_t len_c = instruction->mask->value.type->data.vector.len;
+    uint64_t len_mask = instruction->mask->value.type->data.vector.len;
 
     // LLVM uses integers larger than the length of the first array to
     // index into the second array. This was deemed unnecessarily fragile
@@ -4591,23 +4591,24 @@ static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executabl
     // second vector. These start at -1 and go down, and are easiest to use
     // with the ~ operator. Here we convert between the two formats.
     IrInstruction *mask = instruction->mask;
-    LLVMValueRef *values = allocate<LLVMValueRef>(len_c);
-    for (uint64_t i = 0;i < len_c;i++) {
+    LLVMValueRef *values = allocate<LLVMValueRef>(len_mask);
+    for (uint64_t i = 0; i < len_mask; i++) {
         if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) {
             values[i] = LLVMGetUndef(LLVMInt32Type());
         } else {
-            int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-            if (v < 0)
-                v = (uint32_t)~v + (uint32_t)len_a;
-            values[i] = LLVMConstInt(LLVMInt32Type(), v, false);
+            int32_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+            uint32_t index_val = (v >= 0) ? (uint32_t)v : (uint32_t)~v + (uint32_t)len_a;
+            values[i] = LLVMConstInt(LLVMInt32Type(), index_val, false);
         }
     }
 
+    LLVMValueRef llvm_mask_value = LLVMConstVector(values, len_mask);
+    free(values);
+
     return LLVMBuildShuffleVector(g->builder,
         ir_llvm_value(g, instruction->a),
         ir_llvm_value(g, instruction->b),
-        LLVMConstVector(values, len_c),
-        "");
+        llvm_mask_value, "");
 }
 
 static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
diff --git a/src/ir.cpp b/src/ir.cpp
index f62a58e37e..cbc00f0cfe 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -11049,6 +11049,19 @@ static ZigType *ir_resolve_type(IrAnalyze *ira, IrInstruction *type_value) {
     return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val);
 }
 
+static ZigType *ir_resolve_vector_elem_type(IrAnalyze *ira, IrInstruction *elem_type_value) {
+    ZigType *elem_type = ir_resolve_type(ira, elem_type_value);
+    if (type_is_invalid(elem_type))
+        return ira->codegen->builtin_types.entry_invalid;
+    if (!is_valid_vector_elem_type(elem_type)) {
+        ir_add_error(ira, elem_type_value,
+            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+                buf_ptr(&elem_type->name)));
+        return ira->codegen->builtin_types.entry_invalid;
+    }
+    return elem_type;
+}
+
 static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
     ZigType *ty = ir_resolve_type(ira, type_value);
     if (type_is_invalid(ty))
@@ -22096,242 +22109,212 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
     if (!ir_resolve_unsigned(ira, instruction->len->child, ira->codegen->builtin_types.entry_u32, &len))
         return ira->codegen->invalid_instruction;
 
-    ZigType *elem_type = ir_resolve_type(ira, instruction->elem_type->child);
+    ZigType *elem_type = ir_resolve_vector_elem_type(ira, instruction->elem_type->child);
     if (type_is_invalid(elem_type))
         return ira->codegen->invalid_instruction;
 
-    if (!is_valid_vector_elem_type(elem_type)) {
-        ir_add_error(ira, instruction->elem_type,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&elem_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
     ZigType *vector_type = get_vector_type(ira->codegen, len, elem_type);
 
     return ir_const_type(ira, &instruction->base, vector_type);
 }
 
 static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
-    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) {
-    assert(source_instr && scalar_type && a && b && mask);
-    assert(scalar_type->id == ZigTypeIdBool ||
-           scalar_type->id == ZigTypeIdInt ||
-           scalar_type->id == ZigTypeIdFloat ||
-           scalar_type->id == ZigTypeIdPointer);
+    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{
+    ir_assert(source_instr && scalar_type && a && b && mask, source_instr);
+    ir_assert(is_valid_vector_elem_type(scalar_type), source_instr);
 
-    ZigType *mask_type = mask->value.type;
-    if (type_is_invalid(mask_type))
-        return ira->codegen->invalid_instruction;
-
-    const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'";
-
-    if (mask_type->id == ZigTypeIdArray) {
-        ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type);
-        mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type);
-        if (!mask)
-            return ira->codegen->invalid_instruction;
-        mask_type = vector_type;
-    }
-
-    if (mask_type->id != ZigTypeIdVector) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
-    ZigType *mask_scalar_type = mask_type->data.array.child_type;
-    if (mask_scalar_type->id != ZigTypeIdInt) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
-    if (mask_scalar_type->data.integral.bit_count != 32 ||
-        mask_scalar_type->data.integral.is_signed == false) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
-    uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len;
-    if (a->value.type->id != ZigTypeIdVector) {
-        if (a->value.type->id != ZigTypeIdUndefined) {
-            ir_add_error(ira, a,
-                buf_sprintf("expected vector of element type '%s' got '%s'",
-                    buf_ptr(&scalar_type->name),
-                    buf_ptr(&a->value.type->name)));
-            return ira->codegen->invalid_instruction;
-        }
+    uint32_t len_mask;
+    if (mask->value.type->id == ZigTypeIdVector) {
+        len_mask = mask->value.type->data.vector.len;
+    } else if (mask->value.type->id == ZigTypeIdArray) {
+        len_mask = mask->value.type->data.array.len;
     } else {
+        ir_add_error(ira, mask,
+            buf_sprintf("expected vector or array, found '%s'",
+                buf_ptr(&mask->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+    mask = ir_implicit_cast(ira, mask, get_vector_type(ira->codegen, len_mask,
+                ira->codegen->builtin_types.entry_i32));
+    if (type_is_invalid(mask->value.type))
+        return ira->codegen->invalid_instruction;
+
+    uint32_t len_a;
+    if (a->value.type->id == ZigTypeIdVector) {
         len_a = a->value.type->data.vector.len;
+    } else if (a->value.type->id == ZigTypeIdArray) {
+        len_a = a->value.type->data.array.len;
+    } else if (a->value.type->id == ZigTypeIdUndefined) {
+        len_a = UINT32_MAX;
+    } else {
+        ir_add_error(ira, a,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&a->value.type->name)));
+        return ira->codegen->invalid_instruction;
     }
 
-    if (b->value.type->id != ZigTypeIdVector) {
-        if (b->value.type->id != ZigTypeIdUndefined) {
-            ir_add_error(ira, b,
-                buf_sprintf("expected vector of element type '%s' got '%s'",
-                    buf_ptr(&scalar_type->name),
-                    buf_ptr(&b->value.type->name)));
+    uint32_t len_b;
+    if (b->value.type->id == ZigTypeIdVector) {
+        len_b = b->value.type->data.vector.len;
+    } else if (b->value.type->id == ZigTypeIdArray) {
+        len_b = b->value.type->data.array.len;
+    } else if (b->value.type->id == ZigTypeIdUndefined) {
+        len_b = UINT32_MAX;
+    } else {
+        ir_add_error(ira, b,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&b->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    if (len_a == UINT32_MAX && len_b == UINT32_MAX) {
+        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_mask, scalar_type));
+    }
+
+    if (len_a == UINT32_MAX) {
+        len_a = len_b;
+        a = ir_const_undef(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+    } else {
+        a = ir_implicit_cast(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+        if (type_is_invalid(a->value.type))
+            return ira->codegen->invalid_instruction;
+    }
+
+    if (len_b == UINT32_MAX) {
+        len_b = len_a;
+        b = ir_const_undef(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+    } else {
+        b = ir_implicit_cast(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+        if (type_is_invalid(b->value.type))
+            return ira->codegen->invalid_instruction;
+    }
+
+    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
+    if (mask_val == nullptr)
+        return ira->codegen->invalid_instruction;
+
+    expand_undef_array(ira->codegen, mask_val);
+
+    for (uint32_t i = 0; i < len_mask; i += 1) {
+        ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+        if (mask_elem_val->special == ConstValSpecialUndef)
+            continue;
+        int32_t v_i32 = bigint_as_signed(&mask_elem_val->data.x_bigint);
+        uint32_t v;
+        IrInstruction *chosen_operand;
+        if (v_i32 >= 0) {
+            v = (uint32_t)v_i32;
+            chosen_operand = a;
+        } else {
+            v = (uint32_t)~v_i32;
+            chosen_operand = b;
+        }
+        if (v >= chosen_operand->value.type->data.vector.len) {
+            ErrorMsg *msg = ir_add_error(ira, mask,
+                buf_sprintf("mask index '%u' has out-of-bounds selection", i));
+            add_error_note(ira->codegen, msg, chosen_operand->source_node,
+                buf_sprintf("selected index '%u' out of bounds of %s", v,
+                    buf_ptr(&chosen_operand->value.type->name)));
+            if (chosen_operand == a && v < len_a + len_b) {
+                add_error_note(ira->codegen, msg, b->source_node,
+                    buf_create_from_str("selections from the second vector are specified with negative numbers"));
+            }
             return ira->codegen->invalid_instruction;
         }
-    } else {
-        len_b = b->value.type->data.vector.len;
     }
 
-    if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) {
-        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type));
-    }
+    ZigType *result_type = get_vector_type(ira->codegen, len_mask, scalar_type);
+    if (instr_is_comptime(a) && instr_is_comptime(b)) {
+        ConstExprValue *a_val = ir_resolve_const(ira, a, UndefOk);
+        if (a_val == nullptr)
+            return ira->codegen->invalid_instruction;
 
-    // undefined is a vector up to length of the other vector.
-    if (a->value.type->id == ZigTypeIdUndefined) {
-        a = ir_const_undef(ira, a, b->value.type);
-        len_a = b->value.type->data.vector.len;
-    } else if (b->value.type->id == ZigTypeIdUndefined) {
-        b = ir_const_undef(ira, b, a->value.type);
-        len_b = a->value.type->data.vector.len;
-    }
+        ConstExprValue *b_val = ir_resolve_const(ira, b, UndefOk);
+        if (b_val == nullptr)
+            return ira->codegen->invalid_instruction;
 
-    // FIXME I think this needs to be more sophisticated
-    if (a->value.type->data.vector.elem_type != scalar_type) {
-        ir_add_error(ira, a,
-            buf_sprintf("element type '%s' does not match '%s'",
-                buf_ptr(&a->value.type->data.vector.elem_type->name),
-                buf_ptr(&scalar_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-    if (b->value.type->data.vector.elem_type != scalar_type) {
-        ir_add_error(ira, b,
-            buf_sprintf("element type '%s' does not match '%s'",
-                buf_ptr(&b->value.type->data.vector.elem_type->name),
-                buf_ptr(&scalar_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
+        expand_undef_array(ira->codegen, a_val);
+        expand_undef_array(ira->codegen, b_val);
 
-    if (a->value.type != b->value.type) {
-        assert(len_a != len_b);
-        uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b);
-        bool expand_b = len_b < len_a;
-        IrInstruction *expand_mask = ir_const(ira, mask,
-            get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
-        expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
-        uint32_t i = 0;
-        for (; i < len_min; i++)
-            bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
-        for (; i < len_max; i++)
-            bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
-        IrInstruction *undef = ir_const_undef(ira, source_instr,
-            get_vector_type(ira->codegen, len_min, scalar_type));
-        if (expand_b) {
-            if (instr_is_comptime(b)) {
-                ConstExprValue *old = b->value.data.x_array.data.s_none.elements;
-                b->value.data.x_array.data.s_none.elements =
-                    allocate<ConstExprValue>(len_a);
-                memcpy(b->value.data.x_array.data.s_none.elements, old,
-                    b->value.type->data.vector.len * sizeof(ConstExprValue));
-            } else {
-                b = ir_build_shuffle_vector(&ira->new_irb,
-                    source_instr->scope, source_instr->source_node,
-                    nullptr, b, undef, expand_mask);
-                b->value.special = ConstValSpecialRuntime;
-            }
-            b->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
-        } else {
-            if (instr_is_comptime(a)) {
-                ConstExprValue *old = a->value.data.x_array.data.s_none.elements;
-                a->value.data.x_array.data.s_none.elements =
-                    allocate<ConstExprValue>(len_b);
-                memcpy(a->value.data.x_array.data.s_none.elements, old,
-                    a->value.type->data.vector.len * sizeof(ConstExprValue));
-            } else {
-                a = ir_build_shuffle_vector(&ira->new_irb,
-                    source_instr->scope, source_instr->source_node,
-                    nullptr, a, undef, expand_mask);
-                a->value.special = ConstValSpecialRuntime;
-            }
-            a->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
-        }
-    }
-    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
-    if (!mask_val) {
-        ir_add_error(ira, mask,
-            buf_sprintf("mask must be comptime"));
-        return ira->codegen->invalid_instruction;
-    }
-    for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
-        if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
-            continue;
-        int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-        if (v >= 0 && (uint64_t)v + 1 > len_a) {
-            ErrorMsg *msg = ir_add_error(ira, mask,
-                buf_sprintf("mask index out of bounds"));
-            add_error_note(ira->codegen, msg, mask->source_node,
-                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
-            if ((uint64_t)v <= len_a + len_b)
-                add_error_note(ira->codegen, msg, mask->source_node,
-                    buf_sprintf("selections from the second vector are specified with negative numbers"));
-        } else if (v < 0 && (uint64_t)~v + 1 > len_b) {
-            ErrorMsg *msg = ir_add_error(ira, mask,
-                buf_sprintf("mask index out of bounds"));
-            add_error_note(ira->codegen, msg, mask->source_node,
-                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
-        }
-        else
-            continue;
-        return ira->codegen->invalid_instruction;
-    }
-
-    ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type);
-    if (instr_is_comptime(a) &&
-        instr_is_comptime(b)) {
         IrInstruction *result = ir_const(ira, source_instr, result_type);
-        result->value.data.x_array.data.s_none.elements = create_const_vals(len_c);
-        for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
-            if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
-                result->value.data.x_array.data.s_none.elements[i].special =
-                    ConstValSpecialUndef;
-            int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-            if (v >= 0)
-                result->value.data.x_array.data.s_none.elements[i] =
-                    a->value.data.x_array.data.s_none.elements[v];
-            else if (v < 0)
-                result->value.data.x_array.data.s_none.elements[i] =
-                    b->value.data.x_array.data.s_none.elements[~v];
-            else
-                zig_unreachable();
-            result->value.data.x_array.data.s_none.elements[i].special =
-                ConstValSpecialStatic;
+        result->value.data.x_array.data.s_none.elements = create_const_vals(len_mask);
+        for (uint32_t i = 0; i < mask_val->type->data.vector.len; i += 1) {
+            ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+            ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+            if (mask_elem_val->special == ConstValSpecialUndef) {
+                result_elem_val->special = ConstValSpecialUndef;
+                continue;
+            }
+            int32_t v = bigint_as_signed(&mask_elem_val->data.x_bigint);
+            // We've already checked for and emitted compile errors for index out of bounds here.
+            ConstExprValue *src_elem_val = (v >= 0) ?
+                &a->value.data.x_array.data.s_none.elements[v] :
+                &b->value.data.x_array.data.s_none.elements[~v];
+            copy_const_val(result_elem_val, src_elem_val, false);
+
+            ir_assert(result_elem_val->special == ConstValSpecialStatic, source_instr);
         }
         result->value.special = ConstValSpecialStatic;
         return result;
     }
 
-    // All static analysis passed, and not comptime
+    // All static analysis passed, and not comptime.
+    // For runtime codegen, vectors a and b must be the same length. Here we
+    // recursively @shuffle the smaller vector to append undefined elements
+    // to it up to the length of the longer vector. This recursion terminates
+    // in 1 call because these calls to ir_analyze_shuffle_vector guarantee
+    // len_a == len_b.
+    if (len_a != len_b) {
+        uint32_t len_min = min(len_a, len_b);
+        uint32_t len_max = max(len_a, len_b);
+
+        IrInstruction *expand_mask = ir_const(ira, mask,
+            get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
+        expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
+        uint32_t i = 0;
+        for (; i < len_min; i += 1)
+            bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
+        for (; i < len_max; i += 1)
+            bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
+
+        IrInstruction *undef = ir_const_undef(ira, source_instr,
+            get_vector_type(ira->codegen, len_min, scalar_type));
+
+        if (len_b < len_a) {
+            b = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, b, undef, expand_mask);
+        } else {
+            a = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, a, undef, expand_mask);
+        }
+    }
+
     IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
         source_instr->scope, source_instr->source_node,
         nullptr, a, b, mask);
     result->value.type = result_type;
-    result->value.special = ConstValSpecialRuntime;
     return result;
 }
 
 static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
-    ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type);
-    assert(scalar_type);
+    ZigType *scalar_type = ir_resolve_vector_elem_type(ira, instruction->scalar_type);
     if (type_is_invalid(scalar_type))
         return ira->codegen->invalid_instruction;
 
-    if (scalar_type->id != ZigTypeIdBool &&
-        scalar_type->id != ZigTypeIdInt &&
-        scalar_type->id != ZigTypeIdFloat &&
-        scalar_type->id != ZigTypeIdPointer) {
-        ir_add_error(ira, instruction->scalar_type,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&scalar_type->name)));
+    IrInstruction *a = instruction->a->child;
+    if (type_is_invalid(a->value.type))
         return ira->codegen->invalid_instruction;
-    }
 
-    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child);
+    IrInstruction *b = instruction->b->child;
+    if (type_is_invalid(b->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *mask = instruction->mask->child;
+    if (type_is_invalid(mask->value.type))
+        return ira->codegen->invalid_instruction;
+
+    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
 }
 
 static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index d9b4ee6a95..1fe3fc58ab 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6485,16 +6485,16 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
     );
 
     cases.addTest(
-        "using LLVM syntax for @shuffle",
+        "@shuffle with selected index past first vector length",
         \\export fn entry() void {
-        \\    const v: @Vector(4, u32) = [4]u32{0, 1, 2, 3};
-        \\    const x: @Vector(4, u32) = [4]u32{4, 5, 6, 7};
-        \\    var z = @shuffle(u32, v, x, [8]i32{0, 1, 2, 3, 4, 5, 6, 7});
+        \\    const v: @Vector(4, u32) = [4]u32{ 10, 11, 12, 13 };
+        \\    const x: @Vector(4, u32) = [4]u32{ 14, 15, 16, 17 };
+        \\    var z = @shuffle(u32, v, x, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
         \\}
     ,
-        "tmp.zig:4:39: error: mask index out of bounds",
-        "tmp.zig:4:39: note: when computing vector element at index 4",
-        "tmp.zig:4:39: note: selections from the second vector are specified with negative numbers",
+        "tmp.zig:4:39: error: mask index '4' has out-of-bounds selection",
+        "tmp.zig:4:27: note: selected index '7' out of bounds of @Vector(4, u32)",
+        "tmp.zig:4:30: note: selections from the second vector are specified with negative numbers",
     );
 
     cases.addTest(
diff --git a/test/stage1/behavior.zig b/test/stage1/behavior.zig
index db6cdad3b1..e56fc7ba7f 100644
--- a/test/stage1/behavior.zig
+++ b/test/stage1/behavior.zig
@@ -80,6 +80,7 @@ comptime {
     _ = @import("behavior/pub_enum.zig");
     _ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");
     _ = @import("behavior/reflection.zig");
+    _ = @import("behavior/shuffle.zig");
     _ = @import("behavior/sizeof_and_typeof.zig");
     _ = @import("behavior/slice.zig");
     _ = @import("behavior/slicetobytes.zig");
diff --git a/test/stage1/behavior/shuffle.zig b/test/stage1/behavior/shuffle.zig
index 70bff5991e..2029ec582f 100644
--- a/test/stage1/behavior/shuffle.zig
+++ b/test/stage1/behavior/shuffle.zig
@@ -7,46 +7,46 @@ test "@shuffle" {
         fn doTheTest() void {
             var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
             var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 3, 4 };
-            const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3)};
+            const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3) };
             var res = @shuffle(i32, v, x, mask);
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
 
             // Implicit cast from array (of mask)
-            res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3)});
+            res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3) });
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
 
             // Undefined
-            const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0};
+            const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0 };
             res = @shuffle(i32, v, undefined, mask2);
-            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647}));
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647 }));
 
             // Upcasting of b
-            var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined};
-            const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3};
+            var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined };
+            const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3 };
             res = @shuffle(i32, x, v2, mask3);
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 2147483647, 4 }));
 
             // Upcasting of a
-            var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2};
-            const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3)};
+            var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2 };
+            const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3) };
             res = @shuffle(i32, v3, x, mask4);
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, -2, 4 }));
 
             // bool
             {
-                var x2: @Vector(4, bool) = [4]bool{ false, true, false, true};
-                var v4: @Vector(2, bool) = [2]bool{ true, false};
-                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+                var x2: @Vector(4, bool) = [4]bool{ false, true, false, true };
+                var v4: @Vector(2, bool) = [2]bool{ true, false };
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
                 var res2 = @shuffle(bool, x2, v4, mask5);
                 expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
             }
 
-            // FIXME re-enable when LLVM codegen is fixed
-            // https://bugs.llvm.org/show_bug.cgi?id=42803
+            // TODO re-enable when LLVM codegen is fixed
+            // https://github.com/ziglang/zig/issues/3246
             if (false) {
-                var x2: @Vector(3, bool) = [3]bool{ false, true, false};
-                var v4: @Vector(2, bool) = [2]bool{ true, false};
-                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+                var x2: @Vector(3, bool) = [3]bool{ false, true, false };
+                var v4: @Vector(2, bool) = [2]bool{ true, false };
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
                 var res2 = @shuffle(bool, x2, v4, mask5);
                 expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
             }

From ef0f3ba905e992556a60f935cbb7cb30cf1f27db Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 18 Sep 2019 16:34:36 -0400
Subject: [PATCH 13/24] relax std.auto_hash requirements regarding vectors

Previously, the auto hash tests required that vectors of different
types never hash to the same value. Now such collisions are allowed.
---
 std/hash/auto_hash.zig | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/std/hash/auto_hash.zig b/std/hash/auto_hash.zig
index d34fc2719a..8a22788e5c 100644
--- a/std/hash/auto_hash.zig
+++ b/std/hash/auto_hash.zig
@@ -116,7 +116,7 @@ pub fn hash(hasher: var, key: var, comptime strat: HashStrategy) void {
                 // Otherwise, hash every element.
                 // TODO remove the copy to an array once field access is done.
                 const array: [info.len]info.child = key;
-                comptime var i: u32 = 0;
+                comptime var i = 0;
                 inline while (i < info.len) : (i += 1) {
                     hash(hasher, array[i], strat);
                 }
@@ -357,10 +357,13 @@ test "testHash union" {
 test "testHash vector" {
     const a: @Vector(4, u32) = [_]u32{ 1, 2, 3, 4 };
     const b: @Vector(4, u32) = [_]u32{ 1, 2, 3, 5 };
-    const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
     testing.expect(testHash(a) == testHash(a));
     testing.expect(testHash(a) != testHash(b));
-    testing.expect(testHash(a) != testHash(c));
+
+    const c: @Vector(4, u31) = [_]u31{ 1, 2, 3, 4 };
+    const d: @Vector(4, u31) = [_]u31{ 1, 2, 3, 5 };
+    testing.expect(testHash(c) == testHash(c));
+    testing.expect(testHash(c) != testHash(d));
 }
 
 test "testHash error union" {

From 76f53960778e84ab49730edb77b85490b07fbea2 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 14 Jul 2019 09:22:37 -0500
Subject: [PATCH 14/24] @byteSwap on vectors

---
 src/all_types.hpp                 |  1 +
 src/codegen.cpp                   | 28 ++++++++++----
 src/ir.cpp                        | 62 ++++++++++++++++++++++++++-----
 test/stage1/behavior/byteswap.zig | 11 ++++++
 4 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/src/all_types.hpp b/src/all_types.hpp
index deb56cbb40..7887c06158 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1771,6 +1771,7 @@ struct ZigLLVMFnKey {
         } overflow_arithmetic;
         struct {
             uint32_t bit_count;
+            uint32_t vector_len; // 0 means not a vector
         } bswap;
         struct {
             uint32_t bit_count;
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 7676b3bbd0..6a575d32a2 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4505,7 +4505,13 @@ static LLVMValueRef ir_render_optional_unwrap_ptr(CodeGen *g, IrExecutable *exec
     }
 }
 
-static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnId fn_id) {
+static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFnId fn_id) {
+    bool is_vector = expr_type->id == ZigTypeIdVector;
+    ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
+    assert(int_type->id == ZigTypeIdInt);
+    uint32_t vector_len = 0;
+    if (is_vector)
+        vector_len = expr_type->data.vector.len;
     ZigLLVMFnKey key = {};
     const char *fn_name;
     uint32_t n_args;
@@ -4529,6 +4535,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
         n_args = 1;
         key.id = ZigLLVMFnIdBswap;
         key.data.bswap.bit_count = (uint32_t)int_type->data.integral.bit_count;
+        key.data.bswap.vector_len = vector_len;
     } else if (fn_id == BuiltinFnIdBitReverse) {
         fn_name = "bitreverse";
         n_args = 1;
@@ -4543,12 +4550,15 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *int_type, BuiltinFnI
         return existing_entry->value;
 
     char llvm_name[64];
-    sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
+    if (is_vector)
+        sprintf(llvm_name, "llvm.%s.v%" PRIu32 "i%" PRIu32, fn_name, vector_len, int_type->data.integral.bit_count);
+    else
+        sprintf(llvm_name, "llvm.%s.i%" PRIu32, fn_name, int_type->data.integral.bit_count);
     LLVMTypeRef param_types[] = {
-        get_llvm_type(g, int_type),
+        get_llvm_type(g, expr_type),
         LLVMInt1Type(),
     };
-    LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, int_type), param_types, n_args, false);
+    LLVMTypeRef fn_type = LLVMFunctionType(get_llvm_type(g, expr_type), param_types, n_args, false);
     LLVMValueRef fn_val = LLVMAddFunction(g->module, llvm_name, fn_type);
     assert(LLVMGetIntrinsicID(fn_val));
 
@@ -5542,15 +5552,19 @@ static LLVMValueRef ir_render_mul_add(CodeGen *g, IrExecutable *executable, IrIn
 
 static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInstructionBswap *instruction) {
     LLVMValueRef op = ir_llvm_value(g, instruction->op);
-    ZigType *int_type = instruction->base.value.type;
+    ZigType *expr_type = instruction->base.value.type;
+    bool is_vector = expr_type->id == ZigTypeIdVector;
+    ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
     assert(int_type->id == ZigTypeIdInt);
     if (int_type->data.integral.bit_count % 16 == 0) {
-        LLVMValueRef fn_val = get_int_builtin_fn(g, instruction->base.value.type, BuiltinFnIdBswap);
+        LLVMValueRef fn_val = get_int_builtin_fn(g, expr_type, BuiltinFnIdBswap);
         return LLVMBuildCall(g->builder, fn_val, &op, 1, "");
     }
     // Not an even number of bytes, so we zext 1 byte, then bswap, shift right 1 byte, truncate
     ZigType *extended_type = get_int_type(g, int_type->data.integral.is_signed,
             int_type->data.integral.bit_count + 8);
+    if (is_vector)
+        extended_type = get_vector_type(g, expr_type->data.vector.len, extended_type);
     // aabbcc
     LLVMValueRef extended = LLVMBuildZExt(g->builder, op, get_llvm_type(g, extended_type), "");
     // 00aabbcc
@@ -5560,7 +5574,7 @@ static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInst
     LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped,
             LLVMConstInt(get_llvm_type(g, extended_type), 8, false), "");
     // 00ccbbaa
-    return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, int_type), "");
+    return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, expr_type), "");
 }
 
 static LLVMValueRef ir_render_bit_reverse(CodeGen *g, IrExecutable *executable, IrInstructionBitReverse *instruction) {
diff --git a/src/ir.cpp b/src/ir.cpp
index cbc00f0cfe..e8ef45a116 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -25253,16 +25253,42 @@ static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstruct
 }
 
 static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstructionBswap *instruction) {
-    ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
-    if (type_is_invalid(int_type))
+    IrInstruction *op = instruction->op->child;
+    ZigType *type_expr = ir_resolve_type(ira, instruction->type->child);
+    if (type_is_invalid(type_expr))
         return ira->codegen->invalid_instruction;
 
-    IrInstruction *op = ir_implicit_cast(ira, instruction->op->child, int_type);
+    if (type_expr->id != ZigTypeIdInt) {
+        ir_add_error(ira, instruction->type,
+            buf_sprintf("expected integer type, found '%s'", buf_ptr(&type_expr->name)));
+        if (type_expr->id == ZigTypeIdVector &&
+            type_expr->data.vector.elem_type->id == ZigTypeIdInt)
+            ir_add_error(ira, instruction->type,
+                buf_sprintf("represent vectors with their scalar types, i.e. '%s'",
+                    buf_ptr(&type_expr->data.vector.elem_type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+    ZigType *int_type = type_expr;
+
+    ZigType *expr_type = op->value.type;
+    bool is_vector = expr_type->id == ZigTypeIdVector;
+    ZigType *ret_type = int_type;
+    if (is_vector)
+        ret_type = get_vector_type(ira->codegen, expr_type->data.vector.len, int_type);
+
+    op = ir_implicit_cast(ira, instruction->op->child, ret_type);
     if (type_is_invalid(op->value.type))
         return ira->codegen->invalid_instruction;
 
     if (int_type->data.integral.bit_count == 0) {
-        IrInstruction *result = ir_const(ira, &instruction->base, int_type);
+        IrInstruction *result = ir_const(ira, &instruction->base, ret_type);
+        if (is_vector) {
+            expand_undef_array(ira->codegen, &result->value);
+            result->value.data.x_array.data.s_none.elements =
+                allocate<ConstExprValue>(expr_type->data.vector.len);
+            for (unsigned i = 0; i < expr_type->data.vector.len; i++)
+                bigint_init_unsigned(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint, 0);
+        }
         bigint_init_unsigned(&result->value.data.x_bigint, 0);
         return result;
     }
@@ -25282,20 +25308,36 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
         if (val == nullptr)
             return ira->codegen->invalid_instruction;
         if (val->special == ConstValSpecialUndef)
-            return ir_const_undef(ira, &instruction->base, int_type);
+            return ir_const_undef(ira, &instruction->base, ret_type);
 
-        IrInstruction *result = ir_const(ira, &instruction->base, int_type);
+        IrInstruction *result = ir_const(ira, &instruction->base, ret_type);
         size_t buf_size = int_type->data.integral.bit_count / 8;
         uint8_t *buf = allocate_nonzero<uint8_t>(buf_size);
-        bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
-        bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
-                int_type->data.integral.is_signed);
+        if (is_vector) {
+            expand_undef_array(ira->codegen, &result->value);
+            result->value.data.x_array.data.s_none.elements =
+                allocate<ConstExprValue>(expr_type->data.vector.len);
+            for (unsigned i = 0; i < expr_type->data.vector.len; i++) {
+                ConstExprValue *cur = &val->data.x_array.data.s_none.elements[i];
+                result->value.data.x_array.data.s_none.elements[i].special = cur->special;
+                if (cur->special == ConstValSpecialUndef)
+                    continue;
+                bigint_write_twos_complement(&cur->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+                bigint_read_twos_complement(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint,
+                        buf, int_type->data.integral.bit_count, false,
+                        int_type->data.integral.is_signed);
+            }
+        } else {
+            bigint_write_twos_complement(&val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+            bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
+                    int_type->data.integral.is_signed);
+        }
         return result;
     }
 
     IrInstruction *result = ir_build_bswap(&ira->new_irb, instruction->base.scope,
             instruction->base.source_node, nullptr, op);
-    result->value.type = int_type;
+    result->value.type = ret_type;
     return result;
 }
 
diff --git a/test/stage1/behavior/byteswap.zig b/test/stage1/behavior/byteswap.zig
index 3e7c34cb85..249db155b7 100644
--- a/test/stage1/behavior/byteswap.zig
+++ b/test/stage1/behavior/byteswap.zig
@@ -6,6 +6,11 @@ test "@byteSwap" {
     testByteSwap();
 }
 
+test "@byteSwap on vectors" {
+    comptime testVectorByteSwap();
+    testVectorByteSwap();
+}
+
 fn testByteSwap() void {
     expect(@byteSwap(u0, 0) == 0);
     expect(@byteSwap(u8, 0x12) == 0x12);
@@ -30,3 +35,9 @@ fn testByteSwap() void {
     expect(@byteSwap(i128, @bitCast(i128, u128(0x123456789abcdef11121314151617181))) ==
         @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)));
 }
+
+fn testVectorByteSwap() void {
+    expect((@byteSwap(u8, @Vector(2, u8)([2]u8{0x12, 0x13})) == @Vector(2, u8)([2]u8{0x12, 0x13})).all);
+    expect((@byteSwap(u16, @Vector(2, u16)([2]u16{0x1234, 0x2345})) == @Vector(2, u16)([2]u16{0x3412, 0x4523})).all);
+    expect((@byteSwap(u24, @Vector(2, u24)([2]u24{0x123456, 0x234567})) == @Vector(2, u24)([2]u24{0x563412, 0x674523})).all);
+}

From 380c8ec2c95fa8d732c141c705d9940629eb2012 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 19 Sep 2019 00:59:04 -0400
Subject: [PATCH 15/24] implement runtime `@byteSwap` and other fixups

 * update docs for `@byteSwap`.
 * fix hash & eql functions for ZigLLVMFnIdBswap, which were not updated
   to include vector len. this was causing the incorrect bswap function
   to be called in unrelated code
 * fix `@byteSwap` behavior tests only testing comptime and not
   runtime operations
 * implement runtime `@byteSwap`
 * fix incorrect logic in ir_render_vector_to_array and
   ir_render_array_to_vector with regards to whether or not to bitcast
 * `@byteSwap` accepts an array operand which it will cast to vector
 * simplify `@byteSwap` semantic analysis code and various fixes
---
 doc/langref.html.in               | 11 +++-
 src/analyze.cpp                   |  6 +-
 src/codegen.cpp                   | 23 +++++---
 src/ir.cpp                        | 91 ++++++++++++++++---------------
 test/stage1/behavior/byteswap.zig | 91 +++++++++++++++++++------------
 5 files changed, 130 insertions(+), 92 deletions(-)

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 8a303640e6..61fc06fd02 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6542,12 +6542,21 @@ async fn func(y: *i32) void {
       {#header_close#}
 
       {#header_open|@byteSwap#}
-      <pre>{#syntax#}@byteSwap(comptime T: type, integer: T) T{#endsyntax#}</pre>
+      <pre>{#syntax#}@byteSwap(comptime T: type, operand: T) T{#endsyntax#}</pre>
       <p>{#syntax#}T{#endsyntax#} must be an integer type with bit count evenly divisible by 8.</p>
+      <p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
       <p>
       Swaps the byte order of the integer. This converts a big endian integer to a little endian integer,
       and converts a little endian integer to a big endian integer.
       </p>
+      <p>
+      Note that for the purposes of memory layout with respect to endianness, the integer type should be
+      related to the number of bytes reported by {#link|@sizeOf#}. This is demonstrated with
+      {#syntax#}u24{#endsyntax#}. {#syntax#}@sizeOf(u24) == 4{#endsyntax#}, which means that a
+      {#syntax#}u24{#endsyntax#} stored in memory takes 4 bytes, and those 4 bytes are what are swapped on
+      a little vs big endian system. On the other hand, if {#syntax#}T{#endsyntax#} is specified to
+      be {#syntax#}u24{#endsyntax#}, then only 3 bytes are reversed.
+      </p>
       {#header_close#}
 
       {#header_open|@bitReverse#}
diff --git a/src/analyze.cpp b/src/analyze.cpp
index ac70d5646f..66b72b935d 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -6896,7 +6896,8 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
             return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
                    (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBswap:
-            return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
+            return (uint32_t)(x.data.bswap.bit_count) * ((uint32_t)3661994335) +
+                   (uint32_t)(x.data.bswap.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBitReverse:
             return (uint32_t)(x.data.bit_reverse.bit_count) * (uint32_t)2621398431;
         case ZigLLVMFnIdOverflowArithmetic:
@@ -6919,7 +6920,8 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
         case ZigLLVMFnIdPopCount:
             return a.data.pop_count.bit_count == b.data.pop_count.bit_count;
         case ZigLLVMFnIdBswap:
-            return a.data.bswap.bit_count == b.data.bswap.bit_count;
+            return a.data.bswap.bit_count == b.data.bswap.bit_count &&
+                   a.data.bswap.vector_len == b.data.bswap.vector_len;
         case ZigLLVMFnIdBitReverse:
             return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
         case ZigLLVMFnIdFloatOp:
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 6a575d32a2..54c02b288a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4509,9 +4509,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFn
     bool is_vector = expr_type->id == ZigTypeIdVector;
     ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
     assert(int_type->id == ZigTypeIdInt);
-    uint32_t vector_len = 0;
-    if (is_vector)
-        vector_len = expr_type->data.vector.len;
+    uint32_t vector_len = is_vector ? expr_type->data.vector.len : 0;
     ZigLLVMFnKey key = {};
     const char *fn_name;
     uint32_t n_args;
@@ -5563,16 +5561,23 @@ static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInst
     // Not an even number of bytes, so we zext 1 byte, then bswap, shift right 1 byte, truncate
     ZigType *extended_type = get_int_type(g, int_type->data.integral.is_signed,
             int_type->data.integral.bit_count + 8);
-    if (is_vector)
+    LLVMValueRef shift_amt = LLVMConstInt(get_llvm_type(g, extended_type), 8, false);
+    if (is_vector) {
         extended_type = get_vector_type(g, expr_type->data.vector.len, extended_type);
+        LLVMValueRef *values = allocate_nonzero<LLVMValueRef>(expr_type->data.vector.len);
+        for (uint32_t i = 0; i < expr_type->data.vector.len; i += 1) {
+            values[i] = shift_amt;
+        }
+        shift_amt = LLVMConstVector(values, expr_type->data.vector.len);
+        free(values);
+    }
     // aabbcc
     LLVMValueRef extended = LLVMBuildZExt(g->builder, op, get_llvm_type(g, extended_type), "");
     // 00aabbcc
     LLVMValueRef fn_val = get_int_builtin_fn(g, extended_type, BuiltinFnIdBswap);
     LLVMValueRef swapped = LLVMBuildCall(g->builder, fn_val, &extended, 1, "");
     // ccbbaa00
-    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped,
-            LLVMConstInt(get_llvm_type(g, extended_type), 8, false), "");
+    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped, shift_amt, "");
     // 00ccbbaa
     return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, expr_type), "");
 }
@@ -5595,7 +5600,7 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
     LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
 
     ZigType *elem_type = array_type->data.array.child_type;
-    bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
     if (bitcast_ok) {
         LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
                 LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
@@ -5629,7 +5634,7 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
     LLVMTypeRef vector_type_ref = get_llvm_type(g, vector_type);
 
     ZigType *elem_type = vector_type->data.vector.elem_type;
-    bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
     if (bitcast_ok) {
         LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
                 LLVMPointerType(vector_type_ref, 0), "");
@@ -8902,7 +8907,7 @@ void add_cc_args(CodeGen *g, ZigList<const char *> &args, const char *out_dep_pa
         args.append(g->framework_dirs.at(i));
     }
 
-    //note(dimenus): appending libc headers before c_headers breaks intrinsics 
+    //note(dimenus): appending libc headers before c_headers breaks intrinsics
     //and other compiler specific items
     // According to Rich Felker libc headers are supposed to go before C language headers.
     args.append("-isystem");
diff --git a/src/ir.cpp b/src/ir.cpp
index e8ef45a116..1eba53ef45 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -11068,8 +11068,15 @@ static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
         return ira->codegen->builtin_types.entry_invalid;
 
     if (ty->id != ZigTypeIdInt) {
-        ir_add_error(ira, type_value,
+        ErrorMsg *msg = ir_add_error(ira, type_value,
             buf_sprintf("expected integer type, found '%s'", buf_ptr(&ty->name)));
+        if (ty->id == ZigTypeIdVector &&
+            ty->data.vector.elem_type->id == ZigTypeIdInt)
+        {
+            add_error_note(ira->codegen, msg, type_value->source_node,
+                buf_sprintf("represent vectors with their element types, i.e. '%s'",
+                    buf_ptr(&ty->data.vector.elem_type->name)));
+        }
         return ira->codegen->builtin_types.entry_invalid;
     }
 
@@ -25253,47 +25260,35 @@ static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstruct
 }
 
 static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstructionBswap *instruction) {
-    IrInstruction *op = instruction->op->child;
-    ZigType *type_expr = ir_resolve_type(ira, instruction->type->child);
-    if (type_is_invalid(type_expr))
+    Error err;
+
+    ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
+    if (type_is_invalid(int_type))
         return ira->codegen->invalid_instruction;
 
-    if (type_expr->id != ZigTypeIdInt) {
-        ir_add_error(ira, instruction->type,
-            buf_sprintf("expected integer type, found '%s'", buf_ptr(&type_expr->name)));
-        if (type_expr->id == ZigTypeIdVector &&
-            type_expr->data.vector.elem_type->id == ZigTypeIdInt)
-            ir_add_error(ira, instruction->type,
-                buf_sprintf("represent vectors with their scalar types, i.e. '%s'",
-                    buf_ptr(&type_expr->data.vector.elem_type->name)));
+    IrInstruction *uncasted_op = instruction->op->child;
+    if (type_is_invalid(uncasted_op->value.type))
         return ira->codegen->invalid_instruction;
+
+    uint32_t vector_len; // UINT32_MAX means not a vector
+    if (uncasted_op->value.type->id == ZigTypeIdArray &&
+        is_valid_vector_elem_type(uncasted_op->value.type->data.array.child_type))
+    {
+        vector_len = uncasted_op->value.type->data.array.len;
+    } else if (uncasted_op->value.type->id == ZigTypeIdVector) {
+        vector_len = uncasted_op->value.type->data.vector.len;
+    } else {
+        vector_len = UINT32_MAX;
     }
-    ZigType *int_type = type_expr;
 
-    ZigType *expr_type = op->value.type;
-    bool is_vector = expr_type->id == ZigTypeIdVector;
-    ZigType *ret_type = int_type;
-    if (is_vector)
-        ret_type = get_vector_type(ira->codegen, expr_type->data.vector.len, int_type);
+    bool is_vector = (vector_len != UINT32_MAX);
+    ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
 
-    op = ir_implicit_cast(ira, instruction->op->child, ret_type);
+    IrInstruction *op = ir_implicit_cast(ira, uncasted_op, op_type);
     if (type_is_invalid(op->value.type))
         return ira->codegen->invalid_instruction;
 
-    if (int_type->data.integral.bit_count == 0) {
-        IrInstruction *result = ir_const(ira, &instruction->base, ret_type);
-        if (is_vector) {
-            expand_undef_array(ira->codegen, &result->value);
-            result->value.data.x_array.data.s_none.elements =
-                allocate<ConstExprValue>(expr_type->data.vector.len);
-            for (unsigned i = 0; i < expr_type->data.vector.len; i++)
-                bigint_init_unsigned(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint, 0);
-        }
-        bigint_init_unsigned(&result->value.data.x_bigint, 0);
-        return result;
-    }
-
-    if (int_type->data.integral.bit_count == 8)
+    if (int_type->data.integral.bit_count == 8 || int_type->data.integral.bit_count == 0)
         return op;
 
     if (int_type->data.integral.bit_count % 8 != 0) {
@@ -25308,21 +25303,28 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
         if (val == nullptr)
             return ira->codegen->invalid_instruction;
         if (val->special == ConstValSpecialUndef)
-            return ir_const_undef(ira, &instruction->base, ret_type);
+            return ir_const_undef(ira, &instruction->base, op_type);
 
-        IrInstruction *result = ir_const(ira, &instruction->base, ret_type);
+        IrInstruction *result = ir_const(ira, &instruction->base, op_type);
         size_t buf_size = int_type->data.integral.bit_count / 8;
         uint8_t *buf = allocate_nonzero<uint8_t>(buf_size);
         if (is_vector) {
-            expand_undef_array(ira->codegen, &result->value);
-            result->value.data.x_array.data.s_none.elements =
-                allocate<ConstExprValue>(expr_type->data.vector.len);
-            for (unsigned i = 0; i < expr_type->data.vector.len; i++) {
-                ConstExprValue *cur = &val->data.x_array.data.s_none.elements[i];
-                result->value.data.x_array.data.s_none.elements[i].special = cur->special;
-                if (cur->special == ConstValSpecialUndef)
+            expand_undef_array(ira->codegen, val);
+            result->value.data.x_array.data.s_none.elements = create_const_vals(op_type->data.vector.len);
+            for (unsigned i = 0; i < op_type->data.vector.len; i += 1) {
+                ConstExprValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
+                if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
+                    op_elem_val, UndefOk)))
+                {
+                    return ira->codegen->invalid_instruction;
+                }
+                ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+                result_elem_val->type = int_type;
+                result_elem_val->special = op_elem_val->special;
+                if (op_elem_val->special == ConstValSpecialUndef)
                     continue;
-                bigint_write_twos_complement(&cur->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+
+                bigint_write_twos_complement(&op_elem_val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
                 bigint_read_twos_complement(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint,
                         buf, int_type->data.integral.bit_count, false,
                         int_type->data.integral.is_signed);
@@ -25332,12 +25334,13 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
             bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
                     int_type->data.integral.is_signed);
         }
+        free(buf);
         return result;
     }
 
     IrInstruction *result = ir_build_bswap(&ira->new_irb, instruction->base.scope,
             instruction->base.source_node, nullptr, op);
-    result->value.type = ret_type;
+    result->value.type = op_type;
     return result;
 }
 
diff --git a/test/stage1/behavior/byteswap.zig b/test/stage1/behavior/byteswap.zig
index 249db155b7..d8fc554808 100644
--- a/test/stage1/behavior/byteswap.zig
+++ b/test/stage1/behavior/byteswap.zig
@@ -1,43 +1,62 @@
 const std = @import("std");
 const expect = std.testing.expect;
 
-test "@byteSwap" {
-    comptime testByteSwap();
-    testByteSwap();
+test "@byteSwap integers" {
+    const ByteSwapIntTest = struct {
+        fn run() void {
+            t(u0, 0, 0);
+            t(u8, 0x12, 0x12);
+            t(u16, 0x1234, 0x3412);
+            t(u24, 0x123456, 0x563412);
+            t(u32, 0x12345678, 0x78563412);
+            t(u40, 0x123456789a, 0x9a78563412);
+            t(i48, 0x123456789abc, @bitCast(i48, u48(0xbc9a78563412)));
+            t(u56, 0x123456789abcde, 0xdebc9a78563412);
+            t(u64, 0x123456789abcdef1, 0xf1debc9a78563412);
+            t(u128, 0x123456789abcdef11121314151617181, 0x8171615141312111f1debc9a78563412);
+
+            t(u0, u0(0), 0);
+            t(i8, i8(-50), -50);
+            t(i16, @bitCast(i16, u16(0x1234)), @bitCast(i16, u16(0x3412)));
+            t(i24, @bitCast(i24, u24(0x123456)), @bitCast(i24, u24(0x563412)));
+            t(i32, @bitCast(i32, u32(0x12345678)), @bitCast(i32, u32(0x78563412)));
+            t(u40, @bitCast(i40, u40(0x123456789a)), u40(0x9a78563412));
+            t(i48, @bitCast(i48, u48(0x123456789abc)), @bitCast(i48, u48(0xbc9a78563412)));
+            t(i56, @bitCast(i56, u56(0x123456789abcde)), @bitCast(i56, u56(0xdebc9a78563412)));
+            t(i64, @bitCast(i64, u64(0x123456789abcdef1)), @bitCast(i64, u64(0xf1debc9a78563412)));
+            t(
+                i128,
+                @bitCast(i128, u128(0x123456789abcdef11121314151617181)),
+                @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)),
+            );
+        }
+        fn t(comptime I: type, input: I, expected_output: I) void {
+            std.testing.expectEqual(expected_output, @byteSwap(I, input));
+        }
+    };
+    comptime ByteSwapIntTest.run();
+    ByteSwapIntTest.run();
 }
 
-test "@byteSwap on vectors" {
-    comptime testVectorByteSwap();
-    testVectorByteSwap();
-}
+test "@byteSwap vectors" {
+    const ByteSwapVectorTest = struct {
+        fn run() void {
+            t(u8, 2, [_]u8{ 0x12, 0x13 }, [_]u8{ 0x12, 0x13 });
+            t(u16, 2, [_]u16{ 0x1234, 0x2345 }, [_]u16{ 0x3412, 0x4523 });
+            t(u24, 2, [_]u24{ 0x123456, 0x234567 }, [_]u24{ 0x563412, 0x674523 });
+        }
 
-fn testByteSwap() void {
-    expect(@byteSwap(u0, 0) == 0);
-    expect(@byteSwap(u8, 0x12) == 0x12);
-    expect(@byteSwap(u16, 0x1234) == 0x3412);
-    expect(@byteSwap(u24, 0x123456) == 0x563412);
-    expect(@byteSwap(u32, 0x12345678) == 0x78563412);
-    expect(@byteSwap(u40, 0x123456789a) == 0x9a78563412);
-    expect(@byteSwap(i48, 0x123456789abc) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(u56, 0x123456789abcde) == 0xdebc9a78563412);
-    expect(@byteSwap(u64, 0x123456789abcdef1) == 0xf1debc9a78563412);
-    expect(@byteSwap(u128, 0x123456789abcdef11121314151617181) == 0x8171615141312111f1debc9a78563412);
-
-    expect(@byteSwap(u0, u0(0)) == 0);
-    expect(@byteSwap(i8, i8(-50)) == -50);
-    expect(@byteSwap(i16, @bitCast(i16, u16(0x1234))) == @bitCast(i16, u16(0x3412)));
-    expect(@byteSwap(i24, @bitCast(i24, u24(0x123456))) == @bitCast(i24, u24(0x563412)));
-    expect(@byteSwap(i32, @bitCast(i32, u32(0x12345678))) == @bitCast(i32, u32(0x78563412)));
-    expect(@byteSwap(u40, @bitCast(i40, u40(0x123456789a))) == u40(0x9a78563412));
-    expect(@byteSwap(i48, @bitCast(i48, u48(0x123456789abc))) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(i56, @bitCast(i56, u56(0x123456789abcde))) == @bitCast(i56, u56(0xdebc9a78563412)));
-    expect(@byteSwap(i64, @bitCast(i64, u64(0x123456789abcdef1))) == @bitCast(i64, u64(0xf1debc9a78563412)));
-    expect(@byteSwap(i128, @bitCast(i128, u128(0x123456789abcdef11121314151617181))) ==
-        @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)));
-}
-
-fn testVectorByteSwap() void {
-    expect((@byteSwap(u8, @Vector(2, u8)([2]u8{0x12, 0x13})) == @Vector(2, u8)([2]u8{0x12, 0x13})).all);
-    expect((@byteSwap(u16, @Vector(2, u16)([2]u16{0x1234, 0x2345})) == @Vector(2, u16)([2]u16{0x3412, 0x4523})).all);
-    expect((@byteSwap(u24, @Vector(2, u24)([2]u24{0x123456, 0x234567})) == @Vector(2, u24)([2]u24{0x563412, 0x674523})).all);
+        fn t(
+            comptime I: type,
+            comptime n: comptime_int,
+            input: @Vector(n, I),
+            expected_vector: @Vector(n, I),
+        ) void {
+            const actual_output: [n]I = @byteSwap(I, input);
+            const expected_output: [n]I = expected_vector;
+            std.testing.expectEqual(expected_output, actual_output);
+        }
+    };
+    comptime ByteSwapVectorTest.run();
+    ByteSwapVectorTest.run();
 }

From 01577a3af480cff02c5f78864f8056487b3d3b44 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 21 Jul 2019 10:41:43 -0500
Subject: [PATCH 16/24] `@splat`

---
 src/all_types.hpp               |  9 ++++
 src/codegen.cpp                 | 17 +++++++
 src/ir.cpp                      | 82 +++++++++++++++++++++++++++++++++
 src/ir_print.cpp                | 13 ++++++
 test/compile_errors.zig         | 10 ++++
 test/stage1/behavior/vector.zig | 36 +++++++++++----
 6 files changed, 157 insertions(+), 10 deletions(-)

diff --git a/src/all_types.hpp b/src/all_types.hpp
index 7887c06158..464a1d6ba4 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -1612,6 +1612,7 @@ enum BuiltinFnId {
     BuiltinFnIdIntType,
     BuiltinFnIdVectorType,
     BuiltinFnIdShuffle,
+    BuiltinFnIdSplat,
     BuiltinFnIdSetCold,
     BuiltinFnIdSetRuntimeSafety,
     BuiltinFnIdSetFloatMode,
@@ -2431,6 +2432,7 @@ enum IrInstructionId {
     IrInstructionIdIntType,
     IrInstructionIdVectorType,
     IrInstructionIdShuffleVector,
+    IrInstructionIdSplat,
     IrInstructionIdBoolNot,
     IrInstructionIdMemset,
     IrInstructionIdMemcpy,
@@ -3681,6 +3683,13 @@ struct IrInstructionShuffleVector {
     IrInstruction *mask; // This is in zig-format, not llvm format
 };
 
+struct IrInstructionSplat {
+    IrInstruction base;
+
+    IrInstruction *len;
+    IrInstruction *scalar;
+};
+
 struct IrInstructionAssertZero {
     IrInstruction base;
 
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 54c02b288a..49681c20c1 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4619,6 +4619,20 @@ static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executabl
         llvm_mask_value, "");
 }
 
+static LLVMValueRef ir_render_splat(CodeGen *g, IrExecutable *executable, IrInstructionSplat *instruction) {
+    uint64_t len = bigint_as_u64(&instruction->len->value.data.x_bigint);
+    LLVMValueRef wrapped_scalar_undef = LLVMGetUndef(instruction->base.value.type->llvm_type);
+    LLVMValueRef wrapped_scalar = LLVMBuildInsertElement(g->builder, wrapped_scalar_undef,
+        ir_llvm_value(g, instruction->scalar),
+        LLVMConstInt(LLVMInt32Type(), 0, false),
+        "");
+    return LLVMBuildShuffleVector(g->builder,
+        wrapped_scalar,
+        wrapped_scalar_undef,
+        LLVMConstNull(LLVMVectorType(g->builtin_types.entry_u32->llvm_type, (uint32_t)len)),
+        "");
+}
+
 static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
     ZigType *int_type = instruction->op->value.type;
     LLVMValueRef fn_val = get_int_builtin_fn(g, int_type, BuiltinFnIdPopCount);
@@ -6146,6 +6160,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_spill_end(g, executable, (IrInstructionSpillEnd *)instruction);
         case IrInstructionIdShuffleVector:
             return ir_render_shuffle_vector(g, executable, (IrInstructionShuffleVector *) instruction);
+        case IrInstructionIdSplat:
+            return ir_render_splat(g, executable, (IrInstructionSplat *) instruction);
     }
     zig_unreachable();
 }
@@ -7837,6 +7853,7 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdIntType, "IntType", 2); // TODO rename to Int
     create_builtin_fn(g, BuiltinFnIdVectorType, "Vector", 2);
     create_builtin_fn(g, BuiltinFnIdShuffle, "shuffle", 4);
+    create_builtin_fn(g, BuiltinFnIdSplat, "splat", 2);
     create_builtin_fn(g, BuiltinFnIdSetCold, "setCold", 1);
     create_builtin_fn(g, BuiltinFnIdSetRuntimeSafety, "setRuntimeSafety", 1);
     create_builtin_fn(g, BuiltinFnIdSetFloatMode, "setFloatMode", 1);
diff --git a/src/ir.cpp b/src/ir.cpp
index 1eba53ef45..8fca50c6f7 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -721,6 +721,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionShuffleVector *)
     return IrInstructionIdShuffleVector;
 }
 
+static constexpr IrInstructionId ir_instruction_id(IrInstructionSplat *) {
+    return IrInstructionIdSplat;
+}
+
 static constexpr IrInstructionId ir_instruction_id(IrInstructionBoolNot *) {
     return IrInstructionIdBoolNot;
 }
@@ -2300,6 +2304,19 @@ static IrInstruction *ir_build_shuffle_vector(IrBuilder *irb, Scope *scope, AstN
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_splat(IrBuilder *irb, Scope *scope, AstNode *source_node,
+    IrInstruction *len, IrInstruction *scalar)
+{
+    IrInstructionSplat *instruction = ir_build_instruction<IrInstructionSplat>(irb, scope, source_node);
+    instruction->len = len;
+    instruction->scalar = scalar;
+
+    ir_ref_instruction(len, irb->current_basic_block);
+    ir_ref_instruction(scalar, irb->current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_bool_not(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *value) {
     IrInstructionBoolNot *instruction = ir_build_instruction<IrInstructionBoolNot>(irb, scope, source_node);
     instruction->value = value;
@@ -4985,6 +5002,22 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                     arg0_value, arg1_value, arg2_value, arg3_value);
                 return ir_lval_wrap(irb, scope, shuffle_vector, lval, result_loc);
             }
+        case BuiltinFnIdSplat:
+            {
+                AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
+                IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope);
+                if (arg0_value == irb->codegen->invalid_instruction)
+                    return arg0_value;
+
+                AstNode *arg1_node = node->data.fn_call_expr.params.at(1);
+                IrInstruction *arg1_value = ir_gen_node(irb, arg1_node, scope);
+                if (arg1_value == irb->codegen->invalid_instruction)
+                    return arg1_value;
+
+                IrInstruction *splat = ir_build_splat(irb, scope, node,
+                    arg0_value, arg1_value);
+                return ir_lval_wrap(irb, scope, splat, lval, result_loc);
+            }
         case BuiltinFnIdMemcpy:
             {
                 AstNode *arg0_node = node->data.fn_call_expr.params.at(0);
@@ -22324,6 +22357,52 @@ static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrIn
     return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
 }
 
+static IrInstruction *ir_analyze_instruction_splat(IrAnalyze *ira, IrInstructionSplat *instruction) {
+    IrInstruction *len = instruction->len->child;
+    if (type_is_invalid(len->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *scalar = instruction->scalar->child;
+    if (type_is_invalid(scalar->value.type))
+        return ira->codegen->invalid_instruction;
+
+    uint64_t len_int;
+    if (!ir_resolve_unsigned(ira, len, ira->codegen->builtin_types.entry_u32, &len_int)) {
+        ir_add_error(ira, len,
+            buf_sprintf("splat length must be comptime"));
+        return ira->codegen->invalid_instruction;
+    }
+
+    if (!is_valid_vector_elem_type(scalar->value.type)) {
+        ir_add_error(ira, len,
+            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+                buf_ptr(&scalar->value.type->name)));
+        return ira->codegen->invalid_instruction;
+    }
+
+    ZigType *return_type = get_vector_type(ira->codegen, len_int, scalar->value.type);
+
+    if (instr_is_comptime(scalar)) {
+        IrInstruction *result = ir_const_undef(ira, scalar, return_type);
+        result->value.data.x_array.data.s_none.elements =
+            allocate<ConstExprValue>(len_int);
+        for (uint32_t i = 0; i < len_int; i++) {
+            result->value.data.x_array.data.s_none.elements[i] =
+                scalar->value;
+        }
+        result->value.type = return_type;
+        result->value.special = ConstValSpecialStatic;
+        return result;
+    }
+
+    IrInstruction *result = ir_build_splat(&ira->new_irb,
+        instruction->base.scope, instruction->base.source_node,
+        instruction->len->child, instruction->scalar->child);
+    result->value.type = return_type;
+    result->value.special = ConstValSpecialRuntime;
+    return result;
+}
+
 static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
     IrInstruction *value = instruction->value->child;
     if (type_is_invalid(value->value.type))
@@ -25908,6 +25987,8 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
             return ir_analyze_instruction_vector_type(ira, (IrInstructionVectorType *)instruction);
         case IrInstructionIdShuffleVector:
             return ir_analyze_instruction_shuffle_vector(ira, (IrInstructionShuffleVector *)instruction);
+         case IrInstructionIdSplat:
+            return ir_analyze_instruction_splat(ira, (IrInstructionSplat *)instruction);
         case IrInstructionIdBoolNot:
             return ir_analyze_instruction_bool_not(ira, (IrInstructionBoolNot *)instruction);
         case IrInstructionIdMemset:
@@ -26244,6 +26325,7 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdIntType:
         case IrInstructionIdVectorType:
         case IrInstructionIdShuffleVector:
+        case IrInstructionIdSplat:
         case IrInstructionIdBoolNot:
         case IrInstructionIdSliceSrc:
         case IrInstructionIdMemberCount:
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index 8561ed4508..0dee7d342a 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -44,6 +44,8 @@ static const char* ir_instruction_type_str(IrInstruction* instruction) {
             return "Invalid";
         case IrInstructionIdShuffleVector:
             return "Shuffle";
+        case IrInstructionIdSplat:
+            return "Splat";
         case IrInstructionIdDeclVarSrc:
             return "DeclVarSrc";
         case IrInstructionIdDeclVarGen:
@@ -1222,6 +1224,14 @@ static void ir_print_shuffle_vector(IrPrint *irp, IrInstructionShuffleVector *in
     fprintf(irp->f, ")");
 }
 
+static void ir_print_splat(IrPrint *irp, IrInstructionSplat *instruction) {
+    fprintf(irp->f, "@splat(");
+    ir_print_other_instruction(irp, instruction->len);
+    fprintf(irp->f, ", ");
+    ir_print_other_instruction(irp, instruction->scalar);
+    fprintf(irp->f, ")");
+}
+
 static void ir_print_bool_not(IrPrint *irp, IrInstructionBoolNot *instruction) {
     fprintf(irp->f, "! ");
     ir_print_other_instruction(irp, instruction->value);
@@ -2160,6 +2170,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction, bool
         case IrInstructionIdShuffleVector:
             ir_print_shuffle_vector(irp, (IrInstructionShuffleVector *)instruction);
             break;
+        case IrInstructionIdSplat:
+            ir_print_splat(irp, (IrInstructionSplat *)instruction);
+            break;
         case IrInstructionIdBoolNot:
             ir_print_bool_not(irp, (IrInstructionBoolNot *)instruction);
             break;
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 1fe3fc58ab..2909bffc3b 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6507,6 +6507,16 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
         "tmp.zig:2:26: error: vector element type must be integer, float, bool, or pointer; '@Vector(4, u8)' is invalid",
     );
 
+    cases.addTest(
+        "bad @splat type",
+        \\export fn entry() void {
+        \\    const c = 4;
+        \\    var v = @splat(4, c);
+        \\}
+    ,
+        "tmp.zig:3:20: error: vector element type must be integer, float, bool, or pointer; 'comptime_int' is invalid",
+    );
+
     cases.add("compileLog of tagged enum doesn't crash the compiler",
         \\const Bar = union(enum(u32)) {
         \\    X: i32 = 1
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
index 27277b5e52..88a332d87b 100644
--- a/test/stage1/behavior/vector.zig
+++ b/test/stage1/behavior/vector.zig
@@ -35,12 +35,12 @@ test "vector bin compares with mem.eql" {
         fn doTheTest() void {
             var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
             var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 30, 4 };
-            expect(mem.eql(bool, ([4]bool)(v == x), [4]bool{ false, false,  true, false}));
-            expect(mem.eql(bool, ([4]bool)(v != x), [4]bool{  true,  true, false,  true}));
-            expect(mem.eql(bool, ([4]bool)(v  < x), [4]bool{ false,  true, false, false}));
-            expect(mem.eql(bool, ([4]bool)(v  > x), [4]bool{  true, false, false,  true}));
-            expect(mem.eql(bool, ([4]bool)(v <= x), [4]bool{ false,  true,  true, false}));
-            expect(mem.eql(bool, ([4]bool)(v >= x), [4]bool{  true, false,  true,  true}));
+            expect(mem.eql(bool, ([4]bool)(v == x), [4]bool{ false, false, true, false }));
+            expect(mem.eql(bool, ([4]bool)(v != x), [4]bool{ true, true, false, true }));
+            expect(mem.eql(bool, ([4]bool)(v < x), [4]bool{ false, true, false, false }));
+            expect(mem.eql(bool, ([4]bool)(v > x), [4]bool{ true, false, false, true }));
+            expect(mem.eql(bool, ([4]bool)(v <= x), [4]bool{ false, true, true, false }));
+            expect(mem.eql(bool, ([4]bool)(v >= x), [4]bool{ true, false, true, true }));
         }
     };
     S.doTheTest();
@@ -114,22 +114,22 @@ test "vector casts of sizes not divisable by 8" {
     const S = struct {
         fn doTheTest() void {
             {
-                var v: @Vector(4, u3) = [4]u3{ 5, 2,  3, 0};
+                var v: @Vector(4, u3) = [4]u3{ 5, 2, 3, 0 };
                 var x: [4]u3 = v;
                 expect(mem.eql(u3, x, ([4]u3)(v)));
             }
             {
-                var v: @Vector(4, u2) = [4]u2{ 1, 2,  3, 0};
+                var v: @Vector(4, u2) = [4]u2{ 1, 2, 3, 0 };
                 var x: [4]u2 = v;
                 expect(mem.eql(u2, x, ([4]u2)(v)));
             }
             {
-                var v: @Vector(4, u1) = [4]u1{ 1, 0,  1, 0};
+                var v: @Vector(4, u1) = [4]u1{ 1, 0, 1, 0 };
                 var x: [4]u1 = v;
                 expect(mem.eql(u1, x, ([4]u1)(v)));
             }
             {
-                var v: @Vector(4, bool) = [4]bool{ false, false,  true, false};
+                var v: @Vector(4, bool) = [4]bool{ false, false, true, false };
                 var x: [4]bool = v;
                 expect(mem.eql(bool, x, ([4]bool)(v)));
             }
@@ -138,3 +138,19 @@ test "vector casts of sizes not divisable by 8" {
     S.doTheTest();
     comptime S.doTheTest();
 }
+
+test "vector @splat" {
+    const S = struct {
+        fn doTheTest() void {
+            var v: u32 = 5;
+            var x = @splat(4, v);
+            expect(@typeOf(x) == @Vector(4, u32));
+            expect(x[0] == 5);
+            expect(x[1] == 5);
+            expect(x[2] == 5);
+            expect(x[3] == 5);
+        }
+    };
+    S.doTheTest();
+    comptime S.doTheTest();
+}

From 005a54a853a77b9c28551490fc08dc37cd7d7715 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 19 Sep 2019 10:48:04 -0400
Subject: [PATCH 17/24] fixups for `@splat`

 * Fix codegen for splat - instead of giving vectors of length N
   to shufflevector for both of the operands, it gives vectors of length
   1. The mask vector is the only one that needs N elements.
 * Separate Splat into SplatSrc and SplatGen; the `len` is not needed
   once it gets to codegen since it is redundant with the result type.
 * Refactor compile error for wrong vector element type so that the
   compile error message is not duplicated in zig source code
 * Improve implementation to correctly handle comptime values such as
   undefined and lazy values.
 * Improve compile error for bad vector element type to point to the
   correct place.
 * Delete dead code.
 * Modify behavior test to use an array cast instead of vector element
   indexing since I'm merging this splat commit out-of-order from
   Shawn's patch set.
---
 src/all_types.hpp               | 11 +++-
 src/codegen.cpp                 | 27 +++++-----
 src/ir.cpp                      | 95 ++++++++++++++++++++-------------
 src/ir_print.cpp                | 21 ++++++--
 test/compile_errors.zig         |  2 +-
 test/stage1/behavior/vector.zig |  9 ++--
 6 files changed, 101 insertions(+), 64 deletions(-)

diff --git a/src/all_types.hpp b/src/all_types.hpp
index 464a1d6ba4..695f22ac90 100644
--- a/src/all_types.hpp
+++ b/src/all_types.hpp
@@ -2432,7 +2432,8 @@ enum IrInstructionId {
     IrInstructionIdIntType,
     IrInstructionIdVectorType,
     IrInstructionIdShuffleVector,
-    IrInstructionIdSplat,
+    IrInstructionIdSplatSrc,
+    IrInstructionIdSplatGen,
     IrInstructionIdBoolNot,
     IrInstructionIdMemset,
     IrInstructionIdMemcpy,
@@ -3683,13 +3684,19 @@ struct IrInstructionShuffleVector {
     IrInstruction *mask; // This is in zig-format, not llvm format
 };
 
-struct IrInstructionSplat {
+struct IrInstructionSplatSrc {
     IrInstruction base;
 
     IrInstruction *len;
     IrInstruction *scalar;
 };
 
+struct IrInstructionSplatGen {
+    IrInstruction base;
+
+    IrInstruction *scalar;
+};
+
 struct IrInstructionAssertZero {
     IrInstruction base;
 
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 49681c20c1..b0817e8eb8 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4619,18 +4619,16 @@ static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executabl
         llvm_mask_value, "");
 }
 
-static LLVMValueRef ir_render_splat(CodeGen *g, IrExecutable *executable, IrInstructionSplat *instruction) {
-    uint64_t len = bigint_as_u64(&instruction->len->value.data.x_bigint);
-    LLVMValueRef wrapped_scalar_undef = LLVMGetUndef(instruction->base.value.type->llvm_type);
-    LLVMValueRef wrapped_scalar = LLVMBuildInsertElement(g->builder, wrapped_scalar_undef,
-        ir_llvm_value(g, instruction->scalar),
-        LLVMConstInt(LLVMInt32Type(), 0, false),
-        "");
-    return LLVMBuildShuffleVector(g->builder,
-        wrapped_scalar,
-        wrapped_scalar_undef,
-        LLVMConstNull(LLVMVectorType(g->builtin_types.entry_u32->llvm_type, (uint32_t)len)),
-        "");
+static LLVMValueRef ir_render_splat(CodeGen *g, IrExecutable *executable, IrInstructionSplatGen *instruction) {
+    ZigType *result_type = instruction->base.value.type;
+    src_assert(result_type->id == ZigTypeIdVector, instruction->base.source_node);
+    uint32_t len = result_type->data.vector.len;
+    LLVMTypeRef op_llvm_type = LLVMVectorType(get_llvm_type(g, instruction->scalar->value.type), 1);
+    LLVMTypeRef mask_llvm_type = LLVMVectorType(LLVMInt32Type(), len);
+    LLVMValueRef undef_vector = LLVMGetUndef(op_llvm_type);
+    LLVMValueRef op_vector = LLVMBuildInsertElement(g->builder, undef_vector,
+            ir_llvm_value(g, instruction->scalar), LLVMConstInt(LLVMInt32Type(), 0, false), "");
+    return LLVMBuildShuffleVector(g->builder, op_vector, undef_vector, LLVMConstNull(mask_llvm_type), "");
 }
 
 static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
@@ -6000,6 +5998,7 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
         case IrInstructionIdFrameSizeSrc:
         case IrInstructionIdAllocaGen:
         case IrInstructionIdAwaitSrc:
+        case IrInstructionIdSplatSrc:
             zig_unreachable();
 
         case IrInstructionIdDeclVarGen:
@@ -6160,8 +6159,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable,
             return ir_render_spill_end(g, executable, (IrInstructionSpillEnd *)instruction);
         case IrInstructionIdShuffleVector:
             return ir_render_shuffle_vector(g, executable, (IrInstructionShuffleVector *) instruction);
-        case IrInstructionIdSplat:
-            return ir_render_splat(g, executable, (IrInstructionSplat *) instruction);
+        case IrInstructionIdSplatGen:
+            return ir_render_splat(g, executable, (IrInstructionSplatGen *) instruction);
     }
     zig_unreachable();
 }
diff --git a/src/ir.cpp b/src/ir.cpp
index 8fca50c6f7..0c48a2f982 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -721,8 +721,12 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionShuffleVector *)
     return IrInstructionIdShuffleVector;
 }
 
-static constexpr IrInstructionId ir_instruction_id(IrInstructionSplat *) {
-    return IrInstructionIdSplat;
+static constexpr IrInstructionId ir_instruction_id(IrInstructionSplatSrc *) {
+    return IrInstructionIdSplatSrc;
+}
+
+static constexpr IrInstructionId ir_instruction_id(IrInstructionSplatGen *) {
+    return IrInstructionIdSplatGen;
 }
 
 static constexpr IrInstructionId ir_instruction_id(IrInstructionBoolNot *) {
@@ -2304,10 +2308,10 @@ static IrInstruction *ir_build_shuffle_vector(IrBuilder *irb, Scope *scope, AstN
     return &instruction->base;
 }
 
-static IrInstruction *ir_build_splat(IrBuilder *irb, Scope *scope, AstNode *source_node,
+static IrInstruction *ir_build_splat_src(IrBuilder *irb, Scope *scope, AstNode *source_node,
     IrInstruction *len, IrInstruction *scalar)
 {
-    IrInstructionSplat *instruction = ir_build_instruction<IrInstructionSplat>(irb, scope, source_node);
+    IrInstructionSplatSrc *instruction = ir_build_instruction<IrInstructionSplatSrc>(irb, scope, source_node);
     instruction->len = len;
     instruction->scalar = scalar;
 
@@ -2373,6 +2377,19 @@ static IrInstruction *ir_build_slice_src(IrBuilder *irb, Scope *scope, AstNode *
     return &instruction->base;
 }
 
+static IrInstruction *ir_build_splat_gen(IrAnalyze *ira, IrInstruction *source_instruction, ZigType *result_type,
+    IrInstruction *scalar)
+{
+    IrInstructionSplatGen *instruction = ir_build_instruction<IrInstructionSplatGen>(
+            &ira->new_irb, source_instruction->scope, source_instruction->source_node);
+    instruction->base.value.type = result_type;
+    instruction->scalar = scalar;
+
+    ir_ref_instruction(scalar, ira->new_irb.current_basic_block);
+
+    return &instruction->base;
+}
+
 static IrInstruction *ir_build_slice_gen(IrAnalyze *ira, IrInstruction *source_instruction, ZigType *slice_type,
     IrInstruction *ptr, IrInstruction *start, IrInstruction *end, bool safety_check_on, IrInstruction *result_loc)
 {
@@ -5014,7 +5031,7 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo
                 if (arg1_value == irb->codegen->invalid_instruction)
                     return arg1_value;
 
-                IrInstruction *splat = ir_build_splat(irb, scope, node,
+                IrInstruction *splat = ir_build_splat_src(irb, scope, node,
                     arg0_value, arg1_value);
                 return ir_lval_wrap(irb, scope, splat, lval, result_loc);
             }
@@ -11082,16 +11099,23 @@ static ZigType *ir_resolve_type(IrAnalyze *ira, IrInstruction *type_value) {
     return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val);
 }
 
+static Error ir_validate_vector_elem_type(IrAnalyze *ira, IrInstruction *source_instr, ZigType *elem_type) {
+    if (!is_valid_vector_elem_type(elem_type)) {
+        ir_add_error(ira, source_instr,
+            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+                buf_ptr(&elem_type->name)));
+        return ErrorSemanticAnalyzeFail;
+    }
+    return ErrorNone;
+}
+
 static ZigType *ir_resolve_vector_elem_type(IrAnalyze *ira, IrInstruction *elem_type_value) {
+    Error err;
     ZigType *elem_type = ir_resolve_type(ira, elem_type_value);
     if (type_is_invalid(elem_type))
         return ira->codegen->builtin_types.entry_invalid;
-    if (!is_valid_vector_elem_type(elem_type)) {
-        ir_add_error(ira, elem_type_value,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&elem_type->name)));
+    if ((err = ir_validate_vector_elem_type(ira, elem_type_value, elem_type)))
         return ira->codegen->builtin_types.entry_invalid;
-    }
     return elem_type;
 }
 
@@ -22357,7 +22381,9 @@ static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrIn
     return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
 }
 
-static IrInstruction *ir_analyze_instruction_splat(IrAnalyze *ira, IrInstructionSplat *instruction) {
+static IrInstruction *ir_analyze_instruction_splat(IrAnalyze *ira, IrInstructionSplatSrc *instruction) {
+    Error err;
+
     IrInstruction *len = instruction->len->child;
     if (type_is_invalid(len->value.type))
         return ira->codegen->invalid_instruction;
@@ -22366,41 +22392,32 @@ static IrInstruction *ir_analyze_instruction_splat(IrAnalyze *ira, IrInstruction
     if (type_is_invalid(scalar->value.type))
         return ira->codegen->invalid_instruction;
 
-    uint64_t len_int;
-    if (!ir_resolve_unsigned(ira, len, ira->codegen->builtin_types.entry_u32, &len_int)) {
-        ir_add_error(ira, len,
-            buf_sprintf("splat length must be comptime"));
+    uint64_t len_u64;
+    if (!ir_resolve_unsigned(ira, len, ira->codegen->builtin_types.entry_u32, &len_u64))
         return ira->codegen->invalid_instruction;
-    }
+    uint32_t len_int = len_u64;
 
-    if (!is_valid_vector_elem_type(scalar->value.type)) {
-        ir_add_error(ira, len,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&scalar->value.type->name)));
+    if ((err = ir_validate_vector_elem_type(ira, scalar, scalar->value.type)))
         return ira->codegen->invalid_instruction;
-    }
 
     ZigType *return_type = get_vector_type(ira->codegen, len_int, scalar->value.type);
 
     if (instr_is_comptime(scalar)) {
-        IrInstruction *result = ir_const_undef(ira, scalar, return_type);
-        result->value.data.x_array.data.s_none.elements =
-            allocate<ConstExprValue>(len_int);
-        for (uint32_t i = 0; i < len_int; i++) {
-            result->value.data.x_array.data.s_none.elements[i] =
-                scalar->value;
+        ConstExprValue *scalar_val = ir_resolve_const(ira, scalar, UndefOk);
+        if (scalar_val == nullptr)
+            return ira->codegen->invalid_instruction;
+        if (scalar_val->special == ConstValSpecialUndef)
+            return ir_const_undef(ira, &instruction->base, return_type);
+
+        IrInstruction *result = ir_const(ira, &instruction->base, return_type);
+        result->value.data.x_array.data.s_none.elements = create_const_vals(len_int);
+        for (uint32_t i = 0; i < len_int; i += 1) {
+            copy_const_val(&result->value.data.x_array.data.s_none.elements[i], scalar_val, false);
         }
-        result->value.type = return_type;
-        result->value.special = ConstValSpecialStatic;
         return result;
     }
 
-    IrInstruction *result = ir_build_splat(&ira->new_irb,
-        instruction->base.scope, instruction->base.source_node,
-        instruction->len->child, instruction->scalar->child);
-    result->value.type = return_type;
-    result->value.special = ConstValSpecialRuntime;
-    return result;
+    return ir_build_splat_gen(ira, &instruction->base, return_type, scalar);
 }
 
 static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
@@ -25857,6 +25874,7 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
         case IrInstructionIdTestErrGen:
         case IrInstructionIdFrameSizeGen:
         case IrInstructionIdAwaitGen:
+        case IrInstructionIdSplatGen:
             zig_unreachable();
 
         case IrInstructionIdReturn:
@@ -25987,8 +26005,8 @@ static IrInstruction *ir_analyze_instruction_base(IrAnalyze *ira, IrInstruction
             return ir_analyze_instruction_vector_type(ira, (IrInstructionVectorType *)instruction);
         case IrInstructionIdShuffleVector:
             return ir_analyze_instruction_shuffle_vector(ira, (IrInstructionShuffleVector *)instruction);
-         case IrInstructionIdSplat:
-            return ir_analyze_instruction_splat(ira, (IrInstructionSplat *)instruction);
+         case IrInstructionIdSplatSrc:
+            return ir_analyze_instruction_splat(ira, (IrInstructionSplatSrc *)instruction);
         case IrInstructionIdBoolNot:
             return ir_analyze_instruction_bool_not(ira, (IrInstructionBoolNot *)instruction);
         case IrInstructionIdMemset:
@@ -26325,7 +26343,8 @@ bool ir_has_side_effects(IrInstruction *instruction) {
         case IrInstructionIdIntType:
         case IrInstructionIdVectorType:
         case IrInstructionIdShuffleVector:
-        case IrInstructionIdSplat:
+        case IrInstructionIdSplatSrc:
+        case IrInstructionIdSplatGen:
         case IrInstructionIdBoolNot:
         case IrInstructionIdSliceSrc:
         case IrInstructionIdMemberCount:
diff --git a/src/ir_print.cpp b/src/ir_print.cpp
index 0dee7d342a..aae65d50a9 100644
--- a/src/ir_print.cpp
+++ b/src/ir_print.cpp
@@ -44,8 +44,10 @@ static const char* ir_instruction_type_str(IrInstruction* instruction) {
             return "Invalid";
         case IrInstructionIdShuffleVector:
             return "Shuffle";
-        case IrInstructionIdSplat:
-            return "Splat";
+        case IrInstructionIdSplatSrc:
+            return "SplatSrc";
+        case IrInstructionIdSplatGen:
+            return "SplatGen";
         case IrInstructionIdDeclVarSrc:
             return "DeclVarSrc";
         case IrInstructionIdDeclVarGen:
@@ -1224,7 +1226,7 @@ static void ir_print_shuffle_vector(IrPrint *irp, IrInstructionShuffleVector *in
     fprintf(irp->f, ")");
 }
 
-static void ir_print_splat(IrPrint *irp, IrInstructionSplat *instruction) {
+static void ir_print_splat_src(IrPrint *irp, IrInstructionSplatSrc *instruction) {
     fprintf(irp->f, "@splat(");
     ir_print_other_instruction(irp, instruction->len);
     fprintf(irp->f, ", ");
@@ -1232,6 +1234,12 @@ static void ir_print_splat(IrPrint *irp, IrInstructionSplat *instruction) {
     fprintf(irp->f, ")");
 }
 
+static void ir_print_splat_gen(IrPrint *irp, IrInstructionSplatGen *instruction) {
+    fprintf(irp->f, "@splat(");
+    ir_print_other_instruction(irp, instruction->scalar);
+    fprintf(irp->f, ")");
+}
+
 static void ir_print_bool_not(IrPrint *irp, IrInstructionBoolNot *instruction) {
     fprintf(irp->f, "! ");
     ir_print_other_instruction(irp, instruction->value);
@@ -2170,8 +2178,11 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction, bool
         case IrInstructionIdShuffleVector:
             ir_print_shuffle_vector(irp, (IrInstructionShuffleVector *)instruction);
             break;
-        case IrInstructionIdSplat:
-            ir_print_splat(irp, (IrInstructionSplat *)instruction);
+        case IrInstructionIdSplatSrc:
+            ir_print_splat_src(irp, (IrInstructionSplatSrc *)instruction);
+            break;
+        case IrInstructionIdSplatGen:
+            ir_print_splat_gen(irp, (IrInstructionSplatGen *)instruction);
             break;
         case IrInstructionIdBoolNot:
             ir_print_bool_not(irp, (IrInstructionBoolNot *)instruction);
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index 2909bffc3b..034800fd4c 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6514,7 +6514,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
         \\    var v = @splat(4, c);
         \\}
     ,
-        "tmp.zig:3:20: error: vector element type must be integer, float, bool, or pointer; 'comptime_int' is invalid",
+        "tmp.zig:3:23: error: vector element type must be integer, float, bool, or pointer; 'comptime_int' is invalid",
     );
 
     cases.add("compileLog of tagged enum doesn't crash the compiler",
diff --git a/test/stage1/behavior/vector.zig b/test/stage1/behavior/vector.zig
index 88a332d87b..d3a771fca8 100644
--- a/test/stage1/behavior/vector.zig
+++ b/test/stage1/behavior/vector.zig
@@ -145,10 +145,11 @@ test "vector @splat" {
             var v: u32 = 5;
             var x = @splat(4, v);
             expect(@typeOf(x) == @Vector(4, u32));
-            expect(x[0] == 5);
-            expect(x[1] == 5);
-            expect(x[2] == 5);
-            expect(x[3] == 5);
+            var array_x: [4]u32 = x;
+            expect(array_x[0] == 5);
+            expect(array_x[1] == 5);
+            expect(array_x[2] == 5);
+            expect(array_x[3] == 5);
         }
     };
     S.doTheTest();

From 28c7fe60b6de6e3c32e082a0abfb5a7bac8fc45a Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 19 Sep 2019 11:14:42 -0400
Subject: [PATCH 18/24] add docs for `@splat`

---
 doc/langref.html.in | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 61fc06fd02..1158135dab 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -5864,7 +5864,7 @@ volatile (
     : [number] "{rax}" (number),
         [arg1] "{rdi}" (arg1)
 // Next is the list of clobbers. These declare a set of registers whose
-// values will not be preserved by the execution of this assembly code. 
+// values will not be preserved by the execution of this assembly code.
 // These do not include output or input registers. The special clobber
 // value of "memory" means that the assembly writes to arbitrary undeclared
 // memory locations - not only the memory pointed to by a declared indirect
@@ -5885,7 +5885,7 @@ volatile (
       </p>
       {#header_open|Output Constraints#}
       <p>
-      Output constraints are still considered to be unstable in Zig, and 
+      Output constraints are still considered to be unstable in Zig, and
       so
       <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
       and
@@ -5900,7 +5900,7 @@ volatile (
 
       {#header_open|Input Constraints#}
       <p>
-      Input constraints are still considered to be unstable in Zig, and 
+      Input constraints are still considered to be unstable in Zig, and
       so
       <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
       and
@@ -5919,7 +5919,7 @@ volatile (
       the assembly code. These do not include output or input registers. The special clobber
       value of {#syntax#}"memory"{#endsyntax#} means that the assembly causes writes to
       arbitrary undeclared memory locations - not only the memory pointed to by a declared
-      indirect output. 
+      indirect output.
       </p>
       <p>
       Failure to declare the full set of clobbers for a given inline assembly
@@ -7746,6 +7746,30 @@ test "@setRuntimeSafety" {
       </p>
       {#header_close#}
 
+      {#header_open|@splat#}
+      <pre>{#syntax#}@splat(comptime len: u32, scalar: var) @Vector(len, @typeOf(scalar)){#endsyntax#}</pre>
+      <p>
+      Produces a vector of length {#syntax#}len{#endsyntax#} where each element is the value
+      {#syntax#}scalar{#endsyntax#}:
+      </p>
+      {#code_begin|test#}
+const std = @import("std");
+const assert = std.debug.assert;
+
+test "vector @splat" {
+    const scalar: u32 = 5;
+    const result = @splat(4, scalar);
+    comptime assert(@typeOf(result) == @Vector(4, u32));
+    assert(std.mem.eql(u32, ([4]u32)(result), [_]u32{ 5, 5, 5, 5 }));
+}
+      {#code_end#}
+      <p>
+      {#syntax#}scalar{#endsyntax#} must be an {#link|integer|Integers#}, {#link|bool|Primitive Types#},
+      {#link|float|Floats#}, or {#link|pointer|Pointers#}.
+      </p>
+      {#see_also|Vectors|@shuffle#}
+      {#header_close#}
+
       {#header_open|@sqrt#}
       <pre>{#syntax#}@sqrt(comptime T: type, value: T) T{#endsyntax#}</pre>
       <p>
@@ -9456,8 +9480,8 @@ const c = @cImport({
         <li>Does not support Zig-only pointer attributes such as alignment. Use normal {#link|Pointers#}
         please!</li>
       </ul>
-      <p>When a C pointer is pointing to a single struct (not an array), deference the C pointer to 
-        access to the struct's fields or member data. That syntax looks like 
+      <p>When a C pointer is pointing to a single struct (not an array), dereference the C pointer to
+        access the struct's fields or member data. That syntax looks like
         this: </p>
         <p>{#syntax#}ptr_to_struct.*.struct_member{#endsyntax#}</p>
         <p>This is comparable to doing {#syntax#}->{#endsyntax#} in C.</p>

From 7ebb7ca58091d136bf12dff4e19c49f8a120f102 Mon Sep 17 00:00:00 2001
From: ScorrMorr <none>
Date: Thu, 19 Sep 2019 11:35:40 +0200
Subject: [PATCH 19/24] pass param as ref in ZigList::append

---
 src/list.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/list.hpp b/src/list.hpp
index 8dce75f2b8..59782b46a8 100644
--- a/src/list.hpp
+++ b/src/list.hpp
@@ -15,7 +15,7 @@ struct ZigList {
     void deinit() {
         free(items);
     }
-    void append(T item) {
+    void append(const T& item) {
         ensure_capacity(length + 1);
         items[length++] = item;
     }

From c9937f4a2b56fc14d6d64ca9a43cca0236c6d1ad Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sun, 28 Jul 2019 18:18:35 -0500
Subject: [PATCH 20/24] Allow Zig programs to implement their own startup
 (_start) for ELF executables.

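Previously, a root source file that exported its own `_start` failed to
compile with errors like the following:

/home/shawn/git/zig-simd/build/lib/zig/std/special/start.zig:23:40: error: exported symbol collision: '_start'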
        @export("_start", _start, .Strong);
        ^
/home/shawn/git/zig-simd/build/d.zig:1:1: note: other symbol is here
pub export fn _start() void {
^
/home/shawn/git/zig-simd/build/lib/zig/std/special/start.zig:124:35: error: root source file has no member called 'main'
    switch (@typeInfo(@typeOf(root.main).ReturnType)) {
---
 std/special/start.zig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/std/special/start.zig b/std/special/start.zig
index a7878a7570..2ea065591b 100644
--- a/std/special/start.zig
+++ b/std/special/start.zig
@@ -23,7 +23,7 @@ comptime {
     } else if (builtin.os == .uefi) {
         @export("EfiMain", EfiMain, .Strong);
     } else {
-        @export("_start", _start, .Strong);
+        if (!@hasDecl(root, "_start")) @export("_start", _start, .Strong);
     }
 }
 

From ff9f3275dede031cdbea67272f648bb91c79c574 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Wed, 18 Sep 2019 18:34:40 -0500
Subject: [PATCH 21/24] docs: clarify @clz and @ctz terminology to not be
 endian-specific.

This was brought up in IRC a few days ago.
---
 doc/langref.html.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 1158135dab..d9750a6635 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6650,7 +6650,7 @@ async fn func(y: *i32) void {
       {#header_open|@clz#}
       <pre>{#syntax#}@clz(comptime T: type, integer: T){#endsyntax#}</pre>
       <p>
-      This function counts the number of leading zeroes in {#syntax#}integer{#endsyntax#}.
+      This function counts the number of most-significant (leading in a big-endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
       </p>
       <p>
       If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
@@ -6792,7 +6792,7 @@ test "main" {
       {#header_open|@ctz#}
       <pre>{#syntax#}@ctz(comptime T: type, integer: T){#endsyntax#}</pre>
       <p>
-      This function counts the number of trailing zeroes in {#syntax#}integer{#endsyntax#}.
+      This function counts the number of least-significant (trailing in a big-endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
       </p>
       <p>
       If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},

From 3b297f58f70c72fb57cdc46a681db4e99abd34b5 Mon Sep 17 00:00:00 2001
From: daurnimator <quae@daurnimator.com>
Date: Tue, 10 Sep 2019 00:09:08 +1000
Subject: [PATCH 22/24] src: use zig_panic rather than having LLVM abort

---
 src/codegen.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/codegen.cpp b/src/codegen.cpp
index b0817e8eb8..3c7a1048f4 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -7526,7 +7526,9 @@ static void do_code_gen(CodeGen *g) {
     }
 
     char *error = nullptr;
-    LLVMVerifyModule(g->module, LLVMAbortProcessAction, &error);
+    if (LLVMVerifyModule(g->module, LLVMReturnStatusAction, &error)) {
+        zig_panic("broken LLVM module found: %s", error);
+    }
 }
 
 static void zig_llvm_emit_output(CodeGen *g) {

From ef3f7ecc1927454bf03051f0a76a05ea8911fbe5 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 19 Sep 2019 13:07:30 -0400
Subject: [PATCH 23/24] update std.zig.tokenizer

---
 std/zig/tokenizer.zig | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
index d4173cde0e..b0a6cd1122 100644
--- a/std/zig/tokenizer.zig
+++ b/std/zig/tokenizer.zig
@@ -271,11 +271,11 @@ pub const Token = struct {
                 .Keyword_align => "align",
                 .Keyword_allowzero => "allowzero",
                 .Keyword_and => "and",
+                .Keyword_anyframe => "anyframe",
                 .Keyword_asm => "asm",
                 .Keyword_async => "async",
                 .Keyword_await => "await",
                 .Keyword_break => "break",
-                .Keyword_cancel => "cancel",
                 .Keyword_catch => "catch",
                 .Keyword_comptime => "comptime",
                 .Keyword_const => "const",
@@ -294,11 +294,12 @@ pub const Token = struct {
                 .Keyword_inline => "inline",
                 .Keyword_nakedcc => "nakedcc",
                 .Keyword_noalias => "noalias",
+                .Keyword_noasync => "noasync",
+                .Keyword_noinline => "noinline",
                 .Keyword_null => "null",
                 .Keyword_or => "or",
                 .Keyword_orelse => "orelse",
                 .Keyword_packed => "packed",
-                .Keyword_promise => "promise",
                 .Keyword_pub => "pub",
                 .Keyword_resume => "resume",
                 .Keyword_return => "return",

From 925ffbce7f424548be9eb42eb3914d5035066003 Mon Sep 17 00:00:00 2001
From: stratact <stratact@stratacter.com>
Date: Thu, 19 Sep 2019 10:45:54 -0700
Subject: [PATCH 24/24] Disable Channel, Future, and Lock tests for FreeBSD
 (#3253)

* Disable Channel, Future, and Lock tests for FreeBSD
---
 std/event/channel.zig | 2 ++
 std/event/future.zig  | 2 ++
 std/event/lock.zig    | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/std/event/channel.zig b/std/event/channel.zig
index a397d280de..2f211d21e2 100644
--- a/std/event/channel.zig
+++ b/std/event/channel.zig
@@ -306,6 +306,8 @@ pub fn Channel(comptime T: type) type {
 test "std.event.Channel" {
     // https://github.com/ziglang/zig/issues/1908
     if (builtin.single_threaded) return error.SkipZigTest;
+    // https://github.com/ziglang/zig/issues/3251
+    if (std.os.freebsd.is_the_target) return error.SkipZigTest;
 
     var loop: Loop = undefined;
     // TODO make a multi threaded test
diff --git a/std/event/future.zig b/std/event/future.zig
index b55b795de6..1e3508de41 100644
--- a/std/event/future.zig
+++ b/std/event/future.zig
@@ -85,6 +85,8 @@ pub fn Future(comptime T: type) type {
 test "std.event.Future" {
     // https://github.com/ziglang/zig/issues/1908
     if (builtin.single_threaded) return error.SkipZigTest;
+    // https://github.com/ziglang/zig/issues/3251
+    if (std.os.freebsd.is_the_target) return error.SkipZigTest;
 
     const allocator = std.heap.direct_allocator;
 
diff --git a/std/event/lock.zig b/std/event/lock.zig
index 0fa65f031d..a0b1fd3e50 100644
--- a/std/event/lock.zig
+++ b/std/event/lock.zig
@@ -118,6 +118,8 @@ pub const Lock = struct {
 test "std.event.Lock" {
     // TODO https://github.com/ziglang/zig/issues/1908
     if (builtin.single_threaded) return error.SkipZigTest;
+    // TODO https://github.com/ziglang/zig/issues/3251
+    if (std.os.freebsd.is_the_target) return error.SkipZigTest;
 
     const allocator = std.heap.direct_allocator;