From 1ac512df70465629d1e80ff02be51c121c6f4c07 Mon Sep 17 00:00:00 2001
From: LemonBoy <thatlemon@gmail.com>
Date: Sat, 17 Oct 2020 13:11:10 +0200
Subject: [PATCH] stage1: Let LLVM legalize the overflowing ops on vectors

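LLVM is smart enough to produce tight overflow-checking sequences for
vector operands, so the hand-written lowering (extend every lane by one
bit, do the operation, truncate, re-extend and compare) can be dropped.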

On x86_64, the code generated for a simple v4u32 addition is turned from:

```
  c5 e9 d4 d4                  	vpaddq	%xmm4, %xmm2, %xmm2
  c5 f1 d4 cb                  	vpaddq	%xmm3, %xmm1, %xmm1
  c5 f8 28 e9                  	vmovaps	%xmm1, %xmm5
  c4 e3 55 18 ea 01            	vinsertf128	$1, %xmm2, %ymm5, %ymm5
  c5 f0 c6 da 88               	vshufps	$136, %xmm2, %xmm1, %xmm3
  c5 c8 57 f6                  	vxorps	%xmm6, %xmm6, %xmm6
  c4 e3 55 0c ee aa            	vblendps	$170, %ymm6, %ymm5, %ymm5
  c4 e3 7d 19 ec 01            	vextractf128	$1, %ymm5, %xmm4
  c4 e2 69 29 d4               	vpcmpeqq	%xmm4, %xmm2, %xmm2
  c5 e9 ef d0                  	vpxor	%xmm0, %xmm2, %xmm2
  c4 e2 71 29 cd               	vpcmpeqq	%xmm5, %xmm1, %xmm1
  c5 f1 ef c0                  	vpxor	%xmm0, %xmm1, %xmm0
  c5 f9 6b c2                  	vpackssdw	%xmm2, %xmm0, %xmm0
  c5 f8 50 c0                  	vmovmskps	%xmm0, %eax
  84 c0                        	testb	%al, %al
```

into:

```
  c5 f9 fe c9                  	vpaddd	%xmm1, %xmm0, %xmm1
  c4 e2 71 3f c0               	vpmaxud	%xmm0, %xmm1, %xmm0
  c5 f1 76 c0                  	vpcmpeqd	%xmm0, %xmm1, %xmm0
  c5 f8 50 c0                  	vmovmskps	%xmm0, %eax
  34 0f                        	xorb	$15, %al
  0f 95 c1                     	setne	%cl
  f6 c1 01                     	testb	$1, %cl
```
---
 src/stage1/codegen.cpp | 37 +++++++++----------------------------
 1 file changed, 9 insertions(+), 28 deletions(-)

diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp
index 9928a37f9d..410f83b530 100644
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@@ -1593,35 +1593,16 @@ static const BuildBinOpFunc unsigned_op[3] = { LLVMBuildNUWAdd, LLVMBuildNUWSub,
 static LLVMValueRef gen_overflow_op(CodeGen *g, ZigType *operand_type, AddSubMul op,
         LLVMValueRef val1, LLVMValueRef val2)
 {
-    LLVMValueRef overflow_bit;
-    LLVMValueRef result;
-
+    LLVMValueRef fn_val = get_int_overflow_fn(g, operand_type, op);
+    LLVMValueRef params[] = {
+        val1,
+        val2,
+    };
+    LLVMValueRef result_struct = LLVMBuildCall(g->builder, fn_val, params, 2, "");
+    LLVMValueRef result = LLVMBuildExtractValue(g->builder, result_struct, 0, "");
+    LLVMValueRef overflow_bit = LLVMBuildExtractValue(g->builder, result_struct, 1, "");
     if (operand_type->id == ZigTypeIdVector) {
-        ZigType *int_type = operand_type->data.vector.elem_type;
-        assert(int_type->id == ZigTypeIdInt);
-        LLVMTypeRef one_more_bit_int = LLVMIntType(int_type->data.integral.bit_count + 1);
-        LLVMTypeRef one_more_bit_int_vector = LLVMVectorType(one_more_bit_int, operand_type->data.vector.len);
-        const auto buildExtFn = int_type->data.integral.is_signed ? LLVMBuildSExt : LLVMBuildZExt;
-        LLVMValueRef extended1 = buildExtFn(g->builder, val1, one_more_bit_int_vector, "");
-        LLVMValueRef extended2 = buildExtFn(g->builder, val2, one_more_bit_int_vector, "");
-        LLVMValueRef extended_result = wrap_op[op](g->builder, extended1, extended2, "");
-        result = LLVMBuildTrunc(g->builder, extended_result, get_llvm_type(g, operand_type), "");
-
-        LLVMValueRef re_extended_result = buildExtFn(g->builder, result, one_more_bit_int_vector, "");
-        LLVMValueRef overflow_vector = LLVMBuildICmp(g->builder, LLVMIntNE, extended_result, re_extended_result, "");
-        LLVMTypeRef bitcast_int_type = LLVMIntType(operand_type->data.vector.len);
-        LLVMValueRef bitcasted_overflow = LLVMBuildBitCast(g->builder, overflow_vector, bitcast_int_type, "");
-        LLVMValueRef zero = LLVMConstNull(bitcast_int_type);
-        overflow_bit = LLVMBuildICmp(g->builder, LLVMIntNE, bitcasted_overflow, zero, "");
-    } else {
-        LLVMValueRef fn_val = get_int_overflow_fn(g, operand_type, op);
-        LLVMValueRef params[] = {
-            val1,
-            val2,
-        };
-        LLVMValueRef result_struct = LLVMBuildCall(g->builder, fn_val, params, 2, "");
-        result = LLVMBuildExtractValue(g->builder, result_struct, 0, "");
-        overflow_bit = LLVMBuildExtractValue(g->builder, result_struct, 1, "");
+        overflow_bit = ZigLLVMBuildOrReduce(g->builder, overflow_bit);
     }
 
     LLVMBasicBlockRef fail_block = LLVMAppendBasicBlock(g->cur_fn_val, "OverflowFail");