stage2: implement cmpxchg and improve comptime eval

* Implement Sema for `@cmpxchgWeak` and `@cmpxchgStrong`. Both runtime and comptime codepaths are implemented. * Implement Codegen for LLVM backend and C backend. * Add LazySrcLoc.node_offset_builtin_call_argX 3...5 * Sema: rework comptime control flow. - `error.ComptimeReturn` is used to signal that a comptime function call has returned a result (stored in the Inlining struct). `analyzeCall` notices this and handles the result. - The ZIR instructions `break_inline`, `block_inline`, `condbr_inline` are now redundant and can be deleted. `break`, `block`, and `condbr` function equivalently inside a comptime scope. - The ZIR instructions `loop` and `repeat` are also modified to directly perform comptime control flow inside a comptime scope, skipping an unnecessary mechanism for analysis of runtime code. This makes Zig perform closer to an interpreter when evaluating comptime code. * Sema: zirRetErrValue looks at Sema.ret_fn_ty rather than sema.func for adding to the inferred error set. This fixes a bug for inlined/comptime function calls. * Implement ZIR printing for cmpxchg. * stage1: make cmpxchg respect --single-threaded - Our LLVM C++ API wrapper failed to expose this boolean flag before. * Fix AIR printing for struct fields showing incorrect liveness data.
2021-09-14 21:58:22 -07:00
parent 5d14590ed1
commit 0395b35cee
19 changed files with 682 additions and 115 deletions
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -911,6 +911,8 @@ fn genBody(o: *Object, body: []const Air.Inst.Index) error{ AnalysisFail, OutOfM
            .wrap_optional    => try airWrapOptional(o, inst),
            .struct_field_ptr => try airStructFieldPtr(o, inst),
            .array_to_slice   => try airArrayToSlice(o, inst),
+            .cmpxchg_weak     => try airCmpxchg(o, inst, "weak"),
+            .cmpxchg_strong   => try airCmpxchg(o, inst, "strong"),

            .struct_field_ptr_index_0 => try airStructFieldPtrIndex(o, inst, 0),
            .struct_field_ptr_index_1 => try airStructFieldPtrIndex(o, inst, 1),
@@ -1878,6 +1880,43 @@ fn airArrayToSlice(o: *Object, inst: Air.Inst.Index) !CValue {
    return local;
 }

+/// Lowers an AIR cmpxchg instruction: allocates a `.Const` local of the instruction's type
+/// and writes ` = zig_cmpxchg_<flavor>(ptr, expected, new, success_order, failure_order);`.
+fn airCmpxchg(o: *Object, inst: Air.Inst.Index, flavor: [*:0]const u8) !CValue {
+    const payload_index = o.air.instructions.items(.data)[inst].ty_pl.payload;
+    const cmpxchg = o.air.extraData(Air.Cmpxchg, payload_index).data;
+    const operands = [_]CValue{
+        try o.resolveInst(cmpxchg.ptr),
+        try o.resolveInst(cmpxchg.expected_value),
+        try o.resolveInst(cmpxchg.new_value),
+    };
+    const result_local = try o.allocLocal(o.air.typeOfIndex(inst), .Const);
+    const w = o.writer();
+
+    try w.print(" = zig_cmpxchg_{s}(", .{flavor});
+    // Every operand is followed by a separator because the two orderings come last.
+    for (operands) |operand| {
+        try o.writeCValue(w, operand);
+        try w.writeAll(", ");
+    }
+    try writeMemoryOrder(w, cmpxchg.successOrder());
+    try w.writeAll(", ");
+    try writeMemoryOrder(w, cmpxchg.failureOrder());
+    try w.writeAll(");\n");
+
+    return result_local;
+}
+
+/// Writes to `w` the C11 `memory_order_*` constant that corresponds to `order`.
+/// Returns whatever error `w.writeAll` reports.
+fn writeMemoryOrder(w: anytype, order: std.builtin.AtomicOrder) !void {
+    const str = switch (order) {
+        // Zig's `.Monotonic` is LLVM `monotonic`, which is C11 `memory_order_relaxed`,
+        // not `memory_order_consume`. C11 has nothing weaker than relaxed, so
+        // `.Unordered` is emitted as relaxed as well.
+        .Unordered, .Monotonic => "memory_order_relaxed",
+        .Acquire => "memory_order_acquire",
+        .Release => "memory_order_release",
+        .AcqRel => "memory_order_acq_rel",
+        .SeqCst => "memory_order_seq_cst",
+    };
+    return w.writeAll(str);
+}
+
 fn IndentWriter(comptime UnderlyingWriter: type) type {
    return struct {
        const Self = @This();
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -389,6 +389,7 @@ pub const Object = struct {
            .latest_alloca_inst = null,
            .llvm_func = llvm_func,
            .blocks = .{},
+            .single_threaded = module.comp.bin_file.options.single_threaded,
        };
        defer fg.deinit();

@@ -906,6 +907,31 @@ pub const DeclGen = struct {
        // TODO: improve this API, `addAttr(-1, attr_name)`
        self.addAttr(val, std.math.maxInt(llvm.AttributeIndex), attr_name);
    }
+
+    /// Returns the integer type an atomic operation on `ty` has to be carried out
+    /// with, or null when `ty` can be used as-is. Operands that are not byte sized
+    /// must be widened before the operation and the result truncated afterwards.
+    /// Floating-point operands of an RMW exchange are bitcasted to same-sized
+    /// integers to work around an LLVM deficiency when targeting ARM/AArch64.
+    fn getAtomicAbiType(dg: *DeclGen, ty: Type, is_rmw_xchg: bool) ?*const llvm.Type {
+        const target = dg.module.getTarget();
+        var tag_buffer: Type.Payload.Bits = undefined;
+        const backing_ty = switch (ty.zigTypeTag()) {
+            .Int => ty,
+            .Enum => ty.enumTagType(&tag_buffer),
+            .Bool => return dg.context.intType(8),
+            .Float => return if (is_rmw_xchg)
+                dg.context.intType(@intCast(c_uint, ty.abiSize(target) * 8))
+            else
+                null,
+            else => return null,
+        };
+        const bits = backing_ty.intInfo(target).bits;
+        // A power-of-two bit count that is a multiple of 8 needs no widening.
+        if (std.math.isPowerOfTwo(bits) and bits % 8 == 0) return null;
+        // Anything else is widened to an integer spanning the full ABI size.
+        return dg.context.intType(@intCast(c_uint, backing_ty.abiSize(target) * 8));
+    }
 };

 pub const FuncGen = struct {
@@ -940,6 +966,8 @@ pub const FuncGen = struct {
        break_vals: *BreakValues,
    }),

+    single_threaded: bool,
+
    const BreakBasicBlocks = std.ArrayListUnmanaged(*const llvm.BasicBlock);
    const BreakValues = std.ArrayListUnmanaged(*const llvm.Value);

@@ -1029,6 +1057,8 @@ pub const FuncGen = struct {
                .slice_ptr      => try self.airSliceField(inst, 0),
                .slice_len      => try self.airSliceField(inst, 1),
                .array_to_slice => try self.airArrayToSlice(inst),
+                .cmpxchg_weak   => try self.airCmpxchg(inst, true),
+                .cmpxchg_strong => try self.airCmpxchg(inst, false),

                .struct_field_ptr => try self.airStructFieldPtr(inst),
                .struct_field_val => try self.airStructFieldVal(inst),
@@ -1975,6 +2005,58 @@ pub const FuncGen = struct {
        return null;
    }

+    /// Lowers a `cmpxchg_weak`/`cmpxchg_strong` AIR instruction. The operands are
+    /// widened first when `getAtomicAbiType` asks for it. The cmpxchg result
+    /// (field 0: payload, field 1: success bit) is then turned into the optional
+    /// this instruction produces, which is null when the success bit is set.
+    fn airCmpxchg(self: *FuncGen, inst: Air.Inst.Index, is_weak: bool) !?*const llvm.Value {
+        const payload_index = self.air.instructions.items(.data)[inst].ty_pl.payload;
+        const cmpxchg = self.air.extraData(Air.Cmpxchg, payload_index).data;
+        var ptr = try self.resolveInst(cmpxchg.ptr);
+        var expected = try self.resolveInst(cmpxchg.expected_value);
+        var desired = try self.resolveInst(cmpxchg.new_value);
+        const operand_ty = self.air.typeOf(cmpxchg.ptr).elemType();
+        const widened_ty = self.dg.getAtomicAbiType(operand_ty, false);
+        if (widened_ty) |abi_ty| {
+            // Operate on the wider integer; the payload is truncated back below.
+            ptr = self.builder.buildBitCast(ptr, abi_ty.pointerType(0), "");
+            const is_signed = operand_ty.isSignedInt();
+            expected = if (is_signed)
+                self.builder.buildSExt(expected, abi_ty, "")
+            else
+                self.builder.buildZExt(expected, abi_ty, "");
+            desired = if (is_signed)
+                self.builder.buildSExt(desired, abi_ty, "")
+            else
+                self.builder.buildZExt(desired, abi_ty, "");
+        }
+        const result = self.builder.buildCmpXchg(
+            ptr,
+            expected,
+            desired,
+            toLlvmAtomicOrdering(cmpxchg.successOrder()),
+            toLlvmAtomicOrdering(cmpxchg.failureOrder()),
+            is_weak,
+            self.single_threaded,
+        );
+
+        const loaded = self.builder.buildExtractValue(result, 0, "");
+        const payload = if (widened_ty == null)
+            loaded
+        else
+            self.builder.buildTrunc(loaded, try self.dg.llvmType(operand_ty), "");
+        const success_bit = self.builder.buildExtractValue(result, 1, "");
+
+        const optional_ty = self.air.typeOfIndex(inst);
+        if (!optional_ty.isPtrLikeOptional()) {
+            // Field 0 receives the payload, field 1 the inverted success bit.
+            const optional_llvm_ty = try self.dg.llvmType(optional_ty);
+            const non_null_bit = self.builder.buildNot(success_bit, "");
+            const partial = self.builder.buildInsertValue(optional_llvm_ty.getUndef(), payload, 0, "");
+            return self.builder.buildInsertValue(partial, non_null_bit, 1, "");
+        }
+
+        // Pointer-like optional: the child type's null constant on success, the payload otherwise.
+        var child_buffer: Type.Payload.ElemType = undefined;
+        const child_llvm_ty = try self.dg.llvmType(optional_ty.optionalChild(&child_buffer));
+        return self.builder.buildSelect(success_bit, child_llvm_ty.constNull(), payload, "");
+    }
+
    fn getIntrinsic(self: *FuncGen, name: []const u8) *const llvm.Value {
        const id = llvm.lookupIntrinsicID(name.ptr, name.len);
        assert(id != 0);
@@ -2125,3 +2207,14 @@ fn initializeLLVMTarget(arch: std.Target.Cpu.Arch) void {
        .spirv64 => {},
    }
 }
+
+/// Translates a Zig `std.builtin.AtomicOrder` into the matching `llvm.AtomicOrdering`.
+fn toLlvmAtomicOrdering(atomic_order: std.builtin.AtomicOrder) llvm.AtomicOrdering {
+    switch (atomic_order) {
+        .SeqCst => return .SequentiallyConsistent,
+        .AcqRel => return .AcquireRelease,
+        .Release => return .Release,
+        .Acquire => return .Acquire,
+        .Monotonic => return .Monotonic,
+        .Unordered => return .Unordered,
+    }
+}
--- a/src/codegen/llvm/bindings.zig
+++ b/src/codegen/llvm/bindings.zig
@@ -298,6 +298,14 @@ pub const Builder = opaque {
        Name: [*:0]const u8,
    ) *const Value;

+    /// Binding for the LLVM C API function `LLVMBuildSExt`: builds a cast of `Val`
+    /// to the integer type `DestTy` by sign extension. `Name` names the result.
+    pub const buildSExt = LLVMBuildSExt;
+    extern fn LLVMBuildSExt(
+        *const Builder,
+        Val: *const Value,
+        DestTy: *const Type,
+        Name: [*:0]const u8,
+    ) *const Value;
+
    pub const buildCall = LLVMBuildCall;
    extern fn LLVMBuildCall(
        *const Builder,
@@ -493,6 +501,27 @@ pub const Builder = opaque {
        Index: c_uint,
        Name: [*:0]const u8,
    ) *const Value;
+
+    /// Binding for `ZigLLVMBuildCmpXchg`, which builds a compare-and-exchange on `ptr`
+    /// comparing against `cmp` and storing `new_val`, with separate success and
+    /// failure orderings. `is_weak` selects the weak flavor.
+    /// NOTE(review): `is_single_threaded` presumably restricts the operation to the
+    /// single-thread synchronization scope; confirm in the C++ wrapper.
+    pub const buildCmpXchg = ZigLLVMBuildCmpXchg;
+    extern fn ZigLLVMBuildCmpXchg(
+        builder: *const Builder,
+        ptr: *const Value,
+        cmp: *const Value,
+        new_val: *const Value,
+        success_ordering: AtomicOrdering,
+        failure_ordering: AtomicOrdering,
+        is_weak: bool,
+        is_single_threaded: bool,
+    ) *const Value;
+
+    /// Binding for the LLVM C API function `LLVMBuildSelect`: builds a value that is
+    /// `Then` when the condition `If` is true and `Else` otherwise.
+    pub const buildSelect = LLVMBuildSelect;
+    extern fn LLVMBuildSelect(
+        *const Builder,
+        If: *const Value,
+        Then: *const Value,
+        Else: *const Value,
+        Name: [*:0]const u8,
+    ) *const Value;
 };

 pub const IntPredicate = enum(c_uint) {
@@ -854,3 +883,13 @@ pub const Linkage = enum(c_uint) {
    LinkerPrivate,
    LinkerPrivateWeak,
 };
+
+/// Atomic orderings as passed across the extern boundary to the LLVM bindings
+/// (for example `ZigLLVMBuildCmpXchg`).
+/// NOTE(review): the explicit values presumably have to stay in sync with
+/// `LLVMAtomicOrdering` from the LLVM C API; note that 3 is not used.
+pub const AtomicOrdering = enum(c_uint) {
+    NotAtomic = 0,
+    Unordered = 1,
+    Monotonic = 2,
+    Acquire = 4,
+    Release = 5,
+    AcquireRelease = 6,
+    SequentiallyConsistent = 7,
+};