stage2: improve inline asm stage1 compatibility

* outputs can have names and be referenced with template replacements the same as inputs.
* fix print_air.zig not decoding correctly.
* LLVM backend: use a table for template names for simplicity
2022-05-02 20:19:31 -07:00
parent b95942744c
commit 65389dc280
10 changed files with 82 additions and 53 deletions
--- a/src/Air.zig
+++ b/src/Air.zig
@@ -815,6 +815,8 @@ pub const VectorCmp = struct {
 /// 1. `Inst.Ref` for every inputs_len
 /// 2. for every outputs_len
 ///    - constraint: memory at this position is reinterpreted as a null
+///      terminated string.
+///    - name: memory at this position is reinterpreted as a null
 ///      terminated string. pad to the next u32 after the null byte.
 /// 3. for every inputs_len
 ///    - constraint: memory at this position is reinterpreted as a null
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -10535,7 +10535,11 @@ fn zirAsm(
    var output_type_bits = extra.data.output_type_bits;
    var needed_capacity: usize = @typeInfo(Air.Asm).Struct.fields.len + outputs_len + inputs_len;

-    const Output = struct { constraint: []const u8, ty: Type };
+    const Output = struct {
+        constraint: []const u8,
+        name: []const u8,
+        ty: Type,
+    };
    const output: ?Output = if (outputs_len == 0) null else blk: {
        const output = sema.code.extraData(Zir.Inst.Asm.Output, extra_i);
        extra_i = output.end;
@@ -10548,10 +10552,12 @@ fn zirAsm(
        }

        const constraint = sema.code.nullTerminatedString(output.data.constraint);
-        needed_capacity += constraint.len / 4 + 1;
+        const name = sema.code.nullTerminatedString(output.data.name);
+        needed_capacity += (constraint.len + name.len + (2 + 3)) / 4;

        break :blk Output{
            .constraint = constraint,
+            .name = name,
            .ty = try sema.resolveType(block, ret_ty_src, output.data.operand),
        };
    };
@@ -10573,7 +10579,7 @@ fn zirAsm(

        const constraint = sema.code.nullTerminatedString(input.data.constraint);
        const name = sema.code.nullTerminatedString(input.data.name);
-        needed_capacity += (constraint.len + name.len + 1) / 4 + 1;
+        needed_capacity += (constraint.len + name.len + (2 + 3)) / 4;
        inputs[arg_i] = .{ .c = constraint, .n = name };
    }

@@ -10611,7 +10617,9 @@ fn zirAsm(
        const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice());
        mem.copy(u8, buffer, o.constraint);
        buffer[o.constraint.len] = 0;
-        sema.air_extra.items.len += o.constraint.len / 4 + 1;
+        mem.copy(u8, buffer[o.constraint.len + 1 ..], o.name);
+        buffer[o.constraint.len + 1 + o.name.len] = 0;
+        sema.air_extra.items.len += (o.constraint.len + o.name.len + (2 + 3)) / 4;
    }
    for (inputs) |input| {
        const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice());
@@ -10619,7 +10627,7 @@ fn zirAsm(
        buffer[input.c.len] = 0;
        mem.copy(u8, buffer[input.c.len + 1 ..], input.n);
        buffer[input.c.len + 1 + input.n.len] = 0;
-        sema.air_extra.items.len += (input.c.len + input.n.len + 1) / 4 + 1;
+        sema.air_extra.items.len += (input.c.len + input.n.len + (2 + 3)) / 4;
    }
    for (clobbers) |clobber| {
        const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice());
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -3272,10 +3272,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
            if (output != .none) {
                return self.fail("TODO implement codegen for non-expr asm", .{});
            }
+            const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            break constraint;
        } else null;
@@ -3283,10 +3285,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
        for (inputs) |input| {
            const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(input_bytes, 0);
-            const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
+            const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
                return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@@ -4078,10 +4078,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
            if (output != .none) {
                return self.fail("TODO implement codegen for non-expr asm", .{});
            }
+            const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            break constraint;
        } else null;
@@ -4089,10 +4091,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
        for (inputs) |input| {
            const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(input_bytes, 0);
-            const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
+            const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
                return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@@ -2098,10 +2098,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
            if (output != .none) {
                return self.fail("TODO implement codegen for non-expr asm", .{});
            }
+            const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            break constraint;
        } else null;
@@ -2109,10 +2111,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
        for (inputs) |input| {
            const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(input_bytes, 0);
-            const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
+            const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
                return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
--- a/src/arch/sparcv9/CodeGen.zig
+++ b/src/arch/sparcv9/CodeGen.zig
@@ -642,10 +642,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
            if (output != .none) {
                return self.fail("TODO implement codegen for non-expr asm", .{});
            }
+            const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            break constraint;
        } else null;
@@ -653,10 +655,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
        for (inputs) |input| {
            const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(input_bytes, 0);
-            const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
+            const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
                return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -4739,10 +4739,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
            if (output != .none) {
                return self.fail("TODO implement codegen for non-expr asm", .{});
            }
+            const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            break constraint;
        } else null;
@@ -4750,10 +4752,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
        for (inputs) |input| {
            const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(input_bytes, 0);
-            const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
+            const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
                return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -3018,10 +3018,12 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
        if (output != .none) {
            return f.fail("TODO implement codegen for non-expr asm", .{});
        }
+        const extra_bytes = std.mem.sliceAsBytes(f.air.extra[extra_i..]);
        const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
+        const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
        // This equation accounts for the fact that even if we have exactly 4 bytes
        // for the string, we still use the next u32 for the null terminator.
-        extra_i += constraint.len / 4 + 1;
+        extra_i += (constraint.len + name.len + (2 + 3)) / 4;

        break constraint;
    } else null;
@@ -3031,10 +3033,12 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {

    const inputs_extra_begin = extra_i;
    for (inputs) |input, i| {
-        const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
+        const input_bytes = std.mem.sliceAsBytes(f.air.extra[extra_i..]);
+        const constraint = std.mem.sliceTo(input_bytes, 0);
+        const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
        // This equation accounts for the fact that even if we have exactly 4 bytes
        // for the string, we still use the next u32 for the null terminator.
-        extra_i += constraint.len / 4 + 1;
+        extra_i += (constraint.len + name.len + (2 + 3)) / 4;

        if (constraint[0] == '{' and constraint[constraint.len - 1] == '}') {
            const reg = constraint[1 .. constraint.len - 1];
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -4638,14 +4638,19 @@ pub const FuncGen = struct {
        var llvm_param_i: usize = 0;
        var total_i: usize = 0;

+        var name_map: std.StringArrayHashMapUnmanaged(void) = .{};
+        try name_map.ensureUnusedCapacity(arena, outputs.len + inputs.len);
+
        for (outputs) |output| {
            if (output != .none) {
                return self.todo("implement inline asm with non-returned output", .{});
            }
+            const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
            const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            try llvm_constraints.ensureUnusedCapacity(self.gpa, constraint.len + 1);
            if (total_i != 0) {
@@ -4654,17 +4659,17 @@ pub const FuncGen = struct {
            llvm_constraints.appendAssumeCapacity('=');
            llvm_constraints.appendSliceAssumeCapacity(constraint[1..]);

+            name_map.putAssumeCapacityNoClobber(name, {});
            total_i += 1;
        }

-        const input_start_extra_i = extra_i;
        for (inputs) |input| {
-            const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
-            const constraint = std.mem.sliceTo(input_bytes, 0);
-            const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
+            const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
+            const constraint = std.mem.sliceTo(extra_bytes, 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
            // for the string, we still use the next u32 for the null terminator.
-            extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            const arg_llvm_value = try self.resolveInst(input);

@@ -4677,6 +4682,7 @@ pub const FuncGen = struct {
            }
            llvm_constraints.appendSliceAssumeCapacity(constraint);

+            name_map.putAssumeCapacityNoClobber(name, {});
            llvm_param_i += 1;
            total_i += 1;
        }
@@ -4739,20 +4745,11 @@ pub const FuncGen = struct {
                        const name = asm_source[name_start..i];
                        state = .start;

-                        extra_i = input_start_extra_i;
-                        for (inputs) |_, input_i| {
-                            const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
-                            const constraint = std.mem.sliceTo(input_bytes, 0);
-                            const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
-                            extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
-
-                            if (std.mem.eql(u8, name, input_name)) {
-                                try rendered_template.writer().print("{d}", .{input_i});
-                                break;
-                            }
-                        } else {
-                            return self.todo("TODO validate asm in Sema", .{});
-                        }
+                        const index = name_map.getIndex(name) orelse {
+                            // we should validate the assembly in Sema; by now it is too late
+                            return self.todo("unknown input or output name: '{s}'", .{name});
+                        };
+                        try rendered_template.writer().print("{d}", .{index});
                    },
                    else => {},
                },
--- a/src/print_air.zig
+++ b/src/print_air.zig
@@ -542,15 +542,19 @@ const Writer = struct {
        extra_i += inputs.len;

        for (outputs) |output| {
-            const constraint = w.air.nullTerminatedString(extra_i);
+            const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]);
+            const constraint = std.mem.sliceTo(extra_bytes, 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
+
            // This equation accounts for the fact that even if we have exactly 4 bytes
-            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            // for the two strings, we still round up to the next u32 to make room
+            // for their null terminators.
+            extra_i += (constraint.len + name.len + (2 + 3)) / 4;

            if (output == .none) {
-                try s.print(", -> {s}", .{constraint});
+                try s.print(", [{s}] -> {s}", .{ name, constraint });
            } else {
-                try s.print(", out {s} = (", .{constraint});
+                try s.print(", [{s}] out {s} = (", .{ name, constraint });
                try w.writeOperand(s, inst, op_index, output);
                op_index += 1;
                try s.writeByte(')');
@@ -558,12 +562,15 @@ const Writer = struct {
        }

        for (inputs) |input| {
-            const constraint = w.air.nullTerminatedString(extra_i);
+            const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]);
+            const constraint = std.mem.sliceTo(extra_bytes, 0);
+            const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
            // This equation accounts for the fact that even if we have exactly 4 bytes
-            // for the string, we still use the next u32 for the null terminator.
-            extra_i += constraint.len / 4 + 1;
+            // for the two strings, we still round up to the next u32 to make room
+            // for their null terminators.
+            extra_i += (constraint.len + name.len + 1) / 4 + 1;

-            try s.print(", in {s} = (", .{constraint});
+            try s.print(", [{s}] in {s} = (", .{ name, constraint });
            try w.writeOperand(s, inst, op_index, input);
            op_index += 1;
            try s.writeByte(')');
@@ -572,7 +579,8 @@ const Writer = struct {
        {
            var clobber_i: u32 = 0;
            while (clobber_i < clobbers_len) : (clobber_i += 1) {
-                const clobber = w.air.nullTerminatedString(extra_i);
+                const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]);
+                const clobber = std.mem.sliceTo(extra_bytes, 0);
                // This equation accounts for the fact that even if we have exactly 4 bytes
                // for the string, we still use the next u32 for the null terminator.
                extra_i += clobber.len / 4 + 1;