stage2: improve inline asm stage1 compatibility
* Outputs can have names and be referenced with template replacements, the same as inputs.
* Fix print_air.zig not decoding correctly.
* LLVM backend: use a table for template names, for simplicity.
This commit is contained in:
@@ -815,6 +815,8 @@ pub const VectorCmp = struct {
|
||||
/// 1. `Inst.Ref` for every inputs_len
|
||||
/// 2. for every outputs_len
|
||||
/// - constraint: memory at this position is reinterpreted as a null
|
||||
/// terminated string.
|
||||
/// - name: memory at this position is reinterpreted as a null
|
||||
/// terminated string. pad to the next u32 after the null byte.
|
||||
/// 3. for every inputs_len
|
||||
/// - constraint: memory at this position is reinterpreted as a null
|
||||
|
||||
18
src/Sema.zig
18
src/Sema.zig
@@ -10535,7 +10535,11 @@ fn zirAsm(
|
||||
var output_type_bits = extra.data.output_type_bits;
|
||||
var needed_capacity: usize = @typeInfo(Air.Asm).Struct.fields.len + outputs_len + inputs_len;
|
||||
|
||||
const Output = struct { constraint: []const u8, ty: Type };
|
||||
const Output = struct {
|
||||
constraint: []const u8,
|
||||
name: []const u8,
|
||||
ty: Type,
|
||||
};
|
||||
const output: ?Output = if (outputs_len == 0) null else blk: {
|
||||
const output = sema.code.extraData(Zir.Inst.Asm.Output, extra_i);
|
||||
extra_i = output.end;
|
||||
@@ -10548,10 +10552,12 @@ fn zirAsm(
|
||||
}
|
||||
|
||||
const constraint = sema.code.nullTerminatedString(output.data.constraint);
|
||||
needed_capacity += constraint.len / 4 + 1;
|
||||
const name = sema.code.nullTerminatedString(output.data.name);
|
||||
needed_capacity += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
break :blk Output{
|
||||
.constraint = constraint,
|
||||
.name = name,
|
||||
.ty = try sema.resolveType(block, ret_ty_src, output.data.operand),
|
||||
};
|
||||
};
|
||||
@@ -10573,7 +10579,7 @@ fn zirAsm(
|
||||
|
||||
const constraint = sema.code.nullTerminatedString(input.data.constraint);
|
||||
const name = sema.code.nullTerminatedString(input.data.name);
|
||||
needed_capacity += (constraint.len + name.len + 1) / 4 + 1;
|
||||
needed_capacity += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
inputs[arg_i] = .{ .c = constraint, .n = name };
|
||||
}
|
||||
|
||||
@@ -10611,7 +10617,9 @@ fn zirAsm(
|
||||
const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice());
|
||||
mem.copy(u8, buffer, o.constraint);
|
||||
buffer[o.constraint.len] = 0;
|
||||
sema.air_extra.items.len += o.constraint.len / 4 + 1;
|
||||
mem.copy(u8, buffer[o.constraint.len + 1 ..], o.name);
|
||||
buffer[o.constraint.len + 1 + o.name.len] = 0;
|
||||
sema.air_extra.items.len += (o.constraint.len + o.name.len + (2 + 3)) / 4;
|
||||
}
|
||||
for (inputs) |input| {
|
||||
const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice());
|
||||
@@ -10619,7 +10627,7 @@ fn zirAsm(
|
||||
buffer[input.c.len] = 0;
|
||||
mem.copy(u8, buffer[input.c.len + 1 ..], input.n);
|
||||
buffer[input.c.len + 1 + input.n.len] = 0;
|
||||
sema.air_extra.items.len += (input.c.len + input.n.len + 1) / 4 + 1;
|
||||
sema.air_extra.items.len += (input.c.len + input.n.len + (2 + 3)) / 4;
|
||||
}
|
||||
for (clobbers) |clobber| {
|
||||
const buffer = mem.sliceAsBytes(sema.air_extra.unusedCapacitySlice());
|
||||
|
||||
@@ -3272,10 +3272,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
if (output != .none) {
|
||||
return self.fail("TODO implement codegen for non-expr asm", .{});
|
||||
}
|
||||
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
break constraint;
|
||||
} else null;
|
||||
@@ -3283,10 +3285,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
for (inputs) |input| {
|
||||
const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
|
||||
return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
|
||||
|
||||
@@ -4078,10 +4078,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
if (output != .none) {
|
||||
return self.fail("TODO implement codegen for non-expr asm", .{});
|
||||
}
|
||||
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
break constraint;
|
||||
} else null;
|
||||
@@ -4089,10 +4091,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
for (inputs) |input| {
|
||||
const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
|
||||
return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
|
||||
|
||||
@@ -2098,10 +2098,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
if (output != .none) {
|
||||
return self.fail("TODO implement codegen for non-expr asm", .{});
|
||||
}
|
||||
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
break constraint;
|
||||
} else null;
|
||||
@@ -2109,10 +2111,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
for (inputs) |input| {
|
||||
const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
|
||||
return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
|
||||
|
||||
@@ -642,10 +642,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
if (output != .none) {
|
||||
return self.fail("TODO implement codegen for non-expr asm", .{});
|
||||
}
|
||||
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
break constraint;
|
||||
} else null;
|
||||
@@ -653,10 +655,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
for (inputs) |input| {
|
||||
const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
|
||||
return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
|
||||
|
||||
@@ -4739,10 +4739,12 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
if (output != .none) {
|
||||
return self.fail("TODO implement codegen for non-expr asm", .{});
|
||||
}
|
||||
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
break constraint;
|
||||
} else null;
|
||||
@@ -4750,10 +4752,10 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void {
|
||||
for (inputs) |input| {
|
||||
const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
if (constraint.len < 3 or constraint[0] != '{' or constraint[constraint.len - 1] != '}') {
|
||||
return self.fail("unrecognized asm input constraint: '{s}'", .{constraint});
|
||||
|
||||
@@ -3018,10 +3018,12 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
|
||||
if (output != .none) {
|
||||
return f.fail("TODO implement codegen for non-expr asm", .{});
|
||||
}
|
||||
const extra_bytes = std.mem.sliceAsBytes(f.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
break constraint;
|
||||
} else null;
|
||||
@@ -3031,10 +3033,12 @@ fn airAsm(f: *Function, inst: Air.Inst.Index) !CValue {
|
||||
|
||||
const inputs_extra_begin = extra_i;
|
||||
for (inputs) |input, i| {
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(f.air.extra[extra_i..]), 0);
|
||||
const input_bytes = std.mem.sliceAsBytes(f.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
if (constraint[0] == '{' and constraint[constraint.len - 1] == '}') {
|
||||
const reg = constraint[1 .. constraint.len - 1];
|
||||
|
||||
@@ -4638,14 +4638,19 @@ pub const FuncGen = struct {
|
||||
var llvm_param_i: usize = 0;
|
||||
var total_i: usize = 0;
|
||||
|
||||
var name_map: std.StringArrayHashMapUnmanaged(void) = .{};
|
||||
try name_map.ensureUnusedCapacity(arena, outputs.len + inputs.len);
|
||||
|
||||
for (outputs) |output| {
|
||||
if (output != .none) {
|
||||
return self.todo("implement inline asm with non-returned output", .{});
|
||||
}
|
||||
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
try llvm_constraints.ensureUnusedCapacity(self.gpa, constraint.len + 1);
|
||||
if (total_i != 0) {
|
||||
@@ -4654,17 +4659,17 @@ pub const FuncGen = struct {
|
||||
llvm_constraints.appendAssumeCapacity('=');
|
||||
llvm_constraints.appendSliceAssumeCapacity(constraint[1..]);
|
||||
|
||||
name_map.putAssumeCapacityNoClobber(name, {});
|
||||
total_i += 1;
|
||||
}
|
||||
|
||||
const input_start_extra_i = extra_i;
|
||||
for (inputs) |input| {
|
||||
const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(extra_bytes, 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
const arg_llvm_value = try self.resolveInst(input);
|
||||
|
||||
@@ -4677,6 +4682,7 @@ pub const FuncGen = struct {
|
||||
}
|
||||
llvm_constraints.appendSliceAssumeCapacity(constraint);
|
||||
|
||||
name_map.putAssumeCapacityNoClobber(name, {});
|
||||
llvm_param_i += 1;
|
||||
total_i += 1;
|
||||
}
|
||||
@@ -4739,20 +4745,11 @@ pub const FuncGen = struct {
|
||||
const name = asm_source[name_start..i];
|
||||
state = .start;
|
||||
|
||||
extra_i = input_start_extra_i;
|
||||
for (inputs) |_, input_i| {
|
||||
const input_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(input_bytes, 0);
|
||||
const input_name = std.mem.sliceTo(input_bytes[constraint.len + 1 ..], 0);
|
||||
extra_i += (constraint.len + input_name.len + 1) / 4 + 1;
|
||||
|
||||
if (std.mem.eql(u8, name, input_name)) {
|
||||
try rendered_template.writer().print("{d}", .{input_i});
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
return self.todo("TODO validate asm in Sema", .{});
|
||||
}
|
||||
const index = name_map.getIndex(name) orelse {
|
||||
// we should validate the assembly in Sema; by now it is too late
|
||||
return self.todo("unknown input or output name: '{s}'", .{name});
|
||||
};
|
||||
try rendered_template.writer().print("{d}", .{index});
|
||||
},
|
||||
else => {},
|
||||
},
|
||||
|
||||
@@ -542,15 +542,19 @@ const Writer = struct {
|
||||
extra_i += inputs.len;
|
||||
|
||||
for (outputs) |output| {
|
||||
const constraint = w.air.nullTerminatedString(extra_i);
|
||||
const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(extra_bytes, 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
// for the strings and their null terminators, we still use the next u32
|
||||
// for the null terminator.
|
||||
extra_i += (constraint.len + name.len + (2 + 3)) / 4;
|
||||
|
||||
if (output == .none) {
|
||||
try s.print(", -> {s}", .{constraint});
|
||||
try s.print(", [{s}] -> {s}", .{ name, constraint });
|
||||
} else {
|
||||
try s.print(", out {s} = (", .{constraint});
|
||||
try s.print(", [{s}] out {s} = (", .{ name, constraint });
|
||||
try w.writeOperand(s, inst, op_index, output);
|
||||
op_index += 1;
|
||||
try s.writeByte(')');
|
||||
@@ -558,12 +562,15 @@ const Writer = struct {
|
||||
}
|
||||
|
||||
for (inputs) |input| {
|
||||
const constraint = w.air.nullTerminatedString(extra_i);
|
||||
const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]);
|
||||
const constraint = std.mem.sliceTo(extra_bytes, 0);
|
||||
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += constraint.len / 4 + 1;
|
||||
// for the strings and their null terminators, we still use the next u32
|
||||
// for the null terminator.
|
||||
extra_i += (constraint.len + name.len + 1) / 4 + 1;
|
||||
|
||||
try s.print(", in {s} = (", .{constraint});
|
||||
try s.print(", [{s}] in {s} = (", .{ name, constraint });
|
||||
try w.writeOperand(s, inst, op_index, input);
|
||||
op_index += 1;
|
||||
try s.writeByte(')');
|
||||
@@ -572,7 +579,8 @@ const Writer = struct {
|
||||
{
|
||||
var clobber_i: u32 = 0;
|
||||
while (clobber_i < clobbers_len) : (clobber_i += 1) {
|
||||
const clobber = w.air.nullTerminatedString(extra_i);
|
||||
const extra_bytes = std.mem.sliceAsBytes(w.air.extra[extra_i..]);
|
||||
const clobber = std.mem.sliceTo(extra_bytes, 0);
|
||||
// This equation accounts for the fact that even if we have exactly 4 bytes
|
||||
// for the string, we still use the next u32 for the null terminator.
|
||||
extra_i += clobber.len / 4 + 1;
|
||||
|
||||
Reference in New Issue
Block a user