stage2 LLVM: Pass inline assembly outputs directly when not targeting memory

This change provides a basic implementation of #2349 for stage2. There's
still quite a lot of work before this logic is as complete as what's in
Clang (b364535304/clang/lib/CodeGen/CGStmt.cpp (L2304-L2795)),
particularly considering the diversity of constraints across targets.

It's probably not worth doing the complete work until there's a clearer
picture for constraints in Zig's future dedicated ASM syntax, but at
least this gives us a small improvement for now.

As a bonus, this also fixes a bug with how we were handling `_`
identifiers.
This commit is contained in:
Cody Tapscott
2022-07-28 17:06:57 -07:00
committed by Andrew Kelley
parent 1ab15b6c9c
commit 075f93fa10

View File

@@ -5491,22 +5491,26 @@ pub const FuncGen = struct {
defer arena_allocator.deinit();
const arena = arena_allocator.allocator();
const return_count: u8 = for (outputs) |output| {
if (output == .none) break 1;
} else 0;
const llvm_params_len = inputs.len + outputs.len - return_count;
const llvm_param_types = try arena.alloc(*const llvm.Type, llvm_params_len);
const llvm_param_values = try arena.alloc(*const llvm.Value, llvm_params_len);
const llvm_param_attrs = try arena.alloc(bool, llvm_params_len);
// The exact number of return / parameter values depends on which output values
// are passed by reference as indirect outputs (determined below).
const max_return_count = outputs.len;
const llvm_ret_types = try arena.alloc(*const llvm.Type, max_return_count);
const llvm_ret_indirect = try arena.alloc(bool, max_return_count);
const max_param_count = inputs.len + outputs.len;
const llvm_param_types = try arena.alloc(*const llvm.Type, max_param_count);
const llvm_param_values = try arena.alloc(*const llvm.Value, max_param_count);
const llvm_param_attrs = try arena.alloc(bool, max_param_count);
const target = self.dg.module.getTarget();
var llvm_ret_i: usize = 0;
var llvm_param_i: usize = 0;
var total_i: usize = 0;
var total_i: u16 = 0;
var name_map: std.StringArrayHashMapUnmanaged(void) = .{};
try name_map.ensureUnusedCapacity(arena, outputs.len + inputs.len);
var name_map: std.StringArrayHashMapUnmanaged(u16) = .{};
try name_map.ensureUnusedCapacity(arena, max_param_count);
for (outputs) |output| {
for (outputs) |output, i| {
const extra_bytes = std.mem.sliceAsBytes(self.air.extra[extra_i..]);
const constraint = std.mem.sliceTo(std.mem.sliceAsBytes(self.air.extra[extra_i..]), 0);
const name = std.mem.sliceTo(extra_bytes[constraint.len + 1 ..], 0);
@@ -5519,15 +5523,30 @@ pub const FuncGen = struct {
llvm_constraints.appendAssumeCapacity(',');
}
llvm_constraints.appendAssumeCapacity('=');
// Pass any non-return outputs indirectly, if the constraint accepts a memory location
llvm_ret_indirect[i] = (output != .none) and constraintAllowsMemory(constraint);
if (output != .none) {
try llvm_constraints.ensureUnusedCapacity(self.gpa, llvm_constraints.capacity + 1);
llvm_constraints.appendAssumeCapacity('*');
const output_inst = try self.resolveInst(output);
llvm_param_values[llvm_param_i] = output_inst;
llvm_param_types[llvm_param_i] = output_inst.typeOf();
llvm_param_attrs[llvm_param_i] = true;
llvm_param_i += 1;
if (llvm_ret_indirect[i]) {
// Pass the result by reference as an indirect output (e.g. "=*m")
llvm_constraints.appendAssumeCapacity('*');
llvm_param_values[llvm_param_i] = output_inst;
llvm_param_types[llvm_param_i] = output_inst.typeOf();
llvm_param_attrs[llvm_param_i] = true;
llvm_param_i += 1;
} else {
// Pass the result directly (e.g. "=r")
llvm_ret_types[llvm_ret_i] = output_inst.typeOf().getElementType();
llvm_ret_i += 1;
}
} else {
const ret_ty = self.air.typeOfIndex(inst);
llvm_ret_types[llvm_ret_i] = try self.dg.lowerType(ret_ty);
llvm_ret_i += 1;
}
// LLVM uses commas internally to separate different constraints,
@@ -5536,13 +5555,16 @@ pub const FuncGen = struct {
// to GCC's inline assembly.
// http://llvm.org/docs/LangRef.html#constraint-codes
for (constraint[1..]) |byte| {
llvm_constraints.appendAssumeCapacity(switch (byte) {
',' => '|',
else => byte,
});
switch (byte) {
',' => llvm_constraints.appendAssumeCapacity('|'),
'*' => {}, // Indirect outputs are handled above
else => llvm_constraints.appendAssumeCapacity(byte),
}
}
name_map.putAssumeCapacityNoClobber(name, {});
if (!std.mem.eql(u8, name, "_")) {
name_map.putAssumeCapacityNoClobber(name, total_i);
}
total_i += 1;
}
@@ -5594,7 +5616,7 @@ pub const FuncGen = struct {
}
if (!std.mem.eql(u8, name, "_")) {
name_map.putAssumeCapacityNoClobber(name, {});
name_map.putAssumeCapacityNoClobber(name, total_i);
}
// In the case of indirect inputs, LLVM requires the callsite to have
@@ -5625,6 +5647,11 @@ pub const FuncGen = struct {
}
}
// We have finished scanning through all inputs/outputs, so the number of
// parameters and return values is known.
const param_count = llvm_param_i;
const return_count = llvm_ret_i;
// For some targets, Clang unconditionally adds some clobbers to all inline assembly.
// While this is probably not strictly necessary, if we don't follow Clang's lead
// here then we may risk tripping LLVM bugs since anything not used by Clang tends
@@ -5682,7 +5709,7 @@ pub const FuncGen = struct {
const name = asm_source[name_start..i];
state = .start;
const index = name_map.getIndex(name) orelse {
const index = name_map.get(name) orelse {
// we should validate the assembly in Sema; by now it is too late
return self.todo("unknown input or output name: '{s}'", .{name});
};
@@ -5693,12 +5720,20 @@ pub const FuncGen = struct {
}
}
const ret_ty = self.air.typeOfIndex(inst);
const ret_llvm_ty = try self.dg.lowerType(ret_ty);
const ret_llvm_ty = switch (return_count) {
0 => self.context.voidType(),
1 => llvm_ret_types[0],
else => self.context.structType(
llvm_ret_types.ptr,
@intCast(c_uint, return_count),
.False,
),
};
const llvm_fn_ty = llvm.functionType(
ret_llvm_ty,
llvm_param_types.ptr,
@intCast(c_uint, llvm_param_types.len),
@intCast(c_uint, param_count),
.False,
);
const asm_fn = llvm.getInlineAsm(
@@ -5715,18 +5750,40 @@ pub const FuncGen = struct {
const call = self.builder.buildCall(
asm_fn,
llvm_param_values.ptr,
@intCast(c_uint, llvm_param_values.len),
@intCast(c_uint, param_count),
.C,
.Auto,
"",
);
for (llvm_param_attrs) |need_elem_ty, i| {
for (llvm_param_attrs[0..param_count]) |need_elem_ty, i| {
if (need_elem_ty) {
const elem_ty = llvm_param_types[i].getElementType();
llvm.setCallElemTypeAttr(call, i, elem_ty);
}
}
return call;
var ret_val = call;
llvm_ret_i = 0;
for (outputs) |output, i| {
if (llvm_ret_indirect[i]) continue;
const output_value = if (return_count > 1) b: {
break :b self.builder.buildExtractValue(call, @intCast(c_uint, llvm_ret_i), "");
} else call;
if (output != .none) {
const output_ptr = try self.resolveInst(output);
const output_ptr_ty = self.air.typeOf(output);
const store_inst = self.builder.buildStore(output_value, output_ptr);
store_inst.setAlignment(output_ptr_ty.ptrAlignment(target));
} else {
ret_val = output_value;
}
llvm_ret_i += 1;
}
return ret_val;
}
fn airIsNonNull(
@@ -9709,10 +9766,30 @@ fn errUnionErrorOffset(payload_ty: Type, target: std.Target) u1 {
return @boolToInt(Type.anyerror.abiAlignment(target) <= payload_ty.abiAlignment(target));
}
/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a memory location
///
/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang
fn constraintAllowsMemory(constraint: []const u8) bool {
return constraint[0] == 'm';
// TODO: This implementation is woefully incomplete.
for (constraint) |byte| {
switch (byte) {
'=', '*', ',', '&' => {},
'm', 'o', 'X', 'g' => return true,
else => {},
}
} else return false;
}
/// Returns true for asm constraint (e.g. "=*m", "=r") if it accepts a register
///
/// See also TargetInfo::validateOutputConstraint, AArch64TargetInfo::validateAsmConstraint, etc. in Clang
fn constraintAllowsRegister(constraint: []const u8) bool {
return constraint[0] != 'm';
// TODO: This implementation is woefully incomplete.
for (constraint) |byte| {
switch (byte) {
'=', '*', ',', '&' => {},
'm', 'o' => {},
else => return true,
}
} else return false;
}