From d211dc37c800c1e0f235b1fe0a9c4fb376524b5a Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Sat, 8 May 2021 15:33:01 +0800 Subject: [PATCH 1/2] stage2 ARM: Overhaul of genArmBinOp --- src/codegen.zig | 230 +++++++++++++++++++++++++++++---------- src/register_manager.zig | 83 +++++++++----- 2 files changed, 228 insertions(+), 85 deletions(-) diff --git a/src/codegen.zig b/src/codegen.zig index 7990c8d1b7..c41802ca00 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -928,7 +928,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { // TODO separate architectures with registers from // stack-based architectures (spu_2) if (callee_preserved_regs.len > 0) { - if (self.register_manager.tryAllocReg(inst)) |reg| { + if (self.register_manager.tryAllocReg(inst, &.{})) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } @@ -940,6 +940,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { pub fn spillInstruction(self: *Self, src: LazySrcLoc, reg: Register, inst: *ir.Inst) !void { const stack_mcv = try self.allocRegOrMem(inst, false); + log.debug("spilling {*} to stack mcv {any}", .{ inst, stack_mcv }); const reg_mcv = self.getResolvedInstValue(inst); assert(reg == toCanonicalReg(reg_mcv.register)); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -951,7 +952,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, src: LazySrcLoc, ty: Type, mcv: MCValue) !Register { - const reg = try self.register_manager.allocRegWithoutTracking(); + const reg = try self.register_manager.allocRegWithoutTracking(&.{}); try self.genSetReg(src, ty, reg, mcv); return reg; } @@ -960,7 +961,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue { - const reg = try self.register_manager.allocReg(reg_owner); + const reg = try self.register_manager.allocReg(reg_owner, &.{}); try self.genSetReg(reg_owner.src, reg_owner.ty, reg, mcv); return MCValue{ .register = reg }; } @@ -1380,54 +1381,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { } } - fn genArmBinOp(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, op: ir.Inst.Tag) !MCValue { - const lhs = try self.resolveInst(op_lhs); - const rhs = try self.resolveInst(op_rhs); - - // Destination must be a register - var dst_mcv: MCValue = undefined; - var lhs_mcv: MCValue = undefined; - var rhs_mcv: MCValue = undefined; - if (self.reuseOperand(inst, 0, lhs)) { - // LHS is the destination - // RHS is the source - lhs_mcv = if (lhs != .register) try self.copyToNewRegister(inst, lhs) else lhs; - rhs_mcv = rhs; - dst_mcv = lhs_mcv; - } else if (self.reuseOperand(inst, 1, rhs)) { - // RHS is the destination - // LHS is the source - lhs_mcv = lhs; - rhs_mcv = if (rhs != .register) try self.copyToNewRegister(inst, rhs) else rhs; - dst_mcv = rhs_mcv; - } else { - // TODO save 1 copy instruction by directly allocating the destination register - // LHS is the destination - // RHS is the source - lhs_mcv = try self.copyToNewRegister(inst, lhs); - rhs_mcv = rhs; - dst_mcv = lhs_mcv; - } - - try self.genArmBinOpCode(inst.src, dst_mcv.register, lhs_mcv, rhs_mcv, op); - return dst_mcv; - } - - fn genArmBinOpCode( - self: *Self, - src: LazySrcLoc, - dst_reg: Register, - lhs_mcv: MCValue, - rhs_mcv: MCValue, - op: ir.Inst.Tag, - ) !void { - assert(lhs_mcv == .register or lhs_mcv == .register); - - const swap_lhs_and_rhs = rhs_mcv == .register and lhs_mcv != .register; - const op1 = if (swap_lhs_and_rhs) rhs_mcv.register else lhs_mcv.register; - const op2 = if (swap_lhs_and_rhs) lhs_mcv else rhs_mcv; - - const operand = switch (op2) { + fn armOperandShouldBeRegister(self: *Self, src: LazySrcLoc, mcv: MCValue) !bool { + return switch (mcv) { .none => unreachable, .undef => unreachable, .dead, .unreach => unreachable, @@ -1439,15 +1394,142 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (imm > std.math.maxInt(u32)) return self.fail(src, "TODO ARM binary arithmetic immediate larger than u32", .{}); // Load immediate into register if it doesn't fit - // as an operand - break :blk Instruction.Operand.fromU32(@intCast(u32, imm)) orelse - Instruction.Operand.reg(try self.copyToTmpRegister(src, Type.initTag(.u32), op2), Instruction.Operand.Shift.none); + // in an operand + break :blk Instruction.Operand.fromU32(@intCast(u32, imm)) == null; }, + .register => true, + .stack_offset, + .embedded_in_code, + .memory, + => true, + }; + } + + fn genArmBinOp(self: *Self, inst: *ir.Inst, op_lhs: *ir.Inst, op_rhs: *ir.Inst, op: ir.Inst.Tag) !MCValue { + const lhs = try self.resolveInst(op_lhs); + const rhs = try self.resolveInst(op_rhs); + + const lhs_is_register = lhs == .register; + const rhs_is_register = rhs == .register; + const lhs_should_be_register = try self.armOperandShouldBeRegister(op_lhs.src, lhs); + const rhs_should_be_register = try self.armOperandShouldBeRegister(op_rhs.src, rhs); + const reuse_lhs = lhs_is_register and self.reuseOperand(inst, 0, lhs); + const reuse_rhs = !reuse_lhs and rhs_is_register and self.reuseOperand(inst, 1, rhs); + + // Destination must be a register + var dst_mcv: MCValue = undefined; + var lhs_mcv = lhs; + var rhs_mcv = rhs; + var swap_lhs_and_rhs = false; + + // Allocate registers for operands and/or destination + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + if (reuse_lhs) { + // Allocate 0 or 1 registers + if (!rhs_is_register and rhs_should_be_register) { + rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(op_rhs, &.{lhs.register}) }; + branch.inst_table.putAssumeCapacity(op_rhs, rhs_mcv); + } + dst_mcv = lhs; + } else if (reuse_rhs) { + // Allocate 0 or 1 registers + if (!lhs_is_register and lhs_should_be_register) { + lhs_mcv = MCValue{ .register = try self.register_manager.allocReg(op_lhs, &.{rhs.register}) }; + branch.inst_table.putAssumeCapacity(op_lhs, lhs_mcv); + } + dst_mcv = rhs; + + swap_lhs_and_rhs = true; + } else { + // Allocate 1 or 2 registers + if (lhs_should_be_register and rhs_should_be_register) { + if (lhs_is_register and rhs_is_register) { + dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{ lhs.register, rhs.register }) }; + } else if (lhs_is_register) { + // Move RHS to register + dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{lhs.register}) }; + rhs_mcv = dst_mcv; + } else if (rhs_is_register) { + // Move LHS to register + dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{rhs.register}) }; + lhs_mcv = dst_mcv; + } else { + // Move LHS and RHS to register + const regs = try self.register_manager.allocRegs(2, .{ inst, op_rhs }, &.{}); + lhs_mcv = MCValue{ .register = regs[0] }; + rhs_mcv = MCValue{ .register = regs[1] }; + dst_mcv = lhs_mcv; + + branch.inst_table.putAssumeCapacity(op_rhs, rhs_mcv); + } + } else if (lhs_should_be_register) { + // RHS is immediate + if (lhs_is_register) { + dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{lhs.register}) }; + } else { + dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{}) }; + lhs_mcv = dst_mcv; + } + } else if (rhs_should_be_register) { + // LHS is immediate + if (rhs_is_register) { + dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{rhs.register}) }; + } else { + dst_mcv = MCValue{ .register = try self.register_manager.allocReg(inst, &.{}) }; + rhs_mcv = dst_mcv; + } + + swap_lhs_and_rhs = true; + } else unreachable; // binary operation on two immediates + } + + // Move the operands to the newly allocated registers + if (lhs_mcv == .register and !lhs_is_register) { + try self.genSetReg(op_lhs.src, op_lhs.ty, lhs_mcv.register, lhs); + } + if (rhs_mcv == .register and !rhs_is_register) { + try self.genSetReg(op_rhs.src, op_rhs.ty, rhs_mcv.register, rhs); + } + + try self.genArmBinOpCode( + inst.src, + dst_mcv.register, + lhs_mcv, + rhs_mcv, + swap_lhs_and_rhs, + op, + ); + return dst_mcv; + } + + fn genArmBinOpCode( + self: *Self, + src: LazySrcLoc, + dst_reg: Register, + lhs_mcv: MCValue, + rhs_mcv: MCValue, + swap_lhs_and_rhs: bool, + op: ir.Inst.Tag, + ) !void { + assert(lhs_mcv == .register or rhs_mcv == .register); + + const op1 = if (swap_lhs_and_rhs) rhs_mcv.register else lhs_mcv.register; + const op2 = if (swap_lhs_and_rhs) lhs_mcv else rhs_mcv; + + const operand = switch (op2) { + .none => unreachable, + .undef => unreachable, + .dead, .unreach => unreachable, + .compare_flags_unsigned => unreachable, + .compare_flags_signed => unreachable, + .ptr_stack_offset => unreachable, + .ptr_embedded_in_code => unreachable, + .immediate => |imm| Instruction.Operand.fromU32(@intCast(u32, imm)).?, .register => |reg| Instruction.Operand.reg(reg, Instruction.Operand.Shift.none), .stack_offset, .embedded_in_code, .memory, - => Instruction.Operand.reg(try self.copyToTmpRegister(src, Type.initTag(.u32), op2), Instruction.Operand.Shift.none), + => unreachable, }; switch (op) { @@ -2613,10 +2695,42 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const lhs = try self.resolveInst(inst.lhs); const rhs = try self.resolveInst(inst.rhs); - const src_mcv = rhs; - const dst_mcv = if (lhs != .register) try self.copyToNewRegister(inst.lhs, lhs) else lhs; + const lhs_is_register = lhs == .register; + const rhs_is_register = rhs == .register; + // lhs should always be a register + const rhs_should_be_register = try self.armOperandShouldBeRegister(inst.rhs.src, rhs); + + var lhs_mcv = lhs; + var rhs_mcv = rhs; + + // Allocate registers + if (rhs_should_be_register) { + if (!lhs_is_register and !rhs_is_register) { + const regs = try self.register_manager.allocRegs(2, .{ inst.rhs, inst.lhs }, &.{}); + lhs_mcv = MCValue{ .register = regs[0] }; + rhs_mcv = MCValue{ .register = regs[1] }; + } else if (!rhs_is_register) { + rhs_mcv = MCValue{ .register = try self.register_manager.allocReg(inst.rhs, &.{}) }; + } + } + if (!lhs_is_register) { + lhs_mcv = MCValue{ .register = try self.register_manager.allocReg(inst.lhs, &.{}) }; + } + + // Move the operands to the newly allocated registers + const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; + if (lhs_mcv == .register and !lhs_is_register) { + try self.genSetReg(inst.lhs.src, inst.lhs.ty, lhs_mcv.register, lhs); + branch.inst_table.putAssumeCapacity(inst.lhs, lhs); + } + if (rhs_mcv == .register and !rhs_is_register) { + try self.genSetReg(inst.rhs.src, inst.rhs.ty, rhs_mcv.register, rhs); + branch.inst_table.putAssumeCapacity(inst.rhs, rhs); + } + + // The destination register is not present in the cmp instruction + try self.genArmBinOpCode(inst.base.src, undefined, lhs_mcv, rhs_mcv, false, .cmp_eq); - try self.genArmBinOpCode(inst.base.src, dst_mcv.register, dst_mcv, src_mcv, .cmp_eq); const info = inst.lhs.ty.intInfo(self.target.*); return switch (info.signedness) { .signed => MCValue{ .compare_flags_signed = op }, diff --git a/src/register_manager.zig b/src/register_manager.zig index 2c812cef89..c9235b88c1 100644 --- a/src/register_manager.zig +++ b/src/register_manager.zig @@ -1,5 +1,6 @@ const std = @import("std"); const math = std.math; +const mem = std.mem; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ir = @import("ir.zig"); @@ -66,8 +67,13 @@ pub fn RegisterManager( } /// Returns `null` if all registers are allocated. - pub fn tryAllocRegs(self: *Self, comptime count: comptime_int, insts: [count]*ir.Inst) ?[count]Register { - if (self.tryAllocRegsWithoutTracking(count)) |regs| { + pub fn tryAllocRegs( + self: *Self, + comptime count: comptime_int, + insts: [count]*ir.Inst, + exceptions: []Register, + ) ?[count]Register { + if (self.tryAllocRegsWithoutTracking(count, exceptions)) |regs| { for (regs) |reg, i| { const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null self.registers[index] = insts[i]; @@ -81,21 +87,30 @@ pub fn RegisterManager( } /// Returns `null` if all registers are allocated. - pub fn tryAllocReg(self: *Self, inst: *ir.Inst) ?Register { - return if (tryAllocRegs(self, 1, .{inst})) |regs| regs[0] else null; + pub fn tryAllocReg(self: *Self, inst: *ir.Inst, exceptions: []Register) ?Register { + return if (tryAllocRegs(self, 1, .{inst}, exceptions)) |regs| regs[0] else null; } - pub fn allocRegs(self: *Self, comptime count: comptime_int, insts: [count]*ir.Inst) ![count]Register { + pub fn allocRegs( + self: *Self, + comptime count: comptime_int, + insts: [count]*ir.Inst, + exceptions: []Register, + ) ![count]Register { comptime assert(count > 0 and count <= callee_preserved_regs.len); + assert(count + exceptions.len <= callee_preserved_regs.len); - return self.tryAllocRegs(count, insts) orelse blk: { + return self.tryAllocRegs(count, insts, exceptions) orelse blk: { // We'll take over the first count registers. Spill // the instructions that were previously there to a // stack allocations. var regs: [count]Register = undefined; - std.mem.copy(Register, ®s, callee_preserved_regs[0..count]); + var i: usize = 0; + for (callee_preserved_regs) |reg| { + if (i >= count) break; + if (mem.indexOfScalar(Register, exceptions, reg) != null) continue; + regs[i] = reg; - for (regs) |reg, i| { const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null if (self.isRegFree(reg)) { self.markRegUsed(reg); @@ -104,21 +119,28 @@ pub fn RegisterManager( try self.getFunction().spillInstruction(spilled_inst.src, reg, spilled_inst); } self.registers[index] = insts[i]; + + i += 1; } break :blk regs; }; } - pub fn allocReg(self: *Self, inst: *ir.Inst) !Register { - return (try self.allocRegs(1, .{inst}))[0]; + pub fn allocReg(self: *Self, inst: *ir.Inst, exceptions: []Register) !Register { + return (try self.allocRegs(1, .{inst}, exceptions))[0]; } /// Does not track the registers. /// Returns `null` if not enough registers are free. - pub fn tryAllocRegsWithoutTracking(self: *Self, comptime count: comptime_int) ?[count]Register { + pub fn tryAllocRegsWithoutTracking( + self: *Self, + comptime count: comptime_int, + exceptions: []Register, + ) ?[count]Register { comptime if (callee_preserved_regs.len == 0) return null; comptime assert(count > 0 and count <= callee_preserved_regs.len); + assert(count + exceptions.len <= callee_preserved_regs.len); const free_registers = @popCount(FreeRegInt, self.free_registers); if (free_registers < count) return null; @@ -127,30 +149,35 @@ pub fn RegisterManager( var i: usize = 0; for (callee_preserved_regs) |reg| { if (i >= count) break; + if (mem.indexOfScalar(Register, exceptions, reg) != null) continue; if (self.isRegFree(reg)) { regs[i] = reg; i += 1; } } - return regs; + + return if (i < count) null else regs; } /// Does not track the register. /// Returns `null` if all registers are allocated. - pub fn tryAllocRegWithoutTracking(self: *Self) ?Register { - return if (self.tryAllocRegsWithoutTracking(1)) |regs| regs[0] else null; + pub fn tryAllocRegWithoutTracking(self: *Self, exceptions: []Register) ?Register { + return if (self.tryAllocRegsWithoutTracking(1, exceptions)) |regs| regs[0] else null; } /// Does not track the registers - pub fn allocRegsWithoutTracking(self: *Self, comptime count: comptime_int) ![count]Register { - return self.tryAllocRegsWithoutTracking(count) orelse blk: { + pub fn allocRegsWithoutTracking(self: *Self, comptime count: comptime_int, exceptions: []Register) ![count]Register { + return self.tryAllocRegsWithoutTracking(count, exceptions) orelse blk: { // We'll take over the first count registers. Spill // the instructions that were previously there to a // stack allocations. var regs: [count]Register = undefined; - std.mem.copy(Register, ®s, callee_preserved_regs[0..count]); + var i: usize = 0; + for (callee_preserved_regs) |reg| { + if (i >= count) break; + if (mem.indexOfScalar(Register, exceptions, reg) != null) continue; + regs[i] = reg; - for (regs) |reg, i| { const index = reg.allocIndex().?; // allocIndex() on a callee-preserved reg should never return null if (!self.isRegFree(reg)) { const spilled_inst = self.registers[index].?; @@ -158,6 +185,8 @@ pub fn RegisterManager( self.registers[index] = null; self.markRegFree(reg); } + + i += 1; } break :blk regs; @@ -165,8 +194,8 @@ pub fn RegisterManager( } /// Does not track the register. - pub fn allocRegWithoutTracking(self: *Self) !Register { - return (try self.allocRegsWithoutTracking(1))[0]; + pub fn allocRegWithoutTracking(self: *Self, exceptions: []Register) !Register { + return (try self.allocRegsWithoutTracking(1, exceptions))[0]; } /// Allocates the specified register with the specified @@ -270,9 +299,9 @@ test "tryAllocReg: no spilling" { try std.testing.expect(!function.register_manager.isRegAllocated(.r2)); try std.testing.expect(!function.register_manager.isRegAllocated(.r3)); - try std.testing.expectEqual(@as(?MockRegister, .r2), function.register_manager.tryAllocReg(&mock_instruction)); - try std.testing.expectEqual(@as(?MockRegister, .r3), function.register_manager.tryAllocReg(&mock_instruction)); - try std.testing.expectEqual(@as(?MockRegister, null), function.register_manager.tryAllocReg(&mock_instruction)); + try std.testing.expectEqual(@as(?MockRegister, .r2), function.register_manager.tryAllocReg(&mock_instruction, &.{})); + try std.testing.expectEqual(@as(?MockRegister, .r3), function.register_manager.tryAllocReg(&mock_instruction, &.{})); + try std.testing.expectEqual(@as(?MockRegister, null), function.register_manager.tryAllocReg(&mock_instruction, &.{})); try std.testing.expect(function.register_manager.isRegAllocated(.r2)); try std.testing.expect(function.register_manager.isRegAllocated(.r3)); @@ -301,16 +330,16 @@ test "allocReg: spilling" { try std.testing.expect(!function.register_manager.isRegAllocated(.r2)); try std.testing.expect(!function.register_manager.isRegAllocated(.r3)); - try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction)); - try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction)); + try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction, &.{})); + try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction, &.{})); // Spill a register - try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction)); + try std.testing.expectEqual(@as(?MockRegister, .r2), try function.register_manager.allocReg(&mock_instruction, &.{})); try std.testing.expectEqualSlices(MockRegister, &[_]MockRegister{.r2}, function.spilled.items); // No spilling necessary function.register_manager.freeReg(.r3); - try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction)); + try std.testing.expectEqual(@as(?MockRegister, .r3), try function.register_manager.allocReg(&mock_instruction, &.{})); try std.testing.expectEqualSlices(MockRegister, &[_]MockRegister{.r2}, function.spilled.items); } From f2a33688913cd36b1aee0b27a4a1de2cfb1cc04e Mon Sep 17 00:00:00 2001 From: joachimschmidt557 Date: Sat, 8 May 2021 15:38:48 +0800 Subject: [PATCH 2/2] stage2 ARM: Add spill registers test --- test/stage2/arm.zig | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/test/stage2/arm.zig b/test/stage2/arm.zig index f4efbd9b2a..31b3c06dcd 100644 --- a/test/stage2/arm.zig +++ b/test/stage2/arm.zig @@ -458,4 +458,57 @@ pub fn addCases(ctx: *TestContext) !void { "", ); } + + { + var case = ctx.exe("spilling registers", linux_arm); + case.addCompareOutput( + \\export fn _start() noreturn { + \\ assert(add(3, 4) == 791); + \\ exit(); + \\} + \\ + \\fn add(a: u32, b: u32) u32 { + \\ const x: u32 = blk: { + \\ const c = a + b; // 7 + \\ const d = a + c; // 10 + \\ const e = d + b; // 14 + \\ const f = d + e; // 24 + \\ const g = e + f; // 38 + \\ const h = f + g; // 62 + \\ const i = g + h; // 100 + \\ const j = i + d; // 110 + \\ const k = i + j; // 210 + \\ const l = k + c; // 217 + \\ const m = l + d; // 227 + \\ const n = m + e; // 241 + \\ const o = n + f; // 265 + \\ const p = o + g; // 303 + \\ const q = p + h; // 365 + \\ const r = q + i; // 465 + \\ const s = r + j; // 575 + \\ const t = s + k; // 785 + \\ break :blk t; + \\ }; + \\ const y = x + a; // 788 + \\ const z = y + a; // 791 + \\ return z; + \\} + \\ + \\fn assert(ok: bool) void { + \\ if (!ok) unreachable; + \\} + \\ + \\fn exit() noreturn { + \\ asm volatile ("svc #0" + \\ : + \\ : [number] "{r7}" (1), + \\ [arg1] "{r0}" (0) + \\ : "memory" + \\ ); + \\ unreachable; + \\} + , + "", + ); + } }