stage2 AArch64: implement slice_len and slice_elem_val

This commit is contained in:
joachimschmidt557
2022-02-05 20:51:11 +01:00
parent 82f91adbb4
commit 8204ad1937
3 changed files with 225 additions and 13 deletions

View File

@@ -1164,7 +1164,20 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void {
fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void {
const ty_op = self.air.instructions.items(.data)[inst].ty_op;
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement slice_len for {}", .{self.target.cpu.arch});
const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: {
const mcv = try self.resolveInst(ty_op.operand);
switch (mcv) {
.dead, .unreach => unreachable,
.register => unreachable, // a slice doesn't fit in one register
.stack_offset => |off| {
break :result MCValue{ .stack_offset = off + 8 };
},
.memory => |addr| {
break :result MCValue{ .memory = addr + 8 };
},
else => return self.fail("TODO implement slice_len for {}", .{mcv}),
}
};
return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
}
@@ -1183,10 +1196,114 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void {
fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void {
const is_volatile = false; // TODO
const bin_op = self.air.instructions.items(.data)[inst].bin_op;
const result: MCValue = if (!is_volatile and self.liveness.isUnused(inst)) .dead else return self.fail("TODO implement slice_elem_val for {}", .{self.target.cpu.arch});
if (!is_volatile and self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none });
const result: MCValue = result: {
const slice_mcv = try self.resolveInst(bin_op.lhs);
// TODO optimize for the case where the index is a constant,
// i.e. index_mcv == .immediate
const index_mcv = try self.resolveInst(bin_op.rhs);
const index_is_register = index_mcv == .register;
const slice_ty = self.air.typeOf(bin_op.lhs);
const elem_ty = slice_ty.childType();
const elem_size = elem_ty.abiSize(self.target.*);
var buf: Type.SlicePtrFieldTypeBuffer = undefined;
const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf);
if (index_is_register) self.register_manager.freezeRegs(&.{index_mcv.register});
defer if (index_is_register) self.register_manager.unfreezeRegs(&.{index_mcv.register});
const base_mcv: MCValue = switch (slice_mcv) {
.stack_offset => |off| .{ .register = try self.copyToTmpRegister(slice_ptr_field_type, .{ .stack_offset = off + 8 }) },
else => return self.fail("TODO slice_elem_val when slice is {}", .{slice_mcv}),
};
self.register_manager.freezeRegs(&.{base_mcv.register});
// TODO implement optimized ldr for airSliceElemVal
const dst_mcv = try self.allocRegOrMem(inst, true);
const offset_mcv = try self.genMulConstant(bin_op.rhs, @intCast(u32, elem_size));
assert(offset_mcv == .register); // result of multiplication should always be register
self.register_manager.freezeRegs(&.{offset_mcv.register});
const addr_reg = try self.register_manager.allocReg(null);
self.register_manager.freezeRegs(&.{addr_reg});
defer self.register_manager.unfreezeRegs(&.{addr_reg});
_ = try self.addInst(.{
.tag = .add_shifted_register,
.data = .{ .rrr_imm6_shift = .{
.rd = addr_reg,
.rn = base_mcv.register,
.rm = offset_mcv.register,
.imm6 = 0,
.shift = .lsl,
} },
});
// At this point in time, neither the base register
// nor the offset register contains any valuable data
// anymore.
self.register_manager.unfreezeRegs(&.{ base_mcv.register, offset_mcv.register });
try self.load(dst_mcv, .{ .register = addr_reg }, slice_ptr_field_type);
break :result dst_mcv;
};
return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none });
}
fn genMulConstant(self: *Self, op: Air.Inst.Ref, imm: u32) !MCValue {
const lhs = try self.resolveInst(op);
const rhs = MCValue{ .immediate = imm };
const lhs_is_register = lhs == .register;
if (lhs_is_register) self.register_manager.freezeRegs(&.{lhs.register});
defer if (lhs_is_register) self.register_manager.unfreezeRegs(&.{lhs.register});
// Destination must be a register
// LHS must be a register
// RHS must be a register
var dst_mcv: MCValue = undefined;
var lhs_mcv: MCValue = lhs;
var rhs_mcv: MCValue = rhs;
// Allocate registers for operands and/or destination
// Allocate 1 or 2 registers
if (lhs_is_register) {
// Move RHS to register
dst_mcv = MCValue{ .register = try self.register_manager.allocReg(null) };
rhs_mcv = dst_mcv;
} else {
// Move LHS and RHS to register
const regs = try self.register_manager.allocRegs(2, .{ null, null });
lhs_mcv = MCValue{ .register = regs[0] };
rhs_mcv = MCValue{ .register = regs[1] };
dst_mcv = lhs_mcv;
}
// Move the operands to the newly allocated registers
if (!lhs_is_register) {
try self.genSetReg(self.air.typeOf(op), lhs_mcv.register, lhs);
}
try self.genSetReg(Type.initTag(.usize), rhs_mcv.register, rhs);
_ = try self.addInst(.{
.tag = .mul,
.data = .{ .rrr = .{
.rd = dst_mcv.register,
.rn = lhs_mcv.register,
.rm = rhs_mcv.register,
} },
});
return dst_mcv;
}
fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void {
const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
const extra = self.air.extraData(Air.Bin, ty_pl.payload).data;
@@ -1310,6 +1427,16 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo
.undef => unreachable,
.compare_flags_signed, .compare_flags_unsigned => unreachable,
.embedded_in_code => unreachable,
.register => |dst_reg| {
_ = try self.addInst(.{
.tag = .ldr_immediate,
.data = .{ .load_store_register_immediate = .{
.rt = dst_reg,
.rn = addr_reg,
.offset = Instruction.LoadStoreOffset.none.immediate,
} },
});
},
.stack_offset => |off| {
if (elem_ty.abiSize(self.target.*) <= 8) {
const tmp_reg = try self.register_manager.allocReg(null);
@@ -2590,8 +2717,61 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro
if (stack_offset == off)
return; // Copy stack variable to itself; nothing to do.
const reg = try self.copyToTmpRegister(ty, mcv);
return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
const ptr_bits = self.target.cpu.arch.ptrBitWidth();
const ptr_bytes: u64 = @divExact(ptr_bits, 8);
if (ty.abiSize(self.target.*) <= ptr_bytes) {
const reg = try self.copyToTmpRegister(ty, mcv);
return self.genSetStack(ty, stack_offset, MCValue{ .register = reg });
} else {
// TODO optimize the register allocation
const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null });
self.register_manager.freezeRegs(&regs);
defer self.register_manager.unfreezeRegs(&regs);
const src_reg = regs[0];
const dst_reg = regs[1];
const len_reg = regs[2];
const count_reg = regs[3];
const tmp_reg = regs[4];
// sub src_reg, fp, #off
const adj_src_offset = off + @intCast(u32, ty.abiSize(self.target.*));
const src_offset = math.cast(u12, adj_src_offset) catch return self.fail("TODO load: larger stack offsets", .{});
_ = try self.addInst(.{
.tag = .sub_immediate,
.data = .{ .rr_imm12_sh = .{
.rd = src_reg,
.rn = .x29,
.imm12 = src_offset,
} },
});
// sub dst_reg, fp, #stack_offset
const adj_dst_off = stack_offset + @intCast(u32, ty.abiSize(self.target.*));
const dst_offset = math.cast(u12, adj_dst_off) catch return self.fail("TODO load: larger stack offsets", .{});
_ = try self.addInst(.{
.tag = .sub_immediate,
.data = .{ .rr_imm12_sh = .{
.rd = dst_reg,
.rn = .x29,
.imm12 = dst_offset,
} },
});
// mov len, #elem_size
const elem_size = @intCast(u32, ty.abiSize(self.target.*));
const len_imm = math.cast(u16, elem_size) catch return self.fail("TODO load: larger stack offsets", .{});
_ = try self.addInst(.{
.tag = .movk,
.data = .{ .r_imm16_sh = .{
.rd = len_reg,
.imm16 = len_imm,
} },
});
// memcpy(src, dst, len)
try self.genInlineMemcpy(src_reg, dst_reg, len_reg, count_reg, tmp_reg);
}
},
}
}
@@ -2711,7 +2891,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
} },
});
},
else => return self.fail("TODO implement genSetReg other types abi_size={}", .{abi_size}),
3 => return self.fail("TODO implement genSetReg types size 3", .{}),
else => unreachable,
}
},
else => return self.fail("TODO implement genSetReg for aarch64 {}", .{mcv}),

View File

@@ -91,6 +91,7 @@ pub fn emitMir(
.call_extern => try emit.mirCallExtern(inst),
.add_shifted_register => try emit.mirAddSubtractShiftedRegister(inst),
.cmp_shifted_register => try emit.mirAddSubtractShiftedRegister(inst),
.cset => try emit.mirConditionalSelect(inst),
@@ -132,6 +133,8 @@ pub fn emitMir(
.movk => try emit.mirMoveWideImmediate(inst),
.movz => try emit.mirMoveWideImmediate(inst),
.mul => try emit.mirDataProcessing3Source(inst),
.nop => try emit.mirNop(),
.push_regs => try emit.mirPushPopRegs(inst),
@@ -201,6 +204,12 @@ fn instructionSize(emit: *Emit, inst: Mir.Inst.Index) usize {
return 5 * 4;
}
},
.pop_regs, .push_regs => {
const reg_list = emit.mir.instructions.items(.data)[inst].reg_list;
const number_of_regs = @popCount(u32, reg_list);
const number_of_insts = std.math.divCeil(u6, number_of_regs, 2) catch unreachable;
return number_of_insts * 4;
},
.call_extern => return 4,
.dbg_line,
.dbg_epilogue_begin,
@@ -565,15 +574,15 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void {
fn mirAddSubtractShiftedRegister(emit: *Emit, inst: Mir.Inst.Index) !void {
const tag = emit.mir.instructions.items(.tag)[inst];
const rrr_imm6_shift = emit.mir.instructions.items(.data)[inst].rrr_imm6_shift;
const rd = rrr_imm6_shift.rd;
const rn = rrr_imm6_shift.rn;
const rm = rrr_imm6_shift.rm;
const shift = rrr_imm6_shift.shift;
const imm6 = rrr_imm6_shift.imm6;
switch (tag) {
.cmp_shifted_register => try emit.writeInstruction(Instruction.subsShiftedRegister(
rrr_imm6_shift.rd,
rrr_imm6_shift.rn,
rrr_imm6_shift.rm,
rrr_imm6_shift.shift,
rrr_imm6_shift.imm6,
)),
.cmp_shifted_register => try emit.writeInstruction(Instruction.subsShiftedRegister(rd, rn, rm, shift, imm6)),
.add_shifted_register => try emit.writeInstruction(Instruction.addShiftedRegister(rd, rn, rm, shift, imm6)),
else => unreachable,
}
}
@@ -802,6 +811,16 @@ fn mirMoveWideImmediate(emit: *Emit, inst: Mir.Inst.Index) !void {
}
}
fn mirDataProcessing3Source(emit: *Emit, inst: Mir.Inst.Index) !void {
const tag = emit.mir.instructions.items(.tag)[inst];
const rrr = emit.mir.instructions.items(.data)[inst].rrr;
switch (tag) {
.mul => try emit.writeInstruction(Instruction.mul(rrr.rd, rrr.rn, rrr.rm)),
else => unreachable,
}
}
fn mirNop(emit: *Emit) !void {
try emit.writeInstruction(Instruction.nop());
}

View File

@@ -26,6 +26,8 @@ pub const Inst = struct {
pub const Tag = enum(u16) {
/// Add (immediate)
add_immediate,
/// Add (shifted register)
add_shifted_register,
/// Branch conditionally
b_cond,
/// Branch
@@ -82,6 +84,8 @@ pub const Inst = struct {
movk,
/// Move wide with zero
movz,
/// Multiply
mul,
/// No Operation
nop,
/// Pseudo-instruction: Pop multiple registers
@@ -187,6 +191,14 @@ pub const Inst = struct {
imm12: u12,
sh: u1 = 0,
},
/// Two registers
///
/// Used by e.g. mul
rrr: struct {
rd: Register,
rn: Register,
rm: Register,
},
/// Three registers and a shift (shift type and 6-bit amount)
///
/// Used by e.g. cmp_shifted_register
@@ -208,7 +220,7 @@ pub const Inst = struct {
},
/// Two registers and a LoadStoreOffsetImmediate
///
/// Used by e.g. str_register
/// Used by e.g. str_immediate
load_store_register_immediate: struct {
rt: Register,
rn: Register,