LLVM: elide some loads when lowering
Generally, the load instruction may need to make a copy of an
isByRef=true value, such as in the case of the following code:
```zig
pub fn swap(comptime T: type, a: *T, b: *T) void {
const tmp = a.*;
a.* = b.*;
b.* = tmp;
}
```
However, it only needs to do so if there are any instructions which can
possibly write to memory. When calling functions with isByRef=true
parameters, the AIR code that is generated looks like loads followed
directly by a call.
This allows for a peephole optimization when lowering loads: if the load
instruction operates on an isByRef=true type and dies before any side effects
occur, then we can safely lower the load as a no-op that returns its
operand.
This is one out of three changes I intend to make to address #11498.
However I will put these changes in separate branches and merge them
separately so that we can have three independent points on the perf
charts.
Files changed in this commit: src/Liveness.zig (+396 lines).
@@ -112,6 +112,402 @@ pub fn clearOperandDeath(l: Liveness, inst: Air.Inst.Index, operand: OperandInt)
|
||||
l.tomb_bits[usize_index] &= ~mask;
|
||||
}
|
||||
|
||||
/// Result of classifying how a single instruction relates to a given operand.
/// Returned by `categorizeOperand`.
const OperandCategory = enum {
    /// The operand lives on, but this instruction cannot possibly mutate memory.
    none,
    /// The operand lives on and this instruction can mutate memory.
    write,
    /// The operand dies at this instruction.
    tomb,
    /// The operand lives on, and this instruction is noreturn.
    noret,
    /// This instruction is too complicated for analysis, no information is available.
    complex,
};
|
||||
|
||||
/// Given an instruction that we are examining, and an operand that we are looking for,
/// returns a classification.
/// Exhaustive over all AIR tags: instructions that can mutate memory report `.write`,
/// noreturn instructions report `.noret`, and instructions whose nested bodies are not
/// analyzed here (blocks, loops, branches, asm) report `.complex`.
pub fn categorizeOperand(
    l: Liveness,
    air: Air,
    inst: Air.Inst.Index,
    operand: Air.Inst.Index,
) OperandCategory {
    const air_tags = air.instructions.items(.tag);
    const air_datas = air.instructions.items(.data);
    const operand_ref = Air.indexToRef(operand);
    switch (air_tags[inst]) {
        // Pure binary operations: read both operands, never mutate memory.
        .add,
        .addwrap,
        .add_sat,
        .sub,
        .subwrap,
        .sub_sat,
        .mul,
        .mulwrap,
        .mul_sat,
        .div_float,
        .div_trunc,
        .div_floor,
        .div_exact,
        .rem,
        .mod,
        .bit_and,
        .bit_or,
        .xor,
        .cmp_lt,
        .cmp_lte,
        .cmp_eq,
        .cmp_gte,
        .cmp_gt,
        .cmp_neq,
        .bool_and,
        .bool_or,
        .array_elem_val,
        .slice_elem_val,
        .ptr_elem_val,
        .shl,
        .shl_exact,
        .shl_sat,
        .shr,
        .shr_exact,
        .min,
        .max,
        => {
            const o = air_datas[inst].bin_op;
            if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            if (o.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
            return .none;
        },

        // Binary operations that store through a pointer: these mutate memory.
        .store,
        .atomic_store_unordered,
        .atomic_store_monotonic,
        .atomic_store_release,
        .atomic_store_seq_cst,
        .set_union_tag,
        => {
            const o = air_datas[inst].bin_op;
            if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
            if (o.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
            return .write;
        },

        // Instructions with no operands at all: they cannot reference `operand`
        // and do not mutate memory.
        .arg,
        .alloc,
        .ret_ptr,
        .constant,
        .const_ty,
        .breakpoint,
        .dbg_stmt,
        .dbg_inline_begin,
        .dbg_inline_end,
        .dbg_block_begin,
        .dbg_block_end,
        .unreach,
        .ret_addr,
        .frame_addr,
        .wasm_memory_size,
        .err_return_trace,
        => return .none,

        // No operands, but a fence orders memory operations, so treat it as a write.
        .fence => return .write,

        // Unary ty_op instructions: read one operand, never mutate memory.
        .not,
        .bitcast,
        .load,
        .fpext,
        .fptrunc,
        .intcast,
        .trunc,
        .optional_payload,
        .optional_payload_ptr,
        .wrap_optional,
        .unwrap_errunion_payload,
        .unwrap_errunion_err,
        .unwrap_errunion_payload_ptr,
        .unwrap_errunion_err_ptr,
        .wrap_errunion_payload,
        .wrap_errunion_err,
        .slice_ptr,
        .slice_len,
        .ptr_slice_len_ptr,
        .ptr_slice_ptr_ptr,
        .struct_field_ptr_index_0,
        .struct_field_ptr_index_1,
        .struct_field_ptr_index_2,
        .struct_field_ptr_index_3,
        .array_to_slice,
        .float_to_int,
        .int_to_float,
        .get_union_tag,
        .clz,
        .ctz,
        .popcount,
        .byte_swap,
        .bit_reverse,
        .splat,
        => {
            const o = air_datas[inst].ty_op;
            if (o.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },

        // These write the non-null / non-error tag through the pointer operand.
        .optional_payload_ptr_set,
        .errunion_payload_ptr_set,
        => {
            const o = air_datas[inst].ty_op;
            if (o.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
            return .write;
        },

        // Unary un_op instructions: read one operand, never mutate memory.
        .is_null,
        .is_non_null,
        .is_null_ptr,
        .is_non_null_ptr,
        .is_err,
        .is_non_err,
        .is_err_ptr,
        .is_non_err_ptr,
        .ptrtoint,
        .bool_to_int,
        .tag_name,
        .error_name,
        .sqrt,
        .sin,
        .cos,
        .tan,
        .exp,
        .exp2,
        .log,
        .log2,
        .log10,
        .fabs,
        .floor,
        .ceil,
        .round,
        .trunc_float,
        .cmp_lt_errors_len,
        => {
            const o = air_datas[inst].un_op;
            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },

        // Function returns: noreturn.
        .ret,
        .ret_load,
        => {
            const o = air_datas[inst].un_op;
            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .noret);
            return .noret;
        },

        .set_err_return_trace => {
            const o = air_datas[inst].un_op;
            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
            return .write;
        },

        // Binary instructions whose operands live in the `extra` array.
        .add_with_overflow,
        .sub_with_overflow,
        .mul_with_overflow,
        .shl_with_overflow,
        .ptr_add,
        .ptr_sub,
        .ptr_elem_ptr,
        .slice_elem_ptr,
        .slice,
        => {
            const ty_pl = air_datas[inst].ty_pl;
            const extra = air.extraData(Air.Bin, ty_pl.payload).data;
            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
            return .none;
        },

        .dbg_var_ptr,
        .dbg_var_val,
        => {
            const o = air_datas[inst].pl_op.operand;
            if (o == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },

        .prefetch => {
            const prefetch = air_datas[inst].prefetch;
            if (prefetch.ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },

        // Calls may mutate any memory, so all surviving operands categorize as .write.
        .call, .call_always_tail, .call_never_tail, .call_never_inline => {
            const inst_data = air_datas[inst].pl_op;
            const callee = inst_data.operand;
            const extra = air.extraData(Air.Call, inst_data.payload);
            const args = @ptrCast([]const Air.Inst.Ref, air.extra[extra.end..][0..extra.data.args_len]);
            // Small encoding: callee + args fit in the per-instruction tomb bits
            // (bpi is the tomb-bit count per instruction, defined elsewhere in this file).
            if (args.len + 1 <= bpi - 1) {
                if (callee == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
                for (args) |arg, i| {
                    if (arg == operand_ref) return matchOperandSmallIndex(l, inst, @intCast(OperandInt, i + 1), .write);
                }
                return .write;
            }
            // Big-tomb encoding: feed the tomb iterator once per operand, in the
            // same order the tombs were written (callee first, then args).
            var bt = l.iterateBigTomb(inst);
            if (bt.feed()) {
                if (callee == operand_ref) return .tomb;
            } else {
                if (callee == operand_ref) return .write;
            }
            for (args) |arg| {
                if (bt.feed()) {
                    if (arg == operand_ref) return .tomb;
                } else {
                    if (arg == operand_ref) return .write;
                }
            }
            return .write;
        },
        .select => {
            const pl_op = air_datas[inst].pl_op;
            const extra = air.extraData(Air.Bin, pl_op.payload).data;
            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 2, .none);
            return .none;
        },
        .shuffle => {
            const extra = air.extraData(Air.Shuffle, air_datas[inst].ty_pl.payload).data;
            if (extra.a == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
            return .none;
        },
        .reduce => {
            const reduce = air_datas[inst].reduce;
            if (reduce.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },
        .cmp_vector => {
            const extra = air.extraData(Air.VectorCmp, air_datas[inst].ty_pl.payload).data;
            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
            return .none;
        },
        .aggregate_init => {
            const ty_pl = air_datas[inst].ty_pl;
            const aggregate_ty = air.getRefType(ty_pl.ty);
            const len = @intCast(usize, aggregate_ty.arrayLen());
            const elements = @ptrCast([]const Air.Inst.Ref, air.extra[ty_pl.payload..][0..len]);

            if (elements.len <= bpi - 1) {
                for (elements) |elem, i| {
                    if (elem == operand_ref) return matchOperandSmallIndex(l, inst, @intCast(OperandInt, i), .none);
                }
                return .none;
            }

            // NOTE(review): the big-tomb path reports .write for surviving operands
            // while the small path reports .none — presumably deliberate conservatism
            // for the big-tomb encoding; confirm this asymmetry is intended.
            var bt = l.iterateBigTomb(inst);
            for (elements) |elem| {
                if (bt.feed()) {
                    if (elem == operand_ref) return .tomb;
                } else {
                    if (elem == operand_ref) return .write;
                }
            }
            return .write;
        },
        .union_init => {
            const extra = air.extraData(Air.UnionInit, air_datas[inst].ty_pl.payload).data;
            if (extra.init == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },
        .struct_field_ptr, .struct_field_val => {
            const extra = air.extraData(Air.StructField, air_datas[inst].ty_pl.payload).data;
            if (extra.struct_operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },
        .field_parent_ptr => {
            const extra = air.extraData(Air.FieldParentPtr, air_datas[inst].ty_pl.payload).data;
            if (extra.field_ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },
        .cmpxchg_strong, .cmpxchg_weak => {
            const extra = air.extraData(Air.Cmpxchg, air_datas[inst].ty_pl.payload).data;
            if (extra.ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
            if (extra.expected_value == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
            if (extra.new_value == operand_ref) return matchOperandSmallIndex(l, inst, 2, .write);
            return .write;
        },
        .mul_add => {
            const pl_op = air_datas[inst].pl_op;
            const extra = air.extraData(Air.Bin, pl_op.payload).data;
            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none);
            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 2, .none);
            return .none;
        },
        .atomic_load => {
            const ptr = air_datas[inst].atomic_load.ptr;
            if (ptr == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },
        .atomic_rmw => {
            const pl_op = air_datas[inst].pl_op;
            const extra = air.extraData(Air.AtomicRmw, pl_op.payload).data;
            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
            if (extra.operand == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
            return .write;
        },
        .memset,
        .memcpy,
        => {
            const pl_op = air_datas[inst].pl_op;
            const extra = air.extraData(Air.Bin, pl_op.payload).data;
            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .write);
            if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .write);
            if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 2, .write);
            return .write;
        },

        .br => {
            const br = air_datas[inst].br;
            if (br.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .noret);
            return .noret;
        },
        // Instructions containing nested bodies are not analyzed here.
        .assembly => {
            return .complex;
        },
        .block => {
            return .complex;
        },
        .loop => {
            return .complex;
        },
        .cond_br => {
            return .complex;
        },
        .switch_br => {
            return .complex;
        },
        .wasm_memory_grow => {
            const pl_op = air_datas[inst].pl_op;
            if (pl_op.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
            return .none;
        },
    }
}
|
||||
|
||||
/// Helper for `categorizeOperand` when the operand occupies one of the
/// small per-instruction tomb-bit slots: report `.tomb` if the operand
/// dies at `inst`, otherwise report the caller-supplied `default` category.
fn matchOperandSmallIndex(
    l: Liveness,
    inst: Air.Inst.Index,
    operand: OperandInt,
    default: OperandCategory,
) OperandCategory {
    return if (operandDies(l, inst, operand)) .tomb else default;
}
|
||||
|
||||
/// Higher level API.
|
||||
pub const CondBrSlices = struct {
|
||||
then_deaths: []const Air.Inst.Index,
|
||||
|
||||
@@ -3885,7 +3885,7 @@ pub const FuncGen = struct {
|
||||
|
||||
fn genBody(self: *FuncGen, body: []const Air.Inst.Index) Error!void {
|
||||
const air_tags = self.air.instructions.items(.tag);
|
||||
for (body) |inst| {
|
||||
for (body) |inst, i| {
|
||||
const opt_value: ?*const llvm.Value = switch (air_tags[inst]) {
|
||||
// zig fmt: off
|
||||
.add => try self.airAdd(inst),
|
||||
@@ -3976,7 +3976,7 @@ pub const FuncGen = struct {
|
||||
.fptrunc => try self.airFptrunc(inst),
|
||||
.fpext => try self.airFpext(inst),
|
||||
.ptrtoint => try self.airPtrToInt(inst),
|
||||
.load => try self.airLoad(inst),
|
||||
.load => try self.airLoad(inst, body, i + 1),
|
||||
.loop => try self.airLoop(inst),
|
||||
.not => try self.airNot(inst),
|
||||
.ret => try self.airRet(inst),
|
||||
@@ -6982,11 +6982,33 @@ pub const FuncGen = struct {
|
||||
return null;
|
||||
}
|
||||
|
||||
/// Lower an AIR `load` instruction.
/// `body` is the block currently being lowered and `body_i` is the index of the
/// instruction *after* this one, enabling a forward scan for the copy-elision
/// peephole below.
fn airLoad(
    self: *FuncGen,
    inst: Air.Inst.Index,
    body: []const Air.Inst.Index,
    body_i: usize,
) !?*const llvm.Value {
    const ty_op = self.air.instructions.items(.data)[inst].ty_op;
    const ptr_ty = self.air.typeOf(ty_op.operand);
    elide: {
        const ptr_info = ptr_ty.ptrInfo().data;
        // Volatile loads must always be emitted; skip both the unused-elision
        // and the copy-elision below.
        if (ptr_info.@"volatile") break :elide;
        if (self.liveness.isUnused(inst)) return null;
        if (!isByRef(ptr_info.pointee_type)) break :elide;

        // It would be valid to fall back to the code below here that simply calls
        // load(). However, as an optimization, we want to avoid unnecessary copies
        // of isByRef=true types. Here, we scan forward in the current block,
        // looking to see if this load dies before any side effects occur.
        // In such case, we can safely return the operand without making a copy.
        for (body[body_i..]) |body_inst| {
            switch (self.liveness.categorizeOperand(self.air, body_inst, inst)) {
                .none => continue,
                .write, .noret, .complex => break :elide,
                .tomb => return try self.resolveInst(ty_op.operand),
            }
        } else unreachable; // every block ends in a noreturn instruction, which
        // categorizes as .noret or .tomb above — presumably the scan can never
        // fall off the end; confirm against AIR block invariants.
    }
    const ptr = try self.resolveInst(ty_op.operand);
    return self.load(ptr, ptr_ty);
}
|
||||
|
||||
Reference in New Issue
Block a user