Merge pull request #11200 from Luukdegram/wasm-memcpy

stage2: wasm - Implement memcpy instruction
This commit is contained in:
Andrew Kelley
2022-03-17 18:25:15 -07:00
committed by GitHub
2 changed files with 216 additions and 64 deletions

View File

@@ -895,7 +895,7 @@ fn genFunc(self: *Self) InnerError!void {
try prologue.append(.{ .tag = .i32_sub, .data = .{ .tag = {} } });
// Get negative stack alignment
try prologue.append(.{ .tag = .i32_const, .data = .{ .imm32 = @intCast(i32, self.stack_alignment) * -1 } });
// Bit and the value to get the new stack pointer to ensure the pointers are aligned with the abi alignment
// Bitwise-and the value to get the new stack pointer to ensure the pointers are aligned with the abi alignment
try prologue.append(.{ .tag = .i32_and, .data = .{ .tag = {} } });
// store the current stack pointer as the bottom, which will be used to calculate all stack pointer offsets
try prologue.append(.{ .tag = .local_tee, .data = .{ .label = self.bottom_stack_value.local } });
@@ -1074,22 +1074,123 @@ fn toWasmBits(bits: u16) ?u16 {
/// Performs a copy of bytes for a given type. Copying all bytes
/// from rhs to lhs.
///
/// TODO: Perform feature detection and when bulk_memory is available,
/// use wasm's mem.copy instruction.
fn memCopy(self: *Self, ty: Type, lhs: WValue, rhs: WValue) !void {
const abi_size = ty.abiSize(self.target);
var offset: u32 = 0;
const lhs_base = lhs.offset();
const rhs_base = rhs.offset();
while (offset < abi_size) : (offset += 1) {
// get lhs' address to store the result
try self.emitWValue(lhs);
// load byte from rhs' adress
try self.emitWValue(rhs);
try self.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
// store the result in lhs (we already have its address on the stack)
try self.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
/// Emits code that copies `len` bytes from the memory at `src` to the memory at `dst`.
///
/// Three lowering strategies are used, in order of preference:
/// 1. When the target has the `bulk_memory` feature, a single `memory_copy`
///    extended instruction is emitted.
/// 2. When `len` is an immediate (`.imm32`/`.imm64`), the copy is unrolled
///    during codegen into one byte load/store pair per byte.
/// 3. Otherwise a runtime loop with a dedicated counter local is emitted.
fn memcpy(self: *Self, dst: WValue, src: WValue, len: WValue) !void {
    // When bulk_memory is enabled, we lower it to wasm's memcpy instruction.
    // If not, we lower it ourselves manually
    if (std.Target.wasm.featureSetHas(self.target.cpu.features, .bulk_memory)) {
        // `stack_offset` operands are passed through `buildPointerOffset`
        // before being emitted; every other operand kind is emitted directly.
        switch (dst) {
            .stack_offset => try self.emitWValue(try self.buildPointerOffset(dst, 0, .new)),
            else => try self.emitWValue(dst),
        }
        switch (src) {
            .stack_offset => try self.emitWValue(try self.buildPointerOffset(src, 0, .new)),
            else => try self.emitWValue(src),
        }
        try self.emitWValue(len);
        try self.addExtended(.memory_copy);
        return;
    }

    // when the length is comptime-known, rather than a runtime value, we can optimize the generated code by having
    // the loop during codegen, rather than inserting a runtime loop into the binary.
    switch (len) {
        .imm32, .imm64 => {
            const length = switch (len) {
                .imm32 => |val| val,
                .imm64 => |val| val,
                else => unreachable,
            };
            var offset: u32 = 0;
            const lhs_base = dst.offset();
            const rhs_base = src.offset();
            while (offset < length) : (offset += 1) {
                // get dst's address to store the result
                try self.emitWValue(dst);
                // load byte from src's address
                try self.emitWValue(src);
                // The byte index is folded into the static memarg offset, so
                // no address arithmetic is emitted for the unrolled copy.
                switch (self.arch()) {
                    .wasm32 => {
                        try self.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
                        try self.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
                    },
                    .wasm64 => {
                        try self.addMemArg(.i64_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
                        try self.addMemArg(.i64_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
                    },
                    else => unreachable,
                }
            }
        },
        else => {
            // TODO: We should probably lower this to a call to compiler_rt
            // But for now, we implement it manually
            const offset = try self.allocLocal(Type.usize); // local for counter

            // Wasm locals are only zero-initialized when the function is
            // entered. The code emitted here may execute more than once per
            // call (e.g. when the memcpy sits inside a loop), so explicitly
            // reset the counter before entering the copy loop. Without this,
            // a second execution would start at the previous final count.
            switch (self.arch()) {
                .wasm32 => try self.addImm32(0),
                .wasm64 => try self.addImm64(0),
                else => unreachable,
            }
            try self.addLabel(.local_set, offset.local);

            // outer block to jump to when loop is done
            try self.startBlock(.block, wasm.block_empty);
            try self.startBlock(.loop, wasm.block_empty);

            // loop condition (offset == length -> break)
            {
                try self.emitWValue(offset);
                try self.emitWValue(len);
                switch (self.arch()) {
                    .wasm32 => try self.addTag(.i32_eq),
                    .wasm64 => try self.addTag(.i64_eq),
                    else => unreachable,
                }
                try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
            }

            // get dst ptr (dst + counter); stays on the stack as the store address
            {
                try self.emitWValue(dst);
                try self.emitWValue(offset);
                switch (self.arch()) {
                    .wasm32 => try self.addTag(.i32_add),
                    .wasm64 => try self.addTag(.i64_add),
                    else => unreachable,
                }
            }

            // get src value (byte at src + counter) and also store in dst
            {
                try self.emitWValue(src);
                try self.emitWValue(offset);
                switch (self.arch()) {
                    .wasm32 => {
                        try self.addTag(.i32_add);
                        try self.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
                        try self.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
                    },
                    .wasm64 => {
                        try self.addTag(.i64_add);
                        try self.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
                        try self.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
                    },
                    else => unreachable,
                }
            }

            // increment loop counter
            {
                try self.emitWValue(offset);
                switch (self.arch()) {
                    .wasm32 => {
                        try self.addImm32(1);
                        try self.addTag(.i32_add);
                    },
                    .wasm64 => {
                        try self.addImm64(1);
                        try self.addTag(.i64_add);
                    },
                    else => unreachable,
                }
                try self.addLabel(.local_set, offset.local);
                try self.addLabel(.br, 0); // jump to start of loop
            }
            try self.endBlock(); // close off loop block
            try self.endBlock(); // close off outer block
        },
    }
}
@@ -1298,6 +1399,8 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
.wasm_memory_size => self.airWasmMemorySize(inst),
.wasm_memory_grow => self.airWasmMemoryGrow(inst),
.memcpy => self.airMemcpy(inst),
.add_sat,
.sub_sat,
.mul_sat,
@@ -1338,7 +1441,6 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
.ptr_slice_len_ptr,
.ptr_slice_ptr_ptr,
.int_to_float,
.memcpy,
.cmpxchg_weak,
.cmpxchg_strong,
.fence,
@@ -1520,7 +1622,8 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
return self.store(lhs, rhs, err_ty, 0);
}
return self.memCopy(ty, lhs, rhs);
const len = @intCast(u32, ty.abiSize(self.target));
return self.memcpy(lhs, rhs, .{ .imm32 = len });
},
.Optional => {
if (ty.isPtrLikeOptional()) {
@@ -1532,10 +1635,12 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
return self.store(lhs, rhs, Type.u8, 0);
}
return self.memCopy(ty, lhs, rhs);
const len = @intCast(u32, ty.abiSize(self.target));
return self.memcpy(lhs, rhs, .{ .imm32 = len });
},
.Struct, .Array, .Union, .Vector => {
return self.memCopy(ty, lhs, rhs);
const len = @intCast(u32, ty.abiSize(self.target));
return self.memcpy(lhs, rhs, .{ .imm32 = len });
},
.Pointer => {
if (ty.isSlice()) {
@@ -1550,7 +1655,8 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
}
},
.Int => if (ty.intInfo(self.target).bits > 64) {
return self.memCopy(ty, lhs, rhs);
const len = @intCast(u32, ty.abiSize(self.target));
return self.memcpy(lhs, rhs, .{ .imm32 = len });
},
else => {},
}
@@ -2414,8 +2520,16 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
if (!err_ty.errorUnionPayload().hasRuntimeBits()) return operand;
const err_union = try self.allocStack(err_ty);
// TODO: Also write 'undefined' to the payload
try self.store(err_union, operand, err_ty.errorUnionSet(), 0);
// write 'undefined' to the payload
const err_align = err_ty.abiAlignment(self.target);
const set_size = err_ty.errorUnionSet().abiSize(self.target);
const offset = mem.alignForwardGeneric(u64, set_size, err_align);
const payload_ptr = try self.buildPointerOffset(err_union, offset, .new);
const len = @intCast(u32, err_ty.errorUnionPayload().abiSize(self.target));
try self.memset(payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaaaaaaaa });
return err_union;
}
@@ -2867,7 +2981,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
const ptr = try self.resolveInst(pl_op.operand);
const value = try self.resolveInst(bin_op.lhs);
const len = try self.resolveInst(bin_op.rhs);
try self.memSet(ptr, len, value);
try self.memset(ptr, len, value);
return WValue{ .none = {} };
}
@@ -2876,7 +2990,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
/// When the user has enabled the bulk_memory feature, we lower
/// this to wasm's memset instruction. When the feature is not present,
/// we implement it manually.
fn memSet(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void {
fn memset(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void {
// When bulk_memory is enabled, we lower it to wasm's memset instruction.
// If not, we lower it ourselves
if (std.Target.wasm.featureSetHas(self.target.cpu.features, .bulk_memory)) {
@@ -2890,45 +3004,74 @@ fn memSet(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void
return;
}
// TODO: We should probably lower this to a call to compiler_rt
// But for now, we implement it manually
const offset = try self.allocLocal(Type.usize); // local for counter
// outer block to jump to when loop is done
try self.startBlock(.block, wasm.block_empty);
try self.startBlock(.loop, wasm.block_empty);
try self.emitWValue(offset);
try self.emitWValue(len);
switch (self.ptrSize()) {
4 => try self.addTag(.i32_eq),
8 => try self.addTag(.i64_eq),
else => unreachable,
// When the length is comptime-known we do the loop at codegen, rather
// than emitting a runtime loop into the binary
switch (len) {
.imm32, .imm64 => {
const length = switch (len) {
.imm32 => |val| val,
.imm64 => |val| val,
else => unreachable,
};
var offset: u32 = 0;
const base = ptr.offset();
while (offset < length) : (offset += 1) {
try self.emitWValue(ptr);
try self.emitWValue(value);
switch (self.arch()) {
.wasm32 => {
try self.addMemArg(.i32_store8, .{ .offset = base + offset, .alignment = 1 });
},
.wasm64 => {
try self.addMemArg(.i64_store8, .{ .offset = base + offset, .alignment = 1 });
},
else => unreachable,
}
}
},
else => {
// TODO: We should probably lower this to a call to compiler_rt
// But for now, we implement it manually
const offset = try self.allocLocal(Type.usize); // local for counter
// outer block to jump to when loop is done
try self.startBlock(.block, wasm.block_empty);
try self.startBlock(.loop, wasm.block_empty);
try self.emitWValue(offset);
try self.emitWValue(len);
switch (self.arch()) {
.wasm32 => try self.addTag(.i32_eq),
.wasm64 => try self.addTag(.i64_eq),
else => unreachable,
}
try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
try self.emitWValue(ptr);
try self.emitWValue(offset);
switch (self.arch()) {
.wasm32 => try self.addTag(.i32_add),
.wasm64 => try self.addTag(.i64_add),
else => unreachable,
}
try self.emitWValue(value);
const mem_store_op: Mir.Inst.Tag = switch (self.arch()) {
.wasm32 => .i32_store8,
.wasm64 => .i64_store8,
else => unreachable,
};
try self.addMemArg(mem_store_op, .{ .offset = ptr.offset(), .alignment = 1 });
try self.emitWValue(offset);
try self.addImm32(1);
switch (self.arch()) {
.wasm32 => try self.addTag(.i32_add),
.wasm64 => try self.addTag(.i64_add),
else => unreachable,
}
try self.addLabel(.local_set, offset.local);
try self.addLabel(.br, 0); // jump to start of loop
try self.endBlock();
try self.endBlock();
},
}
try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
try self.emitWValue(ptr);
try self.emitWValue(offset);
switch (self.arch()) {
.wasm32 => try self.addTag(.i32_add),
.wasm64 => try self.addTag(.i64_add),
else => unreachable,
}
try self.emitWValue(value);
const mem_store_op: Mir.Inst.Tag = switch (self.arch()) {
.wasm32 => .i32_store8,
.wasm64 => .i64_store8,
else => unreachable,
};
try self.addMemArg(mem_store_op, .{ .offset = ptr.offset(), .alignment = 1 });
try self.emitWValue(offset);
try self.addImm32(1);
switch (self.ptrSize()) {
4 => try self.addTag(.i32_add),
8 => try self.addTag(.i64_add),
else => unreachable,
}
try self.addLabel(.local_set, offset.local);
try self.addLabel(.br, 0); // jump to start of loop
try self.endBlock();
try self.endBlock();
}
fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
@@ -3309,3 +3452,13 @@ fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
try self.addLabel(.local_set, base.local);
return base;
}
/// Lowers the AIR `memcpy` instruction.
/// The destination is the instruction's `pl_op` operand; the source and
/// length are the `lhs` and `rhs` of the `Air.Bin` stored in the extra data.
/// Produces no result value.
fn airMemcpy(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
    const payload_op = self.air.instructions.items(.data)[inst].pl_op;
    const operands = self.air.extraData(Air.Bin, payload_op.payload).data;

    // Resolve the operands in the order: destination, source, length.
    const dst_ptr = try self.resolveInst(payload_op.operand);
    const src_ptr = try self.resolveInst(operands.lhs);
    const byte_count = try self.resolveInst(operands.rhs);

    try self.memcpy(dst_ptr, src_ptr, byte_count);
    return WValue{ .none = {} };
}

View File

@@ -340,7 +340,6 @@ fn f2(x: bool) []const u8 {
test "memcpy and memset intrinsics" {
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
try testMemcpyMemset();
// TODO add comptime test coverage