Merge pull request #11200 from Luukdegram/wasm-memcpy
stage2: wasm - Implement memcpy instruction
This commit is contained in:
@@ -895,7 +895,7 @@ fn genFunc(self: *Self) InnerError!void {
|
||||
try prologue.append(.{ .tag = .i32_sub, .data = .{ .tag = {} } });
|
||||
// Get negative stack aligment
|
||||
try prologue.append(.{ .tag = .i32_const, .data = .{ .imm32 = @intCast(i32, self.stack_alignment) * -1 } });
|
||||
// Bit and the value to get the new stack pointer to ensure the pointers are aligned with the abi alignment
|
||||
// Bitwise-and the value to get the new stack pointer to ensure the pointers are aligned with the abi alignment
|
||||
try prologue.append(.{ .tag = .i32_and, .data = .{ .tag = {} } });
|
||||
// store the current stack pointer as the bottom, which will be used to calculate all stack pointer offsets
|
||||
try prologue.append(.{ .tag = .local_tee, .data = .{ .label = self.bottom_stack_value.local } });
|
||||
@@ -1074,22 +1074,123 @@ fn toWasmBits(bits: u16) ?u16 {
|
||||
|
||||
/// Performs a copy of bytes for a given type. Copying all bytes
|
||||
/// from rhs to lhs.
|
||||
///
|
||||
/// TODO: Perform feature detection and when bulk_memory is available,
|
||||
/// use wasm's mem.copy instruction.
|
||||
fn memCopy(self: *Self, ty: Type, lhs: WValue, rhs: WValue) !void {
|
||||
const abi_size = ty.abiSize(self.target);
|
||||
var offset: u32 = 0;
|
||||
const lhs_base = lhs.offset();
|
||||
const rhs_base = rhs.offset();
|
||||
while (offset < abi_size) : (offset += 1) {
|
||||
// get lhs' address to store the result
|
||||
try self.emitWValue(lhs);
|
||||
// load byte from rhs' adress
|
||||
try self.emitWValue(rhs);
|
||||
try self.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
|
||||
// store the result in lhs (we already have its address on the stack)
|
||||
try self.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
|
||||
fn memcpy(self: *Self, dst: WValue, src: WValue, len: WValue) !void {
|
||||
// When bulk_memory is enabled, we lower it to wasm's memcpy instruction.
|
||||
// If not, we lower it ourselves manually
|
||||
if (std.Target.wasm.featureSetHas(self.target.cpu.features, .bulk_memory)) {
|
||||
switch (dst) {
|
||||
.stack_offset => try self.emitWValue(try self.buildPointerOffset(dst, 0, .new)),
|
||||
else => try self.emitWValue(dst),
|
||||
}
|
||||
switch (src) {
|
||||
.stack_offset => try self.emitWValue(try self.buildPointerOffset(src, 0, .new)),
|
||||
else => try self.emitWValue(src),
|
||||
}
|
||||
try self.emitWValue(len);
|
||||
try self.addExtended(.memory_copy);
|
||||
return;
|
||||
}
|
||||
|
||||
// when the length is comptime-known, rather than a runtime value, we can optimize the generated code by having
|
||||
// the loop during codegen, rather than inserting a runtime loop into the binary.
|
||||
switch (len) {
|
||||
.imm32, .imm64 => {
|
||||
const length = switch (len) {
|
||||
.imm32 => |val| val,
|
||||
.imm64 => |val| val,
|
||||
else => unreachable,
|
||||
};
|
||||
var offset: u32 = 0;
|
||||
const lhs_base = dst.offset();
|
||||
const rhs_base = src.offset();
|
||||
while (offset < length) : (offset += 1) {
|
||||
// get dst's address to store the result
|
||||
try self.emitWValue(dst);
|
||||
// load byte from src's address
|
||||
try self.emitWValue(src);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => {
|
||||
try self.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
|
||||
try self.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
|
||||
},
|
||||
.wasm64 => {
|
||||
try self.addMemArg(.i64_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 });
|
||||
try self.addMemArg(.i64_store8, .{ .offset = lhs_base + offset, .alignment = 1 });
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
},
|
||||
else => {
|
||||
// TODO: We should probably lower this to a call to compiler_rt
|
||||
// But for now, we implement it manually
|
||||
const offset = try self.allocLocal(Type.usize); // local for counter
|
||||
// outer block to jump to when loop is done
|
||||
try self.startBlock(.block, wasm.block_empty);
|
||||
try self.startBlock(.loop, wasm.block_empty);
|
||||
|
||||
// loop condition (offset == length -> break)
|
||||
{
|
||||
try self.emitWValue(offset);
|
||||
try self.emitWValue(len);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => try self.addTag(.i32_eq),
|
||||
.wasm64 => try self.addTag(.i64_eq),
|
||||
else => unreachable,
|
||||
}
|
||||
try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
|
||||
}
|
||||
|
||||
// get dst ptr
|
||||
{
|
||||
try self.emitWValue(dst);
|
||||
try self.emitWValue(offset);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => try self.addTag(.i32_add),
|
||||
.wasm64 => try self.addTag(.i64_add),
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
|
||||
// get src value and also store in dst
|
||||
{
|
||||
try self.emitWValue(src);
|
||||
try self.emitWValue(offset);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => {
|
||||
try self.addTag(.i32_add);
|
||||
try self.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 });
|
||||
try self.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 });
|
||||
},
|
||||
.wasm64 => {
|
||||
try self.addTag(.i64_add);
|
||||
try self.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 });
|
||||
try self.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 });
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
|
||||
// increment loop counter
|
||||
{
|
||||
try self.emitWValue(offset);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => {
|
||||
try self.addImm32(1);
|
||||
try self.addTag(.i32_add);
|
||||
},
|
||||
.wasm64 => {
|
||||
try self.addImm64(1);
|
||||
try self.addTag(.i64_add);
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
try self.addLabel(.local_set, offset.local);
|
||||
try self.addLabel(.br, 0); // jump to start of loop
|
||||
}
|
||||
try self.endBlock(); // close off loop block
|
||||
try self.endBlock(); // close off outer block
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1298,6 +1399,8 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
|
||||
.wasm_memory_size => self.airWasmMemorySize(inst),
|
||||
.wasm_memory_grow => self.airWasmMemoryGrow(inst),
|
||||
|
||||
.memcpy => self.airMemcpy(inst),
|
||||
|
||||
.add_sat,
|
||||
.sub_sat,
|
||||
.mul_sat,
|
||||
@@ -1338,7 +1441,6 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
|
||||
.ptr_slice_len_ptr,
|
||||
.ptr_slice_ptr_ptr,
|
||||
.int_to_float,
|
||||
.memcpy,
|
||||
.cmpxchg_weak,
|
||||
.cmpxchg_strong,
|
||||
.fence,
|
||||
@@ -1520,7 +1622,8 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
|
||||
return self.store(lhs, rhs, err_ty, 0);
|
||||
}
|
||||
|
||||
return self.memCopy(ty, lhs, rhs);
|
||||
const len = @intCast(u32, ty.abiSize(self.target));
|
||||
return self.memcpy(lhs, rhs, .{ .imm32 = len });
|
||||
},
|
||||
.Optional => {
|
||||
if (ty.isPtrLikeOptional()) {
|
||||
@@ -1532,10 +1635,12 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
|
||||
return self.store(lhs, rhs, Type.u8, 0);
|
||||
}
|
||||
|
||||
return self.memCopy(ty, lhs, rhs);
|
||||
const len = @intCast(u32, ty.abiSize(self.target));
|
||||
return self.memcpy(lhs, rhs, .{ .imm32 = len });
|
||||
},
|
||||
.Struct, .Array, .Union, .Vector => {
|
||||
return self.memCopy(ty, lhs, rhs);
|
||||
const len = @intCast(u32, ty.abiSize(self.target));
|
||||
return self.memcpy(lhs, rhs, .{ .imm32 = len });
|
||||
},
|
||||
.Pointer => {
|
||||
if (ty.isSlice()) {
|
||||
@@ -1550,7 +1655,8 @@ fn store(self: *Self, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerErro
|
||||
}
|
||||
},
|
||||
.Int => if (ty.intInfo(self.target).bits > 64) {
|
||||
return self.memCopy(ty, lhs, rhs);
|
||||
const len = @intCast(u32, ty.abiSize(self.target));
|
||||
return self.memcpy(lhs, rhs, .{ .imm32 = len });
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
@@ -2414,8 +2520,16 @@ fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
|
||||
if (!err_ty.errorUnionPayload().hasRuntimeBits()) return operand;
|
||||
|
||||
const err_union = try self.allocStack(err_ty);
|
||||
// TODO: Also write 'undefined' to the payload
|
||||
try self.store(err_union, operand, err_ty.errorUnionSet(), 0);
|
||||
|
||||
// write 'undefined' to the payload
|
||||
const err_align = err_ty.abiAlignment(self.target);
|
||||
const set_size = err_ty.errorUnionSet().abiSize(self.target);
|
||||
const offset = mem.alignForwardGeneric(u64, set_size, err_align);
|
||||
const payload_ptr = try self.buildPointerOffset(err_union, offset, .new);
|
||||
const len = @intCast(u32, err_ty.errorUnionPayload().abiSize(self.target));
|
||||
try self.memset(payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaaaaaaaa });
|
||||
|
||||
return err_union;
|
||||
}
|
||||
|
||||
@@ -2867,7 +2981,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
|
||||
const ptr = try self.resolveInst(pl_op.operand);
|
||||
const value = try self.resolveInst(bin_op.lhs);
|
||||
const len = try self.resolveInst(bin_op.rhs);
|
||||
try self.memSet(ptr, len, value);
|
||||
try self.memset(ptr, len, value);
|
||||
|
||||
return WValue{ .none = {} };
|
||||
}
|
||||
@@ -2876,7 +2990,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
|
||||
/// When the user has enabled the bulk_memory feature, we lower
|
||||
/// this to wasm's memset instruction. When the feature is not present,
|
||||
/// we implement it manually.
|
||||
fn memSet(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void {
|
||||
fn memset(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void {
|
||||
// When bulk_memory is enabled, we lower it to wasm's memset instruction.
|
||||
// If not, we lower it ourselves
|
||||
if (std.Target.wasm.featureSetHas(self.target.cpu.features, .bulk_memory)) {
|
||||
@@ -2890,45 +3004,74 @@ fn memSet(self: *Self, ptr: WValue, len: WValue, value: WValue) InnerError!void
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: We should probably lower this to a call to compiler_rt
|
||||
// But for now, we implement it manually
|
||||
const offset = try self.allocLocal(Type.usize); // local for counter
|
||||
// outer block to jump to when loop is done
|
||||
try self.startBlock(.block, wasm.block_empty);
|
||||
try self.startBlock(.loop, wasm.block_empty);
|
||||
try self.emitWValue(offset);
|
||||
try self.emitWValue(len);
|
||||
switch (self.ptrSize()) {
|
||||
4 => try self.addTag(.i32_eq),
|
||||
8 => try self.addTag(.i64_eq),
|
||||
else => unreachable,
|
||||
// When the length is comptime-known we do the loop at codegen, rather
|
||||
// than emitting a runtime loop into the binary
|
||||
switch (len) {
|
||||
.imm32, .imm64 => {
|
||||
const length = switch (len) {
|
||||
.imm32 => |val| val,
|
||||
.imm64 => |val| val,
|
||||
else => unreachable,
|
||||
};
|
||||
|
||||
var offset: u32 = 0;
|
||||
const base = ptr.offset();
|
||||
while (offset < length) : (offset += 1) {
|
||||
try self.emitWValue(ptr);
|
||||
try self.emitWValue(value);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => {
|
||||
try self.addMemArg(.i32_store8, .{ .offset = base + offset, .alignment = 1 });
|
||||
},
|
||||
.wasm64 => {
|
||||
try self.addMemArg(.i64_store8, .{ .offset = base + offset, .alignment = 1 });
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
},
|
||||
else => {
|
||||
// TODO: We should probably lower this to a call to compiler_rt
|
||||
// But for now, we implement it manually
|
||||
const offset = try self.allocLocal(Type.usize); // local for counter
|
||||
// outer block to jump to when loop is done
|
||||
try self.startBlock(.block, wasm.block_empty);
|
||||
try self.startBlock(.loop, wasm.block_empty);
|
||||
try self.emitWValue(offset);
|
||||
try self.emitWValue(len);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => try self.addTag(.i32_eq),
|
||||
.wasm64 => try self.addTag(.i64_eq),
|
||||
else => unreachable,
|
||||
}
|
||||
try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
|
||||
try self.emitWValue(ptr);
|
||||
try self.emitWValue(offset);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => try self.addTag(.i32_add),
|
||||
.wasm64 => try self.addTag(.i64_add),
|
||||
else => unreachable,
|
||||
}
|
||||
try self.emitWValue(value);
|
||||
const mem_store_op: Mir.Inst.Tag = switch (self.arch()) {
|
||||
.wasm32 => .i32_store8,
|
||||
.wasm64 => .i64_store8,
|
||||
else => unreachable,
|
||||
};
|
||||
try self.addMemArg(mem_store_op, .{ .offset = ptr.offset(), .alignment = 1 });
|
||||
try self.emitWValue(offset);
|
||||
try self.addImm32(1);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => try self.addTag(.i32_add),
|
||||
.wasm64 => try self.addTag(.i64_add),
|
||||
else => unreachable,
|
||||
}
|
||||
try self.addLabel(.local_set, offset.local);
|
||||
try self.addLabel(.br, 0); // jump to start of loop
|
||||
try self.endBlock();
|
||||
try self.endBlock();
|
||||
},
|
||||
}
|
||||
try self.addLabel(.br_if, 1); // jump out of loop into outer block (finished)
|
||||
try self.emitWValue(ptr);
|
||||
try self.emitWValue(offset);
|
||||
switch (self.arch()) {
|
||||
.wasm32 => try self.addTag(.i32_add),
|
||||
.wasm64 => try self.addTag(.i64_add),
|
||||
else => unreachable,
|
||||
}
|
||||
try self.emitWValue(value);
|
||||
const mem_store_op: Mir.Inst.Tag = switch (self.arch()) {
|
||||
.wasm32 => .i32_store8,
|
||||
.wasm64 => .i64_store8,
|
||||
else => unreachable,
|
||||
};
|
||||
try self.addMemArg(mem_store_op, .{ .offset = ptr.offset(), .alignment = 1 });
|
||||
try self.emitWValue(offset);
|
||||
try self.addImm32(1);
|
||||
switch (self.ptrSize()) {
|
||||
4 => try self.addTag(.i32_add),
|
||||
8 => try self.addTag(.i64_add),
|
||||
else => unreachable,
|
||||
}
|
||||
try self.addLabel(.local_set, offset.local);
|
||||
try self.addLabel(.br, 0); // jump to start of loop
|
||||
try self.endBlock();
|
||||
try self.endBlock();
|
||||
}
|
||||
|
||||
fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
|
||||
@@ -3309,3 +3452,13 @@ fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
|
||||
try self.addLabel(.local_set, base.local);
|
||||
return base;
|
||||
}
|
||||
|
||||
fn airMemcpy(self: *Self, inst: Air.Inst.Index) InnerError!WValue {
|
||||
const pl_op = self.air.instructions.items(.data)[inst].pl_op;
|
||||
const bin_op = self.air.extraData(Air.Bin, pl_op.payload).data;
|
||||
const dst = try self.resolveInst(pl_op.operand);
|
||||
const src = try self.resolveInst(bin_op.lhs);
|
||||
const len = try self.resolveInst(bin_op.rhs);
|
||||
try self.memcpy(dst, src, len);
|
||||
return WValue{ .none = {} };
|
||||
}
|
||||
|
||||
@@ -340,7 +340,6 @@ fn f2(x: bool) []const u8 {
|
||||
test "memcpy and memset intrinsics" {
|
||||
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
|
||||
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest;
|
||||
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
|
||||
|
||||
try testMemcpyMemset();
|
||||
// TODO add comptime test coverage
|
||||
|
||||
Reference in New Issue
Block a user