stage2: implement @prefetch

This reverts commit f423b5949b, re-instating commit d48e4245b6.
2022-01-18 10:49:54 -07:00
parent f423b5949b
commit 30efcf22d7
14 changed files with 287 additions and 45 deletions
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -1278,6 +1278,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
            .error_name       => try airErrorName(f, inst),
            .splat            => try airSplat(f, inst),
            .vector_init      => try airVectorInit(f, inst),
+            .prefetch         => try airPrefetch(f, inst),

            .int_to_float,
            .float_to_int,
@@ -3089,6 +3090,25 @@ fn airVectorInit(f: *Function, inst: Air.Inst.Index) !CValue {
    return f.fail("TODO: C backend: implement airVectorInit", .{});
 }

+/// Lowers an AIR `prefetch` instruction to a C statement of the form
+/// `zig_prefetch(<ptr>, <rw>, <locality>);`.
+///
+/// Nothing is written for instruction-cache prefetches. The instruction
+/// produces no value, so `CValue.none` is returned on every successful path.
+/// Errors from resolving the operand or writing to the output are propagated.
+fn airPrefetch(f: *Function, inst: Air.Inst.Index) !CValue {
+    const data = f.air.instructions.items(.data)[inst].prefetch;
+
+    switch (data.cache) {
+        // The prefetch intrinsics available to us take only an address, an
+        // rw flag, and a locality; there is no way to select a cache. Any
+        // request that does not target the data cache is therefore dropped.
+        .instruction => return CValue.none,
+        .data => {},
+    }
+
+    const operand = try f.resolveInst(data.ptr);
+    const rw_int = @enumToInt(data.rw);
+
+    const out = f.object.writer();
+    try out.writeAll("zig_prefetch(");
+    try f.writeCValue(out, operand);
+    try out.print(", {d}, {d});\n", .{ rw_int, data.locality });
+
+    return CValue.none;
+}
+
 fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 {
    return switch (order) {
        .Unordered => "memory_order_relaxed",
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -2073,6 +2073,7 @@ pub const FuncGen = struct {
                .error_name     => try self.airErrorName(inst),
                .splat          => try self.airSplat(inst),
                .vector_init    => try self.airVectorInit(inst),
+                .prefetch       => try self.airPrefetch(inst),

                .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered),
                .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic),
@@ -4384,6 +4385,67 @@ pub const FuncGen = struct {
        return vector;
    }

+    /// Lowers an AIR `prefetch` instruction to a call to the
+    /// `llvm.prefetch.p0i8` intrinsic, declaring the intrinsic in the module
+    /// the first time it is needed.
+    ///
+    /// The call is skipped for instruction-cache prefetches on the targets
+    /// listed in the workaround below. Always returns `null` on success;
+    /// errors from resolving the operand are propagated.
+    fn airPrefetch(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
+        const Options = std.builtin.PrefetchOptions;
+        const data = self.air.instructions.items(.data)[inst].prefetch;
+
+        // The enum tags are passed to the intrinsic as raw integers below, so
+        // pin down the numeric values this lowering relies on.
+        comptime assert(@enumToInt(Options.Rw.read) == 0);
+        comptime assert(@enumToInt(Options.Rw.write) == 1);
+        comptime assert(@enumToInt(Options.Cache.instruction) == 0);
+        comptime assert(@enumToInt(Options.Cache.data) == 1);
+
+        // TODO these two asserts should be able to be comptime because the type is a u2
+        assert(data.locality >= 0);
+        assert(data.locality <= 3);
+
+        // LLVM fails during codegen of instruction cache prefetches on some
+        // architectures. That is an LLVM bug, since the prefetch intrinsic
+        // should be a noop when the target does not support it. As a
+        // workaround, llvm.prefetch is not emitted in those cases.
+        // See https://bugs.llvm.org/show_bug.cgi?id=21037
+        const target = self.dg.module.getTarget();
+        switch (data.cache) {
+            .data => {},
+            .instruction => switch (target.cpu.arch) {
+                .x86_64, .i386 => return null,
+                // On these targets only write prefetches are skipped.
+                .arm, .armeb, .thumb, .thumbeb => if (data.rw == .write) return null,
+                else => {},
+            },
+        }
+
+        const i8_ptr_ty = self.context.intType(8).pointerType(0);
+        const i32_ty = self.context.intType(32);
+
+        // Reuse the intrinsic declaration if the module already has one.
+        const intrinsic_name = "llvm.prefetch.p0i8";
+        const intrinsic = self.dg.object.llvm_module.getNamedFunction(intrinsic_name) orelse declare: {
+            // declare void @llvm.prefetch(i8*, i32, i32, i32)
+            const void_ty = self.context.voidType();
+            const arg_types = [_]*const llvm.Type{ i8_ptr_ty, i32_ty, i32_ty, i32_ty };
+            const signature = llvm.functionType(void_ty, &arg_types, arg_types.len, .False);
+            break :declare self.dg.object.llvm_module.addFunction(intrinsic_name, signature);
+        };
+
+        const operand = try self.resolveInst(data.ptr);
+
+        // Arguments in intrinsic order: address (cast to i8*), rw, locality, cache.
+        const args = [_]*const llvm.Value{
+            self.builder.buildBitCast(operand, i8_ptr_ty, ""),
+            i32_ty.constInt(@enumToInt(data.rw), .False),
+            i32_ty.constInt(data.locality, .False),
+            i32_ty.constInt(@enumToInt(data.cache), .False),
+        };
+        _ = self.builder.buildCall(intrinsic, &args, args.len, .C, .Auto, "");
+        return null;
+    }
+
    fn getErrorNameTable(self: *FuncGen) !*const llvm.Value {
        if (self.dg.object.error_name_table) |table| {
            return table;