zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 3357c59cebacb6b60da865376b20d2b307d12ec1 (tree)
parent 83051b0cbf31b76e824d3911a7f4a0be3c0cf94d
Author: Robin Voetter <robin@voetter.nl>
Date:   Sat, 18 Mar 2023 15:59:56 +0100

new builtins: @workItemId, @workGroupId, @workGroupSize

* @workItemId returns the index of the work item in a work group for a
  dimension.
* @workGroupId returns the index of the work group in the kernel dispatch for a
  dimension.
* @workGroupSize returns the size of the work group for a dimension.

These builtins are mainly useful for GPU backends. They are currently only
implemented for the AMDGCN LLVM backend.

Diffstat:
Mdoc/langref.html.in | 22++++++++++++++++++++++
Msrc/Air.zig | 21+++++++++++++++++++++
Msrc/AstGen.zig | 34++++++++++++++++++++++++++++++++++
Msrc/BuiltinFn.zig | 23+++++++++++++++++++++++
Msrc/Liveness.zig | 6++++++
Msrc/Sema.zig | 39+++++++++++++++++++++++++++++++++++++++
Msrc/Zir.zig | 9+++++++++
Msrc/arch/aarch64/CodeGen.zig | 4++++
Msrc/arch/arm/CodeGen.zig | 4++++
Msrc/arch/riscv64/CodeGen.zig | 4++++
Msrc/arch/sparc64/CodeGen.zig | 4++++
Msrc/arch/wasm/CodeGen.zig | 5+++++
Msrc/arch/x86_64/CodeGen.zig | 4++++
Msrc/codegen/c.zig | 5+++++
Msrc/codegen/llvm.zig | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/print_air.zig | 10++++++++++
Msrc/print_zir.zig | 3+++
17 files changed, 269 insertions(+), 0 deletions(-)

diff --git a/doc/langref.html.in b/doc/langref.html.in @@ -9578,6 +9578,28 @@ fn foo(comptime T: type, ptr: *T) T { Remove {#syntax#}volatile{#endsyntax#} qualifier from a pointer. </p> {#header_close#} + + {#header_open|@workGroupId#} + <pre>{#syntax#}@workGroupId(comptime dimension: u32) u32{#endsyntax#}</pre> + <p> + Returns the index of the work group in the current kernel invocation in dimension {#syntax#}dimension{#endsyntax#}. + </p> + {#header_close#} + + {#header_open|@workGroupSize#} + <pre>{#syntax#}@workGroupSize(comptime dimension: u32) u32{#endsyntax#}</pre> + <p> + Returns the number of work items that a work group has in dimension {#syntax#}dimension{#endsyntax#}. + </p> + {#header_close#} + + {#header_open|@workItemId#} + <pre>{#syntax#}@workItemId(comptime dimension: u32) u32{#endsyntax#}</pre> + <p> + Returns the index of the work item in the work group in dimension {#syntax#}dimension{#endsyntax#}. This function returns values between {#syntax#}0{#endsyntax#} (inclusive) and {#syntax#}@workGroupSize(dimension){#endsyntax#} (exclusive). + </p> + {#header_close#} + {#header_close#} {#header_open|Build Mode#} diff --git a/src/Air.zig b/src/Air.zig @@ -761,6 +761,22 @@ pub const Inst = struct { /// Uses the `ty` field. c_va_start, + /// Implements @workItemId builtin. + /// Result type is always `u32` + /// Uses the `pl_op` field, payload is the dimension to get the work item id for. + /// Operand is unused and set to Ref.none + work_item_id, + /// Implements @workGroupSize builtin. + /// Result type is always `u32` + /// Uses the `pl_op` field, payload is the dimension to get the work group size for. + /// Operand is unused and set to Ref.none + work_group_size, + /// Implements @workGroupId builtin. + /// Result type is always `u32` + /// Uses the `pl_op` field, payload is the dimension to get the work group id for. + /// Operand is unused and set to Ref.none + work_group_id, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1267,6 +1283,11 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { const err_union_ty = air.typeOf(datas[inst].pl_op.operand); return err_union_ty.errorUnionPayload(); }, + + .work_item_id, + .work_group_size, + .work_group_id, + => return Type.u32, } } diff --git a/src/AstGen.zig b/src/AstGen.zig @@ -8549,6 +8549,40 @@ fn builtinCall( } return rvalue(gz, ri, try gz.addNodeExtended(.c_va_start, node), node); }, + + .work_item_id => { + if (astgen.fn_block == null) { + return astgen.failNode(node, "'@workItemId' outside function scope", .{}); + } + const operand = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .u32_type } }, params[0]); + const result = try gz.addExtendedPayload(.work_item_id, Zir.Inst.UnNode{ + .node = gz.nodeIndexToRelative(node), + .operand = operand, + }); + return rvalue(gz, ri, result, node); + }, + .work_group_size => { + if (astgen.fn_block == null) { + return astgen.failNode(node, "'@workGroupSize' outside function scope", .{}); + } + const operand = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .u32_type } }, params[0]); + const result = try gz.addExtendedPayload(.work_group_size, Zir.Inst.UnNode{ + .node = gz.nodeIndexToRelative(node), + .operand = operand, + }); + return rvalue(gz, ri, result, node); + }, + .work_group_id => { + if (astgen.fn_block == null) { + return astgen.failNode(node, "'@workGroupId' outside function scope", .{}); + } + const operand = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .u32_type } }, params[0]); + const result = try gz.addExtendedPayload(.work_group_id, Zir.Inst.UnNode{ + .node = gz.nodeIndexToRelative(node), + .operand = operand, + }); + return rvalue(gz, ri, result, node); + }, } } diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig @@ -118,6 +118,9 @@ pub const Tag = enum { union_init, Vector, volatile_cast, + work_item_id, + work_group_size, + work_group_id, }; pub const MemLocRequirement = enum { @@ -980,5 +983,25 @@ pub const list = list: { .param_count = 1, }, }, + .{ + "@workItemId", .{ + .tag = .work_item_id, + .param_count = 1, + }, + }, + .{ + "@workGroupSize", + .{ + .tag = .work_group_size, + .param_count = 1, + }, + }, + .{ + "@workGroupId", + .{ + .tag = .work_group_id, + .param_count = 1, + }, + }, }); }; diff --git a/src/Liveness.zig b/src/Liveness.zig @@ -240,6 +240,9 @@ pub fn categorizeOperand( .err_return_trace, .save_err_return_trace_index, .c_va_start, + .work_item_id, + .work_group_size, + .work_group_id, => return .none, .fence => return .write, @@ -864,6 +867,9 @@ fn analyzeInst( .err_return_trace, .save_err_return_trace_index, .c_va_start, + .work_item_id, + .work_group_size, + .work_group_id, => return trackOperands(a, new_set, inst, main_tomb, .{ .none, .none, .none }), .not, diff --git a/src/Sema.zig b/src/Sema.zig @@ -1164,6 +1164,9 @@ fn analyzeBodyInner( .c_va_start => try sema.zirCVaStart( block, extended), .const_cast, => try sema.zirConstCast( block, extended), .volatile_cast, => try sema.zirVolatileCast( block, extended), + .work_item_id => try sema.zirWorkItem( block, extended, extended.opcode), + .work_group_size => try sema.zirWorkItem( block, extended, extended.opcode), + .work_group_id => try sema.zirWorkItem( block, extended, extended.opcode), // zig fmt: on .fence => { @@ -22437,6 +22440,42 @@ fn zirBuiltinExtern( return sema.addConstant(ty, ref); } +fn zirWorkItem( + sema: *Sema, + block: *Block, + extended: Zir.Inst.Extended.InstData, + zir_tag: Zir.Inst.Extended, +) CompileError!Air.Inst.Ref { + const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data; + const dimension_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; + const builtin_src = LazySrcLoc.nodeOffset(extra.node); + const target = sema.mod.getTarget(); + + switch (target.cpu.arch) { + // TODO: Allow for other GPU targets. + .amdgcn => {}, + else => { + return sema.fail(block, builtin_src, "builtin only available on GPU targets; targeted architecture is {s}", .{@tagName(target.cpu.arch)}); + }, + } + + const dimension = @intCast(u32, try sema.resolveInt(block, dimension_src, extra.operand, Type.u32, "dimension must be comptime-known")); + try sema.requireRuntimeBlock(block, builtin_src, null); + + return block.addInst(.{ + .tag = switch (zir_tag) { + .work_item_id => .work_item_id, + .work_group_size => .work_group_size, + .work_group_id => .work_group_id, + else => unreachable, + }, + .data = .{ .pl_op = .{ + .operand = .none, + .payload = dimension, + } }, + }); +} + fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void { if (block.is_comptime) { const msg = msg: { diff --git a/src/Zir.zig b/src/Zir.zig @@ -2032,6 +2032,15 @@ pub const Inst = struct { /// Implements the `@volatileCast` builtin. /// `operand` is payload index to `UnNode`. volatile_cast, + /// Implements the `@workItemId` builtin. + /// `operand` is payload index to `UnNode`. + work_item_id, + /// Implements the `@workGroupSize` builtin. + /// `operand` is payload index to `UnNode`. + work_group_size, + /// Implements the `@workGroupId` builtin. + /// `operand` is payload index to `UnNode`. + work_group_id, pub const InstData = struct { opcode: Extended, diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig @@ -890,6 +890,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig @@ -874,6 +874,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig @@ -704,6 +704,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } if (std.debug.runtime_safety) { diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig @@ -720,6 +720,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig @@ -1997,6 +1997,11 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .reduce_optimized, .float_to_int_optimized, => return func.fail("TODO implement optimized float mode", .{}), + + .work_item_id, + .work_group_size, + .work_group_id, + => unreachable, }; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig @@ -1132,6 +1132,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/codegen/c.zig b/src/codegen/c.zig @@ -2995,6 +2995,11 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .c_va_arg => try airCVaArg(f, inst), .c_va_end => try airCVaEnd(f, inst), .c_va_copy => try airCVaCopy(f, inst), + + .work_item_id, + .work_group_size, + .work_group_id, + => unreachable, // zig fmt: on }; if (result_value == .new_local) { diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig @@ -4745,6 +4745,10 @@ pub const FuncGen = struct { .c_va_copy => try self.airCVaCopy(inst), .c_va_end => try self.airCVaEnd(inst), .c_va_start => try self.airCVaStart(inst), + + .work_item_id => try self.airWorkItemId(inst), + .work_group_size => try self.airWorkGroupSize(inst), + .work_group_id => try self.airWorkGroupId(inst), // zig fmt: on }; if (opt_value) |val| { @@ -9567,6 +9571,74 @@ pub const FuncGen = struct { return self.builder.buildAddrSpaceCast(operand, llvm_dest_ty, ""); } + fn amdgcnWorkIntrinsic(self: *FuncGen, dimension: u32, default: u32, comptime basename: []const u8) !?*llvm.Value { + const llvm_u32 = self.context.intType(32); + + const llvm_fn_name = switch (dimension) { + 0 => basename ++ ".x", + 1 => basename ++ ".y", + 2 => basename ++ ".z", + else => return llvm_u32.constInt(default, .False), + }; + + const args: [0]*llvm.Value = .{}; + const llvm_fn = self.getIntrinsic(llvm_fn_name, &.{}); + return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .Fast, .Auto, ""); + } + + fn airWorkItemId(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const target = self.dg.module.getTarget(); + assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const dimension = pl_op.payload; + return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workitem.id"); + } + + fn airWorkGroupSize(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const target = self.dg.module.getTarget(); + assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const dimension = pl_op.payload; + const llvm_u32 = self.context.intType(32); + if (dimension >= 3) { + return llvm_u32.constInt(1, .False); + } + + // Fetch the dispatch pointer, which points to this structure: + // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913 + const llvm_fn = self.getIntrinsic("llvm.amdgcn.dispatch.ptr", &.{}); + const args: [0]*llvm.Value = .{}; + const dispatch_ptr = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .Fast, .Auto, ""); + dispatch_ptr.setAlignment(4); + + // Load the work_group_* member from the struct as u16. + // Just treat the dispatch pointer as an array of u16 to keep things simple. + const offset = 2 + dimension; + const index = [_]*llvm.Value{llvm_u32.constInt(offset, .False)}; + const llvm_u16 = self.context.intType(16); + const workgroup_size_ptr = self.builder.buildInBoundsGEP(llvm_u16, dispatch_ptr, &index, index.len, ""); + const workgroup_size = self.builder.buildLoad(llvm_u16, workgroup_size_ptr, ""); + workgroup_size.setAlignment(2); + return workgroup_size; + } + + fn airWorkGroupId(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const target = self.dg.module.getTarget(); + assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const dimension = pl_op.payload; + return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workgroup.id"); + } + fn getErrorNameTable(self: *FuncGen) !*llvm.Value { if (self.dg.object.error_name_table) |table| { return table; diff --git a/src/print_air.zig b/src/print_air.zig @@ -328,6 +328,11 @@ const Writer = struct { .vector_store_elem => try w.writeVectorStoreElem(s, inst), .dbg_block_begin, .dbg_block_end => {}, + + .work_item_id, + .work_group_size, + .work_group_id, + => try w.writeWorkDimension(s, inst), } try s.writeAll(")\n"); } @@ -869,6 +874,11 @@ const Writer = struct { try w.writeOperand(s, inst, 0, pl_op.operand); } + fn writeWorkDimension(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { + const pl_op = w.air.instructions.items(.data)[inst].pl_op; + try s.print("{d}", .{pl_op.payload}); + } + fn writeOperand( w: *Writer, s: anytype, diff --git a/src/print_zir.zig b/src/print_zir.zig @@ -512,6 +512,9 @@ const Writer = struct { .c_va_end, .const_cast, .volatile_cast, + .work_item_id, + .work_group_size, + .work_group_id, => { const inst_data = self.code.extraData(Zir.Inst.UnNode, extended.operand).data; const src = LazySrcLoc.nodeOffset(inst_data.node);