Merge pull request #18822 from alichraghi/shader
spirv: basic shader support
@@ -1221,6 +1221,7 @@ pub const Cpu = struct {
             .fs, .gs, .ss => arch == .x86_64 or arch == .x86,
             .global, .constant, .local, .shared => is_gpu,
             .param => is_nvptx,
+            .input, .output, .uniform => is_spirv,
             // TODO this should also check how many flash banks the cpu has
             .flash, .flash1, .flash2, .flash3, .flash4, .flash5 => arch == .avr,
         };

@@ -2353,7 +2354,7 @@ pub fn c_type_bit_size(target: Target, c_type: CType) u16 {
             .longdouble => return 128,
         },

-        .opencl => switch (c_type) {
+        .opencl, .vulkan => switch (c_type) {
             .char => return 8,
             .short, .ushort => return 16,
             .int, .uint, .float => return 32,

@@ -2386,7 +2387,6 @@ pub fn c_type_bit_size(target: Target, c_type: CType) u16 {
         .hermit,
         .hurd,
         .glsl450,
-        .vulkan,
         .driverkit,
         .shadermodel,
         .liteos,

@@ -205,6 +205,9 @@ pub const CallingConvention = enum(u8) {
     Win64,
     /// AMD GPU, NVPTX, or SPIR-V kernel
     Kernel,
+    // Vulkan-only
+    Fragment,
+    Vertex,
 };

 /// This data structure is used by the Zig language code generation and
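The two new conventions let a SPIR-V shader entry point be declared directly in Zig. A minimal sketch of the intended usage (the function names are illustrative, not taken from this commit):

    // Only accepted when compiling for a SPIR-V target; Sema rejects
    // these calling conventions everywhere else (see the Sema hunk below).
    export fn vertMain() callconv(.Vertex) void {}
    export fn fragMain() callconv(.Fragment) void {}
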
@@ -222,6 +225,9 @@ pub const AddressSpace = enum(u5) {
     param,
     shared,
     local,
+    input,
+    output,
+    uniform,

     // AVR address spaces.
     flash,

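The new `input`, `output`, and `uniform` address spaces hold the globals that form a shader's interface. A sketch of such declarations (identifiers are illustrative):

    var frag_coord: @Vector(4, f32) addrspace(.input) = undefined;
    var out_color: @Vector(4, f32) addrspace(.output) = undefined;
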
lib/std/gpu.zig (new file, 166 lines)
@@ -0,0 +1,166 @@
const std = @import("std.zig");
const comptimePrint = std.fmt.comptimePrint;

/// Will make `ptr` contain the location of the current invocation within the
/// global workgroup. Each component is equal to the index of the local workgroup
/// multiplied by the size of the local workgroup plus `localInvocationId`.
/// `ptr` must be a reference to a variable or struct field.
pub fn globalInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void {
    asm volatile (
        \\OpDecorate %ptr BuiltIn GlobalInvocationId
        :
        : [ptr] "" (ptr),
    );
}

/// Will make `ptr` contain the location of the current cluster
/// culling, task, mesh, or compute shader invocation within the local
/// workgroup. Each component ranges from zero through to the size of the
/// workgroup in that dimension minus one.
/// `ptr` must be a reference to a variable or struct field.
pub fn localInvocationId(comptime ptr: *addrspace(.input) @Vector(3, u32)) void {
    asm volatile (
        \\OpDecorate %ptr BuiltIn LocalInvocationId
        :
        : [ptr] "" (ptr),
    );
}

/// Output vertex position from a `Vertex` entry point.
/// `ptr` must be a reference to a variable or struct field.
pub fn position(comptime ptr: *addrspace(.output) @Vector(4, f32)) void {
    asm volatile (
        \\OpDecorate %ptr BuiltIn Position
        :
        : [ptr] "" (ptr),
    );
}

/// Will make `ptr` contain the index of the vertex that is
/// being processed by the current vertex shader invocation.
/// `ptr` must be a reference to a variable or struct field.
pub fn vertexIndex(comptime ptr: *addrspace(.input) u32) void {
    asm volatile (
        \\OpDecorate %ptr BuiltIn VertexIndex
        :
        : [ptr] "" (ptr),
    );
}

/// Will make `ptr` contain the framebuffer coordinates of the fragment
/// being processed. Only valid in a `Fragment` entry point.
/// `ptr` must be a reference to a variable or struct field.
pub fn fragmentCoord(comptime ptr: *addrspace(.input) @Vector(4, f32)) void {
    asm volatile (
        \\OpDecorate %ptr BuiltIn FragCoord
        :
        : [ptr] "" (ptr),
    );
}

/// Output fragment depth from a `Fragment` entry point.
/// `ptr` must be a reference to a variable or struct field.
pub fn fragmentDepth(comptime ptr: *addrspace(.output) f32) void {
    asm volatile (
        \\OpDecorate %ptr BuiltIn FragDepth
        :
        : [ptr] "" (ptr),
    );
}

/// Forms the main linkage for `input` and `output` address spaces.
/// `ptr` must be a reference to a variable or struct field.
pub fn location(comptime ptr: anytype, comptime loc: u32) void {
    const code = comptimePrint("OpDecorate %ptr Location {}", .{loc});
    asm volatile (code
        :
        : [ptr] "" (ptr),
    );
}

/// Binds a `uniform` resource to a descriptor set and binding number.
/// `ptr` must be a reference to a variable or struct field.
pub fn binding(comptime ptr: anytype, comptime group: u32, comptime bind: u32) void {
    const code = comptimePrint(
        \\OpDecorate %ptr DescriptorSet {}
        \\OpDecorate %ptr Binding {}
    , .{ group, bind });
    asm volatile (code
        :
        : [ptr] "" (ptr),
    );
}

pub const Origin = enum(u32) {
    /// Increase toward the right and downward
    upper_left = 7,
    /// Increase toward the right and upward
    lower_left = 8,
};

/// The coordinates appear to originate in the specified `origin`.
/// Only valid with the `Fragment` calling convention.
pub fn fragmentOrigin(comptime entry_point: anytype, comptime origin: Origin) void {
    const origin_enum = switch (origin) {
        .upper_left => .OriginUpperLeft,
        .lower_left => .OriginLowerLeft,
    };
    asm volatile ("OpExecutionMode %entry_point " ++ @tagName(origin_enum)
        :
        : [entry_point] "" (entry_point),
    );
}

pub const DepthMode = enum(u32) {
    /// Declares that this entry point dynamically writes the
    /// `fragmentDepth` built-in decorated variable.
    replacing = 12,
    /// Indicates that per-fragment tests may assume that
    /// any `fragmentDepth` built-in decorated value written by the shader is
    /// greater-than-or-equal to the fragment’s interpolated depth value
    greater = 14,
    /// Indicates that per-fragment tests may assume that
    /// any `fragmentDepth` built-in decorated value written by the shader is
    /// less-than-or-equal to the fragment’s interpolated depth value
    less = 15,
    /// Indicates that per-fragment tests may assume that
    /// any `fragmentDepth` built-in decorated value written by the shader is
    /// the same as the fragment’s interpolated depth value
    unchanged = 16,
};

/// Only valid with the `Fragment` calling convention.
pub fn depthMode(comptime entry_point: anytype, comptime mode: DepthMode) void {
    const code = comptimePrint("OpExecutionMode %entry_point {}", .{@intFromEnum(mode)});
    asm volatile (code
        :
        : [entry_point] "" (entry_point),
    );
}

/// Indicates the workgroup size in the `x`, `y`, and `z` dimensions.
/// Only valid with the `GLCompute` or `Kernel` calling conventions.
pub fn workgroupSize(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void {
    const code = comptimePrint("OpExecutionMode %entry_point LocalSize {} {} {}", .{
        size[0],
        size[1],
        size[2],
    });
    asm volatile (code
        :
        : [entry_point] "" (entry_point),
    );
}

/// A hint to the client, which indicates the workgroup size in the `x`, `y`, and `z` dimensions.
/// Only valid with the `GLCompute` or `Kernel` calling conventions.
pub fn workgroupSizeHint(comptime entry_point: anytype, comptime size: @Vector(3, u32)) void {
    const code = comptimePrint("OpExecutionMode %entry_point LocalSizeHint {} {} {}", .{
        size[0],
        size[1],
        size[2],
    });
    asm volatile (code
        :
        : [entry_point] "" (entry_point),
    );
}

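Taken together, the helpers above let a small shader pair be written as ordinary Zig. A sketch, assuming a SPIR-V Vulkan target (all identifiers here are illustrative):

    const gpu = @import("std").gpu;

    var vertex_index: u32 addrspace(.input) = undefined;
    var out_position: @Vector(4, f32) addrspace(.output) = undefined;
    var out_color: @Vector(4, f32) addrspace(.output) = undefined;

    export fn vertMain() callconv(.Vertex) void {
        // The comptime asm decorates each global with the matching BuiltIn.
        gpu.vertexIndex(&vertex_index);
        gpu.position(&out_position);
        out_position = .{ 0.0, 0.0, 0.0, 1.0 };
    }

    export fn fragMain() callconv(.Fragment) void {
        gpu.location(&out_color, 0);
        out_color = .{ 1.0, 0.0, 0.0, 1.0 };
    }
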
@@ -104,6 +104,9 @@ pub const fmt = @import("fmt.zig");
 /// File system-related functionality.
 pub const fs = @import("fs.zig");

+/// GPU programming helpers.
+pub const gpu = @import("gpu.zig");
+
 /// Fast hashing functions (i.e. not cryptographically secure).
 pub const hash = @import("hash.zig");
 pub const hash_map = @import("hash_map.zig");

@@ -9741,6 +9741,10 @@ fn finishFunc(
         .nvptx, .nvptx64, .amdgcn, .spirv32, .spirv64 => null,
         else => "nvptx, amdgcn and SPIR-V",
     },
+    .Fragment, .Vertex => switch (arch) {
+        .spirv32, .spirv64 => null,
+        else => "SPIR-V",
+    },
 })) |allowed_platform| {
     return sema.fail(block, cc_src, "callconv '{s}' is only available on {s}, not {s}", .{
         @tagName(cc_resolved),
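On any non-SPIR-V target the new arm routes the shader conventions into the existing error path, so per the format string above the diagnostic would read roughly like this (sketch):

    // error: callconv 'Fragment' is only available on SPIR-V, not x86_64
    export fn f() callconv(.Fragment) void {}
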
@@ -37917,6 +37921,7 @@ pub fn analyzeAddressSpace(
         .gs, .fs, .ss => (arch == .x86 or arch == .x86_64) and ctx == .pointer,
         // TODO: check that .shared and .local are left uninitialized
         .param => is_nv,
+        .input, .output, .uniform => is_spirv,
         .global, .shared, .local => is_gpu,
         .constant => is_gpu and (ctx == .constant),
         // TODO this should also check how many flash banks the cpu has

@@ -10848,6 +10848,7 @@ fn toLlvmCallConv(cc: std.builtin.CallingConvention, target: std.Target) Builder
         .amdgcn => .amdgpu_kernel,
         else => unreachable,
     },
+    .Vertex, .Fragment => unreachable,
 };
}

@@ -451,12 +451,12 @@ const DeclGen = struct {
         const spv_decl_index = blk: {
             const entry = try self.object.anon_decl_link.getOrPut(self.object.gpa, .{ val, storage_class });
             if (entry.found_existing) {
-                try self.func.decl_deps.put(self.spv.gpa, entry.value_ptr.*, {});
+                try self.addFunctionDep(entry.value_ptr.*, storage_class);
                 return self.spv.declPtr(entry.value_ptr.*).result_id;
             }

             const spv_decl_index = try self.spv.allocDecl(.global);
-            try self.func.decl_deps.put(self.spv.gpa, spv_decl_index, {});
+            try self.addFunctionDep(spv_decl_index, storage_class);
             entry.value_ptr.* = spv_decl_index;
             break :blk spv_decl_index;
         };

@@ -529,6 +529,37 @@ const DeclGen = struct {
         return var_id;
     }

+    fn addFunctionDep(self: *DeclGen, decl_index: SpvModule.Decl.Index, storage_class: StorageClass) !void {
+        const target = self.getTarget();
+        if (target.os.tag == .vulkan) {
+            // Shader entry point dependencies must be variables with Input or Output storage class
+            switch (storage_class) {
+                .Input, .Output => {
+                    try self.func.decl_deps.put(self.spv.gpa, decl_index, {});
+                },
+                else => {},
+            }
+        } else {
+            try self.func.decl_deps.put(self.spv.gpa, decl_index, {});
+        }
+    }
+
+    fn castToGeneric(self: *DeclGen, type_id: IdRef, ptr_id: IdRef) !IdRef {
+        const target = self.getTarget();
+
+        if (target.os.tag == .vulkan) {
+            return ptr_id;
+        } else {
+            const result_id = self.spv.allocId();
+            try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{
+                .id_result_type = type_id,
+                .id_result = result_id,
+                .pointer = ptr_id,
+            });
+            return result_id;
+        }
+    }
+
     /// Start a new SPIR-V block. Emits the label of the new block, and stores which
     /// block we are currently generating.
     /// Note that there is no such thing as nested blocks like in ZIR or AIR, so we don't need to
@@ -713,6 +744,30 @@ const DeclGen = struct {
         return try self.load(ty, ptr_composite_id, .{});
     }

+    /// Construct a vector at runtime.
+    /// ty must be a vector type.
+    /// Constituents should be in `indirect` representation (as the elements of a vector should be).
+    /// Result is in `direct` representation.
+    fn constructVector(self: *DeclGen, ty: Type, constituents: []const IdRef) !IdRef {
+        // The Khronos LLVM-SPIRV translator crashes because it cannot construct structs whose
+        // operands are not constant.
+        // See https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1349
+        // For now, just initialize the vector by setting the fields manually...
+        // TODO: Make this OpCompositeConstruct when we can
+        const mod = self.module;
+        const ptr_composite_id = try self.alloc(ty, .{ .storage_class = .Function });
+        const ptr_elem_ty_ref = try self.ptrType(ty.elemType2(mod), .Function);
+        for (constituents, 0..) |constituent_id, index| {
+            const ptr_id = try self.accessChain(ptr_elem_ty_ref, ptr_composite_id, &.{@as(u32, @intCast(index))});
+            try self.func.body.emit(self.spv.gpa, .OpStore, .{
+                .pointer = ptr_id,
+                .object = constituent_id,
+            });
+        }
+
+        return try self.load(ty, ptr_composite_id, .{});
+    }
+
     /// Construct an array at runtime.
     /// ty must be an array type.
     /// Constituents should be in `indirect` representation (as the elements of an array should be).
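Because of the translator issue cited in the comment, vectors are materialized through a Function-storage temporary rather than `OpCompositeConstruct`. Roughly, for a two-element vector the emitted SPIR-V looks like this (an illustrative sketch, not literal compiler output):

    %tmp = OpVariable %ptr_fn_v2 Function
    %p0  = OpAccessChain %ptr_fn_elem %tmp %c0
           OpStore %p0 %elem0
    %p1  = OpAccessChain %ptr_fn_elem %tmp %c1
           OpStore %p1 %elem1
    %vec = OpLoad %v2 %tmp          ; instead of OpCompositeConstruct
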
@@ -932,13 +987,16 @@ const DeclGen = struct {
             }

             switch (tag) {
-                inline .array_type => if (array_type.sentinel != .none) {
-                    constituents[constituents.len - 1] = try self.constant(elem_ty, Value.fromInterned(array_type.sentinel), .indirect);
+                inline .array_type => {
+                    if (array_type.sentinel != .none) {
+                        const sentinel = Value.fromInterned(array_type.sentinel);
+                        constituents[constituents.len - 1] = try self.constant(elem_ty, sentinel, .indirect);
+                    }
+                    return self.constructArray(ty, constituents);
                 },
-                else => {},
+                inline .vector_type => return self.constructVector(ty, constituents),
+                else => unreachable,
             }
-
-            return try self.constructArray(ty, constituents);
         },
         .struct_type => {
             const struct_type = mod.typeToStruct(ty).?;
@@ -1019,7 +1077,7 @@ const DeclGen = struct {

         // TODO: Can we consolidate this in ptrElemPtr?
         const elem_ty = parent_ptr_ty.elemType2(mod); // use elemType() so that we get T for *[N]T.
-        const elem_ptr_ty_ref = try self.ptrType(elem_ty, spvStorageClass(parent_ptr_ty.ptrAddressSpace(mod)));
+        const elem_ptr_ty_ref = try self.ptrType(elem_ty, self.spvStorageClass(parent_ptr_ty.ptrAddressSpace(mod)));

         if (elem_ptr_ty_ref == result_ty_ref) {
             return elem_ptr_id;
@@ -1074,7 +1132,7 @@ const DeclGen = struct {
         unreachable; // TODO
     }

-    const final_storage_class = spvStorageClass(ty.ptrAddressSpace(mod));
+    const final_storage_class = self.spvStorageClass(ty.ptrAddressSpace(mod));
     const actual_storage_class = switch (final_storage_class) {
         .Generic => .CrossWorkgroup,
         else => |other| other,
@@ -1084,15 +1142,7 @@ const DeclGen = struct {
     const decl_ptr_ty_ref = try self.ptrType(decl_ty, final_storage_class);

     const ptr_id = switch (final_storage_class) {
-        .Generic => blk: {
-            const result_id = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{
-                .id_result_type = self.typeId(decl_ptr_ty_ref),
-                .id_result = result_id,
-                .pointer = decl_id,
-            });
-            break :blk result_id;
-        },
+        .Generic => try self.castToGeneric(self.typeId(decl_ptr_ty_ref), decl_id),
         else => decl_id,
     };

@@ -1115,6 +1165,7 @@ const DeclGen = struct {
     const ty_ref = try self.resolveType(ty, .direct);
     const ty_id = self.typeId(ty_ref);
+    const decl = mod.declPtr(decl_index);

     switch (mod.intern_pool.indexToKey(decl.val.ip_index)) {
         .func => {
             // TODO: Properly lower function pointers. For now we are going to hack around it and
@@ -1133,23 +1184,13 @@ const DeclGen = struct {
     const spv_decl_index = try self.object.resolveDecl(mod, decl_index);

     const decl_id = self.spv.declPtr(spv_decl_index).result_id;
-    try self.func.decl_deps.put(self.spv.gpa, spv_decl_index, {});

-    const final_storage_class = spvStorageClass(decl.@"addrspace");
+    const final_storage_class = self.spvStorageClass(decl.@"addrspace");
+    try self.addFunctionDep(spv_decl_index, final_storage_class);

     const decl_ptr_ty_ref = try self.ptrType(decl.ty, final_storage_class);

     const ptr_id = switch (final_storage_class) {
-        .Generic => blk: {
-            // Pointer should be Generic, but is actually placed in CrossWorkgroup.
-            const result_id = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{
-                .id_result_type = self.typeId(decl_ptr_ty_ref),
-                .id_result = result_id,
-                .pointer = decl_id,
-            });
-            break :blk result_id;
-        },
+        .Generic => try self.castToGeneric(self.typeId(decl_ptr_ty_ref), decl_id),
         else => decl_id,
     };

@@ -1195,8 +1236,12 @@ const DeclGen = struct {
         // An array of largestSupportedIntBits.
         return self.todo("Implement {s} composite int type of {} bits", .{ @tagName(signedness), bits });
     };

     // Kernel only supports unsigned ints.
     // TODO: Only do this with Kernels
+    if (self.getTarget().os.tag == .vulkan) {
+        return self.spv.intType(signedness, backing_bits);
+    }
+
     return self.spv.intType(.unsigned, backing_bits);
 }

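That is, an integer type keeps its signedness only when targeting Vulkan; OpenCL-style kernels still normalize everything to unsigned. Illustrative lowering of a Zig i32 (sketch, not literal output):

    OpTypeInt 32 1   ; i32 when target.os.tag == .vulkan
    OpTypeInt 32 0   ; i32 on other SPIR-V targets (unsigned-only kernels)
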
@@ -1453,7 +1498,7 @@ const DeclGen = struct {
     // Note: Don't cache this pointer type, it would mess up the recursive pointer functionality
     // in ptrType()!

-    const storage_class = spvStorageClass(ptr_info.flags.address_space);
+    const storage_class = self.spvStorageClass(ptr_info.flags.address_space);
     const ptr_ty_ref = try self.ptrType(Type.fromInterned(ptr_info.child), storage_class);

     if (ptr_info.flags.size != .Slice) {
@@ -1474,8 +1519,14 @@ const DeclGen = struct {

     const elem_ty = ty.childType(mod);
     const elem_ty_ref = try self.resolveType(elem_ty, .indirect);
+    const len = ty.vectorLen(mod);
+    const is_scalar = elem_ty.isNumeric(mod) or elem_ty.toIntern() == .bool_type;
+
+    const ty_ref = if (is_scalar and len > 1 and len <= 4)
+        try self.spv.vectorType(ty.vectorLen(mod), elem_ty_ref)
+    else
+        try self.spv.arrayType(ty.vectorLen(mod), elem_ty_ref);

-    const ty_ref = try self.spv.arrayType(ty.vectorLen(mod), elem_ty_ref);
     try self.type_map.put(self.gpa, ty.toIntern(), .{ .ty_ref = ty_ref });
     return ty_ref;
 },
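So a vector of 2 to 4 numeric or bool elements now becomes a true SPIR-V vector, while larger or non-scalar vectors keep lowering to arrays. For example (illustrative):

    %v4f32 = OpTypeVector %f32 4      ; @Vector(4, f32): is_scalar and len <= 4
    %a8f32 = OpTypeArray %f32 %c8     ; @Vector(8, f32): falls back to an array
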
@@ -1634,13 +1685,20 @@ const DeclGen = struct {
         }
     }

-    fn spvStorageClass(as: std.builtin.AddressSpace) StorageClass {
+    fn spvStorageClass(self: *DeclGen, as: std.builtin.AddressSpace) StorageClass {
+        const target = self.getTarget();
         return switch (as) {
-            .generic => .Generic,
+            .generic => switch (target.os.tag) {
+                .vulkan => .Private,
+                else => .Generic,
+            },
             .shared => .Workgroup,
             .local => .Private,
             .global => .CrossWorkgroup,
             .constant => .UniformConstant,
+            .input => .Input,
+            .output => .Output,
+            .uniform => .Uniform,
             .gs,
             .fs,
             .ss,
|
||||
// point name is the same as a different OpName.
|
||||
const test_name = try std.fmt.allocPrint(self.gpa, "test {s}", .{name});
|
||||
defer self.gpa.free(test_name);
|
||||
try self.spv.declareEntryPoint(spv_decl_index, test_name);
|
||||
try self.spv.declareEntryPoint(spv_decl_index, test_name, .Kernel);
|
||||
}
|
||||
|
||||
fn genDecl(self: *DeclGen) !void {
|
||||
@@ -1928,6 +1986,7 @@ const DeclGen = struct {
     const ip = &mod.intern_pool;
     const decl = mod.declPtr(self.decl_index);
     const spv_decl_index = try self.object.resolveDecl(mod, self.decl_index);
+    const target = self.getTarget();

     const decl_id = self.spv.declPtr(spv_decl_index).result_id;

@@ -1994,30 +2053,24 @@ const DeclGen = struct {
             try self.generateTestEntryPoint(fqn, spv_decl_index);
         }
     } else {
-        const init_val = if (decl.val.getVariable(mod)) |payload|
-            Value.fromInterned(payload.init)
-        else
-            decl.val;
-
-        if (init_val.ip_index == .unreachable_value) {
-            return self.todo("importing extern variables", .{});
-        }
-
-        // Currently, initializers for CrossWorkgroup variables is not implemented
-        // in Mesa. Therefore we generate an initialization kernel instead.
-
-        const void_ty_ref = try self.resolveType(Type.void, .direct);
-
-        const initializer_proto_ty_ref = try self.spv.resolve(.{ .function_type = .{
-            .return_type = void_ty_ref,
-            .parameters = &.{},
-        } });
+        const opt_init_val: ?Value = blk: {
+            if (decl.val.getVariable(mod)) |payload| {
+                if (payload.is_extern) break :blk null;
+                break :blk Value.fromInterned(payload.init);
+            }
+            break :blk decl.val;
+        };

         // Generate the actual variable for the global...
-        const final_storage_class = spvStorageClass(decl.@"addrspace");
-        const actual_storage_class = switch (final_storage_class) {
-            .Generic => .CrossWorkgroup,
-            else => final_storage_class,
+        const final_storage_class = self.spvStorageClass(decl.@"addrspace");
+        const actual_storage_class = blk: {
+            if (target.os.tag != .vulkan) {
+                break :blk switch (final_storage_class) {
+                    .Generic => .CrossWorkgroup,
+                    else => final_storage_class,
+                };
+            }
+            break :blk final_storage_class;
         };

         const ptr_ty_ref = try self.ptrType(decl.ty, actual_storage_class);
@@ -2028,37 +2081,51 @@ const DeclGen = struct {
             .id_result = decl_id,
             .storage_class = actual_storage_class,
         });

-        // Now emit the instructions that initialize the variable.
-        const initializer_id = self.spv.allocId();
-        try self.func.prologue.emit(self.spv.gpa, .OpFunction, .{
-            .id_result_type = self.typeId(void_ty_ref),
-            .id_result = initializer_id,
-            .function_control = .{},
-            .function_type = self.typeId(initializer_proto_ty_ref),
-        });
-        const root_block_id = self.spv.allocId();
-        try self.func.prologue.emit(self.spv.gpa, .OpLabel, .{
-            .id_result = root_block_id,
-        });
-        self.current_block_label = root_block_id;
-
-        const val_id = try self.constant(decl.ty, init_val, .indirect);
-        try self.func.body.emit(self.spv.gpa, .OpStore, .{
-            .pointer = decl_id,
-            .object = val_id,
-        });
-
-        // TODO: We should be able to get rid of this by now...
-        self.spv.endGlobal(spv_decl_index, begin, decl_id, initializer_id);
-
-        try self.func.body.emit(self.spv.gpa, .OpReturn, {});
-        try self.func.body.emit(self.spv.gpa, .OpFunctionEnd, {});
-        try self.spv.addFunction(spv_decl_index, self.func);

         const fqn = ip.stringToSlice(try decl.getFullyQualifiedName(self.module));
         try self.spv.debugName(decl_id, fqn);
-        try self.spv.debugNameFmt(initializer_id, "initializer of {s}", .{fqn});
+
+        if (opt_init_val) |init_val| {
+            // Currently, initializers for CrossWorkgroup variables is not implemented
+            // in Mesa. Therefore we generate an initialization kernel instead.
+            const void_ty_ref = try self.resolveType(Type.void, .direct);
+
+            const initializer_proto_ty_ref = try self.spv.resolve(.{ .function_type = .{
+                .return_type = void_ty_ref,
+                .parameters = &.{},
+            } });
+
+            // Now emit the instructions that initialize the variable.
+            const initializer_id = self.spv.allocId();
+            try self.func.prologue.emit(self.spv.gpa, .OpFunction, .{
+                .id_result_type = self.typeId(void_ty_ref),
+                .id_result = initializer_id,
+                .function_control = .{},
+                .function_type = self.typeId(initializer_proto_ty_ref),
+            });
+            const root_block_id = self.spv.allocId();
+            try self.func.prologue.emit(self.spv.gpa, .OpLabel, .{
+                .id_result = root_block_id,
+            });
+            self.current_block_label = root_block_id;
+
+            const val_id = try self.constant(decl.ty, init_val, .indirect);
+            try self.func.body.emit(self.spv.gpa, .OpStore, .{
+                .pointer = decl_id,
+                .object = val_id,
+            });
+
+            // TODO: We should be able to get rid of this by now...
+            self.spv.endGlobal(spv_decl_index, begin, decl_id, initializer_id);
+
+            try self.func.body.emit(self.spv.gpa, .OpReturn, {});
+            try self.func.body.emit(self.spv.gpa, .OpFunctionEnd, {});
+            try self.spv.addFunction(spv_decl_index, self.func);
+
+            try self.spv.debugNameFmt(initializer_id, "initializer of {s}", .{fqn});
+        } else {
+            self.spv.endGlobal(spv_decl_index, begin, decl_id, null);
+            try self.spv.declareDeclDeps(spv_decl_index, &.{});
+        }
     }
 }

@@ -3654,7 +3721,19 @@ const DeclGen = struct {
             constituents[0..index],
         );
     },
-    .Vector, .Array => {
+    .Vector => {
+        const n_elems = result_ty.vectorLen(mod);
+        const elem_ids = try self.gpa.alloc(IdRef, n_elems);
+        defer self.gpa.free(elem_ids);
+
+        for (elements, 0..) |element, i| {
+            const id = try self.resolve(element);
+            elem_ids[i] = try self.convertToIndirect(result_ty.childType(mod), id);
+        }
+
+        return try self.constructVector(result_ty, elem_ids);
+    },
+    .Array => {
         const array_info = result_ty.arrayInfo(mod);
         const n_elems: usize = @intCast(result_ty.arrayLenIncludingSentinel(mod));
         const elem_ids = try self.gpa.alloc(IdRef, n_elems);
@@ -3761,7 +3840,7 @@ const DeclGen = struct {
     const mod = self.module;
     // Construct new pointer type for the resulting pointer
     const elem_ty = ptr_ty.elemType2(mod); // use elemType() so that we get T for *[N]T.
-    const elem_ptr_ty_ref = try self.ptrType(elem_ty, spvStorageClass(ptr_ty.ptrAddressSpace(mod)));
+    const elem_ptr_ty_ref = try self.ptrType(elem_ty, self.spvStorageClass(ptr_ty.ptrAddressSpace(mod)));
     if (ptr_ty.isSinglePointer(mod)) {
         // Pointer-to-array. In this case, the resulting pointer is not of the same type
         // as the ptr_ty (we want a *T, not a *[N]T), and hence we need to use accessChain.
@@ -3835,7 +3914,7 @@ const DeclGen = struct {
     const vector_ty = vector_ptr_ty.childType(mod);
     const scalar_ty = vector_ty.scalarType(mod);

-    const storage_class = spvStorageClass(vector_ptr_ty.ptrAddressSpace(mod));
+    const storage_class = self.spvStorageClass(vector_ptr_ty.ptrAddressSpace(mod));
     const scalar_ptr_ty_ref = try self.ptrType(scalar_ty, storage_class);

     const vector_ptr = try self.resolve(data.vector_ptr);
@@ -3858,7 +3937,7 @@ const DeclGen = struct {
     if (layout.tag_size == 0) return;

     const tag_ty = un_ty.unionTagTypeSafety(mod).?;
-    const tag_ptr_ty_ref = try self.ptrType(tag_ty, spvStorageClass(un_ptr_ty.ptrAddressSpace(mod)));
+    const tag_ptr_ty_ref = try self.ptrType(tag_ty, self.spvStorageClass(un_ptr_ty.ptrAddressSpace(mod)));

     const union_ptr_id = try self.resolve(bin_op.lhs);
     const new_tag_id = try self.resolve(bin_op.rhs);
@@ -4079,7 +4158,7 @@ const DeclGen = struct {
         return try self.spv.constUndef(result_ty_ref);
     }

-    const storage_class = spvStorageClass(object_ptr_ty.ptrAddressSpace(mod));
+    const storage_class = self.spvStorageClass(object_ptr_ty.ptrAddressSpace(mod));
     const pl_ptr_ty_ref = try self.ptrType(layout.payload_ty, storage_class);
     const pl_ptr_id = try self.accessChain(pl_ptr_ty_ref, object_ptr, &.{layout.payload_index});

@@ -4134,17 +4213,16 @@ const DeclGen = struct {
         .initializer = options.initializer,
     });

+    const target = self.getTarget();
+    if (target.os.tag == .vulkan) {
+        return var_id;
+    }
+
     switch (options.storage_class) {
         .Generic => {
             const ptr_gn_ty_ref = try self.ptrType(ty, .Generic);
             // Convert to a generic pointer
-            const result_id = self.spv.allocId();
-            try self.func.body.emit(self.spv.gpa, .OpPtrCastToGeneric, .{
-                .id_result_type = self.typeId(ptr_gn_ty_ref),
-                .id_result = result_id,
-                .pointer = var_id,
-            });
-            return result_id;
+            return self.castToGeneric(self.typeId(ptr_gn_ty_ref), var_id);
         },
         .Function => return var_id,
         else => unreachable,
@@ -4880,7 +4958,7 @@ const DeclGen = struct {
     const is_non_null_id = blk: {
         if (is_pointer) {
             if (payload_ty.hasRuntimeBitsIgnoreComptime(mod)) {
-                const storage_class = spvStorageClass(operand_ty.ptrAddressSpace(mod));
+                const storage_class = self.spvStorageClass(operand_ty.ptrAddressSpace(mod));
                 const bool_ptr_ty = try self.ptrType(Type.bool, storage_class);
                 const tag_ptr_id = try self.accessChain(bool_ptr_ty, operand_id, &.{1});
                 break :blk try self.load(Type.bool, tag_ptr_id, .{});

@@ -591,9 +591,13 @@ fn parseValueEnum(self: *Assembler, kind: spec.OperandKind) !void {
     try self.expectToken(.value);

     const text = self.tokenText(tok);
+    const int_value = std.fmt.parseInt(u32, text, 0) catch null;
     const enumerant = for (kind.enumerants()) |enumerant| {
-        if (std.mem.eql(u8, enumerant.name, text))
-            break enumerant;
+        if (int_value) |v| {
+            if (v == enumerant.value) break enumerant;
+        } else {
+            if (std.mem.eql(u8, enumerant.name, text)) break enumerant;
+        }
     } else {
         return self.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ text, @tagName(kind) });
     };

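This lets enum operands be written numerically as well as by name, which the new std.gpu helpers rely on when they print @intFromEnum values into inline assembly. Both spellings are now equivalent (sketch):

    OpExecutionMode %entry_point DepthReplacing
    OpExecutionMode %entry_point 12
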
@@ -92,7 +92,7 @@ pub const Global = struct {
     /// The past-end offset into `self.globals.section`.
     end_inst: u32,
     /// The result-id of the function that initializes this value.
-    initializer_id: IdRef,
+    initializer_id: ?IdRef,
 };

 /// This models a kernel entry point.
@@ -101,6 +101,8 @@ pub const EntryPoint = struct {
     decl_index: Decl.Index,
     /// The name of the kernel to be exported.
     name: CacheString,
+    /// Calling Convention
+    execution_model: spec.ExecutionModel,
 };

 /// A general-purpose allocator which may be used to allocate resources for this module
@@ -313,7 +315,7 @@ fn entryPoints(self: *Module) !Section {

     const entry_point_id = self.declPtr(entry_point.decl_index).result_id;
     try entry_points.emit(self.gpa, .OpEntryPoint, .{
-        .execution_model = .Kernel,
+        .execution_model = entry_point.execution_model,
         .entry_point = entry_point_id,
         .name = self.cache.getString(entry_point.name).?,
         .interface = interface.items,
@@ -362,11 +364,13 @@ fn initializer(self: *Module, entry_points: *Section) !Section {

     for (self.globals.globals.keys(), self.globals.globals.values()) |decl_index, global| {
         try self.addEntryPointDeps(decl_index, &seen, &interface);
-        try section.emit(self.gpa, .OpFunctionCall, .{
-            .id_result_type = void_ty_id,
-            .id_result = self.allocId(),
-            .function = global.initializer_id,
-        });
+        if (global.initializer_id) |initializer_id| {
+            try section.emit(self.gpa, .OpFunctionCall, .{
+                .id_result_type = void_ty_id,
+                .id_result = self.allocId(),
+                .function = initializer_id,
+            });
+        }
     }

     try section.emit(self.gpa, .OpReturn, {});
@@ -390,7 +394,7 @@ fn initializer(self: *Module, entry_points: *Section) !Section {
 }

 /// Emit this module as a spir-v binary.
-pub fn flush(self: *Module, file: std.fs.File) !void {
+pub fn flush(self: *Module, file: std.fs.File, target: std.Target) !void {
     // See SPIR-V Spec section 2.3, "Physical Layout of a SPIR-V Module and Instruction"

     // TODO: Perform topological sort on the globals.
@@ -403,14 +407,25 @@ pub fn flush(self: *Module, file: std.fs.File) !void {
     var types_constants = try self.cache.materialize(self);
     defer types_constants.deinit(self.gpa);

-    var init_func = try self.initializer(&entry_points);
+    // TODO: Vulkan doesn't support initializer kernel
+    var init_func = if (target.os.tag != .vulkan)
+        try self.initializer(&entry_points)
+    else
+        Section{};
     defer init_func.deinit(self.gpa);

     const header = [_]Word{
         spec.magic_number,
         // TODO: From cpu features
-        // Emit SPIR-V 1.4 for now. This is the highest version that Intel's CPU OpenCL supports.
-        (1 << 16) | (4 << 8),
+        spec.Version.toWord(.{
+            .major = 1,
+            .minor = switch (target.os.tag) {
+                // Emit SPIR-V 1.3 for now. This is the highest version that Vulkan 1.1 supports.
+                .vulkan => 3,
+                // Emit SPIR-V 1.4 for now. This is the highest version that Intel's CPU OpenCL supports.
+                else => 4,
+            },
+        }),
         0, // TODO: Register Zig compiler magic number.
         self.idBound(),
         0, // Schema (currently reserved for future use)
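Version.toWord reproduces the header encoding of the old literal: the minor version sits in bits 8-15 and the major version in bits 16-23 of the word. A quick check (hypothetical test; the spec import path is assumed):

    const std = @import("std");
    const spec = @import("spec.zig"); // assumed path

    test "SPIR-V version word layout" {
        const w = spec.Version.toWord(.{ .major = 1, .minor = 4 });
        // Same value as the old hand-written literal.
        try std.testing.expectEqual(@as(u32, (1 << 16) | (4 << 8)), w);
        try std.testing.expectEqual(@as(u32, 0x0001_0400), w);
    }
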
@@ -493,6 +508,13 @@ pub fn intType(self: *Module, signedness: std.builtin.Signedness, bits: u16) !Ca
     } });
 }

+pub fn vectorType(self: *Module, len: u32, elem_ty_ref: CacheRef) !CacheRef {
+    return try self.resolve(.{ .vector_type = .{
+        .component_type = elem_ty_ref,
+        .component_count = len,
+    } });
+}
+
 pub fn arrayType(self: *Module, len: u32, elem_ty_ref: CacheRef) !CacheRef {
     const len_ty_ref = try self.resolve(.{ .int_type = .{
         .signedness = .unsigned,
@@ -617,7 +639,13 @@ pub fn beginGlobal(self: *Module) u32 {
     return @as(u32, @intCast(self.globals.section.instructions.items.len));
 }

-pub fn endGlobal(self: *Module, global_index: Decl.Index, begin_inst: u32, result_id: IdRef, initializer_id: IdRef) void {
+pub fn endGlobal(
+    self: *Module,
+    global_index: Decl.Index,
+    begin_inst: u32,
+    result_id: IdRef,
+    initializer_id: ?IdRef,
+) void {
     const global = self.globalPtr(global_index).?;
     global.* = .{
         .result_id = result_id,
@@ -627,10 +655,16 @@ pub fn endGlobal(self: *Module, global_index: Decl.Index, begin_inst: u32, resul
     };
 }

-pub fn declareEntryPoint(self: *Module, decl_index: Decl.Index, name: []const u8) !void {
+pub fn declareEntryPoint(
+    self: *Module,
+    decl_index: Decl.Index,
+    name: []const u8,
+    execution_model: spec.ExecutionModel,
+) !void {
     try self.entry_points.append(self.gpa, .{
         .decl_index = decl_index,
         .name = try self.resolveString(name),
+        .execution_model = execution_model,
     });
 }

@@ -1,6 +1,15 @@
 //! This file is auto-generated by tools/gen_spirv_spec.zig.

-const Version = @import("std").SemanticVersion;
+pub const Version = packed struct(Word) {
+    padding: u8 = 0,
+    minor: u8,
+    major: u8,
+    padding0: u8 = 0,
+
+    pub fn toWord(self: @This()) Word {
+        return @bitCast(self);
+    }
+};

 pub const Word = u32;
 pub const IdResult = struct {

@@ -86,8 +86,6 @@ pub fn createEmpty(
         else => unreachable, // Caught by Compilation.Config.resolve.
     }

-    assert(target.abi != .none); // Caught by Compilation.Config.resolve.
-
     return self;
 }

@@ -158,10 +156,27 @@ pub fn updateExports(
         },
     };
     const decl = mod.declPtr(decl_index);
-    if (decl.val.isFuncBody(mod) and decl.ty.fnCallingConvention(mod) == .Kernel) {
+    if (decl.val.isFuncBody(mod)) {
+        const target = mod.getTarget();
         const spv_decl_index = try self.object.resolveDecl(mod, decl_index);
-        for (exports) |exp| {
-            try self.object.spv.declareEntryPoint(spv_decl_index, mod.intern_pool.stringToSlice(exp.opts.name));
+        const execution_model = switch (decl.ty.fnCallingConvention(mod)) {
+            .Vertex => spec.ExecutionModel.Vertex,
+            .Fragment => spec.ExecutionModel.Fragment,
+            .Kernel => spec.ExecutionModel.Kernel,
+            else => unreachable,
+        };
+        const is_vulkan = target.os.tag == .vulkan;
+
+        if ((!is_vulkan and execution_model == .Kernel) or
+            (is_vulkan and (execution_model == .Fragment or execution_model == .Vertex)))
+        {
+            for (exports) |exp| {
+                try self.object.spv.declareEntryPoint(
+                    spv_decl_index,
+                    mod.intern_pool.stringToSlice(exp.opts.name),
+                    execution_model,
+                );
+            }
         }
     }

@@ -224,7 +239,7 @@ pub fn flushModule(self: *SpirV, arena: Allocator, prog_node: *std.Progress.Node
         .extension = error_info.items,
     });

-    try spv.flush(self.base.file.?);
+    try spv.flush(self.base.file.?, target);
 }

 fn writeCapabilities(spv: *SpvModule, target: std.Target) !void {
@@ -233,7 +248,7 @@ fn writeCapabilities(spv: *SpvModule, target: std.Target) !void {
     const caps: []const spec.Capability = switch (target.os.tag) {
         .opencl => &.{ .Kernel, .Addresses, .Int8, .Int16, .Int64, .Float64, .Float16, .GenericPointer },
         .glsl450 => &.{.Shader},
-        .vulkan => &.{.Shader},
+        .vulkan => &.{ .Shader, .VariablePointersStorageBuffer, .Int8, .Int16, .Int64, .Float64, .Float16 },
         else => unreachable, // TODO
     };

@@ -77,7 +77,16 @@ fn render(writer: anytype, allocator: Allocator, registry: g.CoreRegistry) !void
     try writer.writeAll(
         \\//! This file is auto-generated by tools/gen_spirv_spec.zig.
         \\
-        \\const Version = @import("std").SemanticVersion;
+        \\pub const Version = packed struct(Word) {
+        \\    padding: u8 = 0,
+        \\    minor: u8,
+        \\    major: u8,
+        \\    padding0: u8 = 0,
+        \\
+        \\    pub fn toWord(self: @This()) Word {
+        \\        return @bitCast(self);
+        \\    }
+        \\};
         \\
         \\pub const Word = u32;
         \\pub const IdResult = struct{
