This reverts commit 9f0359d78f in an attempt to
make the tests pass again. The CI failure from that merge should be unrelated
to this commit.
931 lines
35 KiB
Zig
931 lines
35 KiB
Zig
const Assembler = @This();
|
|
|
|
const std = @import("std");
|
|
const Allocator = std.mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
|
|
const spec = @import("spec.zig");
|
|
const Opcode = spec.Opcode;
|
|
const Word = spec.Word;
|
|
const IdRef = spec.IdRef;
|
|
const IdResult = spec.IdResult;
|
|
|
|
const SpvModule = @import("Module.zig");
|
|
const CacheRef = SpvModule.CacheRef;
|
|
const CacheKey = SpvModule.CacheKey;
|
|
|
|
/// One lexical token of the assembly template: its kind plus the byte range
/// `start..end` it occupies in the source text.
const Token = struct {
    tag: Tag,
    start: u32,
    end: u32,

    const Tag = enum {
        /// Produced when there is no more input left to match.
        eof,
        /// %identifier
        result_id,
        /// %identifier that appears on the left-hand side of an equals sign.
        /// Strictly speaking this is not a token of its own, but it is cheap
        /// to recognize during lexical analysis and it saves the parser a
        /// lot of trouble.
        result_id_assign,
        /// Mask, int, or float. These share one tag because some SPIR-V
        /// enumerants also look like integers (for example "3D"); the text is
        /// interpreted as the expected type once an instruction's operands
        /// are resolved.
        value,
        /// An enumerant shaped like an opcode, that is, OpXxxx.
        /// It is not necessarily a *valid* opcode.
        opcode,
        /// String literals.
        /// This tag is also used for unterminated strings, in which case the
        /// closing " is absent.
        string,
        /// |.
        pipe,
        /// =.
        equals,

        /// Human-readable description of the tag, as used in error messages.
        fn name(self: Tag) []const u8 {
            switch (self) {
                .eof => return "<end of input>",
                .result_id => return "<result-id>",
                .result_id_assign => return "<assigned result-id>",
                .value => return "<value>",
                .opcode => return "<opcode>",
                .string => return "<string literal>",
                .pipe => return "'|'",
                .equals => return "'='",
            }
        }
    };
};
|
|
|
|
/// Bookkeeping for one decoded operand.
/// Only the minimum is tracked: each variant holds just enough to either
/// decode the operand into a spec type or to emit it directly in binary form.
const Operand = union(enum) {
    /// Any 'simple' 32-bit value. Depending on the operand this could be a
    /// mask, an enumerant, etc.
    value: u32,

    /// An int or float literal that is encoded as 1 word. The literal may be
    /// 32 bits wide or narrower and is already in its final format: the upper
    /// bits are 0 for floats and unsigned ints, and sign-extended for signed
    /// ints.
    literal32: u32,

    /// An int or float literal that is encoded as 2 words. The literal may be
    /// 33 to 64 bits wide and is already in its final format: the upper bits
    /// are 0 for floats and unsigned ints, and sign-extended for signed ints.
    literal64: u64,

    /// The result-id that this instruction assigns to. When present, it is
    /// the first operand of the instruction.
    result_id: AsmValue.Ref,

    /// A result-id that this instruction refers to (rather than assigns to).
    ref_id: AsmValue.Ref,

    /// Offset into `inst.string_bytes`. The string runs up to the next
    /// zero-terminator.
    string: u32,
};
|
|
|
|
/// An error message produced by the assembler when the assembly source is
/// syntactically or semantically incorrect.
const ErrorMsg = struct {
    /// Where the error occurred, as a byte offset from the start of `src`.
    byte_offset: u32,
    /// Text explaining the error.
    /// Memory is owned by `self.gpa`. TODO: Maybe allocate this with an arena
    /// allocator if it is needed elsewhere?
    msg: []const u8,
};
|
|
|
|
/// The set of errors that the `assemble` function may return.
const Error = error{
    OutOfMemory,
    AssembleFail,
};
|
|
|
|
/// Tracks what a SPIR-V instruction produced. For most instructions that is
/// a plain result-id, but it can also be, for example, a type constructed by
/// an OpTypeXxx instruction.
const AsmValue = union(enum) {
    /// Results live in an array hash map and can be addressed either by name
    /// (without the %) or by a value of this index type.
    pub const Ref = u32;

    /// The result-value is the RHS of the instruction currently being handled.
    just_declared,

    /// Placeholder for a ref-id whose result-id is not known yet.
    /// It is resolved to a more concrete forward reference at a later stage.
    unresolved_forward_reference,

    /// An ordinary result produced by a different instruction.
    value: IdRef,

    /// A type that has been registered in the module's type system.
    ty: CacheRef,

    /// Return the result-id of this AsmValue. Asserts that the active variant
    /// actually has a result to hand out (i.e. it is neither an unresolved
    /// forward declaration nor still in the process of being declared).
    pub fn resultId(self: AsmValue, spv: *const SpvModule) IdRef {
        switch (self) {
            .value => |id| return id,
            .ty => |cache_ref| return spv.resultId(cache_ref),
            .just_declared, .unresolved_forward_reference => unreachable,
        }
    }
};
|
|
|
|
/// Map from results to their values. An entry can be addressed by name
/// (without the %) or by an AsmValue.Ref indexing AsmValueMap.keys/.values.
const AsmValueMap = std.StringArrayHashMapUnmanaged(AsmValue);
|
|
|
|
/// Allocator used for the assembler's common allocations.
gpa: Allocator,

/// Errors recorded while processing the assembly.
errors: std.ArrayListUnmanaged(ErrorMsg) = .{},

/// The assembly source text being processed.
src: []const u8,

/// The module this assembly belongs to.
/// Instructions like OpType*, OpDecorate, etc are emitted into this module.
spv: *SpvModule,

/// The function that receives the function-specific instructions.
func: *SpvModule.Fn,

/// The tokens of `self.src`.
tokens: std.ArrayListUnmanaged(Token) = .{},

/// Index of the token the parser will look at next.
current_token: u32 = 0,

/// Properties of the instruction that is currently being parsed, or that has
/// just been parsed.
inst: struct {
    /// Opcode of the current instruction.
    opcode: Opcode = undefined,
    /// Operands of the current instruction.
    operands: std.ArrayListUnmanaged(Operand) = .{},
    /// Backing storage for string data. Strings are zero-terminated.
    string_bytes: std.ArrayListUnmanaged(u8) = .{},

    /// Return a reference to this instruction's result, or null if it has none.
    fn result(self: @This()) ?AsmValue.Ref {
        // A result, when there is one, is always the first or the second
        // operand, so only those two need to be inspected.
        const items = self.operands.items;
        const limit = @min(items.len, 2);
        for (items[0..limit]) |operand| {
            if (operand == .result_id) return operand.result_id;
        }
        return null;
    }
} = .{},

/// Maps results to their tracked values.
value_map: AsmValueMap = .{},
|
|
|
|
/// Release everything this assembler owns: the recorded error messages and
/// all internal lists and maps.
pub fn deinit(self: *Assembler) void {
    const gpa = self.gpa;

    // Each error message is a separate allocation; free those before the
    // list that holds them.
    for (self.errors.items) |recorded| gpa.free(recorded.msg);
    self.errors.deinit(gpa);

    self.tokens.deinit(gpa);
    self.value_map.deinit(gpa);
    self.inst.string_bytes.deinit(gpa);
    self.inst.operands.deinit(gpa);
}
|
|
|
|
/// Tokenize `self.src`, then parse and process instructions one at a time
/// until the end-of-input token is reached. A failure while parsing or
/// processing an instruction is returned immediately. If errors were recorded
/// without aborting, `error.AssembleFail` is returned at the end.
pub fn assemble(self: *Assembler) Error!void {
    try self.tokenize();

    while (true) {
        if (self.testToken(.eof)) break;
        try self.parseInstruction();
        try self.processInstruction();
    }

    if (self.errors.items.len != 0) return error.AssembleFail;
}
|
|
|
|
/// Format a message and record it in `self.errors` together with the byte
/// offset it applies to. The message is allocated with `self.gpa`; it is
/// freed again if it cannot be appended to the list.
fn addError(self: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) !void {
    const text = try std.fmt.allocPrint(self.gpa, fmt, args);
    errdefer self.gpa.free(text);

    const entry: ErrorMsg = .{ .byte_offset = offset, .msg = text };
    try self.errors.append(self.gpa, entry);
}
|
|
|
|
/// Record an error message at `offset` and produce `error.AssembleFail` for
/// the caller to return. If recording the message itself fails, that error is
/// produced instead.
fn fail(self: *Assembler, offset: u32, comptime fmt: []const u8, args: anytype) Error {
    self.addError(offset, fmt, args) catch |err| return err;
    return error.AssembleFail;
}
|
|
|
|
/// Fail with a message prefixed by "todo: ", reported at byte offset 0.
fn todo(self: *Assembler, comptime fmt: []const u8, args: anytype) Error {
    const prefixed = "todo: " ++ fmt;
    return self.fail(0, prefixed, args);
}
|
|
|
|
/// Try to process the instruction that currently sits in `self.inst`.
/// Depending on the instruction this emits it into the module or function,
/// or records a type definition.
/// When this function returns `error.AssembleFail`, an explanatory error
/// message has already been added to `self.errors`.
fn processInstruction(self: *Assembler) !void {
    const opcode = self.inst.opcode;
    if (opcode == .OpEntryPoint) {
        return self.fail(0, "cannot export entry points via OpEntryPoint, export the kernel using callconv(.Kernel)", .{});
    }

    // Instructions that do not produce a result need no further bookkeeping.
    const produced: AsmValue = if (opcode.class() == .TypeDeclaration)
        try self.processTypeInstruction()
    else
        (try self.processGenericInstruction()) orelse return;

    const result_ref = self.inst.result().?;
    const slot = &self.value_map.values()[result_ref];
    if (slot.* != .just_declared) {
        // TODO: Improve source location.
        const name = self.value_map.keys()[result_ref];
        return self.fail(0, "duplicate definition of %{s}", .{name});
    }
    slot.* = produced;
}
|
|
|
|
/// Enter the type described by `self.inst` into the module's type system and
/// return the AsmValue that refers to it.
fn processTypeInstruction(self: *Assembler) !AsmValue {
    const operands = self.inst.operands.items;
    const ref = switch (self.inst.opcode) {
        .OpTypeVoid => try self.spv.resolve(.void_type),
        .OpTypeBool => try self.spv.resolve(.bool_type),
        .OpTypeInt => blk: {
            const raw_signedness = operands[2].literal32;
            const signedness: std.builtin.Signedness = if (raw_signedness == 0)
                .unsigned
            else if (raw_signedness == 1)
                .signed
            else
                // TODO: Improve source location.
                return self.fail(0, "{} is not a valid signedness (expected 0 or 1)", .{raw_signedness});

            const raw_width = operands[1].literal32;
            const width = std.math.cast(u16, raw_width) orelse
                return self.fail(0, "int type of {} bits is too large", .{raw_width});

            break :blk try self.spv.intType(signedness, width);
        },
        .OpTypeFloat => blk: {
            const bits = operands[1].literal32;
            if (bits != 16 and bits != 32 and bits != 64) {
                return self.fail(0, "{} is not a valid bit count for floats (expected 16, 32 or 64)", .{bits});
            }
            break :blk try self.spv.resolve(.{ .float_type = .{ .bits = @as(u16, @intCast(bits)) } });
        },
        .OpTypeVector => blk: {
            const component_type = try self.resolveTypeRef(operands[1].ref_id);
            const component_count = operands[2].literal32;
            break :blk try self.spv.resolve(.{ .vector_type = .{
                .component_type = component_type,
                .component_count = component_count,
            } });
        },
        // TODO: The length of an OpTypeArray is determined by a constant (which may be a spec constant),
        // and so some consideration must be taken when entering this in the type system.
        .OpTypeArray => return self.todo("process OpTypeArray", .{}),
        .OpTypePointer => try self.spv.resolve(.{
            .ptr_type = .{
                .storage_class = @enumFromInt(operands[1].value),
                .child_type = try self.resolveTypeRef(operands[2].ref_id),
                // TODO: This should be a proper reference resolved via OpTypeForwardPointer
                .fwd = @enumFromInt(std.math.maxInt(u32)),
            },
        }),
        .OpTypeFunction => blk: {
            // Operand 0 is the result-id and operand 1 the return type; the
            // parameter types follow. Parameters are resolved before the
            // return type.
            const param_operands = operands[2..];
            const param_types = try self.spv.gpa.alloc(CacheRef, param_operands.len);
            defer self.spv.gpa.free(param_types);

            for (param_operands, param_types) |param_operand, *param_type| {
                param_type.* = try self.resolveTypeRef(param_operand.ref_id);
            }

            const return_type = try self.resolveTypeRef(operands[1].ref_id);
            break :blk try self.spv.resolve(.{ .function_type = .{
                .return_type = return_type,
                .parameters = param_types,
            } });
        },
        else => return self.todo("process type instruction {s}", .{@tagName(self.inst.opcode)}),
    };

    return AsmValue{ .ty = ref };
}
|
|
|
|
/// Emit `self.inst` into `self.spv` and `self.func`, and return the AsmValue
/// that this produces (if any). This function processes common instructions:
/// - No forward references are allowed in operands.
/// - Target section is determined from instruction type.
/// - Function-local instructions are emitted in `self.func`.
///
/// If an error is returned, any words that were already written for this
/// instruction are removed from the target section again, so that no
/// half-emitted instruction is left behind. An instruction whose encoding
/// does not fit the 16-bit word count of the opcode word is rejected with
/// `error.AssembleFail`.
fn processGenericInstruction(self: *Assembler) !?AsmValue {
    const operands = self.inst.operands.items;
    const section = switch (self.inst.opcode.class()) {
        .ConstantCreation => &self.spv.sections.types_globals_constants,
        .Annotation => &self.spv.sections.annotations,
        .TypeDeclaration => unreachable, // Handled elsewhere.
        else => switch (self.inst.opcode) {
            .OpEntryPoint => unreachable,
            .OpExecutionMode, .OpExecutionModeId => &self.spv.sections.execution_modes,
            .OpVariable => switch (@as(spec.StorageClass, @enumFromInt(operands[2].value))) {
                .Function => &self.func.prologue,
                else => {
                    // This is currently disabled because global variables are required to be
                    // emitted in the proper order, and this should be honored in inline assembly
                    // as well.
                    return self.todo("global variables", .{});
                },
            },
            // Default case - to be worked out further.
            else => &self.func.body,
        },
    };

    var maybe_result_id: ?IdResult = null;
    const first_word = section.instructions.items.len;
    // Roll back to the state before this instruction if anything below fails;
    // otherwise the section would keep a truncated instruction whose opcode
    // word is still the placeholder 0.
    errdefer section.instructions.items.len = first_word;

    // At this point we're not quite sure how many operands this instruction is going to have,
    // so insert 0 and patch up the actual opcode word later.
    try section.ensureUnusedCapacity(self.spv.gpa, 1);
    section.writeWord(0);

    for (operands) |operand| {
        switch (operand) {
            .value, .literal32 => |word| {
                try section.ensureUnusedCapacity(self.spv.gpa, 1);
                section.writeWord(word);
            },
            .literal64 => |dword| {
                try section.ensureUnusedCapacity(self.spv.gpa, 2);
                section.writeDoubleWord(dword);
            },
            .result_id => {
                maybe_result_id = self.spv.allocId();
                try section.ensureUnusedCapacity(self.spv.gpa, 1);
                section.writeOperand(IdResult, maybe_result_id.?);
            },
            .ref_id => |index| {
                const result = try self.resolveRef(index);
                try section.ensureUnusedCapacity(self.spv.gpa, 1);
                section.writeOperand(spec.IdRef, result.resultId(self.spv));
            },
            .string => |offset| {
                const text = std.mem.sliceTo(self.inst.string_bytes.items[offset..], 0);
                // The string is stored including its zero-terminator, rounded up to whole words.
                const size = std.math.divCeil(usize, text.len + 1, @sizeOf(Word)) catch unreachable;
                try section.ensureUnusedCapacity(self.spv.gpa, size);
                section.writeOperand(spec.LiteralString, text);
            },
        }
    }

    // The word count lives in the upper 16 bits of the opcode word, so it must
    // fit in a u16. Report an error instead of performing an unchecked cast.
    const actual_word_count = section.instructions.items.len - first_word;
    const word_count = std.math.cast(u16, actual_word_count) orelse {
        // TODO: Improve source location.
        return self.fail(0, "instruction {s} is too long ({} words, the maximum is {})", .{
            @tagName(self.inst.opcode),
            actual_word_count,
            std.math.maxInt(u16),
        });
    };
    section.instructions.items[first_word] |= @as(u32, word_count) << 16 | @intFromEnum(self.inst.opcode);

    if (maybe_result_id) |result| {
        return AsmValue{ .value = result };
    }
    return null;
}
|
|
|
|
/// Look up the value behind a reference. A reference to the result that is
/// being declared by the current instruction (a self-reference) is rejected;
/// a forward-declared result is allowed and returned as-is.
fn resolveMaybeForwardRef(self: *Assembler, ref: AsmValue.Ref) !AsmValue {
    const resolved = self.value_map.values()[ref];
    if (resolved == .just_declared) {
        // TODO: Improve source location.
        const name = self.value_map.keys()[ref];
        return self.fail(0, "self-referential parameter %{s}", .{name});
    }
    return resolved;
}
|
|
|
|
/// Look up the value behind a reference, rejecting both self-references and
/// results that have not been declared yet (forward references).
fn resolveRef(self: *Assembler, ref: AsmValue.Ref) !AsmValue {
    const resolved = try self.resolveMaybeForwardRef(ref);
    switch (resolved) {
        // Self-references are already rejected by resolveMaybeForwardRef.
        .just_declared => unreachable,
        .unresolved_forward_reference => {},
        else => return resolved,
    }

    // TODO: Improve source location.
    const name = self.value_map.keys()[ref];
    return self.fail(0, "reference to undeclared result-id %{s}", .{name});
}
|
|
|
|
/// Look up the value behind a reference and require it to be a type.
fn resolveTypeRef(self: *Assembler, ref: AsmValue.Ref) !CacheRef {
    switch (try self.resolveRef(ref)) {
        .ty => |cache_ref| return cache_ref,
        .value => {},
        // resolveRef never hands out these variants.
        .just_declared, .unresolved_forward_reference => unreachable,
    }

    // TODO: Improve source location.
    const name = self.value_map.keys()[ref];
    return self.fail(0, "expected operand %{s} to refer to a type", .{name});
}
|
|
|
|
/// Try to parse the next instruction into `self.inst`.
/// When this function returns `error.AssembleFail`, an explanatory error
/// message has been added to `self.errors`.
fn parseInstruction(self: *Assembler) !void {
    // Start from a clean slate, keeping the buffers of the previous instruction.
    self.inst.opcode = undefined;
    self.inst.operands.shrinkRetainingCapacity(0);
    self.inst.string_bytes.shrinkRetainingCapacity(0);

    // Optional `%name =` prefix.
    const lhs_result_tok = self.currentToken();
    var maybe_lhs_result: ?AsmValue.Ref = null;
    if (self.eatToken(.result_id_assign)) {
        // Strip the leading '%' to obtain the name.
        const name = self.tokenText(lhs_result_tok)[1..];
        const entry = try self.value_map.getOrPut(self.gpa, name);
        try self.expectToken(.equals);
        // An entry that already exists is left untouched.
        if (!entry.found_existing) entry.value_ptr.* = .just_declared;
        maybe_lhs_result = @as(AsmValue.Ref, @intCast(entry.index));
    }

    // The opcode itself.
    const opcode_tok = self.currentToken();
    if (maybe_lhs_result == null) {
        if (!self.eatToken(.opcode)) {
            return self.fail(opcode_tok.start, "expected start of instruction, found {s}", .{opcode_tok.tag.name()});
        }
    } else {
        try self.expectToken(.opcode);
    }

    const opcode_text = self.tokenText(opcode_tok);
    @setEvalBranchQuota(10000);
    self.inst.opcode = std.meta.stringToEnum(Opcode, opcode_text) orelse
        return self.fail(opcode_tok.start, "invalid opcode '{s}'", .{opcode_text});

    const expected_operands = self.inst.opcode.operands();

    // All operands are scanned because the result-id is not always the first one.
    var requires_lhs_result = false;
    for (expected_operands) |expected| {
        if (expected.kind == .IdResult) {
            requires_lhs_result = true;
            break;
        }
    }

    if (requires_lhs_result) {
        if (maybe_lhs_result == null) {
            return self.fail(opcode_tok.start, "opcode '{s}' expects result on left-hand side", .{@tagName(self.inst.opcode)});
        }
    } else if (maybe_lhs_result != null) {
        return self.fail(
            lhs_result_tok.start,
            "opcode '{s}' does not expect a result-id on the left-hand side",
            .{@tagName(self.inst.opcode)},
        );
    }

    for (expected_operands) |operand| {
        // The result-id was already parsed from the left-hand side; record it
        // at the position where the instruction expects it.
        if (operand.kind == .IdResult) {
            try self.inst.operands.append(self.gpa, .{ .result_id = maybe_lhs_result.? });
            continue;
        }

        switch (operand.quantifier) {
            .required => {
                if (self.isAtInstructionBoundary()) {
                    return self.fail(
                        self.currentToken().start,
                        "missing required operand", // TODO: Operand name?
                        .{},
                    );
                }
                try self.parseOperand(operand.kind);
            },
            .optional => {
                if (!self.isAtInstructionBoundary()) try self.parseOperand(operand.kind);
            },
            .variadic => {
                while (!self.isAtInstructionBoundary()) try self.parseOperand(operand.kind);
            },
        }
    }
}
|
|
|
|
/// Parse one operand of the given kind and append it to `self.inst.operands`.
fn parseOperand(self: *Assembler, kind: spec.OperandKind) Error!void {
    // Whole categories are handled uniformly.
    switch (kind.category()) {
        .bit_enum => return self.parseBitEnum(kind),
        .value_enum => return self.parseValueEnum(kind),
        .id => return self.parseRefId(),
        else => {},
    }

    // Everything else is dispatched on the specific kind.
    switch (kind) {
        .LiteralInteger => return self.parseLiteralInteger(),
        .LiteralString => return self.parseString(),
        .LiteralContextDependentNumber => return self.parseContextDependentNumber(),
        .PairIdRefIdRef => return self.parsePhiSource(),
        else => return self.todo("parse operand of type {s}", .{@tagName(kind)}),
    }
}
|
|
|
|
/// Parse a bitmask operand: either the word `None`, or one or more flag names
/// separated by `|`. Any extra operands required by the selected flags are
/// parsed as well.
fn parseBitEnum(self: *Assembler, kind: spec.OperandKind) !void {
    var tok = self.currentToken();
    try self.expectToken(.value);

    // `None` is only recognized as the sole, first word of the mask.
    if (std.mem.eql(u8, self.tokenText(tok), "None")) {
        try self.inst.operands.append(self.gpa, .{ .value = 0 });
        return;
    }

    const enumerants = kind.enumerants();
    var mask: u32 = 0;
    while (true) {
        const flag_name = self.tokenText(tok);

        var matched = false;
        for (enumerants) |candidate| {
            if (std.mem.eql(u8, candidate.name, flag_name)) {
                mask |= candidate.value;
                matched = true;
                break;
            }
        }
        if (!matched) {
            return self.fail(tok.start, "'{s}' is not a valid flag for bitmask {s}", .{ flag_name, @tagName(kind) });
        }

        // Another flag follows only after a '|'.
        if (!self.eatToken(.pipe)) break;
        tok = self.currentToken();
        try self.expectToken(.value);
    }

    try self.inst.operands.append(self.gpa, .{ .value = mask });

    // Assume values are sorted.
    // TODO: ensure in generator.
    for (enumerants) |flag| {
        if ((mask & flag.value) != 0) {
            for (flag.parameters) |param_kind| {
                if (self.isAtInstructionBoundary()) {
                    return self.fail(self.currentToken().start, "missing required parameter for bit flag '{s}'", .{flag.name});
                }
                try self.parseOperand(param_kind);
            }
        }
    }
}
|
|
|
|
/// Parse an enumeration operand by name. Any extra operands required by the
/// selected enumerant are parsed as well.
fn parseValueEnum(self: *Assembler, kind: spec.OperandKind) !void {
    const tok = self.currentToken();
    try self.expectToken(.value);
    const text = self.tokenText(tok);

    // Find the first enumerant with a matching name.
    const variants = kind.enumerants();
    var index: usize = 0;
    while (index < variants.len) : (index += 1) {
        if (std.mem.eql(u8, variants[index].name, text)) break;
    } else {
        return self.fail(tok.start, "'{s}' is not a valid value for enumeration {s}", .{ text, @tagName(kind) });
    }
    const chosen = variants[index];

    try self.inst.operands.append(self.gpa, .{ .value = chosen.value });

    for (chosen.parameters) |param_kind| {
        if (self.isAtInstructionBoundary()) {
            return self.fail(self.currentToken().start, "missing required parameter for enum variant '{s}'", .{chosen.name});
        }
        try self.parseOperand(param_kind);
    }
}
|
|
|
|
/// Parse a `%name` reference and append it as a `ref_id` operand. A name that
/// is not in `self.value_map` yet is entered as an unresolved forward
/// reference.
fn parseRefId(self: *Assembler) !void {
    const tok = self.currentToken();
    try self.expectToken(.result_id);

    // The map key is the name without its leading '%'.
    const gop = try self.value_map.getOrPut(self.gpa, self.tokenText(tok)[1..]);
    if (!gop.found_existing) gop.value_ptr.* = .unresolved_forward_reference;

    try self.inst.operands.append(self.gpa, .{
        .ref_id = @as(AsmValue.Ref, @intCast(gop.index)),
    });
}
|
|
|
|
/// Parse an unsigned 32-bit integer literal and append it as a `literal32`
/// operand.
fn parseLiteralInteger(self: *Assembler) !void {
    const tok = self.currentToken();
    try self.expectToken(.value);
    // According to the SPIR-V machine readable grammar, a LiteralInteger
    // may consist of one or more words. From the SPIR-V docs it seems like
    // there is only one instruction where multiple words are allowed: the
    // literals that make up the switch cases of OpSwitch. That case is handled
    // separately, so this function simply assumes a 32-bit integer.
    const text = self.tokenText(tok);
    if (std.fmt.parseInt(u32, text, 0)) |value| {
        try self.inst.operands.append(self.gpa, .{ .literal32 = value });
    } else |_| {
        return self.fail(tok.start, "'{s}' is not a valid 32-bit integer literal", .{text});
    }
}
|
|
|
|
/// Parse a string literal, decode it into `self.inst.string_bytes` (followed
/// by a zero-terminator) and append a `string` operand that refers to it.
///
/// Backslash escapes are decoded: a backslash makes the character after it
/// literal, so `\"` yields `"` and `\\` yields `\`. The literal ends at the
/// first unescaped double quote.
fn parseString(self: *Assembler) !void {
    const tok = self.currentToken();
    try self.expectToken(.string);
    // Note, the string might not have a closing quote. In this case,
    // an error is already emitted but we are trying to continue processing
    // anyway, so in this function we have to deal with that situation.
    const text = self.tokenText(tok);
    assert(text.len > 0 and text[0] == '"');
    const raw = text[1..];

    const string_offset = @as(u32, @intCast(self.inst.string_bytes.items.len));
    // Decoding never produces more bytes than the raw text contains, so
    // reserving room for all of it (plus the terminator) is sufficient.
    try self.inst.string_bytes.ensureUnusedCapacity(self.gpa, raw.len + 1);

    var i: usize = 0;
    while (i < raw.len) : (i += 1) {
        switch (raw[i]) {
            // An unescaped quote closes the literal.
            '"' => break,
            '\\' => {
                // Take the next character literally. A backslash that is the
                // very last byte (unterminated string) is dropped.
                i += 1;
                if (i == raw.len) break;
                self.inst.string_bytes.appendAssumeCapacity(raw[i]);
            },
            else => |c| self.inst.string_bytes.appendAssumeCapacity(c),
        }
    }
    self.inst.string_bytes.appendAssumeCapacity(0);

    try self.inst.operands.append(self.gpa, .{ .string = string_offset });
}
|
|
|
|
/// Parse a literal whose type depends on the instruction it appears in.
fn parseContextDependentNumber(self: *Assembler) !void {
    // For context dependent numbers, the actual type to parse is determined by the instruction.
    // Currently, this operand appears in OpConstant and OpSpecConstant, where the to-be-parsed
    // type is determined by the result type. That means that for these instructions the operand
    // type has to be resolved early, and the result type inspected to see how to proceed.
    switch (self.inst.opcode) {
        .OpConstant, .OpSpecConstant => {},
        else => unreachable,
    }

    const tok = self.currentToken();
    // The first operand parsed so far is the reference to the result type.
    const type_operand = self.inst.operands.items[0];
    const result_type_ref = try self.resolveTypeRef(type_operand.ref_id);

    switch (self.spv.cache.lookup(result_type_ref)) {
        .int_type => |int| try self.parseContextDependentInt(int.signedness, int.bits),
        .float_type => |float| {
            // The float parser needs a comptime-known width, hence the dispatch.
            if (float.bits == 16) {
                try self.parseContextDependentFloat(16);
            } else if (float.bits == 32) {
                try self.parseContextDependentFloat(32);
            } else if (float.bits == 64) {
                try self.parseContextDependentFloat(64);
            } else {
                return self.fail(tok.start, "cannot parse {}-bit float literal", .{float.bits});
            }
        },
        else => return self.fail(tok.start, "cannot parse literal constant", .{}),
    }
}
|
|
|
|
/// Parse an integer literal of the given signedness and bit width (1 to 64
/// bits) and append it as a `literal32` or `literal64` operand. Values that
/// do not fit the requested type are rejected.
fn parseContextDependentInt(self: *Assembler, signedness: std.builtin.Signedness, width: u32) !void {
    const tok = self.currentToken();
    try self.expectToken(.value);

    const word_bits = @bitSizeOf(spec.Word);
    if (width == 0 or width > 2 * word_bits) {
        return self.fail(tok.start, "cannot parse {}-bit integer literal", .{width});
    }

    const text = self.tokenText(tok);

    // Parse into a much wider integer type first and range-check afterwards.
    const parsed: ?i128 = std.fmt.parseInt(i128, text, 0) catch null;
    if (parsed) |int| {
        const one: i128 = 1;
        const bits = @as(u7, @intCast(width));
        const is_signed = signedness == .signed;

        const upper = (one << (bits - @intFromBool(is_signed))) - 1;
        const lower: i128 = if (is_signed) -(one << (bits - 1)) else 0;

        if (lower <= int and int <= upper) {
            // Note, the sign-extended version is what gets stored.
            const raw = @as(u128, @bitCast(int));
            if (width > word_bits) {
                try self.inst.operands.append(self.gpa, .{ .literal64 = @as(u64, @truncate(raw)) });
            } else {
                try self.inst.operands.append(self.gpa, .{ .literal32 = @as(u32, @truncate(raw)) });
            }
            return;
        }
    }

    return self.fail(tok.start, "'{s}' is not a valid {s} {}-bit int literal", .{ text, @tagName(signedness), width });
}
|
|
|
|
/// Parse a float literal of the given (comptime-known) bit width and append
/// its bit pattern as a `literal32` or `literal64` operand.
fn parseContextDependentFloat(self: *Assembler, comptime width: u16) !void {
    const tok = self.currentToken();
    try self.expectToken(.value);
    const text = self.tokenText(tok);

    const Float = std.meta.Float(width);
    const parsed = std.fmt.parseFloat(Float, text) catch
        return self.fail(tok.start, "'{s}' is not a valid {}-bit float literal", .{ text, width });

    // Reinterpret the float as an unsigned integer of the same size.
    const Bits = std.meta.Int(.unsigned, width);
    const raw = @as(Bits, @bitCast(parsed));

    if (width > @bitSizeOf(spec.Word)) {
        assert(width <= 2 * @bitSizeOf(spec.Word));
        try self.inst.operands.append(self.gpa, .{ .literal64 = raw });
    } else {
        try self.inst.operands.append(self.gpa, .{ .literal32 = raw });
    }
}
|
|
|
|
/// Parse a pair of result-id references. The second reference is mandatory;
/// if the instruction ends after the first, an error about the missing phi
/// block parent is reported.
fn parsePhiSource(self: *Assembler) !void {
    try self.parseRefId();
    if (!self.isAtInstructionBoundary()) {
        return self.parseRefId();
    }
    return self.fail(self.currentToken().start, "missing phi block parent", .{});
}
|
|
|
|
/// Tell whether the `current_token` cursor sits at the start of a new
/// instruction, which includes the end of the input.
fn isAtInstructionBoundary(self: Assembler) bool {
    const tag = self.currentToken().tag;
    return tag == .opcode or tag == .result_id_assign or tag == .eof;
}
|
|
|
|
/// Consume the current token if it has the given tag; otherwise report an
/// "unexpected ..., expected ..." error at the current token.
fn expectToken(self: *Assembler, tag: Token.Tag) !void {
    if (!self.eatToken(tag)) {
        const found = self.currentToken();
        return self.fail(found.start, "unexpected {s}, expected {s}", .{
            found.tag.name(),
            tag.name(),
        });
    }
}
|
|
|
|
/// If the current token has the given tag, advance past it and return true.
/// Otherwise leave the cursor where it is and return false.
fn eatToken(self: *Assembler, tag: Token.Tag) bool {
    const matches = self.testToken(tag);
    if (matches) self.current_token += 1;
    return matches;
}
|
|
|
|
/// Tell whether the current token has the given tag, without consuming it.
fn testToken(self: Assembler, tag: Token.Tag) bool {
    const current = self.currentToken();
    return current.tag == tag;
}
|
|
|
|
/// Return the token at the `current_token` cursor.
fn currentToken(self: Assembler) Token {
    const index = self.current_token;
    return self.tokens.items[index];
}
|
|
|
|
/// Return the slice of `self.src` that the given token covers.
fn tokenText(self: Assembler, tok: Token) []const u8 {
    const begin = tok.start;
    const finish = tok.end;
    return self.src[begin..finish];
}
|
|
|
|
/// Tokenize `self.src` and put the tokens in `self.tokens`, ending with an
/// `.eof` token.
/// Any errors encountered are appended to `self.errors`.
fn tokenize(self: *Assembler) !void {
    var offset: u32 = 0;
    while (true) {
        const tok = try self.nextToken(offset);
        // Resolve result-id assignment now: a result-id directly followed by
        // '=' becomes the left-hand side of an assignment.
        // Note: If there is no previous token (the input starts with '='), or
        // if the previous token isn't a result-id, just ignore it; this is
        // caught while parsing. The emptiness check also keeps the index
        // computation below from underflowing.
        if (tok.tag == .equals and self.tokens.items.len > 0) {
            const previous = &self.tokens.items[self.tokens.items.len - 1];
            if (previous.tag == .result_id) {
                previous.tag = .result_id_assign;
            }
        }
        try self.tokens.append(self.gpa, tok);
        if (tok.tag == .eof)
            break;
        offset = tok.end;
    }
}
|
|
|
|
/// Scan the next token from the input, starting at `start_offset`. The token
/// is checked for being properly delimited where that is required, but its
/// text is not interpreted yet. Problems are recorded in `self.errors` and
/// scanning continues with a best-effort token.
/// Note: This function never produces .result_id_assign - that is resolved in
/// tokenize().
fn nextToken(self: *Assembler, start_offset: u32) !Token {
    // The input is generally separated into the following kinds:
    // - Whitespace. Generally ignored, but also acts as delimiter for some
    //   tokens.
    // - Values. This covers integers, floats, enums - anything that consists
    //   of alphanumeric characters, delimited by whitespace.
    // - Result-IDs. Anything that starts with a % and consists of
    //   alphanumeric characters and _. In contrast to values, these can be
    //   checked for complete correctness relatively easily right here.
    // - Strings. Quote-delimited text such as "abc".
    //   SPIR-V strings have only two escapes, \" and \\.
    // - Sigils, = and |. In this assembler these are not required to have
    //   whitespace around them (they act as delimiters) as they do in
    //   SPIRV-Tools.
    const State = enum {
        start,
        value,
        result_id,
        string,
        string_end,
        escape,
    };

    var state: State = .start;
    var tag: Token.Tag = .eof;
    var begin = start_offset;
    var pos = start_offset;

    scan: while (pos < self.src.len) : (pos += 1) {
        const c = self.src[pos];
        const is_space = c == ' ' or c == '\t' or c == '\r' or c == '\n';
        const is_delimiter = is_space or c == '=' or c == '|';

        switch (state) {
            .start => {
                if (is_space) {
                    // Leading whitespace is not part of the token.
                    begin = pos + 1;
                } else if (c == '|' or c == '=') {
                    // Sigils are complete single-character tokens.
                    tag = if (c == '|') .pipe else .equals;
                    pos += 1;
                    break :scan;
                } else if (c == '"') {
                    state = .string;
                    tag = .string;
                } else if (c == '%') {
                    state = .result_id;
                    tag = .result_id;
                } else {
                    state = .value;
                    tag = .value;
                }
            },
            .value => {
                if (c == '"') {
                    try self.addError(pos, "unexpected string literal", .{});
                    // The user most likely just forgot a delimiter here - keep
                    // the tag as value.
                    break :scan;
                }
                if (is_delimiter) break :scan;
            },
            .result_id => {
                const is_name_char = switch (c) {
                    '_', 'a'...'z', 'A'...'Z', '0'...'9' => true,
                    else => false,
                };
                if (is_name_char) continue :scan;
                if (!is_delimiter) {
                    // Again, probably a forgotten delimiter here.
                    try self.addError(pos, "illegal character in result-id", .{});
                }
                break :scan;
            },
            .string => {
                // Note, strings may include newlines.
                if (c == '\\') {
                    state = .escape;
                } else if (c == '"') {
                    state = .string_end;
                }
            },
            .string_end => {
                if (!is_delimiter) {
                    // The token is still unmistakably a string.
                    try self.addError(pos, "unexpected character after string literal", .{});
                }
                break :scan;
            },
            // Escapes simply skip the next char.
            .escape => state = .string,
        }
    }

    var tok = Token{
        .tag = tag,
        .start = begin,
        .end = pos,
    };

    if (state == .string or state == .escape) {
        // The input ended before the closing quote was seen.
        try self.addError(begin, "unterminated string", .{});
    } else if (state == .result_id) {
        // A lone '%' is not a valid result-id.
        if (pos - begin == 1) {
            try self.addError(begin, "result-id must have at least one name character", .{});
        }
    } else if (state == .value) {
        // A value that reads "Op" followed by an uppercase letter is tagged
        // as an opcode.
        const text = self.tokenText(tok);
        const prefix = "Op";
        if (text.len > prefix.len and
            std.mem.startsWith(u8, text, prefix) and
            std.ascii.isUpper(text[prefix.len]))
        {
            tok.tag = .opcode;
        }
    }

    return tok;
}
|