ir: semantic analysis skeleton

This commit is contained in:
Andrew Kelley
2020-04-21 00:56:30 -04:00
parent cc1c2bd568
commit 4c7507cceb
4 changed files with 315 additions and 135 deletions

View File

@@ -8,106 +8,221 @@ const text = @import("ir/text.zig");
/// These are in-memory, analyzed instructions. See `text.Inst` for the representation
/// of instructions that correspond to the ZIR text format.
/// This struct owns the `Value` and `Type` memory. When the struct is deallocated,
/// so are the `Value` and `Type`. The value of a constant must be copied into
/// a memory location for the value to survive after a const instruction.
pub const Inst = struct {
pub fn ty(base: *Inst) ?Type {
switch (base.tag) {
.constant => return base.cast(Constant).?.ty,
.@"asm" => return base.cast(Assembly).?.ty,
.@"fn" => return base.cast(Fn).?.ty,
tag: Tag,
ty: Type,
src_offset: usize,
.ptrtoint => return Type.initTag(.@"usize"),
.@"unreachable" => return Type.initTag(.@"noreturn"),
.@"export" => return Type.initTag(.@"void"),
.fntype, .primitive => return Type.initTag(.@"type"),
pub const Tag = enum {
unreach,
constant,
assembly,
};
.fieldptr,
.deref,
=> return null,
}
/// Downcasts `base` to the instruction struct `T` that embeds it.
/// Yields null when `base.tag` differs from `T.base_tag`; otherwise recovers
/// the enclosing `T` through its `base` field.
pub fn cast(base: *Inst, comptime T: type) ?*T {
    if (base.tag == T.base_tag) {
        return @fieldParentPtr(T, "base", base);
    }
    return null;
}
/// This struct owns the `Value` memory. When the struct is deallocated,
/// so is the `Value`. The value of a constant must be copied into
/// a memory location for the value to survive after a const instruction.
pub const Constant = struct {
base: Inst = Inst{ .tag = .constant },
ty: Type,
pub const base_tag = Tag.constant;
base: Inst,
positionals: struct {
value: Value,
},
kw_args: struct {},
val: Value,
};
/// Instruction payload for the `assembly` tag. `Inst.cast` compares `base_tag`
/// with the instruction's tag and uses the `base` field to recover this struct.
pub const Assembly = struct {
pub const base_tag = Tag.assembly;
// Embedded header; `cast` locates it by the field name "base".
base: Inst,
// NOTE(review): the fields below presumably hold the already-resolved operands
// of the textual `asm` instruction — confirm once analysis of `asm` is implemented.
asm_source: []const u8,
is_volatile: bool,
output: []const u8,
inputs: []const []const u8,
clobbers: []const []const u8,
args: []const []const u8,
};
};
const Analyze = struct {
allocator: *Allocator,
old_tree: *const Module,
errors: std.ArrayList(ErrorMsg),
decls: std.ArrayList(*Inst),
const NewInst = struct {
ptr: *Inst,
};
const TypedValue = struct {
ty: Type,
val: Value,
};
pub fn analyze(allocator: *Allocator, old_tree: Module) !Module {
/// Result of semantic analysis: the exported symbols that were found plus any
/// errors that were reported. The `exports` and `errors` slices and the `arena`
/// are all released by `deinit`.
pub const Module = struct {
exports: []Export,
errors: []ErrorMsg,
arena: std.heap.ArenaAllocator,
/// One exported symbol: its name together with the exported typed value.
pub const Export = struct {
name: []const u8,
typed_value: TypedValue,
};
/// Frees the `exports` and `errors` slices with `allocator`, deinitializes
/// `arena`, and finally sets `self.*` to undefined.
/// NOTE(review): `allocator` presumably has to be the same allocator that was
/// passed to `analyze` — confirm against the caller.
pub fn deinit(self: *Module, allocator: *Allocator) void {
allocator.free(self.exports);
allocator.free(self.errors);
self.arena.deinit();
self.* = undefined;
}
/// Not implemented yet: always returns `error.TodoImplementEmitToZIR`.
pub fn emit_zir(self: Module, allocator: *Allocator) !text.Module {
return error.TodoImplementEmitToZIR;
}
};
/// A single reported error: a message plus the source position it refers to.
pub const ErrorMsg = struct {
// Byte offset into the source text; `main` converts it to line/column with `findLineColumn`.
byte_offset: usize,
// The rendered error message text.
msg: []const u8,
};
pub fn analyze(allocator: *Allocator, old_module: text.Module) !Module {
var ctx = Analyze{
.allocator = allocator,
.old_tree = &old_tree,
.decls = std.ArrayList(*Inst).init(allocator),
.arena = std.heap.ArenaAllocator.init(allocator),
.old_module = &old_module,
.errors = std.ArrayList(ErrorMsg).init(allocator),
.inst_table = std.HashMap(*Inst, Analyze.InstData).init(allocator),
.inst_table = std.AutoHashMap(*text.Inst, Analyze.NewInst).init(allocator),
.exports = std.ArrayList(Module.Export).init(allocator),
};
defer ctx.decls.deinit();
defer ctx.errors.deinit();
defer inst_table.deinit();
defer ctx.inst_table.deinit();
defer ctx.exports.deinit();
analyzeRoot(&ctx) catch |err| switch (err) {
error.AnalyzeFailure => {
ctx.analyzeRoot() catch |err| switch (err) {
error.AnalysisFail => {
assert(ctx.errors.items.len != 0);
},
else => |e| return e,
};
return Module{
.decls = ctx.decls.toOwnedSlice(),
.exports = ctx.exports.toOwnedSlice(),
.errors = ctx.errors.toOwnedSlice(),
.arena = ctx.arena,
};
}
fn analyzeRoot(ctx: *Analyze) !void {
for (old_tree.decls) |decl| {
if (decl.cast(Inst.Export)) |export_inst| {
try analyzeExport(ctx, export_inst);
const Analyze = struct {
allocator: *Allocator,
arena: std.heap.ArenaAllocator,
old_module: *const text.Module,
errors: std.ArrayList(ErrorMsg),
inst_table: std.AutoHashMap(*text.Inst, NewInst),
exports: std.ArrayList(Module.Export),
const NewInst = struct {
/// null means a semantic analysis error happened
ptr: ?*Inst,
};
const InnerError = error{ OutOfMemory, AnalysisFail };
/// Walks the top-level declarations of the old module and analyzes every
/// export instruction; declarations of any other kind are skipped here.
fn analyzeRoot(self: *Analyze) !void {
    for (self.old_module.decls) |decl| {
        const export_inst = decl.cast(text.Inst.Export) orelse continue;
        try self.analyzeExport(export_inst);
    }
}
}
fn analyzeExport(ctx: *Analyze, export_inst: *Inst.Export) !void {
const old_decl = export_inst.positionals.value;
const new_info = ctx.inst_table.get(old_exp_target) orelse blk: {
const new_decl = try analyzeDecl(ctx, old_decl);
const new_info: Analyze.NewInst = .{ .ptr = new_decl };
try ctx.inst_table.put(old_decl, new_info);
break :blk new_info;
};
/// Returns the analyzed instruction for `old_inst`, analyzing it on first use.
/// Both outcomes are memoized in `inst_table`: a failed analysis is stored as a
/// null pointer, so a later lookup of the same instruction returns
/// `error.AnalysisFail` without analyzing it again.
fn resolveInst(self: *Analyze, old_inst: *text.Inst) InnerError!*Inst {
    // Fast path: this instruction was already visited.
    if (self.inst_table.get(old_inst)) |cached| {
        return cached.value.ptr orelse return error.AnalysisFail;
    }

    const analyzed = self.analyzeDecl(old_inst) catch |err| switch (err) {
        error.AnalysisFail => {
            // Remember the failure before propagating it.
            try self.inst_table.putNoClobber(old_inst, .{ .ptr = null });
            return error.AnalysisFail;
        },
        else => |e| return e,
    };
    try self.inst_table.putNoClobber(old_inst, .{ .ptr = analyzed });
    return analyzed;
}
//const exp_type = new_info.ptr.ty();
//switch (exp_type.zigTypeTag()) {
// .Fn => {
// if () |kv| {
// kv.value
// }
// return analyzeExportFn(ctx, exp_target.cast(Inst.,
// },
// else => return ctx.fail("unable to export type '{}'", .{exp_type}),
//}
}
/// Resolves `old_inst` and requires the result to be a constant, returning
/// the instruction's type paired with its constant value.
fn resolveInstConst(self: *Analyze, old_inst: *text.Inst) InnerError!TypedValue {
    const resolved = try self.resolveInst(old_inst);
    const constant_value = try self.resolveConstValue(resolved);
    return TypedValue{ .ty = resolved.ty, .val = constant_value };
}
/// Returns the value carried by `base` when it is a constant instruction.
/// Any other instruction kind is reported as an analysis error at the
/// instruction's source offset.
fn resolveConstValue(self: *Analyze, base: *Inst) !Value {
    if (base.cast(Inst.Constant)) |const_inst| {
        return const_inst.val;
    }
    return self.fail(base.src_offset, "unable to resolve comptime value", .{});
}
/// Resolves `old_inst`, coerces it to the `const_slice_u8` type, and returns
/// a copy of its constant bytes allocated from the analysis arena.
fn resolveConstString(self: *Analyze, old_inst: *text.Inst) ![]u8 {
    const resolved = try self.resolveInst(old_inst);
    const as_slice = try self.coerce(Type.initTag(.const_slice_u8), resolved);
    const bytes_value = try self.resolveConstValue(as_slice);
    return bytes_value.toAllocatedBytes(&self.arena.allocator);
}
/// Analyzes one export instruction: resolves the symbol name to a constant
/// string and the exported operand to a constant typed value, then records
/// the pair in `exports`. Only function-typed values can be exported; any
/// other type is reported as an error at the operand's source offset.
fn analyzeExport(self: *Analyze, export_inst: *text.Inst.Export) !void {
    const symbol_name = try self.resolveConstString(export_inst.positionals.symbol_name);
    const target = export_inst.positionals.value;
    const typed_value = try self.resolveInstConst(target);

    if (typed_value.ty.zigTypeTag() != .Fn) {
        return self.fail(target.src_offset, "unable to export type '{}'", .{typed_value.ty});
    }

    try self.exports.append(.{
        .name = symbol_name,
        .typed_value = typed_value,
    });
}
/// Analyzes a single textual instruction. No instruction kind is implemented
/// yet, so every tag reports a "TODO implement analyzing" error naming the tag.
/// The switch deliberately lists every tag with no `else`, so a newly added
/// tag is a compile error here until it is handled.
fn analyzeDecl(self: *Analyze, old_inst: *text.Inst) !*Inst {
    switch (old_inst.tag) {
        .str,
        .int,
        .ptrtoint,
        .fieldptr,
        .deref,
        .as,
        .@"asm",
        .@"unreachable",
        .@"fn",
        .@"export",
        .primitive,
        .fntype,
        => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}),
    }
}
/// Placeholder for type coercion of `inst` to `dest_type`. Not implemented:
/// it always reports a "TODO implement type coercion" error at the
/// instruction's source offset and therefore never returns an instruction.
fn coerce(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst {
return self.fail(inst.src_offset, "TODO implement type coercion", .{});
}
/// Records an analysis error at `src_offset` and hands back
/// `error.AnalysisFail` for the caller to return. The message is formatted
/// into the analysis arena. If allocating the message or growing the error
/// list fails, `error.OutOfMemory` is returned instead.
fn fail(self: *Analyze, src_offset: usize, comptime format: []const u8, args: var) InnerError {
    @setCold(true);
    const rendered = try std.fmt.allocPrint(&self.arena.allocator, format, args);
    try self.errors.append(.{
        .byte_offset = src_offset,
        .msg = rendered,
    });
    return error.AnalysisFail;
}
};
pub fn main() anyerror!void {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const allocator = &arena.allocator;
const allocator = if (std.builtin.link_libc) std.heap.c_allocator else &arena.allocator;
const args = try std.process.argsAlloc(allocator);
@@ -116,11 +231,11 @@ pub fn main() anyerror!void {
const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0);
var tree = try text.parse(allocator, source);
defer tree.deinit();
var zir_module = try text.parse(allocator, source);
defer zir_module.deinit(allocator);
if (tree.errors.len != 0) {
for (tree.errors) |err_msg| {
if (zir_module.errors.len != 0) {
for (zir_module.errors) |err_msg| {
const loc = findLineColumn(source, err_msg.byte_offset);
std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
}
@@ -128,21 +243,22 @@ pub fn main() anyerror!void {
std.process.exit(1);
}
tree.dump();
var analyzed_module = try analyze(allocator, zir_module);
defer analyzed_module.deinit(allocator);
//const new_tree = try analyze(allocator, tree);
//defer new_tree.deinit();
if (analyzed_module.errors.len != 0) {
for (analyzed_module.errors) |err_msg| {
const loc = findLineColumn(source, err_msg.byte_offset);
std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
}
if (debug_error_trace) return error.ParseFailure;
std.process.exit(1);
}
//if (new_tree.errors.len != 0) {
// for (new_tree.errors) |err_msg| {
// const loc = findLineColumn(source, err_msg.byte_offset);
// std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg });
// }
// if (debug_error_trace) return error.ParseFailure;
// std.process.exit(1);
//}
var new_zir_module = try analyzed_module.emit_zir(allocator);
defer new_zir_module.deinit(allocator);
//new_tree.dump();
new_zir_module.dump();
}
fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } {

View File

@@ -1,4 +1,5 @@
//! This file has to do with parsing and rendering the ZIR text format.
const std = @import("std");
const mem = std.mem;
const Allocator = std.mem.Allocator;
@@ -11,6 +12,7 @@ const BigInt = std.math.big.Int;
/// in-memory, analyzed instructions with types and values.
pub const Inst = struct {
tag: Tag,
src_offset: usize,
/// These names are used directly as the instruction names in the text format.
pub const Tag = enum {
@@ -46,15 +48,15 @@ pub const Inst = struct {
}
pub fn cast(base: *Inst, comptime T: type) ?*T {
const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag;
if (base.tag != expected_tag)
if (base.tag != T.base_tag)
return null;
return @fieldParentPtr(T, "base", base);
}
pub const Str = struct {
base: Inst = Inst{ .tag = .str },
pub const base_tag = Tag.str;
base: Inst,
positionals: struct {
bytes: []u8,
@@ -63,7 +65,8 @@ pub const Inst = struct {
};
pub const Int = struct {
base: Inst = Inst{ .tag = .int },
pub const base_tag = Tag.int;
base: Inst,
positionals: struct {
int: BigInt,
@@ -72,7 +75,8 @@ pub const Inst = struct {
};
pub const PtrToInt = struct {
base: Inst = Inst{ .tag = .ptrtoint },
pub const base_tag = Tag.ptrtoint;
base: Inst,
positionals: struct {
ptr: *Inst,
@@ -81,7 +85,8 @@ pub const Inst = struct {
};
pub const FieldPtr = struct {
base: Inst = Inst{ .tag = .fieldptr },
pub const base_tag = Tag.fieldptr;
base: Inst,
positionals: struct {
object_ptr: *Inst,
@@ -91,7 +96,8 @@ pub const Inst = struct {
};
pub const Deref = struct {
base: Inst = Inst{ .tag = .deref },
pub const base_tag = Tag.deref;
base: Inst,
positionals: struct {
ptr: *Inst,
@@ -100,7 +106,8 @@ pub const Inst = struct {
};
pub const As = struct {
base: Inst = Inst{ .tag = .as },
pub const base_tag = Tag.as;
base: Inst,
positionals: struct {
dest_type: *Inst,
@@ -110,7 +117,8 @@ pub const Inst = struct {
};
pub const Assembly = struct {
base: Inst = Inst{ .tag = .@"asm" },
pub const base_tag = Tag.@"asm";
base: Inst,
positionals: struct {
asm_source: *Inst,
@@ -126,14 +134,16 @@ pub const Inst = struct {
};
pub const Unreachable = struct {
base: Inst = Inst{ .tag = .@"unreachable" },
pub const base_tag = Tag.@"unreachable";
base: Inst,
positionals: struct {},
kw_args: struct {},
};
pub const Fn = struct {
base: Inst = Inst{ .tag = .@"fn" },
pub const base_tag = Tag.@"fn";
base: Inst,
positionals: struct {
fn_type: *Inst,
@@ -147,7 +157,8 @@ pub const Inst = struct {
};
pub const Export = struct {
base: Inst = Inst{ .tag = .@"export" },
pub const base_tag = Tag.@"export";
base: Inst,
positionals: struct {
symbol_name: *Inst,
@@ -157,7 +168,8 @@ pub const Inst = struct {
};
pub const Primitive = struct {
base: Inst = Inst{ .tag = .primitive },
pub const base_tag = Tag.primitive;
base: Inst,
positionals: struct {
tag: BuiltinType,
@@ -192,7 +204,8 @@ pub const Inst = struct {
};
pub const FnType = struct {
base: Inst = Inst{ .tag = .fntype },
pub const base_tag = Tag.fntype;
base: Inst,
positionals: struct {
param_types: []*Inst,
@@ -212,9 +225,12 @@ pub const ErrorMsg = struct {
pub const Module = struct {
decls: []*Inst,
errors: []ErrorMsg,
arena: std.heap.ArenaAllocator,
pub fn deinit(self: *Module) void {
// TODO resource deallocation
pub fn deinit(self: *Module, allocator: *Allocator) void {
allocator.free(self.decls);
allocator.free(self.errors);
self.arena.deinit();
self.* = undefined;
}
@@ -225,6 +241,8 @@ pub const Module = struct {
const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body });
/// The allocator is used for temporary storage, but this function always returns
/// with no resources allocated.
pub fn writeToStream(self: Module, allocator: *Allocator, stream: var) !void {
// First, build a map of *Inst to @ or % indexes
var inst_table = InstPtrTable.init(allocator);
@@ -359,6 +377,7 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module
var parser: Parser = .{
.allocator = allocator,
.arena = std.heap.ArenaAllocator.init(allocator),
.i = 0,
.source = source,
.decls = std.ArrayList(*Inst).init(allocator),
@@ -374,11 +393,13 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module
return Module{
.decls = parser.decls.toOwnedSlice(),
.errors = parser.errors.toOwnedSlice(),
.arena = parser.arena,
};
}
const Parser = struct {
allocator: *Allocator,
arena: std.heap.ArenaAllocator,
i: usize,
source: [:0]const u8,
errors: std.ArrayList(ErrorMsg),
@@ -439,7 +460,7 @@ const Parser = struct {
self.i += 1;
const span = self.source[start..self.i];
var bad_index: usize = undefined;
const parsed = std.zig.parseStringLiteral(self.allocator, span, &bad_index) catch |err| switch (err) {
const parsed = std.zig.parseStringLiteral(&self.arena.allocator, span, &bad_index) catch |err| switch (err) {
error.InvalidCharacter => {
self.i = start + bad_index;
const bad_byte = self.source[self.i];
@@ -466,7 +487,7 @@ const Parser = struct {
else => break,
};
const number_text = self.source[start..self.i];
var result = try BigInt.init(self.allocator);
var result = try BigInt.init(&self.arena.allocator);
result.setString(10, number_text) catch |err| {
self.i = start;
switch (err) {
@@ -551,7 +572,7 @@ const Parser = struct {
fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError {
@setCold(true);
const msg = try std.fmt.allocPrint(self.allocator, format, args);
const msg = try std.fmt.allocPrint(&self.arena.allocator, format, args);
(try self.errors.addOne()).* = .{
.byte_offset = self.i,
.msg = msg,
@@ -576,8 +597,11 @@ const Parser = struct {
comptime InstType: type,
body_ctx: ?*Body,
) !*Inst {
const inst_specific = try self.allocator.create(InstType);
inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?;
const inst_specific = try self.arena.allocator.create(InstType);
inst_specific.base = .{
.src_offset = self.i,
.tag = InstType.base_tag,
};
if (@hasField(InstType, "ty")) {
inst_specific.ty = opt_type orelse {
@@ -657,8 +681,7 @@ const Parser = struct {
skipSpace(self);
if (eatByte(self, ']')) return &[0]*Inst{};
var instructions = std.ArrayList(*Inst).init(self.allocator);
defer instructions.deinit();
var instructions = std.ArrayList(*Inst).init(&self.arena.allocator);
while (true) {
skipSpace(self);
try instructions.append(try parseParameterInst(self, body_ctx));

View File

@@ -18,9 +18,39 @@ pub const Type = extern union {
/// Maps this type's tag to the `std.builtin.TypeId` category it belongs to.
/// The switch lists every tag with no `else`, so adding a tag without
/// classifying it here is a compile error.
pub fn zigTypeTag(self: Type) std.builtin.TypeId {
    switch (self.tag()) {
        .@"u8",
        .@"i8",
        .@"isize",
        .@"usize",
        .@"c_short",
        .@"c_ushort",
        .@"c_int",
        .@"c_uint",
        .@"c_long",
        .@"c_ulong",
        .@"c_longlong",
        .@"c_ulonglong",
        => return .Int,

        // `c_longdouble` is C's `long double`: a floating-point type, not an integer.
        .@"f16",
        .@"f32",
        .@"f64",
        .@"f128",
        .@"c_longdouble",
        => return .Float,

        .@"c_void" => return .Opaque,
        .@"bool" => return .Bool,
        .@"void" => return .Void,
        .@"type" => return .Type,
        .@"anyerror" => return .ErrorSet,
        .@"comptime_int" => return .ComptimeInt,
        .@"comptime_float" => return .ComptimeFloat,
        .@"noreturn" => return .NoReturn,

        .array, .array_u8_sentinel_0 => return .Array,
        .single_const_pointer => return .Pointer,
        .const_slice_u8 => return .Pointer,
    }
}
@@ -51,35 +81,36 @@ pub const Type = extern union {
comptime assert(fmt.len == 0);
var ty = self;
while (true) {
switch (ty.tag()) {
@"u8",
@"i8",
@"isize",
@"usize",
@"noreturn",
@"void",
@"c_short",
@"c_ushort",
@"c_int",
@"c_uint",
@"c_long",
@"c_ulong",
@"c_longlong",
@"c_ulonglong",
@"c_longdouble",
@"c_void",
@"f16",
@"f32",
@"f64",
@"f128",
@"bool",
@"void",
@"type",
@"anyerror",
@"comptime_int",
@"comptime_float",
@"noreturn",
=> |t| return out_stream.writeAll(@tagName(t)),
const t = ty.tag();
switch (t) {
.@"u8",
.@"i8",
.@"isize",
.@"usize",
.@"c_short",
.@"c_ushort",
.@"c_int",
.@"c_uint",
.@"c_long",
.@"c_ulong",
.@"c_longlong",
.@"c_ulonglong",
.@"c_longdouble",
.@"c_void",
.@"f16",
.@"f32",
.@"f64",
.@"f128",
.@"bool",
.@"void",
.@"type",
.@"anyerror",
.@"comptime_int",
.@"comptime_float",
.@"noreturn",
=> return out_stream.writeAll(@tagName(t)),
.const_slice_u8 => return out_stream.writeAll("[]const u8"),
.array_u8_sentinel_0 => {
const payload = @fieldParentPtr(Payload.Array_u8_Sentinel0, "base", ty.ptr_otherwise);
@@ -110,6 +141,7 @@ pub const Type = extern union {
/// See `zigTypeTag` for the function that corresponds to `std.builtin.TypeId`.
pub const Tag = enum {
// The first section of this enum are tags that require no payload.
const_slice_u8,
@"u8",
@"i8",
@"isize",

View File

@@ -91,6 +91,15 @@ pub const Value = extern union {
}
}
/// Returns a newly allocated copy of this value's bytes; the caller owns the
/// returned slice. The value must be representable as an array of bytes:
/// any other kind of value is asserted to be unreachable.
pub fn toAllocatedBytes(self: Value, allocator: *std.mem.Allocator) error{OutOfMemory}![]u8 {
    const bytes = self.cast(Payload.Bytes) orelse unreachable;
    return std.mem.dupe(allocator, u8, bytes.data);
}
/// This type is not copyable since it may contain pointers to its inner data.
pub const Payload = struct {
tag: Tag,