diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index f87e06eb43..affcb34e39 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -8,106 +8,221 @@ const text = @import("ir/text.zig"); /// These are in-memory, analyzed instructions. See `text.Inst` for the representation /// of instructions that correspond to the ZIR text format. +/// This struct owns the `Value` and `Type` memory. When the struct is deallocated, +/// so are the `Value` and `Type`. The value of a constant must be copied into +/// a memory location for the value to survive after a const instruction. pub const Inst = struct { - pub fn ty(base: *Inst) ?Type { - switch (base.tag) { - .constant => return base.cast(Constant).?.ty, - .@"asm" => return base.cast(Assembly).?.ty, - .@"fn" => return base.cast(Fn).?.ty, + tag: Tag, + ty: Type, + src_offset: usize, - .ptrtoint => return Type.initTag(.@"usize"), - .@"unreachable" => return Type.initTag(.@"noreturn"), - .@"export" => return Type.initTag(.@"void"), - .fntype, .primitive => return Type.initTag(.@"type"), + pub const Tag = enum { + unreach, + constant, + assembly, + }; - .fieldptr, - .deref, - => return null, - } + pub fn cast(base: *Inst, comptime T: type) ?*T { + if (base.tag != T.base_tag) + return null; + + return @fieldParentPtr(T, "base", base); } - /// This struct owns the `Value` memory. When the struct is deallocated, - /// so is the `Value`. The value of a constant must be copied into - /// a memory location for the value to survive after a const instruction. pub const Constant = struct { - base: Inst = Inst{ .tag = .constant }, - ty: Type, + pub const base_tag = Tag.constant; + base: Inst, - positionals: struct { - value: Value, - }, - kw_args: struct {}, + val: Value, + }; + + pub const Assembly = struct { + pub const base_tag = Tag.assembly; + base: Inst, + + asm_source: []const u8, + is_volatile: bool, + output: []const u8, + inputs: []const []const u8, + clobbers: []const []const u8, + args: []const []const u8, }; }; -const Analyze = struct { - allocator: *Allocator, - old_tree: *const Module, - errors: std.ArrayList(ErrorMsg), - decls: std.ArrayList(*Inst), - - const NewInst = struct { - ptr: *Inst, - }; +const TypedValue = struct { + ty: Type, + val: Value, }; -pub fn analyze(allocator: *Allocator, old_tree: Module) !Module { +pub const Module = struct { + exports: []Export, + errors: []ErrorMsg, + arena: std.heap.ArenaAllocator, + + pub const Export = struct { + name: []const u8, + typed_value: TypedValue, + }; + + pub fn deinit(self: *Module, allocator: *Allocator) void { + allocator.free(self.exports); + allocator.free(self.errors); + self.arena.deinit(); + self.* = undefined; + } + + pub fn emit_zir(self: Module, allocator: *Allocator) !text.Module { + return error.TodoImplementEmitToZIR; + } +}; + +pub const ErrorMsg = struct { + byte_offset: usize, + msg: []const u8, +}; + +pub fn analyze(allocator: *Allocator, old_module: text.Module) !Module { var ctx = Analyze{ .allocator = allocator, - .old_tree = &old_tree, - .decls = std.ArrayList(*Inst).init(allocator), + .arena = std.heap.ArenaAllocator.init(allocator), + .old_module = &old_module, .errors = std.ArrayList(ErrorMsg).init(allocator), - .inst_table = std.HashMap(*Inst, Analyze.InstData).init(allocator), + .inst_table = std.AutoHashMap(*text.Inst, Analyze.NewInst).init(allocator), + .exports = std.ArrayList(Module.Export).init(allocator), }; - defer ctx.decls.deinit(); defer ctx.errors.deinit(); - defer inst_table.deinit(); + defer ctx.inst_table.deinit(); + defer ctx.exports.deinit(); - analyzeRoot(&ctx) catch |err| switch (err) { - error.AnalyzeFailure => { + ctx.analyzeRoot() catch |err| switch (err) { + error.AnalysisFail => { assert(ctx.errors.items.len != 0); }, else => |e| return e, }; return Module{ - .decls = ctx.decls.toOwnedSlice(), + .exports = ctx.exports.toOwnedSlice(), .errors = ctx.errors.toOwnedSlice(), + .arena = ctx.arena, }; } -fn analyzeRoot(ctx: *Analyze) !void { - for (old_tree.decls) |decl| { - if (decl.cast(Inst.Export)) |export_inst| { - try analyzeExport(ctx, export_inst); +const Analyze = struct { + allocator: *Allocator, + arena: std.heap.ArenaAllocator, + old_module: *const text.Module, + errors: std.ArrayList(ErrorMsg), + inst_table: std.AutoHashMap(*text.Inst, NewInst), + exports: std.ArrayList(Module.Export), + + const NewInst = struct { + /// null means a semantic analysis error happened + ptr: ?*Inst, + }; + + const InnerError = error{ OutOfMemory, AnalysisFail }; + + fn analyzeRoot(self: *Analyze) !void { + for (self.old_module.decls) |decl| { + if (decl.cast(text.Inst.Export)) |export_inst| { + try analyzeExport(self, export_inst); + } } } -} -fn analyzeExport(ctx: *Analyze, export_inst: *Inst.Export) !void { - const old_decl = export_inst.positionals.value; - const new_info = ctx.inst_table.get(old_exp_target) orelse blk: { - const new_decl = try analyzeDecl(ctx, old_decl); - const new_info: Analyze.NewInst = .{ .ptr = new_decl }; - try ctx.inst_table.put(old_decl, new_info); - break :blk new_info; - }; + fn resolveInst(self: *Analyze, old_inst: *text.Inst) InnerError!*Inst { + if (self.inst_table.get(old_inst)) |kv| { + return kv.value.ptr orelse return error.AnalysisFail; + } else { + const new_inst = self.analyzeDecl(old_inst) catch |err| switch (err) { + error.AnalysisFail => { + try self.inst_table.putNoClobber(old_inst, .{ .ptr = null }); + return error.AnalysisFail; + }, + else => |e| return e, + }; + try self.inst_table.putNoClobber(old_inst, .{ .ptr = new_inst }); + return new_inst; + } + } - //const exp_type = new_info.ptr.ty(); - //switch (exp_type.zigTypeTag()) { - // .Fn => { - // if () |kv| { - // kv.value - // } - // return analyzeExportFn(ctx, exp_target.cast(Inst., - // }, - // else => return ctx.fail("unable to export type '{}'", .{exp_type}), - //} -} + fn resolveInstConst(self: *Analyze, old_inst: *text.Inst) InnerError!TypedValue { + const new_inst = try self.resolveInst(old_inst); + const val = try self.resolveConstValue(new_inst); + return TypedValue{ + .ty = new_inst.ty, + .val = val, + }; + } + + fn resolveConstValue(self: *Analyze, base: *Inst) !Value { + const const_inst = base.cast(Inst.Constant) orelse + return self.fail(base.src_offset, "unable to resolve comptime value", .{}); + return const_inst.val; + } + + fn resolveConstString(self: *Analyze, old_inst: *text.Inst) ![]u8 { + const new_inst = try self.resolveInst(old_inst); + const wanted_type = Type.initTag(.const_slice_u8); + const coerced_inst = try self.coerce(wanted_type, new_inst); + const val = try self.resolveConstValue(coerced_inst); + return val.toAllocatedBytes(&self.arena.allocator); + } + + fn analyzeExport(self: *Analyze, export_inst: *text.Inst.Export) !void { + const symbol_name = try self.resolveConstString(export_inst.positionals.symbol_name); + const typed_value = try self.resolveInstConst(export_inst.positionals.value); + + switch (typed_value.ty.zigTypeTag()) { + .Fn => {}, + else => return self.fail( + export_inst.positionals.value.src_offset, + "unable to export type '{}'", + .{typed_value.ty}, + ), + } + try self.exports.append(.{ + .name = symbol_name, + .typed_value = typed_value, + }); + } + + fn analyzeDecl(self: *Analyze, old_inst: *text.Inst) !*Inst { + switch (old_inst.tag) { + .str => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .int => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .ptrtoint => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .fieldptr => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .deref => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .as => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .@"asm" => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .@"unreachable" => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .@"fn" => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .@"export" => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .primitive => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + .fntype => return self.fail(old_inst.src_offset, "TODO implement analyzing {}", .{@tagName(old_inst.tag)}), + } + } + + fn coerce(self: *Analyze, dest_type: Type, inst: *Inst) !*Inst { + return self.fail(inst.src_offset, "TODO implement type coercion", .{}); + } + + fn fail(self: *Analyze, src_offset: usize, comptime format: []const u8, args: var) InnerError { + @setCold(true); + const msg = try std.fmt.allocPrint(&self.arena.allocator, format, args); + (try self.errors.addOne()).* = .{ + .byte_offset = src_offset, + .msg = msg, + }; + return error.AnalysisFail; + } +}; pub fn main() anyerror!void { var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); defer arena.deinit(); - const allocator = &arena.allocator; + const allocator = if (std.builtin.link_libc) std.heap.c_allocator else &arena.allocator; const args = try std.process.argsAlloc(allocator); @@ -116,11 +231,11 @@ pub fn main() anyerror!void { const source = try std.fs.cwd().readFileAllocOptions(allocator, src_path, std.math.maxInt(u32), 1, 0); - var tree = try text.parse(allocator, source); - defer tree.deinit(); + var zir_module = try text.parse(allocator, source); + defer zir_module.deinit(allocator); - if (tree.errors.len != 0) { - for (tree.errors) |err_msg| { + if (zir_module.errors.len != 0) { + for (zir_module.errors) |err_msg| { const loc = findLineColumn(source, err_msg.byte_offset); std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); } @@ -128,21 +243,22 @@ pub fn main() anyerror!void { std.process.exit(1); } - tree.dump(); + var analyzed_module = try analyze(allocator, zir_module); + defer analyzed_module.deinit(allocator); - //const new_tree = try analyze(allocator, tree); - //defer new_tree.deinit(); + if (analyzed_module.errors.len != 0) { + for (analyzed_module.errors) |err_msg| { + const loc = findLineColumn(source, err_msg.byte_offset); + std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); + } + if (debug_error_trace) return error.ParseFailure; + std.process.exit(1); + } - //if (new_tree.errors.len != 0) { - // for (new_tree.errors) |err_msg| { - // const loc = findLineColumn(source, err_msg.byte_offset); - // std.debug.warn("{}:{}:{}: error: {}\n", .{ src_path, loc.line + 1, loc.column + 1, err_msg.msg }); - // } - // if (debug_error_trace) return error.ParseFailure; - // std.process.exit(1); - //} + var new_zir_module = try analyzed_module.emit_zir(allocator); + defer new_zir_module.deinit(allocator); - //new_tree.dump(); + new_zir_module.dump(); } fn findLineColumn(source: []const u8, byte_offset: usize) struct { line: usize, column: usize } { diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index e5169010f3..d850d2bcff 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -1,4 +1,5 @@ //! This file has to do with parsing and rendering the ZIR text format. + const std = @import("std"); const mem = std.mem; const Allocator = std.mem.Allocator; @@ -11,6 +12,7 @@ const BigInt = std.math.big.Int; /// in-memory, analyzed instructions with types and values. pub const Inst = struct { tag: Tag, + src_offset: usize, /// These names are used directly as the instruction names in the text format. pub const Tag = enum { @@ -46,15 +48,15 @@ pub const Inst = struct { } pub fn cast(base: *Inst, comptime T: type) ?*T { - const expected_tag = std.meta.fieldInfo(T, "base").default_value.?.tag; - if (base.tag != expected_tag) + if (base.tag != T.base_tag) return null; return @fieldParentPtr(T, "base", base); } pub const Str = struct { - base: Inst = Inst{ .tag = .str }, + pub const base_tag = Tag.str; + base: Inst, positionals: struct { bytes: []u8, @@ -63,7 +65,8 @@ pub const Inst = struct { }; pub const Int = struct { - base: Inst = Inst{ .tag = .int }, + pub const base_tag = Tag.int; + base: Inst, positionals: struct { int: BigInt, @@ -72,7 +75,8 @@ pub const Inst = struct { }; pub const PtrToInt = struct { - base: Inst = Inst{ .tag = .ptrtoint }, + pub const base_tag = Tag.ptrtoint; + base: Inst, positionals: struct { ptr: *Inst, @@ -81,7 +85,8 @@ pub const Inst = struct { }; pub const FieldPtr = struct { - base: Inst = Inst{ .tag = .fieldptr }, + pub const base_tag = Tag.fieldptr; + base: Inst, positionals: struct { object_ptr: *Inst, @@ -91,7 +96,8 @@ pub const Inst = struct { }; pub const Deref = struct { - base: Inst = Inst{ .tag = .deref }, + pub const base_tag = Tag.deref; + base: Inst, positionals: struct { ptr: *Inst, @@ -100,7 +106,8 @@ pub const Inst = struct { }; pub const As = struct { - base: Inst = Inst{ .tag = .as }, + pub const base_tag = Tag.as; + base: Inst, positionals: struct { dest_type: *Inst, @@ -110,7 +117,8 @@ pub const Inst = struct { }; pub const Assembly = struct { - base: Inst = Inst{ .tag = .@"asm" }, + pub const base_tag = Tag.@"asm"; + base: Inst, positionals: struct { asm_source: *Inst, @@ -126,14 +134,16 @@ pub const Inst = struct { }; pub const Unreachable = struct { - base: Inst = Inst{ .tag = .@"unreachable" }, + pub const base_tag = Tag.@"unreachable"; + base: Inst, positionals: struct {}, kw_args: struct {}, }; pub const Fn = struct { - base: Inst = Inst{ .tag = .@"fn" }, + pub const base_tag = Tag.@"fn"; + base: Inst, positionals: struct { fn_type: *Inst, @@ -147,7 +157,8 @@ pub const Inst = struct { }; pub const Export = struct { - base: Inst = Inst{ .tag = .@"export" }, + pub const base_tag = Tag.@"export"; + base: Inst, positionals: struct { symbol_name: *Inst, @@ -157,7 +168,8 @@ pub const Inst = struct { }; pub const Primitive = struct { - base: Inst = Inst{ .tag = .primitive }, + pub const base_tag = Tag.primitive; + base: Inst, positionals: struct { tag: BuiltinType, @@ -192,7 +204,8 @@ pub const Inst = struct { }; pub const FnType = struct { - base: Inst = Inst{ .tag = .fntype }, + pub const base_tag = Tag.fntype; + base: Inst, positionals: struct { param_types: []*Inst, @@ -212,9 +225,12 @@ pub const ErrorMsg = struct { pub const Module = struct { decls: []*Inst, errors: []ErrorMsg, + arena: std.heap.ArenaAllocator, - pub fn deinit(self: *Module) void { - // TODO resource deallocation + pub fn deinit(self: *Module, allocator: *Allocator) void { + allocator.free(self.decls); + allocator.free(self.errors); + self.arena.deinit(); self.* = undefined; } @@ -225,6 +241,8 @@ pub const Module = struct { const InstPtrTable = std.AutoHashMap(*Inst, struct { index: usize, fn_body: ?*Inst.Fn.Body }); + /// The allocator is used for temporary storage, but this function always returns + /// with no resources allocated. pub fn writeToStream(self: Module, allocator: *Allocator, stream: var) !void { // First, build a map of *Inst to @ or % indexes var inst_table = InstPtrTable.init(allocator); @@ -359,6 +377,7 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module var parser: Parser = .{ .allocator = allocator, + .arena = std.heap.ArenaAllocator.init(allocator), .i = 0, .source = source, .decls = std.ArrayList(*Inst).init(allocator), @@ -374,11 +393,13 @@ pub fn parse(allocator: *Allocator, source: [:0]const u8) Allocator.Error!Module return Module{ .decls = parser.decls.toOwnedSlice(), .errors = parser.errors.toOwnedSlice(), + .arena = parser.arena, }; } const Parser = struct { allocator: *Allocator, + arena: std.heap.ArenaAllocator, i: usize, source: [:0]const u8, errors: std.ArrayList(ErrorMsg), @@ -439,7 +460,7 @@ const Parser = struct { self.i += 1; const span = self.source[start..self.i]; var bad_index: usize = undefined; - const parsed = std.zig.parseStringLiteral(self.allocator, span, &bad_index) catch |err| switch (err) { + const parsed = std.zig.parseStringLiteral(&self.arena.allocator, span, &bad_index) catch |err| switch (err) { error.InvalidCharacter => { self.i = start + bad_index; const bad_byte = self.source[self.i]; @@ -466,7 +487,7 @@ const Parser = struct { else => break, }; const number_text = self.source[start..self.i]; - var result = try BigInt.init(self.allocator); + var result = try BigInt.init(&self.arena.allocator); result.setString(10, number_text) catch |err| { self.i = start; switch (err) { @@ -551,7 +572,7 @@ const Parser = struct { fn fail(self: *Parser, comptime format: []const u8, args: var) InnerError { @setCold(true); - const msg = try std.fmt.allocPrint(self.allocator, format, args); + const msg = try std.fmt.allocPrint(&self.arena.allocator, format, args); (try self.errors.addOne()).* = .{ .byte_offset = self.i, .msg = msg, @@ -576,8 +597,11 @@ const Parser = struct { comptime InstType: type, body_ctx: ?*Body, ) !*Inst { - const inst_specific = try self.allocator.create(InstType); - inst_specific.base = std.meta.fieldInfo(InstType, "base").default_value.?; + const inst_specific = try self.arena.allocator.create(InstType); + inst_specific.base = .{ + .src_offset = self.i, + .tag = InstType.base_tag, + }; if (@hasField(InstType, "ty")) { inst_specific.ty = opt_type orelse { @@ -657,8 +681,7 @@ const Parser = struct { skipSpace(self); if (eatByte(self, ']')) return &[0]*Inst{}; - var instructions = std.ArrayList(*Inst).init(self.allocator); - defer instructions.deinit(); + var instructions = std.ArrayList(*Inst).init(&self.arena.allocator); while (true) { skipSpace(self); try instructions.append(try parseParameterInst(self, body_ctx)); diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 6745366752..ae7ff8a8b7 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -18,9 +18,39 @@ pub const Type = extern union { pub fn zigTypeTag(self: Type) std.builtin.TypeId { switch (self.tag()) { - .@"u8", .@"usize" => return .Int, - .array_u8, .array_u8_sentinel_0 => return .Array, + .@"u8", + .@"i8", + .@"isize", + .@"usize", + .@"c_short", + .@"c_ushort", + .@"c_int", + .@"c_uint", + .@"c_long", + .@"c_ulong", + .@"c_longlong", + .@"c_ulonglong", + .@"c_longdouble", + => return .Int, + + .@"f16", + .@"f32", + .@"f64", + .@"f128", + => return .Float, + + .@"c_void" => return .Opaque, + .@"bool" => return .Bool, + .@"void" => return .Void, + .@"type" => return .Type, + .@"anyerror" => return .ErrorSet, + .@"comptime_int" => return .ComptimeInt, + .@"comptime_float" => return .ComptimeFloat, + .@"noreturn" => return .NoReturn, + + .array, .array_u8_sentinel_0 => return .Array, .single_const_pointer => return .Pointer, + .const_slice_u8 => return .Pointer, } } @@ -51,35 +81,36 @@ pub const Type = extern union { comptime assert(fmt.len == 0); var ty = self; while (true) { - switch (ty.tag()) { - @"u8", - @"i8", - @"isize", - @"usize", - @"noreturn", - @"void", - @"c_short", - @"c_ushort", - @"c_int", - @"c_uint", - @"c_long", - @"c_ulong", - @"c_longlong", - @"c_ulonglong", - @"c_longdouble", - @"c_void", - @"f16", - @"f32", - @"f64", - @"f128", - @"bool", - @"void", - @"type", - @"anyerror", - @"comptime_int", - @"comptime_float", - @"noreturn", - => |t| return out_stream.writeAll(@tagName(t)), + const t = ty.tag(); + switch (t) { + .@"u8", + .@"i8", + .@"isize", + .@"usize", + .@"c_short", + .@"c_ushort", + .@"c_int", + .@"c_uint", + .@"c_long", + .@"c_ulong", + .@"c_longlong", + .@"c_ulonglong", + .@"c_longdouble", + .@"c_void", + .@"f16", + .@"f32", + .@"f64", + .@"f128", + .@"bool", + .@"void", + .@"type", + .@"anyerror", + .@"comptime_int", + .@"comptime_float", + .@"noreturn", + => return out_stream.writeAll(@tagName(t)), + + .const_slice_u8 => return out_stream.writeAll("[]const u8"), .array_u8_sentinel_0 => { const payload = @fieldParentPtr(Payload.Array_u8_Sentinel0, "base", ty.ptr_otherwise); @@ -110,6 +141,7 @@ pub const Type = extern union { /// See `zigTypeTag` for the function that corresponds to `std.builtin.TypeId`. pub const Tag = enum { // The first section of this enum are tags that require no payload. + const_slice_u8, @"u8", @"i8", @"isize", diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index f62ed39f8b..a6a7ebc76d 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -91,6 +91,15 @@ pub const Value = extern union { } } + /// Asserts that the value is representable as an array of bytes. + /// Copies the value into a freshly allocated slice of memory, which is owned by the caller. + pub fn toAllocatedBytes(self: Value, allocator: *std.mem.Allocator) error{OutOfMemory}![]u8 { + if (self.cast(Payload.Bytes)) |bytes| { + return std.mem.dupe(allocator, u8, bytes.data); + } + unreachable; + } + /// This type is not copyable since it may contain pointers to its inner data. pub const Payload = struct { tag: Tag,