diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index b19a3f24f7..6ebf68df90 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -33,6 +33,7 @@ pub fn generateSymbol( var function = Function{ .target = &bin_file.options.target, + .bin_file = bin_file, .mod_fn = module_fn, .code = code, .inst_table = std.AutoHashMap(*ir.Inst, Function.MCValue).init(bin_file.allocator), @@ -144,6 +145,7 @@ pub fn generateSymbol( } const Function = struct { + bin_file: *link.ElfFile, target: *const std.Target, mod_fn: *const ir.Module.Fn, code: *std.ArrayList(u8), @@ -160,6 +162,8 @@ const Function = struct { /// The value is in a target-specific register. The value can /// be @intToEnum casted to the respective Reg enum. register: usize, + /// The value is in memory at a hard-coded address. + memory: u64, }; fn genFuncInst(self: *Function, inst: *ir.Inst) !MCValue { @@ -375,6 +379,7 @@ const Function = struct { }, .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rax = embedded_in_code", .{}), .register => return self.fail(src, "TODO implement x86_64 genSetReg %rax = register", .{}), + .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rax = memory", .{}), }, .rdx => switch (mcv) { .none, .unreach => unreachable, @@ -406,6 +411,7 @@ const Function = struct { }, .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = embedded_in_code", .{}), .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = register", .{}), + .memory => return self.fail(src, "TODO implement x86_64 genSetReg %rdx = memory", .{}), }, .rdi => switch (mcv) { .none, .unreach => unreachable, @@ -437,10 +443,37 @@ const Function = struct { }, .embedded_in_code => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = embedded_in_code", .{}), .register => return self.fail(src, "TODO implement x86_64 genSetReg %rdi = register", .{}), + .memory => return self.fail(src, "TODO 
implement x86_64 genSetReg %rdi = memory", .{}), }, .rsi => switch (mcv) { .none, .unreach => unreachable, - .immediate => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = immediate", .{}), + .immediate => |x| { + // Setting the esi register zeroes the upper part of rsi, so if the number is small + // enough, that is preferable. + // Best case: zero + // 31 f6 xor esi,esi + if (x == 0) { + return self.code.appendSlice(&[_]u8{ 0x31, 0xf6 }); + } + // Next best case: set esi with 4 bytes + // be 40 30 20 10 mov esi,0x10203040 + if (x <= std.math.maxInt(u32)) { + try self.code.resize(self.code.items.len + 5); + self.code.items[self.code.items.len - 5] = 0xbe; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } + // Worst case: set rsi with 8 bytes + // 48 be 80 70 60 50 40 30 20 10 movabs rsi,0x1020304050607080 + + try self.code.resize(self.code.items.len + 10); + self.code.items[self.code.items.len - 10] = 0x48; + self.code.items[self.code.items.len - 9] = 0xbe; + const imm_ptr = self.code.items[self.code.items.len - 8 ..][0..8]; + mem.writeIntLittle(u64, imm_ptr, x); + return; + }, .embedded_in_code => |code_offset| { // Examples: // lea rsi, [rip + 0x01020304] @@ -462,6 +495,21 @@ const Function = struct { return; }, .register => return self.fail(src, "TODO implement x86_64 genSetReg %rsi = register", .{}), + .memory => |x| { + if (x <= std.math.maxInt(u32)) { + // 48 8b 34 25 40 30 20 10 mov rsi,QWORD PTR ds:0x10203040 + try self.code.resize(self.code.items.len + 8); + self.code.items[self.code.items.len - 8] = 0x48; + self.code.items[self.code.items.len - 7] = 0x8b; + self.code.items[self.code.items.len - 6] = 0x34; + self.code.items[self.code.items.len - 5] = 0x25; + const imm_ptr = self.code.items[self.code.items.len - 4 ..][0..4]; + mem.writeIntLittle(u32, imm_ptr, @intCast(u32, x)); + return; + } else { + return self.fail(src, "TODO implement genSetReg for x86_64 
setting rsi to 64-bit memory", .{}); + } + }, }, else => return self.fail(src, "TODO implement genSetReg for x86_64 '{}'", .{@tagName(reg)}), }, @@ -493,33 +541,21 @@ const Function = struct { } fn genTypedValue(self: *Function, src: usize, typed_value: TypedValue) !MCValue { + const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); const allocator = self.code.allocator; switch (typed_value.ty.zigTypeTag()) { .Pointer => { - const ptr_elem_type = typed_value.ty.elemType(); - switch (ptr_elem_type.zigTypeTag()) { - .Array => { - // TODO more checks to make sure this can be emitted as a string literal - const bytes = typed_value.val.toAllocatedBytes(allocator) catch |err| switch (err) { - error.AnalysisFail => unreachable, - else => |e| return e, - }; - defer allocator.free(bytes); - const smaller_len = std.math.cast(u32, bytes.len) catch - return self.fail(src, "TODO handle a larger string constant", .{}); - - // Emit the string literal directly into the code; jump over it. 
- try self.genRelativeFwdJump(src, smaller_len); - const offset = self.code.items.len; - try self.code.appendSlice(bytes); - return MCValue{ .embedded_in_code = offset }; - }, - else => |t| return self.fail(src, "TODO implement emitTypedValue for pointer to '{}'", .{@tagName(t)}), + if (typed_value.val.cast(Value.Payload.DeclRef)) |payload| { + const got = &self.bin_file.program_headers.items[self.bin_file.phdr_got_index.?]; + const decl = payload.decl; + const got_addr = got.p_vaddr + decl.link.offset_table_index * ptr_bytes; + return MCValue{ .memory = got_addr }; } + return self.fail(src, "TODO codegen more kinds of const pointers", .{}); }, .Int => { const info = typed_value.ty.intInfo(self.target.*); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); if (info.bits > ptr_bits or info.signed) { return self.fail(src, "TODO const int bigger than ptr and signed int", .{}); } diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index b357a62932..92a5aa7fdf 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -292,6 +292,8 @@ pub const Module = struct { /// TODO look into using a lightweight map/set data structure rather than a linear array. 
dependants: ArrayListUnmanaged(*Decl) = ArrayListUnmanaged(*Decl){}, + contents_hash: Hash, + pub fn destroy(self: *Decl, allocator: *Allocator) void { allocator.free(mem.spanZ(self.name)); if (self.typedValueManaged()) |tvm| { @@ -465,26 +467,42 @@ pub const Module = struct { module: *text.Module, }, status: enum { - unloaded, + never_loaded, + unloaded_success, unloaded_parse_failure, + unloaded_sema_failure, loaded_parse_failure, loaded_sema_failure, loaded_success, }, - pub fn deinit(self: *ZIRModule, allocator: *Allocator) void { + pub fn unload(self: *ZIRModule, allocator: *Allocator) void { switch (self.status) { - .unloaded, + .never_loaded, .unloaded_parse_failure, + .unloaded_sema_failure, + .unloaded_success, => {}, - .loaded_success, .loaded_sema_failure => { + + .loaded_success => { allocator.free(self.source.bytes); self.contents.module.deinit(allocator); + self.status = .unloaded_success; + }, + .loaded_sema_failure => { + allocator.free(self.source.bytes); + self.contents.module.deinit(allocator); + self.status = .unloaded_sema_failure; }, .loaded_parse_failure => { allocator.free(self.source.bytes); + self.status = .unloaded_parse_failure; }, } + } + + pub fn deinit(self: *ZIRModule, allocator: *Allocator) void { + self.unload(allocator); self.* = undefined; } @@ -623,7 +641,8 @@ pub const Module = struct { try self.performAllTheWork(); - // TODO unload all the source files from memory + // Unload all the source files from memory. 
+ self.root_scope.unload(self.allocator); try self.bin_file.flush(); self.link_error_flags = self.bin_file.error_flags; @@ -722,8 +741,8 @@ pub const Module = struct { .success => {}, } } - if (!decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()) - continue; + + assert(decl.typed_value.most_recent.typed_value.ty.hasCodeGenBits()); self.bin_file.updateDecl(self, decl) catch |err| switch (err) { error.OutOfMemory => return error.OutOfMemory, @@ -748,7 +767,7 @@ pub const Module = struct { fn getTextModule(self: *Module, root_scope: *Scope.ZIRModule) !*text.Module { switch (root_scope.status) { - .unloaded => { + .never_loaded, .unloaded_success => { try self.failed_files.ensureCapacity(self.failed_files.size + 1); var keep_source = false; @@ -789,6 +808,7 @@ pub const Module = struct { }, .unloaded_parse_failure, + .unloaded_sema_failure, .loaded_parse_failure, .loaded_sema_failure, => return error.AnalysisFail, @@ -804,16 +824,62 @@ pub const Module = struct { // Here we simulate adding a source file which was previously not part of the compilation, // which means scanning the decls looking for exports. // TODO also identify decls that need to be deleted. - const src_module = try self.getTextModule(root_scope); + switch (root_scope.status) { + .never_loaded => { + const src_module = try self.getTextModule(root_scope); - // Here we ensure enough queue capacity to store all the decls, so that later we can use - // appendAssumeCapacity. - try self.work_queue.ensureUnusedCapacity(src_module.decls.len); + // Here we ensure enough queue capacity to store all the decls, so that later we can use + // appendAssumeCapacity. 
+ try self.work_queue.ensureUnusedCapacity(src_module.decls.len); - for (src_module.decls) |decl| { - if (decl.cast(text.Inst.Export)) |export_inst| { - _ = try self.resolveDecl(&root_scope.base, &export_inst.base); - } + for (src_module.decls) |decl| { + if (decl.cast(text.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, + + .unloaded_parse_failure, + .unloaded_sema_failure, + .loaded_parse_failure, + .loaded_sema_failure, + .loaded_success, + .unloaded_success, + => { + const src_module = try self.getTextModule(root_scope); + + // Look for changed decls. + for (src_module.decls) |src_decl| { + const name_hash = Decl.hashSimpleName(src_decl.name); + if (self.decl_table.get(name_hash)) |kv| { + const decl = kv.value; + const new_contents_hash = Decl.hashSimpleName(src_decl.contents); + if (!mem.eql(u8, &new_contents_hash, &decl.contents_hash)) { + // TODO recursive dependency management + std.debug.warn("noticed that '{}' changed\n", .{src_decl.name}); + self.decl_table.removeAssertDiscard(name_hash); + const saved_link = decl.link; + decl.destroy(self.allocator); + if (self.export_owners.getValue(decl)) |exports| { + @panic("TODO handle updating a decl that does an export"); + } + const new_decl = self.resolveDecl( + &root_scope.base, + src_decl, + saved_link, + ) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => continue, + }; + if (self.decl_exports.remove(decl)) |entry| { + self.decl_exports.putAssumeCapacityNoClobber(new_decl, entry.value); + } + } + } else if (src_decl.cast(text.Inst.Export)) |export_inst| { + _ = try self.resolveDecl(&root_scope.base, &export_inst.base, link.ElfFile.Decl.empty); + } + } + }, } } @@ -846,11 +912,17 @@ pub const Module = struct { }; } - fn resolveDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl { + fn resolveDecl( + self: *Module, + scope: *Scope, + old_inst: *text.Inst, 
+ bin_file_link: link.ElfFile.Decl, + ) InnerError!*Decl { const hash = Decl.hashSimpleName(old_inst.name); if (self.decl_table.get(hash)) |kv| { return kv.value; } else { + std.debug.warn("creating new decl for {}\n", .{old_inst.name}); const new_decl = blk: { try self.decl_table.ensureCapacity(self.decl_table.size + 1); const new_decl = try self.allocator.create(Decl); @@ -863,6 +935,8 @@ pub const Module = struct { .src = old_inst.src, .typed_value = .{ .never_succeeded = {} }, .analysis = .initial_in_progress, + .contents_hash = Decl.hashSimpleName(old_inst.contents), + .link = bin_file_link, }; self.decl_table.putAssumeCapacityNoClobber(hash, new_decl); break :blk new_decl; @@ -887,6 +961,14 @@ pub const Module = struct { }; const arena_state = try decl_scope.arena.allocator.create(std.heap.ArenaAllocator.State); + const has_codegen_bits = typed_value.ty.hasCodeGenBits(); + if (has_codegen_bits) { + // We don't fully codegen the decl until later, but we do need to reserve a global + // offset table index for it. This allows us to codegen decls out of dependency order, + // increasing how many computations can be done in parallel. + try self.bin_file.allocateDeclIndexes(new_decl); + } + arena_state.* = decl_scope.arena.state; new_decl.typed_value = .{ @@ -896,14 +978,16 @@ pub const Module = struct { }, }; new_decl.analysis = .complete; - // We ensureCapacity when scanning for decls. - self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); + if (has_codegen_bits) { + // We ensureCapacity when scanning for decls. 
+ self.work_queue.writeItemAssumeCapacity(.{ .codegen_decl = new_decl }); + } return new_decl; } } fn resolveCompleteDecl(self: *Module, scope: *Scope, old_inst: *text.Inst) InnerError!*Decl { - const decl = try self.resolveDecl(scope, old_inst); + const decl = try self.resolveDecl(scope, old_inst, link.ElfFile.Decl.empty); switch (decl.analysis) { .initial_in_progress => unreachable, .repeat_in_progress => unreachable, @@ -2088,8 +2172,8 @@ pub fn main() anyerror!void { const src_path = args[1]; const bin_path = args[2]; - const debug_error_trace = true; - const output_zir = true; + const debug_error_trace = false; + const output_zir = false; const object_format: ?std.builtin.ObjectFormat = null; const native_info = try std.zig.system.NativeTargetInfo.detect(allocator, .{}); @@ -2112,7 +2196,7 @@ pub fn main() anyerror!void { .sub_file_path = root_pkg.root_src_path, .source = .{ .unloaded = {} }, .contents = .{ .not_available = {} }, - .status = .unloaded, + .status = .never_loaded, }; break :blk Module{ @@ -2132,22 +2216,38 @@ pub fn main() anyerror!void { }; defer module.deinit(); - try module.update(); + const stdin = std.io.getStdIn().inStream(); + const stderr = std.io.getStdErr().outStream(); + var repl_buf: [1024]u8 = undefined; - var errors = try module.getAllErrorsAlloc(); - defer errors.deinit(allocator); + while (true) { + try module.update(); - if (errors.list.len != 0) { - for (errors.list) |full_err_msg| { - std.debug.warn("{}:{}:{}: error: {}\n", .{ - full_err_msg.src_path, - full_err_msg.line + 1, - full_err_msg.column + 1, - full_err_msg.msg, - }); + var errors = try module.getAllErrorsAlloc(); + defer errors.deinit(allocator); + + if (errors.list.len != 0) { + for (errors.list) |full_err_msg| { + std.debug.warn("{}:{}:{}: error: {}\n", .{ + full_err_msg.src_path, + full_err_msg.line + 1, + full_err_msg.column + 1, + full_err_msg.msg, + }); + } + if (debug_error_trace) return error.AnalysisFail; + } + + try stderr.print("🦎 ", .{}); + if (try 
stdin.readUntilDelimiterOrEof(&repl_buf, '\n')) |line| { + if (mem.eql(u8, line, "update")) { + continue; + } else { + try stderr.print("unknown command: {}\n", .{line}); + } + } else { + break; } - if (debug_error_trace) return error.AnalysisFail; - std.process.exit(1); } if (output_zir) { diff --git a/src-self-hosted/ir/text.zig b/src-self-hosted/ir/text.zig index 8f189f49e3..f283fb5410 100644 --- a/src-self-hosted/ir/text.zig +++ b/src-self-hosted/ir/text.zig @@ -19,6 +19,9 @@ pub const Inst = struct { src: usize, name: []const u8, + /// Slice into the source of the part after the = and before the next instruction. + contents: []const u8, + /// These names are used directly as the instruction names in the text format. pub const Tag = enum { breakpoint, @@ -798,11 +801,12 @@ const Parser = struct { } fn parseInstruction(self: *Parser, body_ctx: ?*Body, name: []const u8) InnerError!*Inst { + const contents_start = self.i; const fn_name = try skipToAndOver(self, '('); inline for (@typeInfo(Inst.Tag).Enum.fields) |field| { if (mem.eql(u8, field.name, fn_name)) { const tag = @field(Inst.Tag, field.name); - return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name); + return parseInstructionGeneric(self, field.name, Inst.TagToType(tag), body_ctx, name, contents_start); } } return self.fail("unknown instruction '{}'", .{fn_name}); @@ -814,12 +818,14 @@ const Parser = struct { comptime InstType: type, body_ctx: ?*Body, inst_name: []const u8, + contents_start: usize, ) InnerError!*Inst { const inst_specific = try self.arena.allocator.create(InstType); inst_specific.base = .{ .name = inst_name, .src = self.i, .tag = InstType.base_tag, + .contents = undefined, }; if (@hasField(InstType, "ty")) { @@ -867,6 +873,8 @@ const Parser = struct { } try requireEatBytes(self, ")"); + inst_specific.base.contents = self.source[contents_start..self.i]; + return &inst_specific.base; } @@ -952,6 +960,7 @@ const Parser = struct { .name = try self.generateName(), 
.src = src, .tag = Inst.Str.base_tag, + .contents = undefined, }, .positionals = .{ .bytes = ident }, .kw_args = .{}, @@ -962,6 +971,7 @@ const Parser = struct { .name = try self.generateName(), .src = src, .tag = Inst.DeclRef.base_tag, + .contents = undefined, }, .positionals = .{ .name = &name.base }, .kw_args = .{}, diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index fb3953fe4f..f7237f4d60 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -310,7 +310,7 @@ pub const ElfFile = struct { // TODO instead of hard coding the vaddr, make a function to find a vaddr to put things at. // we'll need to re-use that function anyway, in case the GOT grows and overlaps something // else in virtual memory. - const default_got_addr = 0x80000000; + const default_got_addr = 0x4000000; try self.program_headers.append(self.allocator, .{ .p_type = elf.PT_LOAD, .p_offset = off, @@ -755,6 +755,35 @@ pub const ElfFile = struct { }; } + pub fn allocateDeclIndexes(self: *ElfFile, decl: *ir.Module.Decl) !void { + if (decl.link.local_sym_index != 0) return; + + try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); + try self.offset_table.ensureCapacity(self.allocator, self.offset_table.items.len + 1); + const local_sym_index = self.local_symbols.items.len; + const offset_table_index = self.offset_table.items.len; + const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + + self.local_symbols.appendAssumeCapacity(.{ + .st_name = 0, + .st_info = 0, + .st_other = 0, + .st_shndx = 0, + .st_value = phdr.p_vaddr, + .st_size = 0, + }); + errdefer self.local_symbols.shrink(self.allocator, self.local_symbols.items.len - 1); + self.offset_table.appendAssumeCapacity(0); + errdefer self.offset_table.shrink(self.allocator, self.offset_table.items.len - 1); + + self.offset_table_count_dirty = true; + + decl.link = .{ + .local_sym_index = @intCast(u32, local_sym_index), + .offset_table_index = @intCast(u32, 
offset_table_index), + }; + } + pub fn updateDecl(self: *ElfFile, module: *ir.Module, decl: *ir.Module.Decl) !void { var code_buffer = std.ArrayList(u8).init(self.allocator); defer code_buffer.deinit(); @@ -781,21 +810,33 @@ pub const ElfFile = struct { if (decl.link.local_sym_index != 0) { const local_sym = &self.local_symbols.items[decl.link.local_sym_index]; const existing_block = self.findAllocatedTextBlock(local_sym.*); - const need_realloc = code.len > existing_block.size_capacity or + const need_realloc = local_sym.st_size == 0 or + code.len > existing_block.size_capacity or !mem.isAlignedGeneric(u64, local_sym.st_value, required_alignment); + // TODO check for collision with another symbol const file_offset = if (need_realloc) fo: { const new_block = try self.allocateTextBlock(code.len, required_alignment); local_sym.st_value = new_block.vaddr; - local_sym.st_size = code.len; + self.offset_table.items[decl.link.offset_table_index] = new_block.vaddr; + //std.debug.warn("{}: writing got index {}=0x{x}\n", .{ + // decl.name, + // decl.link.offset_table_index, + // self.offset_table.items[decl.link.offset_table_index], + //}); try self.writeOffsetTableEntry(decl.link.offset_table_index); break :fo new_block.file_offset; } else existing_block.file_offset; + local_sym.st_size = code.len; local_sym.st_name = try self.updateString(local_sym.st_name, mem.spanZ(decl.name)); local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits; + local_sym.st_other = 0; + local_sym.st_shndx = self.text_section_index.?; // TODO this write could be avoided if no fields of the symbol were changed. 
try self.writeSymbol(decl.link.local_sym_index); + + //std.debug.warn("updating {} at vaddr 0x{x}\n", .{ decl.name, local_sym.st_value }); break :blk file_offset; } else { try self.local_symbols.ensureCapacity(self.allocator, self.local_symbols.items.len + 1); @@ -829,6 +870,7 @@ pub const ElfFile = struct { .offset_table_index = @intCast(u32, offset_table_index), }; + //std.debug.warn("writing new {} at vaddr 0x{x}\n", .{ decl.name, new_block.vaddr }); break :blk new_block.file_offset; } };