zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 173fc842c4eb1da6ca07a4ab6026c4c62bc8c09b (tree)
parent b4d5d4d1748d25efa6197914a36e276e93509f57
Author: Andrew Kelley <superjoe30@gmail.com>
Date:   Sun,  9 Sep 2018 18:07:11 -0400

basic compiler id hash working

Diffstat:
MCMakeLists.txt | 1+
Msrc/buffer.hpp | 4++++
Asrc/cache_hash.cpp | 407+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/cache_hash.hpp | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/codegen.cpp | 152+++++++++++++++++--------------------------------------------------------------
Msrc/error.cpp | 2++
Msrc/error.hpp | 2++
Msrc/main.cpp | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/os.cpp | 314+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Msrc/os.hpp | 92++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
Msrc/util.cpp | 39+++++++++++++++++++++++++++++++++++++++
Msrc/util.hpp | 12++++++++++++
12 files changed, 918 insertions(+), 224 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -409,6 +409,7 @@ set(ZIG_SOURCES "${CMAKE_SOURCE_DIR}/src/bigint.cpp" "${CMAKE_SOURCE_DIR}/src/blake2b.cpp" "${CMAKE_SOURCE_DIR}/src/buffer.cpp" + "${CMAKE_SOURCE_DIR}/src/cache_hash.cpp" "${CMAKE_SOURCE_DIR}/src/c_tokenizer.cpp" "${CMAKE_SOURCE_DIR}/src/codegen.cpp" "${CMAKE_SOURCE_DIR}/src/errmsg.cpp" diff --git a/src/buffer.hpp b/src/buffer.hpp @@ -78,6 +78,10 @@ static inline Buf *buf_create_from_mem(const char *ptr, size_t len) { return buf; } +static inline Buf *buf_create_from_slice(Slice<uint8_t> slice) { + return buf_create_from_mem((const char *)slice.ptr, slice.len); +} + static inline Buf *buf_create_from_str(const char *str) { return buf_create_from_mem(str, strlen(str)); } diff --git a/src/cache_hash.cpp b/src/cache_hash.cpp @@ -0,0 +1,407 @@ +/* + * Copyright (c) 2018 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + +#include "cache_hash.hpp" +#include "buffer.hpp" +#include "os.hpp" + +#include <stdio.h> + +void cache_init(CacheHash *ch, Buf *manifest_dir) { + int rc = blake2b_init(&ch->blake, 48); + assert(rc == 0); + ch->files = {}; + ch->manifest_dir = manifest_dir; + ch->manifest_file_path = nullptr; + ch->manifest_dirty = false; +} + +void cache_str(CacheHash *ch, const char *ptr) { + assert(ch->manifest_file_path == nullptr); + assert(ptr != nullptr); + // + 1 to include the null byte + blake2b_update(&ch->blake, ptr, strlen(ptr) + 1); +} + +void cache_int(CacheHash *ch, int x) { + assert(ch->manifest_file_path == nullptr); + // + 1 to include the null byte + uint8_t buf[sizeof(int) + 1]; + memcpy(buf, &x, sizeof(int)); + buf[sizeof(int)] = 0; + blake2b_update(&ch->blake, buf, sizeof(int) + 1); +} + +void cache_buf(CacheHash *ch, Buf *buf) { + assert(ch->manifest_file_path == nullptr); + assert(buf != nullptr); + // + 1 to include the null byte + blake2b_update(&ch->blake, buf_ptr(buf), buf_len(buf) + 1); +} + +void cache_buf_opt(CacheHash *ch, Buf *buf) { + assert(ch->manifest_file_path == nullptr); + if (buf == nullptr) { + cache_str(ch, ""); + cache_str(ch, ""); + } else { + cache_buf(ch, buf); + } +} + +void cache_list_of_link_lib(CacheHash *ch, LinkLib **ptr, size_t len) { + assert(ch->manifest_file_path == nullptr); + for (size_t i = 0; i < len; i += 1) { + LinkLib *lib = ptr[i]; + if (lib->provided_explicitly) { + cache_buf(ch, lib->name); + } + } + cache_str(ch, ""); +} + +void cache_list_of_buf(CacheHash *ch, Buf **ptr, size_t len) { + assert(ch->manifest_file_path == nullptr); + for (size_t i = 0; i < len; i += 1) { + Buf *buf = ptr[i]; + cache_buf(ch, buf); + } + cache_str(ch, ""); +} + +void cache_file(CacheHash *ch, Buf *file_path) { + assert(ch->manifest_file_path == nullptr); + assert(file_path != nullptr); + Buf *resolved_path = buf_alloc(); + *resolved_path = os_path_resolve(&file_path, 1); + CacheHashFile *chf = ch->files.add_one(); + chf->path = resolved_path; + cache_buf(ch, resolved_path); +} + +void cache_file_opt(CacheHash *ch, Buf *file_path) { + assert(ch->manifest_file_path == nullptr); + if (file_path == nullptr) { + cache_str(ch, ""); + cache_str(ch, ""); + } else { + cache_file(ch, file_path); + } +} + +// Ported from std/base64.zig +static uint8_t base64_fs_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; +static void base64_encode(Slice<uint8_t> dest, Slice<uint8_t> source) { + size_t dest_len = ((source.len + 2) / 3) * 4; + assert(dest.len == dest_len); + + size_t i = 0; + size_t out_index = 0; + for (; i + 2 < source.len; i += 3) { + dest.ptr[out_index] = base64_fs_alphabet[(source.ptr[i] >> 2) & 0x3f]; + out_index += 1; + + dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i] & 0x3) << 4) | ((source.ptr[i + 1] & 0xf0) >> 4)]; + out_index += 1; + + dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i + 1] & 0xf) << 2) | ((source.ptr[i + 2] & 0xc0) >> 6)]; + out_index += 1; + + dest.ptr[out_index] = base64_fs_alphabet[source.ptr[i + 2] & 0x3f]; + out_index += 1; + } + + // Assert that we never need pad characters. + assert(i == source.len); +} + +// Ported from std/base64.zig +static Error base64_decode(Slice<uint8_t> dest, Slice<uint8_t> source) { + assert(source.len % 4 == 0); + assert(dest.len == (source.len / 4) * 3); + + // In Zig this is comptime computed. In C++ it's not worth it to do that. + uint8_t char_to_index[256]; + bool char_in_alphabet[256] = {0}; + for (size_t i = 0; i < 64; i += 1) { + uint8_t c = base64_fs_alphabet[i]; + assert(!char_in_alphabet[c]); + char_in_alphabet[c] = true; + char_to_index[c] = i; + } + + size_t src_cursor = 0; + size_t dest_cursor = 0; + + for (;src_cursor < source.len; src_cursor += 4) { + if (!char_in_alphabet[source.ptr[src_cursor + 0]]) return ErrorInvalidFormat; + if (!char_in_alphabet[source.ptr[src_cursor + 1]]) return ErrorInvalidFormat; + if (!char_in_alphabet[source.ptr[src_cursor + 2]]) return ErrorInvalidFormat; + if (!char_in_alphabet[source.ptr[src_cursor + 3]]) return ErrorInvalidFormat; + dest.ptr[dest_cursor + 0] = (char_to_index[source.ptr[src_cursor + 0]] << 2) | (char_to_index[source.ptr[src_cursor + 1]] >> 4); + dest.ptr[dest_cursor + 1] = (char_to_index[source.ptr[src_cursor + 1]] << 4) | (char_to_index[source.ptr[src_cursor + 2]] >> 2); + dest.ptr[dest_cursor + 2] = (char_to_index[source.ptr[src_cursor + 2]] << 6) | (char_to_index[source.ptr[src_cursor + 3]]); + dest_cursor += 3; + } + + assert(src_cursor == source.len); + assert(dest_cursor == dest.len); + return ErrorNone; +} + +static Error hash_file(uint8_t *digest, OsFile handle) { + Error err; + + blake2b_state blake; + int rc = blake2b_init(&blake, 48); + assert(rc == 0); + + for (;;) { + uint8_t buf[4096]; + size_t amt = 4096; + if ((err = os_file_read(handle, buf, &amt))) + return err; + if (amt == 0) { + rc = blake2b_final(&blake, digest, 48); + assert(rc == 0); + return ErrorNone; + } + blake2b_update(&blake, buf, amt); + } +} + +static Error populate_file_hash(CacheHash *ch, CacheHashFile *chf) { + Error err; + + assert(chf->path != nullptr); + + OsFile this_file; + if ((err = os_file_open_r(chf->path, &this_file))) + return err; + + if ((err = os_file_mtime(this_file, &chf->mtime))) { + os_file_close(this_file); + return err; + } + + if ((err = hash_file(chf->bin_digest, this_file))) { + os_file_close(this_file); + return err; + } + os_file_close(this_file); + + blake2b_update(&ch->blake, chf->bin_digest, 48); + + return ErrorNone; +} + +Error cache_hit(CacheHash *ch, Buf *out_digest) { + Error err; + + uint8_t bin_digest[48]; + int rc = blake2b_final(&ch->blake, bin_digest, 48); + assert(rc == 0); + + Buf b64_digest = BUF_INIT; + buf_resize(&b64_digest, 64); + base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48}); + + rc = blake2b_init(&ch->blake, 48); + assert(rc == 0); + blake2b_update(&ch->blake, bin_digest, 48); + + ch->manifest_file_path = buf_alloc(); + os_path_join(ch->manifest_dir, &b64_digest, ch->manifest_file_path); + + buf_append_str(ch->manifest_file_path, ".txt"); + + if ((err = os_file_open_lock_rw(ch->manifest_file_path, &ch->manifest_file))) + return err; + + Buf line_buf = BUF_INIT; + buf_resize(&line_buf, 512); + if ((err = os_file_read_all(ch->manifest_file, &line_buf))) { + os_file_close(ch->manifest_file); + return err; + } + + size_t input_file_count = ch->files.length; + bool any_file_changed = false; + size_t file_i = 0; + SplitIterator line_it = memSplit(buf_to_slice(&line_buf), str("\n")); + for (;; file_i += 1) { + Optional<Slice<uint8_t>> opt_line = SplitIterator_next(&line_it); + if (!opt_line.is_some) + break; + + CacheHashFile *chf; + if (file_i < input_file_count) { + chf = &ch->files.at(file_i); + } else if (any_file_changed) { + // cache miss. + // keep the the manifest file open with the rw lock + // reset the hash + rc = blake2b_init(&ch->blake, 48); + assert(rc == 0); + blake2b_update(&ch->blake, bin_digest, 48); + ch->files.resize(input_file_count); + // bring the hash up to the input file hashes + for (file_i = 0; file_i < input_file_count; file_i += 1) { + blake2b_update(&ch->blake, ch->files.at(file_i).bin_digest, 48); + } + // caller can notice that out_digest is unmodified. + return ErrorNone; + } else { + chf = ch->files.add_one(); + chf->path = nullptr; + } + + SplitIterator it = memSplit(opt_line.value, str(" ")); + + Optional<Slice<uint8_t>> opt_mtime_sec = SplitIterator_next(&it); + if (!opt_mtime_sec.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + chf->mtime.sec = strtoull((const char *)opt_mtime_sec.value.ptr, nullptr, 10); + + Optional<Slice<uint8_t>> opt_mtime_nsec = SplitIterator_next(&it); + if (!opt_mtime_nsec.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + chf->mtime.nsec = strtoull((const char *)opt_mtime_nsec.value.ptr, nullptr, 10); + + Optional<Slice<uint8_t>> opt_digest = SplitIterator_next(&it); + if (!opt_digest.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + if ((err = base64_decode({chf->bin_digest, 48}, opt_digest.value))) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + + Optional<Slice<uint8_t>> opt_file_path = SplitIterator_next(&it); + if (!opt_file_path.is_some) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + Buf *this_path = buf_create_from_slice(opt_file_path.value); + if (chf->path != nullptr && !buf_eql_buf(this_path, chf->path)) { + os_file_close(ch->manifest_file); + return ErrorInvalidFormat; + } + chf->path = this_path; + + // if the mtime matches we can trust the digest + OsFile this_file; + if ((err = os_file_open_r(chf->path, &this_file))) { + os_file_close(ch->manifest_file); + return err; + } + OsTimeStamp actual_mtime; + if ((err = os_file_mtime(this_file, &actual_mtime))) { + os_file_close(this_file); + os_file_close(ch->manifest_file); + return err; + } + if (chf->mtime.sec == actual_mtime.sec && chf->mtime.nsec == actual_mtime.nsec) { + os_file_close(this_file); + } else { + // we have to recompute the digest. + // later we'll rewrite the manifest with the new mtime/digest values + ch->manifest_dirty = true; + chf->mtime = actual_mtime; + + uint8_t actual_digest[48]; + if ((err = hash_file(actual_digest, this_file))) { + os_file_close(this_file); + os_file_close(ch->manifest_file); + return err; + } + os_file_close(this_file); + if (memcmp(chf->bin_digest, actual_digest, 48) != 0) { + memcpy(chf->bin_digest, actual_digest, 48); + // keep going until we have the input file digests + any_file_changed = true; + } + } + if (!any_file_changed) { + blake2b_update(&ch->blake, chf->bin_digest, 48); + } + } + if (file_i < input_file_count) { + // manifest file is empty or missing entries, so this is a cache miss + ch->manifest_dirty = true; + for (; file_i < input_file_count; file_i += 1) { + CacheHashFile *chf = &ch->files.at(file_i); + if ((err = populate_file_hash(ch, chf))) { + os_file_close(ch->manifest_file); + return err; + } + } + return ErrorNone; + } + // Cache Hit + return cache_final(ch, out_digest); +} + +Error cache_add_file(CacheHash *ch, Buf *path) { + Error err; + + assert(ch->manifest_file_path != nullptr); + CacheHashFile *chf = ch->files.add_one(); + chf->path = path; + if ((err = populate_file_hash(ch, chf))) { + os_file_close(ch->manifest_file); + return err; + } + + return ErrorNone; +} + +static Error write_manifest_file(CacheHash *ch) { + Error err; + Buf contents = BUF_INIT; + buf_resize(&contents, 0); + uint8_t encoded_digest[65]; + encoded_digest[64] = 0; + for (size_t i = 0; i < ch->files.length; i += 1) { + CacheHashFile *chf = &ch->files.at(i); + base64_encode({encoded_digest, 64}, {chf->bin_digest, 48}); + buf_appendf(&contents, "%" ZIG_PRI_u64 " %" ZIG_PRI_u64 " %s %s\n", + chf->mtime.sec, chf->mtime.nsec, encoded_digest, buf_ptr(chf->path)); + } + fprintf(stderr, "overwrite with\n%s\n", buf_ptr(&contents)); + if ((err = os_file_overwrite(ch->manifest_file, &contents))) + return err; + + return ErrorNone; +} + +Error cache_final(CacheHash *ch, Buf *out_digest) { + Error err; + + assert(ch->manifest_file_path != nullptr); + + if (ch->manifest_dirty) { + if ((err = write_manifest_file(ch))) { + fprintf(stderr, "Warning: Unable to write cache file '%s': %s\n", + buf_ptr(ch->manifest_file_path), err_str(err)); + } + } + os_file_close(ch->manifest_file); + + uint8_t bin_digest[48]; + int rc = blake2b_final(&ch->blake, bin_digest, 48); + assert(rc == 0); + buf_resize(out_digest, 64); + base64_encode(buf_to_slice(out_digest), {bin_digest, 48}); + + return ErrorNone; +} diff --git a/src/cache_hash.hpp b/src/cache_hash.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + +#ifndef ZIG_CACHE_HASH_HPP +#define ZIG_CACHE_HASH_HPP + +#include "all_types.hpp" +#include "blake2.h" +#include "os.hpp" + +struct CacheHashFile { + Buf *path; + OsTimeStamp mtime; + uint8_t bin_digest[48]; +}; + +struct CacheHash { + blake2b_state blake; + ZigList<CacheHashFile> files; + Buf *manifest_dir; + Buf *manifest_file_path; + OsFile manifest_file; + bool manifest_dirty; +}; + +// Always call this first to set up. +void cache_init(CacheHash *ch, Buf *manifest_dir); + +// Next, use the hash population functions to add the initial parameters. +void cache_str(CacheHash *ch, const char *ptr); +void cache_int(CacheHash *ch, int x); +void cache_buf(CacheHash *ch, Buf *buf); +void cache_buf_opt(CacheHash *ch, Buf *buf); +void cache_list_of_link_lib(CacheHash *ch, LinkLib **ptr, size_t len); +void cache_list_of_buf(CacheHash *ch, Buf **ptr, size_t len); +void cache_file(CacheHash *ch, Buf *path); +void cache_file_opt(CacheHash *ch, Buf *path); + +// Then call cache_hit when you're ready to see if you can skip the next step. +// out_b64_digest will be left unchanged if it was a cache miss +Error ATTRIBUTE_MUST_USE cache_hit(CacheHash *ch, Buf *out_b64_digest); + +// If you got a cache hit, the flow is done. No more functions to call. +// Next call this function for every file that is depended on. +Error ATTRIBUTE_MUST_USE cache_add_file(CacheHash *ch, Buf *path); + +// If you did not get a cache hit, use the hash population functions again +// and do all the actual work. When done use cache_final to save the cache +// for next time. +Error ATTRIBUTE_MUST_USE cache_final(CacheHash *ch, Buf *out_digest); + +#endif diff --git a/src/codegen.cpp b/src/codegen.cpp @@ -19,7 +19,6 @@ #include "target.hpp" #include "util.hpp" #include "zig_llvm.h" -#include "blake2.h" #include <stdio.h> #include <errno.h> @@ -7675,130 +7674,45 @@ void codegen_add_time_event(CodeGen *g, const char *name) { g->timing_events.append({os_get_time(), name}); } -static void add_cache_str(blake2b_state *blake, const char *ptr) { - assert(ptr != nullptr); - // + 1 to include the null byte - blake2b_update(blake, ptr, strlen(ptr) + 1); -} - -static void add_cache_int(blake2b_state *blake, int x) { - // + 1 to include the null byte - uint8_t buf[sizeof(int) + 1]; - memcpy(buf, &x, sizeof(int)); - buf[sizeof(int)] = 0; - blake2b_update(blake, buf, sizeof(int) + 1); -} -static void add_cache_buf(blake2b_state *blake, Buf *buf) { - assert(buf != nullptr); - // + 1 to include the null byte - blake2b_update(blake, buf_ptr(buf), buf_len(buf) + 1); -} - -static void add_cache_buf_opt(blake2b_state *blake, Buf *buf) { - if (buf == nullptr) { - add_cache_str(blake, ""); - add_cache_str(blake, ""); - } else { - add_cache_buf(blake, buf); - } -} - -static void add_cache_list_of_link_lib(blake2b_state *blake, LinkLib **ptr, size_t len) { - for (size_t i = 0; i < len; i += 1) { - LinkLib *lib = ptr[i]; - if (lib->provided_explicitly) { - add_cache_buf(blake, lib->name); - } - } - add_cache_str(blake, ""); -} - -static void add_cache_list_of_buf(blake2b_state *blake, Buf **ptr, size_t len) { - for (size_t i = 0; i < len; i += 1) { - Buf *buf = ptr[i]; - add_cache_buf(blake, buf); - } - add_cache_str(blake, ""); -} - -//static void add_cache_file(CodeGen *g, blake2b_state *blake, Buf *resolved_path) { -// assert(file_name != nullptr); -// g->cache_files.append(resolved_path); -//} +//// Called before init() +//static bool build_with_cache(CodeGen *g) { +// // TODO zig exe & dynamic libraries +// // should be in main.cpp I think. only needs to happen +// // once on startup. +// +// CacheHash comp; +// cache_init(&comp); +// +// add_cache_buf(&blake, g->root_out_name); +// add_cache_buf_opt(&blake, get_resolved_root_src_path(g)); // Root source file +// add_cache_list_of_link_lib(&blake, g->link_libs_list.items, g->link_libs_list.length); +// add_cache_list_of_buf(&blake, g->darwin_frameworks.items, g->darwin_frameworks.length); +// add_cache_list_of_buf(&blake, g->rpath_list.items, g->rpath_list.length); +// add_cache_int(&blake, g->emit_file_type); +// add_cache_int(&blake, g->build_mode); +// add_cache_int(&blake, g->out_type); +// // TODO the rest of the struct CodeGen fields +// +// uint8_t bin_digest[48]; +// rc = blake2b_final(&blake, bin_digest, 48); +// assert(rc == 0); +// +// Buf b64_digest = BUF_INIT; +// buf_resize(&b64_digest, 64); +// base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48}); // -//static void add_cache_file_opt(CodeGen *g, blake2b_state *blake, Buf *resolved_path) { -// if (resolved_path == nullptr) { -// add_cache_str(blake, ""); -// add_cache_str(blake, ""); -// } else { -// add_cache_file(g, blake, resolved_path); -// } +// fprintf(stderr, "input params hash: %s\n", buf_ptr(&b64_digest)); +// // TODO next look for a manifest file that has all the files from the input parameters +// // use that to construct the real hash, which looks up the output directory and another manifest file +// +// return false; //} -// Ported from std/base64.zig -static uint8_t base64_fs_alphabet[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"; -static void base64_encode(Slice<uint8_t> dest, Slice<uint8_t> source) { - size_t dest_len = ((source.len + 2) / 3) * 4; - assert(dest.len == dest_len); - - size_t i = 0; - size_t out_index = 0; - for (; i + 2 < source.len; i += 3) { - dest.ptr[out_index] = base64_fs_alphabet[(source.ptr[i] >> 2) & 0x3f]; - out_index += 1; - - dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i] & 0x3) << 4) | ((source.ptr[i + 1] & 0xf0) >> 4)]; - out_index += 1; - - dest.ptr[out_index] = base64_fs_alphabet[((source.ptr[i + 1] & 0xf) << 2) | ((source.ptr[i + 2] & 0xc0) >> 6)]; - out_index += 1; - - dest.ptr[out_index] = base64_fs_alphabet[source.ptr[i + 2] & 0x3f]; - out_index += 1; - } - - // Assert that we never need pad characters. - assert(i == source.len); -} - -// Called before init() -static bool build_with_cache(CodeGen *g) { - blake2b_state blake; - int rc = blake2b_init(&blake, 48); - assert(rc == 0); - - // TODO zig exe & dynamic libraries - - add_cache_buf(&blake, g->root_out_name); - add_cache_buf_opt(&blake, get_resolved_root_src_path(g)); // Root source file - add_cache_list_of_link_lib(&blake, g->link_libs_list.items, g->link_libs_list.length); - add_cache_list_of_buf(&blake, g->darwin_frameworks.items, g->darwin_frameworks.length); - add_cache_list_of_buf(&blake, g->rpath_list.items, g->rpath_list.length); - add_cache_int(&blake, g->emit_file_type); - add_cache_int(&blake, g->build_mode); - add_cache_int(&blake, g->out_type); - // TODO the rest of the struct CodeGen fields - - uint8_t bin_digest[48]; - rc = blake2b_final(&blake, bin_digest, 48); - assert(rc == 0); - - Buf b64_digest = BUF_INIT; - buf_resize(&b64_digest, 64); - base64_encode(buf_to_slice(&b64_digest), {bin_digest, 48}); - - fprintf(stderr, "input params hash: %s\n", buf_ptr(&b64_digest)); - // TODO next look for a manifest file that has all the files from the input parameters - // use that to construct the real hash, which looks up the output directory and another manifest file - - return false; -} - void codegen_build(CodeGen *g) { assert(g->out_type != OutTypeUnknown); - if (build_with_cache(g)) - return; + //if (build_with_cache(g)) + // return; init(g); codegen_add_time_event(g, "Semantic Analysis"); diff --git a/src/error.cpp b/src/error.cpp @@ -27,6 +27,8 @@ const char *err_str(int err) { case ErrorNegativeDenominator: return "negative denominator"; case ErrorShiftedOutOneBits: return "exact shift shifted out one bits"; case ErrorCCompileErrors: return "C compile errors"; + case ErrorEndOfFile: return "end of file"; + case ErrorIsDir: return "is directory"; } return "(invalid error)"; } diff --git a/src/error.hpp b/src/error.hpp @@ -27,6 +27,8 @@ enum Error { ErrorNegativeDenominator, ErrorShiftedOutOneBits, ErrorCCompileErrors, + ErrorEndOfFile, + ErrorIsDir, }; const char *err_str(int err); diff --git a/src/main.cpp b/src/main.cpp @@ -13,6 +13,7 @@ #include "link.hpp" #include "os.hpp" #include "target.hpp" +#include "cache_hash.hpp" #include <stdio.h> @@ -270,6 +271,66 @@ int main(int argc, char **argv) { return 0; } + if (argc == 2 && strcmp(argv[1], "TEST") == 0) { + Error err; + Buf app_data_dir = BUF_INIT; + if ((err = os_get_app_data_dir(&app_data_dir, "zig"))) { + fprintf(stderr, "get app dir: %s\n", err_str(err)); + return 1; + } + Buf *stage1_dir = buf_alloc(); + os_path_join(&app_data_dir, buf_create_from_str("stage1"), stage1_dir); + Buf *manifest_dir = buf_alloc(); + os_path_join(stage1_dir, buf_create_from_str("exe"), manifest_dir); + + if ((err = os_make_path(manifest_dir))) { + fprintf(stderr, "make path: %s\n", err_str(err)); + return 1; + } + CacheHash cache_hash; + CacheHash *ch = &cache_hash; + cache_init(ch, manifest_dir); + Buf self_exe_path = BUF_INIT; + if ((err = os_self_exe_path(&self_exe_path))) { + fprintf(stderr, "self exe path: %s\n", err_str(err)); + return 1; + } + + cache_file(ch, &self_exe_path); + + Buf exe_digest = BUF_INIT; + buf_resize(&exe_digest, 0); + if ((err = cache_hit(ch, &exe_digest))) { + fprintf(stderr, "cache hit error: %s\n", err_str(err)); + return 1; + } + if (buf_len(&exe_digest) != 0) { + fprintf(stderr, "cache hit: %s\n", buf_ptr(&exe_digest)); + return 0; + } + fprintf(stderr, "cache miss\n"); + ZigList<Buf *> lib_paths = {}; + if ((err = os_self_exe_shared_libs(lib_paths))) { + fprintf(stderr, "finding out shared libs: %s\n", err_str(err)); + return 1; + } + for (size_t i = 0; i < lib_paths.length; i += 1) { + Buf *lib_path = lib_paths.at(i); + if ((err = cache_add_file(ch, lib_path))) { + fprintf(stderr, "cache add file %s: %s", buf_ptr(lib_path), err_str(err)); + return 1; + } + } + if ((err = cache_final(ch, &exe_digest))) { + fprintf(stderr, "final: %s\n", err_str(err)); + return 1; + } + + + fprintf(stderr, "computed2: %s\n", buf_ptr(&exe_digest)); + return 0; + } + os_init(); char *arg0 = argv[0]; diff --git a/src/os.cpp b/src/os.cpp @@ -40,6 +40,10 @@ typedef SSIZE_T ssize_t; #endif +#if defined(ZIG_OS_LINUX) +#include <link.h> +#endif + #if defined(__MACH__) #include <mach/clock.h> @@ -57,54 +61,6 @@ static clock_serv_t cclock; #include <errno.h> #include <time.h> -// Ported from std/mem.zig. -// Coordinate struct fields with memSplit function -struct SplitIterator { - size_t index; - Slice<uint8_t> buffer; - Slice<uint8_t> split_bytes; -}; - -// Ported from std/mem.zig. -static bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte) { - for (size_t i = 0; i < self->split_bytes.len; i += 1) { - if (byte == self->split_bytes.ptr[i]) { - return true; - } - } - return false; -} - -// Ported from std/mem.zig. -static Optional<Slice<uint8_t>> SplitIterator_next(SplitIterator *self) { - // move to beginning of token - while (self->index < self->buffer.len && - SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) - { - self->index += 1; - } - size_t start = self->index; - if (start == self->buffer.len) { - return {}; - } - - // move to end of token - while (self->index < self->buffer.len && - !SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) - { - self->index += 1; - } - size_t end = self->index; - - return Optional<Slice<uint8_t>>::some(self->buffer.slice(start, end)); -} - -// Ported from std/mem.zig -static SplitIterator memSplit(Slice<uint8_t> buffer, Slice<uint8_t> split_bytes) { - return SplitIterator{0, buffer, split_bytes}; -} - - #if defined(ZIG_OS_POSIX) static void populate_termination(Termination *term, int status) { if (WIFEXITED(status)) { @@ -1368,16 +1324,16 @@ double os_get_time(void) { #endif } -int os_make_path(Buf *path) { +Error os_make_path(Buf *path) { Buf resolved_path = os_path_resolve(&path, 1); size_t end_index = buf_len(&resolved_path); - int err; + Error err; while (true) { if ((err = os_make_dir(buf_slice(&resolved_path, 0, end_index)))) { if (err == ErrorPathAlreadyExists) { if (end_index == buf_len(&resolved_path)) - return 0; + return ErrorNone; } else if (err == ErrorFileNotFound) { // march end_index backward until next path component while (true) { @@ -1391,7 +1347,7 @@ int os_make_path(Buf *path) { } } if (end_index == buf_len(&resolved_path)) - return 0; + return ErrorNone; // march end_index forward until next path component while (true) { end_index += 1; @@ -1399,10 +1355,10 @@ int os_make_path(Buf *path) { break; } } - return 0; + return ErrorNone; } -int os_make_dir(Buf *path) { +Error os_make_dir(Buf *path) { #if defined(ZIG_OS_WINDOWS) if (!CreateDirectory(buf_ptr(path), NULL)) { if (GetLastError() == ERROR_ALREADY_EXISTS) @@ -1413,7 +1369,7 @@ int os_make_dir(Buf *path) { return ErrorAccess; return ErrorUnexpected; } - return 0; + return ErrorNone; #else if (mkdir(buf_ptr(path), 0755) == -1) { if (errno == EEXIST) @@ -1424,7 +1380,7 @@ int os_make_dir(Buf *path) { return ErrorAccess; return ErrorUnexpected; } - return 0; + return ErrorNone; #endif } @@ -1447,7 +1403,7 @@ int os_init(void) { return 0; } -int os_self_exe_path(Buf *out_path) { +Error os_self_exe_path(Buf *out_path) { #if defined(ZIG_OS_WINDOWS) buf_resize(out_path, 256); for (;;) { @@ -1480,27 +1436,21 @@ int os_self_exe_path(Buf *out_path) { char *real_path = realpath(buf_ptr(tmp), buf_ptr(out_path)); if (!real_path) { buf_init_from_buf(out_path, tmp); - return 0; + return ErrorNone; } // Resize out_path for the correct length. buf_resize(out_path, strlen(buf_ptr(out_path))); - return 0; + return ErrorNone; #elif defined(ZIG_OS_LINUX) - buf_resize(out_path, 256); - for (;;) { - ssize_t amt = readlink("/proc/self/exe", buf_ptr(out_path), buf_len(out_path)); - if (amt == -1) { - return ErrorUnexpected; - } - if (amt == (ssize_t)buf_len(out_path)) { - buf_resize(out_path, buf_len(out_path) * 2); - continue; - } - buf_resize(out_path, amt); - return 0; + buf_resize(out_path, PATH_MAX); + ssize_t amt = readlink("/proc/self/exe", buf_ptr(out_path), buf_len(out_path)); + if (amt == -1) { + return ErrorUnexpected; } + buf_resize(out_path, amt); + return ErrorNone; #endif return ErrorFileNotFound; } @@ -1685,3 +1635,225 @@ int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf* output_buf, ZigLLVM_ArchTy return ErrorFileNotFound; #endif } + +// Ported from std/os/get_app_data_dir.zig +Error os_get_app_data_dir(Buf *out_path, const char *appname) { +#if defined(ZIG_OS_WINDOWS) +#error "Unimplemented" +#elif defined(ZIG_OS_DARWIN) + const char *home_dir = getenv("HOME"); + if (home_dir == nullptr) { + // TODO use /etc/passwd + return ErrorFileNotFound; + } + buf_resize(out_path, 0); + buf_appendf(out_path, "%s/Library/Application Support/%s", home_dir, appname); + return ErrorNone; +#elif defined(ZIG_OS_LINUX) + const char *home_dir = getenv("HOME"); + if (home_dir == nullptr) { + // TODO use /etc/passwd + return ErrorFileNotFound; + } + buf_resize(out_path, 0); + buf_appendf(out_path, "%s/.local/share/%s", home_dir, appname); + return ErrorNone; +#endif +} + + +#if defined(ZIG_OS_LINUX) +static int self_exe_shared_libs_callback(struct dl_phdr_info *info, size_t size, void *data) { + ZigList<Buf *> *libs = reinterpret_cast< ZigList<Buf *> *>(data); + if (info->dlpi_name[0] == '/') { + libs->append(buf_create_from_str(info->dlpi_name)); + } + return 0; +} +#endif + +Error os_self_exe_shared_libs(ZigList<Buf *> &paths) { +#if defined(ZIG_OS_LINUX) + paths.resize(0); + dl_iterate_phdr(self_exe_shared_libs_callback, &paths); + return ErrorNone; +#else +#error "unimplemented" +#endif +} + +Error os_file_open_r(Buf *full_path, OsFile *out_file) { +#if defined(ZIG_OS_WINDOWS) +#error "unimplemented" +#else + for (;;) { + int fd = open(buf_ptr(full_path), O_RDONLY|O_CLOEXEC); + if (fd == -1) { + switch (errno) { + case EINTR: + continue; + case EINVAL: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EACCES: + return ErrorAccess; + case EISDIR: + return ErrorIsDir; + case ENOENT: + return ErrorFileNotFound; + default: + return ErrorFileSystem; + } + } + *out_file = fd; + return ErrorNone; + } +#endif +} + +Error os_file_open_lock_rw(Buf *full_path, OsFile *out_file) { +#if defined(ZIG_OS_WINDOWS) +#error "unimplemented" +#else + int fd; + for (;;) { + fd = open(buf_ptr(full_path), O_RDWR|O_CLOEXEC|O_CREAT, 0666); + if (fd == -1) { + switch (errno) { + case EINTR: + continue; + case EINVAL: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EACCES: + return ErrorAccess; + case EISDIR: + return ErrorIsDir; + case ENOENT: + return ErrorFileNotFound; + default: + return ErrorFileSystem; + } + } + break; + } + for (;;) { + struct flock lock; + lock.l_type = F_WRLCK; + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + if (fcntl(fd, F_SETLKW, &lock) == -1) { + switch (errno) { + case EINTR: + continue; + case EBADF: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EINVAL: + zig_unreachable(); + default: + close(fd); + return ErrorFileSystem; + } + } + break; + } + *out_file = fd; + return ErrorNone; +#endif +} + +Error os_file_mtime(OsFile file, OsTimeStamp *mtime) { +#if defined(ZIG_OS_WINDOWS) +#error unimplemented +#else + struct stat statbuf; + if (fstat(file, &statbuf) == -1) + return ErrorFileSystem; + + mtime->sec = statbuf.st_mtim.tv_sec; + mtime->nsec = statbuf.st_mtim.tv_nsec; + return ErrorNone; +#endif +} + +Error os_file_read(OsFile file, void *ptr, size_t *len) { +#if defined(ZIG_OS_WINDOWS) +#error unimplemented +#else + for (;;) { + ssize_t rc = read(file, ptr, *len); + if (rc == -1) { + switch (errno) { + case EINTR: + continue; + case EBADF: + zig_unreachable(); + case EFAULT: + zig_unreachable(); + case EISDIR: + zig_unreachable(); + default: + return ErrorFileSystem; + } + } + *len = rc; + return ErrorNone; + } +#endif +} + +Error os_file_read_all(OsFile file, Buf *contents) { + Error err; + size_t index = 0; + for (;;) { + size_t amt = buf_len(contents) - index; + + if (amt < 512) { + buf_resize(contents, buf_len(contents) + 512); + amt += 512; + } + + if ((err = os_file_read(file, buf_ptr(contents) + index, &amt))) + return err; + + if (amt == 0) { + buf_resize(contents, index); + return ErrorNone; + } + + index += amt; + } +} + +Error os_file_overwrite(OsFile file, Buf *contents) { +#if defined(ZIG_OS_WINDOWS) +#error unimplemented +#else + if (lseek(file, 0, SEEK_SET) == -1) + return ErrorFileSystem; + for (;;) { + if (write(file, buf_ptr(contents), buf_len(contents)) == -1) { + switch (errno) { + case EINTR: + continue; + case EINVAL: + zig_unreachable(); + case EBADF: + zig_unreachable(); + default: + return ErrorFileSystem; + } + } + return ErrorNone; + } +#endif +} + +void os_file_close(OsFile file) { + close(file); +} diff --git a/src/os.hpp b/src/os.hpp @@ -13,10 +13,43 @@ #include "error.hpp" #include "zig_llvm.h" #include "windows_sdk.h" +#include "result.hpp" #include <stdio.h> #include <inttypes.h> +#if defined(__APPLE__) +#define ZIG_OS_DARWIN +#elif defined(_WIN32) +#define ZIG_OS_WINDOWS +#elif defined(__linux__) +#define ZIG_OS_LINUX +#else +#define ZIG_OS_UNKNOWN +#endif + +#if defined(__x86_64__) +#define ZIG_ARCH_X86_64 +#else +#define ZIG_ARCH_UNKNOWN +#endif + +#if defined(ZIG_OS_WINDOWS) +#define ZIG_PRI_usize "I64u" +#define ZIG_PRI_u64 "I64u" +#define ZIG_PRI_llu "I64u" +#define ZIG_PRI_x64 "I64x" +#define OS_SEP "\\" +#define ZIG_OS_SEP_CHAR '\\' +#else +#define ZIG_PRI_usize "zu" +#define ZIG_PRI_u64 PRIu64 +#define ZIG_PRI_llu "llu" +#define ZIG_PRI_x64 PRIx64 +#define OS_SEP "/" +#define ZIG_OS_SEP_CHAR '/' +#endif + enum TermColor { TermColorRed, TermColorGreen, @@ -38,6 +71,17 @@ struct Termination { int code; }; +#if defined(ZIG_OS_WINDOWS) +#define OsFile (void *) +#else +#define OsFile int +#endif + +struct OsTimeStamp { + uint64_t sec; + uint64_t nsec; +}; + int os_init(void); void os_spawn_process(const char *exe, ZigList<const char *> &args, Termination *term); @@ -54,8 +98,16 @@ bool os_path_is_absolute(Buf *path); int os_get_global_cache_directory(Buf *out_tmp_path); -int os_make_path(Buf *path); -int os_make_dir(Buf *path); +Error ATTRIBUTE_MUST_USE os_make_path(Buf *path); +Error ATTRIBUTE_MUST_USE os_make_dir(Buf *path); + +Error ATTRIBUTE_MUST_USE os_file_open_r(Buf *full_path, OsFile *out_file); +Error ATTRIBUTE_MUST_USE os_file_open_lock_rw(Buf *full_path, OsFile *out_file); +Error ATTRIBUTE_MUST_USE os_file_mtime(OsFile file, OsTimeStamp *mtime); +Error ATTRIBUTE_MUST_USE os_file_read(OsFile file, void *ptr, size_t *len); +Error ATTRIBUTE_MUST_USE os_file_read_all(OsFile file, Buf *contents); +Error ATTRIBUTE_MUST_USE os_file_overwrite(OsFile file, Buf *contents); +void os_file_close(OsFile file); void os_write_file(Buf *full_path, Buf *contents); int os_copy_file(Buf *src_path, Buf *dest_path); @@ -78,42 +130,14 @@ double os_get_time(void); bool os_is_sep(uint8_t c); -int os_self_exe_path(Buf *out_path); +Error ATTRIBUTE_MUST_USE os_self_exe_path(Buf *out_path); + +Error ATTRIBUTE_MUST_USE os_get_app_data_dir(Buf *out_path, const char *appname); int os_get_win32_ucrt_include_path(ZigWindowsSDK *sdk, Buf *output_buf); int os_get_win32_ucrt_lib_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type); int os_get_win32_kern32_path(ZigWindowsSDK *sdk, Buf *output_buf, ZigLLVM_ArchType platform_type); -#if defined(__APPLE__) -#define ZIG_OS_DARWIN -#elif defined(_WIN32) -#define ZIG_OS_WINDOWS -#elif defined(__linux__) -#define ZIG_OS_LINUX -#else -#define ZIG_OS_UNKNOWN -#endif - -#if defined(__x86_64__) -#define ZIG_ARCH_X86_64 -#else -#define ZIG_ARCH_UNKNOWN -#endif - -#if defined(ZIG_OS_WINDOWS) -#define ZIG_PRI_usize "I64u" -#define ZIG_PRI_u64 "I64u" -#define ZIG_PRI_llu "I64u" -#define ZIG_PRI_x64 "I64x" -#define OS_SEP "\\" -#define ZIG_OS_SEP_CHAR '\\' -#else -#define ZIG_PRI_usize "zu" -#define ZIG_PRI_u64 PRIu64 -#define ZIG_PRI_llu "llu" -#define ZIG_PRI_x64 PRIx64 -#define OS_SEP "/" -#define ZIG_OS_SEP_CHAR '/' -#endif +Error ATTRIBUTE_MUST_USE os_self_exe_shared_libs(ZigList<Buf *> &paths); #endif diff --git a/src/util.cpp b/src/util.cpp @@ -43,3 +43,42 @@ uint32_t ptr_hash(const void *ptr) { bool ptr_eq(const void *a, const void *b) { return a == b; } + +// Ported from std/mem.zig. +bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte) { + for (size_t i = 0; i < self->split_bytes.len; i += 1) { + if (byte == self->split_bytes.ptr[i]) { + return true; + } + } + return false; +} + +// Ported from std/mem.zig. +Optional<Slice<uint8_t>> SplitIterator_next(SplitIterator *self) { + // move to beginning of token + while (self->index < self->buffer.len && + SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) + { + self->index += 1; + } + size_t start = self->index; + if (start == self->buffer.len) { + return {}; + } + + // move to end of token + while (self->index < self->buffer.len && + !SplitIterator_isSplitByte(self, self->buffer.ptr[self->index])) + { + self->index += 1; + } + size_t end = self->index; + + return Optional<Slice<uint8_t>>::some(self->buffer.slice(start, end)); +} + +// Ported from std/mem.zig +SplitIterator memSplit(Slice<uint8_t> buffer, Slice<uint8_t> split_bytes) { + return SplitIterator{0, buffer, split_bytes}; +} diff --git a/src/util.hpp b/src/util.hpp @@ -254,4 +254,16 @@ static inline void memCopy(Slice<T> dest, Slice<T> src) { memcpy(dest.ptr, src.ptr, src.len * sizeof(T)); } +// Ported from std/mem.zig. +// Coordinate struct fields with memSplit function +struct SplitIterator { + size_t index; + Slice<uint8_t> buffer; + Slice<uint8_t> split_bytes; +}; + +bool SplitIterator_isSplitByte(SplitIterator *self, uint8_t byte); +Optional<Slice<uint8_t>> SplitIterator_next(SplitIterator *self); +SplitIterator memSplit(Slice<uint8_t> buffer, Slice<uint8_t> split_bytes); + #endif