zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 5faac5d0d8193153f24a4167b390d014fcfa0f21 (tree)
parent 5b4c7e2bc60aea0b1b1c8f3a17deb92f34f60f7a
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date:   Thu, 26 Feb 2026 05:04:08 +0000

sema: add string literal interning, create entries $198-$212

Add new IP key types: IP_KEY_BYTES, IP_KEY_PTR_UAV,
IP_KEY_PTR_UAV_ALIGNED, IP_KEY_PTR_SLICE, IP_KEY_OPT_PAYLOAD
with full hash/equality/typeOf/verbose support.

Add internStringLiteral() helper that creates the complete sequence
of IP entries for a comptime string literal:
- [len:0]u8 array type + bytes value
- *const [len:0]u8 pointer + ptr_uav + ptr_uav_aligned
- int_usize(len) + ptr_slice for [:0]const u8
- [len]u8 array type + pointer + ptr_uav_aligned + individual u8 values + bytes

Create entries $198-$212 for the "main" string from
@hasDecl(root, "main") in start.zig's comptime block.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Diffstat:
M stage0/intern_pool.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
M stage0/intern_pool.h | 27 +++++++++++++++++++++++++++
M stage0/sema.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M stage0/verbose_intern_pool.c | 31 +++++++++++++++++++++++++++++++
4 files changed, 214 insertions(+), 0 deletions(-)

diff --git a/stage0/intern_pool.c b/stage0/intern_pool.c @@ -95,6 +95,28 @@ static uint32_t ipHashKey(const InternPoolKey* key) { h = ipHashCombine(h, key->data.memoized_call.func); h = ipHashCombine(h, key->data.memoized_call.result); break; + case IP_KEY_BYTES: + h = ipHashCombine(h, key->data.bytes.ty); + h = ipHashCombine(h, key->data.bytes.str_idx); + break; + case IP_KEY_PTR_UAV: + h = ipHashCombine(h, key->data.ptr_uav.ty); + h = ipHashCombine(h, key->data.ptr_uav.val); + break; + case IP_KEY_PTR_UAV_ALIGNED: + h = ipHashCombine(h, key->data.ptr_uav_aligned.ty); + h = ipHashCombine(h, key->data.ptr_uav_aligned.val); + h = ipHashCombine(h, key->data.ptr_uav_aligned.orig_ty); + break; + case IP_KEY_PTR_SLICE: + h = ipHashCombine(h, key->data.ptr_slice.ty); + h = ipHashCombine(h, key->data.ptr_slice.ptr); + h = ipHashCombine(h, key->data.ptr_slice.len); + break; + case IP_KEY_OPT_PAYLOAD: + h = ipHashCombine(h, key->data.opt_payload.ty); + h = ipHashCombine(h, key->data.opt_payload.val); + break; default: /* For other tag types, just use the tag hash. 
*/ break; @@ -164,6 +186,24 @@ static bool ipKeysEqual(const InternPoolKey* a, const InternPoolKey* b) { case IP_KEY_MEMOIZED_CALL: return a->data.memoized_call.func == b->data.memoized_call.func && a->data.memoized_call.result == b->data.memoized_call.result; + case IP_KEY_BYTES: + return a->data.bytes.ty == b->data.bytes.ty + && a->data.bytes.str_idx == b->data.bytes.str_idx; + case IP_KEY_PTR_UAV: + return a->data.ptr_uav.ty == b->data.ptr_uav.ty + && a->data.ptr_uav.val == b->data.ptr_uav.val; + case IP_KEY_PTR_UAV_ALIGNED: + return a->data.ptr_uav_aligned.ty == b->data.ptr_uav_aligned.ty + && a->data.ptr_uav_aligned.val == b->data.ptr_uav_aligned.val + && a->data.ptr_uav_aligned.orig_ty + == b->data.ptr_uav_aligned.orig_ty; + case IP_KEY_PTR_SLICE: + return a->data.ptr_slice.ty == b->data.ptr_slice.ty + && a->data.ptr_slice.ptr == b->data.ptr_slice.ptr + && a->data.ptr_slice.len == b->data.ptr_slice.len; + case IP_KEY_OPT_PAYLOAD: + return a->data.opt_payload.ty == b->data.opt_payload.ty + && a->data.opt_payload.val == b->data.opt_payload.val; default: /* Fallback: memcmp the entire data union. 
*/ return memcmp(&a->data, &b->data, sizeof(a->data)) == 0; @@ -823,6 +863,21 @@ InternPoolIndex ipTypeOf(const InternPool* ip, InternPoolIndex index) { case IP_KEY_MEMOIZED_CALL: return IP_INDEX_VOID_TYPE; + case IP_KEY_BYTES: + return key.data.bytes.ty; + + case IP_KEY_PTR_UAV: + return key.data.ptr_uav.ty; + + case IP_KEY_PTR_UAV_ALIGNED: + return key.data.ptr_uav_aligned.ty; + + case IP_KEY_PTR_SLICE: + return key.data.ptr_slice.ty; + + case IP_KEY_OPT_PAYLOAD: + return key.data.opt_payload.ty; + default: break; } diff --git a/stage0/intern_pool.h b/stage0/intern_pool.h @@ -306,6 +306,11 @@ typedef enum { IP_KEY_UNION_VALUE, IP_KEY_MEMOIZED_CALL, IP_KEY_PTR_NAV, + IP_KEY_BYTES, + IP_KEY_PTR_UAV, + IP_KEY_PTR_UAV_ALIGNED, + IP_KEY_PTR_SLICE, + IP_KEY_OPT_PAYLOAD, } InternPoolKeyTag; // --- InternPoolKey (tagged union) --- @@ -363,6 +368,28 @@ typedef struct { InternPoolIndex ty; // pointer type uint32_t nav; // Nav index (NOT an IP index) } ptr_nav; + struct { + InternPoolIndex ty; // aggregate type (array type) + uint32_t str_idx; // string_bytes offset + } bytes; + struct { + InternPoolIndex ty; // pointer type + InternPoolIndex val; // value being pointed to + } ptr_uav; + struct { + InternPoolIndex ty; // coerced pointer type + InternPoolIndex val; // value being pointed to + InternPoolIndex orig_ty; // original pointer type + } ptr_uav_aligned; + struct { + InternPoolIndex ty; // slice type + InternPoolIndex ptr; // many-pointer value + InternPoolIndex len; // usize value + } ptr_slice; + struct { + InternPoolIndex ty; // optional type + InternPoolIndex val; // payload value (or IP_INDEX_NONE for null) + } opt_payload; } data; } InternPoolKey; diff --git a/stage0/sema.c b/stage0/sema.c @@ -2977,6 +2977,102 @@ static InternPoolIndex internTypedInt(InternPoolIndex ty, uint64_t val) { return ipIntern(s_module_ip, key); } +// --- internStringLiteral --- +// Intern a string literal, creating the sequence of IP entries that +// the Zig compiler creates for a 
comptime string like "main". +// Creates: [len:0]u8 type, bytes value, *const [len:0]u8 ptr_type, +// ptr_uav, ptr_uav_aligned, int_usize(len), ptr_slice, +// [len]u8 type, *const [len]u8 ptr_type, ptr_uav_aligned, +// individual int_u8 values, bytes value. +static void internStringLiteral(const char* str) { + uint32_t slen = (uint32_t)strlen(str); + + // $198: [len:0]u8 (sentinel-terminated array type). + InternPoolKey arr_key; + memset(&arr_key, 0, sizeof(arr_key)); + arr_key.tag = IP_KEY_ARRAY_TYPE; + arr_key.data.array_type.len = slen; + arr_key.data.array_type.child = IP_INDEX_U8_TYPE; + arr_key.data.array_type.sentinel = IP_INDEX_ZERO_U8; + InternPoolIndex arr_type = ipIntern(s_module_ip, arr_key); + + // $199: bytes value (the string data stored in string_bytes). + uint32_t str_idx = ipGetOrPutString(s_module_ip, str); + InternPoolKey bytes_key; + memset(&bytes_key, 0, sizeof(bytes_key)); + bytes_key.tag = IP_KEY_BYTES; + bytes_key.data.bytes.ty = arr_type; + bytes_key.data.bytes.str_idx = str_idx; + InternPoolIndex bytes_val = ipIntern(s_module_ip, bytes_key); + + // $200: *const [len:0]u8 pointer type. + InternPoolIndex arr_ptr_type = internPtrConst(arr_type); + + // $201: ptr_uav — pointer to the bytes value. + InternPoolKey uav_key; + memset(&uav_key, 0, sizeof(uav_key)); + uav_key.tag = IP_KEY_PTR_UAV; + uav_key.data.ptr_uav.ty = arr_ptr_type; + uav_key.data.ptr_uav.val = bytes_val; + InternPoolIndex uav_ptr = ipIntern(s_module_ip, uav_key); + + // $202: ptr_uav_aligned — coerced to [*:0]const u8. + InternPoolKey uav_aligned_key; + memset(&uav_aligned_key, 0, sizeof(uav_aligned_key)); + uav_aligned_key.tag = IP_KEY_PTR_UAV_ALIGNED; + uav_aligned_key.data.ptr_uav_aligned.ty + = IP_INDEX_MANYPTR_CONST_U8_SENTINEL_0_TYPE; + uav_aligned_key.data.ptr_uav_aligned.val = bytes_val; + uav_aligned_key.data.ptr_uav_aligned.orig_ty = uav_ptr; + InternPoolIndex uav_aligned = ipIntern(s_module_ip, uav_aligned_key); + + // $203: int_usize(len). 
+ InternPoolIndex len_val = internTypedInt(IP_INDEX_USIZE_TYPE, slen); + + // $204: ptr_slice — [:0]const u8 slice value. + InternPoolKey slice_key; + memset(&slice_key, 0, sizeof(slice_key)); + slice_key.tag = IP_KEY_PTR_SLICE; + slice_key.data.ptr_slice.ty = IP_INDEX_SLICE_CONST_U8_SENTINEL_0_TYPE; + slice_key.data.ptr_slice.ptr = uav_aligned; + slice_key.data.ptr_slice.len = len_val; + (void)ipIntern(s_module_ip, slice_key); + + // $205: [len]u8 (non-sentinel array type). + InternPoolKey arr2_key; + memset(&arr2_key, 0, sizeof(arr2_key)); + arr2_key.tag = IP_KEY_ARRAY_TYPE; + arr2_key.data.array_type.len = slen; + arr2_key.data.array_type.child = IP_INDEX_U8_TYPE; + arr2_key.data.array_type.sentinel = IP_INDEX_NONE; + InternPoolIndex arr2_type = ipIntern(s_module_ip, arr2_key); + + // $206: *const [len]u8 pointer type. + InternPoolIndex arr2_ptr_type = internPtrConst(arr2_type); + + // $207: ptr_uav_aligned — pointer to string as [len]u8. + InternPoolKey uav2_key; + memset(&uav2_key, 0, sizeof(uav2_key)); + uav2_key.tag = IP_KEY_PTR_UAV_ALIGNED; + uav2_key.data.ptr_uav_aligned.ty = arr2_ptr_type; + uav2_key.data.ptr_uav_aligned.val = bytes_val; + uav2_key.data.ptr_uav_aligned.orig_ty = uav_ptr; + (void)ipIntern(s_module_ip, uav2_key); + + // $208-$211: individual int_u8 values for each byte. + for (uint32_t i = 0; i < slen; i++) { + (void)internTypedInt(IP_INDEX_U8_TYPE, (uint8_t)str[i]); + } + + // $212: bytes value for the non-sentinel array. + InternPoolKey bytes2_key; + memset(&bytes2_key, 0, sizeof(bytes2_key)); + bytes2_key.tag = IP_KEY_BYTES; + bytes2_key.data.bytes.ty = arr2_type; + bytes2_key.data.bytes.str_idx = str_idx; + (void)ipIntern(s_module_ip, bytes2_key); +} + // --- resolveStartComptimePreamble --- // Create the IP entries that the Zig compiler generates when processing // start.zig's comptime block. 
This block evaluates builtin.zig_backend @@ -3093,6 +3189,11 @@ static void resolveStartComptimePreamble(void) { (void)internEnumLiteral("Exe"); (void)internEnumTag(om_enum_ip, internTypedInt(u2_type, 0)); } + + // --- $198-$212: string literal "main" from @hasDecl(root, "main") --- + // start.zig's else branch evaluates @hasDecl(root, "main") which + // triggers interning of the string literal "main". + internStringLiteral("main"); } // --- findDeclImportPathFromZir --- diff --git a/stage0/verbose_intern_pool.c b/stage0/verbose_intern_pool.c @@ -76,6 +76,16 @@ static const char* ipKeyTagName(InternPoolKeyTag tag) { return "memoized_call"; case IP_KEY_PTR_NAV: return "ptr_nav"; + case IP_KEY_BYTES: + return "bytes"; + case IP_KEY_PTR_UAV: + return "ptr_uav"; + case IP_KEY_PTR_UAV_ALIGNED: + return "ptr_uav_aligned"; + case IP_KEY_PTR_SLICE: + return "ptr_slice"; + case IP_KEY_OPT_PAYLOAD: + return "opt_payload"; } return "?"; } @@ -133,6 +143,27 @@ void verboseIpPrint(FILE* out, const InternPool* ip) { fprintf(out, " ty=%u nav=%u", key.data.ptr_nav.ty, key.data.ptr_nav.nav); break; + case IP_KEY_BYTES: + fprintf(out, " ty=%u str=%u", key.data.bytes.ty, + key.data.bytes.str_idx); + break; + case IP_KEY_PTR_UAV: + fprintf(out, " ty=%u val=%u", key.data.ptr_uav.ty, + key.data.ptr_uav.val); + break; + case IP_KEY_PTR_UAV_ALIGNED: + fprintf(out, " ty=%u val=%u orig=%u", key.data.ptr_uav_aligned.ty, + key.data.ptr_uav_aligned.val, + key.data.ptr_uav_aligned.orig_ty); + break; + case IP_KEY_PTR_SLICE: + fprintf(out, " ty=%u ptr=%u len=%u", key.data.ptr_slice.ty, + key.data.ptr_slice.ptr, key.data.ptr_slice.len); + break; + case IP_KEY_OPT_PAYLOAD: + fprintf(out, " ty=%u val=%u", key.data.opt_payload.ty, + key.data.opt_payload.val); + break; default: break; }