commit 5faac5d0d8193153f24a4167b390d014fcfa0f21 (tree)
parent 5b4c7e2bc60aea0b1b1c8f3a17deb92f34f60f7a
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Thu, 26 Feb 2026 05:04:08 +0000
sema: add string literal interning, create entries $198-$212
Add new IP key types: IP_KEY_BYTES, IP_KEY_PTR_UAV,
IP_KEY_PTR_UAV_ALIGNED, IP_KEY_PTR_SLICE, IP_KEY_OPT_PAYLOAD
with full hash/equality/typeOf/verbose support.
Add internStringLiteral() helper that creates the complete sequence
of IP entries for a comptime string literal:
- [len:0]u8 array type + bytes value
- *const [len:0]u8 pointer + ptr_uav + ptr_uav_aligned
- int_usize(len) + ptr_slice for [:0]const u8
- [len]u8 array type + pointer type + ptr_uav_aligned + individual u8 values + bytes
Create entries $198-$212 for the "main" string from
@hasDecl(root, "main") in start.zig's comptime block.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
4 files changed, 214 insertions(+), 0 deletions(-)
diff --git a/stage0/intern_pool.c b/stage0/intern_pool.c
@@ -95,6 +95,28 @@ static uint32_t ipHashKey(const InternPoolKey* key) {
h = ipHashCombine(h, key->data.memoized_call.func);
h = ipHashCombine(h, key->data.memoized_call.result);
break;
+ case IP_KEY_BYTES:
+ h = ipHashCombine(h, key->data.bytes.ty);
+ h = ipHashCombine(h, key->data.bytes.str_idx);
+ break;
+ case IP_KEY_PTR_UAV:
+ h = ipHashCombine(h, key->data.ptr_uav.ty);
+ h = ipHashCombine(h, key->data.ptr_uav.val);
+ break;
+ case IP_KEY_PTR_UAV_ALIGNED:
+ h = ipHashCombine(h, key->data.ptr_uav_aligned.ty);
+ h = ipHashCombine(h, key->data.ptr_uav_aligned.val);
+ h = ipHashCombine(h, key->data.ptr_uav_aligned.orig_ty);
+ break;
+ case IP_KEY_PTR_SLICE:
+ h = ipHashCombine(h, key->data.ptr_slice.ty);
+ h = ipHashCombine(h, key->data.ptr_slice.ptr);
+ h = ipHashCombine(h, key->data.ptr_slice.len);
+ break;
+ case IP_KEY_OPT_PAYLOAD:
+ h = ipHashCombine(h, key->data.opt_payload.ty);
+ h = ipHashCombine(h, key->data.opt_payload.val);
+ break;
default:
/* For other tag types, just use the tag hash. */
break;
@@ -164,6 +186,24 @@ static bool ipKeysEqual(const InternPoolKey* a, const InternPoolKey* b) {
case IP_KEY_MEMOIZED_CALL:
return a->data.memoized_call.func == b->data.memoized_call.func
&& a->data.memoized_call.result == b->data.memoized_call.result;
+ case IP_KEY_BYTES:
+ return a->data.bytes.ty == b->data.bytes.ty
+ && a->data.bytes.str_idx == b->data.bytes.str_idx;
+ case IP_KEY_PTR_UAV:
+ return a->data.ptr_uav.ty == b->data.ptr_uav.ty
+ && a->data.ptr_uav.val == b->data.ptr_uav.val;
+ case IP_KEY_PTR_UAV_ALIGNED:
+ return a->data.ptr_uav_aligned.ty == b->data.ptr_uav_aligned.ty
+ && a->data.ptr_uav_aligned.val == b->data.ptr_uav_aligned.val
+ && a->data.ptr_uav_aligned.orig_ty
+ == b->data.ptr_uav_aligned.orig_ty;
+ case IP_KEY_PTR_SLICE:
+ return a->data.ptr_slice.ty == b->data.ptr_slice.ty
+ && a->data.ptr_slice.ptr == b->data.ptr_slice.ptr
+ && a->data.ptr_slice.len == b->data.ptr_slice.len;
+ case IP_KEY_OPT_PAYLOAD:
+ return a->data.opt_payload.ty == b->data.opt_payload.ty
+ && a->data.opt_payload.val == b->data.opt_payload.val;
default:
/* Fallback: memcmp the entire data union. */
return memcmp(&a->data, &b->data, sizeof(a->data)) == 0;
@@ -823,6 +863,21 @@ InternPoolIndex ipTypeOf(const InternPool* ip, InternPoolIndex index) {
case IP_KEY_MEMOIZED_CALL:
return IP_INDEX_VOID_TYPE;
+ case IP_KEY_BYTES:
+ return key.data.bytes.ty;
+
+ case IP_KEY_PTR_UAV:
+ return key.data.ptr_uav.ty;
+
+ case IP_KEY_PTR_UAV_ALIGNED:
+ return key.data.ptr_uav_aligned.ty;
+
+ case IP_KEY_PTR_SLICE:
+ return key.data.ptr_slice.ty;
+
+ case IP_KEY_OPT_PAYLOAD:
+ return key.data.opt_payload.ty;
+
default:
break;
}
diff --git a/stage0/intern_pool.h b/stage0/intern_pool.h
@@ -306,6 +306,11 @@ typedef enum {
IP_KEY_UNION_VALUE,
IP_KEY_MEMOIZED_CALL,
IP_KEY_PTR_NAV,
+ IP_KEY_BYTES,
+ IP_KEY_PTR_UAV,
+ IP_KEY_PTR_UAV_ALIGNED,
+ IP_KEY_PTR_SLICE,
+ IP_KEY_OPT_PAYLOAD,
} InternPoolKeyTag;
// --- InternPoolKey (tagged union) ---
@@ -363,6 +368,28 @@ typedef struct {
InternPoolIndex ty; // pointer type
uint32_t nav; // Nav index (NOT an IP index)
} ptr_nav;
+ struct {
+ InternPoolIndex ty; // aggregate type (array type)
+ uint32_t str_idx; // string_bytes offset
+ } bytes;
+ struct {
+ InternPoolIndex ty; // pointer type
+ InternPoolIndex val; // value being pointed to
+ } ptr_uav;
+ struct {
+ InternPoolIndex ty; // coerced pointer type
+ InternPoolIndex val; // value being pointed to
+ InternPoolIndex orig_ty; // original pointer type
+ } ptr_uav_aligned;
+ struct {
+ InternPoolIndex ty; // slice type
+ InternPoolIndex ptr; // many-pointer value
+ InternPoolIndex len; // usize value
+ } ptr_slice;
+ struct {
+ InternPoolIndex ty; // optional type
+ InternPoolIndex val; // payload value (or IP_INDEX_NONE for null)
+ } opt_payload;
} data;
} InternPoolKey;
diff --git a/stage0/sema.c b/stage0/sema.c
@@ -2977,6 +2977,102 @@ static InternPoolIndex internTypedInt(InternPoolIndex ty, uint64_t val) {
return ipIntern(s_module_ip, key);
}
+// --- internStringLiteral ---
+// Intern a comptime string literal such as "main", creating the sequence
+// of IP entries that the Zig compiler creates for it, in this order:
+//   [len:0]u8 type, bytes value, *const [len:0]u8 ptr type, ptr_uav,
+//   ptr_uav_aligned (as [*:0]const u8), int_usize(len), ptr_slice,
+//   [len]u8 type, *const [len]u8 ptr type, ptr_uav_aligned,
+//   one int_u8 value per byte, bytes value.
+// `str` must be a NUL-terminated string. Nothing is returned; the entries
+// are only added to s_module_ip.
+// The `$N` labels below are the indices these entries are expected to get
+// for the "main" literal (assuming none of them was interned earlier).
+static void internStringLiteral(const char* str) {
+    uint32_t slen = (uint32_t)strlen(str);
+
+    // Every key is memset to zero before its fields are filled in so that
+    // unused bytes of the data union hold a deterministic value.
+
+    // $198: [len:0]u8 (sentinel-terminated array type).
+    InternPoolKey arr_key;
+    memset(&arr_key, 0, sizeof(arr_key));
+    arr_key.tag = IP_KEY_ARRAY_TYPE;
+    arr_key.data.array_type.len = slen;
+    arr_key.data.array_type.child = IP_INDEX_U8_TYPE;
+    arr_key.data.array_type.sentinel = IP_INDEX_ZERO_U8;
+    InternPoolIndex arr_type = ipIntern(s_module_ip, arr_key);
+
+    // $199: bytes value (the string data stored in string_bytes).
+    uint32_t str_idx = ipGetOrPutString(s_module_ip, str);
+    InternPoolKey bytes_key;
+    memset(&bytes_key, 0, sizeof(bytes_key));
+    bytes_key.tag = IP_KEY_BYTES;
+    bytes_key.data.bytes.ty = arr_type;
+    bytes_key.data.bytes.str_idx = str_idx;
+    InternPoolIndex bytes_val = ipIntern(s_module_ip, bytes_key);
+
+    // $200: *const [len:0]u8 pointer type. This is also the "original
+    // pointer type" recorded in both ptr_uav_aligned entries below.
+    InternPoolIndex arr_ptr_type = internPtrConst(arr_type);
+
+    // $201: ptr_uav — pointer to the bytes value. Interned only so the
+    // entry exists at this position; its index is not needed afterwards.
+    InternPoolKey uav_key;
+    memset(&uav_key, 0, sizeof(uav_key));
+    uav_key.tag = IP_KEY_PTR_UAV;
+    uav_key.data.ptr_uav.ty = arr_ptr_type;
+    uav_key.data.ptr_uav.val = bytes_val;
+    (void)ipIntern(s_module_ip, uav_key);
+
+    // $202: ptr_uav_aligned — coerced to [*:0]const u8.
+    // orig_ty must be a pointer *type* (the uncoerced *const [len:0]u8),
+    // not the index of the ptr_uav value entry.
+    InternPoolKey uav_aligned_key;
+    memset(&uav_aligned_key, 0, sizeof(uav_aligned_key));
+    uav_aligned_key.tag = IP_KEY_PTR_UAV_ALIGNED;
+    uav_aligned_key.data.ptr_uav_aligned.ty
+        = IP_INDEX_MANYPTR_CONST_U8_SENTINEL_0_TYPE;
+    uav_aligned_key.data.ptr_uav_aligned.val = bytes_val;
+    uav_aligned_key.data.ptr_uav_aligned.orig_ty = arr_ptr_type;
+    InternPoolIndex uav_aligned = ipIntern(s_module_ip, uav_aligned_key);
+
+    // $203: int_usize(len).
+    InternPoolIndex len_val = internTypedInt(IP_INDEX_USIZE_TYPE, slen);
+
+    // $204: ptr_slice — [:0]const u8 slice value.
+    InternPoolKey slice_key;
+    memset(&slice_key, 0, sizeof(slice_key));
+    slice_key.tag = IP_KEY_PTR_SLICE;
+    slice_key.data.ptr_slice.ty = IP_INDEX_SLICE_CONST_U8_SENTINEL_0_TYPE;
+    slice_key.data.ptr_slice.ptr = uav_aligned;
+    slice_key.data.ptr_slice.len = len_val;
+    (void)ipIntern(s_module_ip, slice_key);
+
+    // $205: [len]u8 (non-sentinel array type).
+    InternPoolKey arr2_key;
+    memset(&arr2_key, 0, sizeof(arr2_key));
+    arr2_key.tag = IP_KEY_ARRAY_TYPE;
+    arr2_key.data.array_type.len = slen;
+    arr2_key.data.array_type.child = IP_INDEX_U8_TYPE;
+    arr2_key.data.array_type.sentinel = IP_INDEX_NONE;
+    InternPoolIndex arr2_type = ipIntern(s_module_ip, arr2_key);
+
+    // $206: *const [len]u8 pointer type.
+    InternPoolIndex arr2_ptr_type = internPtrConst(arr2_type);
+
+    // $207: ptr_uav_aligned — pointer to string as [len]u8. It still
+    // points at the sentinel-terminated bytes value, so the original
+    // pointer type is again *const [len:0]u8.
+    InternPoolKey uav2_key;
+    memset(&uav2_key, 0, sizeof(uav2_key));
+    uav2_key.tag = IP_KEY_PTR_UAV_ALIGNED;
+    uav2_key.data.ptr_uav_aligned.ty = arr2_ptr_type;
+    uav2_key.data.ptr_uav_aligned.val = bytes_val;
+    uav2_key.data.ptr_uav_aligned.orig_ty = arr_ptr_type;
+    (void)ipIntern(s_module_ip, uav2_key);
+
+    // $208-$211: individual int_u8 values for each byte. The cast to
+    // uint8_t keeps bytes >= 0x80 from being sign-extended when `char`
+    // is signed.
+    for (uint32_t i = 0; i < slen; i++) {
+        (void)internTypedInt(IP_INDEX_U8_TYPE, (uint8_t)str[i]);
+    }
+
+    // $212: bytes value for the non-sentinel array (same string_bytes
+    // offset as $199, different aggregate type).
+    InternPoolKey bytes2_key;
+    memset(&bytes2_key, 0, sizeof(bytes2_key));
+    bytes2_key.tag = IP_KEY_BYTES;
+    bytes2_key.data.bytes.ty = arr2_type;
+    bytes2_key.data.bytes.str_idx = str_idx;
+    (void)ipIntern(s_module_ip, bytes2_key);
+}
+
// --- resolveStartComptimePreamble ---
// Create the IP entries that the Zig compiler generates when processing
// start.zig's comptime block. This block evaluates builtin.zig_backend
@@ -3093,6 +3189,11 @@ static void resolveStartComptimePreamble(void) {
(void)internEnumLiteral("Exe");
(void)internEnumTag(om_enum_ip, internTypedInt(u2_type, 0));
}
+
+ // --- $198-$212: string literal "main" from @hasDecl(root, "main") ---
+ // start.zig's else branch evaluates @hasDecl(root, "main") which
+ // triggers interning of the string literal "main".
+ internStringLiteral("main");
}
// --- findDeclImportPathFromZir ---
diff --git a/stage0/verbose_intern_pool.c b/stage0/verbose_intern_pool.c
@@ -76,6 +76,16 @@ static const char* ipKeyTagName(InternPoolKeyTag tag) {
return "memoized_call";
case IP_KEY_PTR_NAV:
return "ptr_nav";
+ case IP_KEY_BYTES:
+ return "bytes";
+ case IP_KEY_PTR_UAV:
+ return "ptr_uav";
+ case IP_KEY_PTR_UAV_ALIGNED:
+ return "ptr_uav_aligned";
+ case IP_KEY_PTR_SLICE:
+ return "ptr_slice";
+ case IP_KEY_OPT_PAYLOAD:
+ return "opt_payload";
}
return "?";
}
@@ -133,6 +143,27 @@ void verboseIpPrint(FILE* out, const InternPool* ip) {
fprintf(out, " ty=%u nav=%u", key.data.ptr_nav.ty,
key.data.ptr_nav.nav);
break;
+ case IP_KEY_BYTES:
+ fprintf(out, " ty=%u str=%u", key.data.bytes.ty,
+ key.data.bytes.str_idx);
+ break;
+ case IP_KEY_PTR_UAV:
+ fprintf(out, " ty=%u val=%u", key.data.ptr_uav.ty,
+ key.data.ptr_uav.val);
+ break;
+ case IP_KEY_PTR_UAV_ALIGNED:
+ fprintf(out, " ty=%u val=%u orig=%u", key.data.ptr_uav_aligned.ty,
+ key.data.ptr_uav_aligned.val,
+ key.data.ptr_uav_aligned.orig_ty);
+ break;
+ case IP_KEY_PTR_SLICE:
+ fprintf(out, " ty=%u ptr=%u len=%u", key.data.ptr_slice.ty,
+ key.data.ptr_slice.ptr, key.data.ptr_slice.len);
+ break;
+ case IP_KEY_OPT_PAYLOAD:
+ fprintf(out, " ty=%u val=%u", key.data.opt_payload.ty,
+ key.data.opt_payload.val);
+ break;
default:
break;
}