diff --git a/astgen.c b/astgen.c index 5a42a4e131..9c5b0ce4c4 100644 --- a/astgen.c +++ b/astgen.c @@ -639,10 +639,63 @@ static uint32_t findExistingString( return UINT32_MAX; } +// Forward declaration for strLitAsString (used by identAsString for @"..." +// quoted identifiers with escapes). +static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token, + uint32_t* out_index, uint32_t* out_len); + // Mirrors AstGen.identAsString (AstGen.zig:11530). +// Handles both bare identifiers and @"..." quoted identifiers. static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) { uint32_t start = ag->tree->tokens.starts[ident_token]; const char* source = ag->tree->source; + + if (source[start] == '@' && start + 1 < ag->tree->source_len + && source[start + 1] == '"') { + // Quoted identifier: @"name" (AstGen.zig:11297-11308). + // Extract content between quotes, handling escapes. + uint32_t si, sl; + // str_lit_token refers to the same token, content starts after @" + // We reuse strLitAsString but offset by 1 to skip '@'. + // Actually, strLitAsString expects a token whose source starts + // with '"'. The @"..." token starts with '@'. We need to handle + // the offset manually. + uint32_t content_start = start + 2; // skip @" + uint32_t content_end = content_start; + while (content_end < ag->tree->source_len + && source[content_end] != '"') + content_end++; + uint32_t content_len = content_end - content_start; + + // Check for escapes. + bool has_escapes = false; + for (uint32_t j = content_start; j < content_end; j++) { + if (source[j] == '\\') { + has_escapes = true; + break; + } + } + + if (!has_escapes) { + uint32_t existing + = findExistingString(ag, source + content_start, content_len); + if (existing != UINT32_MAX) + return existing; + uint32_t str_index = ag->string_bytes_len; + ensureStringBytesCapacity(ag, content_len + 1); + memcpy(ag->string_bytes + ag->string_bytes_len, + source + content_start, content_len); + ag->string_bytes_len += content_len; + ag->string_bytes[ag->string_bytes_len++] = 0; + return str_index; + } + + // With escapes: use strLitAsString-like decoding. + strLitAsString(ag, ident_token, &si, &sl); + return si; + } + + // Bare identifier: scan alphanumeric + underscore. uint32_t end = start; while (end < ag->tree->source_len) { char ch = source[end]; @@ -669,7 +722,8 @@ static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) { } // Mirrors AstGen.strLitAsString (AstGen.zig:11553). -// Simplified: handles simple string literals without escape sequences. +// Mirrors AstGen.strLitAsString (AstGen.zig:11553). +// Handles string literals with escape sequences. // Returns the string index and length via out parameters. static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token, uint32_t* out_index, uint32_t* out_len) { @@ -677,33 +731,126 @@ static void strLitAsString(AstGenCtx* ag, uint32_t str_lit_token, const char* source = ag->tree->source; // Skip opening quote. - uint32_t content_start = tok_start + 1; - // Find closing quote. - uint32_t content_end = content_start; - while (content_end < ag->tree->source_len && source[content_end] != '"') { - content_end++; + uint32_t i = tok_start + 1; + // Find closing quote, skipping escaped characters. + uint32_t raw_end = i; + while (raw_end < ag->tree->source_len) { + if (source[raw_end] == '\\') { + raw_end += 2; // skip escape + escaped char + } else if (source[raw_end] == '"') { + break; + } else { + raw_end++; + } } - uint32_t content_len = content_end - content_start; + // Check if there are any escape sequences. + bool has_escapes = false; + for (uint32_t j = i; j < raw_end; j++) { + if (source[j] == '\\') { + has_escapes = true; + break; + } + } - // Check for existing string (dedup). - uint32_t existing - = findExistingString(ag, source + content_start, content_len); - if (existing != UINT32_MAX) { - *out_index = existing; + if (!has_escapes) { + // Fast path: no escapes, copy directly. + uint32_t content_len = raw_end - i; + // Dedup: skip index 0 (reserved NullTerminatedString.empty). + // The upstream hash table doesn't include the reserved entry, so + // string literals are never deduped against it. + uint32_t existing + = findExistingString(ag, source + i, content_len); + if (existing != UINT32_MAX && existing != 0) { + *out_index = existing; + *out_len = content_len; + return; + } + uint32_t str_index = ag->string_bytes_len; + ensureStringBytesCapacity(ag, content_len + 1); + memcpy( + ag->string_bytes + ag->string_bytes_len, source + i, content_len); + ag->string_bytes_len += content_len; + ag->string_bytes[ag->string_bytes_len++] = 0; + *out_index = str_index; *out_len = content_len; return; } - uint32_t str_index = ag->string_bytes_len; - ensureStringBytesCapacity(ag, content_len + 1); - memcpy(ag->string_bytes + ag->string_bytes_len, source + content_start, - content_len); - ag->string_bytes_len += content_len; - ag->string_bytes[ag->string_bytes_len++] = 0; + // Slow path: process escape sequences (AstGen.zig:11585-11640). + // Decode into a temporary buffer. + uint32_t max_len = raw_end - i; + uint8_t* buf = malloc(max_len); + if (!buf) + exit(1); + uint32_t out_pos = 0; + while (i < raw_end) { + if (source[i] == '\\') { + i++; + if (i >= raw_end) + break; + switch (source[i]) { + case 'n': + buf[out_pos++] = '\n'; + break; + case 'r': + buf[out_pos++] = '\r'; + break; + case 't': + buf[out_pos++] = '\t'; + break; + case '\\': + buf[out_pos++] = '\\'; + break; + case '\'': + buf[out_pos++] = '\''; + break; + case '"': + buf[out_pos++] = '"'; + break; + case 'x': { + // \xNN hex escape. + uint8_t val = 0; + for (int k = 0; k < 2 && i + 1 < raw_end; k++) { + i++; + char c = source[i]; + if (c >= '0' && c <= '9') + val = (uint8_t)(val * 16 + (uint8_t)(c - '0')); + else if (c >= 'a' && c <= 'f') + val = (uint8_t)(val * 16 + 10 + (uint8_t)(c - 'a')); + else if (c >= 'A' && c <= 'F') + val = (uint8_t)(val * 16 + 10 + (uint8_t)(c - 'A')); + } + buf[out_pos++] = val; + break; + } + default: + buf[out_pos++] = (uint8_t)source[i]; + break; + } + } else { + buf[out_pos++] = (uint8_t)source[i]; + } + i++; + } + // Dedup check (skip index 0 — reserved NullTerminatedString.empty). + uint32_t existing = findExistingString(ag, (const char*)buf, out_pos); + if (existing != UINT32_MAX && existing != 0) { + *out_index = existing; + *out_len = out_pos; + free(buf); + return; + } + + uint32_t str_index = ag->string_bytes_len; + ensureStringBytesCapacity(ag, out_pos + 1); + memcpy(ag->string_bytes + ag->string_bytes_len, buf, out_pos); + ag->string_bytes_len += out_pos; + ag->string_bytes[ag->string_bytes_len++] = 0; + free(buf); *out_index = str_index; - *out_len = content_len; + *out_len = out_pos; } // --- Declaration helpers --- @@ -1219,12 +1366,33 @@ static uint32_t scanContainer( addDeclToTable(ag, name_str, member); break; } + // Container fields: add field name to string table for ordering + // (AstGen.zig:13509). + case AST_NODE_CONTAINER_FIELD_INIT: + case AST_NODE_CONTAINER_FIELD_ALIGN: + case AST_NODE_CONTAINER_FIELD: { + uint32_t main_token = tree->nodes.main_tokens[member]; + identAsString(ag, main_token); + break; + } case AST_NODE_COMPTIME: decl_count++; break; - case AST_NODE_TEST_DECL: + case AST_NODE_TEST_DECL: { decl_count++; + // Process test name string to match upstream string table + // ordering (AstGen.zig:13465-13500). + uint32_t test_name_token + = tree->nodes.main_tokens[member] + 1; + TokenizerTag tt = tree->tokens.tags[test_name_token]; + if (tt == TOKEN_STRING_LITERAL) { + uint32_t si, sl; + strLitAsString(ag, test_name_token, &si, &sl); + } else if (tt == TOKEN_IDENTIFIER) { + identAsString(ag, test_name_token); + } break; + } default: break; }