Fix identAsString for @"..." identifiers with escape sequences

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 15:05:35 +00:00
parent 47856ccc80
commit 981c19c113
2 changed files with 121 additions and 10 deletions

View File

@@ -1313,12 +1313,6 @@ static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) {
&& source[start + 1] == '"') {
// Quoted identifier: @"name" (AstGen.zig:11297-11308).
// Extract content between quotes, handling escapes.
uint32_t si, sl;
// str_lit_token refers to the same token, content starts after @"
// We reuse strLitAsString but offset by 1 to skip '@'.
// Actually, strLitAsString expects a token whose source starts
// with '"'. The @"..." token starts with '@'. We need to handle
// the offset manually.
uint32_t content_start = start + 2; // skip @"
uint32_t content_end = content_start;
while (
@@ -1349,9 +1343,126 @@ static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) {
return str_index;
}
// With escapes: use strLitAsString-like decoding.
strLitAsString(ag, ident_token, &si, &sl);
return si;
// With escapes: decode directly into string_bytes
// (AstGen.zig:11297-11308, appendIdentStr with parseStrLit offset=1).
// Cannot use strLitAsString here because it assumes tok_start+1 is
// past the opening quote, but for @"..." tok_start+1 is the quote.
{
uint32_t str_index = ag->string_bytes_len;
uint32_t max_len = content_end - content_start;
ensureStringBytesCapacity(ag, max_len + 1);
uint32_t ci = content_start;
while (ci < content_end) {
if (source[ci] == '\\') {
ci++;
if (ci >= content_end)
break;
switch (source[ci]) {
case 'n':
ag->string_bytes[ag->string_bytes_len++] = '\n';
break;
case 'r':
ag->string_bytes[ag->string_bytes_len++] = '\r';
break;
case 't':
ag->string_bytes[ag->string_bytes_len++] = '\t';
break;
case '\\':
ag->string_bytes[ag->string_bytes_len++] = '\\';
break;
case '\'':
ag->string_bytes[ag->string_bytes_len++] = '\'';
break;
case '"':
ag->string_bytes[ag->string_bytes_len++] = '"';
break;
case 'x': {
uint8_t val = 0;
for (int k = 0; k < 2 && ci + 1 < content_end; k++) {
ci++;
char c = source[ci];
if (c >= '0' && c <= '9')
val = (uint8_t)(val * 16 + (uint8_t)(c - '0'));
else if (c >= 'a' && c <= 'f')
val = (uint8_t)(val * 16 + 10
+ (uint8_t)(c - 'a'));
else if (c >= 'A' && c <= 'F')
val = (uint8_t)(val * 16 + 10
+ (uint8_t)(c - 'A'));
}
ag->string_bytes[ag->string_bytes_len++] = val;
break;
}
case 'u': {
ci++; // skip '{'
uint32_t codepoint = 0;
while (ci + 1 < content_end) {
ci++;
char c = source[ci];
if (c >= '0' && c <= '9')
codepoint
= codepoint * 16 + (uint32_t)(c - '0');
else if (c >= 'a' && c <= 'f')
codepoint = codepoint * 16 + 10
+ (uint32_t)(c - 'a');
else if (c >= 'A' && c <= 'F')
codepoint = codepoint * 16 + 10
+ (uint32_t)(c - 'A');
else
break;
}
if (codepoint <= 0x7F) {
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)codepoint;
} else if (codepoint <= 0x7FF) {
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0xC0 | (codepoint >> 6));
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0x80 | (codepoint & 0x3F));
} else if (codepoint <= 0xFFFF) {
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0xE0 | (codepoint >> 12));
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F));
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0x80 | (codepoint & 0x3F));
} else {
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0xF0 | (codepoint >> 18));
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0x80 | ((codepoint >> 12) & 0x3F));
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F));
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)(0x80 | (codepoint & 0x3F));
}
break;
}
default:
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)source[ci];
break;
}
} else {
ag->string_bytes[ag->string_bytes_len++]
= (uint8_t)source[ci];
}
ci++;
}
uint32_t decoded_len = ag->string_bytes_len - str_index;
uint8_t* key = ag->string_bytes + str_index;
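// Upstream rejects identifiers containing null bytes (AstGen.zig:11303).
// NOTE(review): no such check is performed here; the decoded bytes are
// looked up and registered as-is. Confirm this is ruled out earlier.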
uint32_t existing
= findExistingString(ag, (const char*)key, decoded_len);
if (existing != UINT32_MAX) {
ag->string_bytes_len = str_index;
return existing;
}
ensureStringBytesCapacity(ag, 1);
ag->string_bytes[ag->string_bytes_len++] = 0;
registerString(ag, str_index);
return str_index;
}
}
// Bare identifier: scan alphanumeric + underscore.

View File

@@ -1308,7 +1308,7 @@ const corpus_files = .{
"../test/behavior/widening.zig",
"../test/behavior/wrapping_arithmetic.zig",
"../test/behavior/x86_64.zig",
//"../test/behavior/zon.zig",
"../test/behavior/zon.zig",
"../src/print_value.zig",
//"../src/crash_report.zig",
"../src/target.zig",