Fix identAsString for @"..." identifiers with escape sequences
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
129
stage0/astgen.c
129
stage0/astgen.c
@@ -1313,12 +1313,6 @@ static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) {
|
||||
&& source[start + 1] == '"') {
|
||||
// Quoted identifier: @"name" (AstGen.zig:11297-11308).
|
||||
// Extract content between quotes, handling escapes.
|
||||
uint32_t si, sl;
|
||||
// str_lit_token refers to the same token, content starts after @"
|
||||
// We reuse strLitAsString but offset by 1 to skip '@'.
|
||||
// Actually, strLitAsString expects a token whose source starts
|
||||
// with '"'. The @"..." token starts with '@'. We need to handle
|
||||
// the offset manually.
|
||||
uint32_t content_start = start + 2; // skip @"
|
||||
uint32_t content_end = content_start;
|
||||
while (
|
||||
@@ -1349,9 +1343,126 @@ static uint32_t identAsString(AstGenCtx* ag, uint32_t ident_token) {
|
||||
return str_index;
|
||||
}
|
||||
|
||||
// With escapes: use strLitAsString-like decoding.
|
||||
strLitAsString(ag, ident_token, &si, &sl);
|
||||
return si;
|
||||
// With escapes: decode directly into string_bytes
|
||||
// (AstGen.zig:11297-11308, appendIdentStr with parseStrLit offset=1).
|
||||
// Cannot use strLitAsString here because it assumes tok_start+1 is
|
||||
// past the opening quote, but for @"..." tok_start+1 is the quote.
|
||||
{
|
||||
uint32_t str_index = ag->string_bytes_len;
|
||||
uint32_t max_len = content_end - content_start;
|
||||
ensureStringBytesCapacity(ag, max_len + 1);
|
||||
uint32_t ci = content_start;
|
||||
while (ci < content_end) {
|
||||
if (source[ci] == '\\') {
|
||||
ci++;
|
||||
if (ci >= content_end)
|
||||
break;
|
||||
switch (source[ci]) {
|
||||
case 'n':
|
||||
ag->string_bytes[ag->string_bytes_len++] = '\n';
|
||||
break;
|
||||
case 'r':
|
||||
ag->string_bytes[ag->string_bytes_len++] = '\r';
|
||||
break;
|
||||
case 't':
|
||||
ag->string_bytes[ag->string_bytes_len++] = '\t';
|
||||
break;
|
||||
case '\\':
|
||||
ag->string_bytes[ag->string_bytes_len++] = '\\';
|
||||
break;
|
||||
case '\'':
|
||||
ag->string_bytes[ag->string_bytes_len++] = '\'';
|
||||
break;
|
||||
case '"':
|
||||
ag->string_bytes[ag->string_bytes_len++] = '"';
|
||||
break;
|
||||
case 'x': {
|
||||
uint8_t val = 0;
|
||||
for (int k = 0; k < 2 && ci + 1 < content_end; k++) {
|
||||
ci++;
|
||||
char c = source[ci];
|
||||
if (c >= '0' && c <= '9')
|
||||
val = (uint8_t)(val * 16 + (uint8_t)(c - '0'));
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
val = (uint8_t)(val * 16 + 10
|
||||
+ (uint8_t)(c - 'a'));
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
val = (uint8_t)(val * 16 + 10
|
||||
+ (uint8_t)(c - 'A'));
|
||||
}
|
||||
ag->string_bytes[ag->string_bytes_len++] = val;
|
||||
break;
|
||||
}
|
||||
case 'u': {
|
||||
ci++; // skip '{'
|
||||
uint32_t codepoint = 0;
|
||||
while (ci + 1 < content_end) {
|
||||
ci++;
|
||||
char c = source[ci];
|
||||
if (c >= '0' && c <= '9')
|
||||
codepoint
|
||||
= codepoint * 16 + (uint32_t)(c - '0');
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
codepoint = codepoint * 16 + 10
|
||||
+ (uint32_t)(c - 'a');
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
codepoint = codepoint * 16 + 10
|
||||
+ (uint32_t)(c - 'A');
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (codepoint <= 0x7F) {
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)codepoint;
|
||||
} else if (codepoint <= 0x7FF) {
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0xC0 | (codepoint >> 6));
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0x80 | (codepoint & 0x3F));
|
||||
} else if (codepoint <= 0xFFFF) {
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0xE0 | (codepoint >> 12));
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F));
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0x80 | (codepoint & 0x3F));
|
||||
} else {
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0xF0 | (codepoint >> 18));
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0x80 | ((codepoint >> 12) & 0x3F));
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F));
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)(0x80 | (codepoint & 0x3F));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)source[ci];
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
ag->string_bytes[ag->string_bytes_len++]
|
||||
= (uint8_t)source[ci];
|
||||
}
|
||||
ci++;
|
||||
}
|
||||
uint32_t decoded_len = ag->string_bytes_len - str_index;
|
||||
uint8_t* key = ag->string_bytes + str_index;
|
||||
// NOTE: upstream rejects identifiers containing null bytes (AstGen.zig:11303); this port does not validate that here.
|
||||
uint32_t existing
|
||||
= findExistingString(ag, (const char*)key, decoded_len);
|
||||
if (existing != UINT32_MAX) {
|
||||
ag->string_bytes_len = str_index;
|
||||
return existing;
|
||||
}
|
||||
ensureStringBytesCapacity(ag, 1);
|
||||
ag->string_bytes[ag->string_bytes_len++] = 0;
|
||||
registerString(ag, str_index);
|
||||
return str_index;
|
||||
}
|
||||
}
|
||||
|
||||
// Bare identifier: scan alphanumeric + underscore.
|
||||
|
||||
@@ -1308,7 +1308,7 @@ const corpus_files = .{
|
||||
"../test/behavior/widening.zig",
|
||||
"../test/behavior/wrapping_arithmetic.zig",
|
||||
"../test/behavior/x86_64.zig",
|
||||
//"../test/behavior/zon.zig",
|
||||
"../test/behavior/zon.zig",
|
||||
"../src/print_value.zig",
|
||||
//"../src/crash_report.zig",
|
||||
"../src/target.zig",
|
||||
|
||||
Reference in New Issue
Block a user