stage0: remove GNU C extensions for strict C11 compliance

Replace GNU statement expressions ({...}) in common.h with a static
inline function and do...while(0) macros. Expand case range expressions
(case 'a' ... 'z') in tokenizer.c to individual case labels. Replace
empty initializer braces {} with {0} in parser.c. Add a dummy member
to the empty struct in ast.h. Add -pedantic to zig0_cflags in build.zig
to prevent future regressions.

zig0 now compiles without relying on GNU C extensions, so it no longer
requires GCC or Clang. This enables bootstrapping with MSVC, cproc, and
other compilers that do not implement these extensions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Motiejus Jakštys
2026-02-25 22:32:10 +00:00
parent f9039a646d
commit 40e2b2bc95
5 changed files with 184 additions and 105 deletions

View File

@@ -44,6 +44,7 @@ const zig0_c_lib_files = &[_][]const u8{
const zig0_all_c_files = zig0_c_lib_files ++ &[_][]const u8{"main.c"};
const zig0_cflags = &[_][]const u8{
"-std=c11",
"-pedantic",
"-Wall",
"-Wvla",
"-Wextra",

View File

@@ -616,6 +616,7 @@ typedef struct AstError {
TokenizerTag expected_tag;
} expected;
struct {
char unused_;
} none;
} extra;
} AstError;

View File

@@ -13,44 +13,46 @@
Type* arr; \
}
/*
 * Allocate a zero-initialized array of `count` elements, `size` bytes each.
 *
 * Never returns NULL: if the allocation fails the process exits with
 * status 1, so callers may use the result without checking it.
 *
 * C11 leaves calloc() of a zero-sized request implementation-defined (it may
 * legitimately return NULL). An empty request is therefore rounded up to a
 * single byte so that "zero capacity" is not mistaken for "out of memory".
 * The returned pointer can always be passed to realloc() or free().
 */
static inline void* arr_init_(size_t count, size_t size) {
    void* arr;
    if (count == 0 || size == 0)
        arr = calloc(1, 1);
    else
        arr = calloc(count, size);
    if (!arr)
        exit(1);
    return arr;
}
#define ARR_INIT(Type, initial_cap) \
({ \
Type* arr = calloc(initial_cap, sizeof(Type)); \
if (!arr) \
exit(1); \
arr; \
})
((Type*)arr_init_((initial_cap), sizeof(Type)))
#define SLICE_INIT(Type, initial_cap) \
{ .len = 0, .cap = (initial_cap), .arr = ARR_INIT(Type, initial_cap) }
#define SLICE_RESIZE(Type, slice, new_cap) \
({ \
const uint32_t cap = (new_cap); \
Type* new_arr = realloc((slice)->arr, cap * sizeof(Type)); \
if (new_arr == NULL) { \
do { \
const uint32_t cap_ = (new_cap); \
Type* new_arr_ = realloc((slice)->arr, cap_ * sizeof(Type)); \
if (new_arr_ == NULL) { \
free((slice)->arr); \
exit(1); \
} \
(slice)->arr = new_arr; \
(slice)->cap = cap; \
})
(slice)->arr = new_arr_; \
(slice)->cap = cap_; \
} while (0)
#define SLICE_ENSURE_CAPACITY(Type, slice, additional) \
({ \
do { \
if ((slice)->len + (additional) > (slice)->cap) { \
SLICE_RESIZE(Type, slice, \
((slice)->cap * 2 > (slice)->len + (additional)) \
? (slice)->cap * 2 \
: (slice)->len + (additional)); \
} \
})
} while (0)
#define SLICE_APPEND(Type, slice, item) \
({ \
do { \
SLICE_ENSURE_CAPACITY(Type, slice, 1); \
(slice)->arr[(slice)->len++] = (item); \
})
} while (0)
#define ERR_BUF_SIZE 200

View File

@@ -2467,35 +2467,35 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
(AstNodeItem) {
.tag = AST_NODE_CHAR_LITERAL,
.main_token = nextToken(p),
.data = {},
.data = { 0 },
});
case TOKEN_NUMBER_LITERAL:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_NUMBER_LITERAL,
.main_token = nextToken(p),
.data = {},
.data = { 0 },
});
case TOKEN_KEYWORD_UNREACHABLE:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_UNREACHABLE_LITERAL,
.main_token = nextToken(p),
.data = {},
.data = { 0 },
});
case TOKEN_KEYWORD_ANYFRAME:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_ANYFRAME_LITERAL,
.main_token = nextToken(p),
.data = {},
.data = { 0 },
});
case TOKEN_STRING_LITERAL:
return addNode(&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_STRING_LITERAL,
.main_token = nextToken(p),
.data = {},
.data = { 0 },
});
case TOKEN_BUILTIN:
return parseBuiltinCall(p);
@@ -2566,7 +2566,7 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
(AstNodeItem) {
.tag = AST_NODE_IDENTIFIER,
.main_token = nextToken(p),
.data = {},
.data = { 0 },
});
case TOKEN_KEYWORD_FOR:
return parseForExpr(p);
@@ -3041,7 +3041,7 @@ static AstNodeIndex parseSwitchItem(Parser* p) {
}
static PtrModifiers parsePtrModifiers(Parser* p) {
PtrModifiers mods = {};
PtrModifiers mods = { 0 };
while (true) {
switch (p->token_tags[p->tok_i]) {

View File

@@ -124,10 +124,9 @@ state:
state = TOKENIZER_STATE_INVALID;
goto state;
}
case ' ':
case '\n':
case '\t':
case '\r':
// clang-format off
case ' ': case '\n': case '\t': case '\r':
// clang-format on
self->index++;
result.loc.start = self->index;
goto state;
@@ -139,9 +138,17 @@ state:
result.tag = TOKEN_CHAR_LITERAL;
state = TOKENIZER_STATE_CHAR_LITERAL;
goto state;
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
// clang-format off
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z': case 'A': case 'B': case 'C': case 'D':
case 'E': case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O': case 'P':
case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V':
case 'W': case 'X': case 'Y': case 'Z': case '_':
// clang-format on
result.tag = TOKEN_IDENTIFIER;
state = TOKENIZER_STATE_IDENTIFIER;
goto state;
@@ -235,7 +242,10 @@ state:
case '&':
state = TOKENIZER_STATE_AMPERSAND;
goto state;
case '0' ... '9':
// clang-format off
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
// clang-format on
result.tag = TOKEN_NUMBER_LITERAL;
self->index++;
state = TOKENIZER_STATE_INT;
@@ -299,9 +309,17 @@ state:
result.tag = TOKEN_IDENTIFIER;
state = TOKENIZER_STATE_STRING_LITERAL;
goto state;
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
// clang-format off
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z': case 'A': case 'B': case 'C': case 'D':
case 'E': case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O': case 'P':
case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V':
case 'W': case 'X': case 'Y': case 'Z': case '_':
// clang-format on
result.tag = TOKEN_BUILTIN;
state = TOKENIZER_STATE_BUILTIN;
goto state;
@@ -451,10 +469,19 @@ state:
case TOKENIZER_STATE_IDENTIFIER:
self->index++;
switch (self->buffer[self->index]) {
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
case '0' ... '9':
// clang-format off
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z': case 'A': case 'B': case 'C': case 'D':
case 'E': case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O': case 'P':
case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V':
case 'W': case 'X': case 'Y': case 'Z': case '_': case '0':
case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9':
// clang-format on
state = TOKENIZER_STATE_IDENTIFIER;
goto state;
default:; // Once we're at C23, this semicolon can be removed.
@@ -469,10 +496,19 @@ state:
case TOKENIZER_STATE_BUILTIN:
self->index++;
switch (self->buffer[self->index]) {
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
case '0' ... '9':
// clang-format off
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
case 'y': case 'z': case 'A': case 'B': case 'C': case 'D':
case 'E': case 'F': case 'G': case 'H': case 'I': case 'J':
case 'K': case 'L': case 'M': case 'N': case 'O': case 'P':
case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V':
case 'W': case 'X': case 'Y': case 'Z': case '_': case '0':
case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9':
// clang-format on
state = TOKENIZER_STATE_BUILTIN;
goto state;
break;
@@ -517,9 +553,14 @@ state:
case '"':
self->index++;
break;
case 0x01 ... 0x09:
case 0x0b ... 0x1f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0d:
case 0x0e: case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13:
case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19:
case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -561,9 +602,14 @@ state:
case '\'':
self->index++;
break;
case 0x01 ... 0x09:
case 0x0b ... 0x1f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0d:
case 0x0e: case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13:
case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19:
case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -586,9 +632,14 @@ state:
case '\n':
result.tag = TOKEN_INVALID;
break;
case 0x01 ... 0x09:
case 0x0b ... 0x1f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0d:
case 0x0e: case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13:
case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19:
case 0x1a: case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f:
case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -614,10 +665,13 @@ state:
goto state;
}
break;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0e:
case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: case 0x14:
case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a:
case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -882,10 +936,13 @@ state:
case '\r':
state = TOKENIZER_STATE_EXPECT_NEWLINE;
goto state;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0e:
case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: case 0x14:
case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a:
case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -912,10 +969,13 @@ state:
case '/':
state = TOKENIZER_STATE_LINE_COMMENT;
goto state;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0e:
case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: case 0x14:
case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a:
case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -948,10 +1008,13 @@ state:
case '\r':
state = TOKENIZER_STATE_EXPECT_NEWLINE;
goto state;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0e:
case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: case 0x14:
case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a:
case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -972,10 +1035,13 @@ state:
goto state;
}
break;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
// clang-format off
case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06:
case 0x07: case 0x08: case 0x09: case 0x0b: case 0x0c: case 0x0e:
case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: case 0x14:
case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1a:
case 0x1b: case 0x1c: case 0x1d: case 0x1e: case 0x1f: case 0x7f:
// clang-format on
state = TOKENIZER_STATE_INVALID;
goto state;
default:
@@ -989,21 +1055,24 @@ state:
case '.':
state = TOKENIZER_STATE_INT_PERIOD;
goto state;
case '_':
case 'a' ... 'd':
case 'f' ... 'o':
case 'q' ... 'z':
case 'A' ... 'D':
case 'F' ... 'O':
case 'Q' ... 'Z':
case '0' ... '9':
// clang-format off
case '_': case 'a': case 'b': case 'c': case 'd': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'q': case 'r': case 's':
case 't': case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z': case 'A': case 'B': case 'C': case 'D': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'Q': case 'R': case 'S':
case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
// clang-format on
self->index++;
state = TOKENIZER_STATE_INT;
goto state;
case 'e':
case 'E':
case 'p':
case 'P':
// clang-format off
case 'e': case 'E': case 'p': case 'P':
// clang-format on
state = TOKENIZER_STATE_INT_EXPONENT;
goto state;
default:
@@ -1028,21 +1097,24 @@ state:
case TOKENIZER_STATE_INT_PERIOD:
self->index++;
switch (self->buffer[self->index]) {
case '_':
case 'a' ... 'd':
case 'f' ... 'o':
case 'q' ... 'z':
case 'A' ... 'D':
case 'F' ... 'O':
case 'Q' ... 'Z':
case '0' ... '9':
// clang-format off
case '_': case 'a': case 'b': case 'c': case 'd': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'q': case 'r': case 's':
case 't': case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z': case 'A': case 'B': case 'C': case 'D': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'Q': case 'R': case 'S':
case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
// clang-format on
self->index++;
state = TOKENIZER_STATE_FLOAT;
goto state;
case 'e':
case 'E':
case 'p':
case 'P':
// clang-format off
case 'e': case 'E': case 'p': case 'P':
// clang-format on
state = TOKENIZER_STATE_FLOAT_EXPONENT;
goto state;
default:
@@ -1053,21 +1125,24 @@ state:
case TOKENIZER_STATE_FLOAT:
switch (self->buffer[self->index]) {
case '_':
case 'a' ... 'd':
case 'f' ... 'o':
case 'q' ... 'z':
case 'A' ... 'D':
case 'F' ... 'O':
case 'Q' ... 'Z':
case '0' ... '9':
// clang-format off
case '_': case 'a': case 'b': case 'c': case 'd': case 'f':
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
case 'm': case 'n': case 'o': case 'q': case 'r': case 's':
case 't': case 'u': case 'v': case 'w': case 'x': case 'y':
case 'z': case 'A': case 'B': case 'C': case 'D': case 'F':
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O': case 'Q': case 'R': case 'S':
case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y':
case 'Z': case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
// clang-format on
self->index++;
state = TOKENIZER_STATE_FLOAT;
goto state;
case 'e':
case 'E':
case 'p':
case 'P':
// clang-format off
case 'e': case 'E': case 'p': case 'P':
// clang-format on
state = TOKENIZER_STATE_FLOAT_EXPONENT;
goto state;
default: