String literals are now null-terminated

This also deletes C string literals from the language, and makes the
std lib and compiler changes necessary to get the behavior tests and
std lib tests passing again.
This commit is contained in:
Andrew Kelley
2019-11-19 20:29:08 -05:00
parent 21f344b3b9
commit 47f06be369
51 changed files with 986 additions and 716 deletions

View File

@@ -33,10 +33,10 @@
'0': \
case DIGIT_NON_ZERO
#define ALPHA_EXCEPT_C \
#define ALPHA \
'a': \
case 'b': \
/*case 'c':*/ \
case 'c': \
case 'd': \
case 'e': \
case 'f': \
@@ -87,10 +87,6 @@
case 'Y': \
case 'Z'
#define ALPHA \
ALPHA_EXCEPT_C: \
case 'c'
#define SYMBOL_CHAR \
ALPHA: \
case DIGIT: \
@@ -180,7 +176,6 @@ static bool is_symbol_char(uint8_t c) {
enum TokenizeState {
TokenizeStateStart,
TokenizeStateSymbol,
TokenizeStateSymbolFirstC,
TokenizeStateZero, // "0", which might lead to "0x"
TokenizeStateNumber, // "123", "0x123"
TokenizeStateNumberDot,
@@ -279,7 +274,6 @@ static void set_token_id(Tokenize *t, Token *token, TokenId id) {
} else if (id == TokenIdStringLiteral || id == TokenIdSymbol) {
memset(&token->data.str_lit.str, 0, sizeof(Buf));
buf_resize(&token->data.str_lit.str, 0);
token->data.str_lit.is_c_str = false;
}
}
@@ -429,12 +423,7 @@ void tokenize(Buf *buf, Tokenization *out) {
switch (c) {
case WHITESPACE:
break;
case 'c':
t.state = TokenizeStateSymbolFirstC;
begin_token(&t, TokenIdSymbol);
buf_append_char(&t.cur_tok->data.str_lit.str, c);
break;
case ALPHA_EXCEPT_C:
case ALPHA:
case '_':
t.state = TokenizeStateSymbol;
begin_token(&t, TokenIdSymbol);
@@ -1007,19 +996,7 @@ void tokenize(Buf *buf, Tokenization *out) {
switch (c) {
case WHITESPACE:
break;
case 'c':
if (!t.cur_tok->data.str_lit.is_c_str) {
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
break;
}
t.state = TokenizeStateLineStringContinueC;
break;
case '\\':
if (t.cur_tok->data.str_lit.is_c_str) {
invalid_char_error(&t, c);
}
t.state = TokenizeStateLineStringContinue;
break;
default:
@@ -1084,29 +1061,6 @@ void tokenize(Buf *buf, Tokenization *out) {
break;
}
break;
case TokenizeStateSymbolFirstC:
switch (c) {
case '"':
set_token_id(&t, t.cur_tok, TokenIdStringLiteral);
t.cur_tok->data.str_lit.is_c_str = true;
t.state = TokenizeStateString;
break;
case '\\':
set_token_id(&t, t.cur_tok, TokenIdStringLiteral);
t.cur_tok->data.str_lit.is_c_str = true;
t.state = TokenizeStateSawBackslash;
break;
case SYMBOL_CHAR:
t.state = TokenizeStateSymbol;
buf_append_char(&t.cur_tok->data.str_lit.str, c);
break;
default:
t.pos -= 1;
end_token(&t);
t.state = TokenizeStateStart;
continue;
}
break;
case TokenizeStateSawAtSign:
switch (c) {
case '"':
@@ -1544,7 +1498,6 @@ void tokenize(Buf *buf, Tokenization *out) {
tokenize_error(&t, "unterminated character literal");
break;
case TokenizeStateSymbol:
case TokenizeStateSymbolFirstC:
case TokenizeStateZero:
case TokenizeStateNumber:
case TokenizeStateFloatFraction: