blob a7820678 (58016B) - Raw
1 /* 2 * Copyright (c) 2015 Andrew Kelley 3 * 4 * This file is part of zig, which is MIT licensed. 5 * See http://opensource.org/licenses/MIT 6 */ 7 8 #include "tokenizer.hpp" 9 #include "util.hpp" 10 11 #include <stdarg.h> 12 #include <stdlib.h> 13 #include <stdio.h> 14 #include <inttypes.h> 15 #include <limits.h> 16 #include <errno.h> 17 18 #define WHITESPACE \ 19 ' ': \ 20 case '\n' 21 22 #define DIGIT_NON_ZERO \ 23 '1': \ 24 case '2': \ 25 case '3': \ 26 case '4': \ 27 case '5': \ 28 case '6': \ 29 case '7': \ 30 case '8': \ 31 case '9' 32 #define DIGIT \ 33 '0': \ 34 case DIGIT_NON_ZERO 35 36 #define ALPHA_EXCEPT_C \ 37 'a': \ 38 case 'b': \ 39 /*case 'c':*/ \ 40 case 'd': \ 41 case 'e': \ 42 case 'f': \ 43 case 'g': \ 44 case 'h': \ 45 case 'i': \ 46 case 'j': \ 47 case 'k': \ 48 case 'l': \ 49 case 'm': \ 50 case 'n': \ 51 case 'o': \ 52 case 'p': \ 53 case 'q': \ 54 case 'r': \ 55 case 's': \ 56 case 't': \ 57 case 'u': \ 58 case 'v': \ 59 case 'w': \ 60 case 'x': \ 61 case 'y': \ 62 case 'z': \ 63 case 'A': \ 64 case 'B': \ 65 case 'C': \ 66 case 'D': \ 67 case 'E': \ 68 case 'F': \ 69 case 'G': \ 70 case 'H': \ 71 case 'I': \ 72 case 'J': \ 73 case 'K': \ 74 case 'L': \ 75 case 'M': \ 76 case 'N': \ 77 case 'O': \ 78 case 'P': \ 79 case 'Q': \ 80 case 'R': \ 81 case 'S': \ 82 case 'T': \ 83 case 'U': \ 84 case 'V': \ 85 case 'W': \ 86 case 'X': \ 87 case 'Y': \ 88 case 'Z' 89 90 #define ALPHA \ 91 ALPHA_EXCEPT_C: \ 92 case 'c' 93 94 #define SYMBOL_CHAR \ 95 ALPHA_EXCEPT_C: \ 96 case DIGIT: \ 97 case '_': \ 98 case 'c' 99 100 #define SYMBOL_START \ 101 ALPHA: \ 102 case '_' 103 104 struct ZigKeyword { 105 const char *text; 106 TokenId token_id; 107 }; 108 109 static const struct ZigKeyword zig_keywords[] = { 110 {"align", TokenIdKeywordAlign}, 111 {"and", TokenIdKeywordAnd}, 112 {"asm", TokenIdKeywordAsm}, 113 {"break", TokenIdKeywordBreak}, 114 {"coldcc", TokenIdKeywordColdCC}, 115 {"comptime", TokenIdKeywordCompTime}, 116 {"const", TokenIdKeywordConst}, 117 {"continue", TokenIdKeywordContinue}, 118 {"defer", TokenIdKeywordDefer}, 119 {"else", TokenIdKeywordElse}, 120 {"enum", TokenIdKeywordEnum}, 121 {"error", TokenIdKeywordError}, 122 {"export", TokenIdKeywordExport}, 123 {"extern", TokenIdKeywordExtern}, 124 {"false", TokenIdKeywordFalse}, 125 {"fn", TokenIdKeywordFn}, 126 {"for", TokenIdKeywordFor}, 127 {"goto", TokenIdKeywordGoto}, 128 {"if", TokenIdKeywordIf}, 129 {"inline", TokenIdKeywordInline}, 130 {"nakedcc", TokenIdKeywordNakedCC}, 131 {"noalias", TokenIdKeywordNoAlias}, 132 {"null", TokenIdKeywordNull}, 133 {"or", TokenIdKeywordOr}, 134 {"packed", TokenIdKeywordPacked}, 135 {"pub", TokenIdKeywordPub}, 136 {"return", TokenIdKeywordReturn}, 137 {"stdcallcc", TokenIdKeywordStdcallCC}, 138 {"struct", TokenIdKeywordStruct}, 139 {"switch", TokenIdKeywordSwitch}, 140 {"test", TokenIdKeywordTest}, 141 {"this", TokenIdKeywordThis}, 142 {"true", TokenIdKeywordTrue}, 143 {"undefined", TokenIdKeywordUndefined}, 144 {"union", TokenIdKeywordUnion}, 145 {"unreachable", TokenIdKeywordUnreachable}, 146 {"use", TokenIdKeywordUse}, 147 {"var", TokenIdKeywordVar}, 148 {"volatile", TokenIdKeywordVolatile}, 149 {"while", TokenIdKeywordWhile}, 150 }; 151 152 bool is_zig_keyword(Buf *buf) { 153 for (size_t i = 0; i < array_length(zig_keywords); i += 1) { 154 if (buf_eql_str(buf, zig_keywords[i].text)) { 155 return true; 156 } 157 } 158 return false; 159 } 160 161 static bool is_symbol_char(uint8_t c) { 162 switch (c) { 163 case SYMBOL_CHAR: 164 return true; 165 default: 166 return false; 167 } 168 } 169 170 enum TokenizeState { 171 TokenizeStateStart, 172 TokenizeStateSymbol, 173 TokenizeStateSymbolFirstC, 174 TokenizeStateZero, // "0", which might lead to "0x" 175 TokenizeStateNumber, // "123", "0x123" 176 TokenizeStateNumberDot, 177 TokenizeStateFloatFraction, // "123.456", "0x123.456" 178 TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p" 179 TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5" 180 TokenizeStateString, 181 TokenizeStateStringEscape, 182 TokenizeStateCharLiteral, 183 TokenizeStateCharLiteralEnd, 184 TokenizeStateSawStar, 185 TokenizeStateSawStarPercent, 186 TokenizeStateSawSlash, 187 TokenizeStateSawBackslash, 188 TokenizeStateSawPercent, 189 TokenizeStateSawPlus, 190 TokenizeStateSawPlusPercent, 191 TokenizeStateSawDash, 192 TokenizeStateSawMinusPercent, 193 TokenizeStateSawAmpersand, 194 TokenizeStateSawCaret, 195 TokenizeStateSawPipe, 196 TokenizeStateLineComment, 197 TokenizeStateLineString, 198 TokenizeStateLineStringEnd, 199 TokenizeStateLineStringContinue, 200 TokenizeStateLineStringContinueC, 201 TokenizeStateSawEq, 202 TokenizeStateSawBang, 203 TokenizeStateSawLessThan, 204 TokenizeStateSawLessThanLessThan, 205 TokenizeStateSawGreaterThan, 206 TokenizeStateSawGreaterThanGreaterThan, 207 TokenizeStateSawDot, 208 TokenizeStateSawDotDot, 209 TokenizeStateSawQuestionMark, 210 TokenizeStateSawAtSign, 211 TokenizeStateCharCode, 212 TokenizeStateError, 213 }; 214 215 216 struct Tokenize { 217 Buf *buf; 218 size_t pos; 219 TokenizeState state; 220 ZigList<Token> *tokens; 221 int line; 222 int column; 223 Token *cur_tok; 224 Tokenization *out; 225 uint32_t radix; 226 int32_t exp_add_amt; 227 bool is_exp_negative; 228 size_t char_code_index; 229 size_t char_code_end; 230 bool unicode; 231 uint32_t char_code; 232 int exponent_in_bin_or_dec; 233 BigInt specified_exponent; 234 BigInt significand; 235 }; 236 237 ATTRIBUTE_PRINTF(2, 3) 238 static void tokenize_error(Tokenize *t, const char *format, ...) { 239 t->state = TokenizeStateError; 240 241 if (t->cur_tok) { 242 t->out->err_line = t->cur_tok->start_line; 243 t->out->err_column = t->cur_tok->start_column; 244 } else { 245 t->out->err_line = t->line; 246 t->out->err_column = t->column; 247 } 248 249 va_list ap; 250 va_start(ap, format); 251 t->out->err = buf_vprintf(format, ap); 252 va_end(ap); 253 } 254 255 static void set_token_id(Tokenize *t, Token *token, TokenId id) { 256 token->id = id; 257 258 if (id == TokenIdIntLiteral) { 259 bigint_init_unsigned(&token->data.int_lit.bigint, 0); 260 } else if (id == TokenIdFloatLiteral) { 261 bigfloat_init_32(&token->data.float_lit.bigfloat, 0.0f); 262 token->data.float_lit.overflow = false; 263 } else if (id == TokenIdStringLiteral || id == TokenIdSymbol) { 264 memset(&token->data.str_lit.str, 0, sizeof(Buf)); 265 buf_resize(&token->data.str_lit.str, 0); 266 token->data.str_lit.is_c_str = false; 267 } 268 } 269 270 static void begin_token(Tokenize *t, TokenId id) { 271 assert(!t->cur_tok); 272 t->tokens->add_one(); 273 Token *token = &t->tokens->last(); 274 token->start_line = t->line; 275 token->start_column = t->column; 276 token->start_pos = t->pos; 277 278 set_token_id(t, token, id); 279 280 t->cur_tok = token; 281 } 282 283 static void cancel_token(Tokenize *t) { 284 t->tokens->pop(); 285 t->cur_tok = nullptr; 286 } 287 288 static void end_float_token(Tokenize *t) { 289 if (t->radix == 10) { 290 uint8_t *ptr_buf = (uint8_t*)buf_ptr(t->buf) + t->cur_tok->start_pos; 291 size_t buf_len = t->cur_tok->end_pos - t->cur_tok->start_pos; 292 if (bigfloat_init_buf_base10(&t->cur_tok->data.float_lit.bigfloat, ptr_buf, buf_len)) { 293 t->cur_tok->data.float_lit.overflow = true; 294 } 295 return; 296 } 297 298 BigInt int_max; 299 bigint_init_unsigned(&int_max, INT_MAX); 300 301 if (bigint_cmp(&t->specified_exponent, &int_max) != CmpLT) { 302 t->cur_tok->data.float_lit.overflow = true; 303 return; 304 } 305 306 if (!bigint_fits_in_bits(&t->specified_exponent, 64, true)) { 307 t->cur_tok->data.float_lit.overflow = true; 308 return; 309 } 310 311 int64_t specified_exponent = bigint_as_signed(&t->specified_exponent); 312 if (t->is_exp_negative) { 313 specified_exponent = -specified_exponent; 314 } 315 t->exponent_in_bin_or_dec = (int)(t->exponent_in_bin_or_dec + specified_exponent); 316 317 if (!bigint_fits_in_bits(&t->significand, 64, false)) { 318 t->cur_tok->data.float_lit.overflow = true; 319 return; 320 } 321 322 uint64_t significand = bigint_as_unsigned(&t->significand); 323 uint64_t significand_bits; 324 uint64_t exponent_bits; 325 if (significand == 0) { 326 // 0 is all 0's 327 significand_bits = 0; 328 exponent_bits = 0; 329 } else { 330 // normalize the significand 331 if (t->radix == 10) { 332 zig_panic("TODO: decimal floats"); 333 } else { 334 int significand_magnitude_in_bin = clzll(1) - clzll(significand); 335 t->exponent_in_bin_or_dec += significand_magnitude_in_bin; 336 if (!(-1022 <= t->exponent_in_bin_or_dec && t->exponent_in_bin_or_dec <= 1023)) { 337 t->cur_tok->data.float_lit.overflow = true; 338 return; 339 } else { 340 // this should chop off exactly one 1 bit from the top. 341 significand_bits = ((uint64_t)significand << (52 - significand_magnitude_in_bin)) & 0xfffffffffffffULL; 342 exponent_bits = t->exponent_in_bin_or_dec + 1023; 343 } 344 } 345 } 346 uint64_t double_bits = (exponent_bits << 52) | significand_bits; 347 double dbl_value; 348 safe_memcpy(&dbl_value, (double *)&double_bits, 1); 349 bigfloat_init_64(&t->cur_tok->data.float_lit.bigfloat, dbl_value); 350 } 351 352 static void end_token(Tokenize *t) { 353 assert(t->cur_tok); 354 t->cur_tok->end_pos = t->pos + 1; 355 356 if (t->cur_tok->id == TokenIdFloatLiteral) { 357 end_float_token(t); 358 } else if (t->cur_tok->id == TokenIdSymbol) { 359 char *token_mem = buf_ptr(t->buf) + t->cur_tok->start_pos; 360 int token_len = (int)(t->cur_tok->end_pos - t->cur_tok->start_pos); 361 362 for (size_t i = 0; i < array_length(zig_keywords); i += 1) { 363 if (mem_eql_str(token_mem, token_len, zig_keywords[i].text)) { 364 t->cur_tok->id = zig_keywords[i].token_id; 365 break; 366 } 367 } 368 } 369 370 t->cur_tok = nullptr; 371 } 372 373 static bool is_exponent_signifier(uint8_t c, int radix) { 374 if (radix == 16) { 375 return c == 'p' || c == 'P'; 376 } else { 377 return c == 'e' || c == 'E'; 378 } 379 } 380 381 static uint32_t get_digit_value(uint8_t c) { 382 if ('0' <= c && c <= '9') { 383 return c - '0'; 384 } 385 if ('A' <= c && c <= 'Z') { 386 return c - 'A' + 10; 387 } 388 if ('a' <= c && c <= 'z') { 389 return c - 'a' + 10; 390 } 391 return UINT32_MAX; 392 } 393 394 static void handle_string_escape(Tokenize *t, uint8_t c) { 395 if (t->cur_tok->id == TokenIdCharLiteral) { 396 t->cur_tok->data.char_lit.c = c; 397 t->state = TokenizeStateCharLiteralEnd; 398 } else if (t->cur_tok->id == TokenIdStringLiteral || t->cur_tok->id == TokenIdSymbol) { 399 buf_append_char(&t->cur_tok->data.str_lit.str, c); 400 t->state = TokenizeStateString; 401 } else { 402 zig_unreachable(); 403 } 404 } 405 406 void tokenize(Buf *buf, Tokenization *out) { 407 Tokenize t = {0}; 408 t.out = out; 409 t.tokens = out->tokens = allocate<ZigList<Token>>(1); 410 t.buf = buf; 411 412 out->line_offsets = allocate<ZigList<size_t>>(1); 413 414 out->line_offsets->append(0); 415 for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) { 416 uint8_t c = buf_ptr(t.buf)[t.pos]; 417 switch (t.state) { 418 case TokenizeStateError: 419 break; 420 case TokenizeStateStart: 421 switch (c) { 422 case WHITESPACE: 423 break; 424 case 'c': 425 t.state = TokenizeStateSymbolFirstC; 426 begin_token(&t, TokenIdSymbol); 427 buf_append_char(&t.cur_tok->data.str_lit.str, c); 428 break; 429 case ALPHA_EXCEPT_C: 430 case '_': 431 t.state = TokenizeStateSymbol; 432 begin_token(&t, TokenIdSymbol); 433 buf_append_char(&t.cur_tok->data.str_lit.str, c); 434 break; 435 case '0': 436 t.state = TokenizeStateZero; 437 begin_token(&t, TokenIdIntLiteral); 438 t.radix = 10; 439 t.exp_add_amt = 1; 440 t.exponent_in_bin_or_dec = 0; 441 bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0); 442 bigint_init_unsigned(&t.specified_exponent, 0); 443 break; 444 case DIGIT_NON_ZERO: 445 t.state = TokenizeStateNumber; 446 begin_token(&t, TokenIdIntLiteral); 447 t.radix = 10; 448 t.exp_add_amt = 1; 449 t.exponent_in_bin_or_dec = 0; 450 bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c)); 451 bigint_init_unsigned(&t.specified_exponent, 0); 452 break; 453 case '"': 454 begin_token(&t, TokenIdStringLiteral); 455 t.state = TokenizeStateString; 456 break; 457 case '\'': 458 begin_token(&t, TokenIdCharLiteral); 459 t.state = TokenizeStateCharLiteral; 460 break; 461 case '(': 462 begin_token(&t, TokenIdLParen); 463 end_token(&t); 464 break; 465 case ')': 466 begin_token(&t, TokenIdRParen); 467 end_token(&t); 468 break; 469 case ',': 470 begin_token(&t, TokenIdComma); 471 end_token(&t); 472 break; 473 case '{': 474 begin_token(&t, TokenIdLBrace); 475 end_token(&t); 476 break; 477 case '}': 478 begin_token(&t, TokenIdRBrace); 479 end_token(&t); 480 break; 481 case '[': 482 begin_token(&t, TokenIdLBracket); 483 end_token(&t); 484 break; 485 case ']': 486 begin_token(&t, TokenIdRBracket); 487 end_token(&t); 488 break; 489 case ';': 490 begin_token(&t, TokenIdSemicolon); 491 end_token(&t); 492 break; 493 case ':': 494 begin_token(&t, TokenIdColon); 495 end_token(&t); 496 break; 497 case '#': 498 begin_token(&t, TokenIdNumberSign); 499 end_token(&t); 500 break; 501 case '*': 502 begin_token(&t, TokenIdStar); 503 t.state = TokenizeStateSawStar; 504 break; 505 case '/': 506 begin_token(&t, TokenIdSlash); 507 t.state = TokenizeStateSawSlash; 508 break; 509 case '\\': 510 begin_token(&t, TokenIdStringLiteral); 511 t.state = TokenizeStateSawBackslash; 512 break; 513 case '%': 514 begin_token(&t, TokenIdPercent); 515 t.state = TokenizeStateSawPercent; 516 break; 517 case '+': 518 begin_token(&t, TokenIdPlus); 519 t.state = TokenizeStateSawPlus; 520 break; 521 case '~': 522 begin_token(&t, TokenIdTilde); 523 end_token(&t); 524 break; 525 case '@': 526 begin_token(&t, TokenIdAtSign); 527 t.state = TokenizeStateSawAtSign; 528 break; 529 case '-': 530 begin_token(&t, TokenIdDash); 531 t.state = TokenizeStateSawDash; 532 break; 533 case '&': 534 begin_token(&t, TokenIdAmpersand); 535 t.state = TokenizeStateSawAmpersand; 536 break; 537 case '^': 538 begin_token(&t, TokenIdBinXor); 539 t.state = TokenizeStateSawCaret; 540 break; 541 case '|': 542 begin_token(&t, TokenIdBinOr); 543 t.state = TokenizeStateSawPipe; 544 break; 545 case '=': 546 begin_token(&t, TokenIdEq); 547 t.state = TokenizeStateSawEq; 548 break; 549 case '!': 550 begin_token(&t, TokenIdBang); 551 t.state = TokenizeStateSawBang; 552 break; 553 case '<': 554 begin_token(&t, TokenIdCmpLessThan); 555 t.state = TokenizeStateSawLessThan; 556 break; 557 case '>': 558 begin_token(&t, TokenIdCmpGreaterThan); 559 t.state = TokenizeStateSawGreaterThan; 560 break; 561 case '.': 562 begin_token(&t, TokenIdDot); 563 t.state = TokenizeStateSawDot; 564 break; 565 case '?': 566 begin_token(&t, TokenIdMaybe); 567 t.state = TokenizeStateSawQuestionMark; 568 break; 569 default: 570 tokenize_error(&t, "invalid character: '%c'", c); 571 } 572 break; 573 case TokenizeStateSawQuestionMark: 574 switch (c) { 575 case '?': 576 set_token_id(&t, t.cur_tok, TokenIdDoubleQuestion); 577 end_token(&t); 578 t.state = TokenizeStateStart; 579 break; 580 case '=': 581 set_token_id(&t, t.cur_tok, TokenIdMaybeAssign); 582 end_token(&t); 583 t.state = TokenizeStateStart; 584 break; 585 default: 586 t.pos -= 1; 587 end_token(&t); 588 t.state = TokenizeStateStart; 589 continue; 590 } 591 break; 592 case TokenizeStateSawDot: 593 switch (c) { 594 case '.': 595 t.state = TokenizeStateSawDotDot; 596 set_token_id(&t, t.cur_tok, TokenIdEllipsis2); 597 break; 598 default: 599 t.pos -= 1; 600 end_token(&t); 601 t.state = TokenizeStateStart; 602 continue; 603 } 604 break; 605 case TokenizeStateSawDotDot: 606 switch (c) { 607 case '.': 608 t.state = TokenizeStateStart; 609 set_token_id(&t, t.cur_tok, TokenIdEllipsis3); 610 end_token(&t); 611 break; 612 default: 613 t.pos -= 1; 614 end_token(&t); 615 t.state = TokenizeStateStart; 616 continue; 617 } 618 break; 619 case TokenizeStateSawGreaterThan: 620 switch (c) { 621 case '=': 622 set_token_id(&t, t.cur_tok, TokenIdCmpGreaterOrEq); 623 end_token(&t); 624 t.state = TokenizeStateStart; 625 break; 626 case '>': 627 set_token_id(&t, t.cur_tok, TokenIdBitShiftRight); 628 t.state = TokenizeStateSawGreaterThanGreaterThan; 629 break; 630 default: 631 t.pos -= 1; 632 end_token(&t); 633 t.state = TokenizeStateStart; 634 continue; 635 } 636 break; 637 case TokenizeStateSawGreaterThanGreaterThan: 638 switch (c) { 639 case '=': 640 set_token_id(&t, t.cur_tok, TokenIdBitShiftRightEq); 641 end_token(&t); 642 t.state = TokenizeStateStart; 643 break; 644 default: 645 t.pos -= 1; 646 end_token(&t); 647 t.state = TokenizeStateStart; 648 continue; 649 } 650 break; 651 case TokenizeStateSawLessThan: 652 switch (c) { 653 case '=': 654 set_token_id(&t, t.cur_tok, TokenIdCmpLessOrEq); 655 end_token(&t); 656 t.state = TokenizeStateStart; 657 break; 658 case '<': 659 set_token_id(&t, t.cur_tok, TokenIdBitShiftLeft); 660 t.state = TokenizeStateSawLessThanLessThan; 661 break; 662 default: 663 t.pos -= 1; 664 end_token(&t); 665 t.state = TokenizeStateStart; 666 continue; 667 } 668 break; 669 case TokenizeStateSawLessThanLessThan: 670 switch (c) { 671 case '=': 672 set_token_id(&t, t.cur_tok, TokenIdBitShiftLeftEq); 673 end_token(&t); 674 t.state = TokenizeStateStart; 675 break; 676 default: 677 t.pos -= 1; 678 end_token(&t); 679 t.state = TokenizeStateStart; 680 continue; 681 } 682 break; 683 case TokenizeStateSawBang: 684 switch (c) { 685 case '=': 686 set_token_id(&t, t.cur_tok, TokenIdCmpNotEq); 687 end_token(&t); 688 t.state = TokenizeStateStart; 689 break; 690 default: 691 t.pos -= 1; 692 end_token(&t); 693 t.state = TokenizeStateStart; 694 continue; 695 } 696 break; 697 case TokenizeStateSawEq: 698 switch (c) { 699 case '=': 700 set_token_id(&t, t.cur_tok, TokenIdCmpEq); 701 end_token(&t); 702 t.state = TokenizeStateStart; 703 break; 704 case '>': 705 set_token_id(&t, t.cur_tok, TokenIdFatArrow); 706 end_token(&t); 707 t.state = TokenizeStateStart; 708 break; 709 default: 710 t.pos -= 1; 711 end_token(&t); 712 t.state = TokenizeStateStart; 713 continue; 714 } 715 break; 716 case TokenizeStateSawStar: 717 switch (c) { 718 case '=': 719 set_token_id(&t, t.cur_tok, TokenIdTimesEq); 720 end_token(&t); 721 t.state = TokenizeStateStart; 722 break; 723 case '*': 724 set_token_id(&t, t.cur_tok, TokenIdStarStar); 725 end_token(&t); 726 t.state = TokenizeStateStart; 727 break; 728 case '%': 729 set_token_id(&t, t.cur_tok, TokenIdTimesPercent); 730 t.state = TokenizeStateSawStarPercent; 731 break; 732 default: 733 t.pos -= 1; 734 end_token(&t); 735 t.state = TokenizeStateStart; 736 continue; 737 } 738 break; 739 case TokenizeStateSawStarPercent: 740 switch (c) { 741 case '=': 742 set_token_id(&t, t.cur_tok, TokenIdTimesPercentEq); 743 end_token(&t); 744 t.state = TokenizeStateStart; 745 break; 746 default: 747 t.pos -= 1; 748 end_token(&t); 749 t.state = TokenizeStateStart; 750 continue; 751 } 752 break; 753 case TokenizeStateSawPercent: 754 switch (c) { 755 case '=': 756 set_token_id(&t, t.cur_tok, TokenIdModEq); 757 end_token(&t); 758 t.state = TokenizeStateStart; 759 break; 760 case '.': 761 set_token_id(&t, t.cur_tok, TokenIdPercentDot); 762 end_token(&t); 763 t.state = TokenizeStateStart; 764 break; 765 case '%': 766 set_token_id(&t, t.cur_tok, TokenIdPercentPercent); 767 end_token(&t); 768 t.state = TokenizeStateStart; 769 break; 770 default: 771 t.pos -= 1; 772 end_token(&t); 773 t.state = TokenizeStateStart; 774 continue; 775 } 776 break; 777 case TokenizeStateSawPlus: 778 switch (c) { 779 case '=': 780 set_token_id(&t, t.cur_tok, TokenIdPlusEq); 781 end_token(&t); 782 t.state = TokenizeStateStart; 783 break; 784 case '+': 785 set_token_id(&t, t.cur_tok, TokenIdPlusPlus); 786 end_token(&t); 787 t.state = TokenizeStateStart; 788 break; 789 case '%': 790 set_token_id(&t, t.cur_tok, TokenIdPlusPercent); 791 t.state = TokenizeStateSawPlusPercent; 792 break; 793 default: 794 t.pos -= 1; 795 end_token(&t); 796 t.state = TokenizeStateStart; 797 continue; 798 } 799 break; 800 case TokenizeStateSawPlusPercent: 801 switch (c) { 802 case '=': 803 set_token_id(&t, t.cur_tok, TokenIdPlusPercentEq); 804 end_token(&t); 805 t.state = TokenizeStateStart; 806 break; 807 default: 808 t.pos -= 1; 809 end_token(&t); 810 t.state = TokenizeStateStart; 811 continue; 812 } 813 break; 814 case TokenizeStateSawAmpersand: 815 switch (c) { 816 case '=': 817 set_token_id(&t, t.cur_tok, TokenIdBitAndEq); 818 end_token(&t); 819 t.state = TokenizeStateStart; 820 break; 821 default: 822 t.pos -= 1; 823 end_token(&t); 824 t.state = TokenizeStateStart; 825 continue; 826 } 827 break; 828 case TokenizeStateSawCaret: 829 switch (c) { 830 case '=': 831 set_token_id(&t, t.cur_tok, TokenIdBitXorEq); 832 end_token(&t); 833 t.state = TokenizeStateStart; 834 break; 835 default: 836 t.pos -= 1; 837 end_token(&t); 838 t.state = TokenizeStateStart; 839 continue; 840 } 841 break; 842 case TokenizeStateSawPipe: 843 switch (c) { 844 case '=': 845 set_token_id(&t, t.cur_tok, TokenIdBitOrEq); 846 end_token(&t); 847 t.state = TokenizeStateStart; 848 break; 849 default: 850 t.pos -= 1; 851 end_token(&t); 852 t.state = TokenizeStateStart; 853 continue; 854 } 855 break; 856 case TokenizeStateSawSlash: 857 switch (c) { 858 case '/': 859 cancel_token(&t); 860 t.state = TokenizeStateLineComment; 861 break; 862 case '=': 863 set_token_id(&t, t.cur_tok, TokenIdDivEq); 864 end_token(&t); 865 t.state = TokenizeStateStart; 866 break; 867 default: 868 t.pos -= 1; 869 end_token(&t); 870 t.state = TokenizeStateStart; 871 continue; 872 } 873 break; 874 case TokenizeStateSawBackslash: 875 switch (c) { 876 case '\\': 877 t.state = TokenizeStateLineString; 878 break; 879 default: 880 tokenize_error(&t, "invalid character: '%c'", c); 881 break; 882 } 883 break; 884 case TokenizeStateLineString: 885 switch (c) { 886 case '\n': 887 t.state = TokenizeStateLineStringEnd; 888 break; 889 default: 890 buf_append_char(&t.cur_tok->data.str_lit.str, c); 891 break; 892 } 893 break; 894 case TokenizeStateLineStringEnd: 895 switch (c) { 896 case WHITESPACE: 897 break; 898 case 'c': 899 if (!t.cur_tok->data.str_lit.is_c_str) { 900 t.pos -= 1; 901 end_token(&t); 902 t.state = TokenizeStateStart; 903 break; 904 } 905 t.state = TokenizeStateLineStringContinueC; 906 break; 907 case '\\': 908 if (t.cur_tok->data.str_lit.is_c_str) { 909 tokenize_error(&t, "invalid character: '%c'", c); 910 } 911 t.state = TokenizeStateLineStringContinue; 912 break; 913 default: 914 t.pos -= 1; 915 end_token(&t); 916 t.state = TokenizeStateStart; 917 continue; 918 } 919 break; 920 case TokenizeStateLineStringContinueC: 921 switch (c) { 922 case '\\': 923 t.state = TokenizeStateLineStringContinue; 924 break; 925 default: 926 t.pos -= 1; 927 end_token(&t); 928 t.state = TokenizeStateStart; 929 continue; 930 } 931 break; 932 case TokenizeStateLineStringContinue: 933 switch (c) { 934 case '\\': 935 t.state = TokenizeStateLineString; 936 buf_append_char(&t.cur_tok->data.str_lit.str, '\n'); 937 break; 938 default: 939 tokenize_error(&t, "invalid character: '%c'", c); 940 break; 941 } 942 break; 943 case TokenizeStateLineComment: 944 switch (c) { 945 case '\n': 946 t.state = TokenizeStateStart; 947 break; 948 default: 949 // do nothing 950 break; 951 } 952 break; 953 case TokenizeStateSymbolFirstC: 954 switch (c) { 955 case '"': 956 set_token_id(&t, t.cur_tok, TokenIdStringLiteral); 957 t.cur_tok->data.str_lit.is_c_str = true; 958 t.state = TokenizeStateString; 959 break; 960 case '\\': 961 set_token_id(&t, t.cur_tok, TokenIdStringLiteral); 962 t.cur_tok->data.str_lit.is_c_str = true; 963 t.state = TokenizeStateSawBackslash; 964 break; 965 case SYMBOL_CHAR: 966 t.state = TokenizeStateSymbol; 967 buf_append_char(&t.cur_tok->data.str_lit.str, c); 968 break; 969 default: 970 t.pos -= 1; 971 end_token(&t); 972 t.state = TokenizeStateStart; 973 continue; 974 } 975 break; 976 case TokenizeStateSawAtSign: 977 switch (c) { 978 case '"': 979 set_token_id(&t, t.cur_tok, TokenIdSymbol); 980 t.state = TokenizeStateString; 981 break; 982 default: 983 t.pos -= 1; 984 end_token(&t); 985 t.state = TokenizeStateStart; 986 continue; 987 } 988 break; 989 case TokenizeStateSymbol: 990 switch (c) { 991 case SYMBOL_CHAR: 992 buf_append_char(&t.cur_tok->data.str_lit.str, c); 993 break; 994 default: 995 t.pos -= 1; 996 end_token(&t); 997 t.state = TokenizeStateStart; 998 continue; 999 } 1000 break; 1001 case TokenizeStateString: 1002 switch (c) { 1003 case '"': 1004 end_token(&t); 1005 t.state = TokenizeStateStart; 1006 break; 1007 case '\n': 1008 tokenize_error(&t, "newline not allowed in string literal"); 1009 break; 1010 case '\\': 1011 t.state = TokenizeStateStringEscape; 1012 break; 1013 default: 1014 buf_append_char(&t.cur_tok->data.str_lit.str, c); 1015 break; 1016 } 1017 break; 1018 case TokenizeStateStringEscape: 1019 switch (c) { 1020 case 'x': 1021 t.state = TokenizeStateCharCode; 1022 t.radix = 16; 1023 t.char_code = 0; 1024 t.char_code_index = 0; 1025 t.char_code_end = 2; 1026 t.unicode = false; 1027 break; 1028 case 'u': 1029 t.state = TokenizeStateCharCode; 1030 t.radix = 16; 1031 t.char_code = 0; 1032 t.char_code_index = 0; 1033 t.char_code_end = 4; 1034 t.unicode = true; 1035 break; 1036 case 'U': 1037 t.state = TokenizeStateCharCode; 1038 t.radix = 16; 1039 t.char_code = 0; 1040 t.char_code_index = 0; 1041 t.char_code_end = 6; 1042 t.unicode = true; 1043 break; 1044 case 'n': 1045 handle_string_escape(&t, '\n'); 1046 break; 1047 case 'r': 1048 handle_string_escape(&t, '\r'); 1049 break; 1050 case '\\': 1051 handle_string_escape(&t, '\\'); 1052 break; 1053 case 't': 1054 handle_string_escape(&t, '\t'); 1055 break; 1056 case '\'': 1057 handle_string_escape(&t, '\''); 1058 break; 1059 case '"': 1060 handle_string_escape(&t, '\"'); 1061 break; 1062 default: 1063 tokenize_error(&t, "invalid character: '%c'", c); 1064 } 1065 break; 1066 case TokenizeStateCharCode: 1067 { 1068 uint32_t digit_value = get_digit_value(c); 1069 if (digit_value >= t.radix) { 1070 tokenize_error(&t, "invalid digit: '%c'", c); 1071 } 1072 t.char_code *= t.radix; 1073 t.char_code += digit_value; 1074 t.char_code_index += 1; 1075 1076 if (t.char_code_index >= t.char_code_end) { 1077 if (t.unicode) { 1078 if (t.char_code <= 0x7f) { 1079 // 00000000 00000000 00000000 0xxxxxxx 1080 handle_string_escape(&t, (uint8_t)t.char_code); 1081 } else if (t.cur_tok->id == TokenIdCharLiteral) { 1082 tokenize_error(&t, "unicode value too large for character literal: %x", t.char_code); 1083 } else if (t.char_code <= 0x7ff) { 1084 // 00000000 00000000 00000xxx xx000000 1085 handle_string_escape(&t, (uint8_t)(0xc0 | (t.char_code >> 6))); 1086 // 00000000 00000000 00000000 00xxxxxx 1087 handle_string_escape(&t, (uint8_t)(0x80 | (t.char_code & 0x3f))); 1088 } else if (t.char_code <= 0xffff) { 1089 // 00000000 00000000 xxxx0000 00000000 1090 handle_string_escape(&t, (uint8_t)(0xe0 | (t.char_code >> 12))); 1091 // 00000000 00000000 0000xxxx xx000000 1092 handle_string_escape(&t, (uint8_t)(0x80 | ((t.char_code >> 6) & 0x3f))); 1093 // 00000000 00000000 00000000 00xxxxxx 1094 handle_string_escape(&t, (uint8_t)(0x80 | (t.char_code & 0x3f))); 1095 } else if (t.char_code <= 0x10ffff) { 1096 // 00000000 000xxx00 00000000 00000000 1097 handle_string_escape(&t, (uint8_t)(0xf0 | (t.char_code >> 18))); 1098 // 00000000 000000xx xxxx0000 00000000 1099 handle_string_escape(&t, (uint8_t)(0x80 | ((t.char_code >> 12) & 0x3f))); 1100 // 00000000 00000000 0000xxxx xx000000 1101 handle_string_escape(&t, (uint8_t)(0x80 | ((t.char_code >> 6) & 0x3f))); 1102 // 00000000 00000000 00000000 00xxxxxx 1103 handle_string_escape(&t, (uint8_t)(0x80 | (t.char_code & 0x3f))); 1104 } else { 1105 tokenize_error(&t, "unicode value out of range: %x", t.char_code); 1106 } 1107 } else { 1108 if (t.cur_tok->id == TokenIdCharLiteral && t.char_code > UINT8_MAX) { 1109 tokenize_error(&t, "value too large for character literal: '%x'", 1110 t.char_code); 1111 } 1112 handle_string_escape(&t, (uint8_t)t.char_code); 1113 } 1114 } 1115 } 1116 break; 1117 case TokenizeStateCharLiteral: 1118 switch (c) { 1119 case '\'': 1120 tokenize_error(&t, "expected character"); 1121 case '\\': 1122 t.state = TokenizeStateStringEscape; 1123 break; 1124 default: 1125 t.cur_tok->data.char_lit.c = c; 1126 t.state = TokenizeStateCharLiteralEnd; 1127 break; 1128 } 1129 break; 1130 case TokenizeStateCharLiteralEnd: 1131 switch (c) { 1132 case '\'': 1133 end_token(&t); 1134 t.state = TokenizeStateStart; 1135 break; 1136 default: 1137 tokenize_error(&t, "invalid character: '%c'", c); 1138 } 1139 break; 1140 case TokenizeStateZero: 1141 switch (c) { 1142 case 'b': 1143 t.radix = 2; 1144 t.state = TokenizeStateNumber; 1145 break; 1146 case 'o': 1147 t.radix = 8; 1148 t.exp_add_amt = 3; 1149 t.state = TokenizeStateNumber; 1150 break; 1151 case 'x': 1152 t.radix = 16; 1153 t.exp_add_amt = 4; 1154 t.state = TokenizeStateNumber; 1155 break; 1156 default: 1157 // reinterpret as normal number 1158 t.pos -= 1; 1159 t.state = TokenizeStateNumber; 1160 continue; 1161 } 1162 break; 1163 case TokenizeStateNumber: 1164 { 1165 if (c == '.') { 1166 t.state = TokenizeStateNumberDot; 1167 break; 1168 } 1169 if (is_exponent_signifier(c, t.radix)) { 1170 t.state = TokenizeStateFloatExponentUnsigned; 1171 assert(t.cur_tok->id == TokenIdIntLiteral); 1172 bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); 1173 set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); 1174 break; 1175 } 1176 uint32_t digit_value = get_digit_value(c); 1177 if (digit_value >= t.radix) { 1178 if (is_symbol_char(c)) { 1179 tokenize_error(&t, "invalid character: '%c'", c); 1180 } 1181 // not my char 1182 t.pos -= 1; 1183 end_token(&t); 1184 t.state = TokenizeStateStart; 1185 continue; 1186 } 1187 BigInt digit_value_bi; 1188 bigint_init_unsigned(&digit_value_bi, digit_value); 1189 1190 BigInt radix_bi; 1191 bigint_init_unsigned(&radix_bi, t.radix); 1192 1193 BigInt multiplied; 1194 bigint_mul(&multiplied, &t.cur_tok->data.int_lit.bigint, &radix_bi); 1195 1196 bigint_add(&t.cur_tok->data.int_lit.bigint, &multiplied, &digit_value_bi); 1197 break; 1198 } 1199 case TokenizeStateNumberDot: 1200 { 1201 if (c == '.') { 1202 t.pos -= 2; 1203 end_token(&t); 1204 t.state = TokenizeStateStart; 1205 continue; 1206 } 1207 t.pos -= 1; 1208 t.state = TokenizeStateFloatFraction; 1209 assert(t.cur_tok->id == TokenIdIntLiteral); 1210 bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); 1211 set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); 1212 continue; 1213 } 1214 case TokenizeStateFloatFraction: 1215 { 1216 if (is_exponent_signifier(c, t.radix)) { 1217 t.state = TokenizeStateFloatExponentUnsigned; 1218 break; 1219 } 1220 uint32_t digit_value = get_digit_value(c); 1221 if (digit_value >= t.radix) { 1222 if (is_symbol_char(c)) { 1223 tokenize_error(&t, "invalid character: '%c'", c); 1224 } 1225 // not my char 1226 t.pos -= 1; 1227 end_token(&t); 1228 t.state = TokenizeStateStart; 1229 continue; 1230 } 1231 t.exponent_in_bin_or_dec -= t.exp_add_amt; 1232 if (t.radix == 10) { 1233 // For now we use strtod to parse decimal floats, so we just have to get to the 1234 // end of the token. 1235 break; 1236 } 1237 BigInt digit_value_bi; 1238 bigint_init_unsigned(&digit_value_bi, digit_value); 1239 1240 BigInt radix_bi; 1241 bigint_init_unsigned(&radix_bi, t.radix); 1242 1243 BigInt multiplied; 1244 bigint_mul(&multiplied, &t.significand, &radix_bi); 1245 1246 bigint_add(&t.significand, &multiplied, &digit_value_bi); 1247 break; 1248 } 1249 case TokenizeStateFloatExponentUnsigned: 1250 switch (c) { 1251 case '+': 1252 t.is_exp_negative = false; 1253 t.state = TokenizeStateFloatExponentNumber; 1254 break; 1255 case '-': 1256 t.is_exp_negative = true; 1257 t.state = TokenizeStateFloatExponentNumber; 1258 break; 1259 default: 1260 // reinterpret as normal exponent number 1261 t.pos -= 1; 1262 t.is_exp_negative = false; 1263 t.state = TokenizeStateFloatExponentNumber; 1264 continue; 1265 } 1266 break; 1267 case TokenizeStateFloatExponentNumber: 1268 { 1269 uint32_t digit_value = get_digit_value(c); 1270 if (digit_value >= t.radix) { 1271 if (is_symbol_char(c)) { 1272 tokenize_error(&t, "invalid character: '%c'", c); 1273 } 1274 // not my char 1275 t.pos -= 1; 1276 end_token(&t); 1277 t.state = TokenizeStateStart; 1278 continue; 1279 } 1280 if (t.radix == 10) { 1281 // For now we use strtod to parse decimal floats, so we just have to get to the 1282 // end of the token. 1283 break; 1284 } 1285 BigInt digit_value_bi; 1286 bigint_init_unsigned(&digit_value_bi, digit_value); 1287 1288 BigInt radix_bi; 1289 bigint_init_unsigned(&radix_bi, 10); 1290 1291 BigInt multiplied; 1292 bigint_mul(&multiplied, &t.specified_exponent, &radix_bi); 1293 1294 bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi); 1295 } 1296 break; 1297 case TokenizeStateSawDash: 1298 switch (c) { 1299 case '>': 1300 set_token_id(&t, t.cur_tok, TokenIdArrow); 1301 end_token(&t); 1302 t.state = TokenizeStateStart; 1303 break; 1304 case '=': 1305 set_token_id(&t, t.cur_tok, TokenIdMinusEq); 1306 end_token(&t); 1307 t.state = TokenizeStateStart; 1308 break; 1309 case '%': 1310 set_token_id(&t, t.cur_tok, TokenIdMinusPercent); 1311 t.state = TokenizeStateSawMinusPercent; 1312 break; 1313 default: 1314 t.pos -= 1; 1315 end_token(&t); 1316 t.state = TokenizeStateStart; 1317 continue; 1318 } 1319 break; 1320 case TokenizeStateSawMinusPercent: 1321 switch (c) { 1322 case '=': 1323 set_token_id(&t, t.cur_tok, TokenIdMinusPercentEq); 1324 end_token(&t); 1325 t.state = TokenizeStateStart; 1326 break; 1327 default: 1328 t.pos -= 1; 1329 end_token(&t); 1330 t.state = TokenizeStateStart; 1331 continue; 1332 } 1333 break; 1334 } 1335 if (c == '\n') { 1336 out->line_offsets->append(t.pos + 1); 1337 t.line += 1; 1338 t.column = 0; 1339 } else { 1340 t.column += 1; 1341 } 1342 } 1343 // EOF 1344 switch (t.state) { 1345 case TokenizeStateStart: 1346 case TokenizeStateError: 1347 break; 1348 case TokenizeStateNumberDot: 1349 tokenize_error(&t, "unterminated number literal"); 1350 break; 1351 case TokenizeStateString: 1352 tokenize_error(&t, "unterminated string"); 1353 break; 1354 case TokenizeStateStringEscape: 1355 case TokenizeStateCharCode: 1356 if (t.cur_tok->id == TokenIdStringLiteral) { 1357 tokenize_error(&t, "unterminated string"); 1358 } else if (t.cur_tok->id == TokenIdCharLiteral) { 1359 tokenize_error(&t, "unterminated character literal"); 1360 } else { 1361 zig_unreachable(); 1362 } 1363 break; 1364 case TokenizeStateCharLiteral: 1365 case TokenizeStateCharLiteralEnd: 1366 tokenize_error(&t, "unterminated character literal"); 1367 break; 1368 case TokenizeStateSymbol: 1369 case TokenizeStateSymbolFirstC: 1370 case TokenizeStateZero: 1371 case TokenizeStateNumber: 1372 case TokenizeStateFloatFraction: 1373 case TokenizeStateFloatExponentUnsigned: 1374 case TokenizeStateFloatExponentNumber: 1375 case TokenizeStateSawStar: 1376 case TokenizeStateSawSlash: 1377 case TokenizeStateSawPercent: 1378 case TokenizeStateSawPlus: 1379 case TokenizeStateSawDash: 1380 case TokenizeStateSawAmpersand: 1381 case TokenizeStateSawCaret: 1382 case TokenizeStateSawPipe: 1383 case TokenizeStateSawEq: 1384 case TokenizeStateSawBang: 1385 case TokenizeStateSawLessThan: 1386 case TokenizeStateSawLessThanLessThan: 1387 case TokenizeStateSawGreaterThan: 1388 case TokenizeStateSawGreaterThanGreaterThan: 1389 case TokenizeStateSawDot: 1390 case TokenizeStateSawQuestionMark: 1391 case TokenizeStateSawAtSign: 1392 case TokenizeStateSawStarPercent: 1393 case TokenizeStateSawPlusPercent: 1394 case TokenizeStateSawMinusPercent: 1395 case TokenizeStateLineString: 1396 case TokenizeStateLineStringEnd: 1397 end_token(&t); 1398 break; 1399 case TokenizeStateSawDotDot: 1400 case TokenizeStateSawBackslash: 1401 case TokenizeStateLineStringContinue: 1402 case TokenizeStateLineStringContinueC: 1403 tokenize_error(&t, "unexpected EOF"); 1404 break; 1405 case TokenizeStateLineComment: 1406 break; 1407 } 1408 if (t.state != TokenizeStateError) { 1409 if (t.tokens->length > 0) { 1410 Token *last_token = &t.tokens->last(); 1411 t.line = (int)last_token->start_line; 1412 t.column = (int)last_token->start_column; 1413 t.pos = last_token->start_pos; 1414 } else { 1415 t.pos = 0; 1416 } 1417 begin_token(&t, TokenIdEof); 1418 end_token(&t); 1419 assert(!t.cur_tok); 1420 } 1421 } 1422 1423 const char * token_name(TokenId id) { 1424 switch (id) { 1425 case TokenIdAmpersand: return "&"; 1426 case TokenIdArrow: return "->"; 1427 case TokenIdAtSign: return "@"; 1428 case TokenIdBang: return "!"; 1429 case TokenIdBinOr: return "|"; 1430 case TokenIdBinXor: return "^"; 1431 case TokenIdBitAndEq: return "&="; 1432 case TokenIdBitOrEq: return "|="; 1433 case TokenIdBitShiftLeft: return "<<"; 1434 case TokenIdBitShiftLeftEq: return "<<="; 1435 case TokenIdBitShiftRight: return ">>"; 1436 case TokenIdBitShiftRightEq: return ">>="; 1437 case TokenIdBitXorEq: return "^="; 1438 case TokenIdCharLiteral: return "CharLiteral"; 1439 case TokenIdCmpEq: return "=="; 1440 case TokenIdCmpGreaterOrEq: return ">="; 1441 case TokenIdCmpGreaterThan: return ">"; 1442 case TokenIdCmpLessOrEq: return "<="; 1443 case TokenIdCmpLessThan: return "<"; 1444 case TokenIdCmpNotEq: return "!="; 1445 case TokenIdColon: return ":"; 1446 case TokenIdComma: return ","; 1447 case TokenIdDash: return "-"; 1448 case TokenIdDivEq: return "/="; 1449 case TokenIdDot: return "."; 1450 case TokenIdDoubleQuestion: return "??"; 1451 case TokenIdEllipsis2: return ".."; 1452 case TokenIdEllipsis3: return "..."; 1453 case TokenIdEof: return "EOF"; 1454 case TokenIdEq: return "="; 1455 case TokenIdFatArrow: return "=>"; 1456 case TokenIdFloatLiteral: return "FloatLiteral"; 1457 case TokenIdIntLiteral: return "IntLiteral"; 1458 case TokenIdKeywordAlign: return "align"; 1459 case TokenIdKeywordAnd: return "and"; 1460 case TokenIdKeywordAsm: return "asm"; 1461 case TokenIdKeywordBreak: return "break"; 1462 case TokenIdKeywordColdCC: return "coldcc"; 1463 case TokenIdKeywordCompTime: return "comptime"; 1464 case TokenIdKeywordConst: return "const"; 1465 case TokenIdKeywordContinue: return "continue"; 1466 case TokenIdKeywordDefer: return "defer"; 1467 case TokenIdKeywordElse: return "else"; 1468 case TokenIdKeywordEnum: return "enum"; 1469 case TokenIdKeywordError: return "error"; 1470 case TokenIdKeywordExport: return "export"; 1471 case TokenIdKeywordExtern: return "extern"; 1472 case TokenIdKeywordFalse: return "false"; 1473 case TokenIdKeywordFn: return "fn"; 1474 case TokenIdKeywordFor: return "for"; 1475 case TokenIdKeywordGoto: return "goto"; 1476 case TokenIdKeywordIf: return "if"; 1477 case TokenIdKeywordInline: return "inline"; 1478 case TokenIdKeywordNakedCC: return "nakedcc"; 1479 case TokenIdKeywordNoAlias: return "noalias"; 1480 case TokenIdKeywordNull: return "null"; 1481 case TokenIdKeywordOr: return "or"; 1482 case TokenIdKeywordPacked: return "packed"; 1483 case TokenIdKeywordPub: return "pub"; 1484 case TokenIdKeywordReturn: return "return"; 1485 case TokenIdKeywordStdcallCC: return "stdcallcc"; 1486 case TokenIdKeywordStruct: return "struct"; 1487 case TokenIdKeywordSwitch: return "switch"; 1488 case TokenIdKeywordTest: return "test"; 1489 case TokenIdKeywordThis: return "this"; 1490 case TokenIdKeywordTrue: return "true"; 1491 case TokenIdKeywordUndefined: return "undefined"; 1492 case TokenIdKeywordUnion: return "union"; 1493 case TokenIdKeywordUnreachable: return "unreachable"; 1494 case TokenIdKeywordUse: return "use"; 1495 case TokenIdKeywordVar: return "var"; 1496 case TokenIdKeywordVolatile: return "volatile"; 1497 case TokenIdKeywordWhile: return "while"; 1498 case TokenIdLBrace: return "{"; 1499 case TokenIdLBracket: return "["; 1500 case TokenIdLParen: return "("; 1501 case TokenIdMaybe: return "?"; 1502 case TokenIdMaybeAssign: return "?="; 1503 case TokenIdMinusEq: return "-="; 1504 case TokenIdMinusPercent: return "-%"; 1505 case TokenIdMinusPercentEq: return "-%="; 1506 case TokenIdModEq: return "%="; 1507 case TokenIdNumberSign: return "#"; 1508 case TokenIdPercent: return "%"; 1509 case TokenIdPercentDot: return "%."; 1510 case TokenIdPercentPercent: return "%%"; 1511 case TokenIdPlus: return "+"; 1512 case TokenIdPlusEq: return "+="; 1513 case TokenIdPlusPercent: return "+%"; 1514 case TokenIdPlusPercentEq: return "+%="; 1515 case TokenIdPlusPlus: return "++"; 1516 case TokenIdRBrace: return "}"; 1517 case TokenIdRBracket: return "]"; 1518 case TokenIdRParen: return ")"; 1519 case TokenIdSemicolon: return ";"; 1520 case TokenIdSlash: return "/"; 1521 case TokenIdStar: return "*"; 1522 case TokenIdStarStar: return "**"; 1523 case TokenIdStringLiteral: return "StringLiteral"; 1524 case TokenIdSymbol: return "Symbol"; 1525 case TokenIdTilde: return "~"; 1526 case TokenIdTimesEq: return "*="; 1527 case TokenIdTimesPercent: return "*%"; 1528 case TokenIdTimesPercentEq: return "*%="; 1529 } 1530 return "(invalid token)"; 1531 } 1532 1533 void print_tokens(Buf *buf, ZigList<Token> *tokens) { 1534 for (size_t i = 0; i < tokens->length; i += 1) { 1535 Token *token = &tokens->at(i); 1536 fprintf(stderr, "%s ", token_name(token->id)); 1537 if (token->start_pos != SIZE_MAX) { 1538 fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stderr); 1539 } 1540 fprintf(stderr, "\n"); 1541 } 1542 } 1543 1544 bool valid_symbol_starter(uint8_t c) { 1545 switch (c) { 1546 case SYMBOL_START: 1547 return true; 1548 } 1549 return false; 1550 }