* Extracts AstGen logic from ir.cpp into astgen.cpp. Reduces the
largest file of stage1 from 33,551 lines to 25,510.
* tokenizer: rework it completely to match the stage2 tokenizer logic.
They can now be maintained together; when one is changed, the other
can be changed in the same way.
- Each token now takes up 13 bytes instead of 64 bytes. The tokenizer
does not parse char literals, string literals, integer literals,
etc into meaningful data. Instead, that happens during parsing or
astgen.
- no longer store line offsets. Error messages scan source
files to find the line/column as needed (same as stage2).
- main loop: instead of checking the loop, handle a null byte
explicitly in the switch statements. This is a nice improvement
that we may want to backport to stage2.
- delete some dead tokens, artifacts of past syntax that no longer
exists.
* Parser: fix a TODO by parsing builtin functions as tokens rather than
`@` as a separate token. This is how stage2 does it.
* Remove some debugging infrastructure. These will need to be redone,
if at all, as the code migrates to match stage2.
- remove the ast_render code.
- remove the IR debugging stuff
- remove the token printing code
158 lines
3.3 KiB
C++
158 lines
3.3 KiB
C++
/*
|
|
* Copyright (c) 2015 Andrew Kelley
|
|
*
|
|
* This file is part of zig, which is MIT licensed.
|
|
* See http://opensource.org/licenses/MIT
|
|
*/
|
|
|
|
#ifndef ZIG_TOKENIZER_HPP
|
|
#define ZIG_TOKENIZER_HPP
|
|
|
|
#include "buffer.hpp"
|
|
#include "bigint.hpp"
|
|
#include "bigfloat.hpp"
|
|
|
|
// Kind of a single token.
//
// The underlying type is uint8_t, so each id occupies exactly one byte.
// Enumerator values are implicit (starting at 0, in declaration order);
// inserting or reordering entries therefore changes their numeric values.
enum TokenId : uint8_t {
    TokenIdAmpersand,
    TokenIdArrow,
    TokenIdBang,
    TokenIdBarBar,
    TokenIdBinOr,
    TokenIdBinXor,
    TokenIdBitAndEq,
    TokenIdBitOrEq,
    TokenIdBitShiftLeft,
    TokenIdBitShiftLeftEq,
    TokenIdBitShiftRight,
    TokenIdBitShiftRightEq,
    TokenIdBitXorEq,
    TokenIdBuiltin,
    TokenIdCharLiteral,
    TokenIdCmpEq,
    TokenIdCmpGreaterOrEq,
    TokenIdCmpGreaterThan,
    TokenIdCmpLessOrEq,
    TokenIdCmpLessThan,
    TokenIdCmpNotEq,
    TokenIdColon,
    TokenIdComma,
    TokenIdDash,
    TokenIdDivEq,
    TokenIdDocComment,
    TokenIdContainerDocComment,
    TokenIdDot,
    TokenIdDotStar,
    TokenIdEllipsis2,
    TokenIdEllipsis3,
    TokenIdEof,
    TokenIdEq,
    TokenIdFatArrow,
    TokenIdFloatLiteral,
    TokenIdIntLiteral,
    TokenIdKeywordAlign,
    TokenIdKeywordAllowZero,
    TokenIdKeywordAnd,
    TokenIdKeywordAnyFrame,
    TokenIdKeywordAnyType,
    TokenIdKeywordAsm,
    TokenIdKeywordAsync,
    TokenIdKeywordAwait,
    TokenIdKeywordBreak,
    TokenIdKeywordCatch,
    TokenIdKeywordCallconv,
    TokenIdKeywordCompTime,
    TokenIdKeywordConst,
    TokenIdKeywordContinue,
    TokenIdKeywordDefer,
    TokenIdKeywordElse,
    TokenIdKeywordEnum,
    TokenIdKeywordErrdefer,
    TokenIdKeywordError,
    TokenIdKeywordExport,
    TokenIdKeywordExtern,
    TokenIdKeywordFalse,
    TokenIdKeywordFn,
    TokenIdKeywordFor,
    TokenIdKeywordIf,
    TokenIdKeywordInline,
    TokenIdKeywordNoInline,
    TokenIdKeywordLinkSection,
    TokenIdKeywordNoAlias,
    TokenIdKeywordNoSuspend,
    TokenIdKeywordNull,
    TokenIdKeywordOpaque,
    TokenIdKeywordOr,
    TokenIdKeywordOrElse,
    TokenIdKeywordPacked,
    TokenIdKeywordPub,
    TokenIdKeywordResume,
    TokenIdKeywordReturn,
    TokenIdKeywordStruct,
    TokenIdKeywordSuspend,
    TokenIdKeywordSwitch,
    TokenIdKeywordTest,
    TokenIdKeywordThreadLocal,
    TokenIdKeywordTrue,
    TokenIdKeywordTry,
    TokenIdKeywordUndefined,
    TokenIdKeywordUnion,
    TokenIdKeywordUnreachable,
    TokenIdKeywordUsingNamespace,
    TokenIdKeywordVar,
    TokenIdKeywordVolatile,
    TokenIdKeywordWhile,
    TokenIdLBrace,
    TokenIdLBracket,
    TokenIdLParen,
    TokenIdQuestion,
    TokenIdMinusEq,
    TokenIdMinusPercent,
    TokenIdMinusPercentEq,
    TokenIdModEq,
    TokenIdPercent,
    TokenIdPlus,
    TokenIdPlusEq,
    TokenIdPlusPercent,
    TokenIdPlusPercentEq,
    TokenIdPlusPlus,
    TokenIdRBrace,
    TokenIdRBracket,
    TokenIdRParen,
    TokenIdSemicolon,
    TokenIdSlash,
    TokenIdStar,
    TokenIdStarStar,
    TokenIdStringLiteral,
    TokenIdMultilineStringLiteralLine,
    TokenIdIdentifier,
    TokenIdTilde,
    TokenIdTimesEq,
    TokenIdTimesPercent,
    TokenIdTimesPercentEq,

    // Not a token kind: because it is declared last, its value equals the
    // number of token kinds above. Keep it as the final enumerator.
    TokenIdCount,
};
|
|
|
|
// 32-bit unsigned index type for tokens.
// NOTE(review): presumably an index into Tokenization::ids / Tokenization::locs — confirm against users.
typedef uint32_t TokenIndex;
|
|
|
|
// Source location recorded for a token. Three 32-bit fields (12 bytes of
// payload), stored in Tokenization::locs.
struct TokenLoc {
    // NOTE(review): presumably the byte offset of the token within the source
    // buffer — confirm in tokenizer.cpp.
    uint32_t offset;
    // NOTE(review): whether line/column are 0- or 1-based is not visible in
    // this header — confirm in tokenizer.cpp.
    uint32_t line;
    uint32_t column;
};
|
|
|
|
struct Tokenization {
|
|
ZigList<TokenId> ids;
|
|
ZigList<TokenLoc> locs;
|
|
|
|
// if an error occurred
|
|
Buf *err;
|
|
uint32_t err_byte_offset;
|
|
};
|
|
|
|
// Tokenizes `source`, writing the result into `*out_tokenization`.
// NOTE(review): no length is passed, so `source` is presumably null-terminated;
// failures are presumably reported through Tokenization::err and
// Tokenization::err_byte_offset rather than a return value — confirm in tokenizer.cpp.
void tokenize(const char *source, Tokenization *out_tokenization);
|
|
|
|
// Returns a C string naming the given token kind.
// NOTE(review): the exact text returned and the string's lifetime/ownership
// are not visible in this header — confirm in tokenizer.cpp.
const char * token_name(TokenId id);
|
|
|
|
#endif
|