zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob 8986034f (46319B) - Raw


      1 // JSON parser conforming to RFC8259.
      2 //
      3 // https://tools.ietf.org/html/rfc8259
      4 
      5 const std = @import("index.zig");
      6 const debug = std.debug;
      7 const mem = std.mem;
      8 
      9 const u1 = @IntType(false, 1);
     10 const u256 = @IntType(false, 256);
     11 
     12 // A single token slice into the parent string.
     13 //
     14 // Use `token.slice()` on the input at the current position to get the current slice.
     15 pub const Token = struct {
     16     id: Id,
     17     // How many bytes do we skip before counting
     18     offset: u1,
     19     // Whether string contains a \uXXXX sequence and cannot be zero-copied
     20     string_has_escape: bool,
     21     // Whether number is simple and can be represented by an integer (i.e. no `.` or `e`)
     22     number_is_integer: bool,
     23     // How many bytes from the current position behind the start of this token is.
     24     count: usize,
     25 
     26     pub const Id = enum {
     27         ObjectBegin,
     28         ObjectEnd,
     29         ArrayBegin,
     30         ArrayEnd,
     31         String,
     32         Number,
     33         True,
     34         False,
     35         Null,
     36     };
     37 
     38     pub fn init(id: Id, count: usize, offset: u1) Token {
     39         return Token{
     40             .id = id,
     41             .offset = offset,
     42             .string_has_escape = false,
     43             .number_is_integer = true,
     44             .count = count,
     45         };
     46     }
     47 
     48     pub fn initString(count: usize, has_unicode_escape: bool) Token {
     49         return Token{
     50             .id = Id.String,
     51             .offset = 0,
     52             .string_has_escape = has_unicode_escape,
     53             .number_is_integer = true,
     54             .count = count,
     55         };
     56     }
     57 
     58     pub fn initNumber(count: usize, number_is_integer: bool) Token {
     59         return Token{
     60             .id = Id.Number,
     61             .offset = 0,
     62             .string_has_escape = false,
     63             .number_is_integer = number_is_integer,
     64             .count = count,
     65         };
     66     }
     67 
     68     // A marker token is a zero-length
     69     pub fn initMarker(id: Id) Token {
     70         return Token{
     71             .id = id,
     72             .offset = 0,
     73             .string_has_escape = false,
     74             .number_is_integer = true,
     75             .count = 0,
     76         };
     77     }
     78 
     79     // Slice into the underlying input string.
     80     pub fn slice(self: *const Token, input: []const u8, i: usize) []const u8 {
     81         return input[i + self.offset - self.count .. i + self.offset];
     82     }
     83 };
     84 
     85 // A small streaming JSON parser. This accepts input one byte at a time and returns tokens as
     86 // they are encountered. No copies or allocations are performed during parsing and the entire
     87 // parsing state requires ~40-50 bytes of stack space.
     88 //
     89 // Conforms strictly to RFC8529.
     90 //
     91 // For a non-byte based wrapper, consider using TokenStream instead.
     92 pub const StreamingParser = struct {
     93     // Current state
     94     state: State,
     95     // How many bytes we have counted for the current token
     96     count: usize,
     97     // What state to follow after parsing a string (either property or value string)
     98     after_string_state: State,
     99     // What state to follow after parsing a value (either top-level or value end)
    100     after_value_state: State,
    101     // If we stopped now, would the complete parsed string to now be a valid json string
    102     complete: bool,
    103     // Current token flags to pass through to the next generated, see Token.
    104     string_has_escape: bool,
    105     number_is_integer: bool,
    106 
    107     // Bit-stack for nested object/map literals (max 255 nestings).
    108     stack: u256,
    109     stack_used: u8,
    110 
    111     const object_bit = 0;
    112     const array_bit = 1;
    113     const max_stack_size = @maxValue(u8);
    114 
    115     pub fn init() StreamingParser {
    116         var p: StreamingParser = undefined;
    117         p.reset();
    118         return p;
    119     }
    120 
    121     pub fn reset(p: *StreamingParser) void {
    122         p.state = State.TopLevelBegin;
    123         p.count = 0;
    124         // Set before ever read in main transition function
    125         p.after_string_state = undefined;
    126         p.after_value_state = State.ValueEnd; // handle end of values normally
    127         p.stack = 0;
    128         p.stack_used = 0;
    129         p.complete = false;
    130         p.string_has_escape = false;
    131         p.number_is_integer = true;
    132     }
    133 
    134     pub const State = enum {
    135         // These must be first with these explicit values as we rely on them for indexing the
    136         // bit-stack directly and avoiding a branch.
    137         ObjectSeparator = 0,
    138         ValueEnd = 1,
    139 
    140         TopLevelBegin,
    141         TopLevelEnd,
    142 
    143         ValueBegin,
    144         ValueBeginNoClosing,
    145 
    146         String,
    147         StringUtf8Byte3,
    148         StringUtf8Byte2,
    149         StringUtf8Byte1,
    150         StringEscapeCharacter,
    151         StringEscapeHexUnicode4,
    152         StringEscapeHexUnicode3,
    153         StringEscapeHexUnicode2,
    154         StringEscapeHexUnicode1,
    155 
    156         Number,
    157         NumberMaybeDotOrExponent,
    158         NumberMaybeDigitOrDotOrExponent,
    159         NumberFractionalRequired,
    160         NumberFractional,
    161         NumberMaybeExponent,
    162         NumberExponent,
    163         NumberExponentDigitsRequired,
    164         NumberExponentDigits,
    165 
    166         TrueLiteral1,
    167         TrueLiteral2,
    168         TrueLiteral3,
    169 
    170         FalseLiteral1,
    171         FalseLiteral2,
    172         FalseLiteral3,
    173         FalseLiteral4,
    174 
    175         NullLiteral1,
    176         NullLiteral2,
    177         NullLiteral3,
    178 
    179         // Only call this function to generate array/object final state.
    180         pub fn fromInt(x: var) State {
    181             debug.assert(x == 0 or x == 1);
    182             const T = @TagType(State);
    183             return @intToEnum(State, @intCast(T, x));
    184         }
    185     };
    186 
    187     pub const Error = error{
    188         InvalidTopLevel,
    189         TooManyNestedItems,
    190         TooManyClosingItems,
    191         InvalidValueBegin,
    192         InvalidValueEnd,
    193         UnbalancedBrackets,
    194         UnbalancedBraces,
    195         UnexpectedClosingBracket,
    196         UnexpectedClosingBrace,
    197         InvalidNumber,
    198         InvalidSeparator,
    199         InvalidLiteral,
    200         InvalidEscapeCharacter,
    201         InvalidUnicodeHexSymbol,
    202         InvalidUtf8Byte,
    203         InvalidTopLevelTrailing,
    204         InvalidControlCharacter,
    205     };
    206 
    207     // Give another byte to the parser and obtain any new tokens. This may (rarely) return two
    208     // tokens. token2 is always null if token1 is null.
    209     //
    210     // There is currently no error recovery on a bad stream.
    211     pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void {
    212         token1.* = null;
    213         token2.* = null;
    214         p.count += 1;
    215 
    216         // unlikely
    217         if (try p.transition(c, token1)) {
    218             _ = try p.transition(c, token2);
    219         }
    220     }
    221 
    222     // Perform a single transition on the state machine and return any possible token.
    223     fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool {
    224         switch (p.state) {
    225             State.TopLevelBegin => switch (c) {
    226                 '{' => {
    227                     p.stack <<= 1;
    228                     p.stack |= object_bit;
    229                     p.stack_used += 1;
    230 
    231                     p.state = State.ValueBegin;
    232                     p.after_string_state = State.ObjectSeparator;
    233 
    234                     token.* = Token.initMarker(Token.Id.ObjectBegin);
    235                 },
    236                 '[' => {
    237                     p.stack <<= 1;
    238                     p.stack |= array_bit;
    239                     p.stack_used += 1;
    240 
    241                     p.state = State.ValueBegin;
    242                     p.after_string_state = State.ValueEnd;
    243 
    244                     token.* = Token.initMarker(Token.Id.ArrayBegin);
    245                 },
    246                 '-' => {
    247                     p.number_is_integer = true;
    248                     p.state = State.Number;
    249                     p.after_value_state = State.TopLevelEnd;
    250                     p.count = 0;
    251                 },
    252                 '0' => {
    253                     p.number_is_integer = true;
    254                     p.state = State.NumberMaybeDotOrExponent;
    255                     p.after_value_state = State.TopLevelEnd;
    256                     p.count = 0;
    257                 },
    258                 '1'...'9' => {
    259                     p.number_is_integer = true;
    260                     p.state = State.NumberMaybeDigitOrDotOrExponent;
    261                     p.after_value_state = State.TopLevelEnd;
    262                     p.count = 0;
    263                 },
    264                 '"' => {
    265                     p.state = State.String;
    266                     p.after_value_state = State.TopLevelEnd;
    267                     // We don't actually need the following since after_value_state should override.
    268                     p.after_string_state = State.ValueEnd;
    269                     p.string_has_escape = false;
    270                     p.count = 0;
    271                 },
    272                 't' => {
    273                     p.state = State.TrueLiteral1;
    274                     p.after_value_state = State.TopLevelEnd;
    275                     p.count = 0;
    276                 },
    277                 'f' => {
    278                     p.state = State.FalseLiteral1;
    279                     p.after_value_state = State.TopLevelEnd;
    280                     p.count = 0;
    281                 },
    282                 'n' => {
    283                     p.state = State.NullLiteral1;
    284                     p.after_value_state = State.TopLevelEnd;
    285                     p.count = 0;
    286                 },
    287                 0x09, 0x0A, 0x0D, 0x20 => {
    288                     // whitespace
    289                 },
    290                 else => {
    291                     return error.InvalidTopLevel;
    292                 },
    293             },
    294 
    295             State.TopLevelEnd => switch (c) {
    296                 0x09, 0x0A, 0x0D, 0x20 => {
    297                     // whitespace
    298                 },
    299                 else => {
    300                     return error.InvalidTopLevelTrailing;
    301                 },
    302             },
    303 
    304             State.ValueBegin => switch (c) {
    305                 // NOTE: These are shared in ValueEnd as well, think we can reorder states to
    306                 // be a bit clearer and avoid this duplication.
    307                 '}' => {
    308                     // unlikely
    309                     if (p.stack & 1 != object_bit) {
    310                         return error.UnexpectedClosingBracket;
    311                     }
    312                     if (p.stack_used == 0) {
    313                         return error.TooManyClosingItems;
    314                     }
    315 
    316                     p.state = State.ValueBegin;
    317                     p.after_string_state = State.fromInt(p.stack & 1);
    318 
    319                     p.stack >>= 1;
    320                     p.stack_used -= 1;
    321 
    322                     switch (p.stack_used) {
    323                         0 => {
    324                             p.complete = true;
    325                             p.state = State.TopLevelEnd;
    326                         },
    327                         else => {
    328                             p.state = State.ValueEnd;
    329                         },
    330                     }
    331 
    332                     token.* = Token.initMarker(Token.Id.ObjectEnd);
    333                 },
    334                 ']' => {
    335                     if (p.stack & 1 != array_bit) {
    336                         return error.UnexpectedClosingBrace;
    337                     }
    338                     if (p.stack_used == 0) {
    339                         return error.TooManyClosingItems;
    340                     }
    341 
    342                     p.state = State.ValueBegin;
    343                     p.after_string_state = State.fromInt(p.stack & 1);
    344 
    345                     p.stack >>= 1;
    346                     p.stack_used -= 1;
    347 
    348                     switch (p.stack_used) {
    349                         0 => {
    350                             p.complete = true;
    351                             p.state = State.TopLevelEnd;
    352                         },
    353                         else => {
    354                             p.state = State.ValueEnd;
    355                         },
    356                     }
    357 
    358                     token.* = Token.initMarker(Token.Id.ArrayEnd);
    359                 },
    360                 '{' => {
    361                     if (p.stack_used == max_stack_size) {
    362                         return error.TooManyNestedItems;
    363                     }
    364 
    365                     p.stack <<= 1;
    366                     p.stack |= object_bit;
    367                     p.stack_used += 1;
    368 
    369                     p.state = State.ValueBegin;
    370                     p.after_string_state = State.ObjectSeparator;
    371 
    372                     token.* = Token.initMarker(Token.Id.ObjectBegin);
    373                 },
    374                 '[' => {
    375                     if (p.stack_used == max_stack_size) {
    376                         return error.TooManyNestedItems;
    377                     }
    378 
    379                     p.stack <<= 1;
    380                     p.stack |= array_bit;
    381                     p.stack_used += 1;
    382 
    383                     p.state = State.ValueBegin;
    384                     p.after_string_state = State.ValueEnd;
    385 
    386                     token.* = Token.initMarker(Token.Id.ArrayBegin);
    387                 },
    388                 '-' => {
    389                     p.state = State.Number;
    390                     p.count = 0;
    391                 },
    392                 '0' => {
    393                     p.state = State.NumberMaybeDotOrExponent;
    394                     p.count = 0;
    395                 },
    396                 '1'...'9' => {
    397                     p.state = State.NumberMaybeDigitOrDotOrExponent;
    398                     p.count = 0;
    399                 },
    400                 '"' => {
    401                     p.state = State.String;
    402                     p.count = 0;
    403                 },
    404                 't' => {
    405                     p.state = State.TrueLiteral1;
    406                     p.count = 0;
    407                 },
    408                 'f' => {
    409                     p.state = State.FalseLiteral1;
    410                     p.count = 0;
    411                 },
    412                 'n' => {
    413                     p.state = State.NullLiteral1;
    414                     p.count = 0;
    415                 },
    416                 0x09, 0x0A, 0x0D, 0x20 => {
    417                     // whitespace
    418                 },
    419                 else => {
    420                     return error.InvalidValueBegin;
    421                 },
    422             },
    423 
    424             // TODO: A bit of duplication here and in the following state, redo.
    425             State.ValueBeginNoClosing => switch (c) {
    426                 '{' => {
    427                     if (p.stack_used == max_stack_size) {
    428                         return error.TooManyNestedItems;
    429                     }
    430 
    431                     p.stack <<= 1;
    432                     p.stack |= object_bit;
    433                     p.stack_used += 1;
    434 
    435                     p.state = State.ValueBegin;
    436                     p.after_string_state = State.ObjectSeparator;
    437 
    438                     token.* = Token.initMarker(Token.Id.ObjectBegin);
    439                 },
    440                 '[' => {
    441                     if (p.stack_used == max_stack_size) {
    442                         return error.TooManyNestedItems;
    443                     }
    444 
    445                     p.stack <<= 1;
    446                     p.stack |= array_bit;
    447                     p.stack_used += 1;
    448 
    449                     p.state = State.ValueBegin;
    450                     p.after_string_state = State.ValueEnd;
    451 
    452                     token.* = Token.initMarker(Token.Id.ArrayBegin);
    453                 },
    454                 '-' => {
    455                     p.state = State.Number;
    456                     p.count = 0;
    457                 },
    458                 '0' => {
    459                     p.state = State.NumberMaybeDotOrExponent;
    460                     p.count = 0;
    461                 },
    462                 '1'...'9' => {
    463                     p.state = State.NumberMaybeDigitOrDotOrExponent;
    464                     p.count = 0;
    465                 },
    466                 '"' => {
    467                     p.state = State.String;
    468                     p.count = 0;
    469                 },
    470                 't' => {
    471                     p.state = State.TrueLiteral1;
    472                     p.count = 0;
    473                 },
    474                 'f' => {
    475                     p.state = State.FalseLiteral1;
    476                     p.count = 0;
    477                 },
    478                 'n' => {
    479                     p.state = State.NullLiteral1;
    480                     p.count = 0;
    481                 },
    482                 0x09, 0x0A, 0x0D, 0x20 => {
    483                     // whitespace
    484                 },
    485                 else => {
    486                     return error.InvalidValueBegin;
    487                 },
    488             },
    489 
    490             State.ValueEnd => switch (c) {
    491                 ',' => {
    492                     p.after_string_state = State.fromInt(p.stack & 1);
    493                     p.state = State.ValueBeginNoClosing;
    494                 },
    495                 ']' => {
    496                     if (p.stack_used == 0) {
    497                         return error.UnbalancedBrackets;
    498                     }
    499 
    500                     p.state = State.ValueEnd;
    501                     p.after_string_state = State.fromInt(p.stack & 1);
    502 
    503                     p.stack >>= 1;
    504                     p.stack_used -= 1;
    505 
    506                     if (p.stack_used == 0) {
    507                         p.complete = true;
    508                         p.state = State.TopLevelEnd;
    509                     }
    510 
    511                     token.* = Token.initMarker(Token.Id.ArrayEnd);
    512                 },
    513                 '}' => {
    514                     if (p.stack_used == 0) {
    515                         return error.UnbalancedBraces;
    516                     }
    517 
    518                     p.state = State.ValueEnd;
    519                     p.after_string_state = State.fromInt(p.stack & 1);
    520 
    521                     p.stack >>= 1;
    522                     p.stack_used -= 1;
    523 
    524                     if (p.stack_used == 0) {
    525                         p.complete = true;
    526                         p.state = State.TopLevelEnd;
    527                     }
    528 
    529                     token.* = Token.initMarker(Token.Id.ObjectEnd);
    530                 },
    531                 0x09, 0x0A, 0x0D, 0x20 => {
    532                     // whitespace
    533                 },
    534                 else => {
    535                     return error.InvalidValueEnd;
    536                 },
    537             },
    538 
    539             State.ObjectSeparator => switch (c) {
    540                 ':' => {
    541                     p.state = State.ValueBegin;
    542                     p.after_string_state = State.ValueEnd;
    543                 },
    544                 0x09, 0x0A, 0x0D, 0x20 => {
    545                     // whitespace
    546                 },
    547                 else => {
    548                     return error.InvalidSeparator;
    549                 },
    550             },
    551 
    552             State.String => switch (c) {
    553                 0x00...0x1F => {
    554                     return error.InvalidControlCharacter;
    555                 },
    556                 '"' => {
    557                     p.state = p.after_string_state;
    558                     if (p.after_value_state == State.TopLevelEnd) {
    559                         p.state = State.TopLevelEnd;
    560                         p.complete = true;
    561                     }
    562 
    563                     token.* = Token.initString(p.count - 1, p.string_has_escape);
    564                 },
    565                 '\\' => {
    566                     p.state = State.StringEscapeCharacter;
    567                 },
    568                 0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => {
    569                     // non-control ascii
    570                 },
    571                 0xC0...0xDF => {
    572                     p.state = State.StringUtf8Byte1;
    573                 },
    574                 0xE0...0xEF => {
    575                     p.state = State.StringUtf8Byte2;
    576                 },
    577                 0xF0...0xFF => {
    578                     p.state = State.StringUtf8Byte3;
    579                 },
    580                 else => {
    581                     return error.InvalidUtf8Byte;
    582                 },
    583             },
    584 
    585             State.StringUtf8Byte3 => switch (c >> 6) {
    586                 0b10 => p.state = State.StringUtf8Byte2,
    587                 else => return error.InvalidUtf8Byte,
    588             },
    589 
    590             State.StringUtf8Byte2 => switch (c >> 6) {
    591                 0b10 => p.state = State.StringUtf8Byte1,
    592                 else => return error.InvalidUtf8Byte,
    593             },
    594 
    595             State.StringUtf8Byte1 => switch (c >> 6) {
    596                 0b10 => p.state = State.String,
    597                 else => return error.InvalidUtf8Byte,
    598             },
    599 
    600             State.StringEscapeCharacter => switch (c) {
    601                 // NOTE: '/' is allowed as an escaped character but it also is allowed
    602                 // as unescaped according to the RFC. There is a reported errata which suggests
    603                 // removing the non-escaped variant but it makes more sense to simply disallow
    604                 // it as an escape code here.
    605                 //
    606                 // The current JSONTestSuite tests rely on both of this behaviour being present
    607                 // however, so we default to the status quo where both are accepted until this
    608                 // is further clarified.
    609                 '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => {
    610                     p.string_has_escape = true;
    611                     p.state = State.String;
    612                 },
    613                 'u' => {
    614                     p.string_has_escape = true;
    615                     p.state = State.StringEscapeHexUnicode4;
    616                 },
    617                 else => {
    618                     return error.InvalidEscapeCharacter;
    619                 },
    620             },
    621 
    622             State.StringEscapeHexUnicode4 => switch (c) {
    623                 '0'...'9', 'A'...'F', 'a'...'f' => {
    624                     p.state = State.StringEscapeHexUnicode3;
    625                 },
    626                 else => return error.InvalidUnicodeHexSymbol,
    627             },
    628 
    629             State.StringEscapeHexUnicode3 => switch (c) {
    630                 '0'...'9', 'A'...'F', 'a'...'f' => {
    631                     p.state = State.StringEscapeHexUnicode2;
    632                 },
    633                 else => return error.InvalidUnicodeHexSymbol,
    634             },
    635 
    636             State.StringEscapeHexUnicode2 => switch (c) {
    637                 '0'...'9', 'A'...'F', 'a'...'f' => {
    638                     p.state = State.StringEscapeHexUnicode1;
    639                 },
    640                 else => return error.InvalidUnicodeHexSymbol,
    641             },
    642 
    643             State.StringEscapeHexUnicode1 => switch (c) {
    644                 '0'...'9', 'A'...'F', 'a'...'f' => {
    645                     p.state = State.String;
    646                 },
    647                 else => return error.InvalidUnicodeHexSymbol,
    648             },
    649 
    650             State.Number => {
    651                 p.complete = p.after_value_state == State.TopLevelEnd;
    652                 switch (c) {
    653                     '0' => {
    654                         p.state = State.NumberMaybeDotOrExponent;
    655                     },
    656                     '1'...'9' => {
    657                         p.state = State.NumberMaybeDigitOrDotOrExponent;
    658                     },
    659                     else => {
    660                         return error.InvalidNumber;
    661                     },
    662                 }
    663             },
    664 
    665             State.NumberMaybeDotOrExponent => {
    666                 p.complete = p.after_value_state == State.TopLevelEnd;
    667                 switch (c) {
    668                     '.' => {
    669                         p.number_is_integer = false;
    670                         p.state = State.NumberFractionalRequired;
    671                     },
    672                     'e', 'E' => {
    673                         p.number_is_integer = false;
    674                         p.state = State.NumberExponent;
    675                     },
    676                     else => {
    677                         p.state = p.after_value_state;
    678                         token.* = Token.initNumber(p.count, p.number_is_integer);
    679                         return true;
    680                     },
    681                 }
    682             },
    683 
    684             State.NumberMaybeDigitOrDotOrExponent => {
    685                 p.complete = p.after_value_state == State.TopLevelEnd;
    686                 switch (c) {
    687                     '.' => {
    688                         p.number_is_integer = false;
    689                         p.state = State.NumberFractionalRequired;
    690                     },
    691                     'e', 'E' => {
    692                         p.number_is_integer = false;
    693                         p.state = State.NumberExponent;
    694                     },
    695                     '0'...'9' => {
    696                         // another digit
    697                     },
    698                     else => {
    699                         p.state = p.after_value_state;
    700                         token.* = Token.initNumber(p.count, p.number_is_integer);
    701                         return true;
    702                     },
    703                 }
    704             },
    705 
    706             State.NumberFractionalRequired => {
    707                 p.complete = p.after_value_state == State.TopLevelEnd;
    708                 switch (c) {
    709                     '0'...'9' => {
    710                         p.state = State.NumberFractional;
    711                     },
    712                     else => {
    713                         return error.InvalidNumber;
    714                     },
    715                 }
    716             },
    717 
    718             State.NumberFractional => {
    719                 p.complete = p.after_value_state == State.TopLevelEnd;
    720                 switch (c) {
    721                     '0'...'9' => {
    722                         // another digit
    723                     },
    724                     'e', 'E' => {
    725                         p.number_is_integer = false;
    726                         p.state = State.NumberExponent;
    727                     },
    728                     else => {
    729                         p.state = p.after_value_state;
    730                         token.* = Token.initNumber(p.count, p.number_is_integer);
    731                         return true;
    732                     },
    733                 }
    734             },
    735 
    736             State.NumberMaybeExponent => {
    737                 p.complete = p.after_value_state == State.TopLevelEnd;
    738                 switch (c) {
    739                     'e', 'E' => {
    740                         p.number_is_integer = false;
    741                         p.state = State.NumberExponent;
    742                     },
    743                     else => {
    744                         p.state = p.after_value_state;
    745                         token.* = Token.initNumber(p.count, p.number_is_integer);
    746                         return true;
    747                     },
    748                 }
    749             },
    750 
    751             State.NumberExponent => switch (c) {
    752                 '-', '+' => {
    753                     p.complete = false;
    754                     p.state = State.NumberExponentDigitsRequired;
    755                 },
    756                 '0'...'9' => {
    757                     p.complete = p.after_value_state == State.TopLevelEnd;
    758                     p.state = State.NumberExponentDigits;
    759                 },
    760                 else => {
    761                     return error.InvalidNumber;
    762                 },
    763             },
    764 
    765             State.NumberExponentDigitsRequired => switch (c) {
    766                 '0'...'9' => {
    767                     p.complete = p.after_value_state == State.TopLevelEnd;
    768                     p.state = State.NumberExponentDigits;
    769                 },
    770                 else => {
    771                     return error.InvalidNumber;
    772                 },
    773             },
    774 
    775             State.NumberExponentDigits => {
    776                 p.complete = p.after_value_state == State.TopLevelEnd;
    777                 switch (c) {
    778                     '0'...'9' => {
    779                         // another digit
    780                     },
    781                     else => {
    782                         p.state = p.after_value_state;
    783                         token.* = Token.initNumber(p.count, p.number_is_integer);
    784                         return true;
    785                     },
    786                 }
    787             },
    788 
    789             State.TrueLiteral1 => switch (c) {
    790                 'r' => p.state = State.TrueLiteral2,
    791                 else => return error.InvalidLiteral,
    792             },
    793 
    794             State.TrueLiteral2 => switch (c) {
    795                 'u' => p.state = State.TrueLiteral3,
    796                 else => return error.InvalidLiteral,
    797             },
    798 
    799             State.TrueLiteral3 => switch (c) {
    800                 'e' => {
    801                     p.state = p.after_value_state;
    802                     p.complete = p.state == State.TopLevelEnd;
    803                     token.* = Token.init(Token.Id.True, p.count + 1, 1);
    804                 },
    805                 else => {
    806                     return error.InvalidLiteral;
    807                 },
    808             },
    809 
    810             State.FalseLiteral1 => switch (c) {
    811                 'a' => p.state = State.FalseLiteral2,
    812                 else => return error.InvalidLiteral,
    813             },
    814 
    815             State.FalseLiteral2 => switch (c) {
    816                 'l' => p.state = State.FalseLiteral3,
    817                 else => return error.InvalidLiteral,
    818             },
    819 
    820             State.FalseLiteral3 => switch (c) {
    821                 's' => p.state = State.FalseLiteral4,
    822                 else => return error.InvalidLiteral,
    823             },
    824 
    825             State.FalseLiteral4 => switch (c) {
    826                 'e' => {
    827                     p.state = p.after_value_state;
    828                     p.complete = p.state == State.TopLevelEnd;
    829                     token.* = Token.init(Token.Id.False, p.count + 1, 1);
    830                 },
    831                 else => {
    832                     return error.InvalidLiteral;
    833                 },
    834             },
    835 
    836             State.NullLiteral1 => switch (c) {
    837                 'u' => p.state = State.NullLiteral2,
    838                 else => return error.InvalidLiteral,
    839             },
    840 
    841             State.NullLiteral2 => switch (c) {
    842                 'l' => p.state = State.NullLiteral3,
    843                 else => return error.InvalidLiteral,
    844             },
    845 
    846             State.NullLiteral3 => switch (c) {
    847                 'l' => {
    848                     p.state = p.after_value_state;
    849                     p.complete = p.state == State.TopLevelEnd;
    850                     token.* = Token.init(Token.Id.Null, p.count + 1, 1);
    851                 },
    852                 else => {
    853                     return error.InvalidLiteral;
    854                 },
    855             },
    856         }
    857 
    858         return false;
    859     }
    860 };
    861 
    862 // A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens.
    863 pub const TokenStream = struct {
    864     i: usize,
    865     slice: []const u8,
    866     parser: StreamingParser,
    867     token: ?Token,
    868 
    869     pub fn init(slice: []const u8) TokenStream {
    870         return TokenStream{
    871             .i = 0,
    872             .slice = slice,
    873             .parser = StreamingParser.init(),
    874             .token = null,
    875         };
    876     }
    877 
    878     pub fn next(self: *TokenStream) !?Token {
    879         if (self.token) |token| {
    880             self.token = null;
    881             return token;
    882         }
    883 
    884         var t1: ?Token = undefined;
    885         var t2: ?Token = undefined;
    886 
    887         while (self.i < self.slice.len) {
    888             try self.parser.feed(self.slice[self.i], &t1, &t2);
    889             self.i += 1;
    890 
    891             if (t1) |token| {
    892                 self.token = t2;
    893                 return token;
    894             }
    895         }
    896 
    897         if (self.i > self.slice.len) {
    898             try self.parser.feed(' ', &t1, &t2);
    899             self.i += 1;
    900 
    901             if (t1) |token| {
    902                 return token;
    903             }
    904         }
    905 
    906         return null;
    907     }
    908 };
    909 
    910 fn checkNext(p: *TokenStream, id: Token.Id) void {
    911     const token = (p.next() catch unreachable).?;
    912     debug.assert(token.id == id);
    913 }
    914 
    915 test "token" {
    916     const s =
    917         \\{
    918         \\  "Image": {
    919         \\      "Width":  800,
    920         \\      "Height": 600,
    921         \\      "Title":  "View from 15th Floor",
    922         \\      "Thumbnail": {
    923         \\          "Url":    "http://www.example.com/image/481989943",
    924         \\          "Height": 125,
    925         \\          "Width":  100
    926         \\      },
    927         \\      "Animated" : false,
    928         \\      "IDs": [116, 943, 234, 38793]
    929         \\    }
    930         \\}
    931     ;
    932 
    933     var p = TokenStream.init(s);
    934 
    935     checkNext(&p, Token.Id.ObjectBegin);
    936     checkNext(&p, Token.Id.String); // Image
    937     checkNext(&p, Token.Id.ObjectBegin);
    938     checkNext(&p, Token.Id.String); // Width
    939     checkNext(&p, Token.Id.Number);
    940     checkNext(&p, Token.Id.String); // Height
    941     checkNext(&p, Token.Id.Number);
    942     checkNext(&p, Token.Id.String); // Title
    943     checkNext(&p, Token.Id.String);
    944     checkNext(&p, Token.Id.String); // Thumbnail
    945     checkNext(&p, Token.Id.ObjectBegin);
    946     checkNext(&p, Token.Id.String); // Url
    947     checkNext(&p, Token.Id.String);
    948     checkNext(&p, Token.Id.String); // Height
    949     checkNext(&p, Token.Id.Number);
    950     checkNext(&p, Token.Id.String); // Width
    951     checkNext(&p, Token.Id.Number);
    952     checkNext(&p, Token.Id.ObjectEnd);
    953     checkNext(&p, Token.Id.String); // Animated
    954     checkNext(&p, Token.Id.False);
    955     checkNext(&p, Token.Id.String); // IDs
    956     checkNext(&p, Token.Id.ArrayBegin);
    957     checkNext(&p, Token.Id.Number);
    958     checkNext(&p, Token.Id.Number);
    959     checkNext(&p, Token.Id.Number);
    960     checkNext(&p, Token.Id.Number);
    961     checkNext(&p, Token.Id.ArrayEnd);
    962     checkNext(&p, Token.Id.ObjectEnd);
    963     checkNext(&p, Token.Id.ObjectEnd);
    964 
    965     debug.assert((try p.next()) == null);
    966 }
    967 
    968 // Validate a JSON string. This does not limit number precision so a decoder may not necessarily
    969 // be able to decode the string even if this returns true.
    970 pub fn validate(s: []const u8) bool {
    971     var p = StreamingParser.init();
    972 
    973     for (s) |c, i| {
    974         var token1: ?Token = undefined;
    975         var token2: ?Token = undefined;
    976 
    977         p.feed(c, &token1, &token2) catch |err| {
    978             return false;
    979         };
    980     }
    981 
    982     return p.complete;
    983 }
    984 
    985 test "json validate" {
    986     debug.assert(validate("{}"));
    987 }
    988 
    989 const Allocator = std.mem.Allocator;
    990 const ArenaAllocator = std.heap.ArenaAllocator;
    991 const ArrayList = std.ArrayList;
    992 const HashMap = std.HashMap;
    993 
    994 pub const ValueTree = struct {
    995     arena: ArenaAllocator,
    996     root: Value,
    997 
    998     pub fn deinit(self: *ValueTree) void {
    999         self.arena.deinit();
   1000     }
   1001 };
   1002 
   1003 pub const ObjectMap = HashMap([]const u8, Value, mem.hash_slice_u8, mem.eql_slice_u8);
   1004 
   1005 pub const Value = union(enum) {
   1006     Null,
   1007     Bool: bool,
   1008     Integer: i64,
   1009     Float: f64,
   1010     String: []const u8,
   1011     Array: ArrayList(Value),
   1012     Object: ObjectMap,
   1013 
   1014     pub fn dump(self: *const Value) void {
   1015         switch (self.*) {
   1016             Value.Null => {
   1017                 debug.warn("null");
   1018             },
   1019             Value.Bool => |inner| {
   1020                 debug.warn("{}", inner);
   1021             },
   1022             Value.Integer => |inner| {
   1023                 debug.warn("{}", inner);
   1024             },
   1025             Value.Float => |inner| {
   1026                 debug.warn("{.5}", inner);
   1027             },
   1028             Value.String => |inner| {
   1029                 debug.warn("\"{}\"", inner);
   1030             },
   1031             Value.Array => |inner| {
   1032                 var not_first = false;
   1033                 debug.warn("[");
   1034                 for (inner.toSliceConst()) |value| {
   1035                     if (not_first) {
   1036                         debug.warn(",");
   1037                     }
   1038                     not_first = true;
   1039                     value.dump();
   1040                 }
   1041                 debug.warn("]");
   1042             },
   1043             Value.Object => |inner| {
   1044                 var not_first = false;
   1045                 debug.warn("{{");
   1046                 var it = inner.iterator();
   1047 
   1048                 while (it.next()) |entry| {
   1049                     if (not_first) {
   1050                         debug.warn(",");
   1051                     }
   1052                     not_first = true;
   1053                     debug.warn("\"{}\":", entry.key);
   1054                     entry.value.dump();
   1055                 }
   1056                 debug.warn("}}");
   1057             },
   1058         }
   1059     }
   1060 
   1061     pub fn dumpIndent(self: *const Value, indent: usize) void {
   1062         if (indent == 0) {
   1063             self.dump();
   1064         } else {
   1065             self.dumpIndentLevel(indent, 0);
   1066         }
   1067     }
   1068 
   1069     fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void {
   1070         switch (self.*) {
   1071             Value.Null => {
   1072                 debug.warn("null");
   1073             },
   1074             Value.Bool => |inner| {
   1075                 debug.warn("{}", inner);
   1076             },
   1077             Value.Integer => |inner| {
   1078                 debug.warn("{}", inner);
   1079             },
   1080             Value.Float => |inner| {
   1081                 debug.warn("{.5}", inner);
   1082             },
   1083             Value.String => |inner| {
   1084                 debug.warn("\"{}\"", inner);
   1085             },
   1086             Value.Array => |inner| {
   1087                 var not_first = false;
   1088                 debug.warn("[\n");
   1089 
   1090                 for (inner.toSliceConst()) |value| {
   1091                     if (not_first) {
   1092                         debug.warn(",\n");
   1093                     }
   1094                     not_first = true;
   1095                     padSpace(level + indent);
   1096                     value.dumpIndentLevel(indent, level + indent);
   1097                 }
   1098                 debug.warn("\n");
   1099                 padSpace(level);
   1100                 debug.warn("]");
   1101             },
   1102             Value.Object => |inner| {
   1103                 var not_first = false;
   1104                 debug.warn("{{\n");
   1105                 var it = inner.iterator();
   1106 
   1107                 while (it.next()) |entry| {
   1108                     if (not_first) {
   1109                         debug.warn(",\n");
   1110                     }
   1111                     not_first = true;
   1112                     padSpace(level + indent);
   1113                     debug.warn("\"{}\": ", entry.key);
   1114                     entry.value.dumpIndentLevel(indent, level + indent);
   1115                 }
   1116                 debug.warn("\n");
   1117                 padSpace(level);
   1118                 debug.warn("}}");
   1119             },
   1120         }
   1121     }
   1122 
   1123     fn padSpace(indent: usize) void {
   1124         var i: usize = 0;
   1125         while (i < indent) : (i += 1) {
   1126             debug.warn(" ");
   1127         }
   1128     }
   1129 };
   1130 
   1131 // A non-stream JSON parser which constructs a tree of Value's.
   1132 pub const Parser = struct {
   1133     allocator: *Allocator,
   1134     state: State,
   1135     copy_strings: bool,
   1136     // Stores parent nodes and un-combined Values.
   1137     stack: ArrayList(Value),
   1138 
   1139     const State = enum {
   1140         ObjectKey,
   1141         ObjectValue,
   1142         ArrayValue,
   1143         Simple,
   1144     };
   1145 
   1146     pub fn init(allocator: *Allocator, copy_strings: bool) Parser {
   1147         return Parser{
   1148             .allocator = allocator,
   1149             .state = State.Simple,
   1150             .copy_strings = copy_strings,
   1151             .stack = ArrayList(Value).init(allocator),
   1152         };
   1153     }
   1154 
   1155     pub fn deinit(p: *Parser) void {
   1156         p.stack.deinit();
   1157     }
   1158 
   1159     pub fn reset(p: *Parser) void {
   1160         p.state = State.Simple;
   1161         p.stack.shrink(0);
   1162     }
   1163 
   1164     pub fn parse(p: *Parser, input: []const u8) !ValueTree {
   1165         var s = TokenStream.init(input);
   1166 
   1167         var arena = ArenaAllocator.init(p.allocator);
   1168         errdefer arena.deinit();
   1169 
   1170         while (try s.next()) |token| {
   1171             try p.transition(&arena.allocator, input, s.i - 1, token);
   1172         }
   1173 
   1174         debug.assert(p.stack.len == 1);
   1175 
   1176         return ValueTree{
   1177             .arena = arena,
   1178             .root = p.stack.at(0),
   1179         };
   1180     }
   1181 
   1182     // Even though p.allocator exists, we take an explicit allocator so that allocation state
   1183     // can be cleaned up on error correctly during a `parse` on call.
   1184     fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void {
   1185         switch (p.state) {
   1186             State.ObjectKey => switch (token.id) {
   1187                 Token.Id.ObjectEnd => {
   1188                     if (p.stack.len == 1) {
   1189                         return;
   1190                     }
   1191 
   1192                     var value = p.stack.pop();
   1193                     try p.pushToParent(value);
   1194                 },
   1195                 Token.Id.String => {
   1196                     try p.stack.append(try p.parseString(allocator, token, input, i));
   1197                     p.state = State.ObjectValue;
   1198                 },
   1199                 else => {
   1200                     unreachable;
   1201                 },
   1202             },
   1203             State.ObjectValue => {
   1204                 var object = &p.stack.items[p.stack.len - 2].Object;
   1205                 var key = p.stack.items[p.stack.len - 1].String;
   1206 
   1207                 switch (token.id) {
   1208                     Token.Id.ObjectBegin => {
   1209                         try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
   1210                         p.state = State.ObjectKey;
   1211                     },
   1212                     Token.Id.ArrayBegin => {
   1213                         try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) });
   1214                         p.state = State.ArrayValue;
   1215                     },
   1216                     Token.Id.String => {
   1217                         _ = try object.put(key, try p.parseString(allocator, token, input, i));
   1218                         _ = p.stack.pop();
   1219                         p.state = State.ObjectKey;
   1220                     },
   1221                     Token.Id.Number => {
   1222                         _ = try object.put(key, try p.parseNumber(token, input, i));
   1223                         _ = p.stack.pop();
   1224                         p.state = State.ObjectKey;
   1225                     },
   1226                     Token.Id.True => {
   1227                         _ = try object.put(key, Value{ .Bool = true });
   1228                         _ = p.stack.pop();
   1229                         p.state = State.ObjectKey;
   1230                     },
   1231                     Token.Id.False => {
   1232                         _ = try object.put(key, Value{ .Bool = false });
   1233                         _ = p.stack.pop();
   1234                         p.state = State.ObjectKey;
   1235                     },
   1236                     Token.Id.Null => {
   1237                         _ = try object.put(key, Value.Null);
   1238                         _ = p.stack.pop();
   1239                         p.state = State.ObjectKey;
   1240                     },
   1241                     Token.Id.ObjectEnd, Token.Id.ArrayEnd => {
   1242                         unreachable;
   1243                     },
   1244                 }
   1245             },
   1246             State.ArrayValue => {
   1247                 var array = &p.stack.items[p.stack.len - 1].Array;
   1248 
   1249                 switch (token.id) {
   1250                     Token.Id.ArrayEnd => {
   1251                         if (p.stack.len == 1) {
   1252                             return;
   1253                         }
   1254 
   1255                         var value = p.stack.pop();
   1256                         try p.pushToParent(value);
   1257                     },
   1258                     Token.Id.ObjectBegin => {
   1259                         try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
   1260                         p.state = State.ObjectKey;
   1261                     },
   1262                     Token.Id.ArrayBegin => {
   1263                         try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) });
   1264                         p.state = State.ArrayValue;
   1265                     },
   1266                     Token.Id.String => {
   1267                         try array.append(try p.parseString(allocator, token, input, i));
   1268                     },
   1269                     Token.Id.Number => {
   1270                         try array.append(try p.parseNumber(token, input, i));
   1271                     },
   1272                     Token.Id.True => {
   1273                         try array.append(Value{ .Bool = true });
   1274                     },
   1275                     Token.Id.False => {
   1276                         try array.append(Value{ .Bool = false });
   1277                     },
   1278                     Token.Id.Null => {
   1279                         try array.append(Value.Null);
   1280                     },
   1281                     Token.Id.ObjectEnd => {
   1282                         unreachable;
   1283                     },
   1284                 }
   1285             },
   1286             State.Simple => switch (token.id) {
   1287                 Token.Id.ObjectBegin => {
   1288                     try p.stack.append(Value{ .Object = ObjectMap.init(allocator) });
   1289                     p.state = State.ObjectKey;
   1290                 },
   1291                 Token.Id.ArrayBegin => {
   1292                     try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) });
   1293                     p.state = State.ArrayValue;
   1294                 },
   1295                 Token.Id.String => {
   1296                     try p.stack.append(try p.parseString(allocator, token, input, i));
   1297                 },
   1298                 Token.Id.Number => {
   1299                     try p.stack.append(try p.parseNumber(token, input, i));
   1300                 },
   1301                 Token.Id.True => {
   1302                     try p.stack.append(Value{ .Bool = true });
   1303                 },
   1304                 Token.Id.False => {
   1305                     try p.stack.append(Value{ .Bool = false });
   1306                 },
   1307                 Token.Id.Null => {
   1308                     try p.stack.append(Value.Null);
   1309                 },
   1310                 Token.Id.ObjectEnd, Token.Id.ArrayEnd => {
   1311                     unreachable;
   1312                 },
   1313             },
   1314         }
   1315     }
   1316 
   1317     fn pushToParent(p: *Parser, value: *const Value) !void {
   1318         switch (p.stack.at(p.stack.len - 1)) {
   1319             // Object Parent -> [ ..., object, <key>, value ]
   1320             Value.String => |key| {
   1321                 _ = p.stack.pop();
   1322 
   1323                 var object = &p.stack.items[p.stack.len - 1].Object;
   1324                 _ = try object.put(key, value);
   1325                 p.state = State.ObjectKey;
   1326             },
   1327             // Array Parent -> [ ..., <array>, value ]
   1328             Value.Array => |*array| {
   1329                 try array.append(value.*);
   1330                 p.state = State.ArrayValue;
   1331             },
   1332             else => {
   1333                 unreachable;
   1334             },
   1335         }
   1336     }
   1337 
   1338     fn parseString(p: *Parser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value {
   1339         // TODO: We don't strictly have to copy values which do not contain any escape
   1340         // characters if flagged with the option.
   1341         const slice = token.slice(input, i);
   1342         return Value{ .String = try mem.dupe(p.allocator, u8, slice) };
   1343     }
   1344 
   1345     fn parseNumber(p: *Parser, token: *const Token, input: []const u8, i: usize) !Value {
   1346         return if (token.number_is_integer)
   1347             Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) }
   1348         else
   1349             @panic("TODO: fmt.parseFloat not yet implemented");
   1350     }
   1351 };
   1352 
   1353 test "json parser dynamic" {
   1354     var p = Parser.init(debug.global_allocator, false);
   1355     defer p.deinit();
   1356 
   1357     const s =
   1358         \\{
   1359         \\  "Image": {
   1360         \\      "Width":  800,
   1361         \\      "Height": 600,
   1362         \\      "Title":  "View from 15th Floor",
   1363         \\      "Thumbnail": {
   1364         \\          "Url":    "http://www.example.com/image/481989943",
   1365         \\          "Height": 125,
   1366         \\          "Width":  100
   1367         \\      },
   1368         \\      "Animated" : false,
   1369         \\      "IDs": [116, 943, 234, 38793]
   1370         \\    }
   1371         \\}
   1372     ;
   1373 
   1374     var tree = try p.parse(s);
   1375     defer tree.deinit();
   1376 
   1377     var root = tree.root;
   1378 
   1379     var image = root.Object.get("Image").?.value;
   1380 
   1381     const width = image.Object.get("Width").?.value;
   1382     debug.assert(width.Integer == 800);
   1383 
   1384     const height = image.Object.get("Height").?.value;
   1385     debug.assert(height.Integer == 600);
   1386 
   1387     const title = image.Object.get("Title").?.value;
   1388     debug.assert(mem.eql(u8, title.String, "View from 15th Floor"));
   1389 
   1390     const animated = image.Object.get("Animated").?.value;
   1391     debug.assert(animated.Bool == false);
   1392 }