blob 8986034f (46319B) - Raw
1 // JSON parser conforming to RFC8259. 2 // 3 // https://tools.ietf.org/html/rfc8259 4 5 const std = @import("index.zig"); 6 const debug = std.debug; 7 const mem = std.mem; 8 9 const u1 = @IntType(false, 1); 10 const u256 = @IntType(false, 256); 11 12 // A single token slice into the parent string. 13 // 14 // Use `token.slice()` on the input at the current position to get the current slice. 15 pub const Token = struct { 16 id: Id, 17 // How many bytes do we skip before counting 18 offset: u1, 19 // Whether string contains a \uXXXX sequence and cannot be zero-copied 20 string_has_escape: bool, 21 // Whether number is simple and can be represented by an integer (i.e. no `.` or `e`) 22 number_is_integer: bool, 23 // How many bytes from the current position behind the start of this token is. 24 count: usize, 25 26 pub const Id = enum { 27 ObjectBegin, 28 ObjectEnd, 29 ArrayBegin, 30 ArrayEnd, 31 String, 32 Number, 33 True, 34 False, 35 Null, 36 }; 37 38 pub fn init(id: Id, count: usize, offset: u1) Token { 39 return Token{ 40 .id = id, 41 .offset = offset, 42 .string_has_escape = false, 43 .number_is_integer = true, 44 .count = count, 45 }; 46 } 47 48 pub fn initString(count: usize, has_unicode_escape: bool) Token { 49 return Token{ 50 .id = Id.String, 51 .offset = 0, 52 .string_has_escape = has_unicode_escape, 53 .number_is_integer = true, 54 .count = count, 55 }; 56 } 57 58 pub fn initNumber(count: usize, number_is_integer: bool) Token { 59 return Token{ 60 .id = Id.Number, 61 .offset = 0, 62 .string_has_escape = false, 63 .number_is_integer = number_is_integer, 64 .count = count, 65 }; 66 } 67 68 // A marker token is a zero-length 69 pub fn initMarker(id: Id) Token { 70 return Token{ 71 .id = id, 72 .offset = 0, 73 .string_has_escape = false, 74 .number_is_integer = true, 75 .count = 0, 76 }; 77 } 78 79 // Slice into the underlying input string. 80 pub fn slice(self: *const Token, input: []const u8, i: usize) []const u8 { 81 return input[i + self.offset - self.count .. i + self.offset]; 82 } 83 }; 84 85 // A small streaming JSON parser. This accepts input one byte at a time and returns tokens as 86 // they are encountered. No copies or allocations are performed during parsing and the entire 87 // parsing state requires ~40-50 bytes of stack space. 88 // 89 // Conforms strictly to RFC8529. 90 // 91 // For a non-byte based wrapper, consider using TokenStream instead. 92 pub const StreamingParser = struct { 93 // Current state 94 state: State, 95 // How many bytes we have counted for the current token 96 count: usize, 97 // What state to follow after parsing a string (either property or value string) 98 after_string_state: State, 99 // What state to follow after parsing a value (either top-level or value end) 100 after_value_state: State, 101 // If we stopped now, would the complete parsed string to now be a valid json string 102 complete: bool, 103 // Current token flags to pass through to the next generated, see Token. 104 string_has_escape: bool, 105 number_is_integer: bool, 106 107 // Bit-stack for nested object/map literals (max 255 nestings). 108 stack: u256, 109 stack_used: u8, 110 111 const object_bit = 0; 112 const array_bit = 1; 113 const max_stack_size = @maxValue(u8); 114 115 pub fn init() StreamingParser { 116 var p: StreamingParser = undefined; 117 p.reset(); 118 return p; 119 } 120 121 pub fn reset(p: *StreamingParser) void { 122 p.state = State.TopLevelBegin; 123 p.count = 0; 124 // Set before ever read in main transition function 125 p.after_string_state = undefined; 126 p.after_value_state = State.ValueEnd; // handle end of values normally 127 p.stack = 0; 128 p.stack_used = 0; 129 p.complete = false; 130 p.string_has_escape = false; 131 p.number_is_integer = true; 132 } 133 134 pub const State = enum { 135 // These must be first with these explicit values as we rely on them for indexing the 136 // bit-stack directly and avoiding a branch. 137 ObjectSeparator = 0, 138 ValueEnd = 1, 139 140 TopLevelBegin, 141 TopLevelEnd, 142 143 ValueBegin, 144 ValueBeginNoClosing, 145 146 String, 147 StringUtf8Byte3, 148 StringUtf8Byte2, 149 StringUtf8Byte1, 150 StringEscapeCharacter, 151 StringEscapeHexUnicode4, 152 StringEscapeHexUnicode3, 153 StringEscapeHexUnicode2, 154 StringEscapeHexUnicode1, 155 156 Number, 157 NumberMaybeDotOrExponent, 158 NumberMaybeDigitOrDotOrExponent, 159 NumberFractionalRequired, 160 NumberFractional, 161 NumberMaybeExponent, 162 NumberExponent, 163 NumberExponentDigitsRequired, 164 NumberExponentDigits, 165 166 TrueLiteral1, 167 TrueLiteral2, 168 TrueLiteral3, 169 170 FalseLiteral1, 171 FalseLiteral2, 172 FalseLiteral3, 173 FalseLiteral4, 174 175 NullLiteral1, 176 NullLiteral2, 177 NullLiteral3, 178 179 // Only call this function to generate array/object final state. 180 pub fn fromInt(x: var) State { 181 debug.assert(x == 0 or x == 1); 182 const T = @TagType(State); 183 return @intToEnum(State, @intCast(T, x)); 184 } 185 }; 186 187 pub const Error = error{ 188 InvalidTopLevel, 189 TooManyNestedItems, 190 TooManyClosingItems, 191 InvalidValueBegin, 192 InvalidValueEnd, 193 UnbalancedBrackets, 194 UnbalancedBraces, 195 UnexpectedClosingBracket, 196 UnexpectedClosingBrace, 197 InvalidNumber, 198 InvalidSeparator, 199 InvalidLiteral, 200 InvalidEscapeCharacter, 201 InvalidUnicodeHexSymbol, 202 InvalidUtf8Byte, 203 InvalidTopLevelTrailing, 204 InvalidControlCharacter, 205 }; 206 207 // Give another byte to the parser and obtain any new tokens. This may (rarely) return two 208 // tokens. token2 is always null if token1 is null. 209 // 210 // There is currently no error recovery on a bad stream. 211 pub fn feed(p: *StreamingParser, c: u8, token1: *?Token, token2: *?Token) Error!void { 212 token1.* = null; 213 token2.* = null; 214 p.count += 1; 215 216 // unlikely 217 if (try p.transition(c, token1)) { 218 _ = try p.transition(c, token2); 219 } 220 } 221 222 // Perform a single transition on the state machine and return any possible token. 223 fn transition(p: *StreamingParser, c: u8, token: *?Token) Error!bool { 224 switch (p.state) { 225 State.TopLevelBegin => switch (c) { 226 '{' => { 227 p.stack <<= 1; 228 p.stack |= object_bit; 229 p.stack_used += 1; 230 231 p.state = State.ValueBegin; 232 p.after_string_state = State.ObjectSeparator; 233 234 token.* = Token.initMarker(Token.Id.ObjectBegin); 235 }, 236 '[' => { 237 p.stack <<= 1; 238 p.stack |= array_bit; 239 p.stack_used += 1; 240 241 p.state = State.ValueBegin; 242 p.after_string_state = State.ValueEnd; 243 244 token.* = Token.initMarker(Token.Id.ArrayBegin); 245 }, 246 '-' => { 247 p.number_is_integer = true; 248 p.state = State.Number; 249 p.after_value_state = State.TopLevelEnd; 250 p.count = 0; 251 }, 252 '0' => { 253 p.number_is_integer = true; 254 p.state = State.NumberMaybeDotOrExponent; 255 p.after_value_state = State.TopLevelEnd; 256 p.count = 0; 257 }, 258 '1'...'9' => { 259 p.number_is_integer = true; 260 p.state = State.NumberMaybeDigitOrDotOrExponent; 261 p.after_value_state = State.TopLevelEnd; 262 p.count = 0; 263 }, 264 '"' => { 265 p.state = State.String; 266 p.after_value_state = State.TopLevelEnd; 267 // We don't actually need the following since after_value_state should override. 268 p.after_string_state = State.ValueEnd; 269 p.string_has_escape = false; 270 p.count = 0; 271 }, 272 't' => { 273 p.state = State.TrueLiteral1; 274 p.after_value_state = State.TopLevelEnd; 275 p.count = 0; 276 }, 277 'f' => { 278 p.state = State.FalseLiteral1; 279 p.after_value_state = State.TopLevelEnd; 280 p.count = 0; 281 }, 282 'n' => { 283 p.state = State.NullLiteral1; 284 p.after_value_state = State.TopLevelEnd; 285 p.count = 0; 286 }, 287 0x09, 0x0A, 0x0D, 0x20 => { 288 // whitespace 289 }, 290 else => { 291 return error.InvalidTopLevel; 292 }, 293 }, 294 295 State.TopLevelEnd => switch (c) { 296 0x09, 0x0A, 0x0D, 0x20 => { 297 // whitespace 298 }, 299 else => { 300 return error.InvalidTopLevelTrailing; 301 }, 302 }, 303 304 State.ValueBegin => switch (c) { 305 // NOTE: These are shared in ValueEnd as well, think we can reorder states to 306 // be a bit clearer and avoid this duplication. 307 '}' => { 308 // unlikely 309 if (p.stack & 1 != object_bit) { 310 return error.UnexpectedClosingBracket; 311 } 312 if (p.stack_used == 0) { 313 return error.TooManyClosingItems; 314 } 315 316 p.state = State.ValueBegin; 317 p.after_string_state = State.fromInt(p.stack & 1); 318 319 p.stack >>= 1; 320 p.stack_used -= 1; 321 322 switch (p.stack_used) { 323 0 => { 324 p.complete = true; 325 p.state = State.TopLevelEnd; 326 }, 327 else => { 328 p.state = State.ValueEnd; 329 }, 330 } 331 332 token.* = Token.initMarker(Token.Id.ObjectEnd); 333 }, 334 ']' => { 335 if (p.stack & 1 != array_bit) { 336 return error.UnexpectedClosingBrace; 337 } 338 if (p.stack_used == 0) { 339 return error.TooManyClosingItems; 340 } 341 342 p.state = State.ValueBegin; 343 p.after_string_state = State.fromInt(p.stack & 1); 344 345 p.stack >>= 1; 346 p.stack_used -= 1; 347 348 switch (p.stack_used) { 349 0 => { 350 p.complete = true; 351 p.state = State.TopLevelEnd; 352 }, 353 else => { 354 p.state = State.ValueEnd; 355 }, 356 } 357 358 token.* = Token.initMarker(Token.Id.ArrayEnd); 359 }, 360 '{' => { 361 if (p.stack_used == max_stack_size) { 362 return error.TooManyNestedItems; 363 } 364 365 p.stack <<= 1; 366 p.stack |= object_bit; 367 p.stack_used += 1; 368 369 p.state = State.ValueBegin; 370 p.after_string_state = State.ObjectSeparator; 371 372 token.* = Token.initMarker(Token.Id.ObjectBegin); 373 }, 374 '[' => { 375 if (p.stack_used == max_stack_size) { 376 return error.TooManyNestedItems; 377 } 378 379 p.stack <<= 1; 380 p.stack |= array_bit; 381 p.stack_used += 1; 382 383 p.state = State.ValueBegin; 384 p.after_string_state = State.ValueEnd; 385 386 token.* = Token.initMarker(Token.Id.ArrayBegin); 387 }, 388 '-' => { 389 p.state = State.Number; 390 p.count = 0; 391 }, 392 '0' => { 393 p.state = State.NumberMaybeDotOrExponent; 394 p.count = 0; 395 }, 396 '1'...'9' => { 397 p.state = State.NumberMaybeDigitOrDotOrExponent; 398 p.count = 0; 399 }, 400 '"' => { 401 p.state = State.String; 402 p.count = 0; 403 }, 404 't' => { 405 p.state = State.TrueLiteral1; 406 p.count = 0; 407 }, 408 'f' => { 409 p.state = State.FalseLiteral1; 410 p.count = 0; 411 }, 412 'n' => { 413 p.state = State.NullLiteral1; 414 p.count = 0; 415 }, 416 0x09, 0x0A, 0x0D, 0x20 => { 417 // whitespace 418 }, 419 else => { 420 return error.InvalidValueBegin; 421 }, 422 }, 423 424 // TODO: A bit of duplication here and in the following state, redo. 425 State.ValueBeginNoClosing => switch (c) { 426 '{' => { 427 if (p.stack_used == max_stack_size) { 428 return error.TooManyNestedItems; 429 } 430 431 p.stack <<= 1; 432 p.stack |= object_bit; 433 p.stack_used += 1; 434 435 p.state = State.ValueBegin; 436 p.after_string_state = State.ObjectSeparator; 437 438 token.* = Token.initMarker(Token.Id.ObjectBegin); 439 }, 440 '[' => { 441 if (p.stack_used == max_stack_size) { 442 return error.TooManyNestedItems; 443 } 444 445 p.stack <<= 1; 446 p.stack |= array_bit; 447 p.stack_used += 1; 448 449 p.state = State.ValueBegin; 450 p.after_string_state = State.ValueEnd; 451 452 token.* = Token.initMarker(Token.Id.ArrayBegin); 453 }, 454 '-' => { 455 p.state = State.Number; 456 p.count = 0; 457 }, 458 '0' => { 459 p.state = State.NumberMaybeDotOrExponent; 460 p.count = 0; 461 }, 462 '1'...'9' => { 463 p.state = State.NumberMaybeDigitOrDotOrExponent; 464 p.count = 0; 465 }, 466 '"' => { 467 p.state = State.String; 468 p.count = 0; 469 }, 470 't' => { 471 p.state = State.TrueLiteral1; 472 p.count = 0; 473 }, 474 'f' => { 475 p.state = State.FalseLiteral1; 476 p.count = 0; 477 }, 478 'n' => { 479 p.state = State.NullLiteral1; 480 p.count = 0; 481 }, 482 0x09, 0x0A, 0x0D, 0x20 => { 483 // whitespace 484 }, 485 else => { 486 return error.InvalidValueBegin; 487 }, 488 }, 489 490 State.ValueEnd => switch (c) { 491 ',' => { 492 p.after_string_state = State.fromInt(p.stack & 1); 493 p.state = State.ValueBeginNoClosing; 494 }, 495 ']' => { 496 if (p.stack_used == 0) { 497 return error.UnbalancedBrackets; 498 } 499 500 p.state = State.ValueEnd; 501 p.after_string_state = State.fromInt(p.stack & 1); 502 503 p.stack >>= 1; 504 p.stack_used -= 1; 505 506 if (p.stack_used == 0) { 507 p.complete = true; 508 p.state = State.TopLevelEnd; 509 } 510 511 token.* = Token.initMarker(Token.Id.ArrayEnd); 512 }, 513 '}' => { 514 if (p.stack_used == 0) { 515 return error.UnbalancedBraces; 516 } 517 518 p.state = State.ValueEnd; 519 p.after_string_state = State.fromInt(p.stack & 1); 520 521 p.stack >>= 1; 522 p.stack_used -= 1; 523 524 if (p.stack_used == 0) { 525 p.complete = true; 526 p.state = State.TopLevelEnd; 527 } 528 529 token.* = Token.initMarker(Token.Id.ObjectEnd); 530 }, 531 0x09, 0x0A, 0x0D, 0x20 => { 532 // whitespace 533 }, 534 else => { 535 return error.InvalidValueEnd; 536 }, 537 }, 538 539 State.ObjectSeparator => switch (c) { 540 ':' => { 541 p.state = State.ValueBegin; 542 p.after_string_state = State.ValueEnd; 543 }, 544 0x09, 0x0A, 0x0D, 0x20 => { 545 // whitespace 546 }, 547 else => { 548 return error.InvalidSeparator; 549 }, 550 }, 551 552 State.String => switch (c) { 553 0x00...0x1F => { 554 return error.InvalidControlCharacter; 555 }, 556 '"' => { 557 p.state = p.after_string_state; 558 if (p.after_value_state == State.TopLevelEnd) { 559 p.state = State.TopLevelEnd; 560 p.complete = true; 561 } 562 563 token.* = Token.initString(p.count - 1, p.string_has_escape); 564 }, 565 '\\' => { 566 p.state = State.StringEscapeCharacter; 567 }, 568 0x20, 0x21, 0x23...0x5B, 0x5D...0x7F => { 569 // non-control ascii 570 }, 571 0xC0...0xDF => { 572 p.state = State.StringUtf8Byte1; 573 }, 574 0xE0...0xEF => { 575 p.state = State.StringUtf8Byte2; 576 }, 577 0xF0...0xFF => { 578 p.state = State.StringUtf8Byte3; 579 }, 580 else => { 581 return error.InvalidUtf8Byte; 582 }, 583 }, 584 585 State.StringUtf8Byte3 => switch (c >> 6) { 586 0b10 => p.state = State.StringUtf8Byte2, 587 else => return error.InvalidUtf8Byte, 588 }, 589 590 State.StringUtf8Byte2 => switch (c >> 6) { 591 0b10 => p.state = State.StringUtf8Byte1, 592 else => return error.InvalidUtf8Byte, 593 }, 594 595 State.StringUtf8Byte1 => switch (c >> 6) { 596 0b10 => p.state = State.String, 597 else => return error.InvalidUtf8Byte, 598 }, 599 600 State.StringEscapeCharacter => switch (c) { 601 // NOTE: '/' is allowed as an escaped character but it also is allowed 602 // as unescaped according to the RFC. There is a reported errata which suggests 603 // removing the non-escaped variant but it makes more sense to simply disallow 604 // it as an escape code here. 605 // 606 // The current JSONTestSuite tests rely on both of this behaviour being present 607 // however, so we default to the status quo where both are accepted until this 608 // is further clarified. 609 '"', '\\', '/', 'b', 'f', 'n', 'r', 't' => { 610 p.string_has_escape = true; 611 p.state = State.String; 612 }, 613 'u' => { 614 p.string_has_escape = true; 615 p.state = State.StringEscapeHexUnicode4; 616 }, 617 else => { 618 return error.InvalidEscapeCharacter; 619 }, 620 }, 621 622 State.StringEscapeHexUnicode4 => switch (c) { 623 '0'...'9', 'A'...'F', 'a'...'f' => { 624 p.state = State.StringEscapeHexUnicode3; 625 }, 626 else => return error.InvalidUnicodeHexSymbol, 627 }, 628 629 State.StringEscapeHexUnicode3 => switch (c) { 630 '0'...'9', 'A'...'F', 'a'...'f' => { 631 p.state = State.StringEscapeHexUnicode2; 632 }, 633 else => return error.InvalidUnicodeHexSymbol, 634 }, 635 636 State.StringEscapeHexUnicode2 => switch (c) { 637 '0'...'9', 'A'...'F', 'a'...'f' => { 638 p.state = State.StringEscapeHexUnicode1; 639 }, 640 else => return error.InvalidUnicodeHexSymbol, 641 }, 642 643 State.StringEscapeHexUnicode1 => switch (c) { 644 '0'...'9', 'A'...'F', 'a'...'f' => { 645 p.state = State.String; 646 }, 647 else => return error.InvalidUnicodeHexSymbol, 648 }, 649 650 State.Number => { 651 p.complete = p.after_value_state == State.TopLevelEnd; 652 switch (c) { 653 '0' => { 654 p.state = State.NumberMaybeDotOrExponent; 655 }, 656 '1'...'9' => { 657 p.state = State.NumberMaybeDigitOrDotOrExponent; 658 }, 659 else => { 660 return error.InvalidNumber; 661 }, 662 } 663 }, 664 665 State.NumberMaybeDotOrExponent => { 666 p.complete = p.after_value_state == State.TopLevelEnd; 667 switch (c) { 668 '.' => { 669 p.number_is_integer = false; 670 p.state = State.NumberFractionalRequired; 671 }, 672 'e', 'E' => { 673 p.number_is_integer = false; 674 p.state = State.NumberExponent; 675 }, 676 else => { 677 p.state = p.after_value_state; 678 token.* = Token.initNumber(p.count, p.number_is_integer); 679 return true; 680 }, 681 } 682 }, 683 684 State.NumberMaybeDigitOrDotOrExponent => { 685 p.complete = p.after_value_state == State.TopLevelEnd; 686 switch (c) { 687 '.' => { 688 p.number_is_integer = false; 689 p.state = State.NumberFractionalRequired; 690 }, 691 'e', 'E' => { 692 p.number_is_integer = false; 693 p.state = State.NumberExponent; 694 }, 695 '0'...'9' => { 696 // another digit 697 }, 698 else => { 699 p.state = p.after_value_state; 700 token.* = Token.initNumber(p.count, p.number_is_integer); 701 return true; 702 }, 703 } 704 }, 705 706 State.NumberFractionalRequired => { 707 p.complete = p.after_value_state == State.TopLevelEnd; 708 switch (c) { 709 '0'...'9' => { 710 p.state = State.NumberFractional; 711 }, 712 else => { 713 return error.InvalidNumber; 714 }, 715 } 716 }, 717 718 State.NumberFractional => { 719 p.complete = p.after_value_state == State.TopLevelEnd; 720 switch (c) { 721 '0'...'9' => { 722 // another digit 723 }, 724 'e', 'E' => { 725 p.number_is_integer = false; 726 p.state = State.NumberExponent; 727 }, 728 else => { 729 p.state = p.after_value_state; 730 token.* = Token.initNumber(p.count, p.number_is_integer); 731 return true; 732 }, 733 } 734 }, 735 736 State.NumberMaybeExponent => { 737 p.complete = p.after_value_state == State.TopLevelEnd; 738 switch (c) { 739 'e', 'E' => { 740 p.number_is_integer = false; 741 p.state = State.NumberExponent; 742 }, 743 else => { 744 p.state = p.after_value_state; 745 token.* = Token.initNumber(p.count, p.number_is_integer); 746 return true; 747 }, 748 } 749 }, 750 751 State.NumberExponent => switch (c) { 752 '-', '+' => { 753 p.complete = false; 754 p.state = State.NumberExponentDigitsRequired; 755 }, 756 '0'...'9' => { 757 p.complete = p.after_value_state == State.TopLevelEnd; 758 p.state = State.NumberExponentDigits; 759 }, 760 else => { 761 return error.InvalidNumber; 762 }, 763 }, 764 765 State.NumberExponentDigitsRequired => switch (c) { 766 '0'...'9' => { 767 p.complete = p.after_value_state == State.TopLevelEnd; 768 p.state = State.NumberExponentDigits; 769 }, 770 else => { 771 return error.InvalidNumber; 772 }, 773 }, 774 775 State.NumberExponentDigits => { 776 p.complete = p.after_value_state == State.TopLevelEnd; 777 switch (c) { 778 '0'...'9' => { 779 // another digit 780 }, 781 else => { 782 p.state = p.after_value_state; 783 token.* = Token.initNumber(p.count, p.number_is_integer); 784 return true; 785 }, 786 } 787 }, 788 789 State.TrueLiteral1 => switch (c) { 790 'r' => p.state = State.TrueLiteral2, 791 else => return error.InvalidLiteral, 792 }, 793 794 State.TrueLiteral2 => switch (c) { 795 'u' => p.state = State.TrueLiteral3, 796 else => return error.InvalidLiteral, 797 }, 798 799 State.TrueLiteral3 => switch (c) { 800 'e' => { 801 p.state = p.after_value_state; 802 p.complete = p.state == State.TopLevelEnd; 803 token.* = Token.init(Token.Id.True, p.count + 1, 1); 804 }, 805 else => { 806 return error.InvalidLiteral; 807 }, 808 }, 809 810 State.FalseLiteral1 => switch (c) { 811 'a' => p.state = State.FalseLiteral2, 812 else => return error.InvalidLiteral, 813 }, 814 815 State.FalseLiteral2 => switch (c) { 816 'l' => p.state = State.FalseLiteral3, 817 else => return error.InvalidLiteral, 818 }, 819 820 State.FalseLiteral3 => switch (c) { 821 's' => p.state = State.FalseLiteral4, 822 else => return error.InvalidLiteral, 823 }, 824 825 State.FalseLiteral4 => switch (c) { 826 'e' => { 827 p.state = p.after_value_state; 828 p.complete = p.state == State.TopLevelEnd; 829 token.* = Token.init(Token.Id.False, p.count + 1, 1); 830 }, 831 else => { 832 return error.InvalidLiteral; 833 }, 834 }, 835 836 State.NullLiteral1 => switch (c) { 837 'u' => p.state = State.NullLiteral2, 838 else => return error.InvalidLiteral, 839 }, 840 841 State.NullLiteral2 => switch (c) { 842 'l' => p.state = State.NullLiteral3, 843 else => return error.InvalidLiteral, 844 }, 845 846 State.NullLiteral3 => switch (c) { 847 'l' => { 848 p.state = p.after_value_state; 849 p.complete = p.state == State.TopLevelEnd; 850 token.* = Token.init(Token.Id.Null, p.count + 1, 1); 851 }, 852 else => { 853 return error.InvalidLiteral; 854 }, 855 }, 856 } 857 858 return false; 859 } 860 }; 861 862 // A small wrapper over a StreamingParser for full slices. Returns a stream of json Tokens. 863 pub const TokenStream = struct { 864 i: usize, 865 slice: []const u8, 866 parser: StreamingParser, 867 token: ?Token, 868 869 pub fn init(slice: []const u8) TokenStream { 870 return TokenStream{ 871 .i = 0, 872 .slice = slice, 873 .parser = StreamingParser.init(), 874 .token = null, 875 }; 876 } 877 878 pub fn next(self: *TokenStream) !?Token { 879 if (self.token) |token| { 880 self.token = null; 881 return token; 882 } 883 884 var t1: ?Token = undefined; 885 var t2: ?Token = undefined; 886 887 while (self.i < self.slice.len) { 888 try self.parser.feed(self.slice[self.i], &t1, &t2); 889 self.i += 1; 890 891 if (t1) |token| { 892 self.token = t2; 893 return token; 894 } 895 } 896 897 if (self.i > self.slice.len) { 898 try self.parser.feed(' ', &t1, &t2); 899 self.i += 1; 900 901 if (t1) |token| { 902 return token; 903 } 904 } 905 906 return null; 907 } 908 }; 909 910 fn checkNext(p: *TokenStream, id: Token.Id) void { 911 const token = (p.next() catch unreachable).?; 912 debug.assert(token.id == id); 913 } 914 915 test "token" { 916 const s = 917 \\{ 918 \\ "Image": { 919 \\ "Width": 800, 920 \\ "Height": 600, 921 \\ "Title": "View from 15th Floor", 922 \\ "Thumbnail": { 923 \\ "Url": "http://www.example.com/image/481989943", 924 \\ "Height": 125, 925 \\ "Width": 100 926 \\ }, 927 \\ "Animated" : false, 928 \\ "IDs": [116, 943, 234, 38793] 929 \\ } 930 \\} 931 ; 932 933 var p = TokenStream.init(s); 934 935 checkNext(&p, Token.Id.ObjectBegin); 936 checkNext(&p, Token.Id.String); // Image 937 checkNext(&p, Token.Id.ObjectBegin); 938 checkNext(&p, Token.Id.String); // Width 939 checkNext(&p, Token.Id.Number); 940 checkNext(&p, Token.Id.String); // Height 941 checkNext(&p, Token.Id.Number); 942 checkNext(&p, Token.Id.String); // Title 943 checkNext(&p, Token.Id.String); 944 checkNext(&p, Token.Id.String); // Thumbnail 945 checkNext(&p, Token.Id.ObjectBegin); 946 checkNext(&p, Token.Id.String); // Url 947 checkNext(&p, Token.Id.String); 948 checkNext(&p, Token.Id.String); // Height 949 checkNext(&p, Token.Id.Number); 950 checkNext(&p, Token.Id.String); // Width 951 checkNext(&p, Token.Id.Number); 952 checkNext(&p, Token.Id.ObjectEnd); 953 checkNext(&p, Token.Id.String); // Animated 954 checkNext(&p, Token.Id.False); 955 checkNext(&p, Token.Id.String); // IDs 956 checkNext(&p, Token.Id.ArrayBegin); 957 checkNext(&p, Token.Id.Number); 958 checkNext(&p, Token.Id.Number); 959 checkNext(&p, Token.Id.Number); 960 checkNext(&p, Token.Id.Number); 961 checkNext(&p, Token.Id.ArrayEnd); 962 checkNext(&p, Token.Id.ObjectEnd); 963 checkNext(&p, Token.Id.ObjectEnd); 964 965 debug.assert((try p.next()) == null); 966 } 967 968 // Validate a JSON string. This does not limit number precision so a decoder may not necessarily 969 // be able to decode the string even if this returns true. 970 pub fn validate(s: []const u8) bool { 971 var p = StreamingParser.init(); 972 973 for (s) |c, i| { 974 var token1: ?Token = undefined; 975 var token2: ?Token = undefined; 976 977 p.feed(c, &token1, &token2) catch |err| { 978 return false; 979 }; 980 } 981 982 return p.complete; 983 } 984 985 test "json validate" { 986 debug.assert(validate("{}")); 987 } 988 989 const Allocator = std.mem.Allocator; 990 const ArenaAllocator = std.heap.ArenaAllocator; 991 const ArrayList = std.ArrayList; 992 const HashMap = std.HashMap; 993 994 pub const ValueTree = struct { 995 arena: ArenaAllocator, 996 root: Value, 997 998 pub fn deinit(self: *ValueTree) void { 999 self.arena.deinit(); 1000 } 1001 }; 1002 1003 pub const ObjectMap = HashMap([]const u8, Value, mem.hash_slice_u8, mem.eql_slice_u8); 1004 1005 pub const Value = union(enum) { 1006 Null, 1007 Bool: bool, 1008 Integer: i64, 1009 Float: f64, 1010 String: []const u8, 1011 Array: ArrayList(Value), 1012 Object: ObjectMap, 1013 1014 pub fn dump(self: *const Value) void { 1015 switch (self.*) { 1016 Value.Null => { 1017 debug.warn("null"); 1018 }, 1019 Value.Bool => |inner| { 1020 debug.warn("{}", inner); 1021 }, 1022 Value.Integer => |inner| { 1023 debug.warn("{}", inner); 1024 }, 1025 Value.Float => |inner| { 1026 debug.warn("{.5}", inner); 1027 }, 1028 Value.String => |inner| { 1029 debug.warn("\"{}\"", inner); 1030 }, 1031 Value.Array => |inner| { 1032 var not_first = false; 1033 debug.warn("["); 1034 for (inner.toSliceConst()) |value| { 1035 if (not_first) { 1036 debug.warn(","); 1037 } 1038 not_first = true; 1039 value.dump(); 1040 } 1041 debug.warn("]"); 1042 }, 1043 Value.Object => |inner| { 1044 var not_first = false; 1045 debug.warn("{{"); 1046 var it = inner.iterator(); 1047 1048 while (it.next()) |entry| { 1049 if (not_first) { 1050 debug.warn(","); 1051 } 1052 not_first = true; 1053 debug.warn("\"{}\":", entry.key); 1054 entry.value.dump(); 1055 } 1056 debug.warn("}}"); 1057 }, 1058 } 1059 } 1060 1061 pub fn dumpIndent(self: *const Value, indent: usize) void { 1062 if (indent == 0) { 1063 self.dump(); 1064 } else { 1065 self.dumpIndentLevel(indent, 0); 1066 } 1067 } 1068 1069 fn dumpIndentLevel(self: *const Value, indent: usize, level: usize) void { 1070 switch (self.*) { 1071 Value.Null => { 1072 debug.warn("null"); 1073 }, 1074 Value.Bool => |inner| { 1075 debug.warn("{}", inner); 1076 }, 1077 Value.Integer => |inner| { 1078 debug.warn("{}", inner); 1079 }, 1080 Value.Float => |inner| { 1081 debug.warn("{.5}", inner); 1082 }, 1083 Value.String => |inner| { 1084 debug.warn("\"{}\"", inner); 1085 }, 1086 Value.Array => |inner| { 1087 var not_first = false; 1088 debug.warn("[\n"); 1089 1090 for (inner.toSliceConst()) |value| { 1091 if (not_first) { 1092 debug.warn(",\n"); 1093 } 1094 not_first = true; 1095 padSpace(level + indent); 1096 value.dumpIndentLevel(indent, level + indent); 1097 } 1098 debug.warn("\n"); 1099 padSpace(level); 1100 debug.warn("]"); 1101 }, 1102 Value.Object => |inner| { 1103 var not_first = false; 1104 debug.warn("{{\n"); 1105 var it = inner.iterator(); 1106 1107 while (it.next()) |entry| { 1108 if (not_first) { 1109 debug.warn(",\n"); 1110 } 1111 not_first = true; 1112 padSpace(level + indent); 1113 debug.warn("\"{}\": ", entry.key); 1114 entry.value.dumpIndentLevel(indent, level + indent); 1115 } 1116 debug.warn("\n"); 1117 padSpace(level); 1118 debug.warn("}}"); 1119 }, 1120 } 1121 } 1122 1123 fn padSpace(indent: usize) void { 1124 var i: usize = 0; 1125 while (i < indent) : (i += 1) { 1126 debug.warn(" "); 1127 } 1128 } 1129 }; 1130 1131 // A non-stream JSON parser which constructs a tree of Value's. 1132 pub const Parser = struct { 1133 allocator: *Allocator, 1134 state: State, 1135 copy_strings: bool, 1136 // Stores parent nodes and un-combined Values. 1137 stack: ArrayList(Value), 1138 1139 const State = enum { 1140 ObjectKey, 1141 ObjectValue, 1142 ArrayValue, 1143 Simple, 1144 }; 1145 1146 pub fn init(allocator: *Allocator, copy_strings: bool) Parser { 1147 return Parser{ 1148 .allocator = allocator, 1149 .state = State.Simple, 1150 .copy_strings = copy_strings, 1151 .stack = ArrayList(Value).init(allocator), 1152 }; 1153 } 1154 1155 pub fn deinit(p: *Parser) void { 1156 p.stack.deinit(); 1157 } 1158 1159 pub fn reset(p: *Parser) void { 1160 p.state = State.Simple; 1161 p.stack.shrink(0); 1162 } 1163 1164 pub fn parse(p: *Parser, input: []const u8) !ValueTree { 1165 var s = TokenStream.init(input); 1166 1167 var arena = ArenaAllocator.init(p.allocator); 1168 errdefer arena.deinit(); 1169 1170 while (try s.next()) |token| { 1171 try p.transition(&arena.allocator, input, s.i - 1, token); 1172 } 1173 1174 debug.assert(p.stack.len == 1); 1175 1176 return ValueTree{ 1177 .arena = arena, 1178 .root = p.stack.at(0), 1179 }; 1180 } 1181 1182 // Even though p.allocator exists, we take an explicit allocator so that allocation state 1183 // can be cleaned up on error correctly during a `parse` on call. 1184 fn transition(p: *Parser, allocator: *Allocator, input: []const u8, i: usize, token: *const Token) !void { 1185 switch (p.state) { 1186 State.ObjectKey => switch (token.id) { 1187 Token.Id.ObjectEnd => { 1188 if (p.stack.len == 1) { 1189 return; 1190 } 1191 1192 var value = p.stack.pop(); 1193 try p.pushToParent(value); 1194 }, 1195 Token.Id.String => { 1196 try p.stack.append(try p.parseString(allocator, token, input, i)); 1197 p.state = State.ObjectValue; 1198 }, 1199 else => { 1200 unreachable; 1201 }, 1202 }, 1203 State.ObjectValue => { 1204 var object = &p.stack.items[p.stack.len - 2].Object; 1205 var key = p.stack.items[p.stack.len - 1].String; 1206 1207 switch (token.id) { 1208 Token.Id.ObjectBegin => { 1209 try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); 1210 p.state = State.ObjectKey; 1211 }, 1212 Token.Id.ArrayBegin => { 1213 try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) }); 1214 p.state = State.ArrayValue; 1215 }, 1216 Token.Id.String => { 1217 _ = try object.put(key, try p.parseString(allocator, token, input, i)); 1218 _ = p.stack.pop(); 1219 p.state = State.ObjectKey; 1220 }, 1221 Token.Id.Number => { 1222 _ = try object.put(key, try p.parseNumber(token, input, i)); 1223 _ = p.stack.pop(); 1224 p.state = State.ObjectKey; 1225 }, 1226 Token.Id.True => { 1227 _ = try object.put(key, Value{ .Bool = true }); 1228 _ = p.stack.pop(); 1229 p.state = State.ObjectKey; 1230 }, 1231 Token.Id.False => { 1232 _ = try object.put(key, Value{ .Bool = false }); 1233 _ = p.stack.pop(); 1234 p.state = State.ObjectKey; 1235 }, 1236 Token.Id.Null => { 1237 _ = try object.put(key, Value.Null); 1238 _ = p.stack.pop(); 1239 p.state = State.ObjectKey; 1240 }, 1241 Token.Id.ObjectEnd, Token.Id.ArrayEnd => { 1242 unreachable; 1243 }, 1244 } 1245 }, 1246 State.ArrayValue => { 1247 var array = &p.stack.items[p.stack.len - 1].Array; 1248 1249 switch (token.id) { 1250 Token.Id.ArrayEnd => { 1251 if (p.stack.len == 1) { 1252 return; 1253 } 1254 1255 var value = p.stack.pop(); 1256 try p.pushToParent(value); 1257 }, 1258 Token.Id.ObjectBegin => { 1259 try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); 1260 p.state = State.ObjectKey; 1261 }, 1262 Token.Id.ArrayBegin => { 1263 try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) }); 1264 p.state = State.ArrayValue; 1265 }, 1266 Token.Id.String => { 1267 try array.append(try p.parseString(allocator, token, input, i)); 1268 }, 1269 Token.Id.Number => { 1270 try array.append(try p.parseNumber(token, input, i)); 1271 }, 1272 Token.Id.True => { 1273 try array.append(Value{ .Bool = true }); 1274 }, 1275 Token.Id.False => { 1276 try array.append(Value{ .Bool = false }); 1277 }, 1278 Token.Id.Null => { 1279 try array.append(Value.Null); 1280 }, 1281 Token.Id.ObjectEnd => { 1282 unreachable; 1283 }, 1284 } 1285 }, 1286 State.Simple => switch (token.id) { 1287 Token.Id.ObjectBegin => { 1288 try p.stack.append(Value{ .Object = ObjectMap.init(allocator) }); 1289 p.state = State.ObjectKey; 1290 }, 1291 Token.Id.ArrayBegin => { 1292 try p.stack.append(Value{ .Array = ArrayList(Value).init(allocator) }); 1293 p.state = State.ArrayValue; 1294 }, 1295 Token.Id.String => { 1296 try p.stack.append(try p.parseString(allocator, token, input, i)); 1297 }, 1298 Token.Id.Number => { 1299 try p.stack.append(try p.parseNumber(token, input, i)); 1300 }, 1301 Token.Id.True => { 1302 try p.stack.append(Value{ .Bool = true }); 1303 }, 1304 Token.Id.False => { 1305 try p.stack.append(Value{ .Bool = false }); 1306 }, 1307 Token.Id.Null => { 1308 try p.stack.append(Value.Null); 1309 }, 1310 Token.Id.ObjectEnd, Token.Id.ArrayEnd => { 1311 unreachable; 1312 }, 1313 }, 1314 } 1315 } 1316 1317 fn pushToParent(p: *Parser, value: *const Value) !void { 1318 switch (p.stack.at(p.stack.len - 1)) { 1319 // Object Parent -> [ ..., object, <key>, value ] 1320 Value.String => |key| { 1321 _ = p.stack.pop(); 1322 1323 var object = &p.stack.items[p.stack.len - 1].Object; 1324 _ = try object.put(key, value); 1325 p.state = State.ObjectKey; 1326 }, 1327 // Array Parent -> [ ..., <array>, value ] 1328 Value.Array => |*array| { 1329 try array.append(value.*); 1330 p.state = State.ArrayValue; 1331 }, 1332 else => { 1333 unreachable; 1334 }, 1335 } 1336 } 1337 1338 fn parseString(p: *Parser, allocator: *Allocator, token: *const Token, input: []const u8, i: usize) !Value { 1339 // TODO: We don't strictly have to copy values which do not contain any escape 1340 // characters if flagged with the option. 1341 const slice = token.slice(input, i); 1342 return Value{ .String = try mem.dupe(p.allocator, u8, slice) }; 1343 } 1344 1345 fn parseNumber(p: *Parser, token: *const Token, input: []const u8, i: usize) !Value { 1346 return if (token.number_is_integer) 1347 Value{ .Integer = try std.fmt.parseInt(i64, token.slice(input, i), 10) } 1348 else 1349 @panic("TODO: fmt.parseFloat not yet implemented"); 1350 } 1351 }; 1352 1353 test "json parser dynamic" { 1354 var p = Parser.init(debug.global_allocator, false); 1355 defer p.deinit(); 1356 1357 const s = 1358 \\{ 1359 \\ "Image": { 1360 \\ "Width": 800, 1361 \\ "Height": 600, 1362 \\ "Title": "View from 15th Floor", 1363 \\ "Thumbnail": { 1364 \\ "Url": "http://www.example.com/image/481989943", 1365 \\ "Height": 125, 1366 \\ "Width": 100 1367 \\ }, 1368 \\ "Animated" : false, 1369 \\ "IDs": [116, 943, 234, 38793] 1370 \\ } 1371 \\} 1372 ; 1373 1374 var tree = try p.parse(s); 1375 defer tree.deinit(); 1376 1377 var root = tree.root; 1378 1379 var image = root.Object.get("Image").?.value; 1380 1381 const width = image.Object.get("Width").?.value; 1382 debug.assert(width.Integer == 800); 1383 1384 const height = image.Object.get("Height").?.value; 1385 debug.assert(height.Integer == 600); 1386 1387 const title = image.Object.get("Title").?.value; 1388 debug.assert(mem.eql(u8, title.String, "View from 15th Floor")); 1389 1390 const animated = image.Object.get("Animated").?.value; 1391 debug.assert(animated.Bool == false); 1392 }