commit 91ca0e4b02ff8f67e7e18a21fdcd1168f1f5a675 (tree)
parent ded6e0326d8965de8763806593b008c9c28d5508
Author: Andrew Kelley <andrew@ziglang.org>
Date: Sun, 19 Apr 2020 20:33:15 -0400
implement rendering escaped zig string literals
Diffstat:
3 files changed, 157 insertions(+), 126 deletions(-)
diff --git a/lib/std/zig.zig b/lib/std/zig.zig
@@ -2,8 +2,9 @@ const tokenizer = @import("zig/tokenizer.zig");
pub const Token = tokenizer.Token;
pub const Tokenizer = tokenizer.Tokenizer;
pub const parse = @import("zig/parse.zig").parse;
-pub const parseStringLiteral = @import("zig/parse_string_literal.zig").parseStringLiteral;
+pub const parseStringLiteral = @import("zig/string_literal.zig").parse;
pub const render = @import("zig/render.zig").render;
+pub const renderStringLiteral = @import("zig/string_literal.zig").render;
pub const ast = @import("zig/ast.zig");
pub const system = @import("zig/system.zig");
pub const CrossTarget = @import("zig/cross_target.zig").CrossTarget;
diff --git a/lib/std/zig/parse_string_literal.zig b/lib/std/zig/parse_string_literal.zig
@@ -1,125 +0,0 @@
-const std = @import("../std.zig");
-const assert = std.debug.assert;
-
-const State = enum {
- Start,
- Backslash,
-};
-
-pub const ParseStringLiteralError = error{
- OutOfMemory,
-
- /// When this is returned, index will be the position of the character.
- InvalidCharacter,
-};
-
-/// caller owns returned memory
-pub fn parseStringLiteral(
- allocator: *std.mem.Allocator,
- bytes: []const u8,
- bad_index: *usize, // populated if error.InvalidCharacter is returned
-) ParseStringLiteralError![]u8 {
- assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"');
-
- var list = std.ArrayList(u8).init(allocator);
- errdefer list.deinit();
-
- const slice = bytes[1..];
- try list.ensureCapacity(slice.len - 1);
-
- var state = State.Start;
- var index: usize = 0;
- while (index < slice.len) : (index += 1) {
- const b = slice[index];
-
- switch (state) {
- State.Start => switch (b) {
- '\\' => state = State.Backslash,
- '\n' => {
- bad_index.* = index;
- return error.InvalidCharacter;
- },
- '"' => return list.toOwnedSlice(),
- else => try list.append(b),
- },
- State.Backslash => switch (b) {
- 'n' => {
- try list.append('\n');
- state = State.Start;
- },
- 'r' => {
- try list.append('\r');
- state = State.Start;
- },
- '\\' => {
- try list.append('\\');
- state = State.Start;
- },
- 't' => {
- try list.append('\t');
- state = State.Start;
- },
- '\'' => {
- try list.append('\'');
- state = State.Start;
- },
- '"' => {
- try list.append('"');
- state = State.Start;
- },
- 'x' => {
- // TODO: add more/better/broader tests for this.
- const index_continue = index + 3;
- if (slice.len >= index_continue)
- if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |char| {
- try list.append(char);
- state = State.Start;
- index = index_continue - 1; // loop-header increments again
- continue;
- } else |_| {};
-
- bad_index.* = index;
- return error.InvalidCharacter;
- },
- 'u' => {
- // TODO: add more/better/broader tests for this.
- if (slice.len > index + 2 and slice[index + 1] == '{')
- if (std.mem.indexOfScalarPos(u8, slice[0..std.math.min(index + 9, slice.len)], index + 3, '}')) |index_end| {
- const hex_str = slice[index + 2 .. index_end];
- if (std.fmt.parseUnsigned(u32, hex_str, 16)) |uint| {
- if (uint <= 0x10ffff) {
- try list.appendSlice(std.mem.toBytes(uint)[0..]);
- state = State.Start;
- index = index_end; // loop-header increments
- continue;
- }
- } else |_| {}
- };
-
- bad_index.* = index;
- return error.InvalidCharacter;
- },
- else => {
- bad_index.* = index;
- return error.InvalidCharacter;
- },
- },
- else => unreachable,
- }
- }
- unreachable;
-}
-
-test "parseStringLiteral" {
- const expect = std.testing.expect;
- const eql = std.mem.eql;
-
- var fixed_buf_mem: [32]u8 = undefined;
- var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]);
- var alloc = &fixed_buf_alloc.allocator;
- var bad_index: usize = undefined;
-
- expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"foo\"", &bad_index)));
- expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"f\x6f\x6f\"", &bad_index)));
- expect(eql(u8, "f💯", try parseStringLiteral(alloc, "\"f\u{1f4af}\"", &bad_index)));
-}
diff --git a/lib/std/zig/string_literal.zig b/lib/std/zig/string_literal.zig
@@ -0,0 +1,155 @@
+const std = @import("../std.zig");
+const assert = std.debug.assert;
+
+/// Scanner state used by `parse`: `Start` while reading ordinary bytes,
+/// `Backslash` when the previous byte was a backslash that opens an escape.
+const State = enum { Start, Backslash };
+
+/// Errors that `parse` can return.
+pub const ParseError = error{
+    /// The literal contains a raw newline or a malformed escape sequence.
+    /// `bad_index.*` then holds the offset of the offending byte, counted
+    /// from the byte that follows the opening quote.
+    InvalidCharacter,
+
+    /// The allocator could not supply memory for the decoded bytes.
+    OutOfMemory,
+};
+
+/// Parses a Zig string literal token, including its surrounding double quotes,
+/// and returns the bytes it denotes with every escape sequence decoded.
+///
+/// `bytes` must start and end with `"`; this is asserted. Parsing stops at the
+/// first unescaped `"` after the opening quote.
+///
+/// Recognized escapes: `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\xNN` (exactly two
+/// hex digits, producing one raw byte) and `\u{N...}` (one to six hex digits,
+/// producing the UTF-8 encoding of that code point).
+///
+/// On `error.InvalidCharacter`, `bad_index.*` holds the offset of the offending
+/// byte counted from the byte after the opening quote. For a bad escape that is
+/// the character following the backslash.
+///
+/// Caller owns the returned memory.
+pub fn parse(
+    allocator: *std.mem.Allocator,
+    bytes: []const u8,
+    bad_index: *usize, // populated if error.InvalidCharacter is returned
+) ParseError![]u8 {
+    assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"');
+
+    var list = std.ArrayList(u8).init(allocator);
+    errdefer list.deinit();
+
+    // Everything after the opening quote; the closing quote ends the loop below.
+    const slice = bytes[1..];
+    // Every escape is at least as long as the bytes it decodes to, so the
+    // content length is an upper bound on the output length.
+    try list.ensureCapacity(slice.len - 1);
+
+    var state = State.Start;
+    var index: usize = 0;
+    while (index < slice.len) : (index += 1) {
+        const b = slice[index];
+
+        switch (state) {
+            State.Start => switch (b) {
+                '\\' => state = State.Backslash,
+                '\n' => {
+                    bad_index.* = index;
+                    return error.InvalidCharacter;
+                },
+                '"' => return list.toOwnedSlice(),
+                else => try list.append(b),
+            },
+            State.Backslash => switch (b) {
+                'n', 'r', 't', '\\', '\'', '"' => {
+                    const decoded: u8 = switch (b) {
+                        'n' => '\n',
+                        'r' => '\r',
+                        't' => '\t',
+                        else => b, // backslash and both quote kinds stand for themselves
+                    };
+                    try list.append(decoded);
+                    state = State.Start;
+                },
+                'x' => {
+                    const index_continue = index + 3;
+                    if (slice.len >= index_continue) {
+                        if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |char| {
+                            try list.append(char);
+                            state = State.Start;
+                            index = index_continue - 1; // loop-header increments again
+                            continue;
+                        } else |_| {} // not two hex digits: rejected below
+                    }
+
+                    bad_index.* = index;
+                    return error.InvalidCharacter;
+                },
+                'u' => {
+                    if (slice.len > index + 2 and slice[index + 1] == '{') {
+                        // The closing brace must follow one to six hex digits.
+                        const search_end = std.math.min(index + 9, slice.len);
+                        if (std.mem.indexOfScalarPos(u8, slice[0..search_end], index + 3, '}')) |index_end| {
+                            const hex_str = slice[index + 2 .. index_end];
+                            if (std.fmt.parseUnsigned(u32, hex_str, 16)) |uint| {
+                                if (uint <= 0x10ffff) {
+                                    // Emit the code point as UTF-8 rather than as the raw
+                                    // in-memory bytes of the integer.
+                                    var utf8_buf: [4]u8 = undefined;
+                                    if (std.unicode.utf8Encode(@intCast(u21, uint), &utf8_buf)) |utf8_len| {
+                                        try list.appendSlice(utf8_buf[0..utf8_len]);
+                                        state = State.Start;
+                                        index = index_end; // loop-header increments
+                                        continue;
+                                    } else |_| {} // e.g. a surrogate half: rejected below
+                                }
+                            } else |_| {} // not hexadecimal: rejected below
+                        }
+                    }
+
+                    bad_index.* = index;
+                    return error.InvalidCharacter;
+                },
+                else => {
+                    bad_index.* = index;
+                    return error.InvalidCharacter;
+                },
+            },
+        }
+    }
+
+    // Only reached when the final quote was consumed as part of a `\"` escape,
+    // which leaves the literal unterminated.
+    bad_index.* = slice.len - 1;
+    return error.InvalidCharacter;
+}
+
+test "parse" {
+    const expect = std.testing.expect;
+    const expectError = std.testing.expectError;
+    const eql = std.mem.eql;
+
+    // Results are never freed, so the buffer has to hold every allocation made below.
+    var fixed_buf_mem: [256]u8 = undefined;
+    var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]);
+    var alloc = &fixed_buf_alloc.allocator;
+    var bad_index: usize = undefined;
+
+    expect(eql(u8, "foo", try parse(alloc, "\"foo\"", &bad_index)));
+    // The backslashes are doubled so that `parse`, not the compiler, decodes the escapes.
+    expect(eql(u8, "foo", try parse(alloc, "\"f\\x6f\\x6f\"", &bad_index)));
+    expect(eql(u8, "f💯", try parse(alloc, "\"f\\u{1f4af}\"", &bad_index)));
+    expect(eql(u8, "a\n\t\"b\\", try parse(alloc, "\"a\\n\\t\\\"b\\\\\"", &bad_index)));
+
+    // Unknown escape: the index points at the character after the backslash.
+    expectError(error.InvalidCharacter, parse(alloc, "\"\\q\"", &bad_index));
+    expect(bad_index == 1);
+    // Surrogate halves have no UTF-8 encoding.
+    expectError(error.InvalidCharacter, parse(alloc, "\"\\u{d800}\"", &bad_index));
+    expect(bad_index == 1);
+    // The only quote after the opening one is escaped, so the literal never ends.
+    expectError(error.InvalidCharacter, parse(alloc, "\"\\\"\"", &bad_index));
+    expect(bad_index == 1);
+}
+
+/// Emits `utf8` to `out_stream` as a double-quoted Zig string literal.
+/// Newline, carriage return, tab, backslash and double quote use their short
+/// escapes, the remaining printable ASCII bytes are written unchanged, and
+/// every other byte becomes a two-digit lowercase `\x` escape.
+pub fn render(utf8: []const u8, out_stream: var) !void {
+    try out_stream.writeByte('"');
+    var i: usize = 0;
+    while (i < utf8.len) : (i += 1) {
+        const c = utf8[i];
+        switch (c) {
+            // Printable ASCII, minus the two characters that need escaping.
+            ' ', '!', '#'...'[', ']'...'~' => try out_stream.writeByte(c),
+            '"' => try out_stream.writeAll("\\\""),
+            '\\' => try out_stream.writeAll("\\\\"),
+            '\t' => try out_stream.writeAll("\\t"),
+            '\r' => try out_stream.writeAll("\\r"),
+            '\n' => try out_stream.writeAll("\\n"),
+            else => try out_stream.print("\\x{x:0>2}", .{c}),
+        }
+    }
+    try out_stream.writeByte('"');
+}
+
+test "render" {
+    const expect = std.testing.expect;
+    const eql = std.mem.eql;
+
+    // Backslash and double quote get short escapes; the two control bytes
+    // come out as `\x` escapes.
+    const expected =
+        \\" \\ hi \x07 \x11 \" derp"
+    ;
+
+    var fixed_buf_mem: [32]u8 = undefined;
+    var fbs = std.io.fixedBufferStream(&fixed_buf_mem);
+    try render(" \\ hi \x07 \x11 \" derp", fbs.outStream());
+    expect(eql(u8, expected, fbs.getWritten()));
+}