commit e7c109329d2ed05f6a2ca14066d6b19c2ee76500 (tree)
parent 08496aa2aaca0771bbd16bccf640da7ccc050158
Author: Felix "xq" Queißner <xq@random-projects.net>
Date: Thu, 5 Jan 2023 13:23:58 +0100
Renames Url.zig to Uri.zig
Diffstat:
| A | lib/std/Uri.zig | | | 515 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| D | lib/std/Url.zig | | | 515 | ------------------------------------------------------------------------------- |
| M | lib/std/std.zig | | | 2 | +- |
3 files changed, 516 insertions(+), 516 deletions(-)
diff --git a/lib/std/Uri.zig b/lib/std/Uri.zig
@@ -0,0 +1,515 @@
+//! Implements URI parsing roughly adhering to <https://tools.ietf.org/html/rfc3986>.
+//! Does not do perfect grammar and character class checking, but should be robust against URIs in the wild.
+
+const Uri = @This();
+const std = @import("std.zig");
+const testing = std.testing;
+
+scheme: ?[]const u8,
+user: ?[]const u8,
+password: ?[]const u8,
+host: ?[]const u8,
+port: ?u16,
+path: []const u8,
+query: ?[]const u8,
+fragment: ?[]const u8,
+
+/// Percent-encodes `input`: every byte for which `isUnreserved` returns false is
+/// written as `%XX` (two uppercase hex digits); all other bytes are copied through
+/// unchanged.
+/// The result is allocated with `allocator`; the caller owns and frees it.
+pub fn escapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
+    const hex_digits = "0123456789ABCDEF";
+
+    // First pass: compute the exact encoded length so one allocation suffices.
+    var encoded_len: usize = 0;
+    for (input) |byte| {
+        encoded_len += if (isUnreserved(byte)) @as(usize, 1) else 3;
+    }
+
+    const encoded = try allocator.alloc(u8, encoded_len);
+    var write_index: usize = 0;
+    for (input) |byte| {
+        if (isUnreserved(byte)) {
+            encoded[write_index] = byte;
+            write_index += 1;
+            continue;
+        }
+        // High nibble first, then low nibble.
+        encoded[write_index] = '%';
+        encoded[write_index + 1] = hex_digits[byte >> 4];
+        encoded[write_index + 2] = hex_digits[byte & 0x0F];
+        write_index += 3;
+    }
+    return encoded;
+}
+
+/// Decodes percent-escapes in `input`: every `%XX` where both `X` are hexadecimal
+/// digits (either case) becomes the single byte with that value. Every other byte,
+/// including a `%` that is not followed by two hex digits (malformed or truncated
+/// escape), is copied to the output verbatim.
+/// The result is allocated with `allocator` and is exactly as long as the decoded
+/// data; the caller owns and frees it.
+pub fn unescapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
+    // First pass: count decoded bytes so the allocation is exact. Each iteration
+    // produces exactly one output byte and consumes either 3 or 1 input bytes.
+    var outsize: usize = 0;
+    var inptr: usize = 0;
+    while (inptr < input.len) : (outsize += 1) {
+        inptr += if (decodePercentTriplet(input[inptr..]) != null) @as(usize, 3) else 1;
+    }
+
+    const output = try allocator.alloc(u8, outsize);
+
+    // Second pass: same walk as above, now writing the bytes. Both passes use the
+    // same decision function, so `outptr` ends at exactly `outsize`.
+    var outptr: usize = 0;
+    inptr = 0;
+    while (inptr < input.len) : (outptr += 1) {
+        if (decodePercentTriplet(input[inptr..])) |value| {
+            output[outptr] = value;
+            inptr += 3;
+        } else {
+            output[outptr] = input[inptr];
+            inptr += 1;
+        }
+    }
+    return output;
+}
+
+/// Returns the byte encoded by a `%XX` escape at the very start of `text`, or
+/// `null` if `text` does not begin with '%' followed by two hexadecimal digits.
+fn decodePercentTriplet(text: []const u8) ?u8 {
+    if (text.len < 3 or text[0] != '%') return null;
+    // `charToDigit` rejects anything that is not a digit of the given radix, so
+    // signs and underscores are never accepted as part of an escape.
+    const hi = std.fmt.charToDigit(text[1], 16) catch return null;
+    const lo = std.fmt.charToDigit(text[2], 16) catch return null;
+    return hi * 16 + lo; // hi, lo <= 15, so the result always fits in a u8
+}
+
+/// Errors that `parse` can return.
+pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort };
+
+/// Splits `text` into its URI components.
+///
+/// Every slice in the returned value points into `text`; nothing is copied and
+/// percent-escapes are NOT decoded (use `unescapeString` on a component if needed).
+/// Components that do not occur in `text` are `null`; `path` is always set and may
+/// be empty. A userinfo of the form `user:` yields a `null` password.
+///
+/// Errors:
+/// * `error.InvalidFormat`: `text` ends before the ':' that terminates the scheme,
+///   the authority after "//" is empty, or a host starting with '[' has no ']'.
+/// * `error.UnexpectedCharacter`: the first byte that is not a scheme character is not ':'.
+/// * `error.InvalidPort`: the text after the port ':' does not parse as a `u16`.
+pub fn parse(text: []const u8) ParseError!Uri {
+    var uri = Uri{
+        .scheme = null,
+        .user = null,
+        .password = null,
+        .host = null,
+        .port = null,
+        .path = "", // path is always set, but empty by default.
+        .query = null,
+        .fragment = null,
+    };
+
+    var reader = SliceReader{ .slice = text };
+
+    uri.scheme = reader.readWhile(isSchemeChar);
+
+    // after the scheme, a ':' must appear
+    if (reader.get()) |c| {
+        if (c != ':')
+            return error.UnexpectedCharacter;
+    } else {
+        return error.InvalidFormat;
+    }
+
+    if (reader.peekPrefix("//")) { // authority part
+        reader.offset += 2; // skip the "//" that was just matched
+
+        const authority = reader.readUntil(isAuthoritySeparator);
+        if (authority.len == 0)
+            return error.InvalidFormat;
+
+        // Everything before the first '@' is userinfo, everything after it is
+        // host[:port]. Working on the host[:port] sub-slice keeps ']' or ':' bytes
+        // in the userinfo from being mistaken for host/port delimiters.
+        var host_and_port = authority;
+        if (std.mem.indexOf(u8, authority, "@")) |index| {
+            host_and_port = authority[index + 1 ..];
+            const user_info = authority[0..index];
+
+            if (std.mem.indexOf(u8, user_info, ":")) |idx| {
+                uri.user = user_info[0..idx];
+                if (idx < user_info.len - 1) { // empty password is also "no password"
+                    uri.password = user_info[idx + 1 ..];
+                }
+            } else {
+                uri.user = user_info;
+            }
+        }
+
+        var end_of_host: usize = host_and_port.len;
+
+        // The length check guards against an authority that ends right after '@'
+        // (e.g. "scheme://user@"), which yields an empty host instead of an
+        // out-of-bounds index.
+        if (host_and_port.len > 0 and host_and_port[0] == '[') { // IPv6
+            const closing = std.mem.lastIndexOf(u8, host_and_port, "]") orelse return error.InvalidFormat;
+            end_of_host = closing + 1;
+
+            if (std.mem.lastIndexOf(u8, host_and_port, ":")) |index| {
+                if (index >= end_of_host) { // if not part of the V6 address field
+                    uri.port = std.fmt.parseInt(u16, host_and_port[index + 1 ..], 10) catch return error.InvalidPort;
+                }
+            }
+        } else if (std.mem.lastIndexOf(u8, host_and_port, ":")) |index| {
+            end_of_host = index;
+            uri.port = std.fmt.parseInt(u16, host_and_port[index + 1 ..], 10) catch return error.InvalidPort;
+        }
+
+        uri.host = host_and_port[0..end_of_host];
+    }
+
+    uri.path = reader.readUntil(isPathSeparator);
+
+    if ((reader.peek() orelse 0) == '?') { // query part
+        reader.offset += 1; // skip '?'
+        uri.query = reader.readUntil(isQuerySeparator);
+    }
+
+    if ((reader.peek() orelse 0) == '#') { // fragment part
+        reader.offset += 1; // skip '#'
+        uri.fragment = reader.readUntilEof();
+    }
+
+    return uri;
+}
+
+/// Cursor over a byte slice: `offset` is the index of the next unread byte.
+const SliceReader = struct {
+    const Self = @This();
+
+    slice: []const u8,
+    offset: usize = 0,
+
+    /// Returns the next byte and advances past it, or `null` at the end of the slice.
+    fn get(self: *Self) ?u8 {
+        const c = self.peek() orelse return null;
+        self.offset += 1;
+        return c;
+    }
+
+    /// Returns the next byte without advancing, or `null` at the end of the slice.
+    fn peek(self: Self) ?u8 {
+        return if (self.offset < self.slice.len) self.slice[self.offset] else null;
+    }
+
+    /// Consumes and returns the longest run of bytes for which `predicate` is true.
+    fn readWhile(self: *Self, comptime predicate: fn (u8) bool) []const u8 {
+        const first = self.offset;
+        while (self.offset < self.slice.len and predicate(self.slice[self.offset])) : (self.offset += 1) {}
+        return self.slice[first..self.offset];
+    }
+
+    /// Consumes and returns bytes up to, but not including, the first byte for
+    /// which `predicate` is true (or up to the end of the slice).
+    fn readUntil(self: *Self, comptime predicate: fn (u8) bool) []const u8 {
+        const first = self.offset;
+        while (self.offset < self.slice.len) : (self.offset += 1) {
+            if (predicate(self.slice[self.offset])) break;
+        }
+        return self.slice[first..self.offset];
+    }
+
+    /// Consumes and returns everything that is left.
+    fn readUntilEof(self: *Self) []const u8 {
+        const rest = self.slice[self.offset..];
+        self.offset = self.slice.len;
+        return rest;
+    }
+
+    /// Reports whether the unread part starts with `prefix`; does not advance.
+    fn peekPrefix(self: Self, prefix: []const u8) bool {
+        const end = self.offset + prefix.len;
+        if (end > self.slice.len)
+            return false;
+        return std.mem.eql(u8, self.slice[self.offset..end], prefix);
+    }
+};
+
+/// Reports whether `c` is allowed in a URI scheme according to
+/// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+/// The "first character must be ALPHA" part of that rule is not checked here;
+/// every position is treated alike.
+fn isSchemeChar(c: u8) bool {
+    if (c >= 'a' and c <= 'z') return true;
+    if (c >= 'A' and c <= 'Z') return true;
+    if (c >= '0' and c <= '9') return true;
+    return c == '+' or c == '-' or c == '.';
+}
+
+/// Reports whether `c` ends the authority component: '/', '?' or '#'.
+fn isAuthoritySeparator(c: u8) bool {
+    return c == '/' or c == '?' or c == '#';
+}
+
+/// reserved = gen-delims / sub-delims
+/// True when `c` is accepted by either `isGenLimit` or `isSubLimit`.
+fn isReserved(c: u8) bool {
+    if (isGenLimit(c)) return true;
+    return isSubLimit(c);
+}
+
+/// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+/// Reports whether `c` is one of the general delimiters listed above.
+fn isGenLimit(c: u8) bool {
+    return switch (c) {
+        // '/' belongs here; ',' is a sub-delim and is handled by `isSubLimit`.
+        ':', '/', '?', '#', '[', ']', '@' => true,
+        else => false,
+    };
+}
+
+/// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+///            / "*" / "+" / "," / ";" / "="
+/// Reports whether `c` is one of the sub-delimiters listed above.
+fn isSubLimit(c: u8) bool {
+    const sub_delims = "!$&'()*+,;=";
+    return std.mem.indexOfScalar(u8, sub_delims, c) != null;
+}
+
+/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+/// Reports whether `c` is an ASCII letter, an ASCII digit, or one of "-._~".
+fn isUnreserved(c: u8) bool {
+    if (c >= 'a' and c <= 'z') return true;
+    if (c >= 'A' and c <= 'Z') return true;
+    if (c >= '0' and c <= '9') return true;
+    return c == '-' or c == '.' or c == '_' or c == '~';
+}
+
+/// Reports whether `c` ends the path component: '?' or '#'.
+fn isPathSeparator(c: u8) bool {
+    return c == '?' or c == '#';
+}
+
+/// Reports whether `c` ends the query component: only '#'.
+fn isQuerySeparator(c: u8) bool {
+    return c == '#';
+}
+
+test "basic" {
+    // An https URI without an explicit port: scheme, host and path are set, port stays null.
+    const uri = try parse("https://ziglang.org/download");
+
+    const scheme = uri.scheme orelse return error.UnexpectedNull;
+    try testing.expectEqualStrings("https", scheme);
+
+    const host = uri.host orelse return error.UnexpectedNull;
+    try testing.expectEqualStrings("ziglang.org", host);
+
+    try testing.expectEqualStrings("/download", uri.path);
+    try testing.expectEqual(@as(?u16, null), uri.port);
+}
+
+test "with port" {
+    // Same shape as the "basic" case, but with an explicit numeric port.
+    const uri = try parse("http://example:1337/");
+
+    const scheme = uri.scheme orelse return error.UnexpectedNull;
+    try testing.expectEqualStrings("http", scheme);
+
+    const host = uri.host orelse return error.UnexpectedNull;
+    try testing.expectEqualStrings("example", host);
+
+    try testing.expectEqualStrings("/", uri.path);
+    try testing.expectEqual(@as(?u16, 1337), uri.port);
+}
+
+test "should fail gracefully" {
+    // "//" announces an authority, but none follows: parse must report an error
+    // rather than crash.
+    try std.testing.expectError(error.InvalidFormat, parse("foobar://"));
+}
+
+test "scheme" {
+    // Each case pairs an input with the scheme that must be extracted from it.
+    const Case = struct { text: []const u8, scheme: []const u8 };
+    const cases = [_]Case{
+        .{ .text = "http:_", .scheme = "http" },
+        .{ .text = "scheme-mee:_", .scheme = "scheme-mee" },
+        .{ .text = "a.b.c:_", .scheme = "a.b.c" },
+        .{ .text = "ab+:_", .scheme = "ab+" },
+        .{ .text = "X+++:_", .scheme = "X+++" },
+        .{ .text = "Y+-.:_", .scheme = "Y+-." },
+    };
+    for (cases) |case| {
+        const uri = try parse(case.text);
+        try std.testing.expectEqualSlices(u8, case.scheme, uri.scheme.?);
+    }
+}
+
+test "authority" {
+    // Every combination of optional userinfo / password / port around one host name.
+    {
+        const uri = try parse("scheme://hostname");
+        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
+    }
+    {
+        const uri = try parse("scheme://userinfo@hostname");
+        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
+        try std.testing.expectEqualSlices(u8, "userinfo", uri.user.?);
+        try std.testing.expectEqual(@as(?[]const u8, null), uri.password);
+    }
+    {
+        const uri = try parse("scheme://user:password@hostname");
+        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
+        try std.testing.expectEqualSlices(u8, "user", uri.user.?);
+        try std.testing.expectEqualSlices(u8, "password", uri.password.?);
+    }
+    {
+        const uri = try parse("scheme://hostname:0");
+        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
+    }
+    {
+        const uri = try parse("scheme://hostname:1234");
+        try std.testing.expectEqual(@as(u16, 1234), uri.port.?);
+    }
+    {
+        const uri = try parse("scheme://userinfo@hostname:1234");
+        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
+        try std.testing.expectEqual(@as(u16, 1234), uri.port.?);
+        try std.testing.expectEqualSlices(u8, "userinfo", uri.user.?);
+        try std.testing.expectEqual(@as(?[]const u8, null), uri.password);
+    }
+    {
+        const uri = try parse("scheme://user:password@hostname:1234");
+        try std.testing.expectEqualSlices(u8, "hostname", uri.host.?);
+        try std.testing.expectEqual(@as(u16, 1234), uri.port.?);
+        try std.testing.expectEqualSlices(u8, "user", uri.user.?);
+        try std.testing.expectEqualSlices(u8, "password", uri.password.?);
+    }
+}
+
+test "authority.password" {
+    // No ':' in the userinfo: there is no password.
+    {
+        const uri = try parse("scheme://username@a");
+        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
+        try std.testing.expectEqual(@as(?[]const u8, null), uri.password);
+    }
+    // A ':' with nothing after it also counts as "no password".
+    {
+        const uri = try parse("scheme://username:@a");
+        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
+        try std.testing.expectEqual(@as(?[]const u8, null), uri.password);
+    }
+    {
+        const uri = try parse("scheme://username:password@a");
+        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
+        try std.testing.expectEqualSlices(u8, "password", uri.password.?);
+    }
+    // Only the first ':' splits user from password; the second one is the password.
+    {
+        const uri = try parse("scheme://username::@a");
+        try std.testing.expectEqualSlices(u8, "username", uri.user.?);
+        try std.testing.expectEqualSlices(u8, ":", uri.password.?);
+    }
+}
+
+/// For every entry of the comptime-known tuple `hostlist`, parses
+/// "scheme://" ++ entry and expects the parsed host to equal the entry itself.
+fn testAuthorityHost(comptime hostlist: anytype) !void {
+    inline for (hostlist) |hostname| {
+        const text = "scheme://" ++ hostname;
+        const uri = try parse(text);
+        try std.testing.expectEqualSlices(u8, hostname, uri.host.?);
+    }
+}
+
+test "authority.dns-names" {
+ // Each name is parsed as "scheme://<name>" and must come back unchanged as the host
+ // (see `testAuthorityHost`).
+ try testAuthorityHost(.{
+ "a",
+ "a.b",
+ "example.com",
+ "www.example.com",
+ "example.org.",
+ "www.example.org.",
+ "xn--nw2a.xn--j6w193g", // internationalized URI: 見.香港
+ "fe80--1ff-fe23-4567-890as3.ipv6-literal.net",
+ });
+}
+
+test "authority.IPv4" {
+ // Dotted-quad literals: each must be returned verbatim as the host
+ // (see `testAuthorityHost`).
+ try testAuthorityHost(.{
+ "127.0.0.1",
+ "255.255.255.255",
+ "0.0.0.0",
+ "8.8.8.8",
+ "1.2.3.4",
+ "192.168.0.1",
+ "10.42.0.0",
+ });
+}
+
+test "authority.IPv6" {
+ // Bracketed literals: the host returned by `parse` must include the surrounding
+ // '[' and ']' and every ':' inside them (see `testAuthorityHost`).
+ try testAuthorityHost(.{
+ "[2001:db8:0:0:0:0:2:1]",
+ "[2001:db8::2:1]",
+ "[2001:db8:0000:1:1:1:1:1]",
+ "[2001:db8:0:1:1:1:1:1]",
+ "[0:0:0:0:0:0:0:0]",
+ "[0:0:0:0:0:0:0:1]",
+ "[::1]",
+ "[::]",
+ "[2001:db8:85a3:8d3:1319:8a2e:370:7348]",
+ "[fe80::1ff:fe23:4567:890a%25eth2]",
+ "[fe80::1ff:fe23:4567:890a]",
+ "[fe80::1ff:fe23:4567:890a%253]",
+ "[fe80:3::1ff:fe23:4567:890a]",
+ });
+}
+
+test "RFC example 1" {
+    const uri = "foo://example.com:8042/over/there?name=ferret#nose";
+
+    // The expected components are written as sub-slices of `uri` itself.
+    const expected = Uri{
+        .scheme = uri[0..3],
+        .user = null,
+        .password = null,
+        .host = uri[6..17],
+        .port = 8042,
+        .path = uri[22..33],
+        .query = uri[34..45],
+        .fragment = uri[46..50],
+    };
+    const actual = try parse(uri);
+    try std.testing.expectEqual(expected, actual);
+}
+
+test "RFC example 2" {
+    // A URI without an authority: everything after "urn:" is the path.
+    const uri = "urn:example:animal:ferret:nose";
+    try std.testing.expectEqual(Uri{
+        .scheme = uri[0..3],
+        .user = null,
+        .password = null,
+        .host = null,
+        .port = null,
+        .path = uri[4..],
+        .query = null,
+        .fragment = null,
+    }, try parse(uri));
+}
+
+// source:
+// https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Examples
+test "Examples from wikipedia" {
+    const examples = [_][]const u8{
+        "https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top",
+        "ldap://[2001:db8::7]/c=GB?objectClass?one",
+        "mailto:John.Doe@example.com",
+        "news:comp.infosystems.www.servers.unix",
+        "tel:+1-816-555-1212",
+        "telnet://192.0.2.16:80/",
+        "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
+        "http://a/b/c/d;p?q",
+    };
+    // Only checks that `parse` accepts every example; the components are not inspected.
+    var i: usize = 0;
+    while (i < examples.len) : (i += 1) {
+        _ = try parse(examples[i]);
+    }
+}
+
+// source:
+// https://tools.ietf.org/html/rfc3986#section-5.4.1
+test "Examples from RFC3986" {
+ // Only checks that `parse` accepts every entry; the components are not inspected.
+ // Several entries occur more than once in the list.
+ const list = [_][]const u8{
+ "http://a/b/c/g",
+ "http://a/b/c/g",
+ "http://a/b/c/g/",
+ "http://a/g",
+ "http://g",
+ "http://a/b/c/d;p?y",
+ "http://a/b/c/g?y",
+ "http://a/b/c/d;p?q#s",
+ "http://a/b/c/g#s",
+ "http://a/b/c/g?y#s",
+ "http://a/b/c/;x",
+ "http://a/b/c/g;x",
+ "http://a/b/c/g;x?y#s",
+ "http://a/b/c/d;p?q",
+ "http://a/b/c/",
+ "http://a/b/c/",
+ "http://a/b/",
+ "http://a/b/",
+ "http://a/b/g",
+ "http://a/",
+ "http://a/",
+ "http://a/g",
+ };
+ for (list) |uri| {
+ _ = try parse(uri);
+ }
+}
+
+test "Special test" {
+    // This is for all of you code readers ♥
+    const text = "https://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=youtu.be&t=0";
+    _ = try parse(text);
+}
+
+test "URI escaping" {
+    // Mixes ASCII punctuation, spaces and multi-byte UTF-8 with characters that
+    // must pass through unchanged ('~', '.', '-').
+    const allocator = std.testing.allocator;
+    const raw = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
+    const want = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
+
+    const got = try escapeString(allocator, raw);
+    defer allocator.free(got);
+
+    try std.testing.expectEqualSlices(u8, want, got);
+}
+
+test "URI unescaping" {
+    // Inverse of the "URI escaping" case: the escaped form must decode back to the raw text.
+    const allocator = std.testing.allocator;
+    const escaped = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
+    const want = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
+
+    const got = try unescapeString(allocator, escaped);
+    defer allocator.free(got);
+
+    try std.testing.expectEqualSlices(u8, want, got);
+}
diff --git a/lib/std/Url.zig b/lib/std/Url.zig
@@ -1,515 +0,0 @@
-//! Implements URI parsing roughly adhering to <https://tools.ietf.org/html/rfc3986>.
-//! Does not do perfect grammar and character class checking, but should be robust against URIs in the wild.
-
-const Url = @This();
-const std = @import("std.zig");
-const testing = std.testing;
-
-scheme: ?[]const u8,
-user: ?[]const u8,
-password: ?[]const u8,
-host: ?[]const u8,
-port: ?u16,
-path: []const u8,
-query: ?[]const u8,
-fragment: ?[]const u8,
-
-/// Applies URI encoding and replaces all reserved characters with their respective %XX code.
-pub fn escapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
- var outsize: usize = 0;
- for (input) |c| {
- outsize += if (isUnreserved(c)) @as(usize, 1) else 3;
- }
- var output = try allocator.alloc(u8, outsize);
- var outptr: usize = 0;
-
- for (input) |c| {
- if (isUnreserved(c)) {
- output[outptr] = c;
- outptr += 1;
- } else {
- var buf: [2]u8 = undefined;
- _ = std.fmt.bufPrint(&buf, "{X:0>2}", .{c}) catch unreachable;
-
- output[outptr + 0] = '%';
- output[outptr + 1] = buf[0];
- output[outptr + 2] = buf[1];
- outptr += 3;
- }
- }
- return output;
-}
-
-/// Parses a URI string and unescapes all %XX where XX is a valid hex number. Otherwise, verbatim copies
-/// them to the output.
-pub fn unescapeString(allocator: std.mem.Allocator, input: []const u8) error{OutOfMemory}![]const u8 {
- var outsize: usize = 0;
- var inptr: usize = 0;
- while (inptr < input.len) {
- if (input[inptr] == '%') {
- inptr += 1;
- if (inptr + 2 <= input.len) {
- _ = std.fmt.parseInt(u8, input[inptr..][0..2], 16) catch {
- outsize += 3;
- inptr += 2;
- continue;
- };
- inptr += 2;
- outsize += 1;
- }
- } else {
- inptr += 1;
- outsize += 1;
- }
- }
-
- var output = try allocator.alloc(u8, outsize);
- var outptr: usize = 0;
- inptr = 0;
- while (inptr < input.len) {
- if (input[inptr] == '%') {
- inptr += 1;
- if (inptr + 2 <= input.len) {
- const value = std.fmt.parseInt(u8, input[inptr..][0..2], 16) catch {
- output[outptr + 0] = input[inptr + 0];
- output[outptr + 1] = input[inptr + 1];
- inptr += 2;
- outptr += 2;
- continue;
- };
-
- output[outptr] = value;
-
- inptr += 2;
- outptr += 1;
- }
- } else {
- output[outptr] = input[inptr];
- inptr += 1;
- outptr += 1;
- }
- }
- return output;
-}
-
-pub const ParseError = error{ UnexpectedCharacter, InvalidFormat, InvalidPort };
-
-/// Parses the URI or returns an error.
-/// The return value will contain unescaped strings pointing into the
-/// original `text`. Each component that is provided, will be non-`null`.
-pub fn parse(text: []const u8) ParseError!Url {
- var uri = Url{
- .scheme = null,
- .user = null,
- .password = null,
- .host = null,
- .port = null,
- .path = "", // path is always set, but empty by default.
- .query = null,
- .fragment = null,
- };
-
- var reader = SliceReader{ .slice = text };
-
- uri.scheme = reader.readWhile(isSchemeChar);
-
- // after the scheme, a ':' must appear
- if (reader.get()) |c| {
- if (c != ':')
- return error.UnexpectedCharacter;
- } else {
- return error.InvalidFormat;
- }
-
- if (reader.peekPrefix("//")) { // authority part
- std.debug.assert(reader.get().? == '/');
- std.debug.assert(reader.get().? == '/');
-
- const authority = reader.readUntil(isAuthoritySeparator);
- if (authority.len == 0)
- return error.InvalidFormat;
-
- var start_of_host: usize = 0;
- if (std.mem.indexOf(u8, authority, "@")) |index| {
- start_of_host = index + 1;
- const user_info = authority[0..index];
-
- if (std.mem.indexOf(u8, user_info, ":")) |idx| {
- uri.user = user_info[0..idx];
- if (idx < user_info.len - 1) { // empty password is also "no password"
- uri.password = user_info[idx + 1 ..];
- }
- } else {
- uri.user = user_info;
- uri.password = null;
- }
- }
-
- var end_of_host: usize = authority.len;
-
- if (authority[start_of_host] == '[') { // IPv6
- end_of_host = std.mem.lastIndexOf(u8, authority, "]") orelse return error.InvalidFormat;
- end_of_host += 1;
-
- if (std.mem.lastIndexOf(u8, authority, ":")) |index| {
- if (index >= end_of_host) { // if not part of the V6 address field
- end_of_host = std.math.min(end_of_host, index);
- uri.port = std.fmt.parseInt(u16, authority[index + 1 ..], 10) catch return error.InvalidPort;
- }
- }
- } else if (std.mem.lastIndexOf(u8, authority, ":")) |index| {
- if (index >= start_of_host) { // if not part of the userinfo field
- end_of_host = std.math.min(end_of_host, index);
- uri.port = std.fmt.parseInt(u16, authority[index + 1 ..], 10) catch return error.InvalidPort;
- }
- }
-
- uri.host = authority[start_of_host..end_of_host];
- }
-
- uri.path = reader.readUntil(isPathSeparator);
-
- if ((reader.peek() orelse 0) == '?') { // query part
- std.debug.assert(reader.get().? == '?');
- uri.query = reader.readUntil(isQuerySeparator);
- }
-
- if ((reader.peek() orelse 0) == '#') { // fragment part
- std.debug.assert(reader.get().? == '#');
- uri.fragment = reader.readUntilEof();
- }
-
- return uri;
-}
-
-const SliceReader = struct {
- const Self = @This();
-
- slice: []const u8,
- offset: usize = 0,
-
- fn get(self: *Self) ?u8 {
- if (self.offset >= self.slice.len)
- return null;
- const c = self.slice[self.offset];
- self.offset += 1;
- return c;
- }
-
- fn peek(self: Self) ?u8 {
- if (self.offset >= self.slice.len)
- return null;
- return self.slice[self.offset];
- }
-
- fn readWhile(self: *Self, comptime predicate: fn (u8) bool) []const u8 {
- const start = self.offset;
- var end = start;
- while (end < self.slice.len and predicate(self.slice[end])) {
- end += 1;
- }
- self.offset = end;
- return self.slice[start..end];
- }
-
- fn readUntil(self: *Self, comptime predicate: fn (u8) bool) []const u8 {
- const start = self.offset;
- var end = start;
- while (end < self.slice.len and !predicate(self.slice[end])) {
- end += 1;
- }
- self.offset = end;
- return self.slice[start..end];
- }
-
- fn readUntilEof(self: *Self) []const u8 {
- const start = self.offset;
- self.offset = self.slice.len;
- return self.slice[start..];
- }
-
- fn peekPrefix(self: Self, prefix: []const u8) bool {
- if (self.offset + prefix.len > self.slice.len)
- return false;
- return std.mem.eql(u8, self.slice[self.offset..][0..prefix.len], prefix);
- }
-};
-
-/// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-fn isSchemeChar(c: u8) bool {
- return switch (c) {
- 'A'...'Z', 'a'...'z', '0'...'9', '+', '-', '.' => true,
- else => false,
- };
-}
-
-fn isAuthoritySeparator(c: u8) bool {
- return switch (c) {
- '/', '?', '#' => true,
- else => false,
- };
-}
-
-/// reserved = gen-delims / sub-delims
-fn isReserved(c: u8) bool {
- return isGenLimit(c) or isSubLimit(c);
-}
-
-/// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-fn isGenLimit(c: u8) bool {
- return switch (c) {
- ':', ',', '?', '#', '[', ']', '@' => true,
- else => false,
- };
-}
-
-/// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
-/// / "*" / "+" / "," / ";" / "="
-fn isSubLimit(c: u8) bool {
- return switch (c) {
- '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=' => true,
- else => false,
- };
-}
-
-/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-fn isUnreserved(c: u8) bool {
- return switch (c) {
- 'A'...'Z', 'a'...'z', '0'...'9', '-', '.', '_', '~' => true,
- else => false,
- };
-}
-
-fn isPathSeparator(c: u8) bool {
- return switch (c) {
- '?', '#' => true,
- else => false,
- };
-}
-
-fn isQuerySeparator(c: u8) bool {
- return switch (c) {
- '#' => true,
- else => false,
- };
-}
-
-test "basic" {
- const parsed = try parse("https://ziglang.org/download");
- try testing.expectEqualStrings("https", parsed.scheme orelse return error.UnexpectedNull);
- try testing.expectEqualStrings("ziglang.org", parsed.host orelse return error.UnexpectedNull);
- try testing.expectEqualStrings("/download", parsed.path);
- try testing.expectEqual(@as(?u16, null), parsed.port);
-}
-
-test "with port" {
- const parsed = try parse("http://example:1337/");
- try testing.expectEqualStrings("http", parsed.scheme orelse return error.UnexpectedNull);
- try testing.expectEqualStrings("example", parsed.host orelse return error.UnexpectedNull);
- try testing.expectEqualStrings("/", parsed.path);
- try testing.expectEqual(@as(?u16, 1337), parsed.port);
-}
-
-test "should fail gracefully" {
- try std.testing.expectEqual(@as(ParseError!Url, error.InvalidFormat), parse("foobar://"));
-}
-
-test "scheme" {
- try std.testing.expectEqualSlices(u8, "http", (try parse("http:_")).scheme.?);
- try std.testing.expectEqualSlices(u8, "scheme-mee", (try parse("scheme-mee:_")).scheme.?);
- try std.testing.expectEqualSlices(u8, "a.b.c", (try parse("a.b.c:_")).scheme.?);
- try std.testing.expectEqualSlices(u8, "ab+", (try parse("ab+:_")).scheme.?);
- try std.testing.expectEqualSlices(u8, "X+++", (try parse("X+++:_")).scheme.?);
- try std.testing.expectEqualSlices(u8, "Y+-.", (try parse("Y+-.:_")).scheme.?);
-}
-
-test "authority" {
- try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://hostname")).host.?);
-
- try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://userinfo@hostname")).host.?);
- try std.testing.expectEqualSlices(u8, "userinfo", (try parse("scheme://userinfo@hostname")).user.?);
- try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@hostname")).password);
-
- try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://user:password@hostname")).host.?);
- try std.testing.expectEqualSlices(u8, "user", (try parse("scheme://user:password@hostname")).user.?);
- try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://user:password@hostname")).password.?);
-
- try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://hostname:0")).host.?);
- try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://hostname:1234")).port.?);
-
- try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://userinfo@hostname:1234")).host.?);
- try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://userinfo@hostname:1234")).port.?);
- try std.testing.expectEqualSlices(u8, "userinfo", (try parse("scheme://userinfo@hostname:1234")).user.?);
- try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://userinfo@hostname:1234")).password);
-
- try std.testing.expectEqualSlices(u8, "hostname", (try parse("scheme://user:password@hostname:1234")).host.?);
- try std.testing.expectEqual(@as(u16, 1234), (try parse("scheme://user:password@hostname:1234")).port.?);
- try std.testing.expectEqualSlices(u8, "user", (try parse("scheme://user:password@hostname:1234")).user.?);
- try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://user:password@hostname:1234")).password.?);
-}
-
-test "authority.password" {
- try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username@a")).user.?);
- try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://username@a")).password);
-
- try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username:@a")).user.?);
- try std.testing.expectEqual(@as(?[]const u8, null), (try parse("scheme://username:@a")).password);
-
- try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username:password@a")).user.?);
- try std.testing.expectEqualSlices(u8, "password", (try parse("scheme://username:password@a")).password.?);
-
- try std.testing.expectEqualSlices(u8, "username", (try parse("scheme://username::@a")).user.?);
- try std.testing.expectEqualSlices(u8, ":", (try parse("scheme://username::@a")).password.?);
-}
-
-fn testAuthorityHost(comptime hostlist: anytype) !void {
- inline for (hostlist) |hostname| {
- try std.testing.expectEqualSlices(u8, hostname, (try parse("scheme://" ++ hostname)).host.?);
- }
-}
-
-test "authority.dns-names" {
- try testAuthorityHost(.{
- "a",
- "a.b",
- "example.com",
- "www.example.com",
- "example.org.",
- "www.example.org.",
- "xn--nw2a.xn--j6w193g", // internationalized URI: 見.香港
- "fe80--1ff-fe23-4567-890as3.ipv6-literal.net",
- });
-}
-
-test "authority.IPv4" {
- try testAuthorityHost(.{
- "127.0.0.1",
- "255.255.255.255",
- "0.0.0.0",
- "8.8.8.8",
- "1.2.3.4",
- "192.168.0.1",
- "10.42.0.0",
- });
-}
-
-test "authority.IPv6" {
- try testAuthorityHost(.{
- "[2001:db8:0:0:0:0:2:1]",
- "[2001:db8::2:1]",
- "[2001:db8:0000:1:1:1:1:1]",
- "[2001:db8:0:1:1:1:1:1]",
- "[0:0:0:0:0:0:0:0]",
- "[0:0:0:0:0:0:0:1]",
- "[::1]",
- "[::]",
- "[2001:db8:85a3:8d3:1319:8a2e:370:7348]",
- "[fe80::1ff:fe23:4567:890a%25eth2]",
- "[fe80::1ff:fe23:4567:890a]",
- "[fe80::1ff:fe23:4567:890a%253]",
- "[fe80:3::1ff:fe23:4567:890a]",
- });
-}
-
-test "RFC example 1" {
- const uri = "foo://example.com:8042/over/there?name=ferret#nose";
- try std.testing.expectEqual(Url{
- .scheme = uri[0..3],
- .user = null,
- .password = null,
- .host = uri[6..17],
- .port = 8042,
- .path = uri[22..33],
- .query = uri[34..45],
- .fragment = uri[46..50],
- }, try parse(uri));
-}
-
-test "RFX example 2" {
- const uri = "urn:example:animal:ferret:nose";
- try std.testing.expectEqual(Url{
- .scheme = uri[0..3],
- .user = null,
- .password = null,
- .host = null,
- .port = null,
- .path = uri[4..],
- .query = null,
- .fragment = null,
- }, try parse(uri));
-}
-
-// source:
-// https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#Examples
-test "Examples from wikipedia" {
- const list = [_][]const u8{
- "https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top",
- "ldap://[2001:db8::7]/c=GB?objectClass?one",
- "mailto:John.Doe@example.com",
- "news:comp.infosystems.www.servers.unix",
- "tel:+1-816-555-1212",
- "telnet://192.0.2.16:80/",
- "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
- "http://a/b/c/d;p?q",
- };
- for (list) |uri| {
- _ = try parse(uri);
- }
-}
-
-// source:
-// https://tools.ietf.org/html/rfc3986#section-5.4.1
-test "Examples from RFC3986" {
- const list = [_][]const u8{
- "http://a/b/c/g",
- "http://a/b/c/g",
- "http://a/b/c/g/",
- "http://a/g",
- "http://g",
- "http://a/b/c/d;p?y",
- "http://a/b/c/g?y",
- "http://a/b/c/d;p?q#s",
- "http://a/b/c/g#s",
- "http://a/b/c/g?y#s",
- "http://a/b/c/;x",
- "http://a/b/c/g;x",
- "http://a/b/c/g;x?y#s",
- "http://a/b/c/d;p?q",
- "http://a/b/c/",
- "http://a/b/c/",
- "http://a/b/",
- "http://a/b/",
- "http://a/b/g",
- "http://a/",
- "http://a/",
- "http://a/g",
- };
- for (list) |uri| {
- _ = try parse(uri);
- }
-}
-
-test "Special test" {
- // This is for all of you code readers ♥
- _ = try parse("https://www.youtube.com/watch?v=dQw4w9WgXcQ&feature=youtu.be&t=0");
-}
-
-test "URI escaping" {
- const input = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
- const expected = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
-
- const actual = try escapeString(std.testing.allocator, input);
- defer std.testing.allocator.free(actual);
-
- try std.testing.expectEqualSlices(u8, expected, actual);
-}
-
-test "URI unescaping" {
- const input = "%5C%C3%B6%2F%20%C3%A4%C3%B6%C3%9F%20~~.adas-https%3A%2F%2Fcanvas%3A123%2F%23ads%26%26sad";
- const expected = "\\ö/ äöß ~~.adas-https://canvas:123/#ads&&sad";
-
- const actual = try unescapeString(std.testing.allocator, input);
- defer std.testing.allocator.free(actual);
-
- try std.testing.expectEqualSlices(u8, expected, actual);
-}
diff --git a/lib/std/std.zig b/lib/std/std.zig
@@ -42,7 +42,7 @@ pub const Target = @import("target.zig").Target;
pub const Thread = @import("Thread.zig");
pub const Treap = @import("treap.zig").Treap;
pub const Tz = tz.Tz;
-pub const Url = @import("Url.zig");
+pub const Uri = @import("Uri.zig");
pub const array_hash_map = @import("array_hash_map.zig");
pub const atomic = @import("atomic.zig");