From ad34ed5a63ef912fba5232806a1adea6ea55181b Mon Sep 17 00:00:00 2001 From: Ian Johnson Date: Fri, 22 Mar 2024 20:50:07 -0400 Subject: [PATCH] Autodoc: recognize Markdown links in plain text This extension to the typical `<>` Markdown autolink syntax allows HTTP(S) links to be recognized in normal text without being delimited by `<>`. This is the most natural way to write links in text, so it makes sense to support it and allow documentation comments to be written in a more natural way. --- lib/docs/wasm/markdown.zig | 25 +++++++ lib/docs/wasm/markdown/Parser.zig | 112 ++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) diff --git a/lib/docs/wasm/markdown.zig b/lib/docs/wasm/markdown.zig index 092906c46a..e0bf4bbaac 100644 --- a/lib/docs/wasm/markdown.zig +++ b/lib/docs/wasm/markdown.zig @@ -81,6 +81,11 @@ //! escapes). `target` is expected to be an absolute URI: an autolink will not //! be recognized unless `target` starts with a URI scheme followed by a `:`. //! +//! For convenience, autolinks may also be recognized in plain text without +//! any `<>` delimiters. Such autolinks are restricted to start with `http://` +//! or `https://` followed by at least one other character, not including any +//! trailing punctuation after the link. +//! //! - **Image** - a link directly preceded by a `!`. The link text is //! interpreted as the alt text of the image. //! @@ -740,6 +745,26 @@ test "autolinks" { ); } +test "text autolinks" { + try testRender( + \\Text autolinks must start with http:// or https://. + \\This doesn't count: ftp://example.com. + \\Example: https://ziglang.org. + \\Here is an important link: **http://example.com** + \\(Links may be in parentheses: https://example.com/?q=(parens)) + \\Escaping a link so it's plain text: https\://example.com + \\ + , + \\

<p>Text autolinks must start with http:// or https://. + \\This doesn't count: ftp://example.com. + \\Example: <a href="https://ziglang.org">https://ziglang.org</a>. + \\Here is an important link: <strong><a href="http://example.com">http://example.com</a></strong> + \\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>) + \\Escaping a link so it's plain text: https://example.com</p>

+ \\ + ); +} + test "images" { try testRender( \\![Alt text](https://example.com/image.png) diff --git a/lib/docs/wasm/markdown/Parser.zig b/lib/docs/wasm/markdown/Parser.zig index 5a52882e48..9b377dce34 100644 --- a/lib/docs/wasm/markdown/Parser.zig +++ b/lib/docs/wasm/markdown/Parser.zig @@ -988,6 +988,9 @@ const InlineParser = struct { '<' => try ip.parseAutolink(), '*', '_' => try ip.parseEmphasis(), '`' => try ip.parseCodeSpan(), + 'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) { + try ip.parseTextAutolink(); + }, else => {}, } } @@ -1123,6 +1126,115 @@ const InlineParser = struct { ip.pos = start; } + /// Parses a plain text autolink (not delimited by `<>`), starting at the + /// first character in the link (an `h`). `ip.pos` is left at the last + /// character of the link, or remains unchanged if there is no valid link. + fn parseTextAutolink(ip: *InlineParser) !void { + const start = ip.pos; + var state: union(enum) { + /// Inside `http`. Contains the rest of the text to be matched. + http: []const u8, + after_http, + after_https, + /// Inside `://`. Contains the rest of the text to be matched. + authority: []const u8, + /// Inside link content. + content: struct { + start: usize, + paren_nesting: usize, + }, + } = .{ .http = "http" }; + + while (ip.pos < ip.content.len) : (ip.pos += 1) { + switch (state) { + .http => |rest| { + if (ip.content[ip.pos] != rest[0]) break; + if (rest.len > 1) { + state = .{ .http = rest[1..] }; + } else { + state = .after_http; + } + }, + .after_http => switch (ip.content[ip.pos]) { + 's' => state = .after_https, + ':' => state = .{ .authority = "//" }, + else => break, + }, + .after_https => switch (ip.content[ip.pos]) { + ':' => state = .{ .authority = "//" }, + else => break, + }, + .authority => |rest| { + if (ip.content[ip.pos] != rest[0]) break; + if (rest.len > 1) { + state = .{ .authority = rest[1..] 
}; + } else { + state = .{ .content = .{ + .start = ip.pos + 1, + .paren_nesting = 0, + } }; + } + }, + .content => |*content| switch (ip.content[ip.pos]) { + ' ', '\t', '\n' => break, + '(' => content.paren_nesting += 1, + ')' => if (content.paren_nesting == 0) { + break; + } else { + content.paren_nesting -= 1; + }, + else => {}, + }, + } + } + + switch (state) { + .http, .after_http, .after_https, .authority => { + ip.pos = start; + }, + .content => |content| { + while (ip.pos > content.start and isPostTextAutolink(ip.content[ip.pos - 1])) { + ip.pos -= 1; + } + if (ip.pos == content.start) { + ip.pos = start; + return; + } + + const target = try ip.parent.addString(ip.content[start..ip.pos]); + const node = try ip.parent.addNode(.{ + .tag = .autolink, + .data = .{ .text = .{ + .content = target, + } }, + }); + try ip.completed_inlines.append(ip.parent.allocator, .{ + .node = node, + .start = start, + .len = ip.pos - start, + }); + ip.pos -= 1; + }, + } + } + + /// Returns whether `c` may appear before a text autolink is recognized. + fn isPreTextAutolink(c: u8) bool { + return switch (c) { + ' ', '\t', '\n', '*', '_', '(' => true, + else => false, + }; + } + + /// Returns whether `c` is punctuation that may appear after a text autolink + /// and not be considered part of it. + fn isPostTextAutolink(c: u8) bool { + return switch (c) { + '?', '!', '.', ',', ':', '*', '_' => true, + else => false, + }; + } + /// Parses emphasis, starting at the beginning of a run of `*` or `_` /// characters. `ip.pos` is left at the last character in the run after /// parsing.