langref: add paragraph and examples about indexing non-ASCII strings - zig

commit 1fba88450db12f184d76f0651f7ca933322c1fc0 (tree)
parent 86ec26b1f00ce9ee2a9d559a1ca0415d05a9b908
Author: Josh Holland <josh@inv.alid.pw>
Date:   Fri, 27 Jan 2023 18:29:28 +0000

langref: add paragraph and examples about indexing non-ASCII strings

PR #10610 addressed most of the points from #1854.  This additional
paragraph and the accompanying examples cover the OMISSIONS section,
clarifying issues about indexing into non-ASCII strings (whether valid
UTF-8 or not).  I think this finally closes #1854.

Diffstat:
M doc/langref.html.in  | 14 ++++++++++++--

1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/doc/langref.html.in b/doc/langref.html.in
@@ -871,6 +871,13 @@ pub fn main() void {
       However, it is possible to embed non-UTF-8 bytes into a string literal using <code>\xNN</code> notation.
       </p>
       <p>
+      Indexing into a string containing non-ASCII bytes returns individual bytes, whether or not the
+      string is valid UTF-8.
+      The {#link|Zig Standard Library#} provides routines in {#syntax#}std.unicode{#endsyntax#} for checking the
+      validity of UTF-8 encoded strings, accessing their code points, and other encoding/decoding
+      related tasks.
+      </p>
+      <p>
       Unicode code point literals have type {#syntax#}comptime_int{#endsyntax#}, the same as
       {#link|Integer Literals#}. All {#link|Escape Sequences#} are valid in both string literals
       and Unicode code point literals.
@@ -894,9 +901,12 @@ pub fn main() void {
     print("{}\n", .{'e' == '\x65'});                    // true
     print("{d}\n", .{'\u{1f4a9}'});                     // 128169
     print("{d}\n", .{'💯'});                            // 128175
-    print("{}\n", .{mem.eql(u8, "hello", "h\x65llo")}); // true
-    print("0x{x}\n", .{"\xff"[0]}); // non-UTF-8 strings are possible with \xNN notation.
     print("{u}\n", .{'⚡'});
+    print("{}\n", .{mem.eql(u8, "hello", "h\x65llo")});      // true
+    print("{}\n", .{mem.eql(u8, "💯", "\xf0\x9f\x92\xaf")}); // also true
+    const invalid_utf8 = "\xff\xfe";      // non-UTF-8 strings are possible with \xNN notation.
+    print("0x{x}\n", .{invalid_utf8[1]}); // indexing them returns individual bytes...
+    print("0x{x}\n", .{"💯"[1]});    // ...as does indexing part-way through non-ASCII characters
 }
       {#code_end#}
       {#see_also|Arrays|Source Encoding#}

	zig fork of https://codeberg.org/ziglang/zig
	Log \| Files \| Refs \| README \| LICENSE