diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig
index e12e8ac4d3..1337efdd34 100644
--- a/lib/std/os/windows.zig
+++ b/lib/std/os/windows.zig
@@ -29,6 +29,7 @@ pub const ws2_32 = @import("windows/ws2_32.zig");
 pub const gdi32 = @import("windows/gdi32.zig");
 pub const winmm = @import("windows/winmm.zig");
 pub const crypt32 = @import("windows/crypt32.zig");
+pub const nls = @import("windows/nls.zig");
 
 pub const self_process_handle = @as(HANDLE, @ptrFromInt(maxInt(usize)));
 
@@ -1911,8 +1912,31 @@ pub fn nanoSecondsToFileTime(ns: i128) FILETIME {
     };
 }
 
-/// Compares two WTF16 strings using RtlEqualUnicodeString
+/// Compares two WTF16 strings using the equivalent functionality of
+/// `RtlEqualUnicodeString` (with case insensitive comparison enabled).
+/// This function can be called on any target.
 pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {
+    if (@inComptime() or builtin.os.tag != .windows) {
+        // This function compares the strings code unit by code unit (aka u16-to-u16),
+        // so any length difference implies inequality. In other words, there's no possible
+        // conversion that changes the number of UTF-16 code units needed for the uppercase/lowercase
+        // version in the conversion table since only codepoints <= max(u16) are eligible
+        // for conversion at all.
+        if (a.len != b.len) return false;
+
+        for (a, b) |a_c, b_c| {
+            // The slices are always UTF-16 LE, so need to convert the elements to native
+            // endianness for the uppercasing
+            const a_c_native = std.mem.littleToNative(u16, a_c);
+            const b_c_native = std.mem.littleToNative(u16, b_c);
+            if (a_c != b_c and nls.upcaseW(a_c_native) != nls.upcaseW(b_c_native)) {
+                return false;
+            }
+        }
+        return true;
+    }
+    // Use RtlEqualUnicodeString on Windows when not in comptime to avoid including a
+    // redundant copy of the uppercase data.
     const a_bytes = @as(u16, @intCast(a.len * 2));
     const a_string = UNICODE_STRING{
         .Length = a_bytes,
@@ -1928,6 +1952,64 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {
     return ntdll.RtlEqualUnicodeString(&a_string, &b_string, TRUE) == TRUE;
 }
 
+/// Compares two UTF-8 strings using the equivalent functionality of
+/// `RtlEqualUnicodeString` (with case insensitive comparison enabled).
+/// This function can be called on any target.
+/// Assumes `a` and `b` are valid UTF-8.
+pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {
+    // A byte-length equality check is not possible here because some codepoints
+    // have an uppercase form whose UTF-8 encoding differs in length from the
+    // lowercase form, e.g. U+0250 (2 bytes) <-> U+2C6F (3 bytes).
+    // There are 7 such codepoints in the uppercase data used by Windows.
+
+    var a_utf8_it = std.unicode.Utf8View.initUnchecked(a).iterator();
+    var b_utf8_it = std.unicode.Utf8View.initUnchecked(b).iterator();
+
+    // Use RtlUpcaseUnicodeChar on Windows when not in comptime to avoid including a
+    // redundant copy of the uppercase data.
+    const upcaseImpl = switch (builtin.os.tag) {
+        .windows => if (@inComptime()) nls.upcaseW else ntdll.RtlUpcaseUnicodeChar,
+        else => nls.upcaseW,
+    };
+
+    while (true) {
+        const a_cp = a_utf8_it.nextCodepoint() orelse break; // `a` is exhausted
+        const b_cp = b_utf8_it.nextCodepoint() orelse return false; // `b` ended before `a`
+        // Only codepoints that fit in a single u16 are passed through the uppercase conversion.
+        if (a_cp <= std.math.maxInt(u16) and b_cp <= std.math.maxInt(u16)) {
+            if (a_cp != b_cp and upcaseImpl(@intCast(a_cp)) != upcaseImpl(@intCast(b_cp))) {
+                return false;
+            }
+        } else if (a_cp != b_cp) {
+            return false;
+        }
+    }
+    // Make sure there are no leftover codepoints in b
+    if (b_utf8_it.nextCodepoint() != null) return false;
+
+    return true;
+}
+
+fn testEqlIgnoreCase(comptime expect_eql: bool, comptime a: []const u8, comptime b: []const u8) !void {
+    try std.testing.expectEqual(expect_eql, eqlIgnoreCaseUtf8(a, b));
+    try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWTF16(
+        std.unicode.utf8ToUtf16LeStringLiteral(a),
+        std.unicode.utf8ToUtf16LeStringLiteral(b),
+    ));
+
+    try comptime std.testing.expect(expect_eql == eqlIgnoreCaseUtf8(a, b));
+    try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWTF16(
+        std.unicode.utf8ToUtf16LeStringLiteral(a),
+        std.unicode.utf8ToUtf16LeStringLiteral(b),
+    ));
+}
+
+test "eqlIgnoreCaseWTF16/Utf8" {
+    try testEqlIgnoreCase(true, "\x01 a B Λ ɐ", "\x01 A b λ Ɐ");
+    // does not do case-insensitive comparison for codepoints >= U+10000
+    try testEqlIgnoreCase(false, "𐓏", "𐓷");
+}
+
 pub const PathSpace = struct {
     data: [PATH_MAX_WIDE:0]u16,
     len: usize,
diff --git a/lib/std/os/windows/nls.zig b/lib/std/os/windows/nls.zig
new file mode 100644
index 0000000000..b204997b61
--- /dev/null
+++ b/lib/std/os/windows/nls.zig
@@ -0,0 +1,166 @@
+//! Implementations of functionality related to National Language Support
+//! on Windows.
+
+const builtin = @import("builtin");
+const std = @import("../../std.zig");
+
+/// This corresponds to the uppercase table within the locale-independent
+/// l_intl.nls data (found at system32\l_intl.nls).
+/// - In l_intl.nls, this data starts at offset 0x04.
+/// - In the PEB, this data starts at index [2] of peb.UnicodeCaseTableData when
+///   it is cast to `[*]u16`.
+///
+/// Note: This data has not changed since Windows 8.1, and has become out-of-sync with
+/// the Unicode standard.
+const uppercase_table = [2544]u16{
+    272, 288, 304, 320, 336, 352, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, // level 1 (indices 0..255): indexed by the high byte, holds level-2 block offsets
+    256, 256, 256, 256, 256, 368, 384, 400, 256, 416, 256, 256, 432, 256, 256, 256, 256, 256, 256, 256, 448, 464, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 480, 496,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256,
+    256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 256, 512, 528, 528, 528, 528, 528, 528, 528, 528, // level 2 starts at index 256: 16-entry blocks indexed by bits 4..7, holding level-3 block offsets
+    528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 544, 560, 528, 528, 528, 576, 528, 528, 592, 608,
+    624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816, 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992,
+    1008, 1024, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 1040, 528, 528, 1056, 528, 528, 1072, 1088, 1104, 1120, 1136, 1152,
+    528, 528, 528, 1168, 1184, 1200, 1216, 1232, 1248, 1264, 1280, 1296, 1312, 1328, 1344, 1360, 1376, 1392, 1408, 528, 528, 528, 1424, 1440,
+    1456, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 1472, 528, 528, 528, 528, 528, 528, 528, 528,
+    1488, 1504, 1520, 1536, 1552, 1568, 1584, 1600, 1616, 1632, 1648, 1664, 1680, 1696, 1712, 1728, 1744, 1760, 1776, 1792, 1808, 1824, 1840, 1856,
+    1872, 1888, 1904, 1920, 1936, 1952, 1968, 1984, 528, 528, 528, 528, 2000, 528, 528, 2016, 2032, 528, 528, 528, 528, 528, 528, 528,
+    528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 2048, 2064, 528, 528, 528, 528, 2080, 2096, 2112, 2128, 2144,
+    2160, 2176, 2192, 2208, 2224, 2240, 2256, 528, 2272, 2288, 2304, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528,
+    528, 528, 528, 528, 2320, 2336, 2352, 528, 2368, 2384, 528, 528, 528, 528, 528, 528, 528, 528, 2400, 2416, 2432, 2448, 2464, 2480,
+    2496, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 2512, 2528, 528, 528, 528, 528, 528, 528, 528, 528, 528, 528,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65504, 65504, 65504, 65504, 65504, 65504, 65504, // level 3 starts at index 528: 16-entry blocks indexed by bits 0..3, holding wrapping deltas (65504 acts as -32)
+    65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504,
+    65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 0, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 121,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0,
+    65535, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 0, 65535, 0, 65535, 0, 65535, 0, 195, 0, 0, 65535, 0, 65535, 0, 0, 65535, 0, 0, 0, 65535, 0, 0, 0,
+    0, 0, 65535, 0, 0, 97, 0, 0, 0, 65535, 163, 0, 0, 0, 130, 0, 0, 65535, 0, 65535, 0, 65535, 0, 0,
+    65535, 0, 0, 0, 0, 65535, 0, 0, 65535, 0, 0, 0, 65535, 0, 65535, 0, 0, 65535, 0, 0, 0, 65535, 0, 56,
+    0, 0, 0, 0, 0, 0, 65534, 0, 0, 65534, 0, 0, 65534, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0,
+    65535, 0, 65535, 0, 65535, 65457, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 0, 0, 65534, 0, 65535, 0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 0,
+    0, 0, 0, 0, 65535, 0, 0, 0, 0, 0, 65535, 0, 0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    10783, 10780, 0, 65326, 65330, 0, 65331, 65331, 0, 65334, 0, 65333, 0, 0, 0, 0, 65331, 0, 0, 65329, 0, 0, 0, 0,
+    65327, 65325, 0, 10743, 0, 0, 0, 65325, 0, 10749, 65323, 0, 0, 65322, 0, 0, 0, 0, 0, 0, 0, 10727, 0, 0,
+    65318, 0, 0, 65318, 0, 0, 0, 0, 65318, 65467, 65319, 65319, 65465, 0, 0, 0, 0, 0, 65317, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 65535, 0, 65535, 0, 0, 0, 65535, 0, 0, 0, 130, 130, 130, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 65498, 65499, 65499, 65499, 0, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504,
+    65504, 65504, 0, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65472, 65473, 65473, 0, 0, 0, 0, 0, 0, 0, 0, 65528,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 0, 7, 0, 0, 0, 0, 0, 65535, 0, 0, 65535, 0, 0, 0, 0, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504,
+    65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504,
+    65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 65456, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 65535, 0, 65535, 0, 65535, 0,
+    65535, 0, 65535, 0, 65535, 0, 65535, 65521, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488,
+    65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35332, 0, 0, 0, 3814, 0, 0,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 8, 8, 8, 8, 8, 8, 8, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 74, 74, 86, 86, 86, 86, 100, 100, 128, 128, 112, 112, 126, 126, 0, 0,
+    8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0,
+    8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65508, 0,
+    65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 65520, 0, 0, 0, 0, 65535, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510,
+    65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 65510, 0, 0, 0, 0, 0, 0, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488,
+    65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488,
+    65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 65488, 0, 0, 65535, 0, 0, 0, 54741, 54744, 0,
+    65535, 0, 65535, 0, 65535, 0, 0, 0, 0, 0, 0, 65535, 0, 0, 65535, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272,
+    58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272, 58272,
+    58272, 58272, 58272, 58272, 58272, 58272, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65535, 0, 65535, 0, 0, 65535,
+    0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 0, 0, 0, 65535, 0, 0, 0, 0, 65504, 65504, 65504, 65504, 65504, 65504, 65504,
+    65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 65504, 0, 0, 0, 0, 0,
+};
+
+/// Cross-platform implementation of `ntdll.RtlUpcaseUnicodeChar`.
+/// Transforms the UTF-16 code unit in `c` to its uppercased version
+/// if there is one. Otherwise, returns `c` unmodified.
+///
+/// Note: When this function is referenced, it will need to include
+/// `uppercase_table.len * 2` bytes of data in the resulting binary
+/// since it depends on the `uppercase_table` data. When
+/// targeting Windows, `ntdll.RtlUpcaseUnicodeChar` can be
+/// used instead to avoid having to include a copy of this data.
+pub fn upcaseW(c: u16) u16 {
+    return switch (c) {
+        // ASCII lowercase letters are converted arithmetically.
+        'a'...'z' => c - ('a' - 'A'),
+        // Everything from U+00C0 upwards goes through the three-level table:
+        // high byte -> second-level block, next nibble -> third-level block,
+        // low nibble -> delta to add to `c`.
+        0xC0...0xFFFF => blk: {
+            const high_byte: u8 = @truncate(c >> 8);
+            const mid_nibble: u4 = @truncate(c >> 4);
+            const low_nibble: u4 = @truncate(c);
+            const level2 = uppercase_table[high_byte];
+            const level3 = uppercase_table[level2 + mid_nibble];
+            const delta = uppercase_table[level3 + low_nibble];
+            // Deltas are stored modulo 2^16, hence the wrapping add.
+            break :blk c +% delta;
+        },
+        // Below 'a', and between 'z' and U+00C0, nothing is converted.
+        else => c,
+    };
+}
+
+test "upcaseW matches RtlUpcaseUnicodeChar" {
+    if (builtin.os.tag != .windows) return error.SkipZigTest;
+    const ntdll = std.os.windows.ntdll;
+    // Exhaustively compare every UTF-16 code unit against the OS implementation.
+    for (0..0x10000) |i| {
+        const code_unit: u16 = @intCast(i);
+        std.testing.expectEqual(ntdll.RtlUpcaseUnicodeChar(code_unit), upcaseW(code_unit)) catch |err| {
+            std.debug.print("mismatch for codepoint U+{X}\n", .{code_unit});
+            return err;
+        };
+    }
+}