zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

windows1252.zig (31322B) - Raw


      1 const std = @import("std");
      2 
      3 pub fn windows1252ToUtf8Stream(writer: anytype, reader: anytype) !usize {
      4     var bytes_written: usize = 0;
      5     var utf8_buf: [3]u8 = undefined;
      6     while (true) {
      7         const c = reader.readByte() catch |err| switch (err) {
      8             error.EndOfStream => return bytes_written,
      9             else => |e| return e,
     10         };
     11         const codepoint = toCodepoint(c);
     12         if (codepoint <= 0x7F) {
     13             try writer.writeByte(c);
     14             bytes_written += 1;
     15         } else {
     16             const utf8_len = std.unicode.utf8Encode(codepoint, &utf8_buf) catch unreachable;
     17             try writer.writeAll(utf8_buf[0..utf8_len]);
     18             bytes_written += utf8_len;
     19         }
     20     }
     21 }
     22 
     23 /// Returns the number of code units written to the writer
     24 pub fn windows1252ToUtf16AllocZ(allocator: std.mem.Allocator, win1252_str: []const u8) ![:0]u16 {
     25     // Guaranteed to need exactly the same number of code units as Windows-1252 bytes
     26     var utf16_slice = try allocator.allocSentinel(u16, win1252_str.len, 0);
     27     errdefer allocator.free(utf16_slice);
     28     for (win1252_str, 0..) |c, i| {
     29         utf16_slice[i] = toCodepoint(c);
     30     }
     31     return utf16_slice;
     32 }
     33 
     34 /// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
     35 pub fn toCodepoint(c: u8) u16 {
     36     return switch (c) {
     37         0x80 => 0x20ac, // Euro Sign
     38         0x82 => 0x201a, // Single Low-9 Quotation Mark
     39         0x83 => 0x0192, // Latin Small Letter F With Hook
     40         0x84 => 0x201e, // Double Low-9 Quotation Mark
     41         0x85 => 0x2026, // Horizontal Ellipsis
     42         0x86 => 0x2020, // Dagger
     43         0x87 => 0x2021, // Double Dagger
     44         0x88 => 0x02c6, // Modifier Letter Circumflex Accent
     45         0x89 => 0x2030, // Per Mille Sign
     46         0x8a => 0x0160, // Latin Capital Letter S With Caron
     47         0x8b => 0x2039, // Single Left-Pointing Angle Quotation Mark
     48         0x8c => 0x0152, // Latin Capital Ligature Oe
     49         0x8e => 0x017d, // Latin Capital Letter Z With Caron
     50         0x91 => 0x2018, // Left Single Quotation Mark
     51         0x92 => 0x2019, // Right Single Quotation Mark
     52         0x93 => 0x201c, // Left Double Quotation Mark
     53         0x94 => 0x201d, // Right Double Quotation Mark
     54         0x95 => 0x2022, // Bullet
     55         0x96 => 0x2013, // En Dash
     56         0x97 => 0x2014, // Em Dash
     57         0x98 => 0x02dc, // Small Tilde
     58         0x99 => 0x2122, // Trade Mark Sign
     59         0x9a => 0x0161, // Latin Small Letter S With Caron
     60         0x9b => 0x203a, // Single Right-Pointing Angle Quotation Mark
     61         0x9c => 0x0153, // Latin Small Ligature Oe
     62         0x9e => 0x017e, // Latin Small Letter Z With Caron
     63         0x9f => 0x0178, // Latin Capital Letter Y With Diaeresis
     64         else => c,
     65     };
     66 }
     67 
     68 /// https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WindowsBestFit/bestfit1252.txt
     69 /// Plus some mappings found empirically by iterating all codepoints:
     70 ///  0x2007 => 0xA0, // Figure Space
     71 ///  0x2008 => ' ', // Punctuation Space
     72 ///  0x2009 => ' ', // Thin Space
     73 ///  0x200A => ' ', // Hair Space
     74 ///  0x2012 => '-', // Figure Dash
     75 ///  0x2015 => '-', // Horizontal Bar
     76 ///  0x201B => '\'', // Single High-reversed-9 Quotation Mark
     77 ///  0x201F => '"', // Double High-reversed-9 Quotation Mark
     78 ///  0x202F => 0xA0, // Narrow No-Break Space
     79 ///  0x2033 => '"', // Double Prime
     80 ///  0x2036 => '"', // Reversed Double Prime
     81 pub fn bestFitFromCodepoint(codepoint: u21) ?u8 {
     82     return switch (codepoint) {
     83         0x00...0x7F,
     84         0x81,
     85         0x8D,
     86         0x8F,
     87         0x90,
     88         0x9D,
     89         0xA0...0xFF,
     90         => @intCast(codepoint),
     91         0x0100 => 0x41, // Latin Capital Letter A With Macron
     92         0x0101 => 0x61, // Latin Small Letter A With Macron
     93         0x0102 => 0x41, // Latin Capital Letter A With Breve
     94         0x0103 => 0x61, // Latin Small Letter A With Breve
     95         0x0104 => 0x41, // Latin Capital Letter A With Ogonek
     96         0x0105 => 0x61, // Latin Small Letter A With Ogonek
     97         0x0106 => 0x43, // Latin Capital Letter C With Acute
     98         0x0107 => 0x63, // Latin Small Letter C With Acute
     99         0x0108 => 0x43, // Latin Capital Letter C With Circumflex
    100         0x0109 => 0x63, // Latin Small Letter C With Circumflex
    101         0x010a => 0x43, // Latin Capital Letter C With Dot Above
    102         0x010b => 0x63, // Latin Small Letter C With Dot Above
    103         0x010c => 0x43, // Latin Capital Letter C With Caron
    104         0x010d => 0x63, // Latin Small Letter C With Caron
    105         0x010e => 0x44, // Latin Capital Letter D With Caron
    106         0x010f => 0x64, // Latin Small Letter D With Caron
    107         0x0110 => 0xd0, // Latin Capital Letter D With Stroke
    108         0x0111 => 0x64, // Latin Small Letter D With Stroke
    109         0x0112 => 0x45, // Latin Capital Letter E With Macron
    110         0x0113 => 0x65, // Latin Small Letter E With Macron
    111         0x0114 => 0x45, // Latin Capital Letter E With Breve
    112         0x0115 => 0x65, // Latin Small Letter E With Breve
    113         0x0116 => 0x45, // Latin Capital Letter E With Dot Above
    114         0x0117 => 0x65, // Latin Small Letter E With Dot Above
    115         0x0118 => 0x45, // Latin Capital Letter E With Ogonek
    116         0x0119 => 0x65, // Latin Small Letter E With Ogonek
    117         0x011a => 0x45, // Latin Capital Letter E With Caron
    118         0x011b => 0x65, // Latin Small Letter E With Caron
    119         0x011c => 0x47, // Latin Capital Letter G With Circumflex
    120         0x011d => 0x67, // Latin Small Letter G With Circumflex
    121         0x011e => 0x47, // Latin Capital Letter G With Breve
    122         0x011f => 0x67, // Latin Small Letter G With Breve
    123         0x0120 => 0x47, // Latin Capital Letter G With Dot Above
    124         0x0121 => 0x67, // Latin Small Letter G With Dot Above
    125         0x0122 => 0x47, // Latin Capital Letter G With Cedilla
    126         0x0123 => 0x67, // Latin Small Letter G With Cedilla
    127         0x0124 => 0x48, // Latin Capital Letter H With Circumflex
    128         0x0125 => 0x68, // Latin Small Letter H With Circumflex
    129         0x0126 => 0x48, // Latin Capital Letter H With Stroke
    130         0x0127 => 0x68, // Latin Small Letter H With Stroke
    131         0x0128 => 0x49, // Latin Capital Letter I With Tilde
    132         0x0129 => 0x69, // Latin Small Letter I With Tilde
    133         0x012a => 0x49, // Latin Capital Letter I With Macron
    134         0x012b => 0x69, // Latin Small Letter I With Macron
    135         0x012c => 0x49, // Latin Capital Letter I With Breve
    136         0x012d => 0x69, // Latin Small Letter I With Breve
    137         0x012e => 0x49, // Latin Capital Letter I With Ogonek
    138         0x012f => 0x69, // Latin Small Letter I With Ogonek
    139         0x0130 => 0x49, // Latin Capital Letter I With Dot Above
    140         0x0131 => 0x69, // Latin Small Letter Dotless I
    141         0x0134 => 0x4a, // Latin Capital Letter J With Circumflex
    142         0x0135 => 0x6a, // Latin Small Letter J With Circumflex
    143         0x0136 => 0x4b, // Latin Capital Letter K With Cedilla
    144         0x0137 => 0x6b, // Latin Small Letter K With Cedilla
    145         0x0139 => 0x4c, // Latin Capital Letter L With Acute
    146         0x013a => 0x6c, // Latin Small Letter L With Acute
    147         0x013b => 0x4c, // Latin Capital Letter L With Cedilla
    148         0x013c => 0x6c, // Latin Small Letter L With Cedilla
    149         0x013d => 0x4c, // Latin Capital Letter L With Caron
    150         0x013e => 0x6c, // Latin Small Letter L With Caron
    151         0x0141 => 0x4c, // Latin Capital Letter L With Stroke
    152         0x0142 => 0x6c, // Latin Small Letter L With Stroke
    153         0x0143 => 0x4e, // Latin Capital Letter N With Acute
    154         0x0144 => 0x6e, // Latin Small Letter N With Acute
    155         0x0145 => 0x4e, // Latin Capital Letter N With Cedilla
    156         0x0146 => 0x6e, // Latin Small Letter N With Cedilla
    157         0x0147 => 0x4e, // Latin Capital Letter N With Caron
    158         0x0148 => 0x6e, // Latin Small Letter N With Caron
    159         0x014c => 0x4f, // Latin Capital Letter O With Macron
    160         0x014d => 0x6f, // Latin Small Letter O With Macron
    161         0x014e => 0x4f, // Latin Capital Letter O With Breve
    162         0x014f => 0x6f, // Latin Small Letter O With Breve
    163         0x0150 => 0x4f, // Latin Capital Letter O With Double Acute
    164         0x0151 => 0x6f, // Latin Small Letter O With Double Acute
    165         0x0152 => 0x8c, // Latin Capital Ligature Oe
    166         0x0153 => 0x9c, // Latin Small Ligature Oe
    167         0x0154 => 0x52, // Latin Capital Letter R With Acute
    168         0x0155 => 0x72, // Latin Small Letter R With Acute
    169         0x0156 => 0x52, // Latin Capital Letter R With Cedilla
    170         0x0157 => 0x72, // Latin Small Letter R With Cedilla
    171         0x0158 => 0x52, // Latin Capital Letter R With Caron
    172         0x0159 => 0x72, // Latin Small Letter R With Caron
    173         0x015a => 0x53, // Latin Capital Letter S With Acute
    174         0x015b => 0x73, // Latin Small Letter S With Acute
    175         0x015c => 0x53, // Latin Capital Letter S With Circumflex
    176         0x015d => 0x73, // Latin Small Letter S With Circumflex
    177         0x015e => 0x53, // Latin Capital Letter S With Cedilla
    178         0x015f => 0x73, // Latin Small Letter S With Cedilla
    179         0x0160 => 0x8a, // Latin Capital Letter S With Caron
    180         0x0161 => 0x9a, // Latin Small Letter S With Caron
    181         0x0162 => 0x54, // Latin Capital Letter T With Cedilla
    182         0x0163 => 0x74, // Latin Small Letter T With Cedilla
    183         0x0164 => 0x54, // Latin Capital Letter T With Caron
    184         0x0165 => 0x74, // Latin Small Letter T With Caron
    185         0x0166 => 0x54, // Latin Capital Letter T With Stroke
    186         0x0167 => 0x74, // Latin Small Letter T With Stroke
    187         0x0168 => 0x55, // Latin Capital Letter U With Tilde
    188         0x0169 => 0x75, // Latin Small Letter U With Tilde
    189         0x016a => 0x55, // Latin Capital Letter U With Macron
    190         0x016b => 0x75, // Latin Small Letter U With Macron
    191         0x016c => 0x55, // Latin Capital Letter U With Breve
    192         0x016d => 0x75, // Latin Small Letter U With Breve
    193         0x016e => 0x55, // Latin Capital Letter U With Ring Above
    194         0x016f => 0x75, // Latin Small Letter U With Ring Above
    195         0x0170 => 0x55, // Latin Capital Letter U With Double Acute
    196         0x0171 => 0x75, // Latin Small Letter U With Double Acute
    197         0x0172 => 0x55, // Latin Capital Letter U With Ogonek
    198         0x0173 => 0x75, // Latin Small Letter U With Ogonek
    199         0x0174 => 0x57, // Latin Capital Letter W With Circumflex
    200         0x0175 => 0x77, // Latin Small Letter W With Circumflex
    201         0x0176 => 0x59, // Latin Capital Letter Y With Circumflex
    202         0x0177 => 0x79, // Latin Small Letter Y With Circumflex
    203         0x0178 => 0x9f, // Latin Capital Letter Y With Diaeresis
    204         0x0179 => 0x5a, // Latin Capital Letter Z With Acute
    205         0x017a => 0x7a, // Latin Small Letter Z With Acute
    206         0x017b => 0x5a, // Latin Capital Letter Z With Dot Above
    207         0x017c => 0x7a, // Latin Small Letter Z With Dot Above
    208         0x017d => 0x8e, // Latin Capital Letter Z With Caron
    209         0x017e => 0x9e, // Latin Small Letter Z With Caron
    210         0x0180 => 0x62, // Latin Small Letter B With Stroke
    211         0x0189 => 0xd0, // Latin Capital Letter African D
    212         0x0191 => 0x83, // Latin Capital Letter F With Hook
    213         0x0192 => 0x83, // Latin Small Letter F With Hook
    214         0x0197 => 0x49, // Latin Capital Letter I With Stroke
    215         0x019a => 0x6c, // Latin Small Letter L With Bar
    216         0x019f => 0x4f, // Latin Capital Letter O With Middle Tilde
    217         0x01a0 => 0x4f, // Latin Capital Letter O With Horn
    218         0x01a1 => 0x6f, // Latin Small Letter O With Horn
    219         0x01ab => 0x74, // Latin Small Letter T With Palatal Hook
    220         0x01ae => 0x54, // Latin Capital Letter T With Retroflex Hook
    221         0x01af => 0x55, // Latin Capital Letter U With Horn
    222         0x01b0 => 0x75, // Latin Small Letter U With Horn
    223         0x01b6 => 0x7a, // Latin Small Letter Z With Stroke
    224         0x01c0 => 0x7c, // Latin Letter Dental Click
    225         0x01c3 => 0x21, // Latin Letter Retroflex Click
    226         0x01cd => 0x41, // Latin Capital Letter A With Caron
    227         0x01ce => 0x61, // Latin Small Letter A With Caron
    228         0x01cf => 0x49, // Latin Capital Letter I With Caron
    229         0x01d0 => 0x69, // Latin Small Letter I With Caron
    230         0x01d1 => 0x4f, // Latin Capital Letter O With Caron
    231         0x01d2 => 0x6f, // Latin Small Letter O With Caron
    232         0x01d3 => 0x55, // Latin Capital Letter U With Caron
    233         0x01d4 => 0x75, // Latin Small Letter U With Caron
    234         0x01d5 => 0x55, // Latin Capital Letter U With Diaeresis And Macron
    235         0x01d6 => 0x75, // Latin Small Letter U With Diaeresis And Macron
    236         0x01d7 => 0x55, // Latin Capital Letter U With Diaeresis And Acute
    237         0x01d8 => 0x75, // Latin Small Letter U With Diaeresis And Acute
    238         0x01d9 => 0x55, // Latin Capital Letter U With Diaeresis And Caron
    239         0x01da => 0x75, // Latin Small Letter U With Diaeresis And Caron
    240         0x01db => 0x55, // Latin Capital Letter U With Diaeresis And Grave
    241         0x01dc => 0x75, // Latin Small Letter U With Diaeresis And Grave
    242         0x01de => 0x41, // Latin Capital Letter A With Diaeresis And Macron
    243         0x01df => 0x61, // Latin Small Letter A With Diaeresis And Macron
    244         0x01e4 => 0x47, // Latin Capital Letter G With Stroke
    245         0x01e5 => 0x67, // Latin Small Letter G With Stroke
    246         0x01e6 => 0x47, // Latin Capital Letter G With Caron
    247         0x01e7 => 0x67, // Latin Small Letter G With Caron
    248         0x01e8 => 0x4b, // Latin Capital Letter K With Caron
    249         0x01e9 => 0x6b, // Latin Small Letter K With Caron
    250         0x01ea => 0x4f, // Latin Capital Letter O With Ogonek
    251         0x01eb => 0x6f, // Latin Small Letter O With Ogonek
    252         0x01ec => 0x4f, // Latin Capital Letter O With Ogonek And Macron
    253         0x01ed => 0x6f, // Latin Small Letter O With Ogonek And Macron
    254         0x01f0 => 0x6a, // Latin Small Letter J With Caron
    255         0x0261 => 0x67, // Latin Small Letter Script G
    256         0x02b9 => 0x27, // Modifier Letter Prime
    257         0x02ba => 0x22, // Modifier Letter Double Prime
    258         0x02bc => 0x27, // Modifier Letter Apostrophe
    259         0x02c4 => 0x5e, // Modifier Letter Up Arrowhead
    260         0x02c6 => 0x88, // Modifier Letter Circumflex Accent
    261         0x02c8 => 0x27, // Modifier Letter Vertical Line
    262         0x02c9 => 0xaf, // Modifier Letter Macron
    263         0x02ca => 0xb4, // Modifier Letter Acute Accent
    264         0x02cb => 0x60, // Modifier Letter Grave Accent
    265         0x02cd => 0x5f, // Modifier Letter Low Macron
    266         0x02da => 0xb0, // Ring Above
    267         0x02dc => 0x98, // Small Tilde
    268         0x0300 => 0x60, // Combining Grave Accent
    269         0x0301 => 0xb4, // Combining Acute Accent
    270         0x0302 => 0x5e, // Combining Circumflex Accent
    271         0x0303 => 0x7e, // Combining Tilde
    272         0x0304 => 0xaf, // Combining Macron
    273         0x0305 => 0xaf, // Combining Overline
    274         0x0308 => 0xa8, // Combining Diaeresis
    275         0x030a => 0xb0, // Combining Ring Above
    276         0x030e => 0x22, // Combining Double Vertical Line Above
    277         0x0327 => 0xb8, // Combining Cedilla
    278         0x0331 => 0x5f, // Combining Macron Below
    279         0x0332 => 0x5f, // Combining Low Line
    280         0x037e => 0x3b, // Greek Question Mark
    281         0x0393 => 0x47, // Greek Capital Letter Gamma
    282         0x0398 => 0x54, // Greek Capital Letter Theta
    283         0x03a3 => 0x53, // Greek Capital Letter Sigma
    284         0x03a6 => 0x46, // Greek Capital Letter Phi
    285         0x03a9 => 0x4f, // Greek Capital Letter Omega
    286         0x03b1 => 0x61, // Greek Small Letter Alpha
    287         0x03b2 => 0xdf, // Greek Small Letter Beta
    288         0x03b4 => 0x64, // Greek Small Letter Delta
    289         0x03b5 => 0x65, // Greek Small Letter Epsilon
    290         0x03bc => 0xb5, // Greek Small Letter Mu
    291         0x03c0 => 0x70, // Greek Small Letter Pi
    292         0x03c3 => 0x73, // Greek Small Letter Sigma
    293         0x03c4 => 0x74, // Greek Small Letter Tau
    294         0x03c6 => 0x66, // Greek Small Letter Phi
    295         0x04bb => 0x68, // Cyrillic Small Letter Shha
    296         0x0589 => 0x3a, // Armenian Full Stop
    297         0x066a => 0x25, // Arabic Percent Sign
    298         0x2000 => 0x20, // En Quad
    299         0x2001 => 0x20, // Em Quad
    300         0x2002 => 0x20, // En Space
    301         0x2003 => 0x20, // Em Space
    302         0x2004 => 0x20, // Three-Per-Em Space
    303         0x2005 => 0x20, // Four-Per-Em Space
    304         0x2006 => 0x20, // Six-Per-Em Space
    305         0x2010 => 0x2d, // Hyphen
    306         0x2011 => 0x2d, // Non-Breaking Hyphen
    307         0x2013 => 0x96, // En Dash
    308         0x2014 => 0x97, // Em Dash
    309         0x2017 => 0x3d, // Double Low Line
    310         0x2018 => 0x91, // Left Single Quotation Mark
    311         0x2019 => 0x92, // Right Single Quotation Mark
    312         0x201a => 0x82, // Single Low-9 Quotation Mark
    313         0x201c => 0x93, // Left Double Quotation Mark
    314         0x201d => 0x94, // Right Double Quotation Mark
    315         0x201e => 0x84, // Double Low-9 Quotation Mark
    316         0x2020 => 0x86, // Dagger
    317         0x2021 => 0x87, // Double Dagger
    318         0x2022 => 0x95, // Bullet
    319         0x2024 => 0xb7, // One Dot Leader
    320         0x2026 => 0x85, // Horizontal Ellipsis
    321         0x2030 => 0x89, // Per Mille Sign
    322         0x2032 => 0x27, // Prime
    323         0x2035 => 0x60, // Reversed Prime
    324         0x2039 => 0x8b, // Single Left-Pointing Angle Quotation Mark
    325         0x203a => 0x9b, // Single Right-Pointing Angle Quotation Mark
    326         0x2044 => 0x2f, // Fraction Slash
    327         0x2070 => 0xb0, // Superscript Zero
    328         0x2074 => 0x34, // Superscript Four
    329         0x2075 => 0x35, // Superscript Five
    330         0x2076 => 0x36, // Superscript Six
    331         0x2077 => 0x37, // Superscript Seven
    332         0x2078 => 0x38, // Superscript Eight
    333         0x207f => 0x6e, // Superscript Latin Small Letter N
    334         0x2080 => 0x30, // Subscript Zero
    335         0x2081 => 0x31, // Subscript One
    336         0x2082 => 0x32, // Subscript Two
    337         0x2083 => 0x33, // Subscript Three
    338         0x2084 => 0x34, // Subscript Four
    339         0x2085 => 0x35, // Subscript Five
    340         0x2086 => 0x36, // Subscript Six
    341         0x2087 => 0x37, // Subscript Seven
    342         0x2088 => 0x38, // Subscript Eight
    343         0x2089 => 0x39, // Subscript Nine
    344         0x20ac => 0x80, // Euro Sign
    345         0x20a1 => 0xa2, // Colon Sign
    346         0x20a4 => 0xa3, // Lira Sign
    347         0x20a7 => 0x50, // Peseta Sign
    348         0x2102 => 0x43, // Double-Struck Capital C
    349         0x2107 => 0x45, // Euler Constant
    350         0x210a => 0x67, // Script Small G
    351         0x210b => 0x48, // Script Capital H
    352         0x210c => 0x48, // Black-Letter Capital H
    353         0x210d => 0x48, // Double-Struck Capital H
    354         0x210e => 0x68, // Planck Constant
    355         0x2110 => 0x49, // Script Capital I
    356         0x2111 => 0x49, // Black-Letter Capital I
    357         0x2112 => 0x4c, // Script Capital L
    358         0x2113 => 0x6c, // Script Small L
    359         0x2115 => 0x4e, // Double-Struck Capital N
    360         0x2118 => 0x50, // Script Capital P
    361         0x2119 => 0x50, // Double-Struck Capital P
    362         0x211a => 0x51, // Double-Struck Capital Q
    363         0x211b => 0x52, // Script Capital R
    364         0x211c => 0x52, // Black-Letter Capital R
    365         0x211d => 0x52, // Double-Struck Capital R
    366         0x2122 => 0x99, // Trade Mark Sign
    367         0x2124 => 0x5a, // Double-Struck Capital Z
    368         0x2128 => 0x5a, // Black-Letter Capital Z
    369         0x212a => 0x4b, // Kelvin Sign
    370         0x212b => 0xc5, // Angstrom Sign
    371         0x212c => 0x42, // Script Capital B
    372         0x212d => 0x43, // Black-Letter Capital C
    373         0x212e => 0x65, // Estimated Symbol
    374         0x212f => 0x65, // Script Small E
    375         0x2130 => 0x45, // Script Capital E
    376         0x2131 => 0x46, // Script Capital F
    377         0x2133 => 0x4d, // Script Capital M
    378         0x2134 => 0x6f, // Script Small O
    379         0x2205 => 0xd8, // Empty Set
    380         0x2212 => 0x2d, // Minus Sign
    381         0x2213 => 0xb1, // Minus-Or-Plus Sign
    382         0x2215 => 0x2f, // Division Slash
    383         0x2216 => 0x5c, // Set Minus
    384         0x2217 => 0x2a, // Asterisk Operator
    385         0x2218 => 0xb0, // Ring Operator
    386         0x2219 => 0xb7, // Bullet Operator
    387         0x221a => 0x76, // Square Root
    388         0x221e => 0x38, // Infinity
    389         0x2223 => 0x7c, // Divides
    390         0x2229 => 0x6e, // Intersection
    391         0x2236 => 0x3a, // Ratio
    392         0x223c => 0x7e, // Tilde Operator
    393         0x2248 => 0x98, // Almost Equal To
    394         0x2261 => 0x3d, // Identical To
    395         0x2264 => 0x3d, // Less-Than Or Equal To
    396         0x2265 => 0x3d, // Greater-Than Or Equal To
    397         0x226a => 0xab, // Much Less-Than
    398         0x226b => 0xbb, // Much Greater-Than
    399         0x22c5 => 0xb7, // Dot Operator
    400         0x2302 => 0xa6, // House
    401         0x2303 => 0x5e, // Up Arrowhead
    402         0x2310 => 0xac, // Reversed Not Sign
    403         0x2320 => 0x28, // Top Half Integral
    404         0x2321 => 0x29, // Bottom Half Integral
    405         0x2329 => 0x3c, // Left-Pointing Angle Bracket
    406         0x232a => 0x3e, // Right-Pointing Angle Bracket
    407         0x2500 => 0x2d, // Box Drawings Light Horizontal
    408         0x2502 => 0xa6, // Box Drawings Light Vertical
    409         0x250c => 0x2b, // Box Drawings Light Down And Right
    410         0x2510 => 0x2b, // Box Drawings Light Down And Left
    411         0x2514 => 0x2b, // Box Drawings Light Up And Right
    412         0x2518 => 0x2b, // Box Drawings Light Up And Left
    413         0x251c => 0x2b, // Box Drawings Light Vertical And Right
    414         0x2524 => 0xa6, // Box Drawings Light Vertical And Left
    415         0x252c => 0x2d, // Box Drawings Light Down And Horizontal
    416         0x2534 => 0x2d, // Box Drawings Light Up And Horizontal
    417         0x253c => 0x2b, // Box Drawings Light Vertical And Horizontal
    418         0x2550 => 0x2d, // Box Drawings Double Horizontal
    419         0x2551 => 0xa6, // Box Drawings Double Vertical
    420         0x2552 => 0x2b, // Box Drawings Down Single And Right Double
    421         0x2553 => 0x2b, // Box Drawings Down Double And Right Single
    422         0x2554 => 0x2b, // Box Drawings Double Down And Right
    423         0x2555 => 0x2b, // Box Drawings Down Single And Left Double
    424         0x2556 => 0x2b, // Box Drawings Down Double And Left Single
    425         0x2557 => 0x2b, // Box Drawings Double Down And Left
    426         0x2558 => 0x2b, // Box Drawings Up Single And Right Double
    427         0x2559 => 0x2b, // Box Drawings Up Double And Right Single
    428         0x255a => 0x2b, // Box Drawings Double Up And Right
    429         0x255b => 0x2b, // Box Drawings Up Single And Left Double
    430         0x255c => 0x2b, // Box Drawings Up Double And Left Single
    431         0x255d => 0x2b, // Box Drawings Double Up And Left
    432         0x255e => 0xa6, // Box Drawings Vertical Single And Right Double
    433         0x255f => 0xa6, // Box Drawings Vertical Double And Right Single
    434         0x2560 => 0xa6, // Box Drawings Double Vertical And Right
    435         0x2561 => 0xa6, // Box Drawings Vertical Single And Left Double
    436         0x2562 => 0xa6, // Box Drawings Vertical Double And Left Single
    437         0x2563 => 0xa6, // Box Drawings Double Vertical And Left
    438         0x2564 => 0x2d, // Box Drawings Down Single And Horizontal Double
    439         0x2565 => 0x2d, // Box Drawings Down Double And Horizontal Single
    440         0x2566 => 0x2d, // Box Drawings Double Down And Horizontal
    441         0x2567 => 0x2d, // Box Drawings Up Single And Horizontal Double
    442         0x2568 => 0x2d, // Box Drawings Up Double And Horizontal Single
    443         0x2569 => 0x2d, // Box Drawings Double Up And Horizontal
    444         0x256a => 0x2b, // Box Drawings Vertical Single And Horizontal Double
    445         0x256b => 0x2b, // Box Drawings Vertical Double And Horizontal Single
    446         0x256c => 0x2b, // Box Drawings Double Vertical And Horizontal
    447         0x2580 => 0xaf, // Upper Half Block
    448         0x2584 => 0x5f, // Lower Half Block
    449         0x2588 => 0xa6, // Full Block
    450         0x258c => 0xa6, // Left Half Block
    451         0x2590 => 0xa6, // Right Half Block
    452         0x2591 => 0xa6, // Light Shade
    453         0x2592 => 0xa6, // Medium Shade
    454         0x2593 => 0xa6, // Dark Shade
    455         0x25a0 => 0xa6, // Black Square
    456         0x263c => 0xa4, // White Sun With Rays
    457         0x2758 => 0x7c, // Light Vertical Bar
    458         0x3000 => 0x20, // Ideographic Space
    459         0x3008 => 0x3c, // Left Angle Bracket
    460         0x3009 => 0x3e, // Right Angle Bracket
    461         0x300a => 0xab, // Left Double Angle Bracket
    462         0x300b => 0xbb, // Right Double Angle Bracket
    463         0x301a => 0x5b, // Left White Square Bracket
    464         0x301b => 0x5d, // Right White Square Bracket
    465         0x30fb => 0xb7, // Katakana Middle Dot
    466         0xff01 => 0x21, // Fullwidth Exclamation Mark
    467         0xff02 => 0x22, // Fullwidth Quotation Mark
    468         0xff03 => 0x23, // Fullwidth Number Sign
    469         0xff04 => 0x24, // Fullwidth Dollar Sign
    470         0xff05 => 0x25, // Fullwidth Percent Sign
    471         0xff06 => 0x26, // Fullwidth Ampersand
    472         0xff07 => 0x27, // Fullwidth Apostrophe
    473         0xff08 => 0x28, // Fullwidth Left Parenthesis
    474         0xff09 => 0x29, // Fullwidth Right Parenthesis
    475         0xff0a => 0x2a, // Fullwidth Asterisk
    476         0xff0b => 0x2b, // Fullwidth Plus Sign
    477         0xff0c => 0x2c, // Fullwidth Comma
    478         0xff0d => 0x2d, // Fullwidth Hyphen-Minus
    479         0xff0e => 0x2e, // Fullwidth Full Stop
    480         0xff0f => 0x2f, // Fullwidth Solidus
    481         0xff10 => 0x30, // Fullwidth Digit Zero
    482         0xff11 => 0x31, // Fullwidth Digit One
    483         0xff12 => 0x32, // Fullwidth Digit Two
    484         0xff13 => 0x33, // Fullwidth Digit Three
    485         0xff14 => 0x34, // Fullwidth Digit Four
    486         0xff15 => 0x35, // Fullwidth Digit Five
    487         0xff16 => 0x36, // Fullwidth Digit Six
    488         0xff17 => 0x37, // Fullwidth Digit Seven
    489         0xff18 => 0x38, // Fullwidth Digit Eight
    490         0xff19 => 0x39, // Fullwidth Digit Nine
    491         0xff1a => 0x3a, // Fullwidth Colon
    492         0xff1b => 0x3b, // Fullwidth Semicolon
    493         0xff1c => 0x3c, // Fullwidth Less-Than Sign
    494         0xff1d => 0x3d, // Fullwidth Equals Sign
    495         0xff1e => 0x3e, // Fullwidth Greater-Than Sign
    496         0xff1f => 0x3f, // Fullwidth Question Mark
    497         0xff20 => 0x40, // Fullwidth Commercial At
    498         0xff21 => 0x41, // Fullwidth Latin Capital Letter A
    499         0xff22 => 0x42, // Fullwidth Latin Capital Letter B
    500         0xff23 => 0x43, // Fullwidth Latin Capital Letter C
    501         0xff24 => 0x44, // Fullwidth Latin Capital Letter D
    502         0xff25 => 0x45, // Fullwidth Latin Capital Letter E
    503         0xff26 => 0x46, // Fullwidth Latin Capital Letter F
    504         0xff27 => 0x47, // Fullwidth Latin Capital Letter G
    505         0xff28 => 0x48, // Fullwidth Latin Capital Letter H
    506         0xff29 => 0x49, // Fullwidth Latin Capital Letter I
    507         0xff2a => 0x4a, // Fullwidth Latin Capital Letter J
    508         0xff2b => 0x4b, // Fullwidth Latin Capital Letter K
    509         0xff2c => 0x4c, // Fullwidth Latin Capital Letter L
    510         0xff2d => 0x4d, // Fullwidth Latin Capital Letter M
    511         0xff2e => 0x4e, // Fullwidth Latin Capital Letter N
    512         0xff2f => 0x4f, // Fullwidth Latin Capital Letter O
    513         0xff30 => 0x50, // Fullwidth Latin Capital Letter P
    514         0xff31 => 0x51, // Fullwidth Latin Capital Letter Q
    515         0xff32 => 0x52, // Fullwidth Latin Capital Letter R
    516         0xff33 => 0x53, // Fullwidth Latin Capital Letter S
    517         0xff34 => 0x54, // Fullwidth Latin Capital Letter T
    518         0xff35 => 0x55, // Fullwidth Latin Capital Letter U
    519         0xff36 => 0x56, // Fullwidth Latin Capital Letter V
    520         0xff37 => 0x57, // Fullwidth Latin Capital Letter W
    521         0xff38 => 0x58, // Fullwidth Latin Capital Letter X
    522         0xff39 => 0x59, // Fullwidth Latin Capital Letter Y
    523         0xff3a => 0x5a, // Fullwidth Latin Capital Letter Z
    524         0xff3b => 0x5b, // Fullwidth Left Square Bracket
    525         0xff3c => 0x5c, // Fullwidth Reverse Solidus
    526         0xff3d => 0x5d, // Fullwidth Right Square Bracket
    527         0xff3e => 0x5e, // Fullwidth Circumflex Accent
    528         0xff3f => 0x5f, // Fullwidth Low Line
    529         0xff40 => 0x60, // Fullwidth Grave Accent
    530         0xff41 => 0x61, // Fullwidth Latin Small Letter A
    531         0xff42 => 0x62, // Fullwidth Latin Small Letter B
    532         0xff43 => 0x63, // Fullwidth Latin Small Letter C
    533         0xff44 => 0x64, // Fullwidth Latin Small Letter D
    534         0xff45 => 0x65, // Fullwidth Latin Small Letter E
    535         0xff46 => 0x66, // Fullwidth Latin Small Letter F
    536         0xff47 => 0x67, // Fullwidth Latin Small Letter G
    537         0xff48 => 0x68, // Fullwidth Latin Small Letter H
    538         0xff49 => 0x69, // Fullwidth Latin Small Letter I
    539         0xff4a => 0x6a, // Fullwidth Latin Small Letter J
    540         0xff4b => 0x6b, // Fullwidth Latin Small Letter K
    541         0xff4c => 0x6c, // Fullwidth Latin Small Letter L
    542         0xff4d => 0x6d, // Fullwidth Latin Small Letter M
    543         0xff4e => 0x6e, // Fullwidth Latin Small Letter N
    544         0xff4f => 0x6f, // Fullwidth Latin Small Letter O
    545         0xff50 => 0x70, // Fullwidth Latin Small Letter P
    546         0xff51 => 0x71, // Fullwidth Latin Small Letter Q
    547         0xff52 => 0x72, // Fullwidth Latin Small Letter R
    548         0xff53 => 0x73, // Fullwidth Latin Small Letter S
    549         0xff54 => 0x74, // Fullwidth Latin Small Letter T
    550         0xff55 => 0x75, // Fullwidth Latin Small Letter U
    551         0xff56 => 0x76, // Fullwidth Latin Small Letter V
    552         0xff57 => 0x77, // Fullwidth Latin Small Letter W
    553         0xff58 => 0x78, // Fullwidth Latin Small Letter X
    554         0xff59 => 0x79, // Fullwidth Latin Small Letter Y
    555         0xff5a => 0x7a, // Fullwidth Latin Small Letter Z
    556         0xff5b => 0x7b, // Fullwidth Left Curly Bracket
    557         0xff5c => 0x7c, // Fullwidth Vertical Line
    558         0xff5d => 0x7d, // Fullwidth Right Curly Bracket
    559         0xff5e => 0x7e, // Fullwidth Tilde
    560         // Not in the best fit mapping, but RC uses these mappings too
    561         0x2007 => 0xA0, // Figure Space
    562         0x2008 => ' ', // Punctuation Space
    563         0x2009 => ' ', // Thin Space
    564         0x200A => ' ', // Hair Space
    565         0x2012 => '-', // Figure Dash
    566         0x2015 => '-', // Horizontal Bar
    567         0x201B => '\'', // Single High-reversed-9 Quotation Mark
    568         0x201F => '"', // Double High-reversed-9 Quotation Mark
    569         0x202F => 0xA0, // Narrow No-Break Space
    570         0x2033 => '"', // Double Prime
    571         0x2036 => '"', // Reversed Double Prime
    572         else => null,
    573     };
    574 }
    575 
    // Feeds a sample of Windows-1252 bytes through windows1252ToUtf8Stream and
    // checks both the UTF-8 produced and the byte count the function reports.
    test "windows-1252 to utf8" {
        var buf = std.array_list.Managed(u8).init(std.testing.allocator);
        defer buf.deinit();

        // The input starts with 0x81, which the expected output encodes as U+0081
        // ("\xc2\x81"), followed by a run of ASCII and then high bytes in ascending
        // order. 0x8d, 0x8f, 0x90, 0x9d, 0xa0 and 0xad are not part of this sample.
        const input_windows1252 = "\x81pqrstuvwxyz{|}~\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8e\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9e\x9f\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
        // One UTF-8 encoded character per input byte, in the same order. Bytes 0xd7
        // and 0xd8 must appear as U+00D7 (multiplication sign) and U+00D8 (capital
        // O with stroke) respectively.
        const expected_utf8 = "\xc2\x81pqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ";

        var fbs = std.io.fixedBufferStream(input_windows1252);
        const bytes_written = try windows1252ToUtf8Stream(buf.writer(), fbs.reader());

        // The returned count must equal the number of UTF-8 bytes actually written.
        try std.testing.expectEqualStrings(expected_utf8, buf.items);
        try std.testing.expectEqual(expected_utf8.len, bytes_written);
    }