diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index b93b5e361f..25f1ba1b48 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -553,8 +553,9 @@ fn testDecode(bytes: []const u8) !u21 { /// Caller must free returned memory. pub fn utf16leToUtf8Alloc(allocator: *mem.Allocator, utf16le: []const u16) ![]u8 { var result = std.ArrayList(u8).init(allocator); + errdefer result.deinit(); // optimistically guess that it will all be ascii. - try result.ensureCapacity(utf16le.len); + try result.ensureTotalCapacity(utf16le.len); var out_index: usize = 0; var it = Utf16LeIterator.init(utf16le); while (try it.nextCodepoint()) |codepoint| { @@ -569,9 +570,10 @@ pub fn utf16leToUtf8Alloc(allocator: *mem.Allocator, utf16le: []const u16) ![]u8 /// Caller must free returned memory. pub fn utf16leToUtf8AllocZ(allocator: *mem.Allocator, utf16le: []const u16) ![:0]u8 { - var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len); + var result = std.ArrayList(u8).init(allocator); + errdefer result.deinit(); // optimistically guess that it will all be ascii. - try result.ensureCapacity(utf16le.len); + try result.ensureTotalCapacity(utf16le.len); var out_index: usize = 0; var it = Utf16LeIterator.init(utf16le); while (try it.nextCodepoint()) |codepoint| { @@ -653,10 +655,18 @@ test "utf16leToUtf8" { defer std.testing.allocator.free(utf8); try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80")); } + + { + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0xdcdc); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xdcdc); + const result = utf16leToUtf8Alloc(std.testing.allocator, &utf16le); + try std.testing.expectError(error.UnexpectedSecondSurrogateHalf, result); + } } pub fn utf8ToUtf16LeWithNull(allocator: *mem.Allocator, utf8: []const u8) ![:0]u16 { var result = std.ArrayList(u16).init(allocator); + errdefer result.deinit(); // optimistically guess that it will not require surrogate pairs try result.ensureCapacity(utf8.len + 1); @@ -718,6 +728,10 @@ test "utf8ToUtf16Le" { try testing.expectEqual(@as(usize, 2), length); try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..])); } + { + const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80"); + try testing.expectError(error.InvalidUtf8, result); + } } test "utf8ToUtf16LeWithNull" { @@ -733,6 +747,10 @@ test "utf8ToUtf16LeWithNull" { try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..])); try testing.expect(utf16[2] == 0); } + { + const result = utf8ToUtf16LeWithNull(testing.allocator, "\xf4\x90\x80\x80"); + try testing.expectError(error.InvalidUtf8, result); + } } /// Converts a UTF-8 string literal into a UTF-16LE string literal.