From db940a2c8131c52fb6e1f2e40af9c68d2228e656 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Wed, 15 Sep 2021 17:31:40 -0700 Subject: [PATCH] std.unicode: cleanup allocations on error in allocating functions Fixes leaks when `utf16leToUtf8Alloc`/`utf16leToUtf8AllocZ`/`utf8ToUtf16LeWithNull` return an error and adds relevant test cases --- lib/std/unicode.zig | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index b93b5e361f..25f1ba1b48 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -553,8 +553,9 @@ fn testDecode(bytes: []const u8) !u21 { /// Caller must free returned memory. pub fn utf16leToUtf8Alloc(allocator: *mem.Allocator, utf16le: []const u16) ![]u8 { var result = std.ArrayList(u8).init(allocator); + errdefer result.deinit(); // optimistically guess that it will all be ascii. - try result.ensureCapacity(utf16le.len); + try result.ensureTotalCapacity(utf16le.len); var out_index: usize = 0; var it = Utf16LeIterator.init(utf16le); while (try it.nextCodepoint()) |codepoint| { @@ -569,9 +570,10 @@ pub fn utf16leToUtf8Alloc(allocator: *mem.Allocator, utf16le: []const u16) ![]u8 /// Caller must free returned memory. pub fn utf16leToUtf8AllocZ(allocator: *mem.Allocator, utf16le: []const u16) ![:0]u8 { - var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len); + var result = std.ArrayList(u8).init(allocator); + errdefer result.deinit(); // optimistically guess that it will all be ascii. - try result.ensureCapacity(utf16le.len); + try result.ensureTotalCapacity(utf16le.len); var out_index: usize = 0; var it = Utf16LeIterator.init(utf16le); while (try it.nextCodepoint()) |codepoint| { @@ -653,10 +655,18 @@ test "utf16leToUtf8" { defer std.testing.allocator.free(utf8); try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80")); } + + { + mem.writeIntSliceLittle(u16, utf16le_as_bytes[0..], 0xdcdc); + mem.writeIntSliceLittle(u16, utf16le_as_bytes[2..], 0xdcdc); + const result = utf16leToUtf8Alloc(std.testing.allocator, &utf16le); + try std.testing.expectError(error.UnexpectedSecondSurrogateHalf, result); + } } pub fn utf8ToUtf16LeWithNull(allocator: *mem.Allocator, utf8: []const u8) ![:0]u16 { var result = std.ArrayList(u16).init(allocator); + errdefer result.deinit(); // optimistically guess that it will not require surrogate pairs try result.ensureCapacity(utf8.len + 1); @@ -718,6 +728,10 @@ test "utf8ToUtf16Le" { try testing.expectEqual(@as(usize, 2), length); try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..])); } + { + const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80"); + try testing.expectError(error.InvalidUtf8, result); + } } test "utf8ToUtf16LeWithNull" { @@ -733,6 +747,10 @@ test "utf8ToUtf16LeWithNull" { try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..])); try testing.expect(utf16[2] == 0); } + { + const result = utf8ToUtf16LeWithNull(testing.allocator, "\xf4\x90\x80\x80"); + try testing.expectError(error.InvalidUtf8, result); + } } /// Converts a UTF-8 string literal into a UTF-16LE string literal.