zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

commit 84f4c5d9ccbebb6675c7366c4e1fdb661003356e (tree)
parent a2b834e8c7152f70d71c71107db40b9182909647
Author: Ryan Liptak <squeek502@hotmail.com>
Date:   Mon, 22 Apr 2024 03:22:15 -0700

std.unicode: Fix ArrayList functions when using populated ArrayLists

ensureTotalCapacityPrecise only satisfies the assumption made in the ArrayListImpl functions (that there is already enough capacity for the entire converted string if it is all ASCII) when the ArrayList has no items; when the ArrayList is already populated, those functions would hit illegal behavior.

Diffstat:
M lib/std/unicode.zig | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 64 insertions(+), 6 deletions(-)

diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig @@ -934,7 +934,7 @@ fn utf16LeToUtf8ArrayListImpl( .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError, .can_encode_surrogate_half => mem.Allocator.Error, })!void { - assert(result.capacity >= utf16le.len); + assert(result.unusedCapacitySlice().len >= utf16le.len); var remaining = utf16le; vectorized: { @@ -979,7 +979,7 @@ fn utf16LeToUtf8ArrayListImpl( pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error; pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void { - try result.ensureTotalCapacityPrecise(utf16le.len); + try result.ensureUnusedCapacity(utf16le.len); return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half); } @@ -1138,7 +1138,7 @@ test utf16LeToUtf8 { } fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void { - assert(result.capacity >= utf8.len); + assert(result.unusedCapacitySlice().len >= utf8.len); var remaining = utf8; vectorized: { @@ -1176,7 +1176,7 @@ fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, com } pub fn utf8ToUtf16LeArrayList(result: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void { - try result.ensureTotalCapacityPrecise(utf8.len); + try result.ensureUnusedCapacity(utf8.len); return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half); } @@ -1351,6 +1351,64 @@ test utf8ToUtf16LeAllocZ { } } +test "ArrayList functions on a re-used list" { + // utf8ToUtf16LeArrayList + { + var list = std.ArrayList(u16).init(testing.allocator); + defer list.deinit(); + + const init_slice = utf8ToUtf16LeStringLiteral("abcdefg"); + try list.ensureTotalCapacityPrecise(init_slice.len); + list.appendSliceAssumeCapacity(init_slice); + + try utf8ToUtf16LeArrayList(&list, "hijklmnopqrstuvwyxz"); + + try testing.expectEqualSlices(u16, 
utf8ToUtf16LeStringLiteral("abcdefghijklmnopqrstuvwyxz"), list.items); + } + + // utf16LeToUtf8ArrayList + { + var list = std.ArrayList(u8).init(testing.allocator); + defer list.deinit(); + + const init_slice = "abcdefg"; + try list.ensureTotalCapacityPrecise(init_slice.len); + list.appendSliceAssumeCapacity(init_slice); + + try utf16LeToUtf8ArrayList(&list, utf8ToUtf16LeStringLiteral("hijklmnopqrstuvwyxz")); + + try testing.expectEqualStrings("abcdefghijklmnopqrstuvwyxz", list.items); + } + + // wtf8ToWtf16LeArrayList + { + var list = std.ArrayList(u16).init(testing.allocator); + defer list.deinit(); + + const init_slice = utf8ToUtf16LeStringLiteral("abcdefg"); + try list.ensureTotalCapacityPrecise(init_slice.len); + list.appendSliceAssumeCapacity(init_slice); + + try wtf8ToWtf16LeArrayList(&list, "hijklmnopqrstuvwyxz"); + + try testing.expectEqualSlices(u16, utf8ToUtf16LeStringLiteral("abcdefghijklmnopqrstuvwyxz"), list.items); + } + + // wtf16LeToWtf8ArrayList + { + var list = std.ArrayList(u8).init(testing.allocator); + defer list.deinit(); + + const init_slice = "abcdefg"; + try list.ensureTotalCapacityPrecise(init_slice.len); + list.appendSliceAssumeCapacity(init_slice); + + try wtf16LeToWtf8ArrayList(&list, utf8ToUtf16LeStringLiteral("hijklmnopqrstuvwyxz")); + + try testing.expectEqualStrings("abcdefghijklmnopqrstuvwyxz", list.items); + } +} + /// Converts a UTF-8 string literal into a UTF-16LE string literal. 
pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch |err| @compileError(err):0]u16 { return comptime blk: { @@ -1685,7 +1743,7 @@ pub const Wtf8Iterator = struct { }; pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void { - try result.ensureTotalCapacityPrecise(utf16le.len); + try result.ensureUnusedCapacity(utf16le.len); return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half); } @@ -1714,7 +1772,7 @@ pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize { } pub fn wtf8ToWtf16LeArrayList(result: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void { - try result.ensureTotalCapacityPrecise(wtf8.len); + try result.ensureUnusedCapacity(wtf8.len); return utf8ToUtf16LeArrayListImpl(result, wtf8, .can_encode_surrogate_half); }