From 84f4c5d9ccbebb6675c7366c4e1fdb661003356e Mon Sep 17 00:00:00 2001
From: Ryan Liptak <squeek502@hotmail.com>
Date: Mon, 22 Apr 2024 03:22:15 -0700
Subject: [PATCH] std.unicode: Fix ArrayList functions when using populated
 ArrayLists

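The ArrayListImpl functions assume that the list already has enough unused capacity to hold the entire converted string if the input is all ASCII. ensureTotalCapacityPrecise only satisfies that assumption when the ArrayList has no items; when called on an ArrayList that is already populated, the functions would hit illegal behavior.

Fix this by reserving space with ensureUnusedCapacity instead, and by asserting on the unused capacity rather than the total capacity. Also add a test that exercises each of the ArrayList functions on a list that already contains items.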
---
 lib/std/unicode.zig | 70 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 64 insertions(+), 6 deletions(-)

diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 9d1f52fb2d..327c485fd7 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -934,7 +934,7 @@ fn utf16LeToUtf8ArrayListImpl(
     .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
     .can_encode_surrogate_half => mem.Allocator.Error,
 })!void {
-    assert(result.capacity >= utf16le.len);
+    assert(result.unusedCapacitySlice().len >= utf16le.len);
 
     var remaining = utf16le;
     vectorized: {
@@ -979,7 +979,7 @@ fn utf16LeToUtf8ArrayListImpl(
 pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;
 
 pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
-    try result.ensureTotalCapacityPrecise(utf16le.len);
+    try result.ensureUnusedCapacity(utf16le.len);
     return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half);
 }
 
@@ -1138,7 +1138,7 @@ test utf16LeToUtf8 {
 }
 
 fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {
-    assert(result.capacity >= utf8.len);
+    assert(result.unusedCapacitySlice().len >= utf8.len);
 
     var remaining = utf8;
     vectorized: {
@@ -1176,7 +1176,7 @@ fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, com
 }
 
 pub fn utf8ToUtf16LeArrayList(result: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {
-    try result.ensureTotalCapacityPrecise(utf8.len);
+    try result.ensureUnusedCapacity(utf8.len);
     return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half);
 }
 
@@ -1351,6 +1351,64 @@ test utf8ToUtf16LeAllocZ {
     }
 }
 
+test "ArrayList functions on a re-used list" {
+    // utf8ToUtf16LeArrayList
+    {
+        var list = std.ArrayList(u16).init(testing.allocator);
+        defer list.deinit();
+
+        const init_slice = utf8ToUtf16LeStringLiteral("abcdefg");
+        try list.ensureTotalCapacityPrecise(init_slice.len);
+        list.appendSliceAssumeCapacity(init_slice);
+
+        try utf8ToUtf16LeArrayList(&list, "hijklmnopqrstuvwyxz");
+
+        try testing.expectEqualSlices(u16, utf8ToUtf16LeStringLiteral("abcdefghijklmnopqrstuvwyxz"), list.items);
+    }
+
+    // utf16LeToUtf8ArrayList
+    {
+        var list = std.ArrayList(u8).init(testing.allocator);
+        defer list.deinit();
+
+        const init_slice = "abcdefg";
+        try list.ensureTotalCapacityPrecise(init_slice.len);
+        list.appendSliceAssumeCapacity(init_slice);
+
+        try utf16LeToUtf8ArrayList(&list, utf8ToUtf16LeStringLiteral("hijklmnopqrstuvwyxz"));
+
+        try testing.expectEqualStrings("abcdefghijklmnopqrstuvwyxz", list.items);
+    }
+
+    // wtf8ToWtf16LeArrayList
+    {
+        var list = std.ArrayList(u16).init(testing.allocator);
+        defer list.deinit();
+
+        const init_slice = utf8ToUtf16LeStringLiteral("abcdefg");
+        try list.ensureTotalCapacityPrecise(init_slice.len);
+        list.appendSliceAssumeCapacity(init_slice);
+
+        try wtf8ToWtf16LeArrayList(&list, "hijklmnopqrstuvwyxz");
+
+        try testing.expectEqualSlices(u16, utf8ToUtf16LeStringLiteral("abcdefghijklmnopqrstuvwyxz"), list.items);
+    }
+
+    // wtf16LeToWtf8ArrayList
+    {
+        var list = std.ArrayList(u8).init(testing.allocator);
+        defer list.deinit();
+
+        const init_slice = "abcdefg";
+        try list.ensureTotalCapacityPrecise(init_slice.len);
+        list.appendSliceAssumeCapacity(init_slice);
+
+        try wtf16LeToWtf8ArrayList(&list, utf8ToUtf16LeStringLiteral("hijklmnopqrstuvwyxz"));
+
+        try testing.expectEqualStrings("abcdefghijklmnopqrstuvwyxz", list.items);
+    }
+}
+
 /// Converts a UTF-8 string literal into a UTF-16LE string literal.
 pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch |err| @compileError(err):0]u16 {
     return comptime blk: {
@@ -1685,7 +1743,7 @@ pub const Wtf8Iterator = struct {
 };
 
 pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
-    try result.ensureTotalCapacityPrecise(utf16le.len);
+    try result.ensureUnusedCapacity(utf16le.len);
     return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half);
 }
 
@@ -1714,7 +1772,7 @@ pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize {
 }
 
 pub fn wtf8ToWtf16LeArrayList(result: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void {
-    try result.ensureTotalCapacityPrecise(wtf8.len);
+    try result.ensureUnusedCapacity(wtf8.len);
     return utf8ToUtf16LeArrayListImpl(result, wtf8, .can_encode_surrogate_half);
 }