std.fmt: breaking API changes

added adapter to AnyWriter and GenericWriter to help bridge the gap
between old and new API

make std.testing.expectFmt work at compile-time

std.fmt no longer has a dependency on std.unicode. Formatted printing
was never properly unicode-aware. Now it no longer pretends to be.

Breakage/deprecations:
* std.fs.File.reader -> std.fs.File.deprecatedReader
* std.fs.File.writer -> std.fs.File.deprecatedWriter
* std.io.GenericReader -> std.io.Reader
* std.io.GenericWriter -> std.io.Writer
* std.io.AnyReader -> std.io.Reader
* std.io.AnyWriter -> std.io.Writer
* std.fmt.format -> std.fmt.deprecatedFormat
* std.fmt.fmtSliceEscapeLower -> std.ascii.hexEscape
* std.fmt.fmtSliceEscapeUpper -> std.ascii.hexEscape
* std.fmt.fmtSliceHexLower -> {x}
* std.fmt.fmtSliceHexUpper -> {X}
* std.fmt.fmtIntSizeDec -> {B}
* std.fmt.fmtIntSizeBin -> {Bi}
* std.fmt.fmtDuration -> {D}
* std.fmt.fmtDurationSigned -> {D}
* {} -> {f} when there is a format method
* format method signature
  - anytype -> *std.io.Writer
  - inferred error set -> error{WriteFailed}
  - options -> (deleted)
* std.fmt.Formatted
  - now takes context type explicitly
  - no fmt string
This commit is contained in:
Andrew Kelley
2025-06-27 20:05:22 -07:00
parent 0b3f0124dc
commit 0e37ff0d59
161 changed files with 4385 additions and 5847 deletions

View File

@@ -9,6 +9,7 @@ const native_endian = builtin.cpu.arch.endian();
///
/// See also: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
pub const replacement_character: u21 = 0xFFFD;
pub const replacement_character_utf8: [3]u8 = utf8EncodeComptime(replacement_character);
/// Returns how many bytes the UTF-8 representation would require
/// for the given codepoint.
@@ -802,14 +803,7 @@ fn testDecode(bytes: []const u8) !u21 {
/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
fn formatUtf8(
utf8: []const u8,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = fmt;
_ = options;
fn formatUtf8(utf8: []const u8, writer: *std.io.Writer) std.io.Writer.Error!void {
var buf: [300]u8 = undefined; // just an arbitrary size
var u8len: usize = 0;
@@ -898,27 +892,27 @@ fn formatUtf8(
/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) {
pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter([]const u8, formatUtf8) {
return .{ .data = utf8 };
}
test fmtUtf8 {
const expectFmt = testing.expectFmt;
try expectFmt("", "{}", .{fmtUtf8("")});
try expectFmt("foo", "{}", .{fmtUtf8("foo")});
try expectFmt("𐐷", "{}", .{fmtUtf8("𐐷")});
try expectFmt("", "{f}", .{fmtUtf8("")});
try expectFmt("foo", "{f}", .{fmtUtf8("foo")});
try expectFmt("𐐷", "{f}", .{fmtUtf8("𐐷")});
// Table 3-8. U+FFFD for Non-Shortest Form Sequences
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A", "{}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")});
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A", "{f}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")});
// Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A", "{}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")});
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A", "{f}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")});
// Table 3-10. U+FFFD for Other Ill-Formed Sequences
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A<EFBFBD><EFBFBD>B", "{}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")});
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>A<EFBFBD><EFBFBD>B", "{f}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")});
// Table 3-11. U+FFFD for Truncated Sequences
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD>A", "{}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")});
try expectFmt("<EFBFBD><EFBFBD><EFBFBD><EFBFBD>A", "{f}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")});
}
fn utf16LeToUtf8ArrayListImpl(
@@ -1477,14 +1471,7 @@ test calcWtf16LeLen {
/// Print the given `utf16le` string, encoded as UTF-8 bytes.
/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
fn formatUtf16Le(
utf16le: []const u16,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
writer: anytype,
) !void {
_ = fmt;
_ = options;
fn formatUtf16Le(utf16le: []const u16, writer: *std.io.Writer) std.io.Writer.Error!void {
var buf: [300]u8 = undefined; // just an arbitrary size
var it = Utf16LeIterator.init(utf16le);
var u8len: usize = 0;
@@ -1505,23 +1492,23 @@ pub const fmtUtf16le = @compileError("deprecated; renamed to fmtUtf16Le");
/// Return a Formatter for a (potentially ill-formed) UTF-16 LE string,
/// which will be converted to UTF-8 during formatting.
/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter(formatUtf16Le) {
pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter([]const u16, formatUtf16Le) {
return .{ .data = utf16le };
}
test fmtUtf16Le {
const expectFmt = testing.expectFmt;
try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});
try expectFmt("", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral(""))});
try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});
try expectFmt("foo", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("foo"))});
try expectFmt("𐐷", "{}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("𐐷"))});
try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})});
try expectFmt("<EFBFBD>", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})});
try expectFmt("<EFBFBD>", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})});
try expectFmt("<EFBFBD>", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})});
try expectFmt("<EFBFBD>", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})});
try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})});
try expectFmt("", "{f}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});
try expectFmt("", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral(""))});
try expectFmt("foo", "{f}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});
try expectFmt("foo", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("foo"))});
try expectFmt("𐐷", "{f}", .{fmtUtf16Le(wtf8ToWtf16LeStringLiteral("𐐷"))});
try expectFmt("", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})});
try expectFmt("<EFBFBD>", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})});
try expectFmt("<EFBFBD>", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})});
try expectFmt("<EFBFBD>", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})});
try expectFmt("<EFBFBD>", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})});
try expectFmt("", "{f}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})});
}
fn testUtf8ToUtf16LeStringLiteral(utf8ToUtf16LeStringLiteral_: anytype) !void {