Merge pull request #13370 from r00ster91/newascii

std.ascii: remove LUT and deprecations
This commit is contained in:
Andrew Kelley
2022-12-10 14:53:41 -05:00
committed by GitHub
9 changed files with 89 additions and 284 deletions

View File

@@ -114,7 +114,7 @@ pub fn parse(text: []const u8) !Version {
if (id.len == 0) return error.InvalidVersion;
// Identifiers MUST comprise only ASCII alphanumerics and hyphens [0-9A-Za-z-].
for (id) |c| if (!std.ascii.isAlNum(c) and c != '-') return error.InvalidVersion;
for (id) |c| if (!std.ascii.isAlphanumeric(c) and c != '-') return error.InvalidVersion;
// Numeric identifiers MUST NOT include leading zeroes.
const is_num = for (id) |c| {
@@ -133,7 +133,7 @@ pub fn parse(text: []const u8) !Version {
if (id.len == 0) return error.InvalidVersion;
// Identifiers MUST comprise only ASCII alphanumerics and hyphens [0-9A-Za-z-].
for (id) |c| if (!std.ascii.isAlNum(c) and c != '-') return error.InvalidVersion;
for (id) |c| if (!std.ascii.isAlphanumeric(c) and c != '-') return error.InvalidVersion;
}
}

View File

@@ -10,83 +10,10 @@
const std = @import("std");
// TODO: remove all decls marked as DEPRECATED after 0.10.0's release
/// The C0 control codes of the ASCII encoding.
///
/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`.
/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`
pub const control_code = struct {
// DEPRECATED: use the lowercase variant
pub const NUL = 0x00;
// DEPRECATED: use the lowercase variant
pub const SOH = 0x01;
// DEPRECATED: use the lowercase variant
pub const STX = 0x02;
// DEPRECATED: use the lowercase variant
pub const ETX = 0x03;
// DEPRECATED: use the lowercase variant
pub const EOT = 0x04;
// DEPRECATED: use the lowercase variant
pub const ENQ = 0x05;
// DEPRECATED: use the lowercase variant
pub const ACK = 0x06;
// DEPRECATED: use the lowercase variant
pub const BEL = 0x07;
// DEPRECATED: use the lowercase variant
pub const BS = 0x08;
// DEPRECATED: use `ht`
pub const TAB = 0x09;
// DEPRECATED: use the lowercase variant
pub const LF = 0x0A;
// DEPRECATED: use the lowercase variant
pub const VT = 0x0B;
// DEPRECATED: use the lowercase variant
pub const FF = 0x0C;
// DEPRECATED: use the lowercase variant
pub const CR = 0x0D;
// DEPRECATED: use the lowercase variant
pub const SO = 0x0E;
// DEPRECATED: use the lowercase variant
pub const SI = 0x0F;
// DEPRECATED: use the lowercase variant
pub const DLE = 0x10;
// DEPRECATED: use the lowercase variant
pub const DC1 = 0x11;
// DEPRECATED: use the lowercase variant
pub const DC2 = 0x12;
// DEPRECATED: use the lowercase variant
pub const DC3 = 0x13;
// DEPRECATED: use the lowercase variant
pub const DC4 = 0x14;
// DEPRECATED: use the lowercase variant
pub const NAK = 0x15;
// DEPRECATED: use the lowercase variant
pub const SYN = 0x16;
// DEPRECATED: use the lowercase variant
pub const ETB = 0x17;
// DEPRECATED: use the lowercase variant
pub const CAN = 0x18;
// DEPRECATED: use the lowercase variant
pub const EM = 0x19;
// DEPRECATED: use the lowercase variant
pub const SUB = 0x1A;
// DEPRECATED: use the lowercase variant
pub const ESC = 0x1B;
// DEPRECATED: use the lowercase variant
pub const FS = 0x1C;
// DEPRECATED: use the lowercase variant
pub const GS = 0x1D;
// DEPRECATED: use the lowercase variant
pub const RS = 0x1E;
// DEPRECATED: use the lowercase variant
pub const US = 0x1F;
// DEPRECATED: use the lowercase variant
pub const DEL = 0x7F;
// DEPRECATED: use the lowercase variant
pub const XON = 0x11;
// DEPRECATED: use the lowercase variant
pub const XOFF = 0x13;
/// Null.
pub const nul = 0x00;
/// Start of Heading.
@@ -161,211 +88,63 @@ pub const control_code = struct {
pub const xoff = dc3;
};
/// Bit positions of the character classes that are packed into each entry of
/// `combinedTable`. The numeric value of a tag is the shift amount of its bit.
const tIndex = enum(u3) {
    Alpha = 0,
    Hex = 1,
    Space = 2,
    Digit = 3,
    Lower = 4,
    Upper = 5,
    // No bit for Ctrl: it is `< 0x20 || == DEL`.
    // No bit for Print: it is Graph or ' ' (NOT '\t' et cetera).
    Punct = 6,
    Graph = 7,
    // No bit for ASCII or for Blank (`== ' ' || == '\x09'`) either.
};
/// Lookup table that maps every byte to a bitmask of the character classes it
/// belongs to; the bit used for each class is given by `tIndex`.
/// Entries 128..255 are zero, i.e. non-ASCII bytes belong to no class.
const combinedTable = init: {
    comptime var table: [256]u8 = undefined;

    comptime var i = 0;
    inline while (i < 128) : (i += 1) {
        const upper = i >= 'A' and i <= 'Z';
        const lower = i >= 'a' and i <= 'z';
        const digit = i >= '0' and i <= '9';
        const hex = digit or (i >= 'A' and i <= 'F') or (i >= 'a' and i <= 'f');
        // '\t', '\n', vertical tab, form feed, '\r', and the space itself.
        const space = i == ' ' or (i >= '\t' and i <= '\r');
        // Everything strictly between the space and DEL.
        const graph = i > ' ' and i < 0x7F;
        // Punctuation is every graphical character that is neither a letter nor a digit.
        const punct = graph and !(upper or lower or digit);

        table[i] =
            (@as(u8, if (upper or lower) 1 else 0) << @enumToInt(tIndex.Alpha)) |
            (@as(u8, if (hex) 1 else 0) << @enumToInt(tIndex.Hex)) |
            (@as(u8, if (space) 1 else 0) << @enumToInt(tIndex.Space)) |
            (@as(u8, if (digit) 1 else 0) << @enumToInt(tIndex.Digit)) |
            (@as(u8, if (lower) 1 else 0) << @enumToInt(tIndex.Lower)) |
            (@as(u8, if (upper) 1 else 0) << @enumToInt(tIndex.Upper)) |
            (@as(u8, if (punct) 1 else 0) << @enumToInt(tIndex.Punct)) |
            (@as(u8, if (graph) 1 else 0) << @enumToInt(tIndex.Graph));
    }

    // The upper half of the byte range carries no class bits at all.
    std.mem.set(u8, table[128..256], 0);
    break :init table;
};
/// Returns whether the class bit selected by `t` is set in the
/// `combinedTable` entry for `c`.
fn inTable(c: u8, t: tIndex) bool {
    const class_bit = @as(u8, 1) << @enumToInt(t);
    const entry = combinedTable[c];
    return (entry & class_bit) != 0;
}
/// DEPRECATED: use `isAlphanumeric`
pub const isAlNum = isAlphanumeric;
/// DEPRECATED: use `isAlphabetic`
pub const isAlpha = isAlphabetic;
/// DEPRECATED: use `isControl`
pub const isCntrl = isControl;
/// DEPRECATED: use `isWhitespace`
pub const isSpace = isWhitespace;
/// DEPRECATED: use `whitespace`
pub const spaces = whitespace;
/// DEPRECATED: use `isHex`
pub const isXDigit = isHex;
/// Returns whether the character is alphanumeric: A-Z, a-z, or 0-9.
///
/// Every other byte, including all values >= 0x80, yields `false`.
pub fn isAlphanumeric(c: u8) bool {
    return switch (c) {
        '0'...'9', 'A'...'Z', 'a'...'z' => true,
        else => false,
    };
}
/// Returns whether the character is alphabetic: A-Z or a-z.
///
/// Every other byte, including all values >= 0x80, yields `false`.
pub fn isAlphabetic(c: u8) bool {
    return switch (c) {
        'A'...'Z', 'a'...'z' => true,
        else => false,
    };
}
/// Returns whether the character is a control character: any byte at or below
/// `control_code.us`, or `control_code.del`.
///
/// Note that this is not the exact complement of `isPrint`: bytes >= 0x80 are
/// neither control characters nor printable.
///
/// See also: `control_code`
pub fn isControl(c: u8) bool {
return c <= control_code.us or c == control_code.del;
}
/// Returns whether the character is a decimal digit: 0-9.
///
/// Every other byte, including all values >= 0x80, yields `false`.
pub fn isDigit(c: u8) bool {
    return switch (c) {
        '0'...'9' => true,
        else => false,
    };
}
/// DEPRECATED: use `isPrint(c) and c != ' '` instead
pub fn isGraph(c: u8) bool {
    // Looks up the Graph class bit of `c` in the shared table.
    const class = tIndex.Graph;
    return inTable(c, class);
}
/// Returns whether the character is a lowercase letter: a-z.
///
/// Every other byte, including all values >= 0x80, yields `false`.
pub fn isLower(c: u8) bool {
    return switch (c) {
        'a'...'z' => true,
        else => false,
    };
}
/// Returns whether the character is printable and has some graphical representation,
/// including the space character.
///
/// Only ASCII bytes can be printable: a byte >= 0x80 yields `false` even though
/// it is not a control character either.
pub fn isPrint(c: u8) bool {
    return isASCII(c) and !isControl(c);
}

/// DEPRECATED: create your own function based on your needs and what you want to do.
pub fn isPunct(c: u8) bool {
    return inTable(c, tIndex.Punct);
}
/// Returns whether this character is included in `whitespace`.
///
/// The check walks `whitespace` itself, so the two can never disagree.
pub fn isWhitespace(c: u8) bool {
    return for (whitespace) |other| {
        if (c == other)
            break true;
    } else false;
}
/// Whitespace for general use.
/// This may be used with e.g. `std.mem.trim` to trim whitespace.
///
/// See also: `isWhitespace`
pub const whitespace = [_]u8{ ' ', '\t', '\n', '\r', control_code.vt, control_code.ff };
test "whitespace" {
@@ -377,14 +156,20 @@ test "whitespace" {
}
}
/// Returns whether the character is an uppercase letter: A-Z.
///
/// Every other byte, including all values >= 0x80, yields `false`.
pub fn isUpper(c: u8) bool {
    return switch (c) {
        'A'...'Z' => true,
        else => false,
    };
}
/// Returns whether the character is a hexadecimal digit: A-F, a-f, or 0-9.
///
/// Every other byte, including all values >= 0x80, yields `false`.
pub fn isHex(c: u8) bool {
    return switch (c) {
        '0'...'9', 'A'...'F', 'a'...'f' => true,
        else => false,
    };
}
/// Returns whether the character is a 7-bit ASCII character.
@@ -392,12 +177,7 @@ pub fn isASCII(c: u8) bool {
return c < 128;
}
/// DEPRECATED: use `c == ' ' or c == '\t'` or try `isWhitespace`
pub fn isBlank(c: u8) bool {
    return switch (c) {
        // The space and the horizontal tab (0x09) are the only blanks.
        ' ', '\t' => true,
        else => false,
    };
}
/// Uppercases the character and returns it as-is if it's already uppercased or not a letter.
/// Uppercases the character and returns it as-is if already uppercase or not a letter.
pub fn toUpper(c: u8) u8 {
if (isLower(c)) {
return c & 0b11011111;
@@ -406,7 +186,7 @@ pub fn toUpper(c: u8) u8 {
}
}
/// Lowercases the character and returns it as-is if it's already lowercased or not a letter.
/// Lowercases the character and returns it as-is if already lowercase or not a letter.
pub fn toLower(c: u8) u8 {
if (isUpper(c)) {
return c | 0b00100000;
@@ -415,53 +195,77 @@ pub fn toLower(c: u8) u8 {
}
}
// Exercises every character-class predicate and the case conversions, always
// including bytes >= 0x80 to make sure non-ASCII input is never classified.
test "ASCII character classes" {
    const testing = std.testing;

    try testing.expect(!isControl('a'));
    try testing.expect(!isControl('z'));
    try testing.expect(!isControl(' '));
    try testing.expect(isControl(control_code.nul));
    try testing.expect(isControl(control_code.ff));
    try testing.expect(isControl(control_code.us));
    try testing.expect(isControl(control_code.del));
    try testing.expect(!isControl(0x80));
    try testing.expect(!isControl(0xff));

    try testing.expect('C' == toUpper('c'));
    try testing.expect(':' == toUpper(':'));
    try testing.expect('\xab' == toUpper('\xab'));
    try testing.expect(!isUpper('z'));
    try testing.expect(!isUpper(0x80));
    try testing.expect(!isUpper(0xff));

    try testing.expect('c' == toLower('C'));
    try testing.expect(':' == toLower(':'));
    try testing.expect('\xab' == toLower('\xab'));
    try testing.expect(!isLower('Z'));
    try testing.expect(!isLower(0x80));
    try testing.expect(!isLower(0xff));

    try testing.expect(isAlphanumeric('Z'));
    try testing.expect(isAlphanumeric('z'));
    try testing.expect(isAlphanumeric('5'));
    try testing.expect(isAlphanumeric('a'));
    try testing.expect(!isAlphanumeric('!'));
    try testing.expect(!isAlphanumeric(0x80));
    try testing.expect(!isAlphanumeric(0xff));

    try testing.expect(!isAlphabetic('5'));
    try testing.expect(isAlphabetic('c'));
    try testing.expect(!isAlphabetic('@'));
    try testing.expect(isAlphabetic('Z'));
    try testing.expect(!isAlphabetic(0x80));
    try testing.expect(!isAlphabetic(0xff));

    try testing.expect(isWhitespace(' '));
    try testing.expect(isWhitespace('\t'));
    try testing.expect(isWhitespace('\r'));
    try testing.expect(isWhitespace('\n'));
    try testing.expect(isWhitespace(control_code.ff));
    try testing.expect(!isWhitespace('.'));
    try testing.expect(!isWhitespace(control_code.us));
    try testing.expect(!isWhitespace(0x80));
    try testing.expect(!isWhitespace(0xff));

    try testing.expect(!isHex('g'));
    try testing.expect(isHex('b'));
    try testing.expect(isHex('F'));
    try testing.expect(isHex('9'));
    try testing.expect(!isHex(0x80));
    try testing.expect(!isHex(0xff));

    try testing.expect(!isDigit('~'));
    try testing.expect(isDigit('0'));
    try testing.expect(isDigit('9'));
    try testing.expect(!isDigit(0x80));
    try testing.expect(!isDigit(0xff));

    try testing.expect(isPrint(' '));
    try testing.expect(isPrint('@'));
    try testing.expect(isPrint('~'));
    try testing.expect(!isPrint(control_code.esc));
    try testing.expect(!isPrint(0x80));
    try testing.expect(!isPrint(0xff));
}
/// Writes a lower case copy of `ascii_string` to `output`.
@@ -541,7 +345,7 @@ pub fn startsWithIgnoreCase(haystack: []const u8, needle: []const u8) bool {
return if (needle.len > haystack.len) false else eqlIgnoreCase(haystack[0..needle.len], needle);
}
test "startsWithIgnoreCase" {
    // A prefix matches regardless of letter case.
    try std.testing.expect(startsWithIgnoreCase("boB", "Bo"));
    // The needle occurs only at the end of the haystack, so it is not a prefix.
    try std.testing.expect(!startsWithIgnoreCase("Needle in hAyStAcK", "haystack"));
}
@@ -550,7 +354,7 @@ pub fn endsWithIgnoreCase(haystack: []const u8, needle: []const u8) bool {
return if (needle.len > haystack.len) false else eqlIgnoreCase(haystack[haystack.len - needle.len ..], needle);
}
test "endsWithIgnoreCase" {
    // A suffix matches regardless of letter case.
    try std.testing.expect(endsWithIgnoreCase("Needle in HaYsTaCk", "haystack"));
    // The needle occurs only at the start of the haystack, so it is not a suffix.
    try std.testing.expect(!endsWithIgnoreCase("BoB", "Bo"));
}

View File

@@ -2198,8 +2198,9 @@ test "slice" {
}
test "escape non-printable" {
// Printable input, including the space, is expected to come out unchanged.
try expectFmt("abc", "{s}", .{fmtSliceEscapeLower("abc")});
try expectFmt("abc 123", "{s}", .{fmtSliceEscapeLower("abc 123")});
// The byte 0xff is expected as a `\x` escape with lowercase hex digits.
try expectFmt("ab\\xffc", "{s}", .{fmtSliceEscapeLower("ab\xffc")});
// Same expectations for the upper variant, whose escape uses uppercase hex digits.
try expectFmt("abc 123", "{s}", .{fmtSliceEscapeUpper("abc 123")});
try expectFmt("ab\\xFFc", "{s}", .{fmtSliceEscapeUpper("ab\xffc")});
}

View File

@@ -1192,7 +1192,7 @@ pub fn isValidHostName(hostname: []const u8) bool {
if (hostname.len >= 254) return false;
if (!std.unicode.utf8ValidateSlice(hostname)) return false;
for (hostname) |byte| {
if (byte >= 0x80 or byte == '.' or byte == '-' or std.ascii.isAlNum(byte)) {
if (!std.ascii.isASCII(byte) or byte == '.' or byte == '-' or std.ascii.isAlphanumeric(byte)) {
continue;
}
return false;

View File

@@ -1531,7 +1531,7 @@ const Parser = struct {
// without types we don't know if '&&' was intended as 'bitwise_and address_of', or a c-style logical_and
// The best the parser can do is recommend changing it to 'and' or ' & &'
try p.warnMsg(.{ .tag = .invalid_ampersand_ampersand, .token = oper_token });
} else if (std.ascii.isSpace(char_before) != std.ascii.isSpace(char_after)) {
} else if (std.ascii.isWhitespace(char_before) != std.ascii.isWhitespace(char_after)) {
try p.warnMsg(.{ .tag = .mismatched_binary_op_whitespace, .token = oper_token });
}
}
@@ -1728,7 +1728,7 @@ const Parser = struct {
var sentinel: Node.Index = 0;
if (p.eatToken(.identifier)) |ident| {
const ident_slice = p.source[p.token_starts[ident]..p.token_starts[ident + 1]];
if (!std.mem.eql(u8, std.mem.trimRight(u8, ident_slice, &std.ascii.spaces), "c")) {
if (!std.mem.eql(u8, std.mem.trimRight(u8, ident_slice, &std.ascii.whitespace), "c")) {
p.tok_i -= 1;
}
} else if (p.eatToken(.colon)) |_| {

View File

@@ -2648,7 +2648,7 @@ fn renderComments(ais: *Ais, tree: Ast, start: usize, end: usize) Error!bool {
const newline = if (newline_index) |i| comment_start + i else null;
const untrimmed_comment = tree.source[comment_start .. newline orelse tree.source.len];
const trimmed_comment = mem.trimRight(u8, untrimmed_comment, &std.ascii.spaces);
const trimmed_comment = mem.trimRight(u8, untrimmed_comment, &std.ascii.whitespace);
// Don't leave any whitespace at the start of the file
if (index != 0) {
@@ -2669,7 +2669,7 @@ fn renderComments(ais: *Ais, tree: Ast, start: usize, end: usize) Error!bool {
index = 1 + (newline orelse end - 1);
const comment_content = mem.trimLeft(u8, trimmed_comment["//".len..], &std.ascii.spaces);
const comment_content = mem.trimLeft(u8, trimmed_comment["//".len..], &std.ascii.whitespace);
if (ais.disabled_offset != null and mem.eql(u8, comment_content, "zig fmt: on")) {
// Write the source for which formatting was disabled directly
// to the underlying writer, fixing up invalid whitespace.
@@ -2716,7 +2716,7 @@ fn renderExtraNewlineToken(ais: *Ais, tree: Ast, token_index: Ast.TokenIndex) Er
// non-whitespace character is encountered or two newlines have been found.
var i = token_start - 1;
var newlines: u2 = 0;
while (std.ascii.isSpace(tree.source[i])) : (i -= 1) {
while (std.ascii.isWhitespace(tree.source[i])) : (i -= 1) {
if (tree.source[i] == '\n') newlines += 1;
if (newlines == 2) return ais.insertNewline();
if (i == prev_token_end) break;
@@ -2778,7 +2778,7 @@ fn tokenSliceForRender(tree: Ast, token_index: Ast.TokenIndex) []const u8 {
ret.len -= 1;
},
.container_doc_comment, .doc_comment => {
ret = mem.trimRight(u8, ret, &std.ascii.spaces);
ret = mem.trimRight(u8, ret, &std.ascii.whitespace);
},
else => {},
}

View File

@@ -1232,7 +1232,7 @@ pub const Tokenizer = struct {
fn getInvalidCharacterLength(self: *Tokenizer) u3 {
const c0 = self.buffer[self.index];
if (std.ascii.isASCII(c0)) {
if (std.ascii.isCntrl(c0)) {
if (std.ascii.isControl(c0)) {
// ascii control codes are never allowed
// (note that \n was checked before we got here)
return 1;

View File

@@ -866,7 +866,7 @@ test "error target - continuation expecting end-of-line" {
);
try depTokenizer("foo.o: \\ ",
\\target = {foo.o}
\\ERROR: illegal char \x20 at position 8: continuation expecting end-of-line
\\ERROR: illegal char ' ' at position 8: continuation expecting end-of-line
);
try depTokenizer("foo.o: \\x",
\\target = {foo.o}
@@ -1053,10 +1053,10 @@ fn printCharValues(out: anytype, bytes: []const u8) !void {
}
/// Writes `char` to `out` in a human-readable form.
///
/// Characters for which `std.ascii.isPrint` holds (this includes the space) are
/// written as the character wrapped in single quotes; every other byte is
/// written as a `\x` escape followed by two uppercase hex digits.
///
/// Errors from `out.print` are propagated to the caller.
fn printUnderstandableChar(out: anytype, char: u8) !void {
    if (std.ascii.isPrint(char)) {
        try out.print("'{c}'", .{char});
    } else {
        try out.print("\\x{X:0>2}", .{char});
    }
}

View File

@@ -5738,7 +5738,7 @@ fn parseCNumLit(c: *Context, m: *MacroCtx) ParseError!Node {
if (mem.indexOfScalar(u8, lit_bytes, '.')) |dot_index| {
if (dot_index == 2) {
lit_bytes = try std.fmt.allocPrint(c.arena, "0x0{s}", .{lit_bytes[2..]});
} else if (dot_index + 1 == lit_bytes.len or !std.ascii.isXDigit(lit_bytes[dot_index + 1])) {
} else if (dot_index + 1 == lit_bytes.len or !std.ascii.isHex(lit_bytes[dot_index + 1])) {
// If the literal lacks a digit after the `.`, we need to
// add one since `0x1.p10` would be invalid syntax in Zig.
lit_bytes = try std.fmt.allocPrint(c.arena, "0x{s}0{s}", .{