Recognize & skip the UTF-8 BOM
This commit is contained in:
@@ -407,9 +407,14 @@ void tokenize(Buf *buf, Tokenization *out) {
|
||||
t.buf = buf;
|
||||
|
||||
out->line_offsets = allocate<ZigList<size_t>>(1);
|
||||
|
||||
out->line_offsets->append(0);
|
||||
for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
|
||||
|
||||
// Skip the UTF-8 BOM if present
|
||||
if (buf_starts_with_mem(buf, "\xEF\xBB\xBF", 3)) {
|
||||
t.pos += 3;
|
||||
}
|
||||
|
||||
for (; t.pos < buf_len(t.buf); t.pos += 1) {
|
||||
uint8_t c = buf_ptr(t.buf)[t.pos];
|
||||
switch (t.state) {
|
||||
case TokenizeStateError:
|
||||
|
||||
Reference in New Issue
Block a user