Recognize & skip the UTF-8 BOM

2019-09-11 11:46:51 +02:00
parent 0eddee449d
commit f36b8fd7b2
2 changed files with 17 additions and 3 deletions
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -407,9 +407,14 @@ void tokenize(Buf *buf, Tokenization *out) {
    t.buf = buf;

    out->line_offsets = allocate<ZigList<size_t>>(1);
-
    out->line_offsets->append(0);
-    for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
+
+    // Skip a leading UTF-8 BOM (bytes EF BB BF), if present, so tokenization starts at the byte after it
+    if (buf_starts_with_mem(buf, "\xEF\xBB\xBF", 3)) {
+        t.pos += 3;
+    }
+
+    for (; t.pos < buf_len(t.buf); t.pos += 1) {
        uint8_t c = buf_ptr(t.buf)[t.pos];
        switch (t.state) {
            case TokenizeStateError: