//! src/link/LdScript.zig
//!
//! Parses GNU LD linker scripts. LD script processing happens in the Zig
//! frontend along with the relevant library-search logic, so the libraries
//! named within a script are subject to the same search criteria as all
//! other libraries. This unfortunately means doing file system access on
//! every .so file when targeting ELF to determine whether it is a linker
//! script, though there is a plan to address this.

/// The location of the script file, for error reporting.
path: Path,
/// CPU architecture implied by an OUTPUT_FORMAT command, if the script has
/// one.
cpu_arch: ?std.Target.Cpu.Arch,
/// The libraries named by the script, in order of appearance.
args: []const Arg,

pub const Arg = struct {
/// False when the library appeared inside AS_NEEDED(...), meaning it may
/// be skipped if none of its symbols are referenced.
needed: bool = false,
path: []const u8,
};
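
/// Frees the memory owned by this LdScript and invalidates it.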
pub fn deinit(ls: *LdScript, gpa: Allocator) void {
gpa.free(ls.args);
ls.* = undefined;
}
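
/// The error set returned by `parse`.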
pub const Error = error{
LinkFailure,
UnknownCpuArch,
OutOfMemory,
};
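
/// Tokenizes and then parses `data` as an LD script, reporting parse errors
/// through `diags` with a 0-based line:column location. An illustrative
/// call, with `gpa`, `diags`, `script_path`, and `contents` assumed to be
/// supplied by the caller:
///
///     var script = try LdScript.parse(gpa, diags, script_path, contents);
///     defer script.deinit(gpa);
///     for (script.args) |arg| std.log.debug("lib: {s}", .{arg.path});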
pub fn parse(
gpa: Allocator,
diags: *Diags,
/// For error reporting.
path: Path,
data: []const u8,
) Error!LdScript {
var tokenizer = Tokenizer{ .source = data };
var tokens: std.ArrayListUnmanaged(Token) = .empty;
defer tokens.deinit(gpa);
var line_col: std.ArrayListUnmanaged(LineColumn) = .empty;
defer line_col.deinit(gpa);
var line: usize = 0;
var prev_line_last_col: usize = 0;
while (true) {
const tok = tokenizer.next();
try tokens.append(gpa, tok);
const column = tok.start - prev_line_last_col;
try line_col.append(gpa, .{ .line = line, .column = column });
switch (tok.id) {
.invalid => {
return diags.failParse(path, "invalid token in LD script: '{s}' ({d}:{d})", .{
std.fmt.fmtSliceEscapeLower(tok.get(data)), line, column,
});
},
.new_line => {
line += 1;
prev_line_last_col = tok.end;
},
.eof => break,
else => {},
}
}
var it: TokenIterator = .{ .tokens = tokens.items };
var parser: Parser = .{
.gpa = gpa,
.source = data,
.it = &it,
.args = .empty,
.cpu_arch = null,
};
defer parser.args.deinit(gpa);
parser.start() catch |err| switch (err) {
error.UnexpectedToken => {
const last_token_id = parser.it.pos - 1;
const last_token = parser.it.get(last_token_id);
const lcol = line_col.items[last_token_id];
return diags.failParse(path, "unexpected token in LD script: {s}: '{s}' ({d}:{d})", .{
@tagName(last_token.id),
last_token.get(data),
lcol.line,
lcol.column,
});
},
else => |e| return e,
};
return .{
.path = path,
.cpu_arch = parser.cpu_arch,
.args = try parser.args.toOwnedSlice(gpa),
};
}
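
/// A 0-based source location, precomputed for each token so parse errors
/// can report where they occurred.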
const LineColumn = struct {
line: usize,
column: usize,
};
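
/// The subset of LD script commands this parser understands. Scripts spell
/// them in upper case (OUTPUT_FORMAT, INPUT, GROUP, AS_NEEDED).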
const Command = enum {
output_format,
input,
group,
as_needed,
fn fromString(s: []const u8) ?Command {
inline for (@typeInfo(Command).@"enum".fields) |field| {
const upper_name = n: {
comptime var buf: [field.name.len]u8 = undefined;
inline for (field.name, 0..) |c, i| {
buf[i] = comptime std.ascii.toUpper(c);
}
break :n buf;
};
if (std.mem.eql(u8, &upper_name, s)) return @field(Command, field.name);
}
return null;
}
};
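
// An illustrative test of the keyword mapping above; only the upper-case
// spelling is recognized.
test "Command.fromString matches upper-case names only" {
try std.testing.expectEqual(@as(?Command, .group), Command.fromString("GROUP"));
try std.testing.expectEqual(@as(?Command, .as_needed), Command.fromString("AS_NEEDED"));
try std.testing.expect(Command.fromString("group") == null);
}

/// A hand-written recursive-descent parser over the token list produced by
/// `Tokenizer`, with single-token lookahead and backtracking via the
/// iterator's seek helpers.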
const Parser = struct {
gpa: Allocator,
source: []const u8,
it: *TokenIterator,
cpu_arch: ?std.Target.Cpu.Arch,
args: std.ArrayListUnmanaged(Arg),
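
/// Entry point: parses top-level commands until end of input.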
fn start(parser: *Parser) !void {
while (true) {
parser.skipAny(&.{ .comment, .new_line });
if (parser.maybe(.command)) |cmd_id| {
const cmd = parser.getCommand(cmd_id);
switch (cmd) {
.output_format => parser.cpu_arch = try parser.outputFormat(),
// TODO we should verify that group only contains libraries
.input, .group => try parser.group(),
else => return error.UnexpectedToken,
}
} else break;
}
if (parser.it.next()) |tok| switch (tok.id) {
.eof => {},
else => return error.UnexpectedToken,
};
}
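
/// Parses OUTPUT_FORMAT(bfdname) and maps the BFD name to a CPU
/// architecture. Only elf64-x86-64 and elf64-littleaarch64 are recognized;
/// anything else fails with error.UnknownCpuArch.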
fn outputFormat(p: *Parser) !std.Target.Cpu.Arch {
const value = value: {
if (p.skip(&.{.lparen})) {
const value_id = try p.require(.literal);
const value = p.it.get(value_id);
_ = try p.require(.rparen);
break :value value.get(p.source);
} else if (p.skip(&.{ .new_line, .lbrace })) {
const value_id = try p.require(.literal);
const value = p.it.get(value_id);
_ = p.skip(&.{.new_line});
_ = try p.require(.rbrace);
break :value value.get(p.source);
} else return error.UnexpectedToken;
};
if (std.mem.eql(u8, value, "elf64-x86-64")) return .x86_64;
if (std.mem.eql(u8, value, "elf64-littleaarch64")) return .aarch64;
return error.UnknownCpuArch;
}
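
/// Parses the parenthesized body of a GROUP or INPUT command: a list of
/// library paths, possibly with nested AS_NEEDED(...) groups.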
fn group(p: *Parser) !void {
const gpa = p.gpa;
if (!p.skip(&.{.lparen})) return error.UnexpectedToken;
while (true) {
if (p.maybe(.literal)) |tok_id| {
const tok = p.it.get(tok_id);
const path = tok.get(p.source);
try p.args.append(gpa, .{ .path = path, .needed = true });
} else if (p.maybe(.command)) |cmd_id| {
const cmd = p.getCommand(cmd_id);
switch (cmd) {
.as_needed => try p.asNeeded(),
else => return error.UnexpectedToken,
}
} else break;
}
_ = try p.require(.rparen);
}
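
/// Parses AS_NEEDED(...); the libraries inside are recorded with
/// `needed = false`.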
fn asNeeded(p: *Parser) !void {
const gpa = p.gpa;
if (!p.skip(&.{.lparen})) return error.UnexpectedToken;
while (p.maybe(.literal)) |tok_id| {
const tok = p.it.get(tok_id);
const path = tok.get(p.source);
try p.args.append(gpa, .{ .path = path, .needed = false });
}
_ = try p.require(.rparen);
}
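
/// Consumes exactly the token sequence `ids` if it is next; otherwise
/// rewinds to the starting position and returns false.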
fn skip(p: *Parser, comptime ids: []const Token.Id) bool {
const pos = p.it.pos;
inline for (ids) |id| {
const tok = p.it.next() orelse return false;
if (tok.id != id) {
p.it.seekTo(pos);
return false;
}
}
return true;
}
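
/// Skips any run of tokens whose kind is in `ids`, stopping before the
/// first token of any other kind.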
fn skipAny(p: *Parser, comptime ids: []const Token.Id) void {
outer: while (p.it.next()) |tok| {
inline for (ids) |id| {
if (id == tok.id) continue :outer;
}
break p.it.seekBy(-1);
}
}
fn maybe(p: *Parser, comptime id: Token.Id) ?Token.Index {
const pos = p.it.pos;
const tok = p.it.next() orelse return null;
if (tok.id == id) return pos;
p.it.seekBy(-1);
return null;
}
fn require(p: *Parser, comptime id: Token.Id) !Token.Index {
return p.maybe(id) orelse return error.UnexpectedToken;
}
fn getCommand(p: *Parser, index: Token.Index) Command {
const tok = p.it.get(index);
assert(tok.id == .command);
return Command.fromString(tok.get(p.source)).?;
}
};
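
/// A token is a half-open byte range [start, end) into the script source,
/// tagged with an `Id`.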
const Token = struct {
id: Id,
start: usize,
end: usize,
const Id = enum {
eof,
invalid,
new_line,
lparen, // (
rparen, // )
lbrace, // {
rbrace, // }
comment, // /* */
command, // literal with special meaning, see Command
literal,
};
const Index = usize;
fn get(tok: Token, source: []const u8) []const u8 {
return source[tok.start..tok.end];
}
};
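
/// A single-pass tokenizer over the raw script source. Newlines are emitted
/// as tokens rather than skipped because `parse` uses them to track
/// line:column positions for error messages.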
const Tokenizer = struct {
source: []const u8,
index: usize = 0,
fn matchesPattern(comptime pattern: []const u8, slice: []const u8) bool {
comptime var count: usize = 0;
inline while (count < pattern.len) : (count += 1) {
if (count >= slice.len) return false;
const c = slice[count];
if (pattern[count] != c) return false;
}
return true;
}
fn matches(tok: Tokenizer, comptime pattern: []const u8) bool {
return matchesPattern(pattern, tok.source[tok.index..]);
}
fn isCommand(tok: Tokenizer, start: usize, end: usize) bool {
return Command.fromString(tok.source[start..end]) != null;
}
fn next(tok: *Tokenizer) Token {
var result = Token{
.id = .eof,
.start = tok.index,
.end = undefined,
};
var state: enum {
start,
comment,
literal,
} = .start;
while (tok.index < tok.source.len) : (tok.index += 1) {
const c = tok.source[tok.index];
switch (state) {
.start => switch (c) {
' ', '\t' => result.start += 1,
'\n' => {
result.id = .new_line;
tok.index += 1;
break;
},
'\r' => {
if (tok.matches("\r\n")) {
result.id = .new_line;
tok.index += "\r\n".len;
} else {
result.id = .invalid;
tok.index += 1;
}
break;
},
'/' => if (tok.matches("/*")) {
state = .comment;
// The loop's continue expression advances the index once more, so only
// step past the '/' here; otherwise an empty comment ("/**/") would
// jump over its own "*/" terminator.
tok.index += "/*".len - 1;
} else {
state = .literal;
},
'(' => {
result.id = .lparen;
tok.index += 1;
break;
},
')' => {
result.id = .rparen;
tok.index += 1;
break;
},
'{' => {
result.id = .lbrace;
tok.index += 1;
break;
},
'}' => {
result.id = .rbrace;
tok.index += 1;
break;
},
else => state = .literal,
},
.comment => switch (c) {
'*' => if (tok.matches("*/")) {
result.id = .comment;
tok.index += "*/".len;
break;
},
else => {},
},
.literal => switch (c) {
' ', '(', '\n' => {
if (tok.isCommand(result.start, tok.index)) {
result.id = .command;
} else {
result.id = .literal;
}
break;
},
')' => {
result.id = .literal;
break;
},
'\r' => {
if (tok.matches("\r\n")) {
if (tok.isCommand(result.start, tok.index)) {
result.id = .command;
} else {
result.id = .literal;
}
} else {
result.id = .invalid;
tok.index += 1;
}
break;
},
else => {},
},
}
}
result.end = tok.index;
return result;
}
};
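
// An illustrative test of the tokenizer on a typical GROUP command.
test "Tokenizer splits a GROUP command into tokens" {
var tokenizer: Tokenizer = .{ .source = "GROUP( libfoo.so )" };
try std.testing.expectEqual(Token.Id.command, tokenizer.next().id);
try std.testing.expectEqual(Token.Id.lparen, tokenizer.next().id);
try std.testing.expectEqual(Token.Id.literal, tokenizer.next().id);
try std.testing.expectEqual(Token.Id.rparen, tokenizer.next().id);
try std.testing.expectEqual(Token.Id.eof, tokenizer.next().id);
}

/// A cursor over the token list with seek support, used by the parser for
/// backtracking.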
const TokenIterator = struct {
tokens: []const Token,
pos: Token.Index = 0,
fn next(it: *TokenIterator) ?Token {
const token = it.peek() orelse return null;
it.pos += 1;
return token;
}
fn peek(it: TokenIterator) ?Token {
if (it.pos >= it.tokens.len) return null;
return it.tokens[it.pos];
}
fn reset(it: *TokenIterator) void {
it.pos = 0;
}
fn seekTo(it: *TokenIterator, pos: Token.Index) void {
it.pos = pos;
}
fn seekBy(it: *TokenIterator, offset: isize) void {
const new_pos = @as(isize, @bitCast(it.pos)) + offset;
if (new_pos < 0) {
it.pos = 0;
} else {
it.pos = @as(usize, @intCast(new_pos));
}
}
fn get(it: *TokenIterator, pos: Token.Index) Token {
assert(pos < it.tokens.len);
return it.tokens[pos];
}
};
const LdScript = @This();
const Diags = @import("../link.zig").Diags;
const std = @import("std");
const assert = std.debug.assert;
const Path = std.Build.Cache.Path;
const Allocator = std.mem.Allocator;