dwarf: add expression writer

2023-07-05 13:36:28 -04:00
parent ad5f74c0b1
commit 424b1299a8
1 changed files with 483 additions and 20 deletions
--- a/lib/std/dwarf/expressions.zig
+++ b/lib/std/dwarf/expressions.zig
@@ -5,17 +5,7 @@ const leb = @import("../leb128.zig");
 const dwarf = @import("../dwarf.zig");
 const abi = dwarf.abi;
 const mem = std.mem;
-
-pub const StackMachineOptions = struct {
-    /// The address size of the target architecture
-    addr_size: u8 = @sizeOf(usize),
-
-    /// Endianess of the target architecture
-    endian: std.builtin.Endian = .Little,
-
-    /// Restrict the stack machine to a subset of opcodes used in call frame instructions
-    call_frame_mode: bool = false,
-};
+const assert = std.debug.assert;

 /// Expressions can be evaluated in different contexts, each requiring its own set of inputs.
 /// Callers should specify all the fields relevant to their context. If a field is required
@@ -35,10 +25,21 @@ pub const ExpressionContext = struct {
    cfa: ?usize,
 };

+pub const ExpressionOptions = struct {
+    /// The address size of the target architecture
+    addr_size: u8 = @sizeOf(usize),
+
+    /// Endianess of the target architecture
+    endian: std.builtin.Endian = .Little,
+
+    /// Restrict the stack machine to a subset of opcodes used in call frame instructions
+    call_frame_mode: bool = false,
+};
+
 /// A stack machine that can decode and run DWARF expressions.
 /// Expressions can be decoded for non-native address size and endianness,
 /// but can only be executed if the current target matches the configuration.
-pub fn StackMachine(comptime options: StackMachineOptions) type {
+pub fn StackMachine(comptime options: ExpressionOptions) type {
    const addr_type = switch (options.addr_size) {
        2 => u16,
        4 => u32,
@@ -60,6 +61,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
            generic: addr_type,
            register: u8,
            type_size: u8,
+            branch_offset: i16,
            base_register: struct {
                base_register: u8,
                offset: i64,
@@ -114,7 +116,7 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                            2 => mem.readIntSliceNative(u16, const_type.value_bytes),
                            4 => mem.readIntSliceNative(u32, const_type.value_bytes),
                            8 => mem.readIntSliceNative(u64, const_type.value_bytes),
-                            else => return error.InvalidIntegralTypeLength,
+                            else => error.InvalidIntegralTypeLength,
                        };
                    },
                };
@@ -163,10 +165,10 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                OP.call2,
                => generic(try reader.readInt(u16, options.endian)),
                OP.call4 => generic(try reader.readInt(u32, options.endian)),
-                OP.const2s,
+                OP.const2s => generic(try reader.readInt(i16, options.endian)),
                OP.bra,
                OP.skip,
-                => generic(try reader.readInt(i16, options.endian)),
+                => .{ .branch_offset = try reader.readInt(i16, options.endian) },
                OP.const4u => generic(try reader.readInt(u32, options.endian)),
                OP.const4s => generic(try reader.readInt(i32, options.endian)),
                OP.const8u => generic(try reader.readInt(u64, options.endian)),
@@ -362,13 +364,21 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                    if (context.ucontext == null) return error.IncompleteExpressionContext;

                    const base_register = (try readOperand(stream, opcode)).?.base_register;
-                    var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, base_register.base_register, context.reg_ctx)));
+                    var value: i64 = @intCast(mem.readIntSliceNative(usize, try abi.regBytes(
+                        context.ucontext.?,
+                        base_register.base_register,
+                        context.reg_ctx,
+                    )));
                    value += base_register.offset;
                    try self.stack.append(allocator, .{ .generic = @intCast(value) });
                },
                OP.regval_type => {
                    const register_type = (try readOperand(stream, opcode)).?.register_type;
-                    const value = mem.readIntSliceNative(usize, try abi.regBytes(context.ucontext.?, register_type.register, context.reg_ctx));
+                    const value = mem.readIntSliceNative(usize, try abi.regBytes(
+                        context.ucontext.?,
+                        register_type.register,
+                        context.reg_ctx,
+                    ));
                    try self.stack.append(allocator, .{
                        .regval_type = .{
                            .type_offset = register_type.type_offset,
@@ -472,18 +482,242 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
                OP.abs => {
                    if (self.stack.items.len == 0) return error.InvalidExpression;
                    const value: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral());
-                    self.stack.items[self.stack.items.len - 1] = .{ .generic = std.math.absCast(value) };
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = std.math.absCast(value),
+                    };
                },
                OP.@"and" => {
                    if (self.stack.items.len < 2) return error.InvalidExpression;
                    const a = try self.stack.pop().asIntegral();
-                    self.stack.items[self.stack.items.len - 1] = .{ .generic = a & try self.stack.items[self.stack.items.len - 1].asIntegral() };
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = a & try self.stack.items[self.stack.items.len - 1].asIntegral(),
+                    };
+                },
+                OP.div => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a: isize = @bitCast(try self.stack.pop().asIntegral());
+                    const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral());
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = @bitCast(try std.math.divTrunc(isize, b, a)),
+                    };
+                },
+                OP.minus => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const b = try self.stack.pop().asIntegral();
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = try std.math.sub(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), b),
+                    };
+                },
+                OP.mod => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a: isize = @bitCast(try self.stack.pop().asIntegral());
+                    const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral());
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = @bitCast(@mod(b, a)),
+                    };
+                },
+                OP.mul => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a: isize = @bitCast(try self.stack.pop().asIntegral());
+                    const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral());
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = @bitCast(@mulWithOverflow(a, b)[0]),
+                    };
+                },
+                OP.neg => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = @bitCast(
+                            try std.math.negate(
+                                @as(isize, @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral())),
+                            ),
+                        ),
+                    };
+                },
+                OP.not => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = ~try self.stack.items[self.stack.items.len - 1].asIntegral(),
+                    };
+                },
+                OP.@"or" => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a = try self.stack.pop().asIntegral();
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = a | try self.stack.items[self.stack.items.len - 1].asIntegral(),
+                    };
+                },
+                OP.plus => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const b = try self.stack.pop().asIntegral();
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = try std.math.add(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), b),
+                    };
+                },
+                OP.plus_uconst => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    const constant = (try readOperand(stream, opcode)).?.generic;
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = try std.math.add(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), constant),
+                    };
+                },
+                OP.shl => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a = try self.stack.pop().asIntegral();
+                    const b = try self.stack.items[self.stack.items.len - 1].asIntegral();
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = std.math.shl(usize, b, a),
+                    };
+                },
+                OP.shr => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a = try self.stack.pop().asIntegral();
+                    const b = try self.stack.items[self.stack.items.len - 1].asIntegral();
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = std.math.shr(usize, b, a),
+                    };
+                },
+                OP.shra => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a = try self.stack.pop().asIntegral();
+                    const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral());
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = @bitCast(std.math.shr(isize, b, a)),
+                    };
+                },
+                OP.xor => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a = try self.stack.pop().asIntegral();
+                    self.stack.items[self.stack.items.len - 1] = .{
+                        .generic = a ^ try self.stack.items[self.stack.items.len - 1].asIntegral(),
+                    };
+                },
+
+                // 2.5.1.5: Control Flow Operations
+                OP.le,
+                OP.ge,
+                OP.eq,
+                OP.lt,
+                OP.gt,
+                OP.ne,
+                => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    const a = self.stack.pop();
+                    const b = self.stack.items[self.stack.items.len - 1];
+
+                    if (a == .generic and b == .generic) {
+                        const a_int: isize = @bitCast(a.asIntegral() catch unreachable);
+                        const b_int: isize = @bitCast(b.asIntegral() catch unreachable);
+                        const result = @intFromBool(switch (opcode) {
+                            OP.le => b_int < a_int,
+                            OP.ge => b_int >= a_int,
+                            OP.eq => b_int == a_int,
+                            OP.lt => b_int < a_int,
+                            OP.gt => b_int > a_int,
+                            OP.ne => b_int != a_int,
+                            else => unreachable,
+                        });
+
+                        self.stack.items[self.stack.items.len - 1] = .{ .generic = result };
+                    } else {
+                        // TODO: Load the types referenced by these values, find their comparison operator, and run it
+                        return error.UnimplementedTypedComparison;
+                    }
+                },
+                OP.skip, OP.bra => {
+                    const branch_offset = (try readOperand(stream, opcode)).?.branch_offset;
+                    const condition = if (opcode == OP.bra) blk: {
+                        if (self.stack.items.len == 0) return error.InvalidExpression;
+                        break :blk try self.stack.pop().asIntegral() != 0;
+                    } else true;
+
+                    if (condition) {
+                        const new_pos = std.math.cast(
+                            usize,
+                            try std.math.add(isize, @as(isize, @intCast(stream.pos)), branch_offset),
+                        ) orelse return error.InvalidExpression;
+
+                        if (new_pos < 0 or new_pos >= stream.buffer.len) return error.InvalidExpression;
+                        stream.pos = new_pos;
+                    }
+                },
+                OP.call2,
+                OP.call4,
+                OP.call_ref,
+                => {
+                    const debug_info_offset = (try readOperand(stream, opcode)).?.generic;
+                    _ = debug_info_offset;
+
+                    // TODO: Load a DIE entry at debug_info_offset in a .debug_info section (the spec says that it
+                    //       can be in a separate exe / shared object from the one containing this expression).
+                    //       Transfer control to the DW_AT_location attribute, with the current stack as input.
+
+                    return error.UnimplementedExpressionCall;
+                },
+
+                // 2.5.1.6: Type Conversions
+                OP.convert => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    const type_offset = (try readOperand(stream, opcode)).?.generic;
+                    _ = type_offset;
+
+                    // TODO: Load the DW_TAG_base_type entry in context.compile_unit, find a conversion operator
+                    //       from the old type to the new type, run it.
+
+                    return error.UnimplementedTypeConversion;
+                },
+                OP.reinterpret => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    const type_offset = (try readOperand(stream, opcode)).?.generic;
+
+                    // TODO: Load the DW_TAG_base_type entries in context.compile_unit and verify both types are the same size
+                    const value = self.stack.items[self.stack.items.len - 1];
+                    if (type_offset == 0) {
+                        self.stack.items[self.stack.items.len - 1] = .{ .generic = try value.asIntegral() };
+                    } else {
+                        self.stack.items[self.stack.items.len - 1] = switch (value) {
+                            .generic => |v| .{
+                                .regval_type = .{
+                                    .type_offset = type_offset,
+                                    .type_size = @sizeOf(addr_type),
+                                    .value = v,
+                                },
+                            },
+                            .regval_type => |r| .{
+                                .regval_type = .{
+                                    .type_offset = type_offset,
+                                    .type_size = r.type_size,
+                                    .value = r.value,
+                                },
+                            },
+                            .const_type => |c| .{
+                                .const_type = .{
+                                    .type_offset = type_offset,
+                                    .value_bytes = c.value_bytes,
+                                },
+                            },
+                        };
+                    }
+                },
+
+                // 2.5.1.7: Special Operations
+                OP.nop => {},
+                OP.entry_value => {
+                    const block = (try readOperand(stream, opcode)).?.block;
+                    _ = block;
+
+                    // TODO: If block is an expression, run it on a new stack. Push the resulting value onto this stack.
+                    // TODO: If block is a register location, push the value that location had before running this program onto this stack.
+                    //       This implies capturing all register values before executing this block, in case this program modifies them.
+                    // TODO: If the block contains, OP.push_object_address, treat it as OP.nop
+
+                    return error.UnimplementedSubExpression;
                },

                // These have already been handled by readOperand
                OP.lo_user...OP.hi_user => unreachable,
                else => {
-                    //std.debug.print("Unimplemented DWARF expression opcode: {x}\n", .{opcode});
+                    //std.debug.print("Unknown DWARF expression opcode: {x}\n", .{opcode});
                    return error.UnknownExpressionOpcode;
                },
            }
@@ -492,3 +726,232 @@ pub fn StackMachine(comptime options: StackMachineOptions) type {
        }
    };
 }
+
+pub fn Writer(options: ExpressionOptions) type {
+    const addr_type = switch (options.addr_size) {
+        2 => u16,
+        4 => u32,
+        8 => u64,
+        else => @compileError("Unsupported address size of " ++ options.addr_size),
+    };
+
+    return struct {
+        /// Zero-operand instructions
+        pub fn writeOpcode(writer: anytype, comptime opcode: u8) !void {
+            switch (opcode) {
+                OP.dup,
+                OP.drop,
+                OP.over,
+                OP.swap,
+                OP.rot,
+                OP.deref,
+                OP.xderef,
+                OP.push_object_address,
+                OP.form_tls_address,
+                OP.call_frame_cfa,
+                OP.abs,
+                OP.@"and",
+                OP.div,
+                OP.minus,
+                OP.mod,
+                OP.mul,
+                OP.neg,
+                OP.not,
+                OP.@"or",
+                OP.plus,
+                OP.shl,
+                OP.shr,
+                OP.shra,
+                OP.xor,
+                OP.le,
+                OP.ge,
+                OP.eq,
+                OP.lt,
+                OP.gt,
+                OP.ne,
+                OP.nop,
+                => try writer.writeByte(opcode),
+                else => @compileError("This opcode requires operands, use write<Opcode>() instead"),
+            }
+        }
+
+        // 2.5.1.1: Literal Encodings
+        pub fn writeLiteral(writer: anytype, literal: u8) !void {
+            switch (literal) {
+                0...31 => |n| try writer.writeByte(n + OP.lit0),
+                else => return error.InvalidLiteral,
+            }
+        }
+
+        pub fn writeConst(writer: anytype, comptime T: type, value: T) !void {
+            if (@typeInfo(T) != .Int) @compileError("Constants must be integers");
+
+            switch (T) {
+                u8, i8, u16, i16, u32, i32, u64, i64 => {
+                    try writer.writeByte(switch (T) {
+                        u8 => OP.const1u,
+                        i8 => OP.const1s,
+                        u16 => OP.const2u,
+                        i16 => OP.const2s,
+                        u32 => OP.const4u,
+                        i32 => OP.const4s,
+                        u64 => OP.const8u,
+                        i64 => OP.const8s,
+                    });
+
+                    try writer.writeInt(T, value, options.endian);
+                },
+                else => switch (@typeInfo(T).Int.signedness) {
+                    .unsigned => {
+                        try writer.writeByte(OP.constu);
+                        try leb.writeULEB128(writer, value);
+                    },
+                    .signed => {
+                        try writer.writeByte(OP.consts);
+                        try leb.writeILEB128(writer, value);
+                    },
+                },
+            }
+        }
+
+        pub fn writeConstx(writer: anytype, debug_addr_offset: anytype) !void {
+            try writer.writeByte(OP.constx);
+            try leb.writeULEB128(writer, debug_addr_offset);
+        }
+
+        pub fn writeConstType(writer: anytype, die_offset: anytype, size: u8, value_bytes: []const u8) !void {
+            if (size != value_bytes.len) return error.InvalidValueSize;
+            try writer.writeByte(OP.const_type);
+            try leb.writeULEB128(writer, die_offset);
+            try writer.writeByte(size);
+            try writer.writeAll(value_bytes);
+        }
+
+        pub fn writeAddr(writer: anytype, value: addr_type) !void {
+            try writer.writeByte(OP.addr);
+            try writer.writeInt(addr_type, value, options.endian);
+        }
+
+        pub fn writeAddrx(writer: anytype, debug_addr_offset: anytype) !void {
+            try writer.writeByte(OP.addrx);
+            try leb.writeULEB128(writer, debug_addr_offset);
+        }
+
+        // 2.5.1.2: Register Values
+        pub fn writeFbreg(writer: anytype, offset: anytype) !void {
+            try writer.writeByte(OP.fbreg);
+            try leb.writeILEB128(writer, offset);
+        }
+
+        pub fn writeBreg(writer: anytype, register: u8, offset: anytype) !void {
+            if (register > 31) return error.InvalidRegister;
+            try writer.writeByte(OP.reg0 + register);
+            try leb.writeILEB128(writer, offset);
+        }
+
+        pub fn writeBregx(writer: anytype, register: anytype, offset: anytype) !void {
+            try writer.writeByte(OP.bregx);
+            try leb.writeULEB128(writer, register);
+            try leb.writeILEB128(writer, offset);
+        }
+
+        pub fn writeRegvalType(writer: anytype, register: anytype, offset: anytype) !void {
+            try writer.writeByte(OP.bregx);
+            try leb.writeULEB128(writer, register);
+            try leb.writeULEB128(writer, offset);
+        }
+
+        // 2.5.1.3: Stack Operations
+        pub fn writePick(writer: anytype, index: u8) !void {
+            try writer.writeByte(OP.pick);
+            try writer.writeByte(index);
+        }
+
+        pub fn writeDerefSize(writer: anytype, size: u8) !void {
+            try writer.writeByte(OP.deref_size);
+            try writer.writeByte(size);
+        }
+
+        pub fn writeXDerefSize(writer: anytype, size: u8) !void {
+            try writer.writeByte(OP.xderef_size);
+            try writer.writeByte(size);
+        }
+
+        pub fn writeDerefType(writer: anytype, size: u8, die_offset: anytype) !void {
+            try writer.writeByte(OP.deref_type);
+            try writer.writeByte(size);
+            try leb.writeULEB128(writer, die_offset);
+        }
+
+        pub fn writeXDerefType(writer: anytype, size: u8, die_offset: anytype) !void {
+            try writer.writeByte(OP.xderef_type);
+            try writer.writeByte(size);
+            try leb.writeULEB128(writer, die_offset);
+        }
+
+        // 2.5.1.4: Arithmetic and Logical Operations
+
+        pub fn writePlusUconst(writer: anytype, uint_value: anytype) !void {
+            try writer.writeByte(OP.plus_uconst);
+            try leb.writeULEB128(writer, uint_value);
+        }
+
+        // 2.5.1.5: Control Flow Operations
+
+        pub fn writeSkip(writer: anytype, offset: i16) !void {
+            try writer.writeByte(OP.skip);
+            try writer.writeInt(i16, offset, options.endian);
+        }
+
+        pub fn writeBra(writer: anytype, offset: i16) !void {
+            try writer.writeByte(OP.bra);
+            try writer.writeInt(i16, offset, options.endian);
+        }
+
+        pub fn writeCall(writer: anytype, comptime T: type, offset: T) !void {
+            switch (T) {
+                u16 => try writer.writeByte(OP.call2),
+                u32 => try writer.writeByte(OP.call4),
+                else => @compileError("Call operand must be a 2 or 4 byte offset"),
+            }
+
+            try writer.writeInt(T, offset, options.endian);
+        }
+
+        pub fn writeCallRef(writer: anytype, debug_info_offset: addr_type) !void {
+            try writer.writeByte(OP.call_ref);
+            try writer.writeInt(addr_type, debug_info_offset, options.endian);
+        }
+
+        pub fn writeConvert(writer: anytype, die_offset: anytype) !void {
+            try writer.writeByte(OP.convert);
+            try leb.writeULEB128(writer, die_offset);
+        }
+
+        pub fn writeReinterpret(writer: anytype, die_offset: anytype) !void {
+            try writer.writeByte(OP.reinterpret);
+            try leb.writeULEB128(writer, die_offset);
+        }
+
+        // 2.5.1.7: Special Operations
+
+        pub fn writeEntryValue(writer: anytype, expression: []const u8) !void {
+            try writer.writeByte(OP.entry_value);
+            try leb.writeULEB128(writer, expression.len);
+            try writer.writeAll(expression);
+        }
+
+        // 2.6: Location Descriptions
+
+        // TODO
+
+    };
+}
+
+test "DWARF expressions" {
+    const allocator = std.testing.allocator;
+
+    const options = ExpressionOptions{};
+    const stack_machine = StackMachine(options){};
+    defer stack_machine.deinit(allocator);
+}