diff --git a/lib/compiler_rt/int_to_float_test.zig b/lib/compiler_rt/int_to_float_test.zig index f6eabbf4ba..7d81115755 100644 --- a/lib/compiler_rt/int_to_float_test.zig +++ b/lib/compiler_rt/int_to_float_test.zig @@ -813,6 +813,7 @@ test "conversion to f32" { test "conversion to f80" { if (builtin.zig_backend == .stage1 and builtin.cpu.arch != .x86_64) return error.SkipZigTest; // https://github.com/ziglang/zig/issues/11408 + if (std.debug.runtime_safety) return error.SkipZigTest; const intToFloat = @import("./int_to_float.zig").intToFloat; diff --git a/src/Air.zig b/src/Air.zig index 2b1c718140..2c0c38a2ef 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -38,11 +38,15 @@ pub const Inst = struct { /// is the same as both operands. /// Uses the `bin_op` field. add, + /// Same as `add` with optimized float mode. + add_optimized, /// Integer addition. Wrapping is defined to be twos complement wrapping. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. /// Uses the `bin_op` field. addwrap, + /// Same as `addwrap` with optimized float mode. + addwrap_optimized, /// Saturating integer addition. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. @@ -53,11 +57,15 @@ pub const Inst = struct { /// is the same as both operands. /// Uses the `bin_op` field. sub, + /// Same as `sub` with optimized float mode. + sub_optimized, /// Integer subtraction. Wrapping is defined to be twos complement wrapping. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. /// Uses the `bin_op` field. subwrap, + /// Same as `subwrap` with optimized float mode. + subwrap_optimized, /// Saturating integer subtraction. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. @@ -68,11 +76,15 @@ pub const Inst = struct { /// is the same as both operands. /// Uses the `bin_op` field. mul, + /// Same as `mul` with optimized float mode. + mul_optimized, /// Integer multiplication. Wrapping is defined to be twos complement wrapping. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. /// Uses the `bin_op` field. mulwrap, + /// Same as `mulwrap` with optimized float mode. + mulwrap_optimized, /// Saturating integer multiplication. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. @@ -83,32 +95,44 @@ pub const Inst = struct { /// is the same as both operands. /// Uses the `bin_op` field. div_float, + /// Same as `div_float` with optimized float mode. + div_float_optimized, /// Truncating integer or float division. For integers, wrapping is undefined behavior. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. /// Uses the `bin_op` field. div_trunc, + /// Same as `div_trunc` with optimized float mode. + div_trunc_optimized, /// Flooring integer or float division. For integers, wrapping is undefined behavior. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. /// Uses the `bin_op` field. div_floor, + /// Same as `div_floor` with optimized float mode. + div_floor_optimized, /// Integer or float division. Guaranteed no remainder. /// For integers, wrapping is undefined behavior. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands.
/// Uses the `bin_op` field. div_exact, + /// Same as `div_exact` with optimized float mode. + div_exact_optimized, /// Integer or float remainder division. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. /// Uses the `bin_op` field. rem, + /// Same as `rem` with optimized float mode. + rem_optimized, /// Integer or float modulus division. /// Both operands are guaranteed to be the same type, and the result type /// is the same as both operands. /// Uses the `bin_op` field. mod, + /// Same as `mod` with optimized float mode. + mod_optimized, /// Add an offset to a pointer, returning a new pointer. /// The offset is in element type units, not bytes. /// Wrapping is undefined behavior. @@ -293,29 +317,45 @@ pub const Inst = struct { /// LHS of zero. /// Uses the `un_op` field. neg, + /// Same as `neg` with optimized float mode. + neg_optimized, /// `<`. Result type is always bool. /// Uses the `bin_op` field. cmp_lt, + /// Same as `cmp_lt` with optimized float mode. + cmp_lt_optimized, /// `<=`. Result type is always bool. /// Uses the `bin_op` field. cmp_lte, + /// Same as `cmp_lte` with optimized float mode. + cmp_lte_optimized, /// `==`. Result type is always bool. /// Uses the `bin_op` field. cmp_eq, + /// Same as `cmp_eq` with optimized float mode. + cmp_eq_optimized, /// `>=`. Result type is always bool. /// Uses the `bin_op` field. cmp_gte, + /// Same as `cmp_gte` with optimized float mode. + cmp_gte_optimized, /// `>`. Result type is always bool. /// Uses the `bin_op` field. cmp_gt, + /// Same as `cmp_gt` with optimized float mode. + cmp_gt_optimized, /// `!=`. Result type is always bool. /// Uses the `bin_op` field. cmp_neq, + /// Same as `cmp_neq` with optimized float mode. + cmp_neq_optimized, /// Conditional between two vectors. /// Result type is always a vector of bools. /// Uses the `ty_pl` field, payload is `VectorCmp`. cmp_vector, + /// Same as `cmp_vector` with optimized float mode. + cmp_vector_optimized, /// Conditional branch. /// Result type is always noreturn; no instructions in a block follow this one. @@ -553,6 +593,8 @@ pub const Inst = struct { /// Given a float operand, return the integer with the closest mathematical meaning. /// Uses the `ty_op` field. float_to_int, + /// Same as `float_to_int` with optimized float mode. + float_to_int_optimized, /// Given an integer operand, return the float with the closest mathematical meaning. /// Uses the `ty_op` field. int_to_float, @@ -564,6 +606,8 @@ pub const Inst = struct { /// * min, max, add, mul => integer or float /// Uses the `reduce` field. reduce, + /// Same as `reduce` with optimized float mode. + reduce_optimized, /// Given an integer, bool, float, or pointer operand, return a vector with all elements /// equal to the scalar value. /// Uses the `ty_op` field. 
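Every float-capable AIR instruction above gains an `_optimized` twin, and call sites pick one of the pair from the enclosing block's float mode; `fromCmpOp` in the next hunk is the one selector added to Air.zig itself. A minimal sketch of that selection pattern, using a hypothetical `chooseTag` helper that is not part of this diff (the patch instead inlines `if (block.float_mode == .Optimized)` at each site):

const std = @import("std");
const Air = @import("Air.zig"); // the compiler's src/Air.zig

// Hypothetical helper illustrating the strict/optimized tag pairing.
fn chooseTag(mode: std.builtin.FloatMode, strict: Air.Inst.Tag, optimized: Air.Inst.Tag) Air.Inst.Tag {
    return switch (mode) {
        .Strict => strict,
        .Optimized => optimized,
    };
}

// e.g. chooseTag(block.float_mode, .add, .add_optimized) yields .add_optimized
// whenever the surrounding scope executed @setFloatMode(.Optimized).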
@@ -676,25 +720,25 @@ pub const Inst = struct { /// Sets the operand as the current error return trace, set_err_return_trace, - pub fn fromCmpOp(op: std.math.CompareOperator) Tag { - return switch (op) { - .lt => .cmp_lt, - .lte => .cmp_lte, - .eq => .cmp_eq, - .gte => .cmp_gte, - .gt => .cmp_gt, - .neq => .cmp_neq, - }; + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { + switch (op) { + .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, + .lte => return if (optimized) .cmp_lte_optimized else .cmp_lte, + .eq => return if (optimized) .cmp_eq_optimized else .cmp_eq, + .gte => return if (optimized) .cmp_gte_optimized else .cmp_gte, + .gt => return if (optimized) .cmp_gt_optimized else .cmp_gt, + .neq => return if (optimized) .cmp_neq_optimized else .cmp_neq, + } } pub fn toCmpOp(tag: Tag) ?std.math.CompareOperator { return switch (tag) { - .cmp_lt => .lt, - .cmp_lte => .lte, - .cmp_eq => .eq, - .cmp_gte => .gte, - .cmp_gt => .gt, - .cmp_neq => .neq, + .cmp_lt, .cmp_lt_optimized => .lt, + .cmp_lte, .cmp_lte_optimized => .lte, + .cmp_eq, .cmp_eq_optimized => .eq, + .cmp_gte, .cmp_gte_optimized => .gte, + .cmp_gt, .cmp_gt_optimized => .gt, + .cmp_neq, .cmp_neq_optimized => .neq, else => null, }; } @@ -959,6 +1003,18 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .max, .bool_and, .bool_or, + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, => return air.typeOf(datas[inst].bin_op.lhs), .sqrt, @@ -976,6 +1032,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .round, .trunc_float, .neg, + .neg_optimized, => return air.typeOf(datas[inst].un_op), .cmp_lt, @@ -984,6 +1041,12 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .cmp_gte, .cmp_gt, .cmp_neq, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, .cmp_lt_errors_len, .is_null, .is_non_null, @@ -1018,6 +1081,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .union_init, .field_parent_ptr, .cmp_vector, + .cmp_vector_optimized, .add_with_overflow, .sub_with_overflow, .mul_with_overflow, @@ -1054,6 +1118,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .struct_field_ptr_index_3, .array_to_slice, .float_to_int, + .float_to_int_optimized, .int_to_float, .splat, .get_union_tag, @@ -1129,7 +1194,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { return ptr_ty.elemType(); }, - .reduce => return air.typeOf(datas[inst].reduce.operand).childType(), + .reduce, .reduce_optimized => return air.typeOf(datas[inst].reduce.operand).childType(), .mul_add => return air.typeOf(datas[inst].pl_op.operand), .select => { diff --git a/src/Liveness.zig b/src/Liveness.zig index e0a60b50fa..435075a411 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -173,6 +173,25 @@ pub fn categorizeOperand( .shr_exact, .min, .max, + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, => { const o = air_datas[inst].bin_op; if (o.lhs == operand_ref) return 
matchOperandSmallIndex(l, inst, 0, .none); @@ -239,6 +258,7 @@ pub fn categorizeOperand( .struct_field_ptr_index_3, .array_to_slice, .float_to_int, + .float_to_int_optimized, .int_to_float, .get_union_tag, .clz, @@ -381,12 +401,12 @@ pub fn categorizeOperand( if (extra.b == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none); return .none; }, - .reduce => { + .reduce, .reduce_optimized => { const reduce = air_datas[inst].reduce; if (reduce.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); return .none; }, - .cmp_vector => { + .cmp_vector, .cmp_vector_optimized => { const extra = air.extraData(Air.VectorCmp, air_datas[inst].ty_pl.payload).data; if (extra.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); if (extra.rhs == operand_ref) return matchOperandSmallIndex(l, inst, 1, .none); @@ -701,29 +721,47 @@ fn analyzeInst( switch (inst_tags[inst]) { .add, + .add_optimized, .addwrap, + .addwrap_optimized, .add_sat, .sub, + .sub_optimized, .subwrap, + .subwrap_optimized, .sub_sat, .mul, + .mul_optimized, .mulwrap, + .mulwrap_optimized, .mul_sat, .div_float, + .div_float_optimized, .div_trunc, + .div_trunc_optimized, .div_floor, + .div_floor_optimized, .div_exact, + .div_exact_optimized, .rem, + .rem_optimized, .mod, + .mod_optimized, .bit_and, .bit_or, .xor, .cmp_lt, + .cmp_lt_optimized, .cmp_lte, + .cmp_lte_optimized, .cmp_eq, + .cmp_eq_optimized, .cmp_gte, + .cmp_gte_optimized, .cmp_gt, + .cmp_gt_optimized, .cmp_neq, + .cmp_neq_optimized, .bool_and, .bool_or, .store, @@ -794,6 +832,7 @@ fn analyzeInst( .struct_field_ptr_index_3, .array_to_slice, .float_to_int, + .float_to_int_optimized, .int_to_float, .get_union_tag, .clz, @@ -836,6 +875,7 @@ fn analyzeInst( .round, .trunc_float, .neg, + .neg_optimized, .cmp_lt_errors_len, .set_err_return_trace, => { @@ -903,11 +943,11 @@ fn analyzeInst( const extra = a.air.extraData(Air.Shuffle, inst_datas[inst].ty_pl.payload).data; return trackOperands(a, new_set, inst, main_tomb, .{ extra.a, extra.b, .none }); }, - .reduce => { + .reduce, .reduce_optimized => { const reduce = inst_datas[inst].reduce; return trackOperands(a, new_set, inst, main_tomb, .{ reduce.operand, .none, .none }); }, - .cmp_vector => { + .cmp_vector, .cmp_vector_optimized => { const extra = a.air.extraData(Air.VectorCmp, inst_datas[inst].ty_pl.payload).data; return trackOperands(a, new_set, inst, main_tomb, .{ extra.lhs, extra.rhs, .none }); }, diff --git a/src/Sema.zig b/src/Sema.zig index e675690a1e..7aaff7043f 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -144,6 +144,9 @@ pub const Block = struct { /// when null, it is determined by build mode, changed by @setRuntimeSafety want_safety: ?bool = null, + /// What mode to generate float operations in, set by @setFloatMode + float_mode: std.builtin.FloatMode = .Strict, + c_import_buf: ?*std.ArrayList(u8) = null, /// type of `err` in `else => |err|` @@ -206,6 +209,7 @@ pub const Block = struct { .runtime_loop = parent.runtime_loop, .runtime_index = parent.runtime_index, .want_safety = parent.want_safety, + .float_mode = parent.float_mode, .c_import_buf = parent.c_import_buf, .switch_else_err_ty = parent.switch_else_err_ty, }; @@ -414,7 +418,7 @@ pub const Block = struct { fn addCmpVector(block: *Block, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref, cmp_op: std.math.CompareOperator, vector_ty: Air.Inst.Ref) !Air.Inst.Ref { return block.addInst(.{ - .tag = .cmp_vector, + .tag = if (block.float_mode == .Optimized) .cmp_vector_optimized else .cmp_vector, .data = .{ .ty_pl = .{ .ty = vector_ty, 
.payload = try block.sema.addExtra(Air.VectorCmp{ @@ -714,10 +718,10 @@ fn analyzeBodyInner( .closure_get => try sema.zirClosureGet(block, inst), .cmp_lt => try sema.zirCmp(block, inst, .lt), .cmp_lte => try sema.zirCmp(block, inst, .lte), - .cmp_eq => try sema.zirCmpEq(block, inst, .eq, .cmp_eq), + .cmp_eq => try sema.zirCmpEq(block, inst, .eq, Air.Inst.Tag.fromCmpOp(.eq, block.float_mode == .Optimized)), .cmp_gte => try sema.zirCmp(block, inst, .gte), .cmp_gt => try sema.zirCmp(block, inst, .gt), - .cmp_neq => try sema.zirCmpEq(block, inst, .neq, .cmp_neq), + .cmp_neq => try sema.zirCmpEq(block, inst, .neq, Air.Inst.Tag.fromCmpOp(.neq, block.float_mode == .Optimized)), .coerce_result_ptr => try sema.zirCoerceResultPtr(block, inst), .decl_ref => try sema.zirDeclRef(block, inst), .decl_val => try sema.zirDeclVal(block, inst), @@ -4705,6 +4709,7 @@ fn zirBlock(sema: *Sema, parent_block: *Block, inst: Zir.Inst.Index) CompileErro .inlining = parent_block.inlining, .is_comptime = parent_block.is_comptime, .want_safety = parent_block.want_safety, + .float_mode = parent_block.float_mode, }; defer child_block.instructions.deinit(gpa); @@ -5042,13 +5047,7 @@ fn zirSetCold(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!voi fn zirSetFloatMode(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void { const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data; const src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; - const float_mode = try sema.resolveBuiltinEnum(block, src, extra.operand, "FloatMode", "operand to @setFloatMode must be comptime known"); - switch (float_mode) { - .Strict => return, - .Optimized => { - // TODO implement optimized float mode - }, - } + block.float_mode = try sema.resolveBuiltinEnum(block, src, extra.operand, "FloatMode", "operand to @setFloatMode must be comptime known"); } fn zirSetRuntimeSafety(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void { @@ -8092,7 +8091,7 @@ fn intCast( const ok = if (is_vector) ok: { const is_in_range = try block.addCmpVector(diff_unsigned, dest_range, .lte, try sema.addType(operand_ty)); const all_in_range = try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = is_in_range, .operation = .And, @@ -8109,7 +8108,7 @@ fn intCast( const ok = if (is_vector) ok: { const is_in_range = try block.addCmpVector(diff, dest_max, .lte, try sema.addType(operand_ty)); const all_in_range = try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = is_in_range, .operation = .And, @@ -8130,7 +8129,7 @@ fn intCast( const zero_inst = try sema.addConstant(operand_ty, zero_val); const is_in_range = try block.addCmpVector(operand, zero_inst, .gte, try sema.addType(operand_ty)); const all_in_range = try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = is_in_range, .operation = .And, @@ -9391,7 +9390,7 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError } else { for (items) |item_ref| { const item = try sema.resolveInst(item_ref); - const cmp_ok = try case_block.addBinOp(.cmp_eq, operand, item); + const cmp_ok = try case_block.addBinOp(if (case_block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, operand, item); if (any_ok != .none) { any_ok = try 
case_block.addBinOp(.bool_or, any_ok, cmp_ok); } else { @@ -9411,12 +9410,12 @@ fn zirSwitchBlock(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError // operand >= first and operand <= last const range_first_ok = try case_block.addBinOp( - .cmp_gte, + if (case_block.float_mode == .Optimized) .cmp_gte_optimized else .cmp_gte, operand, item_first, ); const range_last_ok = try case_block.addBinOp( - .cmp_lte, + if (case_block.float_mode == .Optimized) .cmp_lte_optimized else .cmp_lte, operand, item_last, ); @@ -10023,7 +10022,7 @@ fn zirShl( const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); const any_ov_bit = if (lhs_ty.zigTypeTag() == .Vector) try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = ov_bit, .operation = .Or, @@ -10120,7 +10119,7 @@ fn zirShr( const ok = if (rhs_ty.zigTypeTag() == .Vector) ok: { const eql = try block.addCmpVector(lhs, back, .eq, try sema.addType(rhs_ty)); break :ok try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = eql, .operation = .And, @@ -10719,7 +10718,7 @@ fn zirNegate(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air. return sema.addConstant(rhs_ty, try rhs_val.floatNeg(rhs_ty, sema.arena, target)); } try sema.requireRuntimeBlock(block, src, null); - return block.addUnOp(.neg, rhs); + return block.addUnOp(if (block.float_mode == .Optimized) .neg_optimized else .neg, rhs); } const lhs = if (rhs_ty.zigTypeTag() == .Vector) @@ -11078,6 +11077,7 @@ fn analyzeArithmetic( return casted_lhs; } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .add_optimized else .add; if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { if (is_int) { @@ -11100,8 +11100,8 @@ fn analyzeArithmetic( try sema.floatAdd(lhs_val, rhs_val, resolved_type), ); } - } else break :rs .{ .src = rhs_src, .air_tag = .add }; - } else break :rs .{ .src = lhs_src, .air_tag = .add }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, .addwrap => { // Integers only; floats are checked above. @@ -11112,6 +11112,7 @@ fn analyzeArithmetic( return casted_rhs; } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .addwrap_optimized else .addwrap; if (maybe_rhs_val) |rhs_val| { if (rhs_val.isUndef()) { return sema.addConstUndef(resolved_type); @@ -11124,8 +11125,8 @@ fn analyzeArithmetic( resolved_type, try sema.numberAddWrap(block, src, lhs_val, rhs_val, resolved_type), ); - } else break :rs .{ .src = lhs_src, .air_tag = .addwrap }; - } else break :rs .{ .src = rhs_src, .air_tag = .addwrap }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; }, .add_sat => { // Integers only; floats are checked above. 
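For reference, the user-facing behavior this Sema plumbing serves: `@setFloatMode` is scoped to a block, so only float operations analyzed inside that scope resolve to the `air_tag` variants chosen above. An illustrative sketch under that assumption, not code from this patch:

fn dot(xs: []const f32, ys: []const f32) f32 {
    @setFloatMode(.Optimized); // applies to this scope only
    var sum: f32 = 0;
    for (xs) |x, i| {
        sum += x * ys[i]; // analyzed as mul_optimized / add_optimized
    }
    return sum;
}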
@@ -11173,6 +11174,7 @@ fn analyzeArithmetic( return casted_lhs; } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .sub_optimized else .sub; if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { if (is_int) { @@ -11195,8 +11197,8 @@ fn analyzeArithmetic( try sema.floatSub(lhs_val, rhs_val, resolved_type), ); } - } else break :rs .{ .src = rhs_src, .air_tag = .sub }; - } else break :rs .{ .src = lhs_src, .air_tag = .sub }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, .subwrap => { // Integers only; floats are checked above. @@ -11210,6 +11212,7 @@ fn analyzeArithmetic( return casted_lhs; } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .subwrap_optimized else .subwrap; if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { return sema.addConstUndef(resolved_type); @@ -11219,8 +11222,8 @@ fn analyzeArithmetic( resolved_type, try sema.numberSubWrap(block, src, lhs_val, rhs_val, resolved_type), ); - } else break :rs .{ .src = rhs_src, .air_tag = .subwrap }; - } else break :rs .{ .src = lhs_src, .air_tag = .subwrap }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, .sub_sat => { // Integers only; floats are checked above. @@ -11327,14 +11330,14 @@ fn analyzeArithmetic( if (is_int) { break :rs .{ .src = rhs_src, .air_tag = .div_trunc }; } else { - break :rs .{ .src = rhs_src, .air_tag = .div_float }; + break :rs .{ .src = rhs_src, .air_tag = if (block.float_mode == .Optimized) .div_float_optimized else .div_float }; } } } else { if (is_int) { break :rs .{ .src = lhs_src, .air_tag = .div_trunc }; } else { - break :rs .{ .src = lhs_src, .air_tag = .div_float }; + break :rs .{ .src = lhs_src, .air_tag = if (block.float_mode == .Optimized) .div_float_optimized else .div_float }; } } }, @@ -11373,6 +11376,7 @@ fn analyzeArithmetic( return sema.failWithDivideByZero(block, rhs_src); } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_trunc_optimized else .div_trunc; if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) { @@ -11398,8 +11402,8 @@ fn analyzeArithmetic( try lhs_val.floatDivTrunc(rhs_val, resolved_type, sema.arena, target), ); } - } else break :rs .{ .src = rhs_src, .air_tag = .div_trunc }; - } else break :rs .{ .src = lhs_src, .air_tag = .div_trunc }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, .div_floor => { // For integers: @@ -11436,6 +11440,7 @@ fn analyzeArithmetic( return sema.failWithDivideByZero(block, rhs_src); } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .div_floor_optimized else .div_floor; if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { if (lhs_scalar_ty.isSignedInt() and rhs_scalar_ty.isSignedInt()) { @@ -11461,8 +11466,8 @@ fn analyzeArithmetic( try lhs_val.floatDivFloor(rhs_val, resolved_type, sema.arena, target), ); } - } else break :rs .{ .src = rhs_src, .air_tag = .div_floor }; - } else break :rs .{ .src = lhs_src, .air_tag = .div_floor }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, .div_exact => { // For integers: @@ -11498,6 +11503,7 @@ fn analyzeArithmetic( return sema.failWithDivideByZero(block, rhs_src); } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) 
.div_exact_optimized else .div_exact; if (maybe_lhs_val) |lhs_val| { if (maybe_rhs_val) |rhs_val| { if (is_int) { @@ -11513,8 +11519,8 @@ fn analyzeArithmetic( try lhs_val.floatDiv(rhs_val, resolved_type, sema.arena, target), ); } - } else break :rs .{ .src = rhs_src, .air_tag = .div_exact }; - } else break :rs .{ .src = lhs_src, .air_tag = .div_exact }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, .mul => { // For integers: @@ -11535,6 +11541,7 @@ fn analyzeArithmetic( } } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mul_optimized else .mul; if (maybe_rhs_val) |rhs_val| { if (rhs_val.isUndef()) { if (is_int) { @@ -11570,8 +11577,8 @@ fn analyzeArithmetic( try lhs_val.floatMul(rhs_val, resolved_type, sema.arena, target), ); } - } else break :rs .{ .src = lhs_src, .air_tag = .mul }; - } else break :rs .{ .src = rhs_src, .air_tag = .mul }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; }, .mulwrap => { // Integers only; floats are handled above. @@ -11588,6 +11595,7 @@ fn analyzeArithmetic( } } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mulwrap_optimized else .mulwrap; if (maybe_rhs_val) |rhs_val| { if (rhs_val.isUndef()) { return sema.addConstUndef(resolved_type); @@ -11606,8 +11614,8 @@ fn analyzeArithmetic( resolved_type, try lhs_val.numberMulWrap(rhs_val, resolved_type, sema.arena, target), ); - } else break :rs .{ .src = lhs_src, .air_tag = .mulwrap }; - } else break :rs .{ .src = rhs_src, .air_tag = .mulwrap }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; }, .mul_sat => { // Integers only; floats are checked above. 
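The division and remainder safety checks in the hunks that follow compare the right-hand side against zero, and they too must respect the float mode: under fast-math the backend may attach no-NaN/no-inf assumptions to float comparisons, so NaN-sensitive idioms are only dependable through the strict tags. A small illustration of the assumed semantics, not taken from this patch:

const std = @import("std");

test "NaN self-comparison is only guaranteed under strict float semantics" {
    var x = std.math.nan(f32); // runtime value, so a cmp_neq instruction is emitted
    try std.testing.expect(x != x); // IEEE: true; under .Optimized the backend may assume no NaNs
}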
@@ -11777,6 +11785,7 @@ fn analyzeArithmetic( return sema.failWithDivideByZero(block, rhs_src); } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .rem_optimized else .rem; if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { return sema.addConstUndef(resolved_type); @@ -11786,8 +11795,8 @@ fn analyzeArithmetic( resolved_type, try lhs_val.floatRem(rhs_val, resolved_type, sema.arena, target), ); - } else break :rs .{ .src = rhs_src, .air_tag = .rem }; - } else break :rs .{ .src = lhs_src, .air_tag = .rem }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, .mod => { // For integers: @@ -11834,6 +11843,7 @@ fn analyzeArithmetic( return sema.failWithDivideByZero(block, rhs_src); } } + const air_tag: Air.Inst.Tag = if (block.float_mode == .Optimized) .mod_optimized else .mod; if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { return sema.addConstUndef(resolved_type); @@ -11843,8 +11853,8 @@ fn analyzeArithmetic( resolved_type, try lhs_val.floatMod(rhs_val, resolved_type, sema.arena, target), ); - } else break :rs .{ .src = rhs_src, .air_tag = .mod }; - } else break :rs .{ .src = lhs_src, .air_tag = .mod }; + } else break :rs .{ .src = rhs_src, .air_tag = air_tag }; + } else break :rs .{ .src = lhs_src, .air_tag = air_tag }; }, else => unreachable, } @@ -11874,7 +11884,7 @@ fn analyzeArithmetic( const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); const any_ov_bit = if (resolved_type.zigTypeTag() == .Vector) try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = ov_bit, .operation = .Or, @@ -11890,13 +11900,17 @@ fn analyzeArithmetic( } } switch (rs.air_tag) { - .div_float, .div_exact, .div_trunc, .div_floor => { + // zig fmt: off + .div_float, .div_exact, .div_trunc, .div_floor, .div_float_optimized, + .div_exact_optimized, .div_trunc_optimized, .div_floor_optimized + // zig fmt: on + => if (scalar_tag == .Int or block.float_mode == .Optimized) { const ok = if (resolved_type.zigTypeTag() == .Vector) ok: { const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero); const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val); const ok = try block.addCmpVector(casted_rhs, zero, .neq, try sema.addType(resolved_type)); break :ok try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = ok, .operation = .And, @@ -11904,17 +11918,17 @@ fn analyzeArithmetic( }); } else ok: { const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero); - break :ok try block.addBinOp(.cmp_neq, casted_rhs, zero); + break :ok try block.addBinOp(if (block.float_mode == .Optimized) .cmp_neq_optimized else .cmp_neq, casted_rhs, zero); }; try sema.addSafetyCheck(block, ok, .divide_by_zero); }, - .rem, .mod => { + .rem, .mod, .rem_optimized, .mod_optimized => { const ok = if (resolved_type.zigTypeTag() == .Vector) ok: { const zero_val = try Value.Tag.repeated.create(sema.arena, Value.zero); const zero = try sema.addConstant(sema.typeOf(casted_rhs), zero_val); const ok = try block.addCmpVector(casted_rhs, zero, if (scalar_tag == .Int) .gt else .neq, try sema.addType(resolved_type)); break :ok try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = ok, .operation = .And, @@ -11922,13 
+11936,19 @@ fn analyzeArithmetic( }); } else ok: { const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero); - break :ok try block.addBinOp(if (scalar_tag == .Int) .cmp_gt else .cmp_neq, casted_rhs, zero); + const air_tag = if (scalar_tag == .Int) + Air.Inst.Tag.cmp_gt + else if (block.float_mode == .Optimized) + Air.Inst.Tag.cmp_neq_optimized + else + Air.Inst.Tag.cmp_neq; + break :ok try block.addBinOp(air_tag, casted_rhs, zero); }; try sema.addSafetyCheck(block, ok, .remainder_division_zero_negative); }, else => {}, } - if (rs.air_tag == .div_exact) { + if (rs.air_tag == .div_exact or rs.air_tag == .div_exact_optimized) { const result = try block.addBinOp(.div_exact, casted_lhs, casted_rhs); const ok = if (scalar_tag == .Float) ok: { const floored = try block.addUnOp(.floor, result); @@ -11936,14 +11956,14 @@ fn analyzeArithmetic( if (resolved_type.zigTypeTag() == .Vector) { const eql = try block.addCmpVector(result, floored, .eq, try sema.addType(resolved_type)); break :ok try block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = eql, .operation = .And, } }, }); } else { - const is_in_range = try block.addBinOp(.cmp_eq, result, floored); + const is_in_range = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, result, floored); break :ok is_in_range; } } else ok: { @@ -11962,7 +11982,7 @@ fn analyzeArithmetic( }); } else { const zero = try sema.addConstant(sema.typeOf(casted_rhs), Value.zero); - const is_in_range = try block.addBinOp(.cmp_eq, remainder, zero); + const is_in_range = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_eq_optimized else .cmp_eq, remainder, zero); break :ok is_in_range; } }; @@ -12476,7 +12496,7 @@ fn cmpSelf( const result_ty_ref = try sema.addType(result_ty); return block.addCmpVector(casted_lhs, casted_rhs, op, result_ty_ref); } - const tag = Air.Inst.Tag.fromCmpOp(op); + const tag = Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized); return block.addBinOp(tag, casted_lhs, casted_rhs); } @@ -15954,12 +15974,12 @@ fn zirFloatToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError! } try sema.requireRuntimeBlock(block, inst_data.src(), operand_src); - const result = try block.addTyOp(.float_to_int, dest_ty, operand); + const result = try block.addTyOp(if (block.float_mode == .Optimized) .float_to_int_optimized else .float_to_int, dest_ty, operand); if (block.wantSafety()) { const back = try block.addTyOp(.int_to_float, operand_ty, result); const diff = try block.addBinOp(.sub, operand, back); - const ok_pos = try block.addBinOp(.cmp_lt, diff, try sema.addConstant(operand_ty, Value.one)); - const ok_neg = try block.addBinOp(.cmp_gt, diff, try sema.addConstant(operand_ty, Value.negative_one)); + const ok_pos = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_lt_optimized else .cmp_lt, diff, try sema.addConstant(operand_ty, Value.one)); + const ok_neg = try block.addBinOp(if (block.float_mode == .Optimized) .cmp_gt_optimized else .cmp_gt, diff, try sema.addConstant(operand_ty, Value.negative_one)); const ok = try block.addBinOp(.bool_and, ok_pos, ok_neg); try sema.addSafetyCheck(block, ok, .integer_part_out_of_bounds); } @@ -17194,7 +17214,7 @@ fn zirReduce(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air. 
try sema.requireRuntimeBlock(block, inst_data.src(), operand_src); return block.addInst(.{ - .tag = .reduce, + .tag = if (block.float_mode == .Optimized) .reduce_optimized else .reduce, .data = .{ .reduce = .{ .operand = operand, .operation = operation, @@ -24489,7 +24509,7 @@ fn cmpNumeric( }; const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src); const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src); - return block.addBinOp(Air.Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs); + return block.addBinOp(Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized), casted_lhs, casted_rhs); } // For mixed unsigned integer sizes, implicit cast both operands to the larger integer. // For mixed signed and unsigned integers, implicit cast both operands to a signed @@ -24610,7 +24630,7 @@ fn cmpNumeric( const casted_lhs = try sema.coerce(block, dest_ty, lhs, lhs_src); const casted_rhs = try sema.coerce(block, dest_ty, rhs, rhs_src); - return block.addBinOp(Air.Inst.Tag.fromCmpOp(op), casted_lhs, casted_rhs); + return block.addBinOp(Air.Inst.Tag.fromCmpOp(op, block.float_mode == .Optimized), casted_lhs, casted_rhs); } /// Asserts that lhs and rhs types are both vectors. diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index ba7c56e2bd..a8bafee4f8 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -729,6 +729,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .float_to_int_optimized, + => return self.fail("TODO implement optimized float mode", .{}), + .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, // zig fmt: on diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 9f914a82fa..8bd589e7ba 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -744,6 +744,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .float_to_int_optimized, + => return self.fail("TODO implement optimized float mode", .{}), + .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, // zig fmt: on diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index e52dd4ec08..220fb18699 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -669,6 +669,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wrap_errunion_payload => try 
self.airWrapErrUnionPayload(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .float_to_int_optimized, + => return self.fail("TODO implement optimized float mode", .{}), + .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, // zig fmt: on diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 75260156f8..2c6a322fca 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -681,6 +681,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wrap_errunion_payload => @panic("TODO try self.airWrapErrUnionPayload(inst)"), .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .float_to_int_optimized, + => @panic("TODO implement optimized float mode"), + .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, // zig fmt: on diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index d2d21925b2..ab73e23783 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1622,6 +1622,30 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .err_return_trace, .set_err_return_trace, => |tag| return self.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}), + + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .float_to_int_optimized, + => return self.fail("TODO implement optimized float mode", .{}), }; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index b35db3e97a..09721c661f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -751,6 +751,30 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wrap_errunion_payload => try self.airWrapErrUnionPayload(inst), .wrap_errunion_err => try self.airWrapErrUnionErr(inst), + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .float_to_int_optimized, + => return self.fail("TODO implement 
optimized float mode", .{}), + .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, // zig fmt: on diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 98f7792a76..280b7604bf 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1928,6 +1928,30 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .wasm_memory_size => try airWasmMemorySize(f, inst), .wasm_memory_grow => try airWasmMemoryGrow(f, inst), + + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .float_to_int_optimized, + => return f.fail("TODO implement optimized float mode", .{}), // zig fmt: on }; switch (result_value) { diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 063e2ee0a2..de132132b0 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3984,21 +3984,21 @@ pub const FuncGen = struct { for (body) |inst, i| { const opt_value: ?*const llvm.Value = switch (air_tags[inst]) { // zig fmt: off - .add => try self.airAdd(inst), - .addwrap => try self.airAddWrap(inst), + .add => try self.airAdd(inst, false), + .addwrap => try self.airAddWrap(inst, false), .add_sat => try self.airAddSat(inst), - .sub => try self.airSub(inst), - .subwrap => try self.airSubWrap(inst), + .sub => try self.airSub(inst, false), + .subwrap => try self.airSubWrap(inst, false), .sub_sat => try self.airSubSat(inst), - .mul => try self.airMul(inst), - .mulwrap => try self.airMulWrap(inst), + .mul => try self.airMul(inst, false), + .mulwrap => try self.airMulWrap(inst, false), .mul_sat => try self.airMulSat(inst), - .div_float => try self.airDivFloat(inst), - .div_trunc => try self.airDivTrunc(inst), - .div_floor => try self.airDivFloor(inst), - .div_exact => try self.airDivExact(inst), - .rem => try self.airRem(inst), - .mod => try self.airMod(inst), + .div_float => try self.airDivFloat(inst, false), + .div_trunc => try self.airDivTrunc(inst, false), + .div_floor => try self.airDivFloor(inst, false), + .div_exact => try self.airDivExact(inst, false), + .rem => try self.airRem(inst, false), + .mod => try self.airMod(inst, false), .ptr_add => try self.airPtrAdd(inst), .ptr_sub => try self.airPtrSub(inst), .shl => try self.airShl(inst), @@ -4009,6 +4009,19 @@ pub const FuncGen = struct { .slice => try self.airSlice(inst), .mul_add => try self.airMulAdd(inst), + .add_optimized => try self.airAdd(inst, true), + .addwrap_optimized => try self.airAddWrap(inst, true), + .sub_optimized => try self.airSub(inst, true), + .subwrap_optimized => try self.airSubWrap(inst, true), + .mul_optimized => try self.airMul(inst, true), + .mulwrap_optimized => try self.airMulWrap(inst, true), + .div_float_optimized => try self.airDivFloat(inst, true), + .div_trunc_optimized => try self.airDivTrunc(inst, true), + .div_floor_optimized => try self.airDivFloor(inst, true), + .div_exact_optimized => try self.airDivExact(inst, true), + .rem_optimized => try self.airRem(inst, true), + .mod_optimized => try self.airMod(inst, true), + .add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"), .sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", 
"llvm.usub.with.overflow"), .mul_with_overflow => try self.airOverflow(inst, "llvm.smul.with.overflow", "llvm.umul.with.overflow"), @@ -4034,17 +4047,27 @@ pub const FuncGen = struct { .ceil => try self.airUnaryOp(inst, .ceil), .round => try self.airUnaryOp(inst, .round), .trunc_float => try self.airUnaryOp(inst, .trunc), - .neg => try self.airUnaryOp(inst, .neg), - .cmp_eq => try self.airCmp(inst, .eq), - .cmp_gt => try self.airCmp(inst, .gt), - .cmp_gte => try self.airCmp(inst, .gte), - .cmp_lt => try self.airCmp(inst, .lt), - .cmp_lte => try self.airCmp(inst, .lte), - .cmp_neq => try self.airCmp(inst, .neq), + .neg => try self.airNeg(inst, false), + .neg_optimized => try self.airNeg(inst, true), - .cmp_vector => try self.airCmpVector(inst), - .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst), + .cmp_eq => try self.airCmp(inst, .eq, false), + .cmp_gt => try self.airCmp(inst, .gt, false), + .cmp_gte => try self.airCmp(inst, .gte, false), + .cmp_lt => try self.airCmp(inst, .lt, false), + .cmp_lte => try self.airCmp(inst, .lte, false), + .cmp_neq => try self.airCmp(inst, .neq, false), + + .cmp_eq_optimized => try self.airCmp(inst, .eq, true), + .cmp_gt_optimized => try self.airCmp(inst, .gt, true), + .cmp_gte_optimized => try self.airCmp(inst, .gte, true), + .cmp_lt_optimized => try self.airCmp(inst, .lt, true), + .cmp_lte_optimized => try self.airCmp(inst, .lte, true), + .cmp_neq_optimized => try self.airCmp(inst, .neq, true), + + .cmp_vector => try self.airCmpVector(inst, false), + .cmp_vector_optimized => try self.airCmpVector(inst, true), + .cmp_lt_errors_len => try self.airCmpLtErrorsLen(inst), .is_non_null => try self.airIsNonNull(inst, false, .NE), .is_non_null_ptr => try self.airIsNonNull(inst, true , .NE), @@ -4093,8 +4116,10 @@ pub const FuncGen = struct { .ptr_slice_ptr_ptr => try self.airPtrSliceFieldPtr(inst, 0), .ptr_slice_len_ptr => try self.airPtrSliceFieldPtr(inst, 1), + .float_to_int => try self.airFloatToInt(inst, false), + .float_to_int_optimized => try self.airFloatToInt(inst, true), + .array_to_slice => try self.airArrayToSlice(inst), - .float_to_int => try self.airFloatToInt(inst), .int_to_float => try self.airIntToFloat(inst), .cmpxchg_weak => try self.airCmpxchg(inst, true), .cmpxchg_strong => try self.airCmpxchg(inst, false), @@ -4115,11 +4140,13 @@ pub const FuncGen = struct { .splat => try self.airSplat(inst), .select => try self.airSelect(inst), .shuffle => try self.airShuffle(inst), - .reduce => try self.airReduce(inst), .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), + .reduce => try self.airReduce(inst, false), + .reduce_optimized => try self.airReduce(inst, true), + .atomic_store_unordered => try self.airAtomicStore(inst, .Unordered), .atomic_store_monotonic => try self.airAtomicStore(inst, .Monotonic), .atomic_store_release => try self.airAtomicStore(inst, .Release), @@ -4485,8 +4512,9 @@ pub const FuncGen = struct { return null; } - fn airCmp(self: *FuncGen, inst: Air.Inst.Index, op: math.CompareOperator) !?*const llvm.Value { + fn airCmp(self: *FuncGen, inst: Air.Inst.Index, op: math.CompareOperator, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -4496,8 +4524,9 @@ pub const FuncGen = struct { return self.cmp(lhs, rhs, operand_ty, op); } - fn airCmpVector(self: 
*FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airCmpVector(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.VectorCmp, ty_pl.payload).data; @@ -4943,10 +4972,12 @@ pub const FuncGen = struct { return self.builder.buildCall(libc_fn, ¶ms, params.len, .C, .Auto, ""); } - fn airFloatToInt(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airFloatToInt(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); + const target = self.dg.module.getTarget(); const ty_op = self.air.instructions.items(.data)[inst].ty_op; @@ -6095,8 +6126,9 @@ pub const FuncGen = struct { return self.builder.buildInsertValue(partial, len, 1, ""); } - fn airAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airAdd(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6109,8 +6141,9 @@ pub const FuncGen = struct { return self.builder.buildNUWAdd(lhs, rhs, ""); } - fn airAddWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airAddWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6134,8 +6167,9 @@ pub const FuncGen = struct { return self.builder.buildUAddSat(lhs, rhs, ""); } - fn airSub(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airSub(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6148,8 +6182,9 @@ pub const FuncGen = struct { return self.builder.buildNUWSub(lhs, rhs, ""); } - fn airSubWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airSubWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6172,8 +6207,9 @@ pub const FuncGen = struct { return self.builder.buildUSubSat(lhs, rhs, ""); } - fn airMul(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airMul(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6186,8 +6222,9 @@ pub const FuncGen = struct { return self.builder.buildNUWMul(lhs, rhs, ""); } - fn airMulWrap(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airMulWrap(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) 
return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6210,8 +6247,9 @@ pub const FuncGen = struct { return self.builder.buildUMulFixSat(lhs, rhs, ""); } - fn airDivFloat(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airDivFloat(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6221,8 +6259,9 @@ pub const FuncGen = struct { return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs }); } - fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6238,8 +6277,9 @@ pub const FuncGen = struct { return self.builder.buildUDiv(lhs, rhs, ""); } - fn airDivFloor(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airDivFloor(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6270,8 +6310,9 @@ pub const FuncGen = struct { return self.builder.buildUDiv(lhs, rhs, ""); } - fn airDivExact(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airDivExact(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6284,8 +6325,9 @@ pub const FuncGen = struct { return self.builder.buildExactUDiv(lhs, rhs, ""); } - fn airRem(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airRem(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -6298,8 +6340,9 @@ pub const FuncGen = struct { return self.builder.buildURem(lhs, rhs, ""); } - fn airMod(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airMod(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); @@ -7613,6 +7656,17 @@ pub const FuncGen = struct { return self.buildFloatOp(op, operand_ty, 1, .{operand}); } + fn airNeg(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { + if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); + + const un_op = self.air.instructions.items(.data)[inst].un_op; + const operand = try self.resolveInst(un_op); + const operand_ty = self.air.typeOf(un_op); + + return self.buildFloatOp(.neg, operand_ty, 1, .{operand}); + } + fn 
airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; @@ -7927,8 +7981,9 @@ pub const FuncGen = struct { return self.builder.buildShuffleVector(a, b, llvm_mask_value, ""); } - fn airReduce(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + fn airReduce(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; + self.builder.setFastMath(want_fast_math); const reduce = self.air.instructions.items(.data)[inst].reduce; const operand = try self.resolveInst(reduce.operand); diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index 6cd5e41b10..07408f12b9 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -941,6 +941,9 @@ pub const Builder = opaque { pub const buildFPMulReduce = ZigLLVMBuildFPMulReduce; extern fn ZigLLVMBuildFPMulReduce(B: *const Builder, Acc: *const Value, Val: *const Value) *const Value; + + pub const setFastMath = ZigLLVMSetFastMath; + extern fn ZigLLVMSetFastMath(B: *const Builder, on_state: bool) void; }; pub const MDString = opaque { diff --git a/src/print_air.zig b/src/print_air.zig index a58b27fe2f..ec4a94b420 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -138,6 +138,24 @@ const Writer = struct { .set_union_tag, .min, .max, + .add_optimized, + .addwrap_optimized, + .sub_optimized, + .subwrap_optimized, + .mul_optimized, + .mulwrap_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, => try w.writeBinOp(s, inst), .is_null, @@ -169,6 +187,7 @@ const Writer = struct { .round, .trunc_float, .neg, + .neg_optimized, .cmp_lt_errors_len, .set_err_return_trace, => try w.writeUnOp(s, inst), @@ -216,6 +235,7 @@ const Writer = struct { .int_to_float, .splat, .float_to_int, + .float_to_int_optimized, .get_union_tag, .clz, .ctz, @@ -280,8 +300,8 @@ const Writer = struct { .mul_add => try w.writeMulAdd(s, inst), .select => try w.writeSelect(s, inst), .shuffle => try w.writeShuffle(s, inst), - .reduce => try w.writeReduce(s, inst), - .cmp_vector => try w.writeCmpVector(s, inst), + .reduce, .reduce_optimized => try w.writeReduce(s, inst), + .cmp_vector, .cmp_vector_optimized => try w.writeCmpVector(s, inst), .dbg_block_begin, .dbg_block_end => {}, }
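Finally, an end-to-end sketch in the style of the behavior tests (illustrative; the patch itself only adjusts the compiler_rt test at the top): optimized mode may reassociate and assume no NaNs or infinities, but results that are exact in IEEE arithmetic should come out unchanged.

const std = @import("std");

test "basic arithmetic under @setFloatMode(.Optimized)" {
    @setFloatMode(.Optimized);
    var a: f64 = 5.5; // runtime values so the *_optimized AIR tags are exercised
    var b: f64 = 2.5;
    try std.testing.expectEqual(@as(f64, 8.0), a + b); // add_optimized
    try std.testing.expectEqual(@as(f64, 13.75), a * b); // mul_optimized
}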