From 738f124ec12899771f8a3d6d2001145ce885abf6 Mon Sep 17 00:00:00 2001 From: Ratakor Date: Fri, 24 Jan 2025 02:13:11 +0100 Subject: [PATCH] Add @divCeil builtin --- doc/langref.html.in | 24 +++- lib/std/math.zig | 1 + lib/std/math/big/int.zig | 66 ++++++++++ lib/std/zig/AstGen.zig | 2 + lib/std/zig/AstRlAnnotate.zig | 1 + lib/std/zig/BuiltinFn.zig | 8 ++ lib/std/zig/Zir.zig | 7 ++ lib/zig.h | 35 ++++++ src/Air.zig | 11 ++ src/Air/types_resolved.zig | 2 + src/Liveness.zig | 4 + src/Liveness/Verify.zig | 2 + src/Sema.zig | 116 +++++++++++++++++- src/Value.zig | 89 ++++++++++++++ src/arch/aarch64/CodeGen.zig | 20 +++ src/arch/arm/CodeGen.zig | 27 ++++ src/arch/riscv64/CodeGen.zig | 9 +- src/arch/sparc64/CodeGen.zig | 3 +- src/arch/wasm/CodeGen.zig | 111 +++++++++++++++++ src/arch/x86_64/CodeGen.zig | 49 +++++--- src/codegen/c.zig | 2 + src/codegen/llvm.zig | 50 ++++++++ src/print_air.zig | 2 + src/print_zir.zig | 1 + test/behavior/int128.zig | 1 + test/behavior/math.zig | 40 ++++++ test/behavior/vector.zig | 8 +- .../signed_integer_division.zig | 2 +- 28 files changed, 666 insertions(+), 27 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 51f78e1be0af..e56e025ddd13 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -1340,7 +1340,8 @@ a /= b{#endsyntax#}
  • Can cause {#link|Division by Zero#} for floats in {#link|FloatMode.Optimized Mode|Floating Point Operations#}.
  • Signed integer operands must be comptime-known and positive. In other cases, use {#link|@divTrunc#}, - {#link|@divFloor#}, or + {#link|@divFloor#}, + {#link|@divCeil#}, or {#link|@divExact#} instead.
  • Invokes {#link|Peer Type Resolution#} for the operands.
  • @@ -4695,7 +4696,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
  • {#syntax#}@divExact(a, b) * b == a{#endsyntax#}
  • For a function that returns a possible error code, use {#syntax#}@import("std").math.divExact{#endsyntax#}.

    - {#see_also|@divTrunc|@divFloor#} + {#see_also|@divTrunc|@divFloor|@divCeil#} {#header_close#} {#header_open|@divFloor#}
    {#syntax#}@divFloor(numerator: T, denominator: T) T{#endsyntax#}
    @@ -4709,7 +4710,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
  • {#syntax#}(@divFloor(a, b) * b) + @mod(a, b) == a{#endsyntax#}
  • For a function that returns a possible error code, use {#syntax#}@import("std").math.divFloor{#endsyntax#}.

    - {#see_also|@divTrunc|@divExact#} + {#see_also|@divTrunc|@divCeil|@divExact#} {#header_close#} {#header_open|@divTrunc#}
    {#syntax#}@divTrunc(numerator: T, denominator: T) T{#endsyntax#}
    @@ -4723,7 +4724,20 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
  • {#syntax#}(@divTrunc(a, b) * b) + @rem(a, b) == a{#endsyntax#}
  • For a function that returns a possible error code, use {#syntax#}@import("std").math.divTrunc{#endsyntax#}.

    - {#see_also|@divFloor|@divExact#} + {#see_also|@divFloor|@divCeil|@divExact#} + {#header_close#} + {#header_open|@divCeil#} +
    {#syntax#}@divCeil(numerator: T, denominator: T) T{#endsyntax#}
    +

    + Ceiled division. Rounds toward positive infinity. Caller guarantees {#syntax#}denominator != 0{#endsyntax#} and + {#syntax#}!(@typeInfo(T) == .int and @typeInfo(T).int.signedness == .signed and numerator == std.math.minInt(T) and denominator == -1){#endsyntax#}. +</p>

    + +

    For a function that returns a possible error code, use {#syntax#}@import("std").math.divCeil{#endsyntax#}.

    + {#see_also|@divFloor|@divTrunc|@divExact#} {#header_close#} {#header_open|@embedFile#} @@ -5963,6 +5977,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
  • {#syntax#}/{#endsyntax#} (division)
  • {#link|@divTrunc#} (division)
  • {#link|@divFloor#} (division)
  • +
  • {#link|@divCeil#} (division)
  • {#link|@divExact#} (division)
  • Example with addition at compile-time:

    @@ -5980,6 +5995,7 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
  • {#syntax#}@import("std").math.mul{#endsyntax#}
  • {#syntax#}@import("std").math.divTrunc{#endsyntax#}
  • {#syntax#}@import("std").math.divFloor{#endsyntax#}
  • +
  • {#syntax#}@import("std").math.divCeil{#endsyntax#}
  • {#syntax#}@import("std").math.divExact{#endsyntax#}
  • {#syntax#}@import("std").math.shl{#endsyntax#}
  • diff --git a/lib/std/math.zig b/lib/std/math.zig index 1e7858aaa93c..355515ff8f33 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -895,6 +895,7 @@ fn testDivFloor() !void { /// infinity. Returns an error on overflow or when denominator is /// zero. pub fn divCeil(comptime T: type, numerator: T, denominator: T) !T { + // TODO: replace with @divCeil once https://github.com/ziglang/zig/pull/21757 is merged @setRuntimeSafety(false); if (denominator == 0) return error.DivisionByZero; const info = @typeInfo(T); diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 3e9109dd1ac5..caa9d88065b0 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1061,6 +1061,72 @@ pub const Mutable = struct { } } + /// q = a / b (rem r) + /// + /// a / b are ceiled (rounded towards +inf). + /// q may alias with a or b. + /// + /// Asserts there is enough memory to store q and r. + /// The upper bound for r limb count is `b.limbs.len`. + /// The upper bound for q limb count is given by `a.limbs`. + /// + /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`. + pub fn divCeil( + q: *Mutable, + r: *Mutable, + a: Const, + b: Const, + limbs_buffer: []Limb, + ) void { + const sep = a.limbs.len + 2; + var x = a.toMutable(limbs_buffer[0..sep]); + var y = b.toMutable(limbs_buffer[sep..]); + + // div performs truncating division (@divTrunc) which rounds towards negative + // infinity if the result is positive and towards positive infinity if the result is + // negative. + div(q, r, &x, &y); + + // @rem gives the remainder after @divTrunc, and is defined by: + // x * @divTrunc(x, y) + @rem(x, y) = x + // For all integers x, y with y != 0. + // In the following comments, a, b will be integers with a >= 0, b > 0, and we will take + // modCeil to be the remainder after @divCeil, defined by: + // x * @divCeil(x, y) + modCeil(x, y) = x + // For all integers x, y with y != 0. 
+ + if (a.positive != b.positive or r.eqlZero()) { + // In this case either the result is negative or the remainder is 0. + // If the result is negative then the default truncating division already rounds + // towards positive infinity, so no adjustment is needed. + // If the remainder is 0 then the division is exact and no adjustment is needed. + } else if (a.positive) { + // Both positive. + // We have: + // modCeil(a, b) != 0 + // => @divCeil(a, b) = @divTrunc(a, b) + 1 + // And: + // b * @divTrunc(a, b) + @rem(a, b) = a + // b * @divCeil(a, b) + modCeil(a, b) = a + // => b * @divTrunc(a, b) + b + modCeil(a, b) = a + // => modCeil(a, b) = @rem(a, b) - b + q.addScalar(q.toConst(), 1); + r.sub(r.toConst(), y.toConst()); + } else { + // Both negative. + // We have: + // modCeil(-a, -b) != 0 + // => @divCeil(-a, -b) = @divTrunc(-a, -b) + 1 + // And: + // -b * @divTrunc(-a, -b) + @rem(-a, -b) = -a + // -b * @divCeil(-a, -b) + modCeil(-a, -b) = -a + // => -b * @divTrunc(-a, -b) - b + modCeil(-a, -b) = -a + // => modCeil(-a, -b) = @rem(-a, -b) + b + q.addScalar(q.toConst(), 1); + r.add(r.toConst(), y.toConst().abs()); + } + } + /// q = a / b (rem r) /// /// a / b are truncated (rounded towards -inf). 
diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index d6207d16a8c8..8d19d7105bc7 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -2905,6 +2905,7 @@ fn addEnsureResult(gz: *GenZir, maybe_unused_result: Zir.Inst.Ref, statement: As .pop_count, .byte_swap, .bit_reverse, + .div_ceil, .div_exact, .div_floor, .div_trunc, @@ -9684,6 +9685,7 @@ fn builtinCall( .byte_swap => return bitBuiltin(gz, scope, ri, node, params[0], .byte_swap), .bit_reverse => return bitBuiltin(gz, scope, ri, node, params[0], .bit_reverse), + .div_ceil => return divBuiltin(gz, scope, ri, node, params[0], params[1], .div_ceil), .div_exact => return divBuiltin(gz, scope, ri, node, params[0], params[1], .div_exact), .div_floor => return divBuiltin(gz, scope, ri, node, params[0], params[1], .div_floor), .div_trunc => return divBuiltin(gz, scope, ri, node, params[0], params[1], .div_trunc), diff --git a/lib/std/zig/AstRlAnnotate.zig b/lib/std/zig/AstRlAnnotate.zig index e79061250548..a39e00c1297a 100644 --- a/lib/std/zig/AstRlAnnotate.zig +++ b/lib/std/zig/AstRlAnnotate.zig @@ -961,6 +961,7 @@ fn builtinCall(astrl: *AstRlAnnotate, block: ?*Block, ri: ResultInfo, node: Ast. _ = try astrl.expr(args[0], block, ResultInfo.none); return false; }, + .div_ceil, .div_exact, .div_floor, .div_trunc, diff --git a/lib/std/zig/BuiltinFn.zig b/lib/std/zig/BuiltinFn.zig index 7ad5bb1a87d3..b6fa3d2bb015 100644 --- a/lib/std/zig/BuiltinFn.zig +++ b/lib/std/zig/BuiltinFn.zig @@ -35,6 +35,7 @@ pub const Tag = enum { c_va_copy, c_va_end, c_va_start, + div_ceil, div_exact, div_floor, div_trunc, @@ -403,6 +404,13 @@ pub const list = list: { .illegal_outside_function = true, }, }, + .{ + "@divCeil", + .{ + .tag = .div_ceil, + .param_count = 2, + }, + }, .{ "@divExact", .{ diff --git a/lib/std/zig/Zir.zig b/lib/std/zig/Zir.zig index 7edb99ee03a1..87473526f346 100644 --- a/lib/std/zig/Zir.zig +++ b/lib/std/zig/Zir.zig @@ -186,6 +186,9 @@ pub const Inst = struct { /// Saturating multiplication. 
/// Uses the `pl_node` union field. Payload is `Bin`. mul_sat, + /// Implements the `@divCeil` builtin. + /// Uses the `pl_node` union field with payload `Bin`. + div_ceil, /// Implements the `@divExact` builtin. /// Uses the `pl_node` union field with payload `Bin`. div_exact, @@ -1246,6 +1249,7 @@ pub const Inst = struct { .pop_count, .byte_swap, .bit_reverse, + .div_ceil, .div_exact, .div_floor, .div_trunc, @@ -1532,6 +1536,7 @@ pub const Inst = struct { .pop_count, .byte_swap, .bit_reverse, + .div_ceil, .div_exact, .div_floor, .div_trunc, @@ -1801,6 +1806,7 @@ pub const Inst = struct { .byte_swap = .un_node, .bit_reverse = .un_node, + .div_ceil = .pl_node, .div_exact = .pl_node, .div_floor = .pl_node, .div_trunc = .pl_node, @@ -4052,6 +4058,7 @@ fn findTrackableInner( .mul, .mulwrap, .mul_sat, + .div_ceil, .div_exact, .div_floor, .div_trunc, diff --git a/lib/zig.h b/lib/zig.h index e636785f1e0a..9c60db3e78a7 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -693,6 +693,15 @@ typedef ptrdiff_t intptr_t; static inline int##w##_t zig_div_floor_i##w(int##w##_t lhs, int##w##_t rhs) { \ return lhs / rhs + (lhs % rhs != INT##w##_C(0) ? zig_shr_i##w(lhs ^ rhs, UINT8_C(w) - UINT8_C(1)) : INT##w##_C(0)); \ } \ +\ + static inline uint##w##_t zig_div_ceil_u##w(uint##w##_t lhs, uint##w##_t rhs) { \ + return lhs / rhs + (lhs % rhs != UINT##w##_C(0) ? UINT##w##_C(1) : UINT##w##_C(0)); \ + } \ +\ + static inline int##w##_t zig_div_ceil_i##w(int##w##_t lhs, int##w##_t rhs) { \ + return lhs / rhs + (lhs % rhs != INT##w##_C(0) \ + ? 
zig_shr_i##w(lhs ^ rhs, UINT8_C(w) - UINT8_C(1)) + INT##w##_C(1) : INT##w##_C(0)); \ + } \ \ zig_basic_operator(uint##w##_t, mod_u##w, %) \ \ @@ -1600,6 +1609,21 @@ static inline zig_i128 zig_div_floor_i128(zig_i128 lhs, zig_i128 rhs) { return zig_add_i128(zig_div_trunc_i128(lhs, rhs), zig_make_i128(mask, (uint64_t)mask)); } +static inline zig_u128 zig_div_ceil_u128(zig_u128 lhs, zig_u128 rhs) { + zig_u128 rem = zig_rem_u128(lhs, rhs); + uint64_t mask = zig_or_u64(zig_hi_u128(rem), zig_lo_u128(rem)) != UINT64_C(0) + ? UINT64_C(1) : UINT64_C(0); + return zig_add_u128(zig_div_trunc_u128(lhs, rhs), zig_make_u128(UINT64_C(0), mask)); +} + +static inline zig_i128 zig_div_ceil_i128(zig_i128 lhs, zig_i128 rhs) { + zig_i128 rem = zig_rem_i128(lhs, rhs); + int64_t mask = zig_or_u64((uint64_t)zig_hi_i128(rem), zig_lo_i128(rem)) != UINT64_C(0) + ? zig_shr_i64(zig_xor_i64(zig_hi_i128(lhs), zig_hi_i128(rhs)), UINT8_C(63)) + INT64_C(1) + : INT64_C(0); + return zig_add_i128(zig_div_trunc_i128(lhs, rhs), zig_make_i128(INT64_C(0), (uint64_t)mask)); +} + #define zig_mod_u128 zig_rem_u128 static inline zig_i128 zig_mod_i128(zig_i128 lhs, zig_i128 rhs) { @@ -2775,6 +2799,13 @@ static inline void zig_div_floor_big(void *res, const void *lhs, const void *rhs zig_trap(); } +static inline void zig_div_ceil_big(void *res, const void *lhs, const void *rhs, bool is_signed, uint16_t bits) { + // TODO: need another wrapper for divmod from lib/compiler_rt/udivmodei4.zig + // but is this even needed? 
+ + zig_trap(); +} + zig_extern void __umodei4(uint32_t *res, const uint32_t *lhs, const uint32_t *rhs, uintptr_t bits); static inline void zig_rem_big(void *res, const void *lhs, const void *rhs, bool is_signed, uint16_t bits) { if (!is_signed) { @@ -3534,6 +3565,10 @@ zig_float_negate_builtin(128, zig_make_u128, (UINT64_C(1) << 63, UINT64_C(0))) static inline zig_f##w zig_div_floor_f##w(zig_f##w lhs, zig_f##w rhs) { \ return zig_floor_f##w(zig_div_f##w(lhs, rhs)); \ } \ +\ + static inline zig_f##w zig_div_ceil_f##w(zig_f##w lhs, zig_f##w rhs) { \ + return zig_ceil_f##w(zig_div_f##w(lhs, rhs)); \ + } \ \ static inline zig_f##w zig_mod_f##w(zig_f##w lhs, zig_f##w rhs) { \ return zig_sub_f##w(lhs, zig_mul_f##w(zig_div_floor_f##w(lhs, rhs), rhs)); \ diff --git a/src/Air.zig b/src/Air.zig index 3bd658befdda..a1574430f2ae 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -145,6 +145,13 @@ pub const Inst = struct { div_floor, /// Same as `div_floor` with optimized float mode. div_floor_optimized, + /// Ceiling integer or float division. For integers, wrapping is undefined behavior. + /// Both operands are guaranteed to be the same type, and the result type + /// is the same as both operands. + /// Uses the `bin_op` field. + div_ceil, + /// Same as `div_ceil` with optimized float mode. + div_ceil_optimized, /// Integer or float division. /// If a remainder would be produced, undefined behavior occurs. /// For integers, overflow is undefined behavior. 
@@ -1333,6 +1340,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, .rem, .mod, @@ -1354,6 +1362,7 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .div_float_optimized, .div_trunc_optimized, .div_floor_optimized, + .div_ceil_optimized, .div_exact_optimized, .rem_optimized, .mod_optimized, @@ -1710,6 +1719,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .div_trunc_optimized, .div_floor, .div_floor_optimized, + .div_ceil, + .div_ceil_optimized, .div_exact, .div_exact_optimized, .rem, diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index b627ad388b55..372555aff622 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -44,6 +44,8 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .div_trunc_optimized, .div_floor, .div_floor_optimized, + .div_ceil, + .div_ceil_optimized, .div_exact, .div_exact_optimized, .rem, diff --git a/src/Liveness.zig b/src/Liveness.zig index e6e9c073633f..45826cfe0ba9 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -247,6 +247,7 @@ pub fn categorizeOperand( .div_trunc, .div_floor, .div_exact, + .div_ceil, .rem, .mod, .bit_and, @@ -274,6 +275,7 @@ pub fn categorizeOperand( .div_trunc_optimized, .div_floor_optimized, .div_exact_optimized, + .div_ceil_optimized, .rem_optimized, .mod_optimized, .neg_optimized, @@ -896,6 +898,8 @@ fn analyzeInst( .div_floor_optimized, .div_exact, .div_exact_optimized, + .div_ceil, + .div_ceil_optimized, .rem, .rem_optimized, .mod, diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index 01e0842dedf3..4f5725795317 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -227,6 +227,8 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .div_floor_optimized, .div_exact, .div_exact_optimized, + .div_ceil, + .div_ceil_optimized, .rem, 
.rem_optimized, .mod, diff --git a/src/Sema.zig b/src/Sema.zig index 833b05413f33..96cb33b92b7f 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1293,6 +1293,7 @@ fn analyzeBodyInner( .sub_sat => try sema.zirArithmetic(block, inst, .sub_sat, true), .div => try sema.zirDiv(block, inst), + .div_ceil => try sema.zirDivCeil(block, inst), .div_exact => try sema.zirDivExact(block, inst), .div_floor => try sema.zirDivFloor(block, inst), .div_trunc => try sema.zirDivTrunc(block, inst), @@ -15320,7 +15321,7 @@ fn zirDiv(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Ins return sema.fail( block, src, - "division with '{}' and '{}': signed integers must use @divTrunc, @divFloor, or @divExact", + "division with '{}' and '{}': signed integers must use @divTrunc, @divFloor, @divCeil, or @divExact", .{ lhs_ty.fmt(pt), rhs_ty.fmt(pt) }, ); } @@ -15332,6 +15333,117 @@ fn zirDiv(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Ins return block.addBinOp(air_tag, casted_lhs, casted_rhs); } +fn zirDivCeil(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { + const pt = sema.pt; + const zcu = pt.zcu; + const inst_data = sema.code.instructions.items(.data)[@intFromEnum(inst)].pl_node; + const src = block.src(.{ .node_offset_bin_op = inst_data.src_node }); + const lhs_src = block.src(.{ .node_offset_bin_lhs = inst_data.src_node }); + const rhs_src = block.src(.{ .node_offset_bin_rhs = inst_data.src_node }); + const extra = sema.code.extraData(Zir.Inst.Bin, inst_data.payload_index).data; + const lhs = try sema.resolveInst(extra.lhs); + const rhs = try sema.resolveInst(extra.rhs); + const lhs_ty = sema.typeOf(lhs); + const rhs_ty = sema.typeOf(rhs); + const lhs_zig_ty_tag = lhs_ty.zigTypeTag(zcu); + const rhs_zig_ty_tag = rhs_ty.zigTypeTag(zcu); + try sema.checkVectorizableBinaryOperands(block, src, lhs_ty, rhs_ty, lhs_src, rhs_src); + try sema.checkInvalidPtrIntArithmetic(block, src, lhs_ty); + + const instructions = 
&[_]Air.Inst.Ref{ lhs, rhs }; + const resolved_type = try sema.resolvePeerTypes(block, src, instructions, .{ + .override = &[_]?LazySrcLoc{ lhs_src, rhs_src }, + }); + + const casted_lhs = try sema.coerce(block, resolved_type, lhs, lhs_src); + const casted_rhs = try sema.coerce(block, resolved_type, rhs, rhs_src); + + const lhs_scalar_ty = lhs_ty.scalarType(zcu); + const rhs_scalar_ty = rhs_ty.scalarType(zcu); + const scalar_tag = resolved_type.scalarType(zcu).zigTypeTag(zcu); + + const is_int = scalar_tag == .int or scalar_tag == .comptime_int; + + try sema.checkArithmeticOp(block, src, scalar_tag, lhs_zig_ty_tag, rhs_zig_ty_tag, .div_ceil); + + const maybe_lhs_val = try sema.resolveValueIntable(casted_lhs); + const maybe_rhs_val = try sema.resolveValueIntable(casted_rhs); + + const runtime_src = rs: { + // For integers: + // If the lhs is zero, then zero is returned regardless of rhs. + // If the rhs is zero, compile error for division by zero. + // If the rhs is undefined, compile error because there is a possible + // value (zero) for which the division would be illegal behavior. + // If the lhs is undefined: + // * if lhs type is signed: + // * if rhs is comptime-known and not -1, result is undefined + // * if rhs is -1 or runtime-known, compile error because there is a + // possible value (-min_int / -1) for which division would be + // illegal behavior. + // * if lhs type is unsigned, undef is returned regardless of rhs. + // TODO: emit runtime safety for division by zero + // + // For floats: + // If the rhs is zero, compile error for division by zero. + // If the rhs is undefined, compile error because there is a possible + // value (zero) for which the division would be illegal behavior. + // If the lhs is undefined, result is undefined. 
+ if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(zcu)) { + if (try lhs_val.compareAllWithZeroSema(.eq, pt)) { + const scalar_zero = switch (scalar_tag) { + .comptime_float, .float => try pt.floatValue(resolved_type.scalarType(zcu), 0.0), + .comptime_int, .int => try pt.intValue(resolved_type.scalarType(zcu), 0), + else => unreachable, + }; + const zero_val = try sema.splat(resolved_type, scalar_zero); + return Air.internedToRef(zero_val.toIntern()); + } + } + } + if (maybe_rhs_val) |rhs_val| { + if (rhs_val.isUndef(zcu)) { + return sema.failWithUseOfUndef(block, rhs_src); + } + if (!(try rhs_val.compareAllWithZeroSema(.neq, pt))) { + return sema.failWithDivideByZero(block, rhs_src); + } + // TODO: if the RHS is one, return the LHS directly + } + if (maybe_lhs_val) |lhs_val| { + if (lhs_val.isUndef(zcu)) { + if (lhs_scalar_ty.isSignedInt(zcu) and rhs_scalar_ty.isSignedInt(zcu)) { + if (maybe_rhs_val) |rhs_val| { + if (try sema.compareAll(rhs_val, .neq, try pt.intValue(resolved_type, -1), resolved_type)) { + return pt.undefRef(resolved_type); + } + } + return sema.failWithUseOfUndef(block, rhs_src); + } + return pt.undefRef(resolved_type); + } + + if (maybe_rhs_val) |rhs_val| { + if (is_int) { + return Air.internedToRef((try lhs_val.intDivCeil(rhs_val, resolved_type, sema.arena, pt)).toIntern()); + } else { + return Air.internedToRef((try lhs_val.floatDivCeil(rhs_val, resolved_type, sema.arena, pt)).toIntern()); + } + } else break :rs rhs_src; + } else break :rs lhs_src; + }; + + try sema.requireRuntimeBlock(block, src, runtime_src); + + if (block.wantSafety()) { + try sema.addDivIntOverflowSafety(block, src, resolved_type, lhs_scalar_ty, maybe_lhs_val, maybe_rhs_val, casted_lhs, casted_rhs, is_int); + try sema.addDivByZeroSafety(block, src, resolved_type, maybe_rhs_val, casted_rhs, is_int); + } + + return block.addBinOp(airTag(block, is_int, .div_ceil, .div_ceil_optimized), casted_lhs, casted_rhs); +} + fn zirDivExact(sema: *Sema, block: *Block, inst: 
Zir.Inst.Index) CompileError!Air.Inst.Ref { const pt = sema.pt; const zcu = pt.zcu; @@ -19935,7 +20047,7 @@ fn analyzeRet( fn floatOpAllowed(tag: Zir.Inst.Tag) bool { // extend this swich as additional operators are implemented return switch (tag) { - .add, .sub, .mul, .div, .div_exact, .div_trunc, .div_floor, .mod, .rem, .mod_rem => true, + .add, .sub, .mul, .div, .div_ceil, .div_exact, .div_trunc, .div_floor, .mod, .rem, .mod_rem => true, else => false, }; } diff --git a/src/Value.zig b/src/Value.zig index c8ae997d9823..8486b5c76c51 100644 --- a/src/Value.zig +++ b/src/Value.zig @@ -2263,6 +2263,49 @@ pub fn intDivFloorScalar(lhs: Value, rhs: Value, ty: Type, allocator: Allocator, return pt.intValue_big(ty, result_q.toConst()); } +pub fn intDivCeil(lhs: Value, rhs: Value, ty: Type, allocator: Allocator, pt: Zcu.PerThread) !Value { + if (ty.zigTypeTag(pt.zcu) == .vector) { + const result_data = try allocator.alloc(InternPool.Index, ty.vectorLen(pt.zcu)); + const scalar_ty = ty.scalarType(pt.zcu); + for (result_data, 0..) |*scalar, i| { + const lhs_elem = try lhs.elemValue(pt, i); + const rhs_elem = try rhs.elemValue(pt, i); + scalar.* = (try intDivCeilScalar(lhs_elem, rhs_elem, scalar_ty, allocator, pt)).toIntern(); + } + return Value.fromInterned((try pt.intern(.{ .aggregate = .{ + .ty = ty.toIntern(), + .storage = .{ .elems = result_data }, + } }))); + } + return intDivCeilScalar(lhs, rhs, ty, allocator, pt); +} + +pub fn intDivCeilScalar(lhs: Value, rhs: Value, ty: Type, allocator: Allocator, pt: Zcu.PerThread) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. 
+ const zcu = pt.zcu; + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const lhs_bigint = lhs.toBigInt(&lhs_space, zcu); + const rhs_bigint = rhs.toBigInt(&rhs_space, zcu); + const limbs_q = try allocator.alloc( + std.math.big.Limb, + lhs_bigint.limbs.len, + ); + const limbs_r = try allocator.alloc( + std.math.big.Limb, + rhs_bigint.limbs.len, + ); + const limbs_buffer = try allocator.alloc( + std.math.big.Limb, + std.math.big.int.calcDivLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len), + ); + var result_q = BigIntMutable{ .limbs = limbs_q, .positive = undefined, .len = undefined }; + var result_r = BigIntMutable{ .limbs = limbs_r, .positive = undefined, .len = undefined }; + result_q.divCeil(&result_r, lhs_bigint, rhs_bigint, limbs_buffer); + return pt.intValue_big(ty, result_q.toConst()); +} + pub fn intMod(lhs: Value, rhs: Value, ty: Type, allocator: Allocator, pt: Zcu.PerThread) !Value { if (ty.zigTypeTag(pt.zcu) == .vector) { const result_data = try allocator.alloc(InternPool.Index, ty.vectorLen(pt.zcu)); @@ -3011,6 +3054,52 @@ pub fn floatDivFloorScalar( } })); } +pub fn floatDivCeil( + lhs: Value, + rhs: Value, + float_type: Type, + arena: Allocator, + pt: Zcu.PerThread, +) !Value { + if (float_type.zigTypeTag(pt.zcu) == .vector) { + const result_data = try arena.alloc(InternPool.Index, float_type.vectorLen(pt.zcu)); + const scalar_ty = float_type.scalarType(pt.zcu); + for (result_data, 0..) 
|*scalar, i| { + const lhs_elem = try lhs.elemValue(pt, i); + const rhs_elem = try rhs.elemValue(pt, i); + scalar.* = (try floatDivCeilScalar(lhs_elem, rhs_elem, scalar_ty, pt)).toIntern(); + } + return Value.fromInterned((try pt.intern(.{ .aggregate = .{ + .ty = float_type.toIntern(), + .storage = .{ .elems = result_data }, + } }))); + } + return floatDivCeilScalar(lhs, rhs, float_type, pt); +} + +pub fn floatDivCeilScalar( + lhs: Value, + rhs: Value, + float_type: Type, + pt: Zcu.PerThread, +) !Value { + const zcu = pt.zcu; + const target = zcu.getTarget(); + // TODO: Replace @ceil(x / y) with @divCeil(x, y). + const storage: InternPool.Key.Float.Storage = switch (float_type.floatBits(target)) { + 16 => .{ .f16 = @ceil(lhs.toFloat(f16, zcu) / rhs.toFloat(f16, zcu)) }, + 32 => .{ .f32 = @ceil(lhs.toFloat(f32, zcu) / rhs.toFloat(f32, zcu)) }, + 64 => .{ .f64 = @ceil(lhs.toFloat(f64, zcu) / rhs.toFloat(f64, zcu)) }, + 80 => .{ .f80 = @ceil(lhs.toFloat(f80, zcu) / rhs.toFloat(f80, zcu)) }, + 128 => .{ .f128 = @ceil(lhs.toFloat(f128, zcu) / rhs.toFloat(f128, zcu)) }, + else => unreachable, + }; + return Value.fromInterned((try pt.intern(.{ .float = .{ + .ty = float_type.toIntern(), + .storage = storage, + } }))); +} + pub fn floatDivTrunc( lhs: Value, rhs: Value, diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 64230dfc9e97..85d8073500fe 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -669,6 +669,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .div_float => try self.airBinOp(inst, .div_float), .div_trunc => try self.airBinOp(inst, .div_trunc), .div_floor => try self.airBinOp(inst, .div_floor), + .div_ceil => try self.airBinOp(inst, .div_ceil), .div_exact => try self.airBinOp(inst, .div_exact), .rem => try self.airBinOp(inst, .rem), .mod => try self.airBinOp(inst, .mod), @@ -853,6 +854,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { 
.div_float_optimized, .div_trunc_optimized, .div_floor_optimized, + .div_ceil_optimized, .div_exact_optimized, .rem_optimized, .mod_optimized, @@ -2097,6 +2099,22 @@ fn divExact( } } +fn divCeil( + self: *Self, + lhs_bind: ReadArg.Bind, + rhs_bind: ReadArg.Bind, + lhs_ty: Type, + rhs_ty: Type, + maybe_inst: ?Air.Inst.Index, +) InnerError!MCValue { + _ = lhs_bind; + _ = rhs_bind; + _ = lhs_ty; + _ = rhs_ty; + _ = maybe_inst; + return self.fail("TODO: implement `@divCeil` for {}", .{self.target.cpu.arch}); +} + fn rem( self: *Self, lhs_bind: ReadArg.Bind, @@ -2448,6 +2466,8 @@ fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) InnerError!voi .div_floor => try self.divFloor(lhs_bind, rhs_bind, lhs_ty, rhs_ty, inst), + .div_ceil => try self.divCeil(lhs_bind, rhs_bind, lhs_ty, rhs_ty, inst), + .div_exact => try self.divExact(lhs_bind, rhs_bind, lhs_ty, rhs_ty, inst), .rem => try self.rem(lhs_bind, rhs_bind, lhs_ty, rhs_ty, inst), diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index fa892117f329..1dca57bfbc15 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -658,6 +658,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .div_float => try self.airBinOp(inst, .div_float), .div_trunc => try self.airBinOp(inst, .div_trunc), .div_floor => try self.airBinOp(inst, .div_floor), + .div_ceil => try self.airBinOp(inst, .div_ceil), .div_exact => try self.airBinOp(inst, .div_exact), .rem => try self.airBinOp(inst, .rem), .mod => try self.airBinOp(inst, .mod), @@ -842,6 +843,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .div_float_optimized, .div_trunc_optimized, .div_floor_optimized, + .div_ceil_optimized, .div_exact_optimized, .rem_optimized, .mod_optimized, @@ -1507,6 +1509,8 @@ fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { .div_floor => try self.divFloor(lhs_bind, rhs_bind, lhs_ty, rhs_ty, inst), + .div_ceil => try self.divCeil(lhs_bind, 
rhs_bind, lhs_ty, rhs_ty, inst), + .div_exact => try self.divExact(lhs_bind, rhs_bind, lhs_ty, rhs_ty, inst), .rem => try self.rem(lhs_bind, rhs_bind, lhs_ty, rhs_ty, inst), @@ -3587,6 +3591,29 @@ fn divFloor( } } +fn divCeil( + self: *Self, + lhs_bind: ReadArg.Bind, + rhs_bind: ReadArg.Bind, + lhs_ty: Type, + rhs_ty: Type, + maybe_inst: ?Air.Inst.Index, +) InnerError!MCValue { + _ = lhs_bind; + _ = rhs_bind; + _ = rhs_ty; + _ = maybe_inst; + + const pt = self.pt; + const zcu = pt.zcu; + switch (lhs_ty.zigTypeTag(zcu)) { + .float => return self.fail("TODO ARM binary operations on floats", .{}), + .vector => return self.fail("TODO ARM binary operations on vectors", .{}), + .int => return self.fail("TODO ARM div_ceil", .{}), + else => unreachable, + } +} + fn divExact( self: *Self, lhs_bind: ReadArg.Bind, diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 66f4ce6f0d50..022b56c782eb 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1461,7 +1461,7 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .mul, .mul_wrap, - .div_trunc, + .div_trunc, .div_exact, .rem, @@ -1479,13 +1479,13 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .max, => try func.airBinOp(inst, tag), - .ptr_add, .ptr_sub => try func.airPtrArithmetic(inst, tag), .mod, - .div_float, - .div_floor, + .div_float, + .div_floor, + .div_ceil, => return func.fail("TODO: {s}", .{@tagName(tag)}), .sqrt, @@ -1673,6 +1673,7 @@ fn genBody(func: *Func, body: []const Air.Inst.Index) InnerError!void { .div_trunc_optimized, .div_floor_optimized, .div_exact_optimized, + .div_ceil_optimized, .rem_optimized, .mod_optimized, .neg_optimized, diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 240123ee5142..74144b23e570 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -548,7 +548,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { 
.mul_with_overflow => try self.airMulWithOverflow(inst), .shl_with_overflow => try self.airShlWithOverflow(inst), - .div_float, .div_trunc, .div_floor, .div_exact => try self.airDiv(inst), + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => try self.airDiv(inst), .cmp_lt => try self.airCmp(inst, .lt), .cmp_lte => try self.airCmp(inst, .lte), @@ -696,6 +696,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .div_float_optimized, .div_trunc_optimized, .div_floor_optimized, + .div_ceil_optimized, .div_exact_optimized, .rem_optimized, .mod_optimized, diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 51019969f696..bca354385715 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1865,6 +1865,7 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { .div_float, .div_exact => cg.airDiv(inst), .div_trunc => cg.airDivTrunc(inst), .div_floor => cg.airDivFloor(inst), + .div_ceil => cg.airDivCeil(inst), .bit_and => cg.airBinOp(inst, .@"and"), .bit_or => cg.airBinOp(inst, .@"or"), .bool_and => cg.airBinOp(inst, .@"and"), @@ -2067,6 +2068,7 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { .div_trunc_optimized, .div_floor_optimized, .div_exact_optimized, + .div_ceil_optimized, .rem_optimized, .mod_optimized, .neg_optimized, @@ -6682,6 +6684,115 @@ fn airDivFloor(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); } +fn airDivCeil(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = func.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const pt = func.pt; + const zcu = pt.zcu; + const ty = func.typeOfIndex(inst); + const lhs = try func.resolveInst(bin_op.lhs); + const rhs = try func.resolveInst(bin_op.rhs); + + if (ty.isUnsignedInt(zcu)) { + const int_bits = ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return func.fail("TODO: `@divCeil` for 
unsigned integers larger than 64 bits ({d} bits requested)", .{int_bits}); + }; + + if (wasm_bits > 64) { + return func.fail("TODO: `@divCeil` for unsigned integers larger than 64 bits ({d} bits requested)", .{int_bits}); + } + + _ = try func.binOp(lhs, rhs, ty, .div); + _ = try func.binOp(lhs, rhs, ty, .rem); + + switch (wasm_bits) { + 32 => { + _ = try func.cmp(.stack, WValue{ .imm32 = 0 }, ty, .neq); + try func.addTag(.i32_add); + }, + 64 => { + _ = try func.cmp(.stack, WValue{ .imm64 = 0 }, ty, .neq); + try func.addTag(.i64_extend_i32_u); + try func.addTag(.i64_add); + }, + else => unreachable, + } + } else if (ty.isSignedInt(zcu)) { + const int_bits = ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return func.fail("TODO: `@divCeil` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + }; + + if (wasm_bits > 64) { + return func.fail("TODO: `@divCeil` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + } + + const zero = switch (wasm_bits) { + 32 => WValue{ .imm32 = 0 }, + 64 => WValue{ .imm64 = 0 }, + else => unreachable, + }; + + _ = try func.binOp(lhs, rhs, ty, .div); + + // 1 if lhs and rhs have the same sign, 0 otherwise. + _ = try func.binOp(lhs, rhs, ty, .xor); + _ = try func.cmp(.stack, zero, ty, .gte); + + _ = try func.binOp(lhs, rhs, ty, .rem); + _ = try func.cmp(.stack, zero, ty, .neq); + + try func.addTag(.i32_and); + + // Comparisons produce 32 bit integers, which must be extended in the 64 bit case. + if (wasm_bits == 64) { + try func.addTag(.i64_extend_i32_u); + } + + _ = try func.binOp(.stack, .stack, ty, .add); + + // We need to zero the high bits because N bit comparisons consider all 32 or 64 bits, and + // expect all but the lowest N bits to be 0. + // TODO: Should we be zeroing the high bits here or should we be ignoring the high bits + // when performing comparisons? 
+ if (wasm_bits != int_bits) { + _ = try func.wrapOperand(.stack, ty); + } + } else { + const float_bits = ty.floatBits(func.target.*); + if (float_bits > 64) { + return func.fail("TODO: `@divCeil` for floats larger than 64 bits ({d} bits requested)", .{float_bits}); + } + const is_f16 = float_bits == 16; + const lhs_wasm = if (is_f16) try func.fpext(lhs, Type.f16, Type.f32) else lhs; + const rhs_wasm = if (is_f16) try func.fpext(rhs, Type.f16, Type.f32) else rhs; + + try func.emitWValue(lhs_wasm); + try func.emitWValue(rhs_wasm); + + switch (float_bits) { + 16, 32 => { + try func.addTag(.f32_div); + try func.addTag(.f32_ceil); + }, + 64 => { + try func.addTag(.f64_div); + try func.addTag(.f64_ceil); + }, + else => unreachable, + } + + if (is_f16) { + _ = try func.fptrunc(.stack, Type.f32, Type.f16); + } + } + + const result = try func.allocLocal(ty); + try func.addLabel(.local_set, result.local.value); + return func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + fn airRem(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index a04921786790..9bae3702f200 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2472,7 +2472,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .mul_with_overflow => try cg.airMulWithOverflow(inst), .shl_with_overflow => try cg.airShlWithOverflow(inst), - .div_float, .div_trunc, .div_floor, .div_exact => try cg.airMulDivBinOp(inst), + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => try cg.airMulDivBinOp(inst), .cmp_lt_errors_len => try cg.airCmpLtErrorsLen(inst), @@ -2534,6 +2534,7 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .div_float_optimized, .div_trunc_optimized, .div_floor_optimized, + .div_ceil_optimized, .div_exact_optimized, .rem_optimized, .mod_optimized, @@ -12630,7 
+12631,7 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { self.activeIntBits(bin_op.rhs), dst_info.bits / 2, ), - .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits, + .div_trunc, .div_floor, .div_ceil, .div_exact, .rem, .mod => dst_info.bits, }); const src_abi_size: u32 = @intCast(src_ty.abiSize(zcu)); @@ -12823,6 +12824,10 @@ fn airMulDivBinOp(self: *CodeGen, inst: Air.Inst.Index) !void { else => call_mcv, } else call_mcv; }, + .div_ceil => return self.fail( + "TODO: implement `@divCeil` on {} and {}", + .{ dst_ty.fmt(pt), src_ty.fmt(pt) }, + ), }; try self.spillEflagsIfOccupied(); @@ -18321,7 +18326,7 @@ fn genMulDivBinOp( if (switch (tag) { else => unreachable, .mul, .mul_wrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2, - .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size, + .div_trunc, .div_floor, .div_ceil, .div_exact, .rem, .mod => dst_abi_size != src_abi_size, } or src_abi_size > 8) { const src_info = src_ty.intInfo(zcu); switch (tag) { @@ -18424,7 +18429,7 @@ fn genMulDivBinOp( return dst_mcv; }, - .div_trunc, .div_floor, .div_exact, .rem, .mod => switch (src_info.signedness) { + .div_trunc, .div_floor, .div_ceil, .div_exact, .rem, .mod => switch (src_info.signedness) { .signed => {}, .unsigned => { const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false); @@ -18452,6 +18457,7 @@ fn genMulDivBinOp( .callee = switch (tag) { .div_trunc, .div_floor, + .div_ceil, .div_exact, => "__udivei4", .rem, @@ -18643,6 +18649,11 @@ fn genMulDivBinOp( } }, + .div_ceil => return self.fail( + "TODO: implement `@divCeil` on {} and {}", + .{ dst_ty.fmt(pt), src_ty.fmt(pt) }, + ), + else => unreachable, } } @@ -18681,6 +18692,7 @@ fn genBinOp( .div_trunc, .div_floor, .div_exact, + .div_ceil, => std.fmt.bufPrint(&callee_buf, "__{s}{c}f3", .{ @tagName(air_tag)[0..3], floatCompilerRtAbiName(float_bits), @@ -18818,10 +18830,11 @@ fn genBinOp( .callee = callee, } }, &.{ lhs_ty, 
rhs_ty }, &.{ adjusted, .{ .air_ref = rhs_air } }, .{}); }, - .div_trunc, .div_floor => try self.genRoundLibcall(lhs_ty, result, .{ + .div_trunc, .div_floor, .div_ceil => try self.genRoundLibcall(lhs_ty, result, .{ .mode = switch (air_tag) { .div_trunc => .zero, .div_floor => .down, + .div_ceil => .up, else => unreachable, }, .precision = .inexact, @@ -19261,7 +19274,7 @@ fn genBinOp( .add => .{ .v_ss, .add }, .sub => .{ .v_ss, .sub }, .mul => .{ .v_ss, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => .{ .v_ss, .div }, .max => .{ .v_ss, .max }, .min => .{ .v_ss, .max }, else => unreachable, @@ -19271,7 +19284,7 @@ fn genBinOp( tmp_reg, ); switch (air_tag) { - .div_trunc, .div_floor => try self.asmRegisterRegisterRegisterImmediate( + .div_trunc, .div_floor, .div_ceil => try self.asmRegisterRegisterRegisterImmediate( .{ .v_ss, .round }, dst_reg, dst_reg, @@ -19280,6 +19293,7 @@ fn genBinOp( .mode = switch (air_tag) { .div_trunc => .zero, .div_floor => .down, + .div_ceil => .up, else => unreachable, }, .precision = .inexact, @@ -19302,6 +19316,7 @@ fn genBinOp( .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, @@ -19315,6 +19330,7 @@ fn genBinOp( .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, @@ -19706,7 +19722,7 @@ fn genBinOp( .add => .{ .v_ss, .add }, .sub => .{ .v_ss, .sub }, .mul => .{ .v_ss, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => .{ .v_ss, .div }, .max => .{ .v_ss, .max }, .min => .{ .v_ss, .max }, else => unreachable, @@ -19758,7 +19774,7 @@ fn genBinOp( .add => .{ 
.v_ps, .add }, .sub => .{ .v_ps, .sub }, .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .max }, else => unreachable, @@ -19801,7 +19817,7 @@ fn genBinOp( .add => .{ .v_ps, .add }, .sub => .{ .v_ps, .sub }, .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .max }, else => unreachable, @@ -19844,7 +19860,7 @@ fn genBinOp( .add => .{ .v_ps, .add }, .sub => .{ .v_ps, .sub }, .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .max }, else => unreachable, @@ -19872,6 +19888,7 @@ fn genBinOp( .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, @@ -19892,6 +19909,7 @@ fn genBinOp( .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, @@ -19909,7 +19927,7 @@ fn genBinOp( .add => .{ .v_ps, .add }, .sub => .{ .v_ps, .sub }, .mul => .{ .v_ps, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => .{ .v_ps, .div }, .max => .{ .v_ps, .max }, .min => .{ .v_ps, .min }, .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_ps, .cmp }, @@ -19925,6 +19943,7 @@ fn genBinOp( .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ 
._sd, .div }, .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, @@ -19945,6 +19964,7 @@ fn genBinOp( .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, @@ -19962,7 +19982,7 @@ fn genBinOp( .add => .{ .v_pd, .add }, .sub => .{ .v_pd, .sub }, .mul => .{ .v_pd, .mul }, - .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, + .div_float, .div_trunc, .div_floor, .div_ceil, .div_exact => .{ .v_pd, .div }, .max => .{ .v_pd, .max }, .cmp_lt, .cmp_lte, .cmp_eq, .cmp_gte, .cmp_gt, .cmp_neq => .{ .v_pd, .cmp }, .min => .{ .v_pd, .min }, @@ -20078,10 +20098,11 @@ fn genBinOp( switch (air_tag) { .add, .add_wrap, .sub, .sub_wrap, .mul, .mul_wrap, .div_float, .div_exact => {}, - .div_trunc, .div_floor => try self.genRound(lhs_ty, dst_reg, .{ .register = dst_reg }, .{ + .div_trunc, .div_floor, .div_ceil => try self.genRound(lhs_ty, dst_reg, .{ .register = dst_reg }, .{ .mode = switch (air_tag) { .div_trunc => .zero, .div_floor => .down, + .div_ceil => .up, else => unreachable, }, .precision = .inexact, diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 5364b0432dd1..6b314071555c 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3242,6 +3242,7 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, try airBinBuiltinCall(f, inst, "fmod", .none); }, .div_floor => try airBinBuiltinCall(f, inst, "div_floor", .none), + .div_ceil => try airBinBuiltinCall(f, inst, "div_ceil", .none), .mod => try airBinBuiltinCall(f, inst, "mod", .none), .abs => try airUnBuiltinCall(f, inst, air_datas[@intFromEnum(inst)].ty_op.operand, "abs", .none), @@ -3418,6 +3419,7 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .div_float_optimized, .div_trunc_optimized, .div_floor_optimized, + .div_ceil_optimized, .div_exact_optimized, .rem_optimized, 
.mod_optimized, diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index cf5f75b880d7..c12215c9ca11 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5085,6 +5085,7 @@ pub const FuncGen = struct { .div_float => try self.airDivFloat(inst, .normal), .div_trunc => try self.airDivTrunc(inst, .normal), .div_floor => try self.airDivFloor(inst, .normal), + .div_ceil => try self.airDivCeil(inst, .normal), .div_exact => try self.airDivExact(inst, .normal), .rem => try self.airRem(inst, .normal), .mod => try self.airMod(inst, .normal), @@ -5102,6 +5103,7 @@ pub const FuncGen = struct { .div_float_optimized => try self.airDivFloat(inst, .fast), .div_trunc_optimized => try self.airDivTrunc(inst, .fast), .div_floor_optimized => try self.airDivFloor(inst, .fast), + .div_ceil_optimized => try self.airDivCeil(inst, .fast), .div_exact_optimized => try self.airDivExact(inst, .fast), .rem_optimized => try self.airRem(inst, .fast), .mod_optimized => try self.airMod(inst, .fast), @@ -8606,6 +8608,54 @@ pub const FuncGen = struct { return self.wip.bin(.udiv, lhs, rhs, ""); } + fn airDivCeil(self: *FuncGen, inst: Air.Inst.Index, fast: Builder.FastMathKind) !Builder.Value { + const o = self.ng.object; + const zcu = o.pt.zcu; + const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + const inst_ty = self.typeOfIndex(inst); + const scalar_ty = inst_ty.scalarType(zcu); + + if (scalar_ty.isRuntimeFloat()) { + const result = try self.buildFloatOp(.div, fast, inst_ty, 2, .{ lhs, rhs }); + return self.buildFloatOp(.ceil, fast, inst_ty, 1, .{result}); + } else if (scalar_ty.isSignedInt(zcu)) { + const inst_llvm_ty = try o.lowerType(inst_ty); + const bit_size_minus_one = try o.builder.splatValue(inst_llvm_ty, try o.builder.intConst( + inst_llvm_ty.scalarType(&o.builder), + inst_llvm_ty.scalarBits(&o.builder) - 1, + )); + const zero = try 
o.builder.zeroInitValue(inst_llvm_ty); + const one = try o.builder.splatValue(inst_llvm_ty, try o.builder.intConst(inst_llvm_ty.scalarType(&o.builder), 1)); + + const quotient = try self.wip.bin(.sdiv, lhs, rhs, ""); + const rem = try self.wip.bin(.srem, lhs, rhs, ""); + + const quotient_sign = try self.wip.bin(.xor, lhs, rhs, ""); + // quotient_sign_mask has all bits set to 1 if the result is negative, or 0 + // otherwise. + const quotient_sign_mask = try self.wip.bin(.ashr, quotient_sign, bit_size_minus_one, ""); + // correction_if_inexact is 0 if the result is negative, 1 otherwise. + const correction_if_inexact = try self.wip.bin(.add, quotient_sign_mask, one, ""); + const is_rem_nonzero = try self.wip.icmp(.ne, rem, zero, ""); + const correction = try self.wip.select(fast, is_rem_nonzero, correction_if_inexact, zero, ""); + + return self.wip.bin(.@"add nsw", quotient, correction, ""); + } + // unsigned int + const inst_llvm_ty = try o.lowerType(inst_ty); + const zero = try o.builder.zeroInitValue(inst_llvm_ty); + const one = try o.builder.splatValue(inst_llvm_ty, try o.builder.intConst(inst_llvm_ty.scalarType(&o.builder), 1)); + + const quotient = try self.wip.bin(.udiv, lhs, rhs, ""); + const rem = try self.wip.bin(.urem, lhs, rhs, ""); + const is_non_zero = try self.wip.icmp(.ne, rem, zero, ""); + const correction = try self.wip.select(fast, is_non_zero, one, zero, ""); + + return try self.wip.bin(.@"add nuw", quotient, correction, ""); + } + fn airDivExact(self: *FuncGen, inst: Air.Inst.Index, fast: Builder.FastMathKind) !Builder.Value { const o = self.ng.object; const zcu = o.pt.zcu; diff --git a/src/print_air.zig b/src/print_air.zig index d99be7770d3b..e69eaf2802ae 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -120,6 +120,7 @@ const Writer = struct { .div_float, .div_trunc, .div_floor, + .div_ceil, .div_exact, .rem, .mod, @@ -150,6 +151,7 @@ const Writer = struct { .div_float_optimized, .div_trunc_optimized, .div_floor_optimized, + 
.div_ceil_optimized, .div_exact_optimized, .rem_optimized, .mod_optimized, diff --git a/src/print_zir.zig b/src/print_zir.zig index afa94f40ed3a..171c5902bfcb 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -403,6 +403,7 @@ const Writer = struct { .int_cast, .ptr_cast, .truncate, + .div_ceil, .div_exact, .div_floor, .div_trunc, diff --git a/test/behavior/int128.zig b/test/behavior/int128.zig index 544b38fca651..06319f61b931 100644 --- a/test/behavior/int128.zig +++ b/test/behavior/int128.zig @@ -64,6 +64,7 @@ test "int128" { const a: i128 = -170141183460469231731687303715884105728; const b: i128 = -0x8000_0000_0000_0000_0000_0000_0000_0000; try expect(@divFloor(b, 1_000_000) == -170141183460469231731687303715885); + try expect(@divCeil(b, 1_000_000) == -170141183460469231731687303715884); try expect(a == b); } diff --git a/test/behavior/math.zig b/test/behavior/math.zig index fb325a95b62a..1322286cce7d 100644 --- a/test/behavior/math.zig +++ b/test/behavior/math.zig @@ -507,6 +507,16 @@ fn testIntDivision() !void { try expect(divFloor(i64, -0x80000000, -2) == 0x40000000); try expect(divFloor(i64, -0x40000001, 0x40000000) == -2); + try expect(divCeil(i32, 5, 3) == 2); + try expect(divCeil(i32, -5, 3) == -1); + try expect(divCeil(i32, -0x80000000, -2) == 0x40000000); + try expect(divCeil(i32, 0, -0x80000000) == 0); + try expect(divCeil(i32, -0x40000001, 0x40000000) == -1); + try expect(divCeil(i32, -0x80000000, 1) == -0x80000000); + try expect(divCeil(i32, 10, 12) == 1); + try expect(divCeil(i32, -14, 12) == -1); + try expect(divCeil(i32, -2, 12) == 0); + try expect(divTrunc(i32, 5, 3) == 1); try expect(divTrunc(i32, -5, 3) == -1); try expect(divTrunc(i32, 9, -10) == 0); @@ -548,6 +558,24 @@ fn testIntDivision() !void { try expect( 1194735857077236777412821811143690633098347576 / 508740759824825164163191790951174292733114988 == 2, ); + try expect( + @divFloor(-1194735857077236777412821811143690633098347576, 508740759824825164163191790951174292733114988) == 
-3, + ); + try expect( + @divFloor(1194735857077236777412821811143690633098347576, -508740759824825164163191790951174292733114988) == -3, + ); + try expect( + @divFloor(-1194735857077236777412821811143690633098347576, -508740759824825164163191790951174292733114988) == 2, + ); + try expect( + @divCeil(-1194735857077236777412821811143690633098347576, 508740759824825164163191790951174292733114988) == -2, + ); + try expect( + @divCeil(1194735857077236777412821811143690633098347576, -508740759824825164163191790951174292733114988) == -2, + ); + try expect( + @divCeil(-1194735857077236777412821811143690633098347576, -508740759824825164163191790951174292733114988) == 3, + ); try expect( @divTrunc(-1194735857077236777412821811143690633098347576, 508740759824825164163191790951174292733114988) == -2, ); @@ -576,6 +604,13 @@ fn testFloatDivision() !void { try expect(divFloor(f16, -43.0, 12.0) == -4.0); try expect(divFloor(f64, -90.0, -9.0) == 10.0); + try expect(divCeil(f32, 5.0, 3.0) == 2.0); + try expect(divCeil(f32, -5.0, 3.0) == -1.0); + try expect(divCeil(f32, 56.0, 9.0) == 7.0); + try expect(divCeil(f32, 1053.0, -41.0) == -25.0); + try expect(divCeil(f16, -43.0, 12.0) == -3.0); + try expect(divCeil(f64, -90.0, -9.0) == 10.0); + try expect(divTrunc(f32, 5.0, 3.0) == 1.0); try expect(divTrunc(f32, -5.0, 3.0) == -1.0); try expect(divTrunc(f32, 9.0, -10.0) == 0.0); @@ -628,6 +663,8 @@ fn testDivisionFP16() !void { try expect(divFloor(f16, 5.0, 3.0) == 1.0); try expect(divFloor(f16, -5.0, 3.0) == -2.0); + try expect(divCeil(f16, 5.0, 3.0) == 2.0); + try expect(divCeil(f16, -5.0, 3.0) == -1.0); try expect(divTrunc(f16, 5.0, 3.0) == 1.0); try expect(divTrunc(f16, -5.0, 3.0) == -1.0); try expect(divTrunc(f16, 9.0, -10.0) == 0.0); @@ -643,6 +680,9 @@ fn divExact(comptime T: type, a: T, b: T) T { fn divFloor(comptime T: type, a: T, b: T) T { return @divFloor(a, b); } +fn divCeil(comptime T: type, a: T, b: T) T { + return @divCeil(a, b); +} fn divTrunc(comptime T: type, a: T, b: T) 
T { return @divTrunc(a, b); } diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 2dbd1de8eb71..8eab08b7f69d 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -578,8 +578,12 @@ test "vector division operators" { for (@as([4]T, d2), 0..) |v, i| { try expect(@divFloor(x[i], y[i]) == v); } - const d3 = @divTrunc(x, y); + const d3 = @divCeil(x, y); for (@as([4]T, d3), 0..) |v, i| { + try expect(@divCeil(x[i], y[i]) == v); + } + const d4 = @divTrunc(x, y); + for (@as([4]T, d4), 0..) |v, i| { try expect(@divTrunc(x[i], y[i]) == v); } } @@ -1286,11 +1290,13 @@ test "zero divisor" { const v2 = @divExact(zeros, ones); const v3 = @divTrunc(zeros, ones); const v4 = @divFloor(zeros, ones); + const v5 = @divCeil(zeros, ones); _ = v1[0]; _ = v2[0]; _ = v3[0]; _ = v4[0]; + _ = v5[0]; } test "zero multiplicand" { diff --git a/test/cases/compile_errors/signed_integer_division.zig b/test/cases/compile_errors/signed_integer_division.zig index 7e968ac77eb1..691476623931 100644 --- a/test/cases/compile_errors/signed_integer_division.zig +++ b/test/cases/compile_errors/signed_integer_division.zig @@ -6,4 +6,4 @@ export fn foo(a: i32, b: i32) i32 { // backend=stage2 // target=native // -// :2:14: error: division with 'i32' and 'i32': signed integers must use @divTrunc, @divFloor, or @divExact +// :2:14: error: division with 'i32' and 'i32': signed integers must use @divTrunc, @divFloor, @divCeil, or @divExact