From 1b93d0c64906fc7e85f8de90b72717370a08a84b Mon Sep 17 00:00:00 2001 From: Rac Date: Sun, 4 Jan 2026 00:48:36 +0900 Subject: [PATCH 1/2] Add copilot instructions and tasks configuration; enhance benchmark output formatting --- .github/copilot-instructions.md | 41 +++++++++++++++++++++++++++ .vscode/tasks.json | 50 +++++++++++++++++++++++++++++++++ benchmarks/isprime_bench.cpp | 28 +++++++++++++----- 3 files changed, 112 insertions(+), 7 deletions(-) create mode 100644 .github/copilot-instructions.md create mode 100644 .vscode/tasks.json diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..7808cf2 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,41 @@ +This is a C++ header-only library called "libcpprime", intended for fast primality testing of 64-bit integers. + + +The library itself and its test code should be written to work with compilers supporting C++11. +Benchmark and other temporary files may use the latest C++ features. +The library implementation should pay particular attention to compatibility with older compilers. + + +Supported compilers include gcc, clang, msvc, clang-cl, and the gcc and clang versions within mingw. + + +When you want to run tests or benchmarks, execute the tasks described in tasks.json. +For detailed benchmark results, please refer to benchmarks/bench_summary.md. +Running tests and benchmarks takes approximately 20-30 seconds. + + +When optimizing code, primarily use gcc or msvc for benchmarks. +However, to avoid significant speed differences between compilers, run them with clang and clang-cl once you have finished the initial implementation. + +Please inform users of any breaking changes. + + +.txt files often contain large amounts of data. Do not read files with the .txt extension. + + +If you wish to generate data mechanically, please create C++ code or Python scripts within the tmp folder. + + +For primality testing in Python, you can use SymPy. 
For execution speed, please prioritize using PyPy. + + +All code and README.md should be written in English. However, write your chat responses in the language the user is currently using. + +The directory structure is as follows: +include/libcpprime/ : Main library code +benchmarks/ : Code for benchmarks +benchmarks/bench_* : Benchmark results +tests/ : Code and test cases for tests +docs/ : Data used for documentation +tmp/ : Files used for experiments, etc. +/README.md : README diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..f5c40cb --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,50 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Test (gcc)", + "type": "shell", + "command": "task test:gcc", + }, + { + "label": "Test (clang)", + "type": "shell", + "command": "task test:clang", + }, + { + "label": "Test (msvc)", + "type": "shell", + "command": "task test:msvc", + }, + { + "label": "Test (clang-cl)", + "type": "shell", + "command": "task test:clang-cl", + }, + { + "label": "Benchmark (gcc)", + "type": "shell", + "command": "task bench:gcc", + }, + { + "label": "Benchmark (clang)", + "type": "shell", + "command": "task bench:clang", + }, + { + "label": "Benchmark (msvc)", + "type": "shell", + "command": "task bench:msvc", + }, + { + "label": "Benchmark (clang-cl)", + "type": "shell", + "command": "task bench:clang-cl", + }, + { + "label": "Generate Documentation", + "type": "shell", + "command": "task docs", + }, + ] +} diff --git a/benchmarks/isprime_bench.cpp b/benchmarks/isprime_bench.cpp index eb05065..cdc0675 100644 --- a/benchmarks/isprime_bench.cpp +++ b/benchmarks/isprime_bench.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,7 @@ int main(int argc, char** argv) { weighted[count++] = 64 - i; } } - auto bench = [rng = Rng(42), heavy](bool (*func)(std::uint64_t)) mutable { + auto bench = [rng = Rng(), heavy](bool (*func)(std::uint64_t)) mutable { std::uint32_t k = 
weighted[rng.bounded(89440)]; std::uint64_t n = (rng() >> k) | 1; int iters = (heavy ? 300 : 250); @@ -124,13 +125,26 @@ int main(int argc, char** argv) { f_summary << "avg_time_prime_IsPrime,avg_time_prime_IsPrimeNoTable,avg_time_composite_IsPrime,avg_time_composite_IsPrimeNoTable\n"; f_summary_md << "| Bit Width | IsPrime Avg Time (ns, prime) | IsPrimeNoTable Avg Time (ns, prime) | IsPrime Avg Time (ns, composite) | IsPrimeNoTable Avg Time (ns, composite) |\n"; f_summary_md << "|-----------|------------------------------|-------------------------------------|----------------------------------|-----------------------------------------|\n"; + f_summary << std::fixed << std::setprecision(6); + f_summary_md << std::fixed << std::setprecision(2); for (std::int32_t i = 1; i <= 64; ++i) { - std::string avg_prime = count_prime[i] ? std::to_string(time_prime_sum[i] / count_prime[i]) : "nan"; - std::string avg_prime_NoTable = count_prime_NoTable[i] ? std::to_string(time_prime_sum_NoTable[i] / count_prime_NoTable[i]) : "nan"; - std::string avg_composite = count_composite[i] ? std::to_string(time_composite_sum[i] / count_composite[i]) : "nan"; - std::string avg_composite_NoTable = count_composite_NoTable[i] ? 
std::to_string(time_composite_sum_NoTable[i] / count_composite_NoTable[i]) : "nan"; - f_summary << avg_prime << "," << avg_prime_NoTable << "," << avg_composite << "," << avg_composite_NoTable << "\n"; - f_summary_md << "| " << i << " | " << avg_prime << " | " << avg_prime_NoTable << " | " << avg_composite << " | " << avg_composite_NoTable << " |\n"; + auto print_result = [](std::ofstream& f, double val, std::int32_t count) -> std::ofstream& { + if (count) { + f << (val / count); + } else { + f << "nan"; + } + return f; + }; + print_result(f_summary, time_prime_sum[i], count_prime[i]) << ","; + print_result(f_summary, time_prime_sum_NoTable[i], count_prime_NoTable[i]) << ","; + print_result(f_summary, time_composite_sum[i], count_composite[i]) << ","; + print_result(f_summary, time_composite_sum_NoTable[i], count_composite_NoTable[i]) << "\n"; + f_summary_md << "| " << i << " | "; + print_result(f_summary_md, time_prime_sum[i], count_prime[i]) << " | "; + print_result(f_summary_md, time_prime_sum_NoTable[i], count_prime_NoTable[i]) << " | "; + print_result(f_summary_md, time_composite_sum[i], count_composite[i]) << " | "; + print_result(f_summary_md, time_composite_sum_NoTable[i], count_composite_NoTable[i]) << " |\n"; } f_summary << std::flush; f_summary_md << std::flush; From 28aae704e2010cf7bfb18f9c43ef002367c4a5e0 Mon Sep 17 00:00:00 2001 From: Rac Date: Sun, 4 Jan 2026 01:29:35 +0900 Subject: [PATCH 2/2] Improve performance --- README.md | 2 ++ benchmarks/isprime_bench.cpp | 2 +- include/libcpprime/IsPrime.hpp | 8 +++--- include/libcpprime/IsPrimeNoTable.hpp | 10 +++---- include/libcpprime/internal/IsPrimeCommon.hpp | 26 +++++++++---------- 5 files changed, 25 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index cb51725..2815588 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,8 @@ Benchmarks are executed on GitHub Actions. 
## Releases +- 2026/01/04 ver 1.3.2 + - Improve performance - 2025/12/24 ver 1.3.1 - Improve performance and reduce binary size for `cppr::IsPrime` - 2025/12/21 ver 1.3.0 diff --git a/benchmarks/isprime_bench.cpp b/benchmarks/isprime_bench.cpp index cdc0675..bc48c8c 100644 --- a/benchmarks/isprime_bench.cpp +++ b/benchmarks/isprime_bench.cpp @@ -32,7 +32,7 @@ int main(int argc, char** argv) { weighted[count++] = 64 - i; } } - auto bench = [rng = Rng(), heavy](bool (*func)(std::uint64_t)) mutable { + auto bench = [rng = Rng(100), heavy](bool (*func)(std::uint64_t)) mutable { std::uint32_t k = weighted[rng.bounded(89440)]; std::uint64_t n = (rng() >> k) | 1; int iters = (heavy ? 300 : 250); diff --git a/include/libcpprime/IsPrime.hpp b/include/libcpprime/IsPrime.hpp index 80360e3..5cbea0a 100644 --- a/include/libcpprime/IsPrime.hpp +++ b/include/libcpprime/IsPrime.hpp @@ -41,14 +41,14 @@ constexpr std::uint64_t FlagTable17[1024] = { #include "internal/IsPrimeTable17.txt" }; // Bitset for odd numbers < 2^17 (2 is handled explicitly). -CPPR_INTERNAL_CONSTEXPR bool IsPrime17(const std::uint64_t n) noexcept { return n == 2 || (n % 2 == 1 && (FlagTable17[n / 128] & (1ull << (n % 128 / 2)))); } +CPPR_INTERNAL_CONSTEXPR_INLINE bool IsPrime17(const std::uint64_t n) noexcept { return n == 2 || (n % 2 == 1 && (FlagTable17[n / 128] & (1ull << (n % 128 / 2)))); } constexpr std::uint16_t Bases64[16384] = { #include "internal/IsPrimeBases64.txt" }; // Deterministic base selection via a multiplicative hash (fast table lookup). 
-CPPR_INTERNAL_CONSTEXPR std::uint16_t GetBase(std::uint64_t x) noexcept { return Bases64[(0xad625b89u * static_cast(x)) >> 18]; } -CPPR_INTERNAL_CONSTEXPR bool IsPrime49(const std::uint64_t x) noexcept { +CPPR_INTERNAL_CONSTEXPR_INLINE std::uint16_t GetBase(std::uint64_t x) noexcept { return Bases64[(0xad625b89u * static_cast(x)) >> 18]; } +CPPR_INTERNAL_CONSTEXPR_INLINE bool IsPrime49(const std::uint64_t x) noexcept { const MontgomeryModint64Impl mint(x); const std::int32_t S = CountrZero(x - 1); const std::uint64_t D = (x - 1) >> S; @@ -91,7 +91,7 @@ CPPR_INTERNAL_CONSTEXPR bool IsPrime49(const std::uint64_t x) noexcept { return res1 && res2; } template -CPPR_INTERNAL_CONSTEXPR bool IsPrime64(const std::uint64_t x) noexcept { +CPPR_INTERNAL_CONSTEXPR_INLINE bool IsPrime64(const std::uint64_t x) noexcept { const MontgomeryModint64Impl mint(x); const std::int32_t S = CountrZero(x - 1); const std::uint64_t D = (x - 1) >> S; diff --git a/include/libcpprime/IsPrimeNoTable.hpp b/include/libcpprime/IsPrimeNoTable.hpp index e11144b..85c26b3 100644 --- a/include/libcpprime/IsPrimeNoTable.hpp +++ b/include/libcpprime/IsPrimeNoTable.hpp @@ -23,9 +23,9 @@ constexpr std::uint32_t FlagTable10[32] = { #include "internal/IsPrimeTable10.txt" }; // Bitset for small n < 1024. -CPPR_INTERNAL_CONSTEXPR bool IsPrime10(const std::uint64_t n) noexcept { return (FlagTable10[n / 32] >> (n % 32)) & 1; } +CPPR_INTERNAL_CONSTEXPR_INLINE bool IsPrime10(const std::uint64_t n) noexcept { return (FlagTable10[n / 32] >> (n % 32)) & 1; } -CPPR_INTERNAL_CONSTEXPR bool GCDFilter(const std::uint32_t n) noexcept { +CPPR_INTERNAL_CONSTEXPR_INLINE bool GCDFilter(const std::uint32_t n) noexcept { auto GCD = [](std::uint32_t x, std::uint32_t y) -> std::uint32_t { // Binary GCD (Stein's algorithm). Assumes y != 0 when x != 0. 
if (x == 0) return 0; @@ -56,7 +56,7 @@ CPPR_INTERNAL_CONSTEXPR bool GCDFilter(const std::uint32_t n) noexcept { return GCD((a * b) % n, n) == 1; } -CPPR_INTERNAL_CONSTEXPR std::uint64_t GetLucasBase(const std::uint64_t x) noexcept { +CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t GetLucasBase(const std::uint64_t x) noexcept { // Chooses a Lucas parameter D for the strong Lucas probable prime test. // Returns: // - 0: definitely composite (quick checks found a factor or perfect square) @@ -113,7 +113,7 @@ CPPR_INTERNAL_CONSTEXPR std::uint64_t GetLucasBase(const std::uint64_t x) noexce return Z; } -CPPR_INTERNAL_CONSTEXPR bool IsPrime64MillerRabin(const std::uint64_t x) noexcept { +CPPR_INTERNAL_CONSTEXPR_INLINE bool IsPrime64MillerRabin(const std::uint64_t x) noexcept { const MontgomeryModint64Impl mint(x); const std::int32_t S = CountrZero(x - 1); const std::uint64_t D = (x - 1) >> S; @@ -263,7 +263,7 @@ CPPR_INTERNAL_CONSTEXPR bool IsPrime64MillerRabin(const std::uint64_t x) noexcep } } -CPPR_INTERNAL_CONSTEXPR bool IsPrime64BailliePSW(const std::uint64_t x) noexcept { +CPPR_INTERNAL_CONSTEXPR_INLINE bool IsPrime64BailliePSW(const std::uint64_t x) noexcept { const MontgomeryModint64Impl mint(x); const auto one = mint.one(); const auto mone = mint.neg(one); diff --git a/include/libcpprime/internal/IsPrimeCommon.hpp b/include/libcpprime/internal/IsPrimeCommon.hpp index 80d16d0..38c13d8 100644 --- a/include/libcpprime/internal/IsPrimeCommon.hpp +++ b/include/libcpprime/internal/IsPrimeCommon.hpp @@ -40,7 +40,7 @@ template class MontgomeryModint64Impl { std::uint64_t mod_ = 0, rs = 0, nr = 0, np = 0; - CPPR_INTERNAL_CONSTEXPR std::uint64_t reduce(const std::uint64_t n) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t reduce(const std::uint64_t n) const noexcept { // Montgomery reduction of a 128-bit value with implicit low half `n`. 
std::uint64_t q = n * nr; if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { @@ -51,7 +51,7 @@ class MontgomeryModint64Impl { return mod_ - m; } } - CPPR_INTERNAL_CONSTEXPR std::uint64_t reduce(const std::uint64_t a, const std::uint64_t b) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t reduce(const std::uint64_t a, const std::uint64_t b) const noexcept { // Montgomery reduction of the product a*b. auto tmp = Mulu128(a, b); std::uint64_t d = tmp.high; @@ -81,13 +81,13 @@ class MontgomeryModint64Impl { for (std::uint32_t i = 0; i != 5; ++i) nr *= 2 - n * nr; np = reduce(rs); } - CPPR_INTERNAL_CONSTEXPR std::uint64_t build(std::uint32_t x) const noexcept { return reduce(x % mod_, rs); } - CPPR_INTERNAL_CONSTEXPR std::uint64_t build(std::uint64_t x) const noexcept { return reduce(x % mod_, rs); } - CPPR_INTERNAL_CONSTEXPR std::uint64_t raw(std::uint64_t x) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t build(std::uint32_t x) const noexcept { return reduce(x % mod_, rs); } + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t build(std::uint64_t x) const noexcept { return reduce(x % mod_, rs); } + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t raw(std::uint64_t x) const noexcept { Assume(x < mod_); return reduce(x, rs); } - CPPR_INTERNAL_CONSTEXPR std::uint64_t val(std::uint64_t x) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t val(std::uint64_t x) const noexcept { // Converts from Montgomery domain back to the standard residue. // Non-strict mode permits values in [0, 2*mod) for faster operations. 
if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { @@ -99,7 +99,7 @@ class MontgomeryModint64Impl { return tmp - mod_ * (tmp >= mod_); } } - CPPR_INTERNAL_CONSTEXPR std::uint64_t one() const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t one() const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(np < mod_); return np; @@ -108,7 +108,7 @@ class MontgomeryModint64Impl { return np; } } - CPPR_INTERNAL_CONSTEXPR std::uint64_t neg(std::uint64_t x) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t neg(std::uint64_t x) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_); return (mod_ - x) * (x != 0); @@ -117,7 +117,7 @@ class MontgomeryModint64Impl { return (2 * mod_ - x) * (x != 0); } } - CPPR_INTERNAL_CONSTEXPR std::uint64_t mul(std::uint64_t x, std::uint64_t y) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t mul(std::uint64_t x, std::uint64_t y) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); return reduce(x, y); @@ -126,7 +126,7 @@ class MontgomeryModint64Impl { return reduce(x, y); } } - CPPR_INTERNAL_CONSTEXPR bool same(std::uint64_t x, std::uint64_t y) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE bool same(std::uint64_t x, std::uint64_t y) const noexcept { // Equality check that tolerates the relaxed range in non-strict mode. 
if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); @@ -137,7 +137,7 @@ class MontgomeryModint64Impl { return (tmp == 0) || (tmp == mod_) || (tmp == 0 - mod_); } } - CPPR_INTERNAL_CONSTEXPR bool is_zero(std::uint64_t x) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE bool is_zero(std::uint64_t x) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_); return x == 0; @@ -146,7 +146,7 @@ class MontgomeryModint64Impl { return x == 0 || x == mod_; } } - CPPR_INTERNAL_CONSTEXPR std::uint64_t add(std::uint64_t x, std::uint64_t y) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t add(std::uint64_t x, std::uint64_t y) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); return x + y - (x >= mod_ - y) * mod_; @@ -155,7 +155,7 @@ class MontgomeryModint64Impl { return x + y - (x >= 2 * mod_ - y) * (2 * mod_); } } - CPPR_INTERNAL_CONSTEXPR std::uint64_t sub(std::uint64_t x, std::uint64_t y) const noexcept { + CPPR_INTERNAL_CONSTEXPR_INLINE std::uint64_t sub(std::uint64_t x, std::uint64_t y) const noexcept { if CPPR_INTERNAL_IF_CONSTEXPR (Strict) { Assume(x < mod_ && y < mod_); return x - y + (x < y) * mod_;