diff --git a/Source/DigitViewer2/DigitScanner/DigitScanner.cpp b/Source/DigitViewer2/DigitScanner/DigitScanner.cpp new file mode 100644 index 0000000..167e8b1 --- /dev/null +++ b/Source/DigitViewer2/DigitScanner/DigitScanner.cpp @@ -0,0 +1,588 @@ +/* DigitScanner.cpp + * + * Author : Michael Kleber + * Date Created : 01/15/2026 + * Last Modified : 04/07/2026 + * Copyright 2026 Google LLC + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#include +// MSVC: Map to the T0 hint (fetch to all cache levels), read-only. +#define PREFETCH(addr) _mm_prefetch((const char*)(addr), _MM_HINT_T0) +#elif defined(__GNUC__) || defined(__clang__) +// GCC/Clang: Map to the builtin with read-only and high locality +#define PREFETCH(addr) __builtin_prefetch((addr), 0, 3) +#else +// Do nothing on other compilers +#define PREFETCH(addr) ((void)0) +#endif + +#include "PublicLibs/ConsoleIO/BasicIO.h" +#include "PublicLibs/BasicLibs/StringTools/ToString.h" +#include "PublicLibs/BasicLibs/Memory/SmartBuffer.h" +#include "PublicLibs/SystemLibs/Concurrency/Parallelizers.h" +#include "PublicLibs/SystemLibs/Environment/Environment.h" +#ifdef YMP_STANDALONE +#include "PrivateLibs/SystemLibs/ParallelFrameworks/ParallelFrameworks.h" +#endif +#include "DigitViewer2/Globals.h" +#include "DigitViewer2/RawToAscii/RawToAscii.h" +#include "DigitViewer2/DigitReaders/BasicDigitReader.h" +#include "DigitScanner.h" + + + +namespace DigitViewer2 { + +using namespace ymp; + +static double get_cpu_time(){ +#if defined(__GNUC__) || defined(__clang__) +struct timespec ts; + if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0){ + return (double)ts.tv_sec + (double)ts.tv_nsec / 1000000000.0; + } +#endif + return 0; +} + +static std::string format_times(double wall_time, double cpu_time) { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "%.3fs (wall), %.3fs (CPU)", wall_time, cpu_time); + return std::string(buffer); +} + +class DigitsMapperAction : public ymp::BasicAction { +public: + DigitsMapperAction( + BasicDigitReader& reader, + std::vector>>& buckets, + uiL_t radix_to_d_minus_1, + upL_t digits, + char radix, + uiL_t current_stream_offset, + uiL_t chunk_to_process, + upL_t num_threads, + uiL_t partition_size_bits, + uiL_t recommended_bucket_capacity + ) + : m_reader(reader) + , m_buckets(buckets) + , m_radix_to_d_minus_1(radix_to_d_minus_1) + , m_d(digits) + , m_radix(radix) + , m_current_stream_offset(current_stream_offset) + , m_chunk_to_process(chunk_to_process) + , m_num_threads(num_threads) + , m_partition_size_bits(partition_size_bits) + , m_recommended_bucket_capacity(recommended_bucket_capacity) + {} + + virtual void run(upL_t index = 0) override { + upL_t block_size = (m_chunk_to_process + m_num_threads - 1) / m_num_threads; + uiL_t start_offset = m_current_stream_offset + index * block_size; + uiL_t end_offset = std::min(start_offset + block_size, m_current_stream_offset + m_chunk_to_process); + if (start_offset >= end_offset) return; + + auto& my_buckets = m_buckets[index]; + + // Clear my row in buckets (one bucket for each partition) + for (upL_t j = 0; j < m_num_threads; ++j) { + auto& bucket = my_buckets[j]; + bucket.clear(); + if (bucket.capacity() < m_recommended_bucket_capacity) { + bucket.reserve(m_recommended_bucket_capacity); + } + } + + upL_t bytes = m_reader.recommend_buffer_size(end_offset - start_offset); + SmartBuffer<> buffer(bytes, BUFFER_ALIGNMENT); + std::vector raw_digits(end_offset - start_offset); + AlignedBufferC frame(buffer, bytes); + + uiL_t thread_d_string_value = 0; + upL_t seed_digits = m_d - 1; + if (start_offset > 0) { + std::vector seed_raw(seed_digits); + m_reader.load_digits(&seed_raw[0], nullptr, start_offset - seed_digits, seed_digits, frame, parallelizer_none, 1); + std::vector seed_dec(seed_digits); + RawToAscii::raw_to_dec(&seed_dec[0], &seed_raw[0], seed_digits); + + for (upL_t i = 0; i < seed_digits; ++i) { + thread_d_string_value = thread_d_string_value * m_radix + (seed_dec[i] - '0'); + } + } + + m_reader.load_digits(&raw_digits[0], nullptr, start_offset, end_offset - start_offset, frame, parallelizer_none, 1); + std::vector dec_digits(end_offset - start_offset); + RawToAscii::raw_to_dec(&dec_digits[0], &raw_digits[0], end_offset - start_offset); + + uiL_t string_value = thread_d_string_value; + upL_t num_digits = end_offset - start_offset; + + for (upL_t i = 0; i < num_digits; ++i) { + char digit = dec_digits[i]; + string_value = (string_value % m_radix_to_d_minus_1) * m_radix + (digit - '0'); + + uiL_t val = string_value; + uiL_t partition_id = val / m_partition_size_bits; + if (partition_id >= m_num_threads) partition_id = m_num_threads - 1; + + my_buckets[partition_id].push_back(val); + } + } + +private: + BasicDigitReader& m_reader; + std::vector>>& m_buckets; + uiL_t m_radix_to_d_minus_1; + upL_t m_d; + char m_radix; + uiL_t m_current_stream_offset; + uiL_t m_chunk_to_process; + upL_t m_num_threads; + uiL_t m_partition_size_bits; + uiL_t m_recommended_bucket_capacity; +}; + +class ValuesReducerAction : public ymp::BasicAction { +public: + ValuesReducerAction( + std::vector& seen_strings, + const std::vector>>& buckets, + std::atomic& found_strings_count, + upL_t num_threads + ) + : m_seen_strings(seen_strings) + , m_buckets(buckets) + , m_found_strings_count(found_strings_count) + , m_num_threads(num_threads) + {} + + virtual void run(upL_t index = 0) override { + upL_t j = index; + if (j >= m_num_threads) return; + + uiL_t local_new_bits = 0; + const int PREFETCH_DIST = 16; + + // Directly iterate over buckets without merging or sorting + for (upL_t i = 0; i < m_num_threads; ++i) { + const auto& bucket = m_buckets[i][j]; + + for (size_t k = 0; k < bucket.size(); ++k) { + if (k + PREFETCH_DIST < bucket.size()) { + uiL_t future_val = bucket[k + PREFETCH_DIST]; + PREFETCH(&m_seen_strings[future_val / 64]); + } + + uiL_t val = bucket[k]; + uiL_t idx = val / 64; + uint64_t mask = 1ULL << (val % 64); + + uint64_t old_val = m_seen_strings[idx]; + if (!(old_val & mask)) { + m_seen_strings[idx] = old_val | mask; + local_new_bits++; + } + } + } + + m_found_strings_count.fetch_add(local_new_bits, std::memory_order_relaxed); + } + +private: + std::vector& m_seen_strings; + const std::vector>>& m_buckets; + std::atomic& m_found_strings_count; + upL_t m_num_threads; +}; + +class DigitMapScanAction : public ymp::BasicAction { +public: + DigitMapScanAction( + BasicDigitReader& reader, + std::unordered_map>& missing_strings_map, + const std::vector& bloom_filter, + uiL_t bloom_mask, + std::mutex& map_mutex, + std::atomic& unfound_count, + uiL_t radix_to_d_minus_1, + upL_t digits, + char radix, + uiL_t current_stream_offset, + uiL_t chunk_to_process, + upL_t num_threads + ) + : m_reader(reader) + , m_missing_strings_map(missing_strings_map) + , m_bloom_filter(bloom_filter) + , m_bloom_mask(bloom_mask) + , m_map_mutex(map_mutex) + , m_unfound_count(unfound_count) + , m_radix_to_d_minus_1(radix_to_d_minus_1) + , m_d(digits) + , m_radix(radix) + , m_current_stream_offset(current_stream_offset) + , m_chunk_to_process(chunk_to_process) + , m_num_threads(num_threads) + {} + + virtual void run(upL_t index = 0) override { + upL_t block_size = (m_chunk_to_process + m_num_threads - 1) / m_num_threads; + uiL_t start_offset = m_current_stream_offset + index * block_size; + uiL_t end_offset = std::min(start_offset + block_size, m_current_stream_offset + m_chunk_to_process); + if (start_offset >= end_offset) return; + + // Each thread needs its own buffer + upL_t bytes = m_reader.recommend_buffer_size(end_offset - start_offset); + SmartBuffer<> buffer(bytes, BUFFER_ALIGNMENT); + std::vector raw_digits(end_offset - start_offset); + AlignedBufferC frame(buffer, bytes); + + // Each thread seeds its sliding window, using the d-1 digits before its range of digits begins + uiL_t thread_d_string_value = 0; + upL_t seed_digits = m_d - 1; + if (start_offset > 0) { + std::vector seed_raw(seed_digits); + m_reader.load_digits(&seed_raw[0], nullptr, start_offset - seed_digits, seed_digits, frame, parallelizer_none, 1); + std::vector seed_dec(seed_digits); + RawToAscii::raw_to_dec(&seed_dec[0], &seed_raw[0], seed_digits); + + for (upL_t i = 0; i < seed_digits; ++i) { + thread_d_string_value = thread_d_string_value * m_radix + (seed_dec[i] - '0'); + } + } + + m_reader.load_digits(&raw_digits[0], nullptr, start_offset, end_offset - start_offset, frame, parallelizer_none, 1); + std::vector dec_digits(end_offset - start_offset); + RawToAscii::raw_to_dec(&dec_digits[0], &raw_digits[0], end_offset - start_offset); + + for (upL_t i = 0; i < (end_offset - start_offset); ++i) { + char current_digit = dec_digits[i]; + thread_d_string_value = (thread_d_string_value % m_radix_to_d_minus_1) * m_radix + (current_digit - '0'); + + // Bloom Filter Check + uiL_t bloom_idx = thread_d_string_value & m_bloom_mask; + if ((m_bloom_filter[bloom_idx / 64] >> (bloom_idx % 64)) & 1ULL) { + // Check if this string is in our map of missing values + auto it = m_missing_strings_map.find(thread_d_string_value); + if (it != m_missing_strings_map.end()) { + std::lock_guard lock(m_map_mutex); + if (it->second.empty()) { + m_unfound_count.fetch_sub(1, std::memory_order_relaxed); + } + it->second.push_back(start_offset + i + 1); + } + } + } + } + +private: + BasicDigitReader& m_reader; + std::unordered_map>& m_missing_strings_map; + const std::vector& m_bloom_filter; + uiL_t m_bloom_mask; + std::mutex& m_map_mutex; + std::atomic& m_unfound_count; + uiL_t m_radix_to_d_minus_1; + upL_t m_d; + char m_radix; + uiL_t m_current_stream_offset; + uiL_t m_chunk_to_process; + upL_t m_num_threads; +}; + +DigitScanner::DigitScanner(BasicDigitReader& reader, upL_t d) + : m_reader(reader) + , m_d(d) +{} + +void DigitScanner::search() { + auto start_time = std::chrono::high_resolution_clock::now(); + double start_cpu = get_cpu_time(); + + // Calculate the total number of possible d-digit strings (10^d). + uiL_t total_strings = 1; + for (upL_t i = 0; i < m_d; ++i) { + total_strings *= m_reader.radix(); + } + + // Check if there is enough memory for the bit vector. + uiL_t num_atomic_words = (total_strings + 63) / 64; + uiL_t required_bytes = num_atomic_words * sizeof(std::atomic); + uiL_t free_bytes = Environment::GetFreePhysicalMemory(); + if (required_bytes > free_bytes){ + Console::println("Error: Not enough memory.", 'R'); + Console::println(" Required Memory: " + StringTools::tostr(required_bytes, StringTools::COMMAS) + " bytes", 'R'); + Console::println(" Available Memory: " + StringTools::tostr(free_bytes, StringTools::COMMAS) + " bytes", 'R'); + Console::println(); + return; + } + + // Use atomic vector for thread-safe bitmask operations. + // Use atomic vector for thread-safe bitmask operations. + std::vector seen_strings(num_atomic_words, 0); + + // Use atomic counters for thread-safe updates. + std::atomic found_strings_count(0); + std::atomic last_found_digit_pos(0); + std::atomic last_found_d_string(0); + uiL_t current_offset = 0; + uiL_t digits_since_last_report = 0 ; + + // Prepare for reading digits in blocks. + uiL_t limit = m_reader.stream_end(); + if (limit == 0){ + limit = (uiL_t)0 - 1; + } + + // Not enough digits to form a d-digit string. + if (limit < m_d){ + Console::println("Warning: Not enough digits in the stream to form a d-digit string.", 'Y'); + Console::println(); + return; + } + + // Calculate 10^(d-1) to help with the sliding window + uiL_t radix_to_d_minus_1 = 1; + if (m_d > 1) { + for (upL_t i = 0; i < m_d - 1; ++i) { + radix_to_d_minus_1 *= m_reader.radix(); + } + } + + Console::println("Scanning for d-digit strings..."); + + current_offset = m_d - 1; + digits_since_last_report = 0; + + const upL_t MAX_PARALLEL_CHUNK_SIZE = 100000000; + + // Use floor(95% of available cores) + upL_t effective_threads = (Environment::GetLogicalProcessors() * 95) / 100; + if (effective_threads == 0) effective_threads = 1; + Console::println("Using " + StringTools::tostr(effective_threads) + " threads for parallel processing."); + + // Calculate partition size for partitions, aligned to 512 bits (cache line) + uiL_t partition_size_bits = ((total_strings / effective_threads) / 512) * 512; + if (partition_size_bits == 0) partition_size_bits = 512; // fallback for small total_strings + + // Pre-allocate buckets and merge lists + std::vector>> buckets(effective_threads, std::vector>(effective_threads)); + + uiL_t recommended_bucket_capacity = (MAX_PARALLEL_CHUNK_SIZE * 12) / (effective_threads * effective_threads * 10); + if (recommended_bucket_capacity < 1000) recommended_bucket_capacity = 1000; + + // Scan phase 1: Use a bitvector to record which of the 10^d strings have been seen. + while (found_strings_count.load(std::memory_order_acquire) < total_strings && current_offset < limit) { + uiL_t found_count = found_strings_count.load(std::memory_order_relaxed); + uiL_t unfound_count = total_strings - found_count; + + if (unfound_count < 100000){ + // Switch to Map-based scan + auto current_time = std::chrono::high_resolution_clock::now(); + double current_cpu = get_cpu_time(); + std::chrono::duration elapsed = current_time - start_time; + Console::println("\n" + StringTools::tostr(unfound_count) + + " strings remaining. Switching to map-based parallel mode. Time: " + + format_times(elapsed.count(), current_cpu - start_cpu)); + break; + } + + uiL_t chunk_to_process = std::min((uiL_t)MAX_PARALLEL_CHUNK_SIZE, limit - current_offset); + + // Phase 1: Distribution + DigitsMapperAction dist_action( + m_reader, + buckets, + radix_to_d_minus_1, + m_d, + m_reader.radix(), + current_offset, + chunk_to_process, + effective_threads, + partition_size_bits, + recommended_bucket_capacity + ); + parallelizer_default.run_in_parallel(dist_action, 0, effective_threads); + + // Phase 2: Application + ValuesReducerAction app_action( + seen_strings, + buckets, + found_strings_count, + effective_threads + ); + parallelizer_default.run_in_parallel(app_action, 0, effective_threads); + + current_offset += chunk_to_process; + digits_since_last_report += chunk_to_process; + + if (digits_since_last_report >= 1000000000){ + digits_since_last_report = 0; + auto current_time = std::chrono::high_resolution_clock::now(); + double current_cpu = get_cpu_time(); + std::chrono::duration elapsed = current_time - start_time; + Console::println( + "Progress: " + StringTools::tostr(found_strings_count.load(std::memory_order_relaxed), StringTools::COMMAS) + + " / " + StringTools::tostr(total_strings, StringTools::COMMAS) + + " strings found. Digits Scanned: " + StringTools::tostr(current_offset, StringTools::COMMAS) + + ". Time elapsed: " + format_times(elapsed.count(), current_cpu - start_cpu) + ); + } + } + + if (effective_threads > 1 && found_strings_count.load(std::memory_order_relaxed) == total_strings) { + // This is bad: Somehow we found all the strings during the multi-threaded bitvector phase, + // and that phase was not built to keep track of which string appeared last. Alert the user. + Console::println("\n\nSomething went wrong: Found all d-digit strings before keeping careful track of which came last."); + Console::println("Correct answer UNKNOWN but less than " + StringTools::tostr(current_offset, StringTools::COMMAS)); + return; + } + + // Scan phase 2: Use a (mutex-guarded) hash map to record appearances of the few strings not seen in phase 1. + std::unordered_map> missing_strings_map; + std::mutex map_mutex; + + // Bloom Filter setup + const uiL_t BLOOM_BITS_LOG2 = 20; + const uiL_t BLOOM_SIZE = 1ULL << BLOOM_BITS_LOG2; + const uiL_t BLOOM_MASK = BLOOM_SIZE - 1; + std::vector bloom_filter((BLOOM_SIZE + 63) / 64, 0); + + if (found_strings_count.load(std::memory_order_relaxed) < total_strings) { + // Build the map of missing strings + // This could be parallelized too, but it seems so fast that it's not worth it. + for (size_t i = 0; i < seen_strings.size(); ++i) { + uint64_t word = seen_strings[i]; + if (word == ~0ULL) continue; + + for (int bit = 0; bit < 64; ++bit) { + if (!((word >> bit) & 1ULL)) { + uiL_t string_val = (uiL_t)i * 64 + bit; + if (string_val < total_strings) { + missing_strings_map[string_val] = std::vector(); + + // Populate Bloom Filter + uiL_t idx = string_val & BLOOM_MASK; + bloom_filter[idx / 64] |= (1ULL << (idx % 64)); + } + } + } + } + + auto current_time = std::chrono::high_resolution_clock::now(); + double current_cpu = get_cpu_time(); + std::chrono::duration elapsed = current_time - start_time; + Console::println("Map construction complete. Time: " + format_times(elapsed.count(), current_cpu - start_cpu)); + + std::atomic map_unfound_count(missing_strings_map.size()); + Console::println("Processing " + StringTools::tostr((uiL_t)missing_strings_map.size()) + " remaining strings.\n"); + + while (map_unfound_count.load(std::memory_order_relaxed) > 0 && current_offset < limit) { + uiL_t chunk_to_process = std::min((uiL_t)MAX_PARALLEL_CHUNK_SIZE, limit - current_offset); + + DigitMapScanAction action( + m_reader, + missing_strings_map, + bloom_filter, + BLOOM_MASK, + map_mutex, + map_unfound_count, + radix_to_d_minus_1, + m_d, + m_reader.radix(), + current_offset, + chunk_to_process, + effective_threads + ); + parallelizer_default.run_in_parallel(action, 0, effective_threads); + current_offset += chunk_to_process; + digits_since_last_report += chunk_to_process; + + if (digits_since_last_report >= 1000000000){ + digits_since_last_report = 0; + auto current_time = std::chrono::high_resolution_clock::now(); + double current_cpu = get_cpu_time(); + std::chrono::duration elapsed = current_time - start_time; + Console::println( + "Progress: " + StringTools::tostr(total_strings - map_unfound_count.load(std::memory_order_relaxed), StringTools::COMMAS) + + " / " + StringTools::tostr(total_strings, StringTools::COMMAS) + + " strings found. Digits Scanned: " + StringTools::tostr(current_offset, StringTools::COMMAS) + + ". Time elapsed: " + format_times(elapsed.count(), current_cpu - start_cpu) + ); + } + } + + // Update found count for final report + found_strings_count.store(total_strings - map_unfound_count.load(std::memory_order_acquire), std::memory_order_relaxed); + } + + // Determine the true last string from the map results + if (!missing_strings_map.empty()) { + uiL_t max_first_pos = 0; + uiL_t winner_string = 0; + + // For each string found in the map phase, its first occurrence is a candidate "last d-string" + for (const auto& pair : missing_strings_map) { + if (!pair.second.empty()) { + uiL_t first_pos = pair.second[0]; + for (size_t i = 1; i < pair.second.size(); ++i) { + if (pair.second[i] < first_pos) { + first_pos = pair.second[i]; + } + } + + if (first_pos > max_first_pos) { + max_first_pos = first_pos; + winner_string = pair.first; + } + } + } + + if (max_first_pos > 0) { + last_found_digit_pos.store(max_first_pos, std::memory_order_relaxed); + last_found_d_string.store(winner_string, std::memory_order_relaxed); + } + } + + + Console::println("\nSearch Complete."); + + uiL_t found = found_strings_count.load(std::memory_order_relaxed); + if (found == total_strings) { + Console::println("All " + StringTools::tostr(total_strings, StringTools::COMMAS) + " d-digit strings found!"); + Console::println("The last unique d-digit string (" + StringTools::tostr_width(last_found_d_string.load(std::memory_order_relaxed), m_d) + ") was found at digit position: " + StringTools::tostr(last_found_digit_pos.load(std::memory_order_relaxed), StringTools::COMMAS)); + } else { + Console::println("Only " + StringTools::tostr(found, StringTools::COMMAS) + " out of " + StringTools::tostr(total_strings, StringTools::COMMAS) + " d-digit strings were found."); + Console::println("This is " + StringTools::tostr((upL_t)(found * 100 / total_strings)) + "% of all possible strings."); + Console::println("Digits processed: " + StringTools::tostr(current_offset, StringTools::COMMAS)); + if (total_strings < found + 20) { + Console::println("The digit strings that haven't appeared yet are:"); + for (const auto& pair : missing_strings_map) { + if (pair.second.empty()) { + Console::println(StringTools::tostr_width(pair.first, m_d)); + } + } + } + } + + auto end_time = std::chrono::high_resolution_clock::now(); + double end_cpu = get_cpu_time(); + std::chrono::duration elapsed = end_time - start_time; + Console::println("\nTotal execution time: " + format_times(elapsed.count(), end_cpu - start_cpu)); +} + +} // namespace DigitViewer2 diff --git a/Source/DigitViewer2/DigitScanner/DigitScanner.h b/Source/DigitViewer2/DigitScanner/DigitScanner.h new file mode 100644 index 0000000..eb19af3 --- /dev/null +++ b/Source/DigitViewer2/DigitScanner/DigitScanner.h @@ -0,0 +1,28 @@ +/* DigitScanner.h + * + * Author : Michael Kleber + * Date Created : 01/15/2026 + * Last Modified : 01/15/2026 + * Copyright 2026 Google LLC + * + */ + +#pragma once +#include "PublicLibs/Types.h" + +namespace DigitViewer2 { +using namespace ymp; + +class BasicDigitReader; + +class DigitScanner { +public: + DigitScanner(BasicDigitReader& reader, upL_t d); + void search(); + +private: + BasicDigitReader& m_reader; + upL_t m_d; +}; + +} diff --git a/Source/DigitViewer2/DigitScanner/README.md b/Source/DigitViewer2/DigitScanner/README.md new file mode 100644 index 0000000..03edac1 --- /dev/null +++ b/Source/DigitViewer2/DigitScanner/README.md @@ -0,0 +1,85 @@ +Scanning for All Strings of Digits +======== +by Michael Kleber + +Code in this directory implements a way to scan through a large file of digits until _every_ sequence of $d$ digits has appeared. + +Are you wondering "Does my 10-digit phone number appear in the digits of pi?" +Yes it does, somewhere in the first 241,641,121,048 digits. +What about your 16-digit credit card number? +I don't know — we haven't calculated enough digits of pi to see every 16-digit number. +(Yet.) + +## Background + +Pi, and many other numbers you can compute with y-cruncher, are believed to be [normal numbers](https://en.wikipedia.org/wiki/Normal_number). +This would mean that every sequence of $d$ decimal digits should appear in it, in approximately $1/(10^d)$ of the possible locations. +(That's what you would expect if the digits were random... and we have every reason to believe that pi's digits behave like random ones _from this particular point of view_.) + +That leads to asking the very natural question: +"Out of the $10^d$ sequences of $d$ digits, which one takes the longest to appear, and how many digits does it take?" + +* For d=1, the digit 0 is the last one to show up in pi, all the way out at the 32nd place after the decimal point: 3.1415926535897932384626433832795**0**2... +* For d=2 you need to go out to 606 places before you finally see the two-digit sequence 68. +* When Fabrice Bellard calculated 2.7 trillion digits of pi, he scanned for all sequences up to d=11, reported [here](https://bellard.org/pi/pi2700e9/pidigits.html#:~:text=scan%20decimal%20expansion%20of%20pi) in 2010. +* The scan for d=12 used the code in this directory, running on the [100 trillion digits computed by Google](https://pi.delivery/). +* The scan for d=13 used the code in this directory, running on the [314 trillion digits computed by StorageReview](https://www.storagereview.com/review/storagereview-sets-new-pi-record-314-trillion-digits-on-a-dell-poweredge-r7725). + +| d | digits needed | last d-digit seq | +|:-:|---------------------:|:----------------:| +| 1| 32 | `0` | +| 2| 606 | `68` | +| 3| 8,555 | `483` | +| 4| 99,849 | `6716` | +| 5| 1,369,564 | `33394` | +| 6| 14,118,312 | `569540` | +| 7| 166,100,506 | `1075656` | +| 8| 1,816,743,912 | `36432643` | +| 9| 22,445,207,406 | `172484538` | +| 10| 241,641,121,048 | `5918289042` | +| 11| 2,512,258,603,207 | `56377726040` | +| 12| 27,261,146,164,637 | `717542605965` | +| 13| 294,420,436,740,325 | `8683109988379` | + +* These are recorded in the [On-line Encyclopedia of Integer Sequences](https://oeis.org/) as entries [A036903](https://oeis.org/A036903) and [A032510](https://oeis.org/A032510). + +For a 50-50 chance of seeing all sequences of 14 digits, you would need +[around 3.26 _quadrillion_](https://www.wolframalpha.com/input?i=N%5Bexp%28-n+exp%28-w%2Fn%29%29%5D+where+n+%3D+10%5E14+and+w+%3D+3.26+quadrillion) +random digits, so don't hold your breath. + + +## Algorithm + +### Basic idea +To search for every string of $d$ digits: +* Make a bitvector of $10^d$ zeros +* Look at strings of $d$ digits one at a time, considered as a $d$-digit number $n$. + * If the $n$'th bit in the bitstring is a $0$, then you've found a new string! + * Go you! Add one to the variable "how many strings I've found so far." + * If that variable equals $10^d$, you've seen them all! Have a party. + * If the $n$'th bit in the bitstring is already a $1$, nothing to see here, move along. + +If you have a lot of digits, a lot of memory, and a lot of time, this will do the job. + +If you don't have $10^d$ bits of memory, then you could scan the digits more than once — +"Okay _this_ time I'm going to only pay attention to $d$-digit strings that start with a 7." +This multi-scan idea is not implemented here. Call a friend with more RAM. + +### Parallelization and efficiency +To run this search faster, we use many threads. We can't have all those threads writing to the same memory at once +(their changes might clobber each other), so we implement a little mapreduce-like arrangement: The mapper threads each own a +chunk of digits and convert them into d-digit values; the reducer threads each own a chunk of memory and flip bits from 0 to 1 +when the value is seen. The shuffling between mappers and reducers is implemented by storing the values in an NxN array +of vectors of values, where vector (i,j) holds values produced by mapper i and consumed by reducer j. + +We stop that approach when the bitvector is getting close to all 1's, and switch to a new phase where we track the arrival +of the last few thousand strings in a (mutex-guarded) hash map that remembers at what position those strings finally appear. +This lets us keep using many threads and still find out which string took the longest to first show up. + +The bitvector phase of the search is sped up by issuing memory prefetch hints, since the CPU spending all its time +asking for randomly-placed individual bits in a very large span of memory is a latency-pessimal access pattern. +The hash map phase uses a quick little Bloom filter to do less hashing. + +The cutover point between the two search phases, the memory prefetch hint details, and the number of threads to use +are definitely sensitive to what exact hardware you're running on. If you plan to run this code for large $d$ +(say 10 or up), you may profit from tuning these to your setup. diff --git a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp index 3a20d3a..80d2b6b 100644 --- a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp +++ b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.cpp @@ -30,6 +30,7 @@ #include "DigitViewer2/DigitWriters/BasicDigitWriter.h" #include "DigitViewer2/DigitWriters/BasicTextWriter.h" #include "DigitViewer2/DigitWriters/BasicYcdSetWriter.h" +#include "DigitViewer2/DigitScanner/DigitScanner.h" #include "DigitViewerTasks.h" namespace DigitViewer2{ //////////////////////////////////////////////////////////////////////////////// @@ -479,8 +480,21 @@ void to_ycd_file_partial(BasicDigitReader& reader){ ); process_write(reader, start_pos, end_pos - start_pos, writer, start_pos); } +void find_last_d_string(BasicDigitReader& reader){ + Console::println("\n\nFind Last d-Digit String"); + Console::println(); + + // Get d from the user. + upL_t d = Console::scan_label_upL_range("Enter d (1-13): ", 1, 13); + Console::println(); + + DigitScanner scanner(reader, d); + scanner.search(); +} //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// } + + diff --git a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h index bb83f2f..6f25a87 100644 --- a/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h +++ b/Source/DigitViewer2/DigitViewer/DigitViewerTasks.h @@ -25,9 +25,11 @@ void compute_stats(BasicDigitReader& reader); void to_text_file(BasicDigitReader& reader); void to_ycd_file_all(BasicDigitReader& reader); void to_ycd_file_partial(BasicDigitReader& reader); +void find_last_d_string(BasicDigitReader& reader); //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// } #endif + diff --git a/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp b/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp index 66d4876..d2bfae0 100644 --- a/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp +++ b/Source/DigitViewer2/DigitViewer/DigitViewerUI2.cpp @@ -52,9 +52,11 @@ void Menu_TextFile(BasicTextReader& reader){ Console::println("Compress digits 1 - N into one or more .ycd files.", 'G'); Console::print(" 4 ", 'w'); Console::println("Compress a subset of digits into .ycd files.", 'G'); + Console::print(" 5 ", 'w'); + Console::println("Search for all d-digit strings.", 'G'); Console::println("\nEnter your choice:", 'w'); - upL_t c = Console::scan_label_upL_range("option: ", 0, 4); + upL_t c = Console::scan_label_upL_range("option: ", 0, 5); Console::println(); switch (c){ @@ -73,6 +75,9 @@ void Menu_TextFile(BasicTextReader& reader){ case 4: to_ycd_file_partial(reader); return; + case 5: + find_last_d_string(reader); + return; default:; } } @@ -115,14 +120,16 @@ void Menu_YcdFile(BasicYcdSetReader& reader){ Console::println("Compress digits 1 - N into one or more .ycd files.", 'G'); Console::print(" 4 ", 'w'); Console::println("Compress a subset of digits into .ycd files.", 'G'); + Console::print(" 5 ", 'w'); + Console::println("Search for all d-digit strings.", 'G'); Console::println(); - Console::print(" 5 ", 'w'); + Console::print(" 6 ", 'w'); Console::print("Add search directory.", 'G'); Console::println(" (if .ycd files are in multiple paths)", 'Y'); Console::println("\nEnter your choice:", 'w'); - upL_t c = Console::scan_label_upL_range("option: ", 0, 5); + upL_t c = Console::scan_label_upL_range("option: ", 0, 6); Console::println(); switch (c){ @@ -142,6 +149,10 @@ void Menu_YcdFile(BasicYcdSetReader& reader){ to_ycd_file_partial(reader); return; case 5: + find_last_d_string(reader); + return; + + case 6: Console::println("\nEnter directory:"); reader.add_search_path(Console::scan_utf8()); break; @@ -200,3 +211,5 @@ void Menu_Main(){ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// } + + diff --git a/Source/DigitViewer2/Objects.mk b/Source/DigitViewer2/Objects.mk index 26d5255..2b5f60f 100644 --- a/Source/DigitViewer2/Objects.mk +++ b/Source/DigitViewer2/Objects.mk @@ -23,9 +23,12 @@ CURRENT += DigitWriters/BasicTextWriter.cpp CURRENT += DigitWriters/BasicYcdFileWriter.cpp CURRENT += DigitWriters/BasicYcdSetWriter.cpp +CURRENT += DigitScanner/DigitScanner.cpp + CURRENT += DigitViewer/DigitViewerTasks.cpp CURRENT += DigitViewer/DigitViewerUI2.cpp SOURCES := $(SOURCES) $(addprefix $(CURRENT_DIR)/, $(CURRENT)) endif + diff --git a/Source/DigitViewer2/SMC_DigitViewer2.cpp b/Source/DigitViewer2/SMC_DigitViewer2.cpp index 32b39dd..cc91907 100644 --- a/Source/DigitViewer2/SMC_DigitViewer2.cpp +++ b/Source/DigitViewer2/SMC_DigitViewer2.cpp @@ -26,3 +26,5 @@ #include "DigitViewer/DigitViewerTasks.cpp" #include "DigitViewer/DigitViewerUI2.cpp" + +#include "DigitScanner/DigitScanner.cpp" diff --git a/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp b/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp index 38d22f1..ab9d8c0 100644 --- a/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp +++ b/Source/PublicLibs/BasicLibs/StringTools/ToString.cpp @@ -216,6 +216,13 @@ YM_NO_INLINE std::string tostrln(uiL_t x, NumberFormat format){ YM_NO_INLINE std::string tostrln(siL_t x, NumberFormat format){ return tostr(x, format) += "\r\n"; } +YM_NO_INLINE std::string tostr_width(uiL_t x, int width){ + std::ostringstream out; + out << std::setfill('0'); + out << std::setw(width); + out << x; + return out.str(); +} //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// diff --git a/Source/PublicLibs/BasicLibs/StringTools/ToString.h b/Source/PublicLibs/BasicLibs/StringTools/ToString.h index 1957e5d..ded4bbf 100644 --- a/Source/PublicLibs/BasicLibs/StringTools/ToString.h +++ b/Source/PublicLibs/BasicLibs/StringTools/ToString.h @@ -40,6 +40,7 @@ YM_NO_INLINE std::string tostrln (uiL_t x, NumberFormat format = NORMAL); YM_NO_INLINE std::string tostrln (siL_t x, NumberFormat format = NORMAL); static std::string tostrln (u32_t x, NumberFormat format = NORMAL){ return tostrln((uiL_t)x, format); } static std::string tostrln (s32_t x, NumberFormat format = NORMAL){ return tostrln((siL_t)x, format); } +YM_NO_INLINE std::string tostr_width (uiL_t x, int width); //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Float diff --git a/TinyTestData/README.md b/TinyTestData/README.md new file mode 100644 index 0000000..35a37a1 --- /dev/null +++ b/TinyTestData/README.md @@ -0,0 +1 @@ +Minimal .ycd file of 1 million decimal digits, just to have for testing purposes. diff --git a/TinyTestData/pi1m - 0.ycd b/TinyTestData/pi1m - 0.ycd new file mode 100644 index 0000000..42fe4a4 Binary files /dev/null and b/TinyTestData/pi1m - 0.ycd differ diff --git a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj index 7e7507c..8fb2bde 100644 --- a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj +++ b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj @@ -62,102 +62,102 @@ 15.0 {78460907-F11F-45DF-A8B3-BCF1D8E54EC5} DigitViewer2 - 10.0.17763.0 + 10.0 Application true - v141 + v145 MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application true - v141 + v145 MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte Application false - v141 + v145 true MultiByte @@ -564,6 +564,7 @@ + @@ -699,6 +700,7 @@ + diff --git a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters index da4d516..37e57d7 100644 --- a/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters +++ b/VSS - DigitViewer2/DigitViewer2/DigitViewer2.vcxproj.filters @@ -401,6 +401,9 @@ Source Files\PublicLibs\SystemLibs\FileIO + + Source Files + @@ -769,5 +772,8 @@ Source Files\PublicLibs\SystemLibs\FileIO\BaseFile + + Header Files + \ No newline at end of file