diff --git a/.gitignore b/.gitignore index 26c65b8..f044c7d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,10 @@ /vendor/bundle/ # rspec failure tracking .rspec_status +.claude/settings.local.json + +# C extension build artifacts +ext/ruby_units/*.o +ext/ruby_units/*.so +ext/ruby_units/Makefile +lib/ruby_units/*.so diff --git a/Gemfile b/Gemfile index 30b6c6a..08472c7 100644 --- a/Gemfile +++ b/Gemfile @@ -24,6 +24,7 @@ end gem "bigdecimal" gem "rake" +gem "rake-compiler" gem "rspec", "~> 3.0" gem "simplecov" gem "yard" diff --git a/Gemfile.lock b/Gemfile.lock index 9e79c3d..1af9bcb 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -51,6 +51,7 @@ GEM zeitwerk (~> 2.6) erb (6.0.1) erb (6.0.1-java) + ffi (1.17.2) ffi (1.17.2-arm64-darwin) ffi (1.17.2-java) ffi (1.17.2-x86_64-linux-gnu) @@ -96,7 +97,11 @@ GEM logger (1.7.0) lumberjack (1.4.2) method_source (1.1.0) + mini_portile2 (2.8.9) nenv (0.3.0) + nokogiri (1.18.10) + mini_portile2 (~> 2.8.2) + racc (~> 1.4) nokogiri (1.18.10-arm64-darwin) racc (~> 1.4) nokogiri (1.18.10-java) @@ -107,6 +112,7 @@ GEM nenv (~> 0.1) shellany (~> 0.0) observer (0.1.2) + open3 (0.2.1) ostruct (0.6.3) parallel (1.27.0) parser (3.3.10.0) @@ -133,6 +139,8 @@ GEM racc (1.8.1-java) rainbow (3.1.1) rake (13.3.1) + rake-compiler (1.3.1) + rake rb-fsevent (0.11.2) rb-inotify (0.11.1) ffi (~> 1.0) @@ -205,16 +213,18 @@ GEM simplecov_json_formatter (~> 0.1) simplecov-html (0.13.2) simplecov_json_formatter (0.1.4) - solargraph (0.57.0) + solargraph (0.58.2) + ast (~> 2.4.3) backport (~> 1.2) benchmark (~> 0.4) - bundler (~> 2.0) + bundler (>= 2.0) diff-lcs (~> 1.4) jaro_winkler (~> 1.6, >= 1.6.1) kramdown (~> 2.3) kramdown-parser-gfm (~> 1.1) logger (~> 1.6) observer (~> 0.1) + open3 (~> 0.2.1) ostruct (~> 0.6) parser (~> 3.0) prism (~> 1.4) @@ -246,6 +256,7 @@ GEM zeitwerk (2.7.4) PLATFORMS + aarch64-linux arm64-darwin-25 java universal-java-11 @@ -259,6 +270,7 @@ DEPENDENCIES guard-rspec pry rake + rake-compiler redcarpet reek rspec (~> 
3.0) @@ -278,4 +290,4 @@ DEPENDENCIES yard BUNDLED WITH - 2.7.2 + 4.0.3 diff --git a/Rakefile b/Rakefile index b6ae734..d0358d1 100644 --- a/Rakefile +++ b/Rakefile @@ -5,4 +5,15 @@ require "rspec/core/rake_task" RSpec::Core::RakeTask.new(:spec) +unless RUBY_ENGINE == "jruby" + require "rake/extensiontask" + + Rake::ExtensionTask.new("ruby_units_ext") do |ext| + ext.lib_dir = "lib/ruby_units" + ext.ext_dir = "ext/ruby_units" + end + + task spec: :compile +end + task default: :spec diff --git a/ext/ruby_units/extconf.rb b/ext/ruby_units/extconf.rb new file mode 100644 index 0000000..2da8d5c --- /dev/null +++ b/ext/ruby_units/extconf.rb @@ -0,0 +1,5 @@ +# frozen_string_literal: true + +require "mkmf" + +create_makefile("ruby_units/ruby_units_ext") diff --git a/ext/ruby_units/ruby_units_ext.c b/ext/ruby_units/ruby_units_ext.c new file mode 100644 index 0000000..48c7966 --- /dev/null +++ b/ext/ruby_units/ruby_units_ext.c @@ -0,0 +1,893 @@ +/* + * ruby_units_ext.c - C extension for ruby-units + * + * Replaces hot-path Ruby methods with C implementations: + * - finalize_initialization (Phase 2) + * - eliminate_terms (Phase 3) + * - convert_scalar (Phase 4) + * + * The C code reads Ruby state directly via rb_ivar_get and rb_hash_aref. + * No data is copied or synced -- everything lives in Ruby objects. + * + * Optimization: Definition object properties (kind, display_name, prefix?, + * base?, unity?) are accessed via rb_ivar_get instead of rb_funcall to + * eliminate Ruby method dispatch overhead (~300-700ns per call). 
+ */ + +#include +#include + +/* ======================================================================== + * Interned IDs + * ======================================================================== */ + +/* Unit instance variable IDs */ +static ID id_iv_scalar; +static ID id_iv_numerator; +static ID id_iv_denominator; +static ID id_iv_base_scalar; +static ID id_iv_signature; +static ID id_iv_base; +static ID id_iv_unit_name; + +/* Definition object ivar IDs (direct access, bypassing Ruby dispatch) */ +static ID id_defn_kind; +static ID id_defn_display_name; +static ID id_defn_scalar; +static ID id_defn_numerator; +static ID id_defn_denominator; +static ID id_defn_name; + +/* Method IDs (only for methods we still need to call via rb_funcall) */ +static ID id_definitions; +static ID id_prefix_values; +static ID id_unit_values; +static ID id_cached; +static ID id_set; +static ID id_to_unit; +static ID id_parse_into_numbers_and_units; +static ID id_normalize_to_i; +static ID id_keys; +static ID id_concat; +static ID id_eq; +static ID id_to_r; + +/* ======================================================================== + * Ruby symbol/string constants + * ======================================================================== */ + +/* Hash key symbols */ +static VALUE sym_scalar; +static VALUE sym_numerator; +static VALUE sym_denominator; +static VALUE sym_signature; + +/* Kind symbols */ +static VALUE sym_prefix; +static VALUE sym_length; +static VALUE sym_time; +static VALUE sym_temperature; +static VALUE sym_mass; +static VALUE sym_current; +static VALUE sym_substance; +static VALUE sym_luminosity; +static VALUE sym_currency; +static VALUE sym_information; +static VALUE sym_angle; + +#define SIGNATURE_VECTOR_SIZE 10 + +/* Map from vector index to kind symbol (for pointer comparison) */ +static VALUE signature_kind_symbols[SIGNATURE_VECTOR_SIZE]; + +/* Cached frozen strings */ +static VALUE str_unity; /* "<1>" */ +static VALUE str_empty; /* "" */ + +/* Cached 
class reference */ +static VALUE cUnit; + +/* ======================================================================== + * Inline helpers for Definition object property access + * + * These replace rb_funcall(defn, method, 0) with direct rb_ivar_get, + * saving ~300-700ns per access. + * ======================================================================== */ + +/* + * Check if a token is the unity token "<1>" + */ +static inline int is_unity(VALUE token) { + /* Fast pointer check first (works when token is the same frozen string) */ + if (token == str_unity) return 1; + return rb_str_equal(token, str_unity) == Qtrue; +} + +/* + * Get Definition.kind via direct ivar access. + * Returns the kind symbol (e.g., :length, :mass, :prefix). + */ +static inline VALUE defn_kind(VALUE defn) { + return rb_ivar_get(defn, id_defn_kind); +} + +/* + * Get Definition.display_name via direct ivar access. + */ +static inline VALUE defn_display_name(VALUE defn) { + return rb_ivar_get(defn, id_defn_display_name); +} + +/* + * Check Definition.prefix? -- kind == :prefix + * Symbols are singletons, so pointer comparison is correct. + */ +static inline int defn_is_prefix(VALUE defn) { + return rb_ivar_get(defn, id_defn_kind) == sym_prefix; +} + +/* + * Check Definition.base? without Ruby dispatch. + * base? = scalar == 1 && numerator.size == 1 && denominator == ["<1>"] + * && numerator.first == "<@name>" + */ +static int defn_is_base(VALUE defn) { + VALUE scalar = rb_ivar_get(defn, id_defn_scalar); + /* Fast path for Fixnum 1 (most common) */ + if (scalar != INT2FIX(1)) { + if (FIXNUM_P(scalar)) return 0; + /* Handle Rational(1/1) etc. 
*/ + if (rb_funcall(scalar, id_eq, 1, INT2FIX(1)) != Qtrue) return 0; + } + + VALUE numerator = rb_ivar_get(defn, id_defn_numerator); + if (NIL_P(numerator) || !RB_TYPE_P(numerator, T_ARRAY) || RARRAY_LEN(numerator) != 1) + return 0; + + VALUE denominator = rb_ivar_get(defn, id_defn_denominator); + if (NIL_P(denominator) || !RB_TYPE_P(denominator, T_ARRAY) || RARRAY_LEN(denominator) != 1) + return 0; + if (rb_str_equal(rb_ary_entry(denominator, 0), str_unity) != Qtrue) + return 0; + + /* Check numerator.first == "<#{@name}>" */ + VALUE first_num = rb_ary_entry(numerator, 0); + VALUE raw_name = rb_ivar_get(defn, id_defn_name); /* e.g., "meter" (no brackets) */ + + if (!RB_TYPE_P(first_num, T_STRING) || NIL_P(raw_name) || !RB_TYPE_P(raw_name, T_STRING)) + return 0; + + const char *num_ptr = RSTRING_PTR(first_num); + long num_len = RSTRING_LEN(first_num); + const char *name_ptr = RSTRING_PTR(raw_name); + long name_len = RSTRING_LEN(raw_name); + + if (num_len != name_len + 2) return 0; + if (num_ptr[0] != '<' || num_ptr[num_len - 1] != '>') return 0; + if (memcmp(num_ptr + 1, name_ptr, name_len) != 0) return 0; + + return 1; +} + +/* + * Check Definition.unity? -- prefix? && scalar == 1 + */ +static inline int defn_is_unity(VALUE defn) { + if (!defn_is_prefix(defn)) return 0; + VALUE scalar = rb_ivar_get(defn, id_defn_scalar); + return scalar == INT2FIX(1); +} + +/* + * Check if any tokens in numerator/denominator are temperature-related. + * Replaces Ruby's temperature_tokens? method. 
+ * Checks for tokens starting with "= 6 && str[0] == '<' && + (strncmp(str + 1, "temp", 4) == 0 || strncmp(str + 1, "deg", 3) == 0)) + return 1; + } + + len = RARRAY_LEN(denominator); + for (i = 0; i < len; i++) { + token = rb_ary_entry(denominator, i); + if (!RB_TYPE_P(token, T_STRING)) continue; + str = RSTRING_PTR(token); + slen = RSTRING_LEN(token); + if (slen >= 6 && str[0] == '<' && + (strncmp(str + 1, "temp", 4) == 0 || strncmp(str + 1, "deg", 3) == 0)) + return 1; + } + + return 0; +} + +/* ======================================================================== + * Core computation functions + * ======================================================================== */ + +/* + * Check if all tokens in numerator and denominator are base units. + * Uses direct Definition ivar access instead of rb_funcall. + */ +static int check_base(VALUE definitions, VALUE numerator, VALUE denominator) { + long i, len; + VALUE token, defn; + + len = RARRAY_LEN(numerator); + for (i = 0; i < len; i++) { + token = rb_ary_entry(numerator, i); + if (is_unity(token)) continue; + + defn = rb_hash_aref(definitions, token); + if (NIL_P(defn)) return 0; + + if (defn_is_unity(defn)) continue; + if (defn_is_base(defn)) continue; + return 0; + } + + len = RARRAY_LEN(denominator); + for (i = 0; i < len; i++) { + token = rb_ary_entry(denominator, i); + if (is_unity(token)) continue; + + defn = rb_hash_aref(definitions, token); + if (NIL_P(defn)) return 0; + + if (defn_is_unity(defn)) continue; + if (defn_is_base(defn)) continue; + return 0; + } + + return 1; +} + +/* + * Compute base_scalar without creating intermediate Unit objects. + * prefix_vals and unit_vals are passed in (fetched once by caller). 
+ */ +static VALUE compute_base_scalar_c(VALUE scalar, VALUE numerator, VALUE denominator, + VALUE prefix_vals, VALUE unit_vals) { + VALUE factor = rb_rational_new(INT2FIX(1), INT2FIX(1)); + long i, len; + VALUE token, pv, uv, uv_scalar; + + len = RARRAY_LEN(numerator); + for (i = 0; i < len; i++) { + token = rb_ary_entry(numerator, i); + if (is_unity(token)) continue; + + pv = rb_hash_aref(prefix_vals, token); + if (!NIL_P(pv)) { + factor = rb_funcall(factor, '*', 1, pv); + } else { + uv = rb_hash_aref(unit_vals, token); + if (!NIL_P(uv)) { + uv_scalar = rb_hash_aref(uv, sym_scalar); + if (!NIL_P(uv_scalar)) { + factor = rb_funcall(factor, '*', 1, uv_scalar); + } + } + } + } + + len = RARRAY_LEN(denominator); + for (i = 0; i < len; i++) { + token = rb_ary_entry(denominator, i); + if (is_unity(token)) continue; + + pv = rb_hash_aref(prefix_vals, token); + if (!NIL_P(pv)) { + factor = rb_funcall(factor, '/', 1, pv); + } else { + uv = rb_hash_aref(unit_vals, token); + if (!NIL_P(uv)) { + uv_scalar = rb_hash_aref(uv, sym_scalar); + if (!NIL_P(uv_scalar)) { + factor = rb_funcall(factor, '/', 1, uv_scalar); + } + } + } + } + + return rb_funcall(scalar, '*', 1, factor); +} + +/* + * Expand tokens into signature vector, accumulating with sign. + * Uses direct ivar access for Definition.kind and pointer comparison + * for symbol matching (symbols are singletons). + */ +static void expand_tokens_to_signature_c(VALUE tokens, int vector[SIGNATURE_VECTOR_SIZE], int sign, + VALUE prefix_vals, VALUE unit_vals, VALUE definitions) { + long i, len, j; + VALUE token, uv, base_arr, bt, defn, kind_sym; + + len = RARRAY_LEN(tokens); + for (i = 0; i < len; i++) { + token = rb_ary_entry(tokens, i); + if (is_unity(token)) continue; + + /* Skip prefix tokens - use rb_hash_aref instead of funcall key? 
*/ + if (!NIL_P(rb_hash_aref(prefix_vals, token))) continue; + + uv = rb_hash_aref(unit_vals, token); + if (!NIL_P(uv)) { + /* Has a unit_values entry -- expand its base numerator/denominator */ + base_arr = rb_hash_aref(uv, sym_numerator); + if (!NIL_P(base_arr)) { + long blen = RARRAY_LEN(base_arr); + for (j = 0; j < blen; j++) { + bt = rb_ary_entry(base_arr, j); + defn = rb_hash_aref(definitions, bt); + if (NIL_P(defn)) continue; + kind_sym = defn_kind(defn); + /* Pointer comparison -- symbols are singletons */ + for (int k = 0; k < SIGNATURE_VECTOR_SIZE; k++) { + if (kind_sym == signature_kind_symbols[k]) { + vector[k] += sign; + break; + } + } + } + } + base_arr = rb_hash_aref(uv, sym_denominator); + if (!NIL_P(base_arr)) { + long blen = RARRAY_LEN(base_arr); + for (j = 0; j < blen; j++) { + bt = rb_ary_entry(base_arr, j); + defn = rb_hash_aref(definitions, bt); + if (NIL_P(defn)) continue; + kind_sym = defn_kind(defn); + for (int k = 0; k < SIGNATURE_VECTOR_SIZE; k++) { + if (kind_sym == signature_kind_symbols[k]) { + vector[k] -= sign; + break; + } + } + } + } + } else { + /* Direct base unit token */ + defn = rb_hash_aref(definitions, token); + if (!NIL_P(defn)) { + kind_sym = defn_kind(defn); + for (int k = 0; k < SIGNATURE_VECTOR_SIZE; k++) { + if (kind_sym == signature_kind_symbols[k]) { + vector[k] += sign; + break; + } + } + } + } + } +} + +/* + * Compute signature from numerator/denominator. + * Returns the integer signature (base-20 encoding of the signature vector). 
+ */ +static long compute_signature_c(VALUE numerator, VALUE denominator, + VALUE prefix_vals, VALUE unit_vals, VALUE definitions) { + int vector[SIGNATURE_VECTOR_SIZE]; + int i; + long signature = 0; + long power = 1; + + for (i = 0; i < SIGNATURE_VECTOR_SIZE; i++) vector[i] = 0; + + expand_tokens_to_signature_c(numerator, vector, 1, prefix_vals, unit_vals, definitions); + expand_tokens_to_signature_c(denominator, vector, -1, prefix_vals, unit_vals, definitions); + + for (i = 0; i < SIGNATURE_VECTOR_SIZE; i++) { + if (abs(vector[i]) >= 20) { + rb_raise(rb_eArgError, "Power out of range (-20 < net power of a unit < 20)"); + } + } + + for (i = 0; i < SIGNATURE_VECTOR_SIZE; i++) { + signature += vector[i] * power; + power *= 20; + } + + return signature; +} + +/* + * Build the units string from numerator/denominator arrays. + * Uses direct ivar access for Definition properties. + */ +static VALUE build_units_string(VALUE definitions, VALUE numerator, VALUE denominator) { + long num_len = RARRAY_LEN(numerator); + long den_len = RARRAY_LEN(denominator); + + /* Quick check for unitless */ + if (num_len == 1 && den_len == 1 && + is_unity(rb_ary_entry(numerator, 0)) && + is_unity(rb_ary_entry(denominator, 0))) { + return str_empty; + } + + VALUE output_num = rb_ary_new(); + VALUE output_den = rb_ary_new(); + long i; + VALUE token, defn, display, current_str; + + /* Process numerator: group prefixes with their units */ + if (!(num_len == 1 && is_unity(rb_ary_entry(numerator, 0)))) { + current_str = Qnil; + for (i = 0; i < num_len; i++) { + token = rb_ary_entry(numerator, i); + defn = rb_hash_aref(definitions, token); + if (NIL_P(defn)) continue; + + display = defn_display_name(defn); + if (defn_is_prefix(defn)) { + current_str = rb_str_dup(display); + } else { + if (!NIL_P(current_str)) { + rb_str_append(current_str, display); + rb_ary_push(output_num, current_str); + current_str = Qnil; + } else { + rb_ary_push(output_num, rb_str_dup(display)); + } + } + } + } + + /* 
Process denominator: same grouping */ + if (!(den_len == 1 && is_unity(rb_ary_entry(denominator, 0)))) { + current_str = Qnil; + for (i = 0; i < den_len; i++) { + token = rb_ary_entry(denominator, i); + defn = rb_hash_aref(definitions, token); + if (NIL_P(defn)) continue; + + display = defn_display_name(defn); + if (defn_is_prefix(defn)) { + current_str = rb_str_dup(display); + } else { + if (!NIL_P(current_str)) { + rb_str_append(current_str, display); + rb_ary_push(output_den, current_str); + current_str = Qnil; + } else { + rb_ary_push(output_den, rb_str_dup(display)); + } + } + } + } + + /* If numerator is empty, use "1" */ + if (RARRAY_LEN(output_num) == 0) { + rb_ary_push(output_num, rb_str_new_cstr("1")); + } + + /* Build result string with exponent notation for repeated units */ + VALUE result = rb_str_buf_new(64); + VALUE seen = rb_hash_new(); + long total; + + total = RARRAY_LEN(output_num); + int first = 1; + for (i = 0; i < total; i++) { + VALUE elem = rb_ary_entry(output_num, i); + if (rb_hash_aref(seen, elem) != Qnil) continue; + + long count = 0; + long j; + for (j = 0; j < total; j++) { + if (rb_str_equal(rb_ary_entry(output_num, j), elem) == Qtrue) count++; + } + rb_hash_aset(seen, elem, Qtrue); + + if (!first) rb_str_cat_cstr(result, "*"); + first = 0; + + /* Display names don't have leading/trailing whitespace, skip strip */ + rb_str_append(result, elem); + if (count > 1) { + char buf[16]; + snprintf(buf, sizeof(buf), "^%ld", count); + rb_str_cat_cstr(result, buf); + } + } + + /* Build denominator string */ + total = RARRAY_LEN(output_den); + if (total > 0) { + rb_str_cat_cstr(result, "/"); + seen = rb_hash_new(); + first = 1; + for (i = 0; i < total; i++) { + VALUE elem = rb_ary_entry(output_den, i); + if (rb_hash_aref(seen, elem) != Qnil) continue; + + long count = 0; + long j; + for (j = 0; j < total; j++) { + if (rb_str_equal(rb_ary_entry(output_den, j), elem) == Qtrue) count++; + } + rb_hash_aset(seen, elem, Qtrue); + + if (!first) 
rb_str_cat_cstr(result, "*"); + first = 0; + + rb_str_append(result, elem); + if (count > 1) { + char buf[16]; + snprintf(buf, sizeof(buf), "^%ld", count); + rb_str_cat_cstr(result, buf); + } + } + } + + return result; +} + +/* ======================================================================== + * Public Ruby methods + * ======================================================================== */ + +/* + * Phase 2: rb_unit_finalize - replaces finalize_initialization + * + * Called from Ruby's initialize after parsing is complete. + * Computes base?, base_scalar, signature, builds units string, caches, and freezes. + * + * Returns Qtrue on success, Qfalse if temperature tokens detected (caller + * should fall back to Ruby path). + * + * call-seq: + * unit._c_finalize(options_first_arg) -> true/false + */ +static VALUE rb_unit_finalize(VALUE self, VALUE options_first) { + VALUE unit_class = rb_obj_class(self); + VALUE scalar = rb_ivar_get(self, id_iv_scalar); + VALUE numerator = rb_ivar_get(self, id_iv_numerator); + VALUE denominator = rb_ivar_get(self, id_iv_denominator); + VALUE signature = rb_ivar_get(self, id_iv_signature); + + /* Guard: fall back to Ruby if ivars aren't arrays yet */ + if (NIL_P(numerator) || !RB_TYPE_P(numerator, T_ARRAY) || + NIL_P(denominator) || !RB_TYPE_P(denominator, T_ARRAY)) { + return Qfalse; + } + + /* Check for temperature tokens -- fall back to Ruby path */ + if (has_temperature_token(numerator, denominator)) { + return Qfalse; + } + + /* Fetch class-level hashes ONCE and pass to all helpers */ + VALUE definitions = rb_funcall(unit_class, id_definitions, 0); + VALUE prefix_vals = rb_funcall(unit_class, id_prefix_values, 0); + VALUE unit_vals = rb_funcall(unit_class, id_unit_values, 0); + + int is_base; + VALUE base_scalar_val; + long sig_val; + + /* 1. 
Compute base?, base_scalar, signature */ + if (!NIL_P(signature)) { + /* Signature was pre-supplied (e.g., from arithmetic fast-path) */ + is_base = check_base(definitions, numerator, denominator); + if (is_base) { + base_scalar_val = scalar; + } else { + base_scalar_val = compute_base_scalar_c(scalar, numerator, denominator, + prefix_vals, unit_vals); + } + sig_val = NUM2LONG(signature); + } else { + is_base = check_base(definitions, numerator, denominator); + if (is_base) { + base_scalar_val = scalar; + sig_val = compute_signature_c(numerator, denominator, + prefix_vals, unit_vals, definitions); + } else { + base_scalar_val = compute_base_scalar_c(scalar, numerator, denominator, + prefix_vals, unit_vals); + sig_val = compute_signature_c(numerator, denominator, + prefix_vals, unit_vals, definitions); + } + } + + rb_ivar_set(self, id_iv_base, is_base ? Qtrue : Qfalse); + rb_ivar_set(self, id_iv_base_scalar, base_scalar_val); + rb_ivar_set(self, id_iv_signature, LONG2NUM(sig_val)); + + /* 2. Build units string */ + VALUE unary_unit = build_units_string(definitions, numerator, denominator); + rb_ivar_set(self, id_iv_unit_name, unary_unit); + + /* 3. Cache the unit if appropriate */ + int scalar_is_one = FIXNUM_P(scalar) ? 
(scalar == INT2FIX(1)) + : (rb_funcall(scalar, id_eq, 1, INT2FIX(1)) == Qtrue); + + if (RB_TYPE_P(options_first, T_STRING)) { + VALUE parse_result = rb_funcall(unit_class, id_parse_into_numbers_and_units, 1, options_first); + VALUE opt_units = rb_ary_entry(parse_result, 1); + if (!NIL_P(opt_units) && RSTRING_LEN(opt_units) > 0) { + VALUE cache = rb_funcall(unit_class, id_cached, 0); + if (scalar_is_one) { + rb_funcall(cache, id_set, 2, opt_units, self); + } else { + VALUE unit_from_str = rb_funcall(opt_units, id_to_unit, 0); + rb_funcall(cache, id_set, 2, opt_units, unit_from_str); + } + } + } + + if (RSTRING_LEN(unary_unit) > 0) { + VALUE cache = rb_funcall(unit_class, id_cached, 0); + if (scalar_is_one) { + rb_funcall(cache, id_set, 2, unary_unit, self); + } else { + VALUE unit_from_str = rb_funcall(unary_unit, id_to_unit, 0); + rb_funcall(cache, id_set, 2, unary_unit, unit_from_str); + } + } + + /* 4. Freeze instance variables using rb_obj_freeze (direct C API, no dispatch) */ + rb_obj_freeze(scalar); + rb_obj_freeze(numerator); + rb_obj_freeze(denominator); + rb_obj_freeze(base_scalar_val); + /* Fixnums, true/false, and nil are always frozen -- skip */ + + return Qtrue; +} + +/* + * Phase 3: rb_unit_eliminate_terms - replaces eliminate_terms class method + * + * Uses direct Definition ivar access instead of rb_funcall for prefix? check. 
+ */ +static VALUE rb_unit_eliminate_terms(VALUE klass, VALUE scalar, VALUE numerator, VALUE denominator) { + VALUE definitions = rb_funcall(klass, id_definitions, 0); + VALUE combined = rb_hash_new(); + long i, len; + VALUE token, defn; + + /* Count numerator groups */ + VALUE current_group = rb_ary_new(); + len = RARRAY_LEN(numerator); + for (i = 0; i < len; i++) { + token = rb_ary_entry(numerator, i); + if (is_unity(token)) continue; + + rb_ary_push(current_group, token); + defn = rb_hash_aref(definitions, token); + if (NIL_P(defn) || !defn_is_prefix(defn)) { + VALUE existing = rb_hash_aref(combined, current_group); + long val = NIL_P(existing) ? 0 : NUM2LONG(existing); + rb_hash_aset(combined, current_group, LONG2NUM(val + 1)); + current_group = rb_ary_new(); + } + } + + /* Count denominator groups */ + current_group = rb_ary_new(); + len = RARRAY_LEN(denominator); + for (i = 0; i < len; i++) { + token = rb_ary_entry(denominator, i); + if (is_unity(token)) continue; + + rb_ary_push(current_group, token); + defn = rb_hash_aref(definitions, token); + if (NIL_P(defn) || !defn_is_prefix(defn)) { + VALUE existing = rb_hash_aref(combined, current_group); + long val = NIL_P(existing) ? 
0 : NUM2LONG(existing); + rb_hash_aset(combined, current_group, LONG2NUM(val - 1)); + current_group = rb_ary_new(); + } + } + + /* Build result arrays */ + VALUE result_num = rb_ary_new(); + VALUE result_den = rb_ary_new(); + + VALUE keys = rb_funcall(combined, id_keys, 0); + long keys_len = RARRAY_LEN(keys); + for (i = 0; i < keys_len; i++) { + VALUE key = rb_ary_entry(keys, i); + long val = NUM2LONG(rb_hash_aref(combined, key)); + long j; + if (val > 0) { + for (j = 0; j < val; j++) { + rb_funcall(result_num, id_concat, 1, key); + } + } else if (val < 0) { + for (j = 0; j < -val; j++) { + rb_funcall(result_den, id_concat, 1, key); + } + } + } + + /* Default to UNITY_ARRAY if empty */ + if (RARRAY_LEN(result_num) == 0) result_num = rb_ary_new_from_args(1, str_unity); + if (RARRAY_LEN(result_den) == 0) result_den = rb_ary_new_from_args(1, str_unity); + + VALUE result = rb_hash_new(); + rb_hash_aset(result, sym_scalar, scalar); + rb_hash_aset(result, sym_numerator, result_num); + rb_hash_aset(result, sym_denominator, result_den); + return result; +} + +/* + * Phase 4: rb_unit_convert_scalar - computes conversion factor between two units + */ +static VALUE rb_unit_convert_scalar(VALUE klass, VALUE self_unit, VALUE target_unit) { + VALUE prefix_vals = rb_funcall(klass, id_prefix_values, 0); + VALUE unit_vals = rb_funcall(klass, id_unit_values, 0); + VALUE self_num = rb_ivar_get(self_unit, id_iv_numerator); + VALUE self_den = rb_ivar_get(self_unit, id_iv_denominator); + VALUE target_num = rb_ivar_get(target_unit, id_iv_numerator); + VALUE target_den = rb_ivar_get(target_unit, id_iv_denominator); + VALUE self_scalar = rb_ivar_get(self_unit, id_iv_scalar); + + long i, len; + VALUE token, pv, uv, uv_scalar; + + #define COMPUTE_ARRAY_SCALAR(arr, result_var) do { \ + result_var = INT2FIX(1); \ + len = RARRAY_LEN(arr); \ + for (i = 0; i < len; i++) { \ + token = rb_ary_entry(arr, i); \ + pv = rb_hash_aref(prefix_vals, token); \ + if (!NIL_P(pv)) { \ + result_var = 
rb_funcall(result_var, '*', 1, pv); \ + } else { \ + uv = rb_hash_aref(unit_vals, token); \ + if (!NIL_P(uv)) { \ + uv_scalar = rb_hash_aref(uv, sym_scalar); \ + if (!NIL_P(uv_scalar)) { \ + result_var = rb_funcall(result_var, '*', 1, uv_scalar); \ + } \ + } \ + } \ + } \ + } while(0) + + VALUE self_num_scalar, self_den_scalar, target_num_scalar, target_den_scalar; + + COMPUTE_ARRAY_SCALAR(self_num, self_num_scalar); + COMPUTE_ARRAY_SCALAR(self_den, self_den_scalar); + COMPUTE_ARRAY_SCALAR(target_num, target_num_scalar); + COMPUTE_ARRAY_SCALAR(target_den, target_den_scalar); + + #undef COMPUTE_ARRAY_SCALAR + + VALUE numerator_factor = rb_funcall(self_num_scalar, '*', 1, target_den_scalar); + VALUE denominator_factor = rb_funcall(target_num_scalar, '*', 1, self_den_scalar); + + VALUE conversion_scalar; + if (RB_TYPE_P(self_scalar, T_FIXNUM) || RB_TYPE_P(self_scalar, T_BIGNUM)) { + conversion_scalar = rb_funcall(self_scalar, id_to_r, 0); + } else { + conversion_scalar = self_scalar; + } + + VALUE converted = rb_funcall(conversion_scalar, '*', 1, numerator_factor); + converted = rb_funcall(converted, '/', 1, denominator_factor); + converted = rb_funcall(klass, id_normalize_to_i, 1, converted); + + return converted; +} + +/* ======================================================================== + * Module initialization + * ======================================================================== */ + +void Init_ruby_units_ext(void) { + /* Unit instance variable IDs */ + id_iv_scalar = rb_intern("@scalar"); + id_iv_numerator = rb_intern("@numerator"); + id_iv_denominator = rb_intern("@denominator"); + id_iv_base_scalar = rb_intern("@base_scalar"); + id_iv_signature = rb_intern("@signature"); + id_iv_base = rb_intern("@base"); + id_iv_unit_name = rb_intern("@unit_name"); + + /* Definition object ivar IDs */ + id_defn_kind = rb_intern("@kind"); + id_defn_display_name = rb_intern("@display_name"); + id_defn_scalar = rb_intern("@scalar"); + id_defn_numerator = 
rb_intern("@numerator"); + id_defn_denominator = rb_intern("@denominator"); + id_defn_name = rb_intern("@name"); + + /* Method IDs (only those still needed) */ + id_definitions = rb_intern("definitions"); + id_prefix_values = rb_intern("prefix_values"); + id_unit_values = rb_intern("unit_values"); + id_cached = rb_intern("cached"); + id_set = rb_intern("set"); + id_to_unit = rb_intern("to_unit"); + id_parse_into_numbers_and_units = rb_intern("parse_into_numbers_and_units"); + id_normalize_to_i = rb_intern("normalize_to_i"); + id_keys = rb_intern("keys"); + id_concat = rb_intern("concat"); + id_eq = rb_intern("=="); + id_to_r = rb_intern("to_r"); + + /* Hash key symbols */ + sym_scalar = ID2SYM(rb_intern("scalar")); + sym_numerator = ID2SYM(rb_intern("numerator")); + sym_denominator = ID2SYM(rb_intern("denominator")); + sym_signature = ID2SYM(rb_intern("signature")); + + /* Kind symbols */ + sym_prefix = ID2SYM(rb_intern("prefix")); + sym_length = ID2SYM(rb_intern("length")); + sym_time = ID2SYM(rb_intern("time")); + sym_temperature = ID2SYM(rb_intern("temperature")); + sym_mass = ID2SYM(rb_intern("mass")); + sym_current = ID2SYM(rb_intern("current")); + sym_substance = ID2SYM(rb_intern("substance")); + sym_luminosity = ID2SYM(rb_intern("luminosity")); + sym_currency = ID2SYM(rb_intern("currency")); + sym_information = ID2SYM(rb_intern("information")); + sym_angle = ID2SYM(rb_intern("angle")); + + signature_kind_symbols[0] = sym_length; + signature_kind_symbols[1] = sym_time; + signature_kind_symbols[2] = sym_temperature; + signature_kind_symbols[3] = sym_mass; + signature_kind_symbols[4] = sym_current; + signature_kind_symbols[5] = sym_substance; + signature_kind_symbols[6] = sym_luminosity; + signature_kind_symbols[7] = sym_currency; + signature_kind_symbols[8] = sym_information; + signature_kind_symbols[9] = sym_angle; + + /* Mark all symbols/strings as GC roots */ + rb_gc_register_address(&sym_scalar); + rb_gc_register_address(&sym_numerator); + 
rb_gc_register_address(&sym_denominator); + rb_gc_register_address(&sym_signature); + rb_gc_register_address(&sym_prefix); + rb_gc_register_address(&sym_length); + rb_gc_register_address(&sym_time); + rb_gc_register_address(&sym_temperature); + rb_gc_register_address(&sym_mass); + rb_gc_register_address(&sym_current); + rb_gc_register_address(&sym_substance); + rb_gc_register_address(&sym_luminosity); + rb_gc_register_address(&sym_currency); + rb_gc_register_address(&sym_information); + rb_gc_register_address(&sym_angle); + + str_unity = rb_str_freeze(rb_str_new_cstr("<1>")); + rb_gc_register_address(&str_unity); + + str_empty = rb_str_freeze(rb_str_new_cstr("")); + rb_gc_register_address(&str_empty); + + /* Get the Unit class and define methods */ + VALUE mRubyUnits = rb_define_module("RubyUnits"); + cUnit = rb_define_class_under(mRubyUnits, "Unit", rb_cNumeric); + + /* Instance methods */ + rb_define_private_method(cUnit, "_c_finalize", rb_unit_finalize, 1); + + /* Class methods */ + rb_define_singleton_method(cUnit, "_c_eliminate_terms", rb_unit_eliminate_terms, 3); + rb_define_singleton_method(cUnit, "_c_convert_scalar", rb_unit_convert_scalar, 2); +} diff --git a/lib/ruby_units/cache.rb b/lib/ruby_units/cache.rb index 6086804..acf0e32 100644 --- a/lib/ruby_units/cache.rb +++ b/lib/ruby_units/cache.rb @@ -36,7 +36,7 @@ def clear end def should_skip_caching?(key) - keys.include?(key) || key =~ RubyUnits::Unit.special_format_regex + data.key?(key) || key =~ RubyUnits::Unit.special_format_regex end end end diff --git a/lib/ruby_units/namespaced.rb b/lib/ruby_units/namespaced.rb index 08e57ad..e04f19d 100644 --- a/lib/ruby_units/namespaced.rb +++ b/lib/ruby_units/namespaced.rb @@ -12,4 +12,5 @@ require_relative "numeric" require_relative "string" require_relative "unit" +require_relative "native" require_relative "unit_definitions" diff --git a/lib/ruby_units/native.rb b/lib/ruby_units/native.rb new file mode 100644 index 0000000..c8ef4d7 --- /dev/null +++ 
# frozen_string_literal: true

# ---------------------------------------------------------------------------
# lib/ruby_units/native.rb
# ---------------------------------------------------------------------------
# Load the C extension for ruby-units if available.
# Falls back to pure Ruby when:
#   - the RUBY_UNITS_PURE environment variable is set (to any value)
#   - The C extension hasn't been compiled
#   - Running on a platform that doesn't support C extensions (JRuby, etc.)
unless ENV["RUBY_UNITS_PURE"]
  begin
    require_relative "ruby_units_ext"
  rescue LoadError
    # C extension not available, pure Ruby will be used
  end
end

# ---------------------------------------------------------------------------
# lib/ruby_units/unit.rb -- complete class methods touched by this change
# ---------------------------------------------------------------------------
module RubyUnits
  class Unit < Numeric
    # Cancel units that appear in both the numerator and the denominator.
    #
    # Delegates to the C implementation when `_c_eliminate_terms` is defined;
    # otherwise counts each prefix+unit group (+1 per numerator occurrence,
    # -1 per denominator occurrence) and rebuilds both arrays from the net
    # counts.
    #
    # @param scalar [Numeric] passed through unchanged
    # @param numerator_units [Array] numerator
    # @param denominator_units [Array] denominator
    # @return [Hash] with keys :scalar, :numerator and :denominator; an empty
    #   side is returned as UNITY_ARRAY
    def self.eliminate_terms(scalar, numerator_units, denominator_units)
      return _c_eliminate_terms(scalar, numerator_units, denominator_units) if respond_to?(:_c_eliminate_terms)

      combined = ::Hash.new(0)
      count_units(numerator_units, combined, 1)
      count_units(denominator_units, combined, -1)

      result_numerator = []
      result_denominator = []
      combined.each do |key, value|
        if value > 0
          value.times { result_numerator.concat(key) }
        elsif value < 0
          (-value).times { result_denominator.concat(key) }
        end
      end

      result_numerator = UNITY_ARRAY if result_numerator.empty?
      result_denominator = UNITY_ARRAY if result_denominator.empty?

      { scalar:, numerator: result_numerator, denominator: result_denominator }
    end

    # Tally prefix+unit groups of +unit_array+ into +combined+.
    #
    # Tokens are accumulated into a group until a token that is not a prefix
    # is reached; the finished group is then used as the hash key and its
    # count is changed by +increment+. UNITY tokens are ignored.
    #
    # @param unit_array [Array] tokens to scan
    # @param combined [Hash] group => net count, updated in place
    # @param increment [Integer] amount added per group
    # @return [void]
    private_class_method def self.count_units(unit_array, combined, increment)
      current_group = []
      unit_array.each do |token|
        next if token == UNITY

        current_group << token
        unless definition(token)&.prefix?
          combined[current_group] += increment
          current_group = []
        end
      end
    end

    # Convert a value to a number.
    #
    # Numerics are returned unchanged. Otherwise the value is parsed with
    # BigDecimal when `use_bigdecimal` is configured, or with Float; a finite
    # Float result with no fractional part is returned as an Integer.
    #
    # @param value [Numeric, String] the value to convert
    # @return [Numeric]
    # @raise [ArgumentError] if the value cannot be coerced by the underlying constructors
    # @example
    #   Unit.parse_number("3.14") #=> 3.14 (Float) unless use_bigdecimal is enabled
    #   Unit.parse_number("1")    #=> 1 (Integer)
    #   Unit.parse_number(2)      #=> 2 (unchanged)
    def self.parse_number(value)
      return value if value.is_a?(Numeric)
      return BigDecimal(value) if RubyUnits.configuration.use_bigdecimal

      float = Float(value)
      # Float#to_i raises FloatDomainError for Infinity and NaN (for example
      # an out-of-range literal such as "1e400"), so those stay Floats.
      return float unless float.finite?

      integer = float.to_i
      integer == float ? integer : float
    end
  end
end
+ # + # @yield block containing definition loading code + # @return [void] + def self.batch_define + @batch_loading = true + yield + ensure + @batch_loading = false + invalidate_regex_cache + end + # inject a definition into the internal array and set it up for use # # @param definition [RubyUnits::Unit::Definition] def self.use_definition(definition) - invalidate_regex_cache + invalidate_regex_cache unless @batch_loading if definition.prefix? register_prefix_definition(definition) else @@ -537,6 +565,7 @@ def self.register_prefix_definition(definition) prefix_values[definition_name] = definition.scalar register_aliases(definition.aliases, definition_name, prefix_map) @prefix_regex = nil + definition.aliases.each { |a| @max_prefix_name_length = a.length if a.length > @max_prefix_name_length } end # Register a unit definition @@ -549,6 +578,7 @@ def self.register_unit_definition(definition) unit_values[definition_name] = unit_value register_aliases(definition.aliases, definition_name, unit_map) @unit_regex = nil + definition.aliases.each { |a| @max_unit_name_length = a.length if a.length > @max_unit_name_length } end # Create a hash for unit value @@ -573,6 +603,31 @@ def self.register_aliases(aliases, name, map) aliases.each { map[_1] = name } end + # Resolve a single unit token via hash-based longest-match lookup + # Returns an array of canonical names (e.g., ["", ""]) or nil if not found + # + # @param token [String] the unit token to resolve (e.g., "kg", "meter", "km") + # @return [Array, nil] array of canonical names, or nil if not found + def self.resolve_unit_token(token) + # Try direct unit match first (handles aliases like "kg", "meter", etc.) 
+ unit_name = unit_map[token] + return [unit_name] if unit_name + + # Try prefix+unit decomposition: longest prefix first + max_plen = [@max_prefix_name_length, token.length - 1].min + max_plen.downto(1) do |plen| + prefix_candidate = token[0, plen] + prefix_name = prefix_map[prefix_candidate] + next unless prefix_name + + unit_candidate = token[plen..] + unit_name = unit_map[unit_candidate] + return [prefix_name, unit_name] if unit_name + end + + nil + end + # Format a fraction part with optional rationalization # @param frac [Float] the fractional part # @param precision [Float] the precision for rationalization @@ -686,12 +741,8 @@ def to_unit(other = nil) def base? return @base if defined? @base - @base = (@numerator + @denominator) - .compact - .uniq - .map { unit_class.definition(_1) } - .all? { _1.unity? || _1.base? } - @base + @base = @numerator.all? { |t| t == UNITY || (d = unit_class.definition(t)) && (d.unity? || d.base?) } && + @denominator.all? { |t| t == UNITY || (d = unit_class.definition(t)) && (d.unity? || d.base?) } end alias is_base? base? @@ -703,14 +754,23 @@ def base? def to_base return self if base? + @base_unit ||= compute_to_base + end + + alias base to_base + + private + + # Compute the base unit representation (called lazily by to_base) + # @return [Unit] + def compute_to_base if unit_class.unit_map[units] =~ /\A<(?:temp|deg)[CRF]>\Z/ @signature = unit_class.kinds.key(:temperature) - base = if temperature? - convert_to("tempK") - elsif degree? - convert_to("degK") - end - return base + if temperature? + return convert_to("tempK") + elsif degree? 
+ return convert_to("degK") + end end base_cache = unit_class.base_unit_cache @@ -723,7 +783,9 @@ def to_base prefix_vals = unit_class.prefix_values unit_vals = unit_class.unit_values - process_unit_for_numerator = lambda do |num_unit| + @numerator.each do |num_unit| + next if num_unit == UNITY + prefix_value = prefix_vals[num_unit] if prefix_value conversion_factor *= prefix_value @@ -738,7 +800,9 @@ def to_base end end - process_unit_for_denominator = lambda do |den_unit| + @denominator.each do |den_unit| + next if den_unit == UNITY + prefix_value = prefix_vals[den_unit] if prefix_value conversion_factor /= prefix_value @@ -753,9 +817,6 @@ def to_base end end - @numerator.compact.each(&process_unit_for_numerator) - @denominator.compact.each(&process_unit_for_denominator) - num = num.flatten.compact den = den.flatten.compact num = UNITY_ARRAY if num.empty? @@ -764,7 +825,7 @@ def to_base base * @scalar end - alias base to_base + public # # @example @@ -951,6 +1012,8 @@ def +(other) when Unit if zero? other.dup + elsif @numerator == other.numerator && @denominator == other.denominator && !temperature? && !other.temperature? + unit_class.new(scalar: @scalar + other.scalar, numerator: @numerator, denominator: @denominator, signature: @signature) elsif compatible_with?(other) raise ArgumentError, "Cannot add two temperatures" if [self, other].all?(&:temperature?) @@ -988,6 +1051,8 @@ def -(other) else -other_copy end + elsif @numerator == other.numerator && @denominator == other.denominator && !temperature? && !other.temperature? + unit_class.new(scalar: @scalar - other.scalar, numerator: @numerator, denominator: @denominator, signature: @signature) elsif compatible_with?(other) scalar_difference = base_scalar - other.base_scalar if [self, other].all?(&:temperature?) 
@@ -1281,23 +1346,21 @@ def convert_to(other) ensure_compatible_with(target) - prefix_vals = unit_class.prefix_values - unit_vals = unit_class.unit_values - to_scalar = ->(unit_array) { unit_array.map { prefix_vals[_1] || _1 }.map { _1.is_a?(Numeric) ? _1 : unit_vals[_1][:scalar] }.compact } - target_num = target.numerator target_den = target.denominator - source_numerator_values = to_scalar.call(@numerator) - source_denominator_values = to_scalar.call(@denominator) - target_numerator_values = to_scalar.call(target_num) - target_denominator_values = to_scalar.call(target_den) - # @type [Rational, Numeric] - scalar_is_integer = @scalar.is_a?(Integer) - conversion_scalar = scalar_is_integer ? @scalar.to_r : @scalar - converted_value = conversion_scalar * (source_numerator_values + target_denominator_values).reduce(1, :*) / (target_numerator_values + source_denominator_values).reduce(1, :*) - # Convert the scalar to an Integer if the result is equivalent to an - # integer - converted_value = unit_class.normalize_to_i(converted_value) + + if unit_class.respond_to?(:_c_convert_scalar) + converted_value = unit_class._c_convert_scalar(self, target) + else + # Compute conversion factor directly without intermediate arrays + numerator_factor = unit_array_scalar(@numerator) * unit_array_scalar(target_den) + denominator_factor = unit_array_scalar(target_num) * unit_array_scalar(@denominator) + + scalar_is_integer = @scalar.is_a?(Integer) + conversion_scalar = scalar_is_integer ? 
@scalar.to_r : @scalar + converted_value = conversion_scalar * numerator_factor / denominator_factor + converted_value = unit_class.normalize_to_i(converted_value) + end unit_class.new(scalar: converted_value, numerator: target_num, denominator: target_den, signature: target.signature) end end @@ -1361,6 +1424,9 @@ def as_json(*) def units(with_prefix: true, format: nil) return "" if @numerator == UNITY_ARRAY && @denominator == UNITY_ARRAY + # Fast path: use cached unit_name for default args (rational format, with prefix) + return @unit_name if @unit_name && with_prefix && format != :exponential + output_numerator = ["1"] output_denominator = [] num = @numerator.clone.compact @@ -1672,10 +1738,103 @@ def update_base_scalar if base? @base_scalar = @scalar @signature = unit_signature - else + elsif unit_class.unit_map[units] =~ /\A<(?:temp|deg)[CRF]>\Z/ base = to_base @base_scalar = base.scalar @signature = base.signature + else + @base_scalar = compute_base_scalar_fast + @signature = compute_signature_fast + end + end + + # Compute base_scalar without creating an intermediate Unit object + # @return [Numeric] + def compute_base_scalar_fast + factor = Rational(1) + prefix_vals = unit_class.prefix_values + unit_vals = unit_class.unit_values + + @numerator.each do |token| + next if token == UNITY + + pv = prefix_vals[token] + if pv + factor *= pv + else + uv = unit_vals[token] + factor *= uv[:scalar] if uv + end + end + + @denominator.each do |token| + next if token == UNITY + + pv = prefix_vals[token] + if pv + factor /= pv + else + uv = unit_vals[token] + factor /= uv[:scalar] if uv + end + end + + @scalar * factor + end + + # Compute signature without creating a base Unit object + # @return [Integer] + def compute_signature_fast + vector = ::Array.new(SIGNATURE_VECTOR.size, 0) + expand_tokens_to_signature(@numerator, vector, 1) + expand_tokens_to_signature(@denominator, vector, -1) + raise ArgumentError, "Power out of range (-20 < net power of a unit < 20)" if 
vector.any? { _1.abs >= 20 } + + vector.each_with_index { |item, index| vector[index] = item * (20**index) } + vector.inject(0, :+) + end + + # Expand unit tokens to their base units and accumulate signature vector + # @param tokens [Array] unit tokens + # @param vector [Array] signature vector to accumulate into + # @param sign [Integer] +1 for numerator, -1 for denominator + def expand_tokens_to_signature(tokens, vector, sign) + prefix_vals = unit_class.prefix_values + unit_vals = unit_class.unit_values + + tokens.each do |token| + next if token == UNITY + next if prefix_vals.key?(token) + + uv = unit_vals[token] + if uv + base_num = uv[:numerator] + base_den = uv[:denominator] + if base_num + base_num.each do |bt| + defn = unit_class.definition(bt) + next unless defn + + idx = SIGNATURE_VECTOR.index(defn.kind) + vector[idx] += sign if idx + end + end + if base_den + base_den.each do |bt| + defn = unit_class.definition(bt) + next unless defn + + idx = SIGNATURE_VECTOR.index(defn.kind) + vector[idx] -= sign if idx + end + end + else + defn = unit_class.definition(token) + if defn + idx = SIGNATURE_VECTOR.index(defn.kind) + vector[idx] += sign if idx + end + end end end @@ -1687,6 +1846,26 @@ def ensure_compatible_with(other) raise ArgumentError, "Incompatible Units ('#{self}' not compatible with '#{other}')" unless compatible_with?(other) end + # Compute the scalar product of a unit array (numerator or denominator tokens) + # without creating intermediate arrays. 
+ # @param unit_array [Array] array of canonical unit token names + # @return [Numeric] the accumulated scalar value + def unit_array_scalar(unit_array) + prefix_vals = unit_class.prefix_values + unit_vals = unit_class.unit_values + result = 1 + unit_array.each do |token| + pv = prefix_vals[token] + if pv + result *= pv + else + uv = unit_vals[token] + result *= uv[:scalar] if uv && uv[:scalar] + end + end + result + end + # Validate that a time_point is a Time, Date, or DateTime # @param [Object] time_point the object to validate # @return [void] @@ -1888,8 +2067,12 @@ def parse_array(options) case options in [first] if first parse_single_arg(first) + in [nil, String => second] + parse_single_arg(second) in [first, String => second] if first parse_two_args(first, second) + in [Numeric => scalar, Unit => unit_obj] + copy(unit_obj * scalar) in [first, String | Array => second, String | Array => third] if first parse_three_args(first, second, third) else @@ -1936,11 +2119,12 @@ def parse_string_arg(str) # @param [String] unit_string # @return [void] def parse_two_args(scalar, unit_string) - cached = unit_class.cached.get(unit_string) - if cached - copy(cached * scalar) + if unit_string.strip.empty? 
+ parse_numeric(scalar) else - parse_string("#{scalar} #{unit_string}") + cached = unit_class.cached.get(unit_string) + unit = cached || unit_class.new(unit_string) + copy(unit * scalar) end end @@ -1956,7 +2140,7 @@ def parse_three_args(scalar, numerator, denominator) if cached copy(cached * scalar) else - parse_string("#{scalar} #{unit_str}") + copy(unit_class.new(unit_str) * scalar) end end @@ -1965,7 +2149,7 @@ def parse_three_args(scalar, numerator, denominator) # @param [Hash] hash # @return [void] def parse_hash(hash) - @scalar = validate_scalar(hash.fetch(:scalar, 1)) + @scalar = unit_class.normalize_to_i(validate_scalar(hash.fetch(:scalar, 1))) @numerator = validate_unit_array(hash.fetch(:numerator, UNITY_ARRAY), :numerator) @denominator = validate_unit_array(hash.fetch(:denominator, UNITY_ARRAY), :denominator) @signature = validate_signature(hash[:signature]) @@ -2039,6 +2223,10 @@ def parse_string(str) # @param [Array] options original options passed to initialize # @return [void] def finalize_initialization(options) + # C finalize returns true on success, false if temperature tokens detected + if respond_to?(:_c_finalize, true) && _c_finalize(options[0]) + return + end update_base_scalar validate_temperature cache_unit_if_needed(options) @@ -2170,19 +2358,21 @@ def parse(passed_unit_string = "0") if unit copy(unit) @scalar *= mult - @base_scalar *= mult + # Temperature base_scalar involves an offset (e.g. 0°F = 255.37K), + # so linear scaling is incorrect. Let update_base_scalar recompute it. + if temperature? + @base_scalar = nil + else + @base_scalar *= mult + end return self end - while unit_string.gsub!(/<(#{unit_class.prefix_regex})><(#{unit_class.unit_regex})>/, '<\1\2>') - # replace with - end - unit_match_regex = unit_class.unit_match_regex - while unit_string.gsub!(/<#{unit_match_regex}><#{unit_match_regex}>/, '<\1\2>*<\3\4>') - # collapse into *... 
+ # Handle angle bracket format: insert separators between groups, then strip + if unit_string.include?("<") + unit_string.gsub!(">", "> ") + unit_string.gsub!(/[<>]/, "") end - # ... and then strip the remaining brackets for x*y*z - unit_string.gsub!(/[<>]/, "") if (match = unit_string.match(TIME_REGEX)) hours, minutes, seconds, milliseconds = match.values_at(:hour, :min, :sec, :msec) @@ -2266,25 +2456,8 @@ def parse(passed_unit_string = "0") @numerator ||= UNITY_ARRAY @denominator ||= UNITY_ARRAY - @numerator = top.scan(unit_match_regex).delete_if(&:empty?).compact if top - @denominator = bottom.scan(unit_match_regex).delete_if(&:empty?).compact if bottom - - # eliminate all known terms from this string. This is a quick check to see if the passed unit - # contains terms that are not defined. - used = "#{top} #{bottom}".gsub(unit_match_regex, "").gsub(%r{[\d*, "'_^/$]}, "") - invalid_unit(passed_unit_string) unless used.empty? - - prefix_map = unit_class.prefix_map - unit_map = unit_class.unit_map - transform_units = lambda do |(prefix, unit)| - prefix_value = prefix_map[prefix] - unit_value = unit_map[unit] - prefix_value ? [prefix_value, unit_value] : [unit_value] - end - - @numerator = @numerator.map(&transform_units).flatten.compact.delete_if(&:empty?) - - @denominator = @denominator.map(&transform_units).flatten.compact.delete_if(&:empty?) + @numerator = resolve_expression_tokens(top, passed_unit_string) if top + @denominator = resolve_expression_tokens(bottom, passed_unit_string) if bottom @numerator = UNITY_ARRAY if @numerator.empty? @denominator = UNITY_ARRAY if @denominator.empty? @@ -2310,6 +2483,46 @@ def validate_unit_string_format(passed_unit_string, unit_string) end end + # Resolve tokens in a unit expression string (numerator or denominator half) + # using hash-based lookup instead of regex scanning. 
+ # + # @param expression [String] the expression string after exponent expansion (e.g., "kg m" or "s s") + # @param passed_unit_string [String] original input string for error messages + # @return [Array] array of canonical unit names (e.g., ["", ""]) + # @raise [ArgumentError] if an unknown unit token is encountered + def resolve_expression_tokens(expression, passed_unit_string) + result = [] + tokens = expression.split(/[\s*]+/) + i = 0 + while i < tokens.length + token = tokens[i] + i += 1 + next if token.empty? + # Skip pure numeric tokens (like "1" in "1/mol") - they are not units + next if token.match?(/\A\d+\z/) + + # Try multi-word match: greedily join consecutive tokens to find + # multi-word aliases like "square meter" or "short ton" + resolved = nil + matched_count = 0 + max_lookahead = [tokens.length - (i - 1), 4].min # limit lookahead + max_lookahead.downto(1) do |n| + candidate = tokens[(i - 1), n].join(" ") + resolved = unit_class.resolve_unit_token(candidate) + if resolved + matched_count = n + break + end + end + + invalid_unit(passed_unit_string) unless resolved + result.concat(resolved) + # Skip the extra tokens consumed by the multi-word match + i += matched_count - 1 + end + result + end + # Raise a standardized ArgumentError for an unrecognized unit string. 
# # @param unit_string [String] the (possibly invalid) unit text diff --git a/lib/ruby_units/unit_definitions.rb b/lib/ruby_units/unit_definitions.rb index 4a9af13..b8dc48e 100644 --- a/lib/ruby_units/unit_definitions.rb +++ b/lib/ruby_units/unit_definitions.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true -require_relative "unit_definitions/prefix" -require_relative "unit_definitions/base" -require_relative "unit_definitions/standard" +RubyUnits::Unit.batch_define do + require_relative "unit_definitions/prefix" + require_relative "unit_definitions/base" + require_relative "unit_definitions/standard" +end diff --git a/ruby-units.gemspec b/ruby-units.gemspec index 7b84b4f..ba62a97 100644 --- a/ruby-units.gemspec +++ b/ruby-units.gemspec @@ -24,8 +24,9 @@ Gem::Specification.new do |spec| # Specify which files should be added to the gem when it is released. # The `git ls-files -z` loads the files in the RubyGem that have been added into git. spec.files = Dir.chdir(File.expand_path(__dir__)) do - `git ls-files -z`.split("\x0").reject { _1.match(%r{^(test|spec|features)/}) } + `git ls-files -z`.split("\x0").reject { _1.match(%r{^(test|spec|features)/|^plan.*\.md$|^\.claude/}) } end + spec.extensions = ["ext/ruby_units/extconf.rb"] spec.require_paths = ["lib"] spec.metadata["rubygems_mfa_required"] = "true" end diff --git a/spec/benchmarks/cold_start.rb b/spec/benchmarks/cold_start.rb new file mode 100644 index 0000000..827c339 --- /dev/null +++ b/spec/benchmarks/cold_start.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +# Benchmark: Cold start (require) time +# Usage: ruby spec/benchmarks/cold_start.rb +# +# Measures how long it takes to require the gem and have all unit +# definitions parsed and ready. This runs in a subprocess to get a +# true cold-start measurement each iteration. 
+ +require "benchmark" + +ITERATIONS = 5 + +puts "=== Cold Start Benchmark ===" +puts "Measuring time to `require 'ruby-units'` (#{ITERATIONS} iterations)" +puts + +times = ITERATIONS.times.map do |i| + result = Benchmark.measure do + system("ruby", "-I", File.expand_path("../../lib", __dir__), "-e", "require 'ruby-units'") + end + real = result.real + printf " Run %d: %.4fs\n", i + 1, real + real +end + +puts +printf " Average: %.4fs\n", times.sum / times.size +printf " Min: %.4fs\n", times.min +printf " Max: %.4fs\n", times.max diff --git a/spec/benchmarks/unit_operations.rb b/spec/benchmarks/unit_operations.rb new file mode 100644 index 0000000..f4ef0d7 --- /dev/null +++ b/spec/benchmarks/unit_operations.rb @@ -0,0 +1,189 @@ +# frozen_string_literal: true + +# Benchmark: Unit creation and operations +# Usage: ruby -I lib spec/benchmarks/unit_operations.rb +# +# Uses benchmark-ips to measure iterations/second for: +# 1. Unit creation from various string formats +# 2. Unit conversions +# 3. Arithmetic operations +# 4. Scaling with complexity + +require "ruby-units" +require "benchmark/ips" + +puts "Ruby #{RUBY_VERSION} | ruby-units #{RubyUnits::Unit::VERSION}" +puts "Definitions: #{RubyUnits::Unit.definitions.size} | Unit map entries: #{RubyUnits::Unit.unit_map.size}" +puts + +# ── 1. Unit Creation (String Parsing) ────────────────────────────────────────── +puts "=" * 70 +puts "1. 
UNIT CREATION FROM STRINGS" +puts "=" * 70 + +# Clear the cache so we measure real parsing cost +RubyUnits::Unit.clear_cache + +Benchmark.ips do |x| + x.config(warmup: 2, time: 5) + + x.report("simple: '1 m'") do + RubyUnits::Unit.clear_cache + Unit.new("1 m") + end + + x.report("prefixed: '1 km'") do + RubyUnits::Unit.clear_cache + Unit.new("1 km") + end + + x.report("compound: '1 kg*m/s^2'") do + RubyUnits::Unit.clear_cache + Unit.new("1 kg*m/s^2") + end + + x.report("scientific: '1.5e-3 mm'") do + RubyUnits::Unit.clear_cache + Unit.new("1.5e-3 mm") + end + + x.report("rational: '1/2 cup'") do + RubyUnits::Unit.clear_cache + Unit.new("1/2 cup") + end + + x.report("feet-inch: \"6'4\\\"\"") do + RubyUnits::Unit.clear_cache + Unit.new("6'4\"") + end + + x.report("lbs-oz: '8 lbs 8 oz'") do + RubyUnits::Unit.clear_cache + Unit.new("8 lbs 8 oz") + end + + x.report("temperature: '37 degC'") do + RubyUnits::Unit.clear_cache + Unit.new("37 degC") + end + + x.compare! +end + +# ── 2. Unit Creation WITH Cache ──────────────────────────────────────────────── +puts +puts "=" * 70 +puts "2. UNIT CREATION WITH CACHE (repeated same unit)" +puts "=" * 70 + +Unit.new("1 m") # prime the cache + +Benchmark.ips do |x| + x.config(warmup: 2, time: 5) + + x.report("cached: '1 m'") { Unit.new("1 m") } + x.report("cached: '5 kg*m/s^2'") { Unit.new("5 kg*m/s^2") } + x.report("numeric: Unit.new(1)") { Unit.new(1) } + x.report("hash: {scalar:1, ...}") do + Unit.new(scalar: 1, numerator: [""], denominator: ["<1>"]) + end + + x.compare! +end + +# ── 3. Conversions ───────────────────────────────────────────────────────────── +puts +puts "=" * 70 +puts "3. 
UNIT CONVERSIONS" +puts "=" * 70 + +meter = Unit.new("1 m") +km = Unit.new("1 km") +mph = Unit.new("60 mph") +degc = Unit.new("100 degC") + +Benchmark.ips do |x| + x.config(warmup: 2, time: 5) + + x.report("m -> km") { meter.convert_to("km") } + x.report("km -> m") { km.convert_to("m") } + x.report("mph -> m/s") { mph.convert_to("m/s") } + x.report("degC -> degF") { degc.convert_to("degF") } + x.report("to_base (km)") { km.to_base } + + x.compare! +end + +# ── 4. Arithmetic ───────────────────────────────────────────────────────────── +puts +puts "=" * 70 +puts "4. ARITHMETIC OPERATIONS" +puts "=" * 70 + +a = Unit.new("5 m") +b = Unit.new("3 m") +c = Unit.new("2 kg") +d = Unit.new("10 s") + +Benchmark.ips do |x| + x.config(warmup: 2, time: 5) + + x.report("addition: 5m + 3m") { a + b } + x.report("subtraction: 5m - 3m") { a - b } + x.report("multiply: 5m * 2kg") { a * c } + x.report("divide: 5m / 10s") { a / d } + x.report("power: (5m) ** 2") { a**2 } + x.report("scalar multiply: 5m * 3") { a * 3 } + + x.compare! +end + +# ── 5. Complexity Scaling ────────────────────────────────────────────────────── +puts +puts "=" * 70 +puts "5. 
COMPLEXITY SCALING (uncached parsing)" +puts "=" * 70 + +# Various levels of unit string complexity +simple_units = %w[m kg s ampere degK mol candela] +medium_units = %w[km kPa MHz mA degC lbs gal] +complex_units = [ + "kg*m/s^2", + "kg*m^2/s^2", + "kg*m^2/s^3", + "kg*m*s^-2", + "kg*m^2*s^-3*A^-2" +] +very_complex_units = [ + "kg*m^2*s^-3*A^-2", + "kg*m^2*s^-2*degK^-1*mol^-1", + "kg^2*m^3*s^-4*A^-2", + "kg*m^2*s^-3*A^-1", + "kg^-1*m^-3*s^4*A^2" +] + +Benchmark.ips do |x| + x.config(warmup: 2, time: 5) + + x.report("simple (m, kg, s)") do + RubyUnits::Unit.clear_cache + simple_units.each { Unit.new("1 #{_1}") } + end + + x.report("medium (km, kPa, MHz)") do + RubyUnits::Unit.clear_cache + medium_units.each { Unit.new("1 #{_1}") } + end + + x.report("complex (kg*m/s^2)") do + RubyUnits::Unit.clear_cache + complex_units.each { Unit.new("1 #{_1}") } + end + + x.report("very complex") do + RubyUnits::Unit.clear_cache + very_complex_units.each { Unit.new("1 #{_1}") } + end + + x.compare! +end diff --git a/spec/ruby_units/bugs_spec.rb b/spec/ruby_units/bugs_spec.rb index fbfe866..0dcd648 100644 --- a/spec/ruby_units/bugs_spec.rb +++ b/spec/ruby_units/bugs_spec.rb @@ -10,3 +10,84 @@ expect(b - RubyUnits::Unit.new("1.5 cm^3")).to eq(RubyUnits::Unit.new("1.5 cm^3")) end end + +describe "normalize_to_i preserves Float scalar type" do + it "preserves Float when constructing with a numeric scalar" do + unit = RubyUnits::Unit.new(400.0, "m^2") + expect(unit.scalar).to be_a(Float) + expect(unit.scalar).to eq(400.0) + end + + it "preserves Float when multiplying a unit by a Float" do + unit = RubyUnits::Unit.new("m^2") * 400.0 + expect(unit.scalar).to be_a(Float) + expect(unit.scalar).to eq(400.0) + end + + it "does not break Float division semantics on extracted scalars" do + a = RubyUnits::Unit.new("m^2") * 400.0 + b = RubyUnits::Unit.new("m^2") * 1000.0 + expect(a.scalar / b.scalar).to eq(0.4) + end + + it "normalizes whole Rationals to Integer" do + unit = 
RubyUnits::Unit.new(Rational(400, 1), "m^2") + expect(unit.scalar).to be_a(Integer) + expect(unit.scalar).to eq(400) + end + + it "preserves non-whole Rationals" do + unit = RubyUnits::Unit.new(Rational(3, 2), "m") + expect(unit.scalar).to be_a(Rational) + expect(unit.scalar).to eq(Rational(3, 2)) + end +end + +describe "Unit.new(numeric, unit_object) — Unit as second argument" do + it "creates a unit with the given scalar and the Unit's unit" do + du = RubyUnits::Unit.new("1 m^2") + result = RubyUnits::Unit.new(9.290304, du) + expect(result.units).to eq("m^2") + expect(result.scalar).to be_within(1e-6).of(9.290304) + end + + it "works with integer scalar and simple unit" do + du = RubyUnits::Unit.new("1 kg") + result = RubyUnits::Unit.new(5, du) + expect(result).to eq(RubyUnits::Unit.new("5 kg")) + end + + it "works when the Unit has a non-1 scalar" do + du = RubyUnits::Unit.new("2 m") + result = RubyUnits::Unit.new(3, du) + expect(result).to eq(RubyUnits::Unit.new("6 m")) + end +end + +describe "Unit aliases containing spaces" do + it "parses a unit alias with a space" do + # "square meter" is a standard alias for m^2 if defined + # First verify the alias is in the unit_map + if RubyUnits::Unit.unit_map.key?("square meter") + result = RubyUnits::Unit.new("1 square meter") + expect(result).to be_compatible_with(RubyUnits::Unit.new("1 m^2")) + else + # Define it for the test + RubyUnits::Unit.define("m2_test") do |u| + u.definition = RubyUnits::Unit.new("1 m^2") + u.aliases = ["square meter test"] + end + result = RubyUnits::Unit.new("1 square meter test") + expect(result).to eq(RubyUnits::Unit.new("1 m^2")) + end + end + + it "parses 'short ton' when registered as an alias" do + if RubyUnits::Unit.unit_map.key?("short ton") + result = RubyUnits::Unit.new("1 short ton") + expect(result.scalar).to eq(1) + else + skip "short ton alias not registered" + end + end +end diff --git a/spec/ruby_units/initialization_spec.rb b/spec/ruby_units/initialization_spec.rb index 
04f2d88..3bf849a 100644 --- a/spec/ruby_units/initialization_spec.rb +++ b/spec/ruby_units/initialization_spec.rb @@ -126,9 +126,10 @@ expect(unit.units).to eq("m^2/s^2") end - it "raises error for array with nil first element" do - expect { RubyUnits::Unit.new([nil, "m"]) } - .to raise_error(ArgumentError, "Invalid Unit Format") + it "treats nil first element as scalar 1" do + unit = RubyUnits::Unit.new([nil, "m"]) + expect(unit.scalar).to eq(1) + expect(unit.units).to eq("m") end end end @@ -184,6 +185,18 @@ expect { RubyUnits::Unit.new(nil) } .to raise_error(ArgumentError, "Invalid Unit Format") end + + it "treats nil scalar in two-arg form as scalar 1" do + unit = RubyUnits::Unit.new(nil, "m") + expect(unit.scalar).to eq(1) + expect(unit.units).to eq("m") + end + + it "treats nil scalar in two-arg form with compound unit" do + unit = RubyUnits::Unit.new(nil, "m/s") + expect(unit.scalar).to eq(1) + expect(unit.units).to eq("m/s") + end end describe "three-argument initialization with arrays" do diff --git a/spec/ruby_units/unit_spec.rb b/spec/ruby_units/unit_spec.rb index 8702996..6319034 100644 --- a/spec/ruby_units/unit_spec.rb +++ b/spec/ruby_units/unit_spec.rb @@ -1499,8 +1499,13 @@ expect { RubyUnits::Unit.new("-500/9 tempR") }.to raise_error(ArgumentError, "Temperatures must not be less than absolute zero") end - specify "no nil scalar" do - expect { RubyUnits::Unit.new(nil, "feet") }.to raise_error(ArgumentError, "Invalid Unit Format") + specify "nil scalar in two-arg form treated as scalar 1" do + unit = RubyUnits::Unit.new(nil, "feet") + expect(unit.scalar).to eq(1) + expect(unit.units).to eq("ft") + end + + specify "nil scalar in three-arg form raises" do expect { RubyUnits::Unit.new(nil, "feet", "min") }.to raise_error(ArgumentError, "Invalid Unit Format") end