diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3b140e2..1f2c201 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,12 +2,18 @@ # See https://github.com/metanorma/cimas name: release +permissions: + contents: write + packages: write + id-token: write + on: workflow_dispatch: inputs: next_version: description: | - Next release version. Possible values: x.y.z, major, minor, patch or pre|rc|etc + Next release version. Possible values: x.y.z, major, minor, patch (or pre|rc|etc). + Also, you can pass 'skip' to skip 'git tag' and do 'gem push' for the current version required: true default: 'skip' repository_dispatch: diff --git a/.gitignore b/.gitignore index b04a8c8..5c249f1 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ # rspec failure tracking .rspec_status + +Gemfile.lock diff --git a/.rubocop.yml b/.rubocop.yml index 5880b84..f2e8eb5 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -12,3 +12,4 @@ AllCops: NewCops: enable Exclude: - 'vendor/**/*' + - 'spec/fixtures/unitsdb/**/*' diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index b5c7e8e..a838e07 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,26 +1,16 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2025-07-17 07:21:41 UTC using RuboCop version 1.78.0. +# on 2026-03-27 10:55:21 UTC using RuboCop version 1.86.0. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new # versions of RuboCop, may require this file to be generated again. # Offense count: 1 -# Configuration parameters: Severity, Include. -# Include: **/*.gemspec Gemspec/RequiredRubyVersion: Exclude: - 'unitsdb.gemspec' -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: EnforcedStyle, IndentationWidth. 
-# SupportedStyles: with_first_argument, with_fixed_indentation -Layout/ArgumentAlignment: - Exclude: - - 'lib/unitsdb/commands/qudt/updater.rb' - # Offense count: 1 # This cop supports safe autocorrection (--autocorrect). # Configuration parameters: EnforcedStyleAlignWith. @@ -29,9 +19,9 @@ Layout/BlockAlignment: Exclude: - 'spec/unitsdb/commands/normalize_spec.rb' -# Offense count: 370 +# Offense count: 373 # This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, IgnoreCopDirectives, AllowedPatterns, SplitStrings. +# Configuration parameters: Max, AllowHeredoc, AllowURI, AllowQualifiedName, URISchemes, AllowRBSInlineAnnotation, AllowCopDirectives, AllowedPatterns, SplitStrings. # URISchemes: http, https Layout/LineLength: Enabled: false @@ -40,19 +30,12 @@ Layout/LineLength: # Configuration parameters: IgnoreLiteralBranches, IgnoreConstantBranches, IgnoreDuplicateElseBranch. Lint/DuplicateBranch: Exclude: + - 'lib/unitsdb/commands/check_si/si_formatter.rb' - 'lib/unitsdb/commands/qudt/matcher.rb' - - 'lib/unitsdb/commands/si_formatter.rb' - 'lib/unitsdb/commands/ucum/matcher.rb' - 'lib/unitsdb/database.rb' -# Offense count: 1 -# This cop supports safe autocorrection (--autocorrect). -# Configuration parameters: AutoCorrect. -Lint/UselessAssignment: - Exclude: - - 'lib/unitsdb/commands/qudt/updater.rb' - -# Offense count: 87 +# Offense count: 83 # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max. Metrics/AbcSize: Enabled: false @@ -63,12 +46,12 @@ Metrics/AbcSize: Metrics/BlockLength: Max: 62 -# Offense count: 77 +# Offense count: 73 # Configuration parameters: AllowedMethods, AllowedPatterns, Max. Metrics/CyclomaticComplexity: Enabled: false -# Offense count: 113 +# Offense count: 110 # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns. 
Metrics/MethodLength: Max: 150 @@ -78,7 +61,7 @@ Metrics/MethodLength: Metrics/ParameterLists: Max: 6 -# Offense count: 68 +# Offense count: 64 # Configuration parameters: AllowedMethods, AllowedPatterns, Max. Metrics/PerceivedComplexity: Enabled: false @@ -88,18 +71,17 @@ Metrics/PerceivedComplexity: # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to Naming/MethodParameterName: Exclude: - - 'lib/unitsdb/commands/si_ttl_parser.rb' + - 'lib/unitsdb/commands/check_si/si_ttl_parser.rb' - 'lib/unitsdb/utils.rb' -# Offense count: 10 +# Offense count: 9 # Configuration parameters: MinSize. Performance/CollectionLiteralInLoop: Exclude: - - 'lib/unitsdb/commands/si_formatter.rb' - - 'lib/unitsdb/commands/si_matcher.rb' - - 'lib/unitsdb/commands/si_updater.rb' + - 'lib/unitsdb/commands/check_si/si_formatter.rb' + - 'lib/unitsdb/commands/check_si/si_matcher.rb' + - 'lib/unitsdb/commands/check_si/si_updater.rb' - 'lib/unitsdb/database.rb' - - 'test_prefixes_yaml.rb' # Offense count: 2 RSpec/BeforeAfterAll: @@ -114,9 +96,9 @@ RSpec/BeforeAfterAll: # Prefixes: when, with, without RSpec/ContextWording: Exclude: - - 'spec/unitsdb/commands/check_si_spec.rb' + - 'spec/unitsdb/commands/check_si_command_spec.rb' -# Offense count: 3 +# Offense count: 2 # Configuration parameters: IgnoredMetadata. RSpec/DescribeClass: Exclude: @@ -126,10 +108,9 @@ RSpec/DescribeClass: - '**/spec/system/**/*' - '**/spec/views/**/*' - 'spec/exe/unitsdb_spec.rb' - - 'spec/fixtures/unitsdb/spec/units_spec.rb' - 'spec/unitsdb/version_compatibility_spec.rb' -# Offense count: 32 +# Offense count: 30 # Configuration parameters: CountAsOne. 
RSpec/ExampleLength: Max: 30 @@ -137,20 +118,32 @@ RSpec/ExampleLength: # Offense count: 26 RSpec/ExpectOutput: Exclude: - - 'spec/unitsdb/commands/check_si_spec.rb' + - 'spec/unitsdb/commands/check_si_command_spec.rb' - 'spec/unitsdb/commands/get_spec.rb' - 'spec/unitsdb/commands/normalize_spec.rb' - 'spec/unitsdb/commands/search_spec.rb' - 'spec/unitsdb/commands/validate/identifiers_spec.rb' - 'spec/unitsdb/commands/validate/si_references_spec.rb' +# Offense count: 8 +RSpec/LeakyLocalVariable: + Exclude: + - 'spec/unitsdb/database_spec.rb' + - 'spec/unitsdb/dimensions_spec.rb' + - 'spec/unitsdb/prefixes_spec.rb' + - 'spec/unitsdb/quantities_spec.rb' + - 'spec/unitsdb/scales_spec.rb' + - 'spec/unitsdb/unit_spec.rb' + - 'spec/unitsdb/unit_systems_spec.rb' + - 'spec/unitsdb/units_spec.rb' + # Offense count: 7 # Configuration parameters: . # SupportedStyles: have_received, receive RSpec/MessageSpies: EnforcedStyle: receive -# Offense count: 44 +# Offense count: 45 RSpec/MultipleExpectations: Max: 15 @@ -176,13 +169,13 @@ RSpec/VerifiedDoubles: # This cop supports unsafe autocorrection (--autocorrect-all). Style/CombinableLoops: Exclude: - - 'lib/unitsdb/commands/si_matcher.rb' + - 'lib/unitsdb/commands/check_si/si_matcher.rb' # Offense count: 2 # This cop supports unsafe autocorrection (--autocorrect-all). Style/IdenticalConditionalBranches: Exclude: - - 'lib/unitsdb/commands/si_formatter.rb' + - 'lib/unitsdb/commands/check_si/si_formatter.rb' # Offense count: 14 # Configuration parameters: AllowedMethods. 
@@ -190,10 +183,10 @@ Style/IdenticalConditionalBranches: Style/OptionalBooleanParameter: Exclude: - 'lib/unitsdb/commands/check_si.rb' + - 'lib/unitsdb/commands/check_si/si_updater.rb' - 'lib/unitsdb/commands/qudt/check.rb' - 'lib/unitsdb/commands/qudt/update.rb' - 'lib/unitsdb/commands/qudt/updater.rb' - - 'lib/unitsdb/commands/si_updater.rb' - 'lib/unitsdb/commands/ucum/check.rb' - 'lib/unitsdb/commands/ucum/updater.rb' @@ -201,4 +194,4 @@ Style/OptionalBooleanParameter: # Configuration parameters: Max. Style/SafeNavigationChainLength: Exclude: - - 'lib/unitsdb/commands/si_matcher.rb' + - 'lib/unitsdb/commands/check_si/si_matcher.rb' diff --git a/Gemfile b/Gemfile index 0337a8e..ecfe332 100644 --- a/Gemfile +++ b/Gemfile @@ -5,7 +5,7 @@ source "https://rubygems.org" # Specify your gem's dependencies in suma.gemspec gemspec -gem "diffy" +gem "lutaml-model", github: "lutaml/lutaml-model", branch: "main" gem "nokogiri" gem "rake" gem "rspec" diff --git a/lib/unitsdb.rb b/lib/unitsdb.rb index 6ab3a12..90dde0c 100644 --- a/lib/unitsdb.rb +++ b/lib/unitsdb.rb @@ -2,26 +2,52 @@ require "lutaml/model" -# Configure XML adapter for lutaml-model -Lutaml::Model::Config.configure do |config| - require "lutaml/model/xml/nokogiri_adapter" - config.xml_adapter = Lutaml::Model::Xml::NokogiriAdapter -end - module Unitsdb + autoload :CLI, "unitsdb/cli" + autoload :Config, "unitsdb/config" + autoload :Commands, "unitsdb/commands" + autoload :Database, "unitsdb/database" + autoload :Dimension, "unitsdb/dimension" + autoload :DimensionDetails, "unitsdb/dimension_details" + autoload :DimensionReference, "unitsdb/dimension_reference" + autoload :Dimensions, "unitsdb/dimensions" + autoload :Errors, "unitsdb/errors" + autoload :ExternalReference, "unitsdb/external_reference" + autoload :Identifier, "unitsdb/identifier" + autoload :LocalizedString, "unitsdb/localized_string" + autoload :Prefix, "unitsdb/prefix" + autoload :PrefixReference, "unitsdb/prefix_reference" + autoload :Prefixes, 
"unitsdb/prefixes" + autoload :Quantities, "unitsdb/quantities" + autoload :Quantity, "unitsdb/quantity" + autoload :QuantityReference, "unitsdb/quantity_reference" + autoload :QudtUnit, "unitsdb/qudt" + autoload :QudtQuantityKind, "unitsdb/qudt" + autoload :QudtDimensionVector, "unitsdb/qudt" + autoload :QudtSystemOfUnits, "unitsdb/qudt" + autoload :QudtPrefix, "unitsdb/qudt" + autoload :QudtVocabularies, "unitsdb/qudt" + autoload :RootUnitReference, "unitsdb/root_unit_reference" + autoload :Scale, "unitsdb/scale" + autoload :ScaleProperties, "unitsdb/scale_properties" + autoload :ScaleReference, "unitsdb/scale_reference" + autoload :Scales, "unitsdb/scales" + autoload :SiDerivedBase, "unitsdb/si_derived_base" + autoload :SymbolPresentations, "unitsdb/symbol_presentations" + autoload :UcumBaseUnit, "unitsdb/ucum" + autoload :UcumPrefixValue, "unitsdb/ucum" + autoload :UcumPrefix, "unitsdb/ucum" + autoload :UcumUnitValueFunction, "unitsdb/ucum" + autoload :UcumUnitValue, "unitsdb/ucum" + autoload :UcumUnit, "unitsdb/ucum" + autoload :UcumFile, "unitsdb/ucum" + autoload :Unit, "unitsdb/unit" + autoload :UnitReference, "unitsdb/unit_reference" + autoload :UnitSystem, "unitsdb/unit_system" + autoload :UnitSystemReference, "unitsdb/unit_system_reference" + autoload :UnitSystems, "unitsdb/unit_systems" + autoload :Units, "unitsdb/units" + autoload :Utils, "unitsdb/utils" + # The eager require of "unitsdb/version" is removed below; keep Unitsdb::VERSION resolvable after a plain require "unitsdb" + autoload :VERSION, "unitsdb/version" end - -require_relative "unitsdb/version" -require_relative "unitsdb/config" -require_relative "unitsdb/errors" -require_relative "unitsdb/database" -require_relative "unitsdb/dimensions" -require_relative "unitsdb/prefixes" -require_relative "unitsdb/quantities" -require_relative "unitsdb/unit_systems" -require_relative "unitsdb/scales" -require_relative "unitsdb/units" -require_relative "unitsdb/utils" - -# CLI-related requires -require_relative 
"unitsdb/cli" if defined?(Thor) diff --git a/lib/unitsdb/cli.rb b/lib/unitsdb/cli.rb index 1274c53..b7a5942 100644 --- a/lib/unitsdb/cli.rb +++ 
b/lib/unitsdb/cli.rb @@ -1,11 +1,6 @@ # frozen_string_literal: true require "thor" -require_relative "commands/base" -require_relative "commands/validate" -require_relative "commands/_modify" -require_relative "commands/ucum" -require_relative "commands/qudt" require "fileutils" module Unitsdb @@ -41,7 +36,6 @@ def self.exit_on_failure? desc: "Path to UnitsDB database (required)" def search(query) - require_relative "commands/search" Commands::Search.new(options).run(query) end @@ -53,7 +47,6 @@ def search(query) option :database, type: :string, required: true, aliases: "-d", desc: "Path to UnitsDB database (required)" def get(id) - require_relative "commands/get" Commands::Get.new(options).get(id) end @@ -73,8 +66,7 @@ def get(id) desc: "Path to UnitsDB database (required)" def check_si - require_relative "commands/check_si" - Commands::CheckSi.new(options).run + Commands::CheckSiCommand.new(options).run end desc "release", "Create release files (unified YAML and/or ZIP archive)" @@ -87,7 +79,6 @@ def check_si option :database, type: :string, required: true, aliases: "-d", desc: "Path to UnitsDB database (required)" def release - require_relative "commands/release" Commands::Release.new(options).run end end diff --git a/lib/unitsdb/commands.rb b/lib/unitsdb/commands.rb new file mode 100644 index 0000000..bb91b5e --- /dev/null +++ b/lib/unitsdb/commands.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module Unitsdb + module Commands + autoload :ModifyCommand, "unitsdb/commands/_modify" + autoload :Base, "unitsdb/commands/base" + autoload :CheckSi, "unitsdb/commands/check_si" + autoload :CheckSiCommand, "unitsdb/commands/check_si" + autoload :Get, "unitsdb/commands/get" + autoload :Normalize, "unitsdb/commands/normalize" + autoload :Qudt, "unitsdb/commands/qudt" + autoload :QudtCommand, "unitsdb/commands/qudt" + autoload :Release, "unitsdb/commands/release" + autoload :Search, "unitsdb/commands/search" + autoload :Ucum, "unitsdb/commands/ucum" + autoload 
:UcumCommand, "unitsdb/commands/ucum" + autoload :Validate, "unitsdb/commands/validate" + autoload :ValidateCommand, "unitsdb/commands/validate" + end +end diff --git a/lib/unitsdb/commands/_modify.rb b/lib/unitsdb/commands/_modify.rb index 4d70661..f26404e 100644 --- a/lib/unitsdb/commands/_modify.rb +++ b/lib/unitsdb/commands/_modify.rb @@ -18,7 +18,6 @@ class ModifyCommand < Thor desc: "Process all YAML files in the repository" def normalize(input = nil, output = nil) - require_relative "normalize" Normalize.new(options).run(input, output) end end diff --git a/lib/unitsdb/commands/check_si.rb b/lib/unitsdb/commands/check_si.rb index bb42191..c2eb5da 100644 --- a/lib/unitsdb/commands/check_si.rb +++ b/lib/unitsdb/commands/check_si.rb @@ -1,16 +1,15 @@ # frozen_string_literal: true -require_relative "base" -require_relative "../database" -require_relative "../errors" -require_relative "si_ttl_parser" -require_relative "si_formatter" -require_relative "si_matcher" -require_relative "si_updater" - module Unitsdb module Commands - class CheckSi < Base + module CheckSi + autoload :SiTtlParser, "unitsdb/commands/check_si/si_ttl_parser" + autoload :SiMatcher, "unitsdb/commands/check_si/si_matcher" + autoload :SiFormatter, "unitsdb/commands/check_si/si_formatter" + autoload :SiUpdater, "unitsdb/commands/check_si/si_updater" + end + + class CheckSiCommand < Base # Constants ENTITY_TYPES = %w[units quantities prefixes].freeze @@ -35,7 +34,7 @@ def run puts "Include potential matches: #{include_potential ? 
'Yes' : 'No'}" # Parse TTL files - graph = SiTtlParser.parse_ttl_files(ttl_dir) + graph = ::Unitsdb::Commands::CheckSi::SiTtlParser.parse_ttl_files(ttl_dir) # Process entity types process_entities(entity_type, graph, direction, output_dir, @@ -64,7 +63,9 @@ def process_entity_type(entity_type, graph, direction, output_dir, puts "\n========== Processing #{entity_type.upcase} References ==========\n" db_entities = @db.send(entity_type) - ttl_entities = SiTtlParser.extract_entities_from_ttl(entity_type, graph) + ttl_entities = ::Unitsdb::Commands::CheckSi::SiTtlParser.extract_entities_from_ttl( + entity_type, graph + ) puts "Found #{ttl_entities.size} #{entity_type} in SI digital framework" puts "Found #{db_entities.size} #{entity_type} in database" @@ -97,44 +98,44 @@ def validate_parameters(direction, ttl_dir) # Direction handler: TTL → DB def check_from_si(entity_type, ttl_entities, db_entities, output_dir, include_potential = false) - SiFormatter.print_direction_header("SI → UnitsDB") + ::Unitsdb::Commands::CheckSi::SiFormatter.print_direction_header("SI → UnitsDB") - matches, missing_matches, unmatched_ttl = SiMatcher.match_ttl_to_db( + matches, missing_matches, unmatched_ttl = ::Unitsdb::Commands::CheckSi::SiMatcher.match_ttl_to_db( entity_type, ttl_entities, db_entities ) # Print results - SiFormatter.display_si_results(entity_type, matches, missing_matches, - unmatched_ttl) + ::Unitsdb::Commands::CheckSi::SiFormatter.display_si_results(entity_type, matches, missing_matches, + unmatched_ttl) # Update references if needed return unless output_dir && !missing_matches.empty? 
output_file = File.join(output_dir, "#{entity_type}.yaml") - SiUpdater.update_references(entity_type, missing_matches, db_entities, output_file, include_potential, - database_path) + ::Unitsdb::Commands::CheckSi::SiUpdater.update_references(entity_type, missing_matches, db_entities, output_file, include_potential, + database_path) puts "\nUpdated references written to #{output_file}" end # Direction handler: DB → TTL def check_to_si(entity_type, ttl_entities, db_entities, output_dir, include_potential = false) - SiFormatter.print_direction_header("UnitsDB → SI") + ::Unitsdb::Commands::CheckSi::SiFormatter.print_direction_header("UnitsDB → SI") - matches, missing_refs, unmatched_db = SiMatcher.match_db_to_ttl( + matches, missing_refs, unmatched_db = ::Unitsdb::Commands::CheckSi::SiMatcher.match_db_to_ttl( entity_type, ttl_entities, db_entities ) # Print results - SiFormatter.display_db_results(entity_type, matches, missing_refs, - unmatched_db) + ::Unitsdb::Commands::CheckSi::SiFormatter.display_db_results(entity_type, matches, missing_refs, + unmatched_db) # Update references if needed return unless output_dir && !missing_refs.empty? 
output_file = File.join(output_dir, "#{entity_type}.yaml") - SiUpdater.update_db_references(entity_type, missing_refs, output_file, - include_potential, @options[:database]) + ::Unitsdb::Commands::CheckSi::SiUpdater.update_db_references(entity_type, missing_refs, output_file, + include_potential, @options[:database]) puts "\nUpdated references written to #{output_file}" end end diff --git a/lib/unitsdb/commands/check_si/si_formatter.rb b/lib/unitsdb/commands/check_si/si_formatter.rb new file mode 100644 index 0000000..348a5e3 --- /dev/null +++ b/lib/unitsdb/commands/check_si/si_formatter.rb @@ -0,0 +1,488 @@ +# frozen_string_literal: true + +require "terminal-table" + +module Unitsdb + module Commands + module CheckSi + # Formatter for SI check results + module SiFormatter + module_function + + # Display TTL → DB results + def display_si_results(entity_type, matches, missing_matches, + unmatched_ttl) + puts "\n=== #{entity_type.capitalize} with matching SI references ===" + if matches.empty? + puts "None" + else + rows = [] + matches.each do |match| + si_suffix = SiTtlParser.extract_identifying_suffix(match[:si_uri]) + rows << [ + "UnitsDB: #{match[:entity_id]}", + "(#{match[:entity_name] || 'unnamed'})", + ] + rows << [ + "SI TTL: #{si_suffix}", + "(#{match[:si_label] || match[:si_name] || 'unnamed'})", + ] + rows << :separator unless match == matches.last + end + + table = Terminal::Table.new( + title: "Valid SI Reference Mappings", + rows: rows, + ) + puts table + end + + puts "\n=== #{entity_type.capitalize} without SI references ===" + if missing_matches.empty? 
+ puts "None" + else + # Split matches into exact and potential + exact_matches = [] + potential_matches = [] + + missing_matches.each do |match| + # Get match details + match_details = match[:match_details] + match_desc = match_details&.dig(:match_desc) || "" + + # Symbol matches and partial matches should always be potential matches + if %w[symbol_match partial_match].include?(match_desc) + potential_matches << match + elsif match_details&.dig(:exact) == false + potential_matches << match + else + exact_matches << match + end + end + + # Display exact matches + puts "\n=== Exact Matches (#{exact_matches.size}) ===" + if exact_matches.empty? + puts "None" + else + rows = [] + exact_matches.each do |match| + # First row: UnitsDB entity + rows << [ + "UnitsDB: #{match[:entity_id]}", + "(#{match[:entity_name] || 'unnamed'})", + ] + + # Handle multiple SI matches in a single cell if present + if match[:multiple_si] + # Ensure no duplicate URIs + si_text_parts = [] + si_label_parts = [] + seen_uris = {} + + match[:multiple_si].each do |si_data| + uri = si_data[:uri] + next if seen_uris[uri] # Skip if we've already seen this URI + + seen_uris[uri] = true + + suffix = SiTtlParser.extract_identifying_suffix(uri) + si_text_parts << suffix + si_label_parts << (si_data[:label] || si_data[:name]) + end + + rows << [ + "SI TTL: #{si_text_parts.join(', ')}", + "(#{si_label_parts.join(', ')})", + ] + else + # Second row: SI TTL suffix and label/name + si_suffix = SiTtlParser.extract_identifying_suffix(match[:si_uri]) + rows << [ + "SI TTL: #{si_suffix}", + "(#{match[:si_label] || match[:si_name] || 'unnamed'})", + ] + end + + # Status line with match type + match_details = match[:match_details] + match_desc = match_details&.dig(:match_desc) || "" + match_info = format_match_info(match_desc) + status_text = match_info.empty? ? 
"Missing reference" : "Missing reference (#{match_info})" + + rows << [ + "Status: #{status_text}", + "✗", + ] + rows << :separator unless match == exact_matches.last + end + + table = Terminal::Table.new( + title: "Exact Match Missing SI References", + rows: rows, + ) + puts table + end + + # Display potential matches + puts "\n=== Potential Matches (#{potential_matches.size}) ===" + if potential_matches.empty? + puts "None" + else + rows = [] + potential_matches.each do |match| + # First row: UnitsDB entity + rows << [ + "UnitsDB: #{match[:entity_id]}", + "(#{match[:entity_name] || 'unnamed'})", + ] + + # Handle multiple SI matches in a single cell if present + if match[:multiple_si] + # Ensure no duplicate URIs + si_text_parts = [] + seen_uris = {} + + match[:multiple_si].each do |si_data| + uri = si_data[:uri] + next if seen_uris[uri] # Skip if we've already seen this URI + + seen_uris[uri] = true + + suffix = SiTtlParser.extract_identifying_suffix(uri) + si_text_parts << "#{suffix} (#{si_data[:label] || si_data[:name]})" + end + + rows << [ + "SI TTL: #{si_text_parts.join(', ')}", + "", + ] + else + # Single TTL entity + si_suffix = SiTtlParser.extract_identifying_suffix(match[:si_uri]) + rows << [ + "SI TTL: #{si_suffix}", + "(#{match[:si_label] || match[:si_name] || 'unnamed'})", + ] + end + + # Status line with match type (same format as the exact-match table above) + match_details = match[:match_details] + match_desc = match_details&.dig(:match_desc) || "" + match_info = format_match_info(match_desc) + status_text = match_info.empty? ? "Missing reference" : "Missing reference (#{match_info})" + + rows << [ + "Status: #{status_text}", + "✗", + ] + rows << :separator unless match == potential_matches.last + end + + table = Terminal::Table.new( + title: "Potential Match Missing SI References", + rows: rows, + ) + puts table + end + end + + puts "\n=== SI #{entity_type.capitalize} not mapped to our database ===" + if unmatched_ttl.empty? 
+ puts "None (All TTL entities are referenced - Good job!)" + else + # Group unmatched ttl entities by their URI to avoid duplicates + grouped_unmatched = {} + + unmatched_ttl.each do |entity| + uri = entity[:uri] + grouped_unmatched[uri] = entity unless grouped_unmatched.key?(uri) + end + + rows = [] + unique_entities = grouped_unmatched.values + + unique_entities.each do |entity| + # Create the SI TTL row + si_suffix = SiTtlParser.extract_identifying_suffix(entity[:uri]) + ttl_row = ["SI TTL: #{si_suffix}", + "(#{entity[:label] || entity[:name] || 'unnamed'})"] + + rows << ttl_row + rows << [ + "Status: No matching UnitsDB entity", + "?", + ] + rows << :separator unless entity == unique_entities.last + end + + table = Terminal::Table.new( + title: "Unmapped SI Entities", + rows: rows, + ) + puts table + end + end + + # Display DB → TTL results + def display_db_results(entity_type, matches, missing_refs, unmatched_db) + puts "\n=== Summary of database entities referencing SI ===" + puts "#{entity_type.capitalize} with SI references: #{matches.size}" + puts "#{entity_type.capitalize} missing SI references: #{missing_refs.size}" + puts "Database #{entity_type} not matching any SI entity: #{unmatched_db.size}" + + # Show entities with valid references + unless matches.empty? + puts "\n=== #{entity_type.capitalize} with SI references ===" + rows = [] + matches.each do |match| + db_entity = match[:db_entity] + entity_id = match[:entity_id] || db_entity.short + entity_name = db_entity.respond_to?(:names) ? db_entity.names&.first : "unnamed" + si_suffix = SiTtlParser.extract_identifying_suffix(match[:ttl_uri]) + + ttl_label = match[:ttl_entity] ? 
(match[:ttl_entity][:label] || match[:ttl_entity][:name]) : "Unknown" + + rows << [ + "UnitsDB: #{entity_id}", + "(#{entity_name})", + ] + rows << [ + "SI TTL: #{si_suffix}", + "(#{ttl_label})", + ] + rows << :separator unless match == matches.last + end + + table = Terminal::Table.new( + title: "Valid SI References", + rows: rows, + ) + puts table + end + + puts "\n=== #{entity_type.capitalize} that should reference SI ===" + if missing_refs.empty? + puts "None" + else + # Split missing_refs into exact and potential matches + exact_matches = [] + potential_matches = [] + + missing_refs.each do |match| + # Determine match type + ttl_entities = match[:ttl_entities] + uri = ttl_entities.first[:uri] + match_type = "Exact match" # Default + match_type = match[:match_types][uri] if match[:match_types] && match[:match_types][uri] + + # Get match description if available + entity_id = match[:db_entity].short + match_pair_key = "#{entity_id}:#{ttl_entities.first[:uri]}" + match_details = Unitsdb::Commands::CheckSi::SiMatcher.instance_variable_get(:@match_details)&.dig(match_pair_key) + match_desc = match_details[:match_desc] if match_details && match_details[:match_desc] + + # Symbol matches and partial matches should always be potential matches + if %w[symbol_match partial_match].include?(match_desc) + potential_matches << match + elsif match_type == "Exact match" + exact_matches << match + else + potential_matches << match + end + end + + # Display exact matches + puts "\n=== Exact Matches (#{exact_matches.size}) ===" + if exact_matches.empty? + puts "None" + else + rows = [] + exact_matches.each do |match| + db_entity = match[:db_entity] + entity_id = match[:entity_id] || db_entity.short + entity_name = db_entity.respond_to?(:names) ? 
db_entity.names&.first : "unnamed" + + # Handle multiple TTL entities in a single row + ttl_entities = match[:ttl_entities] + if ttl_entities.size == 1 + # Single TTL entity + ttl_entity = ttl_entities.first + si_suffix = SiTtlParser.extract_identifying_suffix(ttl_entity[:uri]) + + rows << [ + "UnitsDB: #{entity_id}", + "(#{entity_name})", + ] + rows << [ + "SI TTL: #{si_suffix}", + "(#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})", + ] + else + # Multiple TTL entities, combine them - ensure no duplicates + si_text_parts = [] + seen_uris = {} + + ttl_entities.each do |ttl_entity| + uri = ttl_entity[:uri] + next if seen_uris[uri] # Skip if we've already seen this URI + + seen_uris[uri] = true + + suffix = SiTtlParser.extract_identifying_suffix(uri) + si_text_parts << "#{suffix} (#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})" + end + + si_text = si_text_parts.join(", ") + + rows << [ + "UnitsDB: #{entity_id}", + "(#{entity_name})", + ] + rows << [ + "SI TTL: #{si_text}", + "", + ] + end + + # Get match details for this match + match_pair_key = "#{db_entity.short}:#{ttl_entities.first[:uri]}" + match_details = Unitsdb::Commands::CheckSi::SiMatcher.instance_variable_get(:@match_details)&.dig(match_pair_key) + + # Format match info + match_info = "" + match_info = format_match_info(match_details[:match_desc]) if match_details + + status_text = match_info.empty? ? "Missing reference" : "Missing reference (#{match_info})" + rows << [ + "Status: #{status_text}", + "✗", + ] + rows << :separator unless match == exact_matches.last + end + + table = Terminal::Table.new( + title: "Exact Match Missing SI References", + rows: rows, + ) + puts table + end + + # Display potential matches + puts "\n=== Potential Matches (#{potential_matches.size}) ===" + if potential_matches.empty? 
+ puts "None" + else + rows = [] + potential_matches.each do |match| + db_entity = match[:db_entity] + entity_id = match[:entity_id] || db_entity.short + entity_name = db_entity.respond_to?(:names) ? db_entity.names&.first : "unnamed" + + # Handle multiple TTL entities in a single row + ttl_entities = match[:ttl_entities] + if ttl_entities.size == 1 + # Single TTL entity + ttl_entity = ttl_entities.first + si_suffix = SiTtlParser.extract_identifying_suffix(ttl_entity[:uri]) + + rows << [ + "UnitsDB: #{entity_id}", + "(#{entity_name})", + ] + rows << [ + "SI TTL: #{si_suffix}", + "(#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})", + ] + else + # Multiple TTL entities, combine them - ensure no duplicates + si_text_parts = [] + seen_uris = {} + + ttl_entities.each do |ttl_entity| + uri = ttl_entity[:uri] + next if seen_uris[uri] # Skip if we've already seen this URI + + seen_uris[uri] = true + + suffix = SiTtlParser.extract_identifying_suffix(uri) + si_text_parts << "#{suffix} (#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})" + end + + si_text = si_text_parts.join(", ") + + rows << [ + "UnitsDB: #{entity_id}", + "(#{entity_name})", + ] + rows << [ + "SI TTL: #{si_text}", + "", + ] + end + + # Get match details + match_pair_key = "#{db_entity.short}:#{ttl_entities.first[:uri]}" + match_details = Unitsdb::Commands::CheckSi::SiMatcher.instance_variable_get(:@match_details)&.dig(match_pair_key) + + # Format match info + match_info = "" + match_info = format_match_info(match_details[:match_desc]) if match_details + + status_text = match_info.empty? ? 
"Missing reference" : "Missing reference (#{match_info})" + rows << [ + "Status: #{status_text}", + "✗", + ] + rows << :separator unless match == potential_matches.last + end + + table = Terminal::Table.new( + title: "Potential Match Missing SI References", + rows: rows, + ) + puts table + end + end + end + + # Print direction header + def print_direction_header(direction) + case direction + when "SI → UnitsDB" + puts "\n=== Checking SI → UnitsDB (TTL entities referenced by database) ===" + when "UnitsDB → SI" + puts "\n=== Checking UnitsDB → SI (database entities referencing TTL) ===" + end + + puts "\n=== Instructions for #{direction} direction ===" + case direction + when "SI → UnitsDB" + puts "If you are the UnitsDB Register Manager, please ensure that all SI entities have proper references in the UnitsDB database." + puts "For each missing reference, add a reference with the appropriate URI and 'authority: \"si-digital-framework\"'." + when "UnitsDB → SI" + puts "If you are the UnitsDB Register Manager, please add SI references to UnitsDB entities that should have them." + puts "For each entity that should reference SI, add a reference with 'authority: \"si-digital-framework\"' and the SI TTL URI." 
module Unitsdb
  module Commands
    module CheckSi
      # Matcher for SI entities and UnitsDB entities.
      #
      # TTL entities are plain hashes read through the keys :uri, :name,
      # :label, :alt_label and :symbol. Database entities are duck-typed: the
      # matcher calls #short on every entity and only uses #names,
      # #references, #identifiers, #id, #symbols and #symbol when the entity
      # responds to them.
      #
      # Detailed match results are remembered in the module-level
      # @match_details hash, keyed by "<entity id>:<TTL URI>".
      module SiMatcher
        SI_AUTHORITY = "si-digital-framework"
        @match_details = {}

        module_function

        # Match TTL entities to database entities (from_si direction).
        #
        # @param entity_type [String] "units", "quantities" or "prefixes"
        # @param ttl_entities [Array<Hash>] parsed SI entities
        # @param db_entities [Array] UnitsDB entities
        # @return [Array(Array<Hash>, Array<Hash>, Array<Hash>)]
        #   [matches, missing_matches, unmatched_ttl]
        def match_ttl_to_db(entity_type, ttl_entities, db_entities)
          matches = []
          missing_matches = []
          matched_ttl_uris = {} # URI => true, used as a set
          processed_pairs = {}  # "entity:uri" pairs already examined
          entity_matches = {}   # entity id => SI entities it is missing
          ttl_by_uri = index_ttl_by_uri(ttl_entities)

          # First pass: entities that already carry an SI reference
          db_entities.each do |entity|
            si_references(entity).each do |ref|
              matched_ttl_uris[ref.uri] = true
              ttl_entity = ttl_by_uri[ref.uri]
              next unless ttl_entity

              matches << ttl_match_data(entity, ttl_entity)
            end
          end

          # Second pass: TTL entities nobody references yet
          ttl_entities.each do |ttl_entity|
            uri = ttl_entity[:uri]
            next if matched_ttl_uris[uri]

            find_matching_entities(entity_type, ttl_entity, db_entities).each do |entity|
              entity_id = entity.short

              # Examine every entity/TTL pair only once
              pair_key = "#{entity_id}:#{uri}"
              next if processed_pairs[pair_key]

              processed_pairs[pair_key] = true

              match_result = match_entity_names?(entity_type, entity, ttl_entity)
              next unless match_result[:match]

              # Only a confirmed match takes the TTL entity out of the
              # "unmatched" list; a candidate rejected above must not make it
              # vanish from every result list.
              matched_ttl_uris[uri] = true
              remember_match_details(pair_key, match_result)

              match_data = ttl_match_data(entity, ttl_entity).merge(
                match_type: match_result[:match_type],
                match_details: match_result,
                match_types: { uri => match_result[:match_type] },
              )

              if si_references(entity).any? { |ref| ref.uri == uri }
                matches << match_data
                next
              end

              # Group by entity id so several SI matches can be reported
              # together; only the first one becomes the missing_matches row.
              first_for_entity = !entity_matches.key?(entity_id)
              (entity_matches[entity_id] ||= []) << {
                uri: uri,
                name: ttl_entity[:name],
                label: ttl_entity[:label],
              }
              missing_matches << match_data if first_for_entity
            end
          end

          # Record the full list when an entity matches several SI entities
          missing_matches.each do |match|
            si_matches = entity_matches[match[:entity_id]]
            match[:multiple_si] = si_matches if si_matches && si_matches.size > 1
          end

          # Namespace root URIs ("…/units/") are never reported as unmatched
          unmatched_ttl = ttl_entities.reject do |entity|
            uri = entity[:uri]
            matched_ttl_uris[uri] ||
              uri.end_with?("/units/", "/quantities/", "/prefixes/")
          end

          [matches, missing_matches, unmatched_ttl]
        end

        # Match database entities to TTL entities (to_si direction).
        #
        # @param entity_type [String] "units", "quantities" or "prefixes"
        # @param ttl_entities [Array<Hash>] parsed SI entities
        # @param db_entities [Array] UnitsDB entities
        # @return [Array(Array<Hash>, Array<Hash>, Array)]
        #   [matches, missing_refs, unmatched_db]
        def match_db_to_ttl(entity_type, ttl_entities, db_entities)
          matches = []
          missing_refs = []
          matched_db_ids = {}   # entity id => true, used as a set
          processed_db_ids = {} # entity ids already examined
          ttl_by_uri = index_ttl_by_uri(ttl_entities)

          # Map from NIST IDs to display names for original output compatibility
          nist_id_to_display = nist_display_names(entity_type, db_entities)

          db_entities.each do |db_entity|
            entity_id = find_entity_id(db_entity)
            display_id = nist_id_to_display[entity_id] || entity_id

            next if processed_db_ids[entity_id]

            processed_db_ids[entity_id] = true

            # Entities that already reference SI are reported as-is
            existing_refs = si_references(db_entity)
            unless existing_refs.empty?
              existing_refs.each do |ref|
                matches << {
                  entity_id: display_id,
                  db_entity: db_entity,
                  ttl_uri: ref.uri,
                  ttl_entity: ttl_by_uri[ref.uri],
                }
              end
              matched_db_ids[entity_id] = true
              next
            end

            # Otherwise collect every TTL entity this one could reference
            matching_ttl = []
            match_types = {}

            ttl_entities.each do |ttl_entity|
              match_result = match_entity_names?(entity_type, db_entity, ttl_entity)
              next unless match_result[:match]

              matching_ttl << ttl_entity
              match_types[ttl_entity[:uri]] = match_result[:match_type]
              remember_match_details("#{entity_id}:#{ttl_entity[:uri]}", match_result)
            end

            next if matching_ttl.empty?

            matched_db_ids[entity_id] = true
            missing_refs << {
              entity_id: display_id,
              db_entity: db_entity,
              ttl_entities: matching_ttl,
              match_types: match_types,
            }
          end

          unmatched_db = db_entities.reject do |entity|
            matched_db_ids[find_entity_id(entity)]
          end

          [matches, missing_refs, unmatched_db]
        end

        # Find entity ID: #id when present, else the first identifier's id,
        # else #short. Entities whose #identifiers is nil or empty fall
        # through to #short instead of raising.
        def find_entity_id(entity)
          return entity.id if entity.respond_to?(:id) && entity.id

          if entity.respond_to?(:identifiers)
            first_identifier = entity.identifiers&.first
            return first_identifier.id if first_identifier.respond_to?(:id)
          end

          entity.short
        end

        # Display name of an entity: its first name, or nil when it has none.
        def format_entity_name(entity)
          return nil unless entity.respond_to?(:names)

          entity.names&.first || nil
        end

        # Find matching entities for a TTL entity; unknown entity types
        # yield an empty list.
        def find_matching_entities(entity_type, ttl_entity, db_entities)
          case entity_type
          when "units"
            find_matching_units(ttl_entity, db_entities)
          when "quantities"
            find_matching_quantities(ttl_entity, db_entities)
          when "prefixes"
            find_matching_prefixes(ttl_entity, db_entities)
          else
            []
          end
        end

        # Units whose short or any name equals the TTL name/label, or whose
        # ASCII symbol equals the TTL symbol (all case-insensitive).
        def find_matching_units(ttl_unit, units)
          targets = [ttl_unit[:name], ttl_unit[:label]]

          units.select do |unit|
            text_in?(unit.short, targets) ||
              names_match?(unit, targets) ||
              matching_symbol(unit_symbols(unit), ttl_unit[:symbol])
          end.uniq
        end

        # Quantities whose short or any name equals the TTL name, label or
        # alt_label (case-insensitive).
        def find_matching_quantities(ttl_quantity, quantities)
          targets = [ttl_quantity[:name], ttl_quantity[:label],
                     ttl_quantity[:alt_label]]

          quantities.select do |quantity|
            text_in?(quantity.short, targets) || names_match?(quantity, targets)
          end.uniq
        end

        # Prefixes whose short or any name equals the TTL name/label, or
        # whose ASCII symbol equals the TTL symbol (all case-insensitive).
        def find_matching_prefixes(ttl_prefix, prefixes)
          targets = [ttl_prefix[:name], ttl_prefix[:label]]

          prefixes.select do |prefix|
            text_in?(prefix.short, targets) ||
              names_match?(prefix, targets) ||
              matching_symbol(prefix_symbols(prefix), ttl_prefix[:symbol])
          end.uniq
        end

        # Match entity names with detailed type information.
        #
        # Despite the trailing "?", this returns a Hash. When nothing matches
        # it is { match: false }; otherwise it carries :match, :exact,
        # :match_type, :match_desc and :details.
        def match_entity_names?(entity_type, db_entity, ttl_entity)
          match_details = exact_name_match(db_entity, ttl_entity) || { match: false }

          # A "sidereal_" unit matching a TTL entity that does not mention
          # "sidereal" is only a partial match.
          if match_details[:exact] && db_entity.short&.include?("sidereal_") &&
              !sidereal_ttl?(ttl_entity)
            match_details = potential_match(
              "partial_match",
              "UnitsDB '#{db_entity.short}' partially matches SI '#{ttl_entity[:name]}'",
            )
          end

          # Symbols are ambiguous, so a symbol hit is never an exact match
          if !match_details[:match] && %w[units prefixes].include?(entity_type) &&
              ttl_entity[:symbol]
            symbols = entity_type == "units" ? unit_symbols(db_entity) : prefix_symbols(db_entity)
            symbol = matching_symbol(symbols, ttl_entity[:symbol])
            if symbol
              match_details = potential_match(
                "symbol_match",
                "UnitsDB symbol '#{symbol.ascii}' matches SI symbol '#{ttl_entity[:symbol]}'",
              )
            end
          end

          match_details
        end

        # --- internal helpers -------------------------------------------------

        # Case-insensitive equality; a missing value on either side never matches.
        def same_text?(left, right)
          return false if left.nil? || right.nil?

          left.downcase == right.downcase
        end

        # True when text equals any of the candidates (case-insensitive).
        def text_in?(text, candidates)
          candidates.any? { |candidate| same_text?(text, candidate) }
        end

        # True when any of the entity's names equals one of the candidates.
        def names_match?(entity, candidates)
          return false unless entity.respond_to?(:names) && entity.names

          entity.names.any? { |name| text_in?(name, candidates) }
        end

        # First symbol object whose #ascii equals ttl_symbol, or nil.
        def matching_symbol(symbols, ttl_symbol)
          return nil unless symbols && ttl_symbol

          symbols.find do |sym|
            sym.respond_to?(:ascii) && same_text?(sym.ascii, ttl_symbol)
          end
        end

        # Symbol list of a unit (nil when the entity has none).
        def unit_symbols(entity)
          entity.symbols if entity.respond_to?(:symbols)
        end

        # The single symbol of a prefix wrapped in a list (nil when absent).
        def prefix_symbols(entity)
          return nil unless entity.respond_to?(:symbol) && entity.symbol

          [entity.symbol]
        end

        # References of an entity whose authority is the SI digital framework.
        def si_references(entity)
          return [] unless entity.respond_to?(:references) && entity.references

          entity.references.select { |ref| ref.authority == SI_AUTHORITY }
        end

        # URI => first TTL entity carrying that URI.
        def index_ttl_by_uri(ttl_entities)
          ttl_entities.each_with_object({}) do |ttl_entity, index|
            index[ttl_entity[:uri]] ||= ttl_entity
          end
        end

        # NIST id => short, only for quantities and prefixes.
        def nist_display_names(entity_type, db_entities)
          return {} unless %w[quantities prefixes].include?(entity_type)

          db_entities.each_with_object({}) do |entity, display|
            next unless entity.respond_to?(:identifiers) && entity.respond_to?(:short)

            nist_id = entity.identifiers&.first&.id
            display[nist_id] = entity.short if nist_id&.start_with?("NIST")
          end
        end

        # Fields shared by every row of the TTL → DB result lists.
        def ttl_match_data(entity, ttl_entity)
          {
            entity_id: entity.short,
            entity_name: format_entity_name(entity),
            si_uri: ttl_entity[:uri],
            si_name: ttl_entity[:name],
            si_label: ttl_entity[:label],
            si_alt_label: ttl_entity[:alt_label],
            si_symbol: ttl_entity[:symbol],
            entity: entity,
          }
        end

        # Store a match result under its "entity:uri" key.
        def remember_match_details(pair_key, match_result)
          (@match_details ||= {})[pair_key] = match_result
        end

        # Exact match on short first, then on names (name, label, alt_label
        # in that order). Names are only consulted when short did not match.
        def exact_name_match(db_entity, ttl_entity)
          short = db_entity.short
          if same_text?(short, ttl_entity[:name])
            return exact_match(
              "short_to_name",
              "UnitsDB short '#{short}' matches SI name '#{ttl_entity[:name]}'",
            )
          end
          if same_text?(short, ttl_entity[:label])
            return exact_match(
              "short_to_label",
              "UnitsDB short '#{short}' matches SI label '#{ttl_entity[:label]}'",
            )
          end
          return nil unless db_entity.respond_to?(:names) && db_entity.names

          %i[name label alt_label].each do |key|
            db_name = db_entity.names.find { |name| same_text?(name, ttl_entity[key]) }
            next unless db_name

            return exact_match(
              "name_to_#{key}",
              "UnitsDB name '#{db_name}' matches SI #{key} '#{ttl_entity[key]}'",
            )
          end

          nil
        end

        # Result hash for an exact match.
        def exact_match(match_desc, details)
          {
            match: true,
            exact: true,
            match_type: "Exact match",
            match_desc: match_desc,
            details: details,
          }
        end

        # Result hash for a potential (non-exact) match.
        def potential_match(match_desc, details)
          {
            match: true,
            exact: false,
            match_type: "Potential match",
            match_desc: match_desc,
            details: details,
          }
        end

        # True when the TTL name or label mentions "sidereal".
        def sidereal_ttl?(ttl_entity)
          ttl_entity[:name]&.include?("sidereal") ||
            ttl_entity[:label]&.include?("sidereal")
        end

        private_class_method :same_text?, :text_in?, :names_match?,
                             :matching_symbol, :unit_symbols, :prefix_symbols,
                             :si_references, :index_ttl_by_uri,
                             :nist_display_names, :ttl_match_data,
                             :remember_match_details, :exact_name_match,
                             :exact_match, :potential_match, :sidereal_ttl?
      end
    end
  end
end
module Unitsdb
  module Commands
    module CheckSi
      # Parser for SI TTL files.
      #
      # Relies on the RDF constant provided by the "rdf" and "rdf/turtle"
      # libraries being loaded by the surrounding file.
      module SiTtlParser
        SI_URI_PREFIX = "http://si-digital-framework.org/SI/"

        # Namespace that holds the entities of each supported entity type
        ENTITY_NAMESPACES = {
          "units" => "http://si-digital-framework.org/SI/units/",
          "quantities" => "http://si-digital-framework.org/quantities/",
          "prefixes" => "http://si-digital-framework.org/SI/prefixes/",
        }.freeze

        module_function

        # Parse TTL files and return RDF graph.
        #
        # @param dir [String] directory whose "*.ttl" files are loaded
        # @return [RDF::Graph] one graph holding the statements of every file
        def parse_ttl_files(dir)
          puts "Parsing TTL files in #{dir}..."
          graph = RDF::Graph.new

          Dir.glob(File.join(dir, "*.ttl")).each do |file|
            puts "  Reading #{File.basename(file)}"
            graph.load(file, format: :ttl)
          end

          graph
        end

        # Extract entities from TTL based on entity type.
        #
        # @param entity_type [String] "units", "quantities" or "prefixes"
        # @param graph [RDF::Graph] graph returned by parse_ttl_files
        # @return [Array<Hash>] entities with :uri, :name, :label, :alt_label
        #   and, for units and prefixes, :symbol when one is declared; an
        #   empty list for any other entity type
        def extract_entities_from_ttl(entity_type, graph)
          # Reject unknown types before building any RDF vocabulary
          namespace_uri = ENTITY_NAMESPACES[entity_type]
          return [] unless namespace_uri

          skos = RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#")
          si = RDF::Vocabulary.new("http://si-digital-framework.org/SI#")

          entities = extract_base_entities(graph, RDF::Vocabulary.new(namespace_uri), skos)
          add_symbols_to_entities(entities, graph, si) if %w[units prefixes].include?(entity_type)
          entities
        end

        # Extract base entities from graph: one hash per distinct subject
        # that has a skos:prefLabel and whose URI starts with the namespace.
        def extract_base_entities(graph, namespace, skos)
          namespace_prefix = namespace.to_s
          processed_uris = {}
          entities = []

          RDF::Query.new({ entity: { skos.prefLabel => :label } })
            .execute(graph).each do |solution|
            entity_uri = solution.entity.to_s
            next unless entity_uri.start_with?(namespace_prefix)
            next if processed_uris[entity_uri]

            processed_uris[entity_uri] = true

            entities << {
              uri: entity_uri,
              name: entity_uri.split("/").last,
              label: first_value(graph, entity_uri, skos.prefLabel),
              alt_label: first_value(graph, entity_uri, skos.altLabel),
            }
          end

          entities
        end

        # Add symbols to entities: sets :symbol on every entity that has an
        # si:hasSymbol value in the graph.
        def add_symbols_to_entities(entities, graph, si)
          entities.each do |entity|
            symbol = first_value(graph, entity[:uri], si.hasSymbol)
            entity[:symbol] = symbol if symbol
          end
        end

        # Extract suffix from URI for display.
        #
        # @param uri [String, nil]
        # @return [String] "" for nil; "units/<last segment>" for unit URIs;
        #   otherwise the URI without its leading SI_URI_PREFIX
        def extract_identifying_suffix(uri)
          return "" unless uri

          if uri.include?("/units/")
            # Return units/name format for units (without duplicating "units/")
            "units/#{uri.split('/').last}"
          else
            # Strip only the leading prefix; a later occurrence of the prefix
            # inside the URI is part of the identifier and must survive.
            uri.delete_prefix(SI_URI_PREFIX)
          end
        end

        # First object of (uri, predicate) in the graph as a String, or nil.
        def first_value(graph, uri, predicate)
          RDF::Query.new({ RDF::URI(uri) => { predicate => :value } })
            .execute(graph).first&.value&.to_s
        end

        private_class_method :first_value
      end
    end
  end
end
module Unitsdb
  module Commands
    module CheckSi
      # Updater for SI references in YAML.
      #
      # Both entry points load a YAML document shaped like
      # { entity_type => [entity hashes] }, append missing SI references to
      # the entities and write the result to the output file.
      module SiUpdater
        SI_AUTHORITY = "si-digital-framework"

        # match_desc values that always count as potential (non-exact) matches
        POTENTIAL_MATCH_DESCS = %w[symbol_match partial_match].freeze

        module_function

        # Update references in YAML file (TTL → DB direction).
        #
        # @param entity_type [String] top-level YAML key holding the entities
        # @param missing_matches [Array<Hash>] rows with :entity_id, :si_uri
        #   and optionally :multiple_si and :match_details
        # @param db_entities [Array, nil] database entities; the first one is
        #   asked for its yaml_file to locate the source document
        # @param output_file [String] path of the YAML file to write
        # @param include_potential [Boolean] also write potential matches
        def update_references(entity_type, missing_matches, db_entities,
                              output_file, include_potential = false)
          output_data = load_yaml_template(
            source_yaml_file(db_entities&.first), output_file, entity_type
          )
          entities = output_data[entity_type]

          # Group by entity ID to avoid duplicates
          missing_matches.group_by { |match| match[:entity_id] }.each do |entity_id, matches|
            accepted = matches.select do |match|
              include_potential || !potential_match?(match[:match_details])
            end
            next if accepted.empty?

            entity = find_yaml_entity(entities, entity_id)
            next unless entity

            references = (entity["references"] ||= [])
            accepted.each do |match|
              # A row may stand for several SI entities; add all of them
              uris = if match[:multiple_si]
                       match[:multiple_si].map { |si_data| si_data[:uri] }
                     else
                       [match[:si_uri]]
                     end
              uris.each { |uri| add_si_reference(references, uri) }
            end
          end

          write_yaml_file(output_file, output_data)
        end

        # Update references in YAML file (DB → TTL direction).
        #
        # @param entity_type [String] top-level YAML key holding the entities
        # @param missing_refs [Array<Hash>] rows with :entity_id (or
        #   :db_entity), :ttl_entities and optionally :match_types
        # @param output_file [String] path of the YAML file to write
        # @param include_potential [Boolean] also write potential matches
        def update_db_references(entity_type, missing_refs, output_file,
                                 include_potential = false)
          output_data = load_yaml_template(
            source_yaml_file(missing_refs.first&.dig(:db_entity)), output_file, entity_type
          )

          # Entity id => SI URIs to add
          uris_by_id = {}
          missing_refs.each do |match|
            entity_id = match[:entity_id] || match[:db_entity].short
            match_types = match[:match_types] || {}

            accepted = match[:ttl_entities].select do |ttl_entity|
              include_potential || exact_db_match?(entity_id, ttl_entity[:uri], match_types)
            end
            next if accepted.empty?

            (uris_by_id[entity_id] ||= []).concat(accepted.map { |ttl_entity| ttl_entity[:uri] })
          end

          output_data[entity_type].each do |entity_yaml|
            entity_id = yaml_entity_id(entity_yaml)
            next unless entity_id && uris_by_id.key?(entity_id)

            references = (entity_yaml["references"] ||= [])
            uris_by_id[entity_id].each { |uri| add_si_reference(references, uri) }
          end

          write_yaml_file(output_file, output_data)
        end

        # Helper to write YAML file: creates the output directory, serializes
        # the data and prepends the schema header.
        def write_yaml_file(output_file, output_data)
          FileUtils.mkdir_p(File.dirname(output_file))

          yaml_content = preserve_schema_header(output_file, output_data.to_yaml)

          File.write(output_file, yaml_content)
        end

        # Preserve existing schema header or add default one.
        #
        # @param original_file [String] file whose current header is reused
        #   when it exists; its basename names the default schema otherwise
        # @param yaml_content [String] freshly serialized YAML
        # @return [String] yaml_content preceded by exactly one header line
        def preserve_schema_header(original_file, yaml_content)
          schema_header = nil

          if File.exist?(original_file)
            header_match = File.read(original_file).match(/^# yaml-language-server: \$schema=.+$/)
            schema_header = header_match[0] if header_match
          end

          # Remove any existing schema header from new content to avoid duplication
          yaml_content = yaml_content.gsub(
            /^# yaml-language-server: \$schema=.+$\n/, ""
          )

          if schema_header
            "#{schema_header}\n#{yaml_content}"
          else
            entity_type = File.basename(original_file, ".yaml")
            "# yaml-language-server: $schema=schemas/#{entity_type}-schema.yaml\n#{yaml_content}"
          end
        end

        # --- internal helpers -------------------------------------------------

        # Path the entity was loaded from, when it exposes (even privately)
        # a yaml_file method; nil otherwise.
        def source_yaml_file(entity)
          entity.send(:yaml_file) if entity.respond_to?(:yaml_file, true)
        end

        # Load the document to update. Falls back to the output file (creating
        # an empty template when needed) if the source file is unknown. An
        # empty document or a missing entity_type key yields an empty entity
        # list instead of a NoMethodError later on.
        def load_yaml_template(original_yaml_file, output_file, entity_type)
          if original_yaml_file.nil? || !File.exist?(original_yaml_file)
            puts "Warning: Could not determine original YAML file path. Using output file as template."
            original_yaml_file = output_file

            unless File.exist?(original_yaml_file)
              FileUtils.mkdir_p(File.dirname(original_yaml_file))
              File.write(original_yaml_file, { entity_type => [] }.to_yaml)
            end
          end

          output_data = YAML.safe_load(File.read(original_yaml_file))
          output_data = {} unless output_data.is_a?(Hash)
          output_data[entity_type] ||= []
          output_data
        end

        # True for rows flagged as non-exact or described as symbol/partial.
        def potential_match?(match_details)
          match_details&.dig(:exact) == false ||
            POTENTIAL_MATCH_DESCS.include?(match_details&.dig(:match_desc) || "")
        end

        # True when the DB → TTL pair is an exact match according to the
        # details remembered by SiMatcher and the row's own match type
        # (a missing match type counts as exact).
        def exact_db_match?(entity_id, uri, match_types)
          match_details = Unitsdb::Commands::CheckSi::SiMatcher
            .instance_variable_get(:@match_details)&.dig("#{entity_id}:#{uri}")
          return false if match_details && POTENTIAL_MATCH_DESCS.include?(match_details[:match_desc])

          (match_types[uri] || "Exact match") == "Exact match"
        end

        # Entity hash for entity_id: an identifier id match wins; otherwise
        # fall back to the "short" field. The fallback assumes the SI → DB
        # matcher passes the entity's short as entity_id, as it does where
        # visible in this changeset.
        def find_yaml_entity(entities, entity_id)
          by_identifier = entities.find do |entity|
            entity["identifiers"]&.any? { |identifier| identifier["id"] == entity_id }
          end
          return by_identifier if by_identifier
          return nil if entity_id.nil?

          entities.find { |entity| entity["short"] == entity_id }
        end

        # Id of a YAML entity: the first identifier's "id" when the entity has
        # identifiers, else its own "id".
        def yaml_entity_id(entity_yaml)
          identifiers = entity_yaml["identifiers"]
          return entity_yaml["id"] unless identifiers

          first_identifier = identifiers.first if identifiers.respond_to?(:first)
          first_identifier["id"] if first_identifier.is_a?(Hash)
        end

        # Append a normative SI reference unless the list already has it.
        def add_si_reference(references, uri)
          already_present = references.any? do |ref|
            ref["uri"] == uri && ref["authority"] == SI_AUTHORITY
          end
          return if already_present

          references << {
            "uri" => uri,
            "type" => "normative",
            "authority" => SI_AUTHORITY,
          }
        end

        private_class_method :source_yaml_file, :load_yaml_template,
                             :potential_match?, :exact_db_match?,
                             :find_yaml_entity, :yaml_entity_id,
                             :add_si_reference
      end
    end
  end
end
UnitsDB database (required)" def update - require_relative "qudt/update" Qudt::Update.new(options).run end end diff --git a/lib/unitsdb/commands/qudt/check.rb b/lib/unitsdb/commands/qudt/check.rb index 019eace..a7cf26c 100644 --- a/lib/unitsdb/commands/qudt/check.rb +++ b/lib/unitsdb/commands/qudt/check.rb @@ -1,12 +1,5 @@ # frozen_string_literal: true -require_relative "../base" -require_relative "../../database" -require_relative "../../errors" -require_relative "ttl_parser" -require_relative "formatter" -require_relative "matcher" -require_relative "updater" require "fileutils" module Unitsdb diff --git a/lib/unitsdb/commands/qudt/matcher.rb b/lib/unitsdb/commands/qudt/matcher.rb index aaec161..18753d5 100644 --- a/lib/unitsdb/commands/qudt/matcher.rb +++ b/lib/unitsdb/commands/qudt/matcher.rb @@ -569,7 +569,7 @@ def normalize_name(name) name.downcase .gsub(/\s+/, " ") # normalize whitespace .gsub(/[-_]/, " ") # convert dashes/underscores to spaces - .gsub(/[()\\[\\]]/, "") # remove parentheses and brackets + .gsub(/[()\\]/, "") # remove parentheses and brackets .gsub(/\bof\b/, "") # remove "of" .gsub(/\bper\b/, "/") # convert "per" to "/" .strip diff --git a/lib/unitsdb/commands/qudt/ttl_parser.rb b/lib/unitsdb/commands/qudt/ttl_parser.rb index 397733e..c75377a 100644 --- a/lib/unitsdb/commands/qudt/ttl_parser.rb +++ b/lib/unitsdb/commands/qudt/ttl_parser.rb @@ -5,7 +5,6 @@ require "set" require "rdf" require "rdf/turtle" -require_relative "../../qudt" module Unitsdb module Commands diff --git a/lib/unitsdb/commands/qudt/update.rb b/lib/unitsdb/commands/qudt/update.rb index 67a8a20..b2ac7ea 100644 --- a/lib/unitsdb/commands/qudt/update.rb +++ b/lib/unitsdb/commands/qudt/update.rb @@ -1,11 +1,5 @@ # frozen_string_literal: true -require_relative "../base" -require_relative "../../database" -require_relative "../../errors" -require_relative "ttl_parser" -require_relative "matcher" -require_relative "updater" require "fileutils" module Unitsdb diff --git 
a/lib/unitsdb/commands/search.rb b/lib/unitsdb/commands/search.rb index 4770cc1..54d5ffe 100644 --- a/lib/unitsdb/commands/search.rb +++ b/lib/unitsdb/commands/search.rb @@ -1,8 +1,6 @@ # frozen_string_literal: true -require_relative "base" require "json" -require_relative "../errors" module Unitsdb module Commands diff --git a/lib/unitsdb/commands/si_formatter.rb b/lib/unitsdb/commands/si_formatter.rb deleted file mode 100644 index 6e4bf8d..0000000 --- a/lib/unitsdb/commands/si_formatter.rb +++ /dev/null @@ -1,487 +0,0 @@ -# frozen_string_literal: true - -require "terminal-table" -require_relative "si_ttl_parser" - -module Unitsdb - module Commands - # Formatter for SI check results - module SiFormatter - module_function - - # Display TTL → DB results - def display_si_results(entity_type, matches, missing_matches, -unmatched_ttl) - puts "\n=== #{entity_type.capitalize} with matching SI references ===" - if matches.empty? - puts "None" - else - rows = [] - matches.each do |match| - si_suffix = SiTtlParser.extract_identifying_suffix(match[:si_uri]) - rows << [ - "UnitsDB: #{match[:entity_id]}", - "(#{match[:entity_name] || 'unnamed'})", - ] - rows << [ - "SI TTL: #{si_suffix}", - "(#{match[:si_label] || match[:si_name] || 'unnamed'})", - ] - rows << :separator unless match == matches.last - end - - table = Terminal::Table.new( - title: "Valid SI Reference Mappings", - rows: rows, - ) - puts table - end - - puts "\n=== #{entity_type.capitalize} without SI references ===" - if missing_matches.empty? 
- puts "None" - else - # Split matches into exact and potential - exact_matches = [] - potential_matches = [] - - missing_matches.each do |match| - # Get match details - match_details = match[:match_details] - match_desc = match_details&.dig(:match_desc) || "" - - # Symbol matches and partial matches should always be potential matches - if %w[symbol_match partial_match].include?(match_desc) - potential_matches << match - elsif match_details&.dig(:exact) == false - potential_matches << match - else - exact_matches << match - end - end - - # Display exact matches - puts "\n=== Exact Matches (#{exact_matches.size}) ===" - if exact_matches.empty? - puts "None" - else - rows = [] - exact_matches.each do |match| - # First row: UnitsDB entity - rows << [ - "UnitsDB: #{match[:entity_id]}", - "(#{match[:entity_name] || 'unnamed'})", - ] - - # Handle multiple SI matches in a single cell if present - if match[:multiple_si] - # Ensure no duplicate URIs - si_text_parts = [] - si_label_parts = [] - seen_uris = {} - - match[:multiple_si].each do |si_data| - uri = si_data[:uri] - next if seen_uris[uri] # Skip if we've already seen this URI - - seen_uris[uri] = true - - suffix = SiTtlParser.extract_identifying_suffix(uri) - si_text_parts << suffix - si_label_parts << (si_data[:label] || si_data[:name]) - end - - rows << [ - "SI TTL: #{si_text_parts.join(', ')}", - "(#{si_label_parts.join(', ')})", - ] - else - # Second row: SI TTL suffix and label/name - si_suffix = SiTtlParser.extract_identifying_suffix(match[:si_uri]) - rows << [ - "SI TTL: #{si_suffix}", - "(#{match[:si_label] || match[:si_name] || 'unnamed'})", - ] - end - - # Status line with match type - match_details = match[:match_details] - match_desc = match_details&.dig(:match_desc) || "" - match_info = format_match_info(match_desc) - status_text = match_info.empty? ? 
"Missing reference" : "Missing reference (#{match_info})" - - rows << [ - "Status: #{status_text}", - "✗", - ] - rows << :separator unless match == exact_matches.last - end - - table = Terminal::Table.new( - title: "Exact Match Missing SI References", - rows: rows, - ) - puts table - end - - # Display potential matches - puts "\n=== Potential Matches (#{potential_matches.size}) ===" - if potential_matches.empty? - puts "None" - else - rows = [] - potential_matches.each do |match| - # First row: UnitsDB entity - rows << [ - "UnitsDB: #{match[:entity_id]}", - "(#{match[:entity_name] || 'unnamed'})", - ] - - # Handle multiple SI matches in a single cell if present - if match[:multiple_si] - # Ensure no duplicate URIs - si_text_parts = [] - seen_uris = {} - - match[:multiple_si].each do |si_data| - uri = si_data[:uri] - next if seen_uris[uri] # Skip if we've already seen this URI - - seen_uris[uri] = true - - suffix = SiTtlParser.extract_identifying_suffix(uri) - si_text_parts << "#{suffix} (#{si_data[:label] || si_data[:name]})" - end - - rows << [ - "SI TTL: #{si_text_parts.join(', ')}", - "", - ] - else - # Single TTL entity - si_suffix = SiTtlParser.extract_identifying_suffix(match[:si_uri]) - rows << [ - "SI TTL: #{si_suffix}", - "(#{match[:si_label] || match[:si_name] || 'unnamed'})", - ] - end - - # Status line - match_details = match[:match_details] - match_desc = match_details&.dig(:match_desc) || "" - match_info = format_match_info(match_desc) - status_text = match_info.empty? ? "Missing reference" : "Missing reference" - - rows << [ - "Status: #{status_text}", - "✗", - ] - rows << :separator unless match == potential_matches.last - end - - table = Terminal::Table.new( - title: "Potential Match Missing SI References", - rows: rows, - ) - puts table - end - end - - puts "\n=== SI #{entity_type.capitalize} not mapped to our database ===" - if unmatched_ttl.empty? 
- puts "None (All TTL entities are referenced - Good job!)" - else - # Group unmatched ttl entities by their URI to avoid duplicates - grouped_unmatched = {} - - unmatched_ttl.each do |entity| - uri = entity[:uri] - grouped_unmatched[uri] = entity unless grouped_unmatched.key?(uri) - end - - rows = [] - unique_entities = grouped_unmatched.values - - unique_entities.each do |entity| - # Create the SI TTL row - si_suffix = SiTtlParser.extract_identifying_suffix(entity[:uri]) - ttl_row = ["SI TTL: #{si_suffix}", - "(#{entity[:label] || entity[:name] || 'unnamed'})"] - - rows << ttl_row - rows << [ - "Status: No matching UnitsDB entity", - "?", - ] - rows << :separator unless entity == unique_entities.last - end - - table = Terminal::Table.new( - title: "Unmapped SI Entities", - rows: rows, - ) - puts table - end - end - - # Display DB → TTL results - def display_db_results(entity_type, matches, missing_refs, unmatched_db) - puts "\n=== Summary of database entities referencing SI ===" - puts "#{entity_type.capitalize} with SI references: #{matches.size}" - puts "#{entity_type.capitalize} missing SI references: #{missing_refs.size}" - puts "Database #{entity_type} not matching any SI entity: #{unmatched_db.size}" - - # Show entities with valid references - unless matches.empty? - puts "\n=== #{entity_type.capitalize} with SI references ===" - rows = [] - matches.each do |match| - db_entity = match[:db_entity] - entity_id = match[:entity_id] || db_entity.short - entity_name = db_entity.respond_to?(:names) ? db_entity.names&.first : "unnamed" - si_suffix = SiTtlParser.extract_identifying_suffix(match[:ttl_uri]) - - ttl_label = match[:ttl_entity] ? 
(match[:ttl_entity][:label] || match[:ttl_entity][:name]) : "Unknown" - - rows << [ - "UnitsDB: #{entity_id}", - "(#{entity_name})", - ] - rows << [ - "SI TTL: #{si_suffix}", - "(#{ttl_label})", - ] - rows << :separator unless match == matches.last - end - - table = Terminal::Table.new( - title: "Valid SI References", - rows: rows, - ) - puts table - end - - puts "\n=== #{entity_type.capitalize} that should reference SI ===" - if missing_refs.empty? - puts "None" - else - # Split missing_refs into exact and potential matches - exact_matches = [] - potential_matches = [] - - missing_refs.each do |match| - # Determine match type - ttl_entities = match[:ttl_entities] - uri = ttl_entities.first[:uri] - match_type = "Exact match" # Default - match_type = match[:match_types][uri] if match[:match_types] && match[:match_types][uri] - - # Get match description if available - entity_id = match[:db_entity].short - match_pair_key = "#{entity_id}:#{ttl_entities.first[:uri]}" - match_details = Unitsdb::Commands::SiMatcher.instance_variable_get(:@match_details)&.dig(match_pair_key) - match_desc = match_details[:match_desc] if match_details && match_details[:match_desc] - - # Symbol matches and partial matches should always be potential matches - if %w[symbol_match partial_match].include?(match_desc) - potential_matches << match - elsif match_type == "Exact match" - exact_matches << match - else - potential_matches << match - end - end - - # Display exact matches - puts "\n=== Exact Matches (#{exact_matches.size}) ===" - if exact_matches.empty? - puts "None" - else - rows = [] - exact_matches.each do |match| - db_entity = match[:db_entity] - entity_id = match[:entity_id] || db_entity.short - entity_name = db_entity.respond_to?(:names) ? 
db_entity.names&.first : "unnamed" - - # Handle multiple TTL entities in a single row - ttl_entities = match[:ttl_entities] - if ttl_entities.size == 1 - # Single TTL entity - ttl_entity = ttl_entities.first - si_suffix = SiTtlParser.extract_identifying_suffix(ttl_entity[:uri]) - - rows << [ - "UnitsDB: #{entity_id}", - "(#{entity_name})", - ] - rows << [ - "SI TTL: #{si_suffix}", - "(#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})", - ] - else - # Multiple TTL entities, combine them - ensure no duplicates - si_text_parts = [] - seen_uris = {} - - ttl_entities.each do |ttl_entity| - uri = ttl_entity[:uri] - next if seen_uris[uri] # Skip if we've already seen this URI - - seen_uris[uri] = true - - suffix = SiTtlParser.extract_identifying_suffix(uri) - si_text_parts << "#{suffix} (#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})" - end - - si_text = si_text_parts.join(", ") - - rows << [ - "UnitsDB: #{entity_id}", - "(#{entity_name})", - ] - rows << [ - "SI TTL: #{si_text}", - "", - ] - end - - # Get match details for this match - match_pair_key = "#{db_entity.short}:#{ttl_entities.first[:uri]}" - match_details = Unitsdb::Commands::SiMatcher.instance_variable_get(:@match_details)&.dig(match_pair_key) - - # Format match info - match_info = "" - match_info = format_match_info(match_details[:match_desc]) if match_details - - status_text = match_info.empty? ? "Missing reference" : "Missing reference (#{match_info})" - rows << [ - "Status: #{status_text}", - "✗", - ] - rows << :separator unless match == exact_matches.last - end - - table = Terminal::Table.new( - title: "Exact Match Missing SI References", - rows: rows, - ) - puts table - end - - # Display potential matches - puts "\n=== Potential Matches (#{potential_matches.size}) ===" - if potential_matches.empty? 
- puts "None" - else - rows = [] - potential_matches.each do |match| - db_entity = match[:db_entity] - entity_id = match[:entity_id] || db_entity.short - entity_name = db_entity.respond_to?(:names) ? db_entity.names&.first : "unnamed" - - # Handle multiple TTL entities in a single row - ttl_entities = match[:ttl_entities] - if ttl_entities.size == 1 - # Single TTL entity - ttl_entity = ttl_entities.first - si_suffix = SiTtlParser.extract_identifying_suffix(ttl_entity[:uri]) - - rows << [ - "UnitsDB: #{entity_id}", - "(#{entity_name})", - ] - rows << [ - "SI TTL: #{si_suffix}", - "(#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})", - ] - else - # Multiple TTL entities, combine them - ensure no duplicates - si_text_parts = [] - seen_uris = {} - - ttl_entities.each do |ttl_entity| - uri = ttl_entity[:uri] - next if seen_uris[uri] # Skip if we've already seen this URI - - seen_uris[uri] = true - - suffix = SiTtlParser.extract_identifying_suffix(uri) - si_text_parts << "#{suffix} (#{ttl_entity[:label] || ttl_entity[:name] || 'unnamed'})" - end - - si_text = si_text_parts.join(", ") - - rows << [ - "UnitsDB: #{entity_id}", - "(#{entity_name})", - ] - rows << [ - "SI TTL: #{si_text}", - "", - ] - end - - # Get match details - match_pair_key = "#{db_entity.short}:#{ttl_entities.first[:uri]}" - match_details = Unitsdb::Commands::SiMatcher.instance_variable_get(:@match_details)&.dig(match_pair_key) - - # Format match info - match_info = "" - match_info = format_match_info(match_details[:match_desc]) if match_details - - status_text = match_info.empty? ? 
"Missing reference" : "Missing reference (#{match_info})" - rows << [ - "Status: #{status_text}", - "✗", - ] - rows << :separator unless match == potential_matches.last - end - - table = Terminal::Table.new( - title: "Potential Match Missing SI References", - rows: rows, - ) - puts table - end - end - end - - # Print direction header - def print_direction_header(direction) - case direction - when "SI → UnitsDB" - puts "\n=== Checking SI → UnitsDB (TTL entities referenced by database) ===" - when "UnitsDB → SI" - puts "\n=== Checking UnitsDB → SI (database entities referencing TTL) ===" - end - - puts "\n=== Instructions for #{direction} direction ===" - case direction - when "SI → UnitsDB" - puts "If you are the UnitsDB Register Manager, please ensure that all SI entities have proper references in the UnitsDB database." - puts "For each missing reference, add a reference with the appropriate URI and 'authority: \"si-digital-framework\"'." - when "UnitsDB → SI" - puts "If you are the UnitsDB Register Manager, please add SI references to UnitsDB entities that should have them." - puts "For each entity that should reference SI, add a reference with 'authority: \"si-digital-framework\"' and the SI TTL URI." 
- end - end - - def set_match_details(details) - @match_details = details - end - - # Format match info for display - def format_match_info(match_desc) - { - "short_to_name" => "short → name", - "short_to_label" => "short → label", - "name_to_name" => "name → name", - "name_to_label" => "name → label", - "name_to_alt_label" => "name → alt_label", - "symbol_match" => "symbol → symbol", - "partial_match" => "partial match", - }[match_desc] || "" - end - end - end -end diff --git a/lib/unitsdb/commands/si_matcher.rb b/lib/unitsdb/commands/si_matcher.rb deleted file mode 100644 index f97193a..0000000 --- a/lib/unitsdb/commands/si_matcher.rb +++ /dev/null @@ -1,483 +0,0 @@ -# frozen_string_literal: true - -require_relative "si_ttl_parser" - -module Unitsdb - module Commands - # Matcher for SI entities and UnitsDB entities - module SiMatcher - SI_AUTHORITY = "si-digital-framework" - @match_details = {} - - module_function - - # Match TTL entities to database entities (from_si direction) - def match_ttl_to_db(entity_type, ttl_entities, db_entities) - matches = [] - missing_matches = [] - matched_ttl_uris = [] - processed_pairs = {} # Track processed entity-ttl pairs to avoid duplicates - entity_matches = {} # Track matches by entity ID - - # First pass: find direct references - db_entities.each do |entity| - next unless entity.respond_to?(:references) && entity.references - - entity.references.each do |ref| - next unless ref.authority == SI_AUTHORITY - - matched_ttl_uris << ref.uri - ttl_entity = ttl_entities.find { |e| e[:uri] == ref.uri } - next unless ttl_entity - - matches << { - entity_id: entity.short, - entity_name: format_entity_name(entity), - si_uri: ttl_entity[:uri], - si_name: ttl_entity[:name], - si_label: ttl_entity[:label], - si_alt_label: ttl_entity[:alt_label], - si_symbol: ttl_entity[:symbol], - entity: entity, - } - end - end - - # Second pass: find matching entities - ttl_entities.each do |ttl_entity| - next if 
matched_ttl_uris.include?(ttl_entity[:uri]) - - matching_entities = find_matching_entities(entity_type, ttl_entity, - db_entities) - next if matching_entities.empty? - - matched_ttl_uris << ttl_entity[:uri] - - matching_entities.each do |entity| - entity_id = entity.short - entity_name = format_entity_name(entity) - - # Create a unique key for this entity-ttl pair to avoid duplicates - pair_key = "#{entity_id}:#{ttl_entity[:uri]}" - next if processed_pairs[pair_key] - - processed_pairs[pair_key] = true - - # Get detailed match information - match_result = match_entity_names?(entity_type, entity, ttl_entity) - next unless match_result[:match] - - # Save match details for later use - @match_details[pair_key] = match_result - - # Check if already has reference - has_reference = entity.references&.any? do |ref| - ref.uri == ttl_entity[:uri] && ref.authority == SI_AUTHORITY - end - - match_data = { - entity_id: entity_id, - entity_name: entity_name, - si_uri: ttl_entity[:uri], - si_name: ttl_entity[:name], - si_label: ttl_entity[:label], - si_alt_label: ttl_entity[:alt_label], - si_symbol: ttl_entity[:symbol], - entity: entity, - match_type: match_result[:match_type], - match_details: match_result, - match_types: { ttl_entity[:uri] => match_result[:match_type] }, - } - - if has_reference - matches << match_data - else - # Group by entity_id for multiple SI matches - entity_matches[entity_id] ||= [] - entity_matches[entity_id] << { - uri: ttl_entity[:uri], - name: ttl_entity[:name], - label: ttl_entity[:label], - } - - # Add first occurrence of this entity to missing_matches - missing_matches << match_data unless missing_matches.any? 
do |m| - m[:entity_id] == entity_id - end - end - end - end - - # Update missing_matches to include multiple SI entities - missing_matches.each do |match| - entity_id = match[:entity_id] - si_matches = entity_matches[entity_id] - - # If entity matches multiple SI entities, record them - match[:multiple_si] = si_matches if si_matches && si_matches.size > 1 - end - - # Find unmatched TTL entities - unmatched_ttl = ttl_entities.reject do |entity| - matched_ttl_uris.include?(entity[:uri]) || - entity[:uri].end_with?("/units/") || - entity[:uri].end_with?("/quantities/") || - entity[:uri].end_with?("/prefixes/") - end - - [matches, missing_matches, unmatched_ttl] - end - - # Match database entities to TTL entities (to_si direction) - def match_db_to_ttl(entity_type, ttl_entities, db_entities) - matches = [] - missing_refs = [] - matched_db_ids = [] - processed_db_ids = {} # Track processed entities - - # Map from NIST IDs to display names for original output compatibility - nist_id_to_display = {} - - # Build mappings for each entity type - db_entities.each do |entity| - next unless entity.respond_to?(:identifiers) && entity.identifiers&.first&.id&.start_with?("NIST") - - nist_id = entity.identifiers.first.id - - # For quantities and prefixes, we want to show the "short" field - nist_id_to_display[nist_id] = entity.short if %w[quantities - prefixes].include?(entity_type) && entity.respond_to?(:short) - end - - db_entities.each do |db_entity| - entity_id = find_entity_id(db_entity) - - # For display purposes - use original display names - display_id = entity_id - - # Apply the NIST ID mapping if available - display_id = nist_id_to_display[entity_id] if entity_id.start_with?("NIST") && nist_id_to_display[entity_id] - - # Skip if we've already processed this entity - next if processed_db_ids[entity_id] - - processed_db_ids[entity_id] = true - has_reference = false - - # Check for existing SI references - if db_entity.respond_to?(:references) && db_entity.references - 
db_entity.references.each do |ref| - next unless ref.authority == SI_AUTHORITY - - has_reference = true - # Find the matching TTL entity for display - ttl_entity = ttl_entities.find { |e| e[:uri] == ref.uri } - - matches << { - entity_id: display_id, - db_entity: db_entity, - ttl_uri: ref.uri, - ttl_entity: ttl_entity, - } - end - end - - # If already has reference, continue to next entity - if has_reference - matched_db_ids << entity_id - next - end - - # Find matching TTL entities - matching_ttl = [] - match_types = {} - - ttl_entities.each do |ttl_entity| - match_result = match_entity_names?(entity_type, db_entity, - ttl_entity) - next unless match_result[:match] - - matching_ttl << ttl_entity - match_types[ttl_entity[:uri]] = match_result[:match_type] - - # Save detailed match info - @match_details["#{entity_id}:#{ttl_entity[:uri]}"] = match_result - end - - # If found matches, add to missing_refs - next if matching_ttl.empty? - - matched_db_ids << entity_id - missing_refs << { - entity_id: display_id, - db_entity: db_entity, - ttl_entities: matching_ttl, - match_types: match_types, - } - end - - # Find unmatched db entities - unmatched_db = db_entities.reject do |entity| - matched_db_ids.include?(find_entity_id(entity)) - end - - [matches, missing_refs, unmatched_db] - end - - # Find entity ID - def find_entity_id(entity) - return entity.id if entity.respond_to?(:id) && entity.id - return entity.identifiers.first.id if entity.respond_to?(:identifiers) && !entity.identifiers.empty? 
&& - entity.identifiers.first.respond_to?(:id) - - entity.short - end - - # Format entity name correctly - def format_entity_name(entity) - return nil unless entity.respond_to?(:names) && entity.names&.first - - entity.names.first - - # # Special handling for sidereal names - use comma format - # if name.include?("sidereal") - # if name.start_with?("sidereal ") - # # For names that already start with "sidereal " - strip it - # base_name = name.gsub("sidereal ", "") - # return "#{base_name}, sidereal" - # elsif name.end_with?(" sidereal") - # # For names that already have comma format but missing comma - # parts = name.split - # return "#{parts.first}, #{parts.last}" - # end - # end - - # # Handle other special cases - # return name if name == "year (365 days)" - - # # Default to the original name - end - - # Find matching entities for a TTL entity - def find_matching_entities(entity_type, ttl_entity, db_entities) - case entity_type - when "units" - find_matching_units(ttl_entity, db_entities) - when "quantities" - find_matching_quantities(ttl_entity, db_entities) - when "prefixes" - find_matching_prefixes(ttl_entity, db_entities) - else - [] - end - end - - # Find exact matches for units - def find_matching_units(ttl_unit, units) - matching_units = [] - - units.each do |unit| - # Match by short - if unit.short&.downcase == ttl_unit[:name]&.downcase || - unit.short&.downcase == ttl_unit[:label]&.downcase - matching_units << unit - next - end - - # Match by name - if unit.respond_to?(:names) && unit.names&.any? do |name| - name.downcase == ttl_unit[:name]&.downcase || - name.downcase == ttl_unit[:label]&.downcase - end - matching_units << unit - next - end - - # Match by symbol - next unless ttl_unit[:symbol] && unit.respond_to?(:symbols) && unit.symbols&.any? 
do |sym| - sym.respond_to?(:ascii) && sym.ascii && sym.ascii.downcase == ttl_unit[:symbol].downcase - end - - matching_units << unit - end - - matching_units.uniq - end - - # Find exact matches for quantities - def find_matching_quantities(ttl_quantity, quantities) - matching_quantities = [] - - quantities.each do |quantity| - # Match by short - if quantity.short&.downcase == ttl_quantity[:name]&.downcase || - quantity.short&.downcase == ttl_quantity[:label]&.downcase || - quantity.short&.downcase == ttl_quantity[:alt_label]&.downcase - matching_quantities << quantity - next - end - - # Match by name - next unless quantity.respond_to?(:names) && quantity.names&.any? do |name| - name.downcase == ttl_quantity[:name]&.downcase || - name.downcase == ttl_quantity[:label]&.downcase || - name.downcase == ttl_quantity[:alt_label]&.downcase - end - - matching_quantities << quantity - end - - matching_quantities.uniq - end - - # Find exact matches for prefixes - def find_matching_prefixes(ttl_prefix, prefixes) - matching_prefixes = [] - - prefixes.each do |prefix| - # Match by short - if prefix.short&.downcase == ttl_prefix[:name]&.downcase || - prefix.short&.downcase == ttl_prefix[:label]&.downcase - matching_prefixes << prefix - next - end - - # Match by name - if prefix.respond_to?(:names) && prefix.names&.any? 
do |name| - name.downcase == ttl_prefix[:name]&.downcase || - name.downcase == ttl_prefix[:label]&.downcase - end - matching_prefixes << prefix - next - end - - # Match by symbol - next unless ttl_prefix[:symbol] && prefix.respond_to?(:symbol) && prefix.symbol && - prefix.symbol.respond_to?(:ascii) && prefix.symbol.ascii && - prefix.symbol.ascii.downcase == ttl_prefix[:symbol].downcase - - matching_prefixes << prefix - end - - matching_prefixes.uniq - end - - # Match entity names with detailed type information - def match_entity_names?(entity_type, db_entity, ttl_entity) - match_details = { match: false } - - # Match by short name - EXACT match - if db_entity.short && db_entity.short.downcase == ttl_entity[:name].downcase - match_details = { - match: true, - exact: true, - match_type: "Exact match", - match_desc: "short_to_name", - details: "UnitsDB short '#{db_entity.short}' matches SI name '#{ttl_entity[:name]}'", - } - # Match by short to label - elsif db_entity.short && ttl_entity[:label] && db_entity.short.downcase == ttl_entity[:label].downcase - match_details = { - match: true, - exact: true, - match_type: "Exact match", - match_desc: "short_to_label", - details: "UnitsDB short '#{db_entity.short}' matches SI label '#{ttl_entity[:label]}'", - } - # Match by names - EXACT match - elsif db_entity.respond_to?(:names) && db_entity.names - # Match by TTL name - db_name_match = db_entity.names.find do |name| - name.downcase == ttl_entity[:name].downcase - end - if db_name_match - match_details = { - match: true, - exact: true, - match_type: "Exact match", - match_desc: "name_to_name", - details: "UnitsDB name '#{db_name_match}' matches SI name '#{ttl_entity[:name]}'", - } - # Match by TTL label - elsif ttl_entity[:label] - db_name_match = db_entity.names.find do |name| - name.downcase == ttl_entity[:label].downcase - end - if db_name_match - match_details = { - match: true, - exact: true, - match_type: "Exact match", - match_desc: "name_to_label", - details: 
"UnitsDB name '#{db_name_match}' matches SI label '#{ttl_entity[:label]}'", - } - end - end - - # Match by TTL alt_label - if !match_details[:match] && ttl_entity[:alt_label] - db_name_match = db_entity.names.find do |name| - name.downcase == ttl_entity[:alt_label].downcase - end - if db_name_match - match_details = { - match: true, - exact: true, - match_type: "Exact match", - match_desc: "name_to_alt_label", - details: "UnitsDB name '#{db_name_match}' matches SI alt_label '#{ttl_entity[:alt_label]}'", - } - end - end - end - - # Special validation for "sidereal_" units - if match_details[:match] && match_details[:exact] && db_entity.short&.include?("sidereal_") && - !(ttl_entity[:name]&.include?("sidereal") || ttl_entity[:label]&.include?("sidereal")) - match_details = { - match: true, - exact: false, - match_type: "Potential match", - match_desc: "partial_match", - details: "UnitsDB '#{db_entity.short}' partially matches SI '#{ttl_entity[:name]}'", - } - end - - # Match by symbol if available (units and prefixes) - POTENTIAL match - if !match_details[:match] && %w[units - prefixes].include?(entity_type) && ttl_entity[:symbol] - if entity_type == "units" && db_entity.respond_to?(:symbols) && db_entity.symbols - matching_symbol = db_entity.symbols.find do |sym| - sym.respond_to?(:ascii) && sym.ascii && sym.ascii.downcase == ttl_entity[:symbol].downcase - end - - if matching_symbol - match_details = { - match: true, - exact: false, - match_type: "Potential match", - match_desc: "symbol_match", - details: "UnitsDB symbol '#{matching_symbol.ascii}' matches SI symbol '#{ttl_entity[:symbol]}'", - } - end - elsif entity_type == "prefixes" && db_entity.respond_to?(:symbol) && db_entity.symbol - if db_entity.symbol.respond_to?(:ascii) && - db_entity.symbol.ascii && - db_entity.symbol.ascii.downcase == ttl_entity[:symbol].downcase - - match_details = { - match: true, - exact: false, - match_type: "Potential match", - match_desc: "symbol_match", - details: "UnitsDB symbol 
'#{db_entity.symbol.ascii}' matches SI symbol '#{ttl_entity[:symbol]}'", - } - end - end - end - - match_details - end - end - end -end diff --git a/lib/unitsdb/commands/si_ttl_parser.rb b/lib/unitsdb/commands/si_ttl_parser.rb deleted file mode 100644 index 0545ffd..0000000 --- a/lib/unitsdb/commands/si_ttl_parser.rb +++ /dev/null @@ -1,101 +0,0 @@ -# frozen_string_literal: true - -require "rdf" -require "rdf/turtle" - -module Unitsdb - module Commands - # Parser for SI TTL files - module SiTtlParser - SI_URI_PREFIX = "http://si-digital-framework.org/SI/" - - module_function - - # Parse TTL files and return RDF graph - def parse_ttl_files(dir) - puts "Parsing TTL files in #{dir}..." - graph = RDF::Graph.new - - Dir.glob(File.join(dir, "*.ttl")).each do |file| - puts " Reading #{File.basename(file)}" - graph.load(file, format: :ttl) - end - - graph - end - - # Extract entities from TTL based on entity type - def extract_entities_from_ttl(entity_type, graph) - skos = RDF::Vocabulary.new("http://www.w3.org/2004/02/skos/core#") - si = RDF::Vocabulary.new("http://si-digital-framework.org/SI#") - - namespace_uri = case entity_type - when "units" then "http://si-digital-framework.org/SI/units/" - when "quantities" then "http://si-digital-framework.org/quantities/" - when "prefixes" then "http://si-digital-framework.org/SI/prefixes/" - else return [] - end - - namespace = RDF::Vocabulary.new(namespace_uri) - entities = extract_base_entities(graph, namespace, skos) - add_symbols_to_entities(entities, graph, si) if %w[units - prefixes].include?(entity_type) - entities - end - - # Extract base entities from graph - def extract_base_entities(graph, namespace, skos) - entities = [] - processed_uris = {} - - RDF::Query.new({ entity: { skos.prefLabel => :label } }) - .execute(graph).each do |solution| - entity_uri = solution.entity.to_s - next unless entity_uri.start_with?(namespace.to_s) - next if processed_uris[entity_uri] - - processed_uris[entity_uri] = true - - entity_name = 
entity_uri.split("/").last - label = RDF::Query.new({ RDF::URI(entity_uri) => { skos.prefLabel => :value } }) - .execute(graph).first&.value&.to_s - alt_label = RDF::Query.new({ RDF::URI(entity_uri) => { skos.altLabel => :value } }) - .execute(graph).first&.value&.to_s - - entities << { - uri: entity_uri, - name: entity_name, - label: label, - alt_label: alt_label, - } - end - - entities - end - - # Add symbols to entities - def add_symbols_to_entities(entities, graph, si) - entities.each do |entity| - symbol = RDF::Query.new({ RDF::URI(entity[:uri]) => { si.hasSymbol => :value } }) - .execute(graph).first&.value&.to_s - entity[:symbol] = symbol if symbol - end - end - - # Extract suffix from URI for display - def extract_identifying_suffix(uri) - return "" unless uri - - # For display, we need to format as exactly like the original - # This helps format the comma-separated multi-units correctly - if uri.include?("/units/") - # Return units/name format for units (without duplicating "units/") - "units/#{uri.split('/').last}" - else - # Otherwise strip the prefix - uri.gsub(SI_URI_PREFIX, "") - end - end - end - end -end diff --git a/lib/unitsdb/commands/si_updater.rb b/lib/unitsdb/commands/si_updater.rb deleted file mode 100644 index 3ad9d9d..0000000 --- a/lib/unitsdb/commands/si_updater.rb +++ /dev/null @@ -1,250 +0,0 @@ -# frozen_string_literal: true - -require "yaml" -require "fileutils" - -module Unitsdb - module Commands - # Updater for SI references in YAML - module SiUpdater - SI_AUTHORITY = "si-digital-framework" - - module_function - - # Update references in YAML file (TTL → DB direction) - def update_references(entity_type, missing_matches, db_entities, -output_file, include_potential = false) - # Use the database objects to access the data directly - original_yaml_file = db_entities.first.send(:yaml_file) if db_entities&.first.respond_to?( - :yaml_file, true - ) - - # If we can't get the path from the database object, use the output file path as a 
fallback - if original_yaml_file.nil? || !File.exist?(original_yaml_file) - puts "Warning: Could not determine original YAML file path. Using output file as template." - original_yaml_file = output_file - - # Create an empty template if output file doesn't exist - unless File.exist?(original_yaml_file) - FileUtils.mkdir_p(File.dirname(original_yaml_file)) - File.write(original_yaml_file, { entity_type => [] }.to_yaml) - end - end - - # Load the original YAML file - yaml_content = File.read(original_yaml_file) - output_data = YAML.safe_load(yaml_content) - - # Group by entity ID to avoid duplicates - grouped_matches = missing_matches.group_by { |match| match[:entity_id] } - - # Process each entity that needs updating - grouped_matches.each do |entity_id, matches| - # Filter matches based on include_potential parameter - filtered_matches = matches.select do |match| - # Check if it's an exact match or if we're including potential matches - match_details = match[:match_details] - if match_details&.dig(:exact) == false || %w[symbol_match - partial_match].include?(match_details&.dig(:match_desc) || "") - include_potential - else - true - end - end - - # Skip if no matches after filtering - next if filtered_matches.empty? - - # Find the entity in the array under the entity_type key - entity_index = output_data[entity_type].find_index do |e| - # Find entity with matching identifier - e["identifiers"]&.any? { |id| id["id"] == entity_id } - end - - next unless entity_index - - # Get the entity - entity = output_data[entity_type][entity_index] - - # Initialize references array if it doesn't exist - entity["references"] ||= [] - - # Add new references - filtered_matches.each do |match| - # If this match has multiple SI references, add them all - if match[:multiple_si] - match[:multiple_si].each do |si_data| - # Check if reference already exists - next if entity["references"].any? 
do |ref| - ref["uri"] == si_data[:uri] && ref["authority"] == SI_AUTHORITY - end - - # Add new reference - entity["references"] << { - "uri" => si_data[:uri], - "type" => "normative", - "authority" => SI_AUTHORITY, - } - end - else - # Check if reference already exists - next if entity["references"].any? do |ref| - ref["uri"] == match[:si_uri] && ref["authority"] == SI_AUTHORITY - end - - # Add new reference - entity["references"] << { - "uri" => match[:si_uri], - "type" => "normative", - "authority" => SI_AUTHORITY, - } - end - end - end - - write_yaml_file(output_file, output_data) - end - - # Update references in YAML file (DB → TTL direction) - def update_db_references(entity_type, missing_refs, output_file, -include_potential = false) - # Try to get the original YAML file from the first entity - first_entity = missing_refs.first&.dig(:db_entity) - original_yaml_file = first_entity.send(:yaml_file) if first_entity.respond_to?( - :yaml_file, true - ) - - # If we can't get the path from the database object, use the output file path as a fallback - if original_yaml_file.nil? || !File.exist?(original_yaml_file) - puts "Warning: Could not determine original YAML file path. Using output file as template." 
- original_yaml_file = output_file - - # Create an empty template if output file doesn't exist - unless File.exist?(original_yaml_file) - FileUtils.mkdir_p(File.dirname(original_yaml_file)) - File.write(original_yaml_file, { entity_type => [] }.to_yaml) - end - end - - # Load the original YAML file - yaml_content = File.read(original_yaml_file) - output_data = YAML.safe_load(yaml_content) - - # Group by entity ID to avoid duplicates - missing_refs_by_id = {} - - missing_refs.each do |match| - entity_id = match[:entity_id] || match[:db_entity].short - ttl_entities = match[:ttl_entities] - match_types = match[:match_types] || {} - - # Filter TTL entities based on include_potential parameter - filtered_ttl_entities = ttl_entities.select do |ttl_entity| - # Check if it's an exact match or if we're including potential matches - match_type = match_types[ttl_entity[:uri]] || "Exact match" # Default to exact match - match_pair_key = "#{entity_id}:#{ttl_entity[:uri]}" - match_details = Unitsdb::Commands::SiMatcher.instance_variable_get(:@match_details)&.dig(match_pair_key) - - if match_details && %w[symbol_match - partial_match].include?(match_details[:match_desc]) - include_potential - else - match_type == "Exact match" || include_potential - end - end - - # Skip if no entities after filtering - next if filtered_ttl_entities.empty? 
- - missing_refs_by_id[entity_id] ||= [] - - # Add filtered matching TTL entities for this DB entity - filtered_ttl_entities.each do |ttl_entity| - missing_refs_by_id[entity_id] << { - uri: ttl_entity[:uri], - type: "normative", - authority: SI_AUTHORITY, - } - end - end - - # Update the YAML content - output_data[entity_type].each do |entity_yaml| - # Find entity by ID or short - entity_id = if entity_yaml["identifiers"] - begin - entity_yaml["identifiers"].first["id"] - rescue StandardError - nil - end - elsif entity_yaml["id"] - entity_yaml["id"] - end - - next unless entity_id && missing_refs_by_id.key?(entity_id) - - # Add references - entity_yaml["references"] ||= [] - - missing_refs_by_id[entity_id].each do |ref| - # Check if this reference already exists - next if entity_yaml["references"].any? do |existing_ref| - existing_ref["uri"] == ref[:uri] && - existing_ref["authority"] == ref[:authority] - end - - # Add the reference - entity_yaml["references"] << { - "uri" => ref[:uri], - "type" => ref[:type], - "authority" => ref[:authority], - } - end - end - - write_yaml_file(output_file, output_data) - end - - # Helper to write YAML file - def write_yaml_file(output_file, output_data) - # Ensure the output directory exists - output_dir = File.dirname(output_file) - FileUtils.mkdir_p(output_dir) - - # Write to YAML file with proper formatting - yaml_content = output_data.to_yaml - - # Preserve existing schema header or add default one - yaml_content = preserve_schema_header(output_file, yaml_content) - - File.write(output_file, yaml_content) - end - - # Preserve existing schema header or add default one - def preserve_schema_header(original_file, yaml_content) - schema_header = nil - - # Extract existing schema header if file exists - if File.exist?(original_file) - original_content = File.read(original_file) - if (match = original_content.match(/^# yaml-language-server: \$schema=.+$/)) - schema_header = match[0] - end - end - - # Remove any existing schema 
header from new content to avoid duplication - yaml_content = yaml_content.gsub( - /^# yaml-language-server: \$schema=.+$\n/, "" - ) - - # Add preserved or default schema header - if schema_header - "#{schema_header}\n#{yaml_content}" - else - entity_type = File.basename(original_file, ".yaml") - "# yaml-language-server: $schema=schemas/#{entity_type}-schema.yaml\n#{yaml_content}" - end - end - end - end -end diff --git a/lib/unitsdb/commands/ucum.rb b/lib/unitsdb/commands/ucum.rb index 3493d9f..78f01b7 100644 --- a/lib/unitsdb/commands/ucum.rb +++ b/lib/unitsdb/commands/ucum.rb @@ -4,6 +4,15 @@ module Unitsdb module Commands + module Ucum + autoload :Check, "unitsdb/commands/ucum/check" + autoload :Update, "unitsdb/commands/ucum/update" + autoload :Formatter, "unitsdb/commands/ucum/formatter" + autoload :Matcher, "unitsdb/commands/ucum/matcher" + autoload :Updater, "unitsdb/commands/ucum/updater" + autoload :XmlParser, "unitsdb/commands/ucum/xml_parser" + end + class UcumCommand < Thor desc "check", "Check UCUM references in UnitsDB" option :entity_type, type: :string, aliases: "-e", @@ -19,7 +28,6 @@ class UcumCommand < Thor option :database, type: :string, required: true, aliases: "-d", desc: "Path to UnitsDB database (required)" def check - require_relative "ucum/check" Ucum::Check.new(options).run end @@ -35,7 +43,6 @@ def check option :database, type: :string, required: true, aliases: "-d", desc: "Path to UnitsDB database (required)" def update - require_relative "ucum/update" Ucum::Update.new(options).run end end diff --git a/lib/unitsdb/commands/ucum/check.rb b/lib/unitsdb/commands/ucum/check.rb index 8fe16e9..4dd278e 100644 --- a/lib/unitsdb/commands/ucum/check.rb +++ b/lib/unitsdb/commands/ucum/check.rb @@ -1,12 +1,5 @@ # frozen_string_literal: true -require_relative "../base" -require_relative "../../database" -require_relative "../../errors" -require_relative "xml_parser" -require_relative "formatter" -require_relative "matcher" -require_relative 
"updater" require "fileutils" module Unitsdb diff --git a/lib/unitsdb/commands/ucum/update.rb b/lib/unitsdb/commands/ucum/update.rb index 51edeee..61b7fa1 100644 --- a/lib/unitsdb/commands/ucum/update.rb +++ b/lib/unitsdb/commands/ucum/update.rb @@ -1,10 +1,5 @@ # frozen_string_literal: true -require_relative "../base" -require_relative "../../database" -require_relative "xml_parser" -require_relative "matcher" -require_relative "updater" require "fileutils" module Unitsdb diff --git a/lib/unitsdb/commands/ucum/xml_parser.rb b/lib/unitsdb/commands/ucum/xml_parser.rb index a754ab7..e48c8cb 100644 --- a/lib/unitsdb/commands/ucum/xml_parser.rb +++ b/lib/unitsdb/commands/ucum/xml_parser.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "../../ucum" - module Unitsdb module Commands module Ucum diff --git a/lib/unitsdb/commands/validate.rb b/lib/unitsdb/commands/validate.rb index 2353859..e149d73 100644 --- a/lib/unitsdb/commands/validate.rb +++ b/lib/unitsdb/commands/validate.rb @@ -4,6 +4,14 @@ module Unitsdb module Commands + module Validate + autoload :Identifiers, "unitsdb/commands/validate/identifiers" + autoload :QudtReferences, "unitsdb/commands/validate/qudt_references" + autoload :References, "unitsdb/commands/validate/references" + autoload :SiReferences, "unitsdb/commands/validate/si_references" + autoload :UcumReferences, "unitsdb/commands/validate/ucum_references" + end + class ValidateCommand < Thor desc "references", "Validate that all references exist" option :debug_registry, type: :boolean, @@ -13,8 +21,6 @@ class ValidateCommand < Thor option :print_valid, type: :boolean, default: false, desc: "Print valid references too" def references - require_relative "validate/references" - Commands::Validate::References.new(options).run end @@ -23,8 +29,6 @@ def references desc: "Path to UnitsDB database (required)" def identifiers - require_relative "validate/identifiers" - Commands::Validate::Identifiers.new(options).run end @@ -34,8 +38,6 @@ 
def identifiers desc: "Path to UnitsDB database (required)" def si_references - require_relative "validate/si_references" - Commands::Validate::SiReferences.new(options).run end @@ -45,8 +47,6 @@ def si_references desc: "Path to UnitsDB database (required)" def qudt_references - require_relative "validate/qudt_references" - Commands::Validate::QudtReferences.new(options).run end @@ -56,8 +56,6 @@ def qudt_references desc: "Path to UnitsDB database (required)" def ucum_references - require_relative "validate/ucum_references" - Commands::Validate::UcumReferences.new(options).run end end diff --git a/lib/unitsdb/commands/validate/identifiers.rb b/lib/unitsdb/commands/validate/identifiers.rb index e746508..c9bed36 100644 --- a/lib/unitsdb/commands/validate/identifiers.rb +++ b/lib/unitsdb/commands/validate/identifiers.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "../base" - module Unitsdb module Commands module Validate diff --git a/lib/unitsdb/commands/validate/qudt_references.rb b/lib/unitsdb/commands/validate/qudt_references.rb index af1652c..e0e0297 100644 --- a/lib/unitsdb/commands/validate/qudt_references.rb +++ b/lib/unitsdb/commands/validate/qudt_references.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "../base" - module Unitsdb module Commands module Validate diff --git a/lib/unitsdb/commands/validate/references.rb b/lib/unitsdb/commands/validate/references.rb index f820f69..d0096e7 100644 --- a/lib/unitsdb/commands/validate/references.rb +++ b/lib/unitsdb/commands/validate/references.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "../base" - module Unitsdb module Commands module Validate diff --git a/lib/unitsdb/commands/validate/si_references.rb b/lib/unitsdb/commands/validate/si_references.rb index 159145f..429f7c7 100644 --- a/lib/unitsdb/commands/validate/si_references.rb +++ b/lib/unitsdb/commands/validate/si_references.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative 
"../base" - module Unitsdb module Commands module Validate diff --git a/lib/unitsdb/commands/validate/ucum_references.rb b/lib/unitsdb/commands/validate/ucum_references.rb index fc5b91f..2ab8590 100644 --- a/lib/unitsdb/commands/validate/ucum_references.rb +++ b/lib/unitsdb/commands/validate/ucum_references.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "../base" - module Unitsdb module Commands module Validate diff --git a/lib/unitsdb/database.rb b/lib/unitsdb/database.rb index 96bd411..a73510d 100644 --- a/lib/unitsdb/database.rb +++ b/lib/unitsdb/database.rb @@ -1,12 +1,5 @@ # frozen_string_literal: true -require_relative "unit" -require_relative "prefix" -require_relative "quantity" -require_relative "dimension" -require_relative "unit_system" -require_relative "errors" - module Unitsdb class Database < Lutaml::Model::Serializable # model Config.model_for(:units) diff --git a/lib/unitsdb/dimension.rb b/lib/unitsdb/dimension.rb index 3b19e39..2ebd5bf 100644 --- a/lib/unitsdb/dimension.rb +++ b/lib/unitsdb/dimension.rb @@ -1,10 +1,5 @@ # frozen_string_literal: true -require_relative "identifier" -require_relative "dimension_details" -require_relative "quantity" -require_relative "localized_string" -require_relative "external_reference" # NISTd1: # length: # power: 1 diff --git a/lib/unitsdb/dimension_details.rb b/lib/unitsdb/dimension_details.rb index de43b41..82b7c38 100644 --- a/lib/unitsdb/dimension_details.rb +++ b/lib/unitsdb/dimension_details.rb @@ -10,7 +10,6 @@ # mathml: "M" # unicode: "\U0001D5AC" -require_relative "symbol_presentations" module Unitsdb class DimensionDetails < Lutaml::Model::Serializable attribute :power, :integer diff --git a/lib/unitsdb/dimensions.rb b/lib/unitsdb/dimensions.rb index 8e08a32..a5aa840 100644 --- a/lib/unitsdb/dimensions.rb +++ b/lib/unitsdb/dimensions.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "dimension" - module Unitsdb class Dimensions < Lutaml::Model::Serializable # model 
Config.model_for(:dimensions) diff --git a/lib/unitsdb/prefix.rb b/lib/unitsdb/prefix.rb index 068da26..ca78713 100644 --- a/lib/unitsdb/prefix.rb +++ b/lib/unitsdb/prefix.rb @@ -1,9 +1,5 @@ # frozen_string_literal: true -require_relative "identifier" -require_relative "symbol_presentations" -require_relative "external_reference" -require_relative "localized_string" # --- # NISTp10_30: # name: quetta diff --git a/lib/unitsdb/prefix_reference.rb b/lib/unitsdb/prefix_reference.rb index 07bc4be..d757045 100644 --- a/lib/unitsdb/prefix_reference.rb +++ b/lib/unitsdb/prefix_reference.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "identifier" - module Unitsdb class PrefixReference < Identifier attribute :id, :string diff --git a/lib/unitsdb/prefixes.rb b/lib/unitsdb/prefixes.rb index d096cc3..40ee559 100644 --- a/lib/unitsdb/prefixes.rb +++ b/lib/unitsdb/prefixes.rb @@ -1,6 +1,5 @@ # frozen_string_literal: true -require_relative "prefix" # --- # NISTp10_30: # name: quetta diff --git a/lib/unitsdb/quantities.rb b/lib/unitsdb/quantities.rb index 2095a15..5a33f83 100644 --- a/lib/unitsdb/quantities.rb +++ b/lib/unitsdb/quantities.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "quantity" - module Unitsdb class Quantities < Lutaml::Model::Serializable # model Config.model_for(:quantities) diff --git a/lib/unitsdb/quantity.rb b/lib/unitsdb/quantity.rb index 621b37e..3738a55 100644 --- a/lib/unitsdb/quantity.rb +++ b/lib/unitsdb/quantity.rb @@ -1,11 +1,5 @@ # frozen_string_literal: true -require_relative "identifier" -require_relative "unit_reference" -require_relative "dimension_reference" -require_relative "external_reference" -require_relative "localized_string" - module Unitsdb class Quantity < Lutaml::Model::Serializable # model Config.model_for(:quantity) diff --git a/lib/unitsdb/qudt.rb b/lib/unitsdb/qudt.rb index a1bfd3a..74a76ad 100644 --- a/lib/unitsdb/qudt.rb +++ b/lib/unitsdb/qudt.rb @@ -1,7 +1,5 @@ # frozen_string_literal: 
true -require "lutaml/model" - module Unitsdb # QUDT Unit from units vocabulary # Example: http://qudt.org/vocab/unit/M (meter) diff --git a/lib/unitsdb/root_unit_reference.rb b/lib/unitsdb/root_unit_reference.rb index 1f1759a..b09ed8f 100644 --- a/lib/unitsdb/root_unit_reference.rb +++ b/lib/unitsdb/root_unit_reference.rb @@ -1,8 +1,5 @@ # frozen_string_literal: true -require_relative "unit_reference" -require_relative "prefix_reference" - module Unitsdb class RootUnitReference < Lutaml::Model::Serializable # model Config.model_for(:root_unit) diff --git a/lib/unitsdb/scale.rb b/lib/unitsdb/scale.rb index 174b4e0..117ea06 100644 --- a/lib/unitsdb/scale.rb +++ b/lib/unitsdb/scale.rb @@ -1,9 +1,5 @@ # frozen_string_literal: true -require_relative "identifier" -require_relative "localized_string" -require_relative "scale_properties" - module Unitsdb class Scale < Lutaml::Model::Serializable # model Config.model_for(:quantity) diff --git a/lib/unitsdb/scale_reference.rb b/lib/unitsdb/scale_reference.rb index 31983d6..c3fb19a 100644 --- a/lib/unitsdb/scale_reference.rb +++ b/lib/unitsdb/scale_reference.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "identifier" - module Unitsdb class ScaleReference < Identifier attribute :id, :string diff --git a/lib/unitsdb/scales.rb b/lib/unitsdb/scales.rb index 78de17b..716cf8c 100644 --- a/lib/unitsdb/scales.rb +++ b/lib/unitsdb/scales.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "scale" - module Unitsdb class Scales < Lutaml::Model::Serializable # model Config.model_for(:Scale) diff --git a/lib/unitsdb/si_derived_base.rb b/lib/unitsdb/si_derived_base.rb index bd36f2a..7bf45ad 100644 --- a/lib/unitsdb/si_derived_base.rb +++ b/lib/unitsdb/si_derived_base.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require_relative "root_unit_reference" - # si_derived_bases: # - power: 2 # unit_reference: diff --git a/lib/unitsdb/ucum.rb b/lib/unitsdb/ucum.rb index bec4b45..6d3fd5d 100644 --- 
a/lib/unitsdb/ucum.rb +++ b/lib/unitsdb/ucum.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require "lutaml/model" - module Unitsdb # # second @@ -17,7 +15,7 @@ class UcumBaseUnit < Lutaml::Model::Serializable attribute :property, :string xml do - root "base-unit" + element "base-unit" map_attribute "Code", to: :code_sensitive map_attribute "CODE", to: :code map_attribute "dim", to: :dimension @@ -43,7 +41,7 @@ class UcumPrefixValue < Lutaml::Model::Serializable attribute :content, :string xml do - root "value" + element "value" map_attribute "value", to: :value map_content to: :content end @@ -57,7 +55,7 @@ class UcumPrefix < Lutaml::Model::Serializable attribute :value, UcumPrefixValue xml do - root "prefix" + element "prefix" map_attribute "Code", to: :code_sensitive map_attribute "CODE", to: :code map_element "name", to: :name @@ -110,7 +108,7 @@ class UcumUnitValueFunction < Lutaml::Model::Serializable attribute :unit_sensitive, :string xml do - root "function" + element "function" map_attribute "name", to: :name map_attribute "value", to: :value map_attribute "Unit", to: :unit_sensitive @@ -125,7 +123,7 @@ class UcumUnitValue < Lutaml::Model::Serializable attribute :content, :string xml do - root "value" + element "value" map_attribute "Unit", to: :unit_sensitive map_attribute "UNIT", to: :unit map_attribute "value", to: :value @@ -147,7 +145,7 @@ class UcumUnit < Lutaml::Model::Serializable attribute :value, UcumUnitValue xml do - root "unit" + element "unit" map_attribute "Code", to: :code_sensitive map_attribute "CODE", to: :code map_attribute "isMetric", to: :is_metric @@ -168,6 +166,11 @@ def identifier end end + class UcumNamespace < Lutaml::Xml::Namespace + uri "http://unitsofmeasure.org/ucum-essence" + prefix_default "ucum" + end + # This is the root element of the UCUM XML "ucum-essence.xml" file. 
# # 0.7" + spec.add_dependency "lutaml-model", "~> 0.8.0" spec.add_dependency "rdf", "~> 3.1" spec.add_dependency "rdf-turtle", "~> 3.1" spec.add_dependency "rubyzip", "~> 2.3" - spec.add_dependency "terminal-table" + spec.add_dependency "table_tennis", "~> 0.0.7" spec.add_dependency "thor", "~> 1.0" end