From 4716f28da3e9611cea6d1502781098ab51e28e8a Mon Sep 17 00:00:00 2001 From: Randall Theuns Date: Tue, 15 Jul 2025 09:03:03 +0200 Subject: [PATCH] Upgrade Elixir & deps Inlines the simetric dep (as it's not maintained and we only use it for the levenshtein distance func). --- .github/workflows/elixir.yml | 4 +-- .tool-versions | 2 ++ lib/match_engine/levenshtein.ex | 48 ++++++++++++++++++++++++++ lib/match_engine/score.ex | 2 +- mix.exs | 1 - mix.lock | 18 +++++----- test/match_engine/levenshtein_test.exs | 41 ++++++++++++++++++++++ 7 files changed, 103 insertions(+), 13 deletions(-) create mode 100644 .tool-versions create mode 100644 lib/match_engine/levenshtein.ex create mode 100644 test/match_engine/levenshtein_test.exs diff --git a/.github/workflows/elixir.yml b/.github/workflows/elixir.yml index 6dd625b..6152661 100644 --- a/.github/workflows/elixir.yml +++ b/.github/workflows/elixir.yml @@ -18,8 +18,8 @@ jobs: MIX_ENV: test strategy: matrix: - otp: ["26.0"] - elixir: ["1.16"] + otp: ["27.0"] + elixir: ["1.18"] steps: - uses: actions/checkout@v3 diff --git a/.tool-versions b/.tool-versions new file mode 100644 index 0000000..d8fbee6 --- /dev/null +++ b/.tool-versions @@ -0,0 +1,2 @@ +elixir 1.18.4-otp-27 +erlang 27.3.4.1 diff --git a/lib/match_engine/levenshtein.ex b/lib/match_engine/levenshtein.ex new file mode 100644 index 0000000..1e73055 --- /dev/null +++ b/lib/match_engine/levenshtein.ex @@ -0,0 +1,48 @@
+defmodule MatchEngine.Levenshtein do
+  @moduledoc """
+  The implementation of the [Levenshtein](https://en.wikipedia.org/wiki/Levenshtein_distance) distance metric.
+
+  Inlined from the Simetric library.
+  """
+
+  @doc """
+  Returns the minimum number of single-grapheme edits (insertions,
+  deletions or substitutions) required to change `string1` into `string2`.
+
+  ## Examples
+
+      iex> MatchEngine.Levenshtein.compare("kitten", "sitting")
+      3
+
+  """
+  @spec compare(String.t(), String.t()) :: non_neg_integer()
+  def compare(string1, string2)
+  def compare(string, string), do: 0
+  def compare(string1, ""), do: String.length(string1)
+  def compare("", string2), do: String.length(string2)
+
+  def compare(string1, string2) do
+    chars1 = String.graphemes(string1)
+    chars2 = String.graphemes(string2)
+    distance(chars1, chars2, Enum.to_list(length(chars2)..0//-1), 1)
+  end
+
+  # Walks `chars1` one matrix row at a time, starting from row 0 (`[n, ..., 0]`).
+  # Rows are kept reversed, so the head of the last row is the full distance.
+  defp distance([], _, [result | _], _), do: result
+
+  defp distance([char | rest], chars2, distlist, step) do
+    distlist = proceed(char, chars2, Enum.reverse(distlist), [step], step)
+    distance(rest, chars2, distlist, step + 1)
+  end
+
+  # Builds one row. `head` is the diagonal cell and `prev` the cell above (both
+  # from the previous row); `score` is the cell to the left in the current row.
+  defp proceed(_, [], _, acc, _), do: acc
+
+  defp proceed(char1, [char2 | rest], [head | [prev | _] = distlist], acc, score) do
+    diff = if char1 == char2, do: 0, else: 1
+    score = min(min(score + 1, prev + 1), head + diff)
+    proceed(char1, rest, distlist, [score | acc], score)
+  end
+end
diff --git a/lib/match_engine/score.ex b/lib/match_engine/score.ex index bcde560..186d262 100644 --- a/lib/match_engine/score.ex +++ b/lib/match_engine/score.ex @@ -291,7 +291,7 @@ defmodule MatchEngine.Score do defp string_sim("", ""), do: 0 defp string_sim(a, b) do - d1 = 1 - Simetric.Levenshtein.compare(a, b) / max(String.length(a), String.length(b)) + d1 = 1 - MatchEngine.Levenshtein.compare(a, b) / max(String.length(a), String.length(b)) d2 = String.jaro_distance(a, b) max(d1, d2) end diff --git a/mix.exs b/mix.exs index 06fc9bc..a69f922 100644 --- a/mix.exs +++ b/mix.exs @@ -41,7 +41,6 @@ defmodule MatchEngine.Mixfile do defp deps do [ {:timex, "~> 3.1"}, - {:simetric, "~> 0.2.0"}, {:jason, "~> 1.0", only: :test}, {:ex_doc, "~> 0.19", runtime: false, only: :dev} ] diff --git a/mix.lock b/mix.lock index e1299ed..671d4d7 100644 --- a/mix.lock +++ b/mix.lock @@ -1,23 +1,23 @@ %{ - "certifi": {:hex, :certifi, "2.12.0",
"2d1cca2ec95f59643862af91f001478c9863c2ac9cb6e2f89780bfd8de987329", [:rebar3], [], "hexpm", "ee68d85df22e554040cdb4be100f33873ac6051387baf6a8f6ce82272340ff1c"}, + "certifi": {:hex, :certifi, "2.15.0", "0e6e882fcdaaa0a5a9f2b3db55b1394dba07e8d6d9bcad08318fb604c6839712", [:rebar3], [], "hexpm", "b147ed22ce71d72eafdad94f055165c1c182f61a2ff49df28bcc71d1d5b94a60"}, "combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm", "1b1dbc1790073076580d0d1d64e42eae2366583e7aecd455d1215b0d16f2451b"}, "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, "ex_doc": {:hex, :ex_doc, "0.31.1", "8a2355ac42b1cc7b2379da9e40243f2670143721dd50748bf6c3b1184dae2089", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3178c3a407c557d8343479e1ff117a96fd31bafe52a039079593fb0524ef61b0"}, - "expo": {:hex, :expo, "0.5.1", "249e826a897cac48f591deba863b26c16682b43711dd15ee86b92f25eafd96d9", [:mix], [], "hexpm", "68a4233b0658a3d12ee00d27d37d856b1ba48607e7ce20fd376958d0ba6ce92b"}, - "gettext": {:hex, :gettext, "0.24.0", "6f4d90ac5f3111673cbefc4ebee96fe5f37a114861ab8c7b7d5b30a1108ce6d8", [:mix], [{:expo, "~> 0.5.1", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "bdf75cdfcbe9e4622dd18e034b227d77dd17f0f133853a1c73b97b3d6c770e8b"}, - "hackney": {:hex, :hackney, "1.20.1", "8d97aec62ddddd757d128bfd1df6c5861093419f8f7a4223823537bad5d064e2", [:rebar3], [{:certifi, "~> 2.12.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: 
false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "fe9094e5f1a2a2c0a7d10918fee36bfec0ec2a979994cff8cfe8058cd9af38e3"}, + "expo": {:hex, :expo, "1.1.0", "f7b9ed7fb5745ebe1eeedf3d6f29226c5dd52897ac67c0f8af62a07e661e5c75", [:mix], [], "hexpm", "fbadf93f4700fb44c331362177bdca9eeb8097e8b0ef525c9cc501cb9917c960"}, + "gettext": {:hex, :gettext, "0.26.2", "5978aa7b21fada6deabf1f6341ddba50bc69c999e812211903b169799208f2a8", [:mix], [{:expo, "~> 0.5.1 or ~> 1.0", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "aa978504bcf76511efdc22d580ba08e2279caab1066b76bb9aa81c4a1e0a32a5"}, + "hackney": {:hex, :hackney, "1.24.1", "f5205a125bba6ed4587f9db3cc7c729d11316fa8f215d3e57ed1c067a9703fa9", [:rebar3], [{:certifi, "~> 2.15.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.4", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "f4a7392a0b53d8bbc3eb855bdcc919cd677358e65b2afd3840b5b3690c4c8a39"}, "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, - "jason": 
{:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, + "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, "makeup_elixir": {:hex, :makeup_elixir, "0.16.1", "cc9e3ca312f1cfeccc572b37a09980287e243648108384b97ff2b76e505c3555", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e127a341ad1b209bd80f7bd1620a15693a9908ed780c3b763bccf7d200c767c6"}, "makeup_erlang": {:hex, :makeup_erlang, "0.1.3", "d684f4bac8690e70b06eb52dad65d26de2eefa44cd19d64a8095e1417df7c8fd", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "b78dc853d2e670ff6390b605d807263bf606da3c82be37f9d7f68635bd886fc9"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, - "mimerl": {:hex, :mimerl, "1.2.0", "67e2d3f571088d5cfd3e550c383094b47159f3eee8ffa08e64106cdf5e981be3", [:rebar3], [], "hexpm", "f278585650aa581986264638ebf698f8bb19df297f66ad91b18910dfc6e19323"}, + "mimerl": {:hex, :mimerl, "1.4.0", "3882a5ca67fbbe7117ba8947f27643557adec38fa2307490c4c4207624cb213b", [:rebar3], [], "hexpm", 
"13af15f9f68c65884ecca3a3891d50a7b57d82152792f3e19d88650aa126b144"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, "simetric": {:hex, :simetric, "0.2.0", "0d6559aed7b08fd1b6794526c33836e55cee8dd99d2f9bf3e8ea46fdeb47d9e3", [:mix], [], "hexpm", "b43e80abe3d91505bf06d98b1088eb87a0b0281e931c987e35836524be4944fa"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, - "timex": {:hex, :timex, "3.7.11", "bb95cb4eb1d06e27346325de506bcc6c30f9c6dea40d1ebe390b262fad1862d1", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.20", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.1", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "8b9024f7efbabaf9bd7aa04f65cf8dcd7c9818ca5737677c7b76acbc6a94d1aa"}, - "tzdata": {:hex, :tzdata, "1.1.1", "20c8043476dfda8504952d00adac41c6eda23912278add38edc140ae0c5bcc46", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "a69cec8352eafcd2e198dea28a34113b60fdc6cb57eb5ad65c10292a6ba89787"}, - "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, + "timex": {:hex, :timex, "3.7.13", "0688ce11950f5b65e154e42b47bf67b15d3bc0e0c3def62199991b8a8079a1e2", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 
0.26", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.1", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "09588e0522669328e973b8b4fd8741246321b3f0d32735b589f78b136e6d4c54"}, + "tzdata": {:hex, :tzdata, "1.1.3", "b1cef7bb6de1de90d4ddc25d33892b32830f907e7fc2fccd1e7e22778ab7dfbc", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "d4ca85575a064d29d4e94253ee95912edfb165938743dbf002acdf0dcecb0c28"}, + "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.1", "a48703a25c170eedadca83b11e88985af08d35f37c6f664d6dcfb106a97782fc", [:rebar3], [], "hexpm", "b3a917854ce3ae233619744ad1e0102e05673136776fb2fa76234f3e03b23642"}, } diff --git a/test/match_engine/levenshtein_test.exs b/test/match_engine/levenshtein_test.exs new file mode 100644 index 0000000..0dbe89d --- /dev/null +++ b/test/match_engine/levenshtein_test.exs @@ -0,0 +1,41 @@
+defmodule MatchEngine.LevenshteinTest do
+  use ExUnit.Case, async: true
+  doctest MatchEngine.Levenshtein
+
+  cases = %{
+    # empty strings: the distance is the length of the other string
+    ["", ""] => 0,
+    ["", "ab"] => 2,
+    ["abc", ""] => 3,
+    # equal strings: no edits needed
+    ["a", "a"] => 0,
+    ["abc", "abc"] => 0,
+    # inserts only
+    ["a", "ab"] => 1,
+    ["b", "ab"] => 1,
+    ["ac", "abc"] => 1,
+    ["abcdefg", "xabxcdxxefxgx"] => 6,
+    # deletions only (the insert cases with the arguments swapped, plus one more)
+    ["a", ""] => 1,
+    ["ab", "a"] => 1,
+    ["ab", "b"] => 1,
+    ["abc", "ac"] => 1,
+    ["xabxcdxxefxgx", "abcdefg"] => 6,
+    # substitutions only (both strings have the same length)
+    ["a", "b"] => 1,
+    ["ab", "ac"] => 1,
+    ["ac", "bc"] => 1,
+    ["abc", "axc"] => 1,
+    ["xabxcdxxefxgx", "1ab2cd34ef5g6"] => 6,
+    # mixed operations
+    ["example", "samples"] => 3,
+    ["sturgeon", "urgently"] => 6,
+    ["levenshtein", "frankenstein"] => 6,
+    ["distance", "difference"] => 5
+  }
+  for {input, distance} <- cases do
+    test "compare #{inspect(input)}" do
+      assert MatchEngine.Levenshtein.compare(unquote_splicing(input)) == unquote(distance)
+    end
+  end
+end