From 9dbaa1cae5dc4085c38b759598903a349afa9eaa Mon Sep 17 00:00:00 2001
From: Steve Hall
Date: Sat, 2 Aug 2025 12:38:35 +0100
Subject: [PATCH 1/3] fix(data): Remove ambiguous release names

Previously, ambiguous duplicate release names could be produced for
builds that differed only by features not in the allowed set ("musl",
"javafx", "lite", "large_heap", "certified", "crac", "fiber"). Since
only an approved list of features is used to create the final release
name, these variants were being conflated. This meant that the plugin
could not reliably distinguish between them, sometimes leading to the
installation of an unintended JDK flavor.

This commit filters the incoming data so that each JDK flavor is
represented by a single, unique entry. Each release name is
canonicalized by normalizing its features against the allowed list.
All releases that share the same canonical name are then grouped, and
a single primary entry is selected from each group, preferring the one
with the smallest feature set. This prevents valid, unique JDK
distributions with extra features from being discarded while still
eliminating ambiguity and keeping the original release names.

Additionally, it resolves an issue where some `musl` builds do not
carry a `musl` feature, by assuming that `alpine-linux` in the
filename indicates a `musl` build. This was seen specifically in
`adoptopenjdk`.

Fixes: https://github.com/halcyon/asdf-java/issues/255
---
 update_data.bash | 91 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 75 insertions(+), 16 deletions(-)

diff --git a/update_data.bash b/update_data.bash
index 5d613628..05b71f10 100755
--- a/update_data.bash
+++ b/update_data.bash
@@ -9,8 +9,7 @@ LIST_RELEASE_TYPE="ga ea"
 
 DATA_DIR="./data"
 
-if [[ ! -d "${DATA_DIR}" ]]
-then
+if [[ ! -d "${DATA_DIR}" ]]; then
   mkdir "${DATA_DIR}"
 fi
 
@@ -37,26 +36,86 @@ function fetch_metadata {
   curl "${args[@]}" -o "${DATA_DIR}/jdk-${os}-${arch}-${release}.json" "${url}"
 }
 
-for OS in $LIST_OS
-do
-  for ARCH in $LIST_ARCH
-  do
-    for RELEASE_TYPE in $LIST_RELEASE_TYPE
-    do
+for OS in $LIST_OS; do
+  for ARCH in $LIST_ARCH; do
+    for RELEASE_TYPE in $LIST_RELEASE_TYPE; do
       fetch_metadata "$OS" "$ARCH" "$RELEASE_TYPE"
     done
-    cat "${DATA_DIR}/jdk-${OS}-${ARCH}"-*.json | jq -s 'add' > "${DATA_DIR}/jdk-${OS}-${ARCH}-all.json"
+    cat "${DATA_DIR}/jdk-${OS}-${ARCH}"-*.json | jq -s 'add' >"${DATA_DIR}/jdk-${OS}-${ARCH}-all.json"
     ln -s "jdk-${OS}-${ARCH}-ga.json" "${DATA_DIR}/jdk-${OS}-${ARCH}.json"
   done
 done
 
-RELEASE_QUERY='.[]
+# shellcheck disable=SC2016
+RELEASE_QUERY='
+# Function to generate a canonical key for a release.
+# This key is used to group releases that would be duplicates after feature cleanup.
+def key($features_to_use):
+  [
+    .vendor,
+    # Add "jre" to the key if the image_type is "jre".
+    if .image_type == "jre" then "jre" else empty end,
+    # Add "openj9" to the key if the jvm_impl is "openj9".
+    if .jvm_impl == "openj9" then "openj9" else empty end,
+    # Add the sorted and filtered features to the key if any exist.
+    if ($features_to_use | length) > 0 then $features_to_use | join("-") else empty end,
+    .version
+  ] | join("-");
+
+# Function to parse a version string into an array of numbers for comparison.
+# It handles various formats like "11.0.10+9", "8u191+12", "21.0.3+9.0.LTS".
+def parse_version:
+  [splits("[-.]")]
+  | map(tonumber? // 0) ;
+
+# Define the list of allowed features.
+["musl", "javafx", "lite", "large_heap", "certified", "crac", "fiber"] as $allowed_features +| [ + .[] + # Filter for supported file types. | select(.file_type | IN("tar.gz", "tgz", "zip")) - | .["features"] = (.features | map(select(IN("musl", "javafx", "lite", "large_heap", "certified", "crac", "fiber")))) - | [([.vendor, if (.image_type == "jre") then .image_type else empty end, if (.jvm_impl == "openj9") then .jvm_impl else empty end, if ((.features | length) == 0) then empty else (.features | join("-")) end, .version] | join("-")), .filename, .url, .sha256] - | @tsv' -for FILE in "${DATA_DIR}"/*.json -do + | { + # Keep the original release data. + original: ., + # Generate the canonical key by cleaning and sorting the features. + key_text: key(.features | map(select(IN($allowed_features[]))) | sort) + } +] +# Group releases by their canonical key. +| group_by(.key_text) +# For each group of potential duplicates, select the best candidate. The "best" +# is determined by selecting the release with the minimum value based on the +# following criteria (in order): +# 1. The number of features (fewer is better). +# 2. The length of the filename without the version part (shorter is better), the assumption being that the shortest filename has the least additional features. +# As a tie-breaker, releases are pre-sorted by version helping to ensure that the earliest version is always chosen so that it does not change even if new versions are added +# additional tie-breakers are file_type so that the same file_type is chosen each time and finally filename as a fallback. +| map( + # First, filter the group to ensure we only process valid objects. + [ .[] | select(type == "object" and .original) ] + # Pre-sort releases by version (now using the reliable .version field) and file_type + # to provide a stable tie-breaker for min_by. + | sort_by([(.original.version | parse_version), .original.file_type, .original.filename]) + # From the sorted list, find the best candidate. + | min_by([ + # 1. Prefer fewer features. + (.original.features | length), + # 2. Prefer shorter sanitized filename. + ((.original.filename | gsub("\\.(tar\\.gz|tgz|zip)$"; "") | length) - (.original.version | length)) + ]) +) +# Flatten the array of selected releases. +| .[] +# Format the final output as a TSV line. +| [ + .key_text, + .original.filename, + .original.url, + .original.sha256 + ] | @tsv +' + +for FILE in "${DATA_DIR}"/*.json; do TSV_FILE="$(basename "${FILE}" .json).tsv" - jq -r "${RELEASE_QUERY}" "${FILE}" | sort -V > "${DATA_DIR}/${TSV_FILE}" + jq -r "${RELEASE_QUERY}" "${FILE}" | sort -V >"${DATA_DIR}/${TSV_FILE}" done From febe6eed8cc7e706db3664e45dca54d405a41ab0 Mon Sep 17 00:00:00 2001 From: Steve Hall Date: Sun, 14 Sep 2025 08:36:31 +0100 Subject: [PATCH 2/3] Resolve shellcheck error in test workflow ``` ShellCheck - shell script analysis tool version: 0.9.0 license: GNU General Public License, version 3 website: https://www.shellcheck.net In ./bin/functions line 179: if [ $? -ne 0 ]; then ^-- SC2181 (style): Check exit code directly with e.g. 'if ! mycmd;', not indirectly with $?. For more information: https://www.shellcheck.net/wiki/SC2181 -- Check exit code directly with e.g... ##[error]Process completed with exit code 1. 
```
---
 bin/functions | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/bin/functions b/bin/functions
index 4e786457..7db3ed74 100755
--- a/bin/functions
+++ b/bin/functions
@@ -165,21 +165,17 @@ function install {
     ${SHA256SUM} -c <<<"${checksum} ${package_filename}"
 
     case "${package_filename}" in
-    *zip) unzip "${package_filename}"
-    ;;
-    *tar.gz) tar xf "${package_filename}"
-    ;;
-    *tgz) tar xf "${package_filename}"
-    ;;
-    *) echo "Cannot extract ${package_filename}"
-    exit 1
-    ;;
-    esac
-
-    if [ $? -ne 0 ]; then
-        echo "Failed to extract ${package_filename}"
+    *zip)
+        unzip "${package_filename}" || { echo "Failed to extract ${package_filename}" >&2; exit 1; }
+        ;;
+    *tar.gz|*tgz)
+        tar xf "${package_filename}" || { echo "Failed to extract ${package_filename}" >&2; exit 1; }
+        ;;
+    *)
+        echo "Cannot extract ${package_filename}: unsupported file type." >&2
         exit 1
-    fi
+        ;;
+    esac
 
     read -r -a dirs <<<"$(ls -d ./*/)"
     cd "${dirs[0]}" || return 1

From 9c861292d28c972bda65ce21db76572c928cc84c Mon Sep 17 00:00:00 2001
From: Steve Hall
Date: Sun, 14 Sep 2025 08:51:34 +0100
Subject: [PATCH 3/3] Update tests to verify that there is exactly one entry
 for each test case and add jetbrains to test cases

---
 .github/workflows/tests.yml | 16 ++--------
 run_tests.bash              | 62 +++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 13 deletions(-)
 create mode 100755 run_tests.bash

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2222f45b..74788a28 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -25,6 +25,7 @@ jobs:
         shellcheck -V
         shellcheck ./bin/functions
         shellcheck ./update_data.bash
+        shellcheck ./run_tests.bash
     - name: Install asdf
       uses: actions/checkout@v2
       with:
@@ -40,19 +41,8 @@ jobs:
       env:
         GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
       run: |
-        ./update_data.bash
-        grep -q adoptopenjdk-openj9-11 data/jdk-linux-x86_64.tsv
-        echo "Found adoptopenjdk-openj9-11"
-        grep -q adoptopenjdk-openj9-large_heap-11 data/jdk-macosx-x86_64.tsv
-        echo "Found adoptopenjdk-openj9-large_heap-11"
-        grep -q zulu-musl-11 data/jdk-linux-x86_64.tsv
-        echo "Found zulu-musl-11 "
-        grep -q liberica-javafx-16 data/jdk-linux-arm32-vfp-hflt.tsv
-        echo "Found liberica-javafx-16"
-        grep -q liberica-lite-11 data/jdk-macosx-x86_64.tsv
-        echo "Found liberica-lite-11"
-        grep "graalvm-21" data/jdk-linux-aarch64.tsv | grep -q -v "graalvm-graalvm-21"
-        echo "Found graalvm-21"
+        ./update_data.bash
+        ./run_tests.bash
     - name: macOS Check java_home integration
       env:
         GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/run_tests.bash b/run_tests.bash
new file mode 100755
index 00000000..f3e1dfc4
--- /dev/null
+++ b/run_tests.bash
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+set -e
+set -Euo pipefail
+
+check_file_uniqueness() {
+  local file="$1"
+  if [ ! -f "${file}" ]; then
+    echo "WARNING: File '${file}' not found, skipping uniqueness check."
+    return
+  fi
+  echo "Checking for duplicate lines in '${file}'..."
+
+  local total_lines
+  total_lines=$(wc -l < "${file}")
+  local unique_lines
+  unique_lines=$(sort -u "${file}" | wc -l)
+
+  if [ "${total_lines}" -ne "${unique_lines}" ]; then
+    echo "ERROR: Found duplicate lines in '${file}'."
+    echo "The following lines are duplicated:"
+    sort "${file}" | uniq -d
+    exit 1
+  fi
+  echo "OK: All ${total_lines} lines in '${file}' are unique."
+}
+
+check_entry_present() {
+  local entry="$1"
+  local file="$2"
+  echo "Checking for presence of entry starting with '${entry}' in '${file}'..."
+
+  # Use `grep -q` for a quiet check. `^` anchors the search to the start of the line.
+  if grep -q "^${entry}" "${file}"; then
+    echo "OK: Entry '${entry}' found."
+  else
+    echo "ERROR: Entry '${entry}' not found at the start of any line in '${file}'."
+    exit 1
+  fi
+}
+
+echo "--- Verifying uniqueness of all lines in data/*.tsv files ---"
+for file in data/*.tsv; do
+  check_file_uniqueness "${file}"
+done
+echo "--- All .tsv files contain unique lines. ---"
+echo ""
+
+
+echo "--- Checking for presence of required entries ---"
+check_entry_present "adoptopenjdk-openj9-11" "data/jdk-linux-x86_64.tsv"
+check_entry_present "adoptopenjdk-openj9-large_heap-11" "data/jdk-macosx-x86_64.tsv"
+check_entry_present "zulu-musl-11" "data/jdk-linux-x86_64.tsv"
+check_entry_present "liberica-javafx-16" "data/jdk-linux-arm32-vfp-hflt.tsv"
+check_entry_present "liberica-lite-11" "data/jdk-macosx-x86_64.tsv"
+check_entry_present "graalvm-21" "data/jdk-linux-aarch64.tsv"
+check_entry_present "jetbrains-21" "data/jdk-linux-x86_64.tsv"
+check_entry_present "jetbrains-jre-21" "data/jdk-linux-x86_64.tsv"
+check_entry_present "adoptopenjdk-21" "data/jdk-linux-x86_64.tsv"
+echo "--- All required entries are present. ---"
+echo ""
+
+echo "All checks passed successfully."
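Note: the snippet below is a minimal, standalone sketch of the grouping and
min_by selection that the new RELEASE_QUERY performs; it is not part of the
patches above, and the sample releases, the "cp3" feature, and the filenames
are made up for illustration.

```bash
#!/usr/bin/env bash
# Minimal sketch of the deduplication idea: the two hypothetical releases
# below differ only by a feature ("cp3") outside the allowed list, so they
# share one canonical key and only the variant with fewer features survives.
jq -r '
  ["musl", "javafx", "lite", "large_heap", "certified", "crac", "fiber"] as $allowed
  | [ .[]
      | { original: .,
          key_text: ([ .vendor,
                       (.features | map(select(IN($allowed[]))) | sort | join("-") | select(. != "")),
                       .version
                     ] | join("-")) } ]
  | group_by(.key_text)
  # Pick the release with the smallest feature set from each group.
  | map(min_by(.original.features | length))
  | .[]
  | [ .key_text, .original.filename ] | @tsv
' <<'JSON'
[
  { "vendor": "zulu", "version": "11.0.2", "features": ["musl"],
    "filename": "zulu-11.0.2-linux_musl.tar.gz" },
  { "vendor": "zulu", "version": "11.0.2", "features": ["musl", "cp3"],
    "filename": "zulu-11.0.2-linux_musl-cp3.tar.gz" }
]
JSON
# Expected output (one line): zulu-musl-11.0.2<TAB>zulu-11.0.2-linux_musl.tar.gz
```

Because both sample entries canonicalize to the same key, only one TSV row is
emitted, which mirrors how the plugin avoids ambiguous duplicate release names
while keeping the original naming scheme.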