diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2222f45b..74788a28 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -25,6 +25,7 @@ jobs:
           shellcheck -V
           shellcheck ./bin/functions
           shellcheck ./update_data.bash
+          shellcheck ./run_tests.bash
       - name: Install asdf
         uses: actions/checkout@v2
         with:
@@ -40,19 +41,8 @@ jobs:
         env:
           GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          ./update_data.bash
-          grep -q adoptopenjdk-openj9-11 data/jdk-linux-x86_64.tsv
-          echo "Found adoptopenjdk-openj9-11"
-          grep -q adoptopenjdk-openj9-large_heap-11 data/jdk-macosx-x86_64.tsv
-          echo "Found adoptopenjdk-openj9-large_heap-11"
-          grep -q zulu-musl-11 data/jdk-linux-x86_64.tsv
-          echo "Found zulu-musl-11 "
-          grep -q liberica-javafx-16 data/jdk-linux-arm32-vfp-hflt.tsv
-          echo "Found liberica-javafx-16"
-          grep -q liberica-lite-11 data/jdk-macosx-x86_64.tsv
-          echo "Found liberica-lite-11"
-          grep "graalvm-21" data/jdk-linux-aarch64.tsv | grep -q -v "graalvm-graalvm-21"
-          echo "Found graalvm-21"
+          ./update_data.bash
+          ./run_tests.bash
       - name: macOS Check java_home integration
         env:
           GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/bin/functions b/bin/functions
index 4e786457..7db3ed74 100755
--- a/bin/functions
+++ b/bin/functions
@@ -165,21 +165,17 @@ function install {
   ${SHA256SUM} -c <<<"${checksum} ${package_filename}"
 
   case "${package_filename}" in
-  *zip) unzip "${package_filename}"
-    ;;
-  *tar.gz) tar xf "${package_filename}"
-    ;;
-  *tgz) tar xf "${package_filename}"
-    ;;
-  *) echo "Cannot extract ${package_filename}"
-    exit 1
-    ;;
-  esac
-
-  if [ $? -ne 0 ]; then
-    echo "Failed to extract ${package_filename}"
+  *zip)
+    unzip "${package_filename}" || { echo "Failed to extract ${package_filename}" >&2; exit 1; }
+    ;;
+  *tar.gz|*tgz)
+    tar xf "${package_filename}" || { echo "Failed to extract ${package_filename}" >&2; exit 1; }
+    ;;
+  *)
+    echo "Cannot extract ${package_filename}: unsupported file type." >&2
     exit 1
-  fi
+    ;;
+  esac
 
   read -r -a dirs <<<"$(ls -d ./*/)"
   cd "${dirs[0]}" || return 1
diff --git a/run_tests.bash b/run_tests.bash
new file mode 100755
index 00000000..f3e1dfc4
--- /dev/null
+++ b/run_tests.bash
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# Sanity checks for the TSV files generated by ./update_data.bash.
+# Exits non-zero as soon as one check fails.
+set -Eeuo pipefail
+
+# Fail if two rows of the given TSV file share the same key (first column).
+# A missing file only produces a warning.
+check_file_uniqueness() {
+  local file="$1"
+  if [ ! -f "${file}" ]; then
+    echo "WARNING: File '${file}' not found, skipping uniqueness check." >&2
+    return
+  fi
+  echo "Checking for duplicate keys in '${file}'..."
+
+  # Comparing keys instead of whole lines also catches rows that share a key
+  # but differ in filename, URL or checksum.
+  local duplicate_keys
+  duplicate_keys=$(cut -f1 "${file}" | sort | uniq -d)
+
+  if [ -n "${duplicate_keys}" ]; then
+    echo "ERROR: Found duplicate keys in '${file}'." >&2
+    echo "The following keys are duplicated:" >&2
+    echo "${duplicate_keys}" >&2
+    exit 1
+  fi
+  echo "OK: All keys in '${file}' are unique."
+}
+
+# Fail unless at least one line of file $2 starts with the entry $1.
+check_entry_present() {
+  local entry="$1"
+  local file="$2"
+  echo "Checking for presence of entry starting with '${entry}' in '${file}'..."
+
+  if [ ! -f "${file}" ]; then
+    echo "ERROR: File '${file}' not found." >&2
+    exit 1
+  fi
+
+  # Use `grep -q` for a quiet check. `^` anchors the search to the start of the line.
+  if grep -q -- "^${entry}" "${file}"; then
+    echo "OK: Entry '${entry}' found."
+  else
+    echo "ERROR: Entry '${entry}' not found at the start of any line in '${file}'." >&2
+    exit 1
+  fi
+}
+
+echo "--- Verifying uniqueness of all keys in data/*.tsv files ---"
+for file in data/*.tsv; do
+  check_file_uniqueness "${file}"
+done
+echo "--- All .tsv files contain unique keys. ---"
+echo ""
+
+echo "--- Checking for presence of required entries ---"
+check_entry_present "adoptopenjdk-openj9-11" "data/jdk-linux-x86_64.tsv"
+check_entry_present "adoptopenjdk-openj9-large_heap-11" "data/jdk-macosx-x86_64.tsv"
+check_entry_present "zulu-musl-11" "data/jdk-linux-x86_64.tsv"
+check_entry_present "liberica-javafx-16" "data/jdk-linux-arm32-vfp-hflt.tsv"
+check_entry_present "liberica-lite-11" "data/jdk-macosx-x86_64.tsv"
+check_entry_present "graalvm-21" "data/jdk-linux-aarch64.tsv"
+check_entry_present "jetbrains-21" "data/jdk-linux-x86_64.tsv"
+check_entry_present "jetbrains-jre-21" "data/jdk-linux-x86_64.tsv"
+check_entry_present "adoptopenjdk-21" "data/jdk-linux-x86_64.tsv"
+echo "--- All required entries are present. ---"
+echo ""
+
+echo "All checks passed successfully."
diff --git a/update_data.bash b/update_data.bash
index 5d613628..05b71f10 100755
--- a/update_data.bash
+++ b/update_data.bash
@@ -9,8 +9,7 @@ LIST_RELEASE_TYPE="ga ea"
 
 DATA_DIR="./data"
 
-if [[ ! -d "${DATA_DIR}" ]]
-then
+if [[ ! -d "${DATA_DIR}" ]]; then
   mkdir "${DATA_DIR}"
 fi
 
@@ -37,26 +36,87 @@ function fetch_metadata {
   curl "${args[@]}" -o "${DATA_DIR}/jdk-${os}-${arch}-${release}.json" "${url}"
 }
 
-for OS in $LIST_OS
-do
-  for ARCH in $LIST_ARCH
-  do
-    for RELEASE_TYPE in $LIST_RELEASE_TYPE
-    do
+for OS in $LIST_OS; do
+  for ARCH in $LIST_ARCH; do
+    for RELEASE_TYPE in $LIST_RELEASE_TYPE; do
       fetch_metadata "$OS" "$ARCH" "$RELEASE_TYPE"
     done
-    cat "${DATA_DIR}/jdk-${OS}-${ARCH}"-*.json | jq -s 'add' > "${DATA_DIR}/jdk-${OS}-${ARCH}-all.json"
+    cat "${DATA_DIR}/jdk-${OS}-${ARCH}"-*.json | jq -s 'add' >"${DATA_DIR}/jdk-${OS}-${ARCH}-all.json"
     ln -s "jdk-${OS}-${ARCH}-ga.json" "${DATA_DIR}/jdk-${OS}-${ARCH}.json"
   done
 done
 
-RELEASE_QUERY='.[]
+# shellcheck disable=SC2016
+RELEASE_QUERY='
+# Build the canonical key for a release: vendor, optional "jre" and "openj9"
+# markers, the given feature list and the version, joined with "-".
+# Releases that end up with the same key would be duplicate rows in the TSV.
+def key($features_to_use):
+  [
+    .vendor,
+    # Add "jre" to the key if the image_type is "jre".
+    if .image_type == "jre" then "jre" else empty end,
+    # Add "openj9" to the key if the jvm_impl is "openj9".
+    if .jvm_impl == "openj9" then "openj9" else empty end,
+    # Add the features to the key if any exist.
+    if ($features_to_use | length) > 0 then $features_to_use | join("-") else empty end,
+    .version
+  ] | join("-");
+
+# Turn a version string into an array of numbers for comparison.
+# Every run of digits becomes one component, so "11.0.10+9" gives [11,0,10,9],
+# "8u191+12" gives [8,191,12] and "21.0.3+9.0.LTS" gives [21,0,3,9,0].
+def parse_version:
+  [scan("[0-9]+") | tonumber];
+
+# Features that are allowed to appear in a key; all others are dropped.
+["musl", "javafx", "lite", "large_heap", "certified", "crac", "fiber"] as $allowed_features
+| [
+  .[]
+  # Filter for supported file types.
   | select(.file_type | IN("tar.gz", "tgz", "zip"))
-  | .["features"] = (.features | map(select(IN("musl", "javafx", "lite", "large_heap", "certified", "crac", "fiber"))))
-  | [([.vendor, if (.image_type == "jre") then .image_type else empty end, if (.jvm_impl == "openj9") then .jvm_impl else empty end, if ((.features | length) == 0) then empty else (.features | join("-")) end, .version] | join("-")), .filename, .url, .sha256]
-  | @tsv'
-for FILE in "${DATA_DIR}"/*.json
-do
+  | {
+      # Keep the original release data.
+      original: .,
+      # Generate the canonical key from the allowed features, sorted.
+      key_text: key(.features | map(select(IN($allowed_features[]))) | sort)
+    }
+]
+# Group releases by their canonical key.
+| group_by(.key_text)
+# Keep exactly one release per key: the candidate with the smallest ranking
+# tuple, compared element by element in this order:
+# 1. The number of features (fewer is better).
+# 2. The filename length without extension, minus the version length (shorter
+#    is better), assuming the shortest name carries the fewest extras.
+# 3. Version, file_type and filename, used purely as tie-breakers so the
+#    choice does not depend on input order (as long as filenames differ).
+| map(
+    # Only consider wrapped release objects.
+    [ .[] | select(type == "object" and .original) ]
+    | min_by([
+        # 1. Prefer fewer features.
+        (.original.features | length),
+        # 2. Prefer shorter sanitized filename.
+        ((.original.filename | gsub("\\.(tar\\.gz|tgz|zip)$"; "") | length) - (.original.version | length)),
+        # 3. Tie-breakers.
+        (.original.version | parse_version),
+        .original.file_type,
+        .original.filename
+      ])
+  )
+# Flatten the array of selected releases.
+| .[]
+# Format the final output as a TSV line.
+| [
+    .key_text,
+    .original.filename,
+    .original.url,
+    .original.sha256
+  ] | @tsv
+'
+
+for FILE in "${DATA_DIR}"/*.json; do
   TSV_FILE="$(basename "${FILE}" .json).tsv"
-  jq -r "${RELEASE_QUERY}" "${FILE}" | sort -V > "${DATA_DIR}/${TSV_FILE}"
+  jq -r "${RELEASE_QUERY}" "${FILE}" | sort -V >"${DATA_DIR}/${TSV_FILE}"
 done