diff --git a/.editorconfig b/.editorconfig index e127b9a9e5db..a98ddb5476ed 100644 --- a/.editorconfig +++ b/.editorconfig @@ -671,6 +671,9 @@ ij_yaml_space_before_colon = false ij_yaml_spaces_within_braces = true ij_yaml_spaces_within_brackets = true +[dev-tools/scripts/releaseWizard.yaml] +trim_trailing_whitespace = false + # dos files [{*.bat,*.cmd}] end_of_line = crlf \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 60338d389cf4..4fd94fb8c383 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,7 +7,7 @@ Before creating a pull request, please file an issue in the ASF Jira system for * https://issues.apache.org/jira/projects/SOLR -For something minor (i.e. that wouldn't be worth putting in release notes), you can skip JIRA. +For something minor (i.e. that wouldn't be worth putting in release notes), you can skip JIRA. To create a Jira issue, you will need to create an account there first. The title of the PR should reference the Jira issue number in the form: @@ -42,3 +42,4 @@ Please review the following and check all that apply: - [ ] I have run `./gradlew check`. - [ ] I have added tests for my changes. - [ ] I have added documentation for the [Reference Guide](https://github.com/apache/solr/tree/main/solr/solr-ref-guide) +- [ ] I have added a [changelog entry](https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc) for my change diff --git a/.github/scripts/validate-changelog-yaml.py b/.github/scripts/validate-changelog-yaml.py new file mode 100644 index 000000000000..be1c9205cdb5 --- /dev/null +++ b/.github/scripts/validate-changelog-yaml.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +Validates changelog YAML files in changelog/unreleased/ folder. + +Checks: +- File is valid YAML +- Contains required 'title' field (non-empty string) +- Contains required 'type' field (one of: added, changed, fixed, deprecated, removed, dependency_update, security, other) +- Contains required 'authors' field with at least one author +- Each author has a 'name' field (non-empty string) +""" + +import sys +import yaml + + +def validate_changelog_yaml(file_path): + """Validate a changelog YAML file.""" + valid_types = ['added', 'changed', 'fixed', 'deprecated', 'removed', 'dependency_update', 'security', 'other'] + + try: + with open(file_path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + + # Check if file contains a mapping (dictionary) + if not isinstance(data, dict): + print(f"::error file={file_path}::File must contain YAML mapping (key-value pairs)") + return False + + # Validate 'title' field + if 'title' not in data or not data['title']: + print(f"::error file={file_path}::Missing or empty 'title' field") + return False + + if not isinstance(data['title'], str) or not data['title'].strip(): + print(f"::error file={file_path}::Field 'title' must be a non-empty string") + return False + + # Validate 'type' field + if 'type' not in data or not data['type']: + print(f"::error file={file_path}::Missing or empty 'type' field") + return False + + if data['type'] not in 
valid_types: + print(f"::error file={file_path}::Invalid 'type': '{data['type']}'. Must be one of: {', '.join(valid_types)}") + return False + + # Validate 'authors' field + if 'authors' not in data or not data['authors']: + print(f"::error file={file_path}::Missing or empty 'authors' field") + return False + + if not isinstance(data['authors'], list): + print(f"::error file={file_path}::Field 'authors' must be a list") + return False + + if len(data['authors']) == 0: + print(f"::error file={file_path}::Field 'authors' must contain at least one author") + return False + + # Validate each author + for i, author in enumerate(data['authors']): + if not isinstance(author, dict): + print(f"::error file={file_path}::Author {i} must be a mapping (key-value pairs)") + return False + if 'name' not in author or not author['name']: + print(f"::error file={file_path}::Author {i} missing or empty 'name' field") + return False + if not isinstance(author['name'], str) or not author['name'].strip(): + print(f"::error file={file_path}::Author {i} 'name' must be a non-empty string") + return False + + # All validations passed + print(f"βœ“ {file_path} is valid") + print(f" Title: {data['title']}") + print(f" Type: {data['type']}") + print(f" Authors: {', '.join(a['name'] for a in data['authors'])}") + return True + + except yaml.YAMLError as e: + print(f"::error file={file_path}::Invalid YAML: {e}") + return False + except Exception as e: + print(f"::error file={file_path}::Error validating file: {e}") + return False + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print("Usage: validate-changelog-yaml.py ") + sys.exit(1) + + file_path = sys.argv[1] + if not validate_changelog_yaml(file_path): + sys.exit(1) diff --git a/.github/workflows/validate-changelog.yml b/.github/workflows/validate-changelog.yml new file mode 100644 index 000000000000..e5b7cf158ac3 --- /dev/null +++ b/.github/workflows/validate-changelog.yml @@ -0,0 +1,127 @@ +name: Validate Changelog + +on: + 
pull_request: + branches: + - '*' + +jobs: + validate-changelog: + name: Check changelog entry + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Check for no-changelog label + id: check-label + run: | + LABELS='${{ toJson(github.event.pull_request.labels) }}' + if echo "$LABELS" | grep -q '"no-changelog"'; then + echo "skip=true" >> $GITHUB_OUTPUT + else + echo "skip=false" >> $GITHUB_OUTPUT + fi + + - name: Check for CHANGES.txt edits + if: steps.check-label.outputs.skip == 'false' + run: | + # Get the list of changed files + CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD) + + if echo "$CHANGED_FILES" | grep -q "^solr/CHANGES\.txt$"; then + echo "::error::Use of solr/CHANGES.txt is deprecated. Please create a changelog yaml file instead." + echo "" + echo "Instead of editing CHANGES.txt, please:" + echo "1. Run: ./gradlew writeChangelog" + echo "2. Edit the generated YAML file in changelog/unreleased/" + echo "3. Commit both the code change and the YAML file" + echo "" + echo "For more information, see: dev-docs/changelog.adoc" + echo "" + echo "If this PR should not have a changelog entry (e.g., documentation-only changes)," + echo "add the 'no-changelog' label to this PR." + exit 1 + fi + + - name: Check for changelog entry + if: steps.check-label.outputs.skip == 'false' + run: | + # Get the list of changed files + CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD) + + # Check if any files were added to changelog/unreleased/ + if echo "$CHANGED_FILES" | grep -q "^changelog/unreleased/"; then + echo "βœ“ Changelog entry found" + exit 0 + fi + + # Check if only docs/tests/comments were changed (common exceptions) + HAS_NON_DOCS_CHANGES=false + while IFS= read -r file; do + # Skip changelog, docs, tests, and certain config files + if ! 
echo "$file" | grep -qE "(^changelog/|^solr/solr-ref-guide/|^dev-docs/|\.md$|\.adoc$|^solr/.*/test|\.gradle$|\.properties$|README|NOTICE|LICENSE)"; then + HAS_NON_DOCS_CHANGES=true + break + fi + done <<< "$CHANGED_FILES" + + if [ "$HAS_NON_DOCS_CHANGES" = false ]; then + echo "βœ“ No code changes detected (docs/tests only)" + exit 0 + fi + + echo "::error::This PR appears to contain code changes but no changelog entry was added." + echo "" + echo "Please add a changelog entry by:" + echo "1. Running: ./gradlew writeChangelog" + echo "2. Editing the generated YAML file in changelog/unreleased/" + echo "3. Committing the YAML file" + echo "" + echo "For more information, see: dev-docs/changelog.adoc" + echo "" + echo "If this PR should not have a changelog entry (e.g., refactoring, internal cleanup)," + echo "add the 'no-changelog' label to this PR." + exit 1 + + - name: Validate changelog YAML structure + if: steps.check-label.outputs.skip == 'false' + run: | + # Get the list of changed files + CHANGED_FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD) + + # Find all YAML files added to changelog/unreleased/ + YAML_FILES=$(echo "$CHANGED_FILES" | grep "^changelog/unreleased/.*\.ya\?ml$" || true) + + if [ -z "$YAML_FILES" ]; then + exit 0 + fi + + echo "Validating changelog YAML files..." + VALIDATION_FAILED=false + + while IFS= read -r file; do + if [ -z "$file" ]; then + continue + fi + + echo "" + echo "Validating: $file" + + # Validate using a Python script + python3 .github/scripts/validate-changelog-yaml.py "$file" + + if [ $? -ne 0 ]; then + VALIDATION_FAILED=true + fi + + done <<< "$YAML_FILES" + + if [ "$VALIDATION_FAILED" = true ]; then + echo "Please see dev-docs/changelog.adoc for more info." + + exit 1 + fi diff --git a/.gitignore b/.gitignore index 42f1ceec7f3e..05199687470f 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,4 @@ gradle/wrapper/gradle-wrapper.jar # WANT TO ADD MORE? 
You can tell Git without adding to this file: # See https://git-scm.com/docs/gitignore -# In particular, if you have tools you use, add to $GIT_DIR/info/exclude or use core.excludesFile \ No newline at end of file +# In particular, if you have tools you use, add to $GIT_DIR/info/exclude or use core.excludesFile diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000000..cb8dfd9fbc14 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,26 @@ + + + + + + + + + +DUMMY changelog file. + +We are in the process of migrating from CHANGES.txt to a structured approach to changelog generation. See [SOLR-17619](https://issues.apache.org/jira/browse/SOLR-17619) as well as [dev-docs/changelog.adoc](dev-docs/changelog.adoc) for details. + +[unreleased] +------------ + +### Added (1 change) + +- Dummy issue [SOLR-123](https://issues.apache.org/jira/browse/SOLR-123) (janhoy) + +[9.9.0] - 2025-07-24 +-------------------- + +### Added (1 change) + +- Dummy issue for release 9.9 [SOLR-124](https://issues.apache.org/jira/browse/SOLR-124) (janhoy) diff --git a/build.gradle b/build.gradle index 2bf2bab82e42..50a618d6936f 100644 --- a/build.gradle +++ b/build.gradle @@ -33,6 +33,7 @@ plugins { alias(libs.plugins.diffplug.spotless) apply false alias(libs.plugins.nodegradle.node) apply false alias(libs.plugins.openapi.generator) apply false + alias(libs.plugins.logchange) } // Declare default Java versions for the entire project and for SolrJ separately @@ -216,3 +217,5 @@ apply from: file('gradle/solr/packaging.gradle') apply from: file('gradle/solr/solr-forbidden-apis.gradle') apply from: file('gradle/node.gradle') + +apply from: file('gradle/changelog.gradle') diff --git a/changelog/README.md b/changelog/README.md new file mode 100644 index 000000000000..3686afdfec07 --- /dev/null +++ b/changelog/README.md @@ -0,0 +1,21 @@ + +# New changelog process + +We are in the process of migrating to a new way of managing our changelog. 
Please see [dev-docs/changelog.adoc](../dev-docs/changelog.adoc) for details. + +In a transition period it is still possible to merge your changelog entry to `solr/CHANGES.txt`, but then you can only use the new process. diff --git a/changelog/logchange-config.yml b/changelog/logchange-config.yml new file mode 100644 index 000000000000..c25458d09cb0 --- /dev/null +++ b/changelog/logchange-config.yml @@ -0,0 +1,96 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# More info about configuration you can find https://github.com/logchange/logchange#configuration +changelog: + heading: This file lists Solr's raw release notes with details of every change to Solr. Most people will find the solr-upgrade-notes.adoc file more approachable. [https://github.com/apache/solr/blob/main/solr/solr-ref-guide/modules/upgrade-notes/pages/solr-upgrade-notes.adoc](https://github.com/apache/solr/blob/main/solr/solr-ref-guide/modules/upgrade-notes/pages/solr-upgrade-notes.adoc) + entryTypes: # you can define custom entry types + # Please do not use upgrade_notes type. 
+ - key: upgrade_notes + order: 1 + - key: added + order: 2 + - key: changed + order: 3 + - key: fixed + order: 4 + - key: deprecated + order: 5 + - key: removed + order: 6 + - key: dependency_update + order: 7 + - key: security + order: 8 + - key: other + order: 9 + labels: + unreleased: unreleased + important_notes: Important notes + types: + entryTypesLabels: + added: Added + changed: Changed + fixed: Fixed + deprecated: Deprecated + removed: Removed + dependency_update: Dependency Upgrades + security: Security + other: Other + # Please do not use upgrade_notes type. + upgrade_notes: Upgrade Notes + number_of_changes: + singular: change + plural: changes + configuration: + heading: Configuration changes + type: Type + actions: + add: Added + update: Updated + delete: Deleted + with_default_value: with default value + description: Description + templates: + entry: "${prefix}${title} ${merge_requests} ${issues} ${links} ${authors}" + author: "([${name}](${url}) @${nick})" +# TODO: Using defaults for now, but want to test templates +# version_summary_templates: +# - path: version-summary.md +# changelog_templates: +# - path: CHANGELOG.md + # see examples of templates at examples/templates + # if you are missing some function, which will simplify your template (f.e getNumberOfEntries()) + # feel free to create issue or pull request with change +# version_summary_templates: + # Relative path to the changelog/.templates directory. + # Following definition will require from you existence of template at + # changelog/.templates/my-version-summary.html + # It will create my-version-summary.html in every version directory + # changelog/vX.X.X/my-version-summary.html + # HOW TO CREATE VERSION SUMMARY TEMPLATE? 
+ # Main object is version and its type of https://github.com/logchange/logchange/blob/main/logchange-core/src/main/java/dev/logchange/core/domain/changelog/model/version/ChangelogVersion.java +# - path: my-version-summary.html +# changelog_templates: + # Relative path to the changelog/.templates directory. + # Following definition will require from you existence of template at + # changelog/.templates/my-changelog.html + # It will create my-changelog.html in every version directory + # changelog/vX.X.X/my-version-summary.html + # HOW TO CREATE CHANGELOG TEMPLATE? + # Main object is changelog and its type of https://github.com/logchange/logchange/blob/main/logchange-core/src/main/java/dev/logchange/core/domain/changelog/model/Changelog.java +# - path: my-changelog.html diff --git a/changelog/unreleased/SOLR-17619 Use logchange for changelog management.yml b/changelog/unreleased/SOLR-17619 Use logchange for changelog management.yml new file mode 100644 index 000000000000..63a14ab5278c --- /dev/null +++ b/changelog/unreleased/SOLR-17619 Use logchange for changelog management.yml @@ -0,0 +1,7 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Use logchange for changelog management +type: other # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Jan HΓΈydahl + nick: janhoy + url: https://home.apache.org/phonebook.html?uid=janhoy diff --git a/dev-docs/changelog.adoc b/dev-docs/changelog.adoc new file mode 100644 index 000000000000..057b2e6604bf --- /dev/null +++ b/dev-docs/changelog.adoc @@ -0,0 +1,160 @@ += Adding a Changelog Entry to Apache Solr +:toc: +:toclevels: 2 +:icons: font + +The project no longer maintains a monolithic *CHANGES.txt* file directly. +Instead, we create small YAML fragments **per bug-fix or feature**, checked into +the `solr/changelog/unreleased/ folder`. 
At release time these fragments are +organized in versioned folders and collated into a human-readable *CHANGELOG.md* file. + +This document walks you through the workflow, explains the Gradle helpers +available, and finishes with a concrete example fragment. We use a tool called +[logchange](https://logchange.dev/tools/logchange/) as a gradle plugin to manage +the changelog. + +== 1. Workflow Overview + +. Make a feature branch with a descriptive name like 'SOLR-12345-fix-memory-leak'. +. Implement your change +. Run `./gradlew writeChangelog` +. Edit the generated YAML file in `changelog/unreleased/` to add a clear, user-focused description. +. Commit the YAML fragment along with the code change. +. Open your PR as usual. + +== 2. Directory Layout + +[source] +---- +solr/ +└── changelog/ + β”œβ”€β”€ unreleased/ ← new fragments live here + | └── SOLR-12345-fix-memory-leak.yml + β”œβ”€β”€ v10.0.0/ ← changes in already released version + └── v10.0.1/ +---- + +== 3. The YAML format + +Below is an example of a changelog yaml fragment. The full yaml format is xref:https://logchange.dev/tools/logchange/reference/#tasks[documented here], but we normally only need `title`, `type`, `authors` and `links`: + +[source, yaml] +---- +title: Fix nasty bug in CoreContainer shutdown +# added, changed, fixed, deprecated, removed, dependency_update, security, other +type: fixed +authors: + - name: John Contributor + nick: johngithub + url: https://home.apache.org/phonebook.html?uid=johnasfid +links: + - name: SOLR-3333 + url: https://issues.apache.org/jira/browse/SOLR-3333 +---- + +=== 3.1 Tool to draft a YAML for your change + +We have a gradle task that bootstraps a YAML file in the `changelog/unreleased/` directory. The task will use your current branch name as a file name and also title, and will +try to parse JIRA id from the branch name if it exists to add the `links`. 
+ +Invoke the task with: + +[source, bash] +---- +./gradlew writeChangelog +---- + +The task will use your git `user.name` as display name by default, but if you +add your name, GitHub username, and optionally Apache ID to the +`gradle.properties` file at the root of the Solr git checkout, this task will +use that information. Example: + +[source, properties] +---- +# Changelog generator settings (./gradlew writeChangelog) +user.name=John Doe +user.githubid=johngithub +user.asfid=johnapache +---- + +TIP: Aliases for `writeChangelog` task are `changelog` and `newChangelog`. + +== 4. Writing Good Entries + +* **Audience** is end-users and administrators, not committers. +* If the change is super minor, like a typo, don't bother adding a yaml file +* Keep the entry short and focused on the user impact. +* Choose the correct *type*: +** `added` for new features +** `changed` for improvements to existing code +** `fixed` for bug fixes +** `deprecated` for deprecated features +** `removed` for code removed in major releases +** `dependency_update` for updates to dependencies +** `security` for security-related changes +** `other` for anything else, like the build or documentation +* Reference issues as `SOLR-12345` or GitHub `PR#123`. + +== 5. Changelog Validation in Pull Requests + +The `validate-changelog` GitHub workflow automatically checks that: + +. **CHANGES.txt is not edited directly** - All changes must use the YAML fragment approach +. **A changelog entry is added** - Code changes must include a corresponding YAML file in `changelog/unreleased/` + +If your change does not require a changelog entry, it is still possible to merge the PR. + +== 6. 
For release managers + +=== 6.1 Gradle tasks for logchange + +The logchange gradle plugin offers some tasks, here are the two most important: + +[cols="1,2", options="header"] +|=== +| Task | Purpose + +| `logchangeGenerate` +| Generates changelog file `/CHANGELOG.md` based on `.yml` entries and archives (does not move any files) + +| `logchangeRelease` +| Creates a new changelog release by moving files from `changelog/unreleased/` directory to `changelog/vX.Y.Z` directory +|=== + +These are integrated in the Release Wizard. + +=== 6.2 Migration tool + +There is a migration tool in `dev-tools/scripts/changes2logchange.py` for one-time use during the transition. +It will bulk convert the entire `solr/CHANGES.txt` file to files in the `changelog/` folder and versioned sub folders. + +The tool can also be used by a developer who needs to convert many entries from work-in-progress after the migration +cutover. You can paste one or more entries and get them converted in the Terminal as follows: + +[source,bash] +---- +cat << EOF | python3 dev-tools/scripts/changes2logchange.py - +* SOLR-17960: Removed TikaLanguageIdentifierUpdateProcessor. + Use LangDetectLanguageIdentifierUpdateProcessor or + OpenNLPLangDetectUpdateProcessor instead. (janhoy) +EOF +---- + +which will output to `stdout`: + +[source,yaml] +---- +title: Removed TikaLanguageIdentifierUpdateProcessor. Use LangDetectLanguageIdentifierUpdateProcessor + or OpenNLPLangDetectUpdateProcessor instead. +type: other # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: +- name: janhoy +links: +- name: SOLR-17960 + url: https://issues.apache.org/jira/browse/SOLR-17960 +---- + +== 7. 
Further Reading + +* xref:https://github.com/logchange/logchange[Logchange web page] +* xref:https://keepachangelog.com/en/1.1.0/[keepachangelog.com website] diff --git a/dev-docs/git.adoc b/dev-docs/git.adoc index a21b1313eb74..fe28ec0b0d49 100644 --- a/dev-docs/git.adoc +++ b/dev-docs/git.adoc @@ -229,5 +229,3 @@ $ ./gradlew check -x test # Run tests if you need to $ git show HEAD # This will show you the commit you are about to push, make sure it looks right $ git push apache branch_9x ---- - -There has been some issue with Solr's CHANGES.txt file "cherry picking" all of the changes for trunk, so check this file especially. diff --git a/dev-tools/scripts/README.md b/dev-tools/scripts/README.md index fc8074fd3da6..e7d781d91b16 100644 --- a/dev-tools/scripts/README.md +++ b/dev-tools/scripts/README.md @@ -18,11 +18,11 @@ the full tests. usage: smokeTestRelease.py [-h] [--tmp-dir PATH] [--not-signed] [--local-keys PATH] [--revision REVISION] [--version X.Y.Z(-ALPHA|-BETA)?] [--test-alt-java TEST_ALT_JAVA] [--download-only] [--dev-mode] url ... Utility to test a release. - + positional arguments: url Url pointing to release to test test_args Arguments to pass to gradle for testing, e.g. -Dwhat=ever. - + options: -h, --help show this help message and exit --tmp-dir PATH Temporary directory to test inside, defaults to /tmp/smoke_solr_$version_$revision @@ -35,32 +35,32 @@ the full tests. Path to Java alternative home directory, to run tests with if specified --download-only Only perform download and sha hash check steps --dev-mode Enable dev mode, will not check branch compatibility - + Example usage: python3 -u dev-tools/scripts/smokeTestRelease.py https://dist.apache.org/repos/dist/dev/solr/solr-10.0.0-RC1-rev-c7510a0... 
### releaseWizard.py -The Release Wizard guides the Release Manager through the release process step +The Release Wizard guides the Release Manager through the release process step by step, helping you to to run the right commands in the right order, generating e-mail templates with the correct texts, versions, paths etc, obeying the voting rules and much more. It also serves as a documentation of all the steps, with timestamps, preserving log files from each command etc, showing only the steps and commands required for a major/minor/bugfix release. It also lets -you generate a full Asciidoc guide for the release. The wizard will execute many -of the other tools in this folder. +you generate a full Asciidoc guide for the release. The wizard will execute many +of the other tools in this folder. usage: releaseWizard.py [-h] [--dry-run] [--init] - + Script to guide a RM through the whole release process - + optional arguments: -h, --help show this help message and exit --dry-run Do not execute any commands, but echo them instead. Display extra debug info --init Re-initialize root and version - + Go push that release! ### buildAndPushRelease.py @@ -68,9 +68,9 @@ of the other tools in this folder. usage: buildAndPushRelease.py [-h] [--no-prepare] [--local-keys PATH] [--push-local PATH] [--sign FINGERPRINT] [--sign-method-gradle] [--gpg-pass-noprompt] [--gpg-home PATH] [--rc-num NUM] [--root PATH] [--logfile PATH] [--dev-mode] - + Utility to build, push, and test a release. - + optional arguments: -h, --help show this help message and exit --no-prepare Use the already built release in the provided checkout @@ -89,45 +89,27 @@ of the other tools in this folder. --root PATH Root of Git working tree for solr. Default: "." 
(the current directory) --logfile PATH Specify log file path (default /tmp/release.log) --dev-mode Enable development mode, which disables some strict checks - + Example usage for a Release Manager: python3 -u dev-tools/scripts/buildAndPushRelease.py --push-local /tmp/releases/6.0.1 --sign 3782CBB60147010B330523DD26FBCC7836BF353A --rc-num 1 ### addVersion.py usage: addVersion.py [-h] [-l LUCENE_VERSION] version - + Add a new version to CHANGES, to Version.java, build.gradle and solrconfig.xml files - + positional arguments: version New Solr version - + optional arguments: -h, --help show this help message and exit -l LUCENE_VERSION Optional lucene version. By default will read gradle/libs.versions.toml -### releasedJirasRegex.py - -Pulls out all JIRAs mentioned at the beginning of bullet items -under the given version in the given CHANGES.txt file -and prints a regular expression that will match all of them - - usage: releasedJirasRegex.py [-h] version changes - - Prints a regex matching JIRAs fixed in the given version by parsing the given - CHANGES.txt file - - positional arguments: - version Version of the form X.Y.Z - changes CHANGES.txt file to parse - - optional arguments: - -h, --help show this help message and exit - ### reproduceJenkinsFailures.py usage: reproduceJenkinsFailures.py [-h] [--no-git] [--iters N] URL - + Must be run from a Solr git workspace. Downloads the Jenkins log pointed to by the given URL, parses it for Git revision and failed Solr tests, checks out the Git revision in the local workspace, @@ -136,10 +118,10 @@ and prints a regular expression that will match all of them in each module of interest, failing at the end if any of the runs fails. To control the maximum number of concurrent JVMs used for each module's test run, set 'tests.jvms', e.g. 
in ~/lucene.build.properties - + positional arguments: URL Points to the Jenkins log to parse - + optional arguments: -h, --help show this help message and exit --no-git Do not run "git" at all @@ -148,9 +130,9 @@ and prints a regular expression that will match all of them ### githubPRs.py usage: githubPRs.py [-h] [--json] [--token TOKEN] - + Find open Pull Requests that need attention - + optional arguments: -h, --help show this help message and exit --json Output as json @@ -162,19 +144,44 @@ Scaffold a new module and include it into the build. It will set up the folders and all for you, so the only thing you need to do is add classes, tests and test-data. usage: scaffoldNewModule.py [-h] name full_name description - + Scaffold new module into solr/modules/ - + positional arguments: name code-name/id, e.g. my-module full_name Readable name, e.g. "My Module" description Short description for docs - + optional arguments: -h, --help show this help message and exit Example: ./scaffoldNewModule.py foo "My Module" "Very Useful module here" +### changes2logchange.py + +Migrates the legacy CHANGES.txt file format to the new logchange YAML-based format. +This script parses the monolithic CHANGES.txt file and generates individual YAML +files for each changelog entry, organized by version (v10.0.0/, v9.9.0/, etc.). + +Each YAML file complies with the schema outlined in `dev-docs/changelog.adoc`. 
+ + usage: changes2logchange.py [-h] [-o OUTPUT_DIR] [--last-released VERSION] changes_file + + Positional arguments: + changes_file Path to the CHANGES.txt file to migrate + + Optional arguments: + -h, --help Show this help message and exit + -o OUTPUT_DIR, --output-dir OUTPUT_DIR + Output directory for changelog structure (default: ./changelog) + --last-released VERSION Override auto-detected latest released version (e.g., 9.5.0) + Versions newer than this will be routed to unreleased/ folder + + Example usage: + + # Default behavior + python3 dev-tools/scripts/changes2logchange.py solr/CHANGES.txt + ### gitignore-gen.sh TBD @@ -191,11 +198,11 @@ TBD -r Specify remote to push to. Defaults to 'origin' -p Push to remote. Only done if both cherry-pick and tests succeeded WARNING: Never push changes to a remote branch before a thorough local test - + Simple script for aiding in back-porting one or more (trivial) commits to other branches. On merge conflict the script will run 'git mergetool'. See 'git mergetool --help' for help on configuring your favourite merge tool. Check out Sublime Merge (smerge). - + Example: # Backport two commits to both stable and release branches dev-tools/scripts/cherrypick.sh -b branch_9x -b branch_9_0 deadbeef0000 cafebabe1111 diff --git a/dev-tools/scripts/addDepsToChanges.py b/dev-tools/scripts/addDepsToChanges.py index 1fb5ceb7f5d4..3286d03cac40 100755 --- a/dev-tools/scripts/addDepsToChanges.py +++ b/dev-tools/scripts/addDepsToChanges.py @@ -16,7 +16,7 @@ # limitations under the License. """ -Script to add solrbot changes lines to CHANGES.txt +Script to create changelog YAML entries for solrbot dependency updates """ import os import sys @@ -26,6 +26,8 @@ import argparse import re +import yaml +from pathlib import Path line_re = re.compile(r"(.*?) 
(\(branch_\d+x\) )?\(#(\d+)\)$") @@ -58,6 +60,69 @@ def __str__(self) -> str: # Keep trailing newline to preserve existing blank-line formatting by update_changes return f"* PR#{self.pr_num}: {self.message} ({self.author})\n" + def to_yaml_dict(self) -> dict: + """ + Convert to a dictionary suitable for YAML serialization. + Extracts JIRA IDs from the title and adds them to links. + """ + # Extract JIRA IDs from the message + title, jira_links = extract_jira_issues_from_title(self.message) + + # Build links: JIRA issues first, then PR + links = jira_links.copy() # Start with JIRA links + links.append({ + 'name': f'PR#{self.pr_num}', + 'url': f'https://github.com/apache/solr/pull/{self.pr_num}' + }) + + return { + 'title': title, + 'type': 'dependency_update', + 'authors': [ + { + 'name': self.author + } + ], + 'links': links + } + + def yaml_filename(self) -> str: + """ + Generate a filesystem-safe filename for this entry. + Format: PR#####-slug.yaml + Truncates slug on whitespace boundaries, allowing up to 255 chars total. 
+ """ + # Clean message for slug + slug = self.message.lower() + # Replace whitespace with single space + slug = re.sub(r'\s+', ' ', slug) + # Remove non-alphanumeric except dashes + slug = re.sub(r'[^a-z0-9-._ ]', '', slug) + + # Calculate available space for slug + # Format: "PR" + pr_num + "-" + slug + ".yaml" + # Typical PR#1234 = 8 chars + "-" = 9 chars, ".yaml" = 5 chars, total overhead = 14 chars + # Most filesystems limit filenames to 255 chars + max_filename_length = 255 + overhead = len(f"PR{self.pr_num}-.yaml") + max_slug_length = max_filename_length - overhead + + # Truncate to max length on word boundaries if necessary + if len(slug) > max_slug_length: + # Find the last space within the limit + truncated = slug[:max_slug_length] + last_dash = truncated.rfind(' ') + if last_dash > max_slug_length // 2: # Keep at least half the available space + slug = truncated[:last_dash] + else: + # If no good word boundary, use hard limit and clean up trailing spaces + slug = truncated.rstrip(' ') + else: + # Remove trailing spaces + slug = slug.rstrip(' ') + + return f"PR{self.pr_num}-{slug}.yaml" + def get_prev_release_tag(ver): """ @@ -79,7 +144,7 @@ def get_prev_release_tag(ver): def read_config(): - parser = argparse.ArgumentParser(description='Adds dependency changes section to CHANGES.txt.') + parser = argparse.ArgumentParser(description='Adds changelog entries in changelog/ folder') parser.add_argument('--version', type=Version.parse, help='Solr version to add changes to', required=True) parser.add_argument('--user', default='solrbot', help='Git user to get changes for. Defaults to solrbot') newconf = parser.parse_args() @@ -119,72 +184,28 @@ def parse_gitlog_lines(lines, author: str): return entries -def gitlog_to_changes(line, user="solrbot"): +def write_changelog_yaml(entries): """ - DEPRECATED: Use parse_gitlog_lines + ChangeEntry.__str__ instead. 
- Converts a git log formatted line ending in (# str: + """Map a section heading to a logchange type.""" + heading_normalized = heading.strip() + if heading_normalized in ChangeType.HEADING_MAP: + return ChangeType.HEADING_MAP[heading_normalized] + + # Fallback: try case-insensitive matching + for key, value in ChangeType.HEADING_MAP.items(): + if key.lower() == heading_normalized.lower(): + return value + + # Default to "other" if no match found + print(f"Warning: Unknown section heading '{heading}', defaulting to 'other'", file=sys.stderr) + return "other" + + +@dataclass +class Author: + """Represents a changelog entry author/contributor.""" + name: str + nick: Optional[str] = None + url: Optional[str] = None + + def to_dict(self): + """Convert to dictionary, excluding None values.""" + result = {"name": self.name} + if self.nick: + result["nick"] = self.nick + if self.url: + result["url"] = self.url + return result + + +@dataclass +class Link: + """Represents a link (JIRA issue or GitHub PR).""" + name: str + url: str + + def to_dict(self): + """Convert to dictionary.""" + return {"name": self.name, "url": self.url} + + +@dataclass +class ChangeEntry: + """Represents a single changelog entry.""" + title: str + change_type: str + authors: List[Author] = field(default_factory=list) + links: List[Link] = field(default_factory=list) + + def to_dict(self): + """Convert to dictionary for YAML serialization.""" + return { + "title": self.title, + "type": self.change_type, + "authors": [author.to_dict() for author in self.authors], + "links": [link.to_dict() for link in self.links], + } + + +class AuthorParser: + """Parses author/contributor information from entry text.""" + + # Pattern to match TRAILING author list at the end of an entry: (Author1, Author2 via Committer) + # Must be at the very end, possibly with trailing punctuation + # Strategy: Match from the last '(' that leads to end-of-string pattern matching + # This regex finds the LAST occurrence of a 
parenthesized group followed by optional whitespace/punctuation + # and then end of string + AUTHOR_PATTERN = re.compile(r'\s+\(([^()]+)\)\s*[.,]?\s*$', re.MULTILINE) + + @staticmethod + def parse_authors(entry_text: str) -> Tuple[str, List[Author]]: + """ + Extract authors from entry text. + + Returns: + Tuple of (cleaned_text, list_of_authors) + + Patterns handled: + - (Author Name) + - (Author1, Author2) + - (Author Name via CommitterName) + - (Author1 via Committer1, Author2 via Committer2) + + Only matches author attribution at the END of the entry text, + not in the middle of descriptions like (aka Standalone) + """ + # Find ALL matches and use the LAST one (rightmost) + # This ensures we get the actual author attribution, not mid-text parentheses + matches = list(AuthorParser.AUTHOR_PATTERN.finditer(entry_text)) + if not matches: + return entry_text, [] + + # Use the last match (rightmost) + match = matches[-1] + + author_text = match.group(1) + # Include the space before the parenthesis in what we remove + cleaned_text = entry_text[:match.start()].rstrip() + + authors = [] + + # Split by comma, but be aware of "via" keyword + # Pattern: "Author via Committer" or just "Author" + segments = [seg.strip() for seg in author_text.split(',')] + + for segment in segments: + segment = segment.strip() + if not segment: + continue + + # Handle "via" prefix (standalone or after author name) + if segment.startswith('via '): + # Malformed: standalone "via Committer" (comma was added incorrectly) + # Extract just the committer name + committer_name = segment[4:].strip() # Remove "via " prefix + if committer_name: + authors.append(Author(name=committer_name)) + elif ' via ' in segment: + # Format: "Author via Committer" + parts = segment.split(' via ') + author_name = parts[0].strip() + + if author_name: + # Normal case: "Author via Committer" - add the author + authors.append(Author(name=author_name)) + else: + # Should not happen, but handle it + committer_name = 
parts[1].strip() if len(parts) > 1 else "" + if committer_name: + authors.append(Author(name=committer_name)) + else: + # Just an author name + authors.append(Author(name=segment)) + + return cleaned_text, authors + + +class IssueExtractor: + """Extracts issue/PR references from entry text.""" + + JIRA_ISSUE_PATTERN = re.compile(r'(?:SOLR|LUCENE|INFRA)-(\d+)') + GITHUB_PR_PATTERN = re.compile(r'(?:GitHub\s*)?#(\d+)') + + @staticmethod + def extract_issues(entry_text: str) -> List[Link]: + """Extract JIRA and GitHub issue references.""" + links = [] + seen_issues = set() # Track seen issues to avoid duplicates + + # Extract SOLR, LUCENE, INFRA issues + for match in IssueExtractor.JIRA_ISSUE_PATTERN.finditer(entry_text): + issue_id = match.group(0) # Full "SOLR-12345" or "LUCENE-12345" format + if issue_id not in seen_issues: + url = f"https://issues.apache.org/jira/browse/{issue_id}" + links.append(Link(name=issue_id, url=url)) + seen_issues.add(issue_id) + + # Extract GitHub PRs in multiple formats: + # "PR#3758", "PR-2475", "GITHUB#3666" + github_patterns = [ + (r'PR[#-](\d+)', 'PR#'), # PR#1234 or PR-1234 + (r'GITHUB#(\d+)', 'GITHUB#'), # GITHUB#3666 + ] + + for pattern_str, prefix in github_patterns: + pattern = re.compile(pattern_str) + for match in pattern.finditer(entry_text): + pr_num = match.group(1) + pr_name = f"{prefix}{pr_num}" + if pr_name not in seen_issues: + url = f"https://github.com/apache/solr/pull/{pr_num}" + links.append(Link(name=pr_name, url=url)) + seen_issues.add(pr_name) + + return links + + +class SlugGenerator: + """Generates slug-style filenames for YAML files.""" + + # Characters that are unsafe in filenames on various filesystems + # Avoid: < > : " / \ | ? * and control characters + # Note: # is safe on most filesystems + UNSAFE_CHARS_PATTERN = re.compile(r'[<>:"/\\|?*\x00-\x1f]+') + + @staticmethod + def generate_slug(issue_id: str, title: str) -> str: + """ + Generate a slug from issue ID and title. 
+ + Format: ISSUE-12345-short-slug or VERSION-entry-001-short-slug + Uses the actual issue ID without forcing SOLR- prefix + Ensures filesystem-safe filenames and respects word boundaries + Whitespace is preserved as spaces (not converted to dashes) + """ + # Sanitize issue_id to remove unsafe characters (preserve case and # for readability) + base_issue = SlugGenerator._sanitize_issue_id(issue_id) + + # Create slug from title: lowercase, preserve spaces, replace only unsafe chars with dash + title_slug = SlugGenerator._sanitize_filename_part(title) + + # Limit to reasonable length while respecting word boundaries + # Target max length: 50 chars for slug (leaving room for base_issue and dash) + if len(title_slug) > 50: + # Find last word/space boundary within 50 chars + truncated = title_slug[:50] + # Find the last space within the limit + last_space = truncated.rfind(' ') + if last_space > 20: # Keep at least 20 chars to avoid too-short slugs + title_slug = truncated[:last_space] + else: + # If no good space boundary, try to find a dash (from unsafe chars) + last_dash = truncated.rfind('-') + if last_dash > 20: + title_slug = truncated[:last_dash] + else: + # If no good boundary, use hard limit and clean up + title_slug = truncated.rstrip(' -') + + return f"{base_issue}-{title_slug}" + + @staticmethod + def _sanitize_issue_id(issue_id: str) -> str: + """ + Sanitize issue ID while preserving uppercase letters and # for readability. 
+ Examples: SOLR-12345, LUCENE-1234, PR#3758, GITHUB#2408, v9.8.0-entry-001 + """ + # Replace unsafe characters with dash (preserving case) + sanitized = SlugGenerator.UNSAFE_CHARS_PATTERN.sub('-', issue_id) + + # Replace remaining unsafe characters (but keep letters/numbers/dash/hash/dot) + sanitized = re.sub(r'[^a-zA-Z0-9.#-]+', '-', sanitized) + + # Replace multiple consecutive dashes with single dash + sanitized = re.sub(r'-+', '-', sanitized) + + # Strip leading/trailing dashes + sanitized = sanitized.strip('-') + + return sanitized + + @staticmethod + def _sanitize_filename_part(text: str) -> str: + """ + Sanitize text for use in filenames. + - Convert to lowercase + - Replace unsafe characters with dashes + - Convert any whitespace to space (preserved in filename) + - Remove multiple consecutive spaces or dashes + - Strip leading/trailing spaces and dashes + """ + # Convert to lowercase + text = text.lower() + + # Normalize all whitespace to single spaces + text = re.sub(r'\s+', ' ', text) + + # Replace unsafe characters with dash + text = SlugGenerator.UNSAFE_CHARS_PATTERN.sub('-', text) + + # Replace other non-alphanumeric (except space and dash) with dash + text = re.sub(r'[^a-z0-9\s-]+', '-', text) + + # Replace multiple consecutive dashes with single dash (but preserve spaces) + text = re.sub(r'-+', '-', text) + + # Strip leading/trailing spaces and dashes + text = text.strip(' -') + + return text + + +class VersionSection: + """Represents all entries for a specific version.""" + + def __init__(self, version: str): + self.version = version + self.entries: List[ChangeEntry] = [] + + def add_entry(self, entry: ChangeEntry): + """Add an entry to this version.""" + self.entries.append(entry) + + def get_directory_name(self) -> str: + """Get the directory name for this version (e.g., 'v10.0.0').""" + return f"v{self.version}" + + +class ChangesParser: + """Main parser for CHANGES.txt file.""" + + # Pattern to match version headers: ================== 10.0.0 
================== + VERSION_HEADER_PATTERN = re.compile(r'=+\s+([\d.]+)\s+=+') + + # Pattern to match section headers: "Section Name" followed by dashes + # Matches patterns like "New Features\n---------------------" + SECTION_HEADER_PATTERN = re.compile(r'^([A-Za-z][A-Za-z0-9\s/&-]*?)\n\s*-+\s*$', re.MULTILINE) + + def __init__(self, changes_file_path: str): + self.changes_file_path = changes_file_path + self.versions: List[VersionSection] = [] + + def parse(self): + """Parse the CHANGES.txt file.""" + with open(self.changes_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Split into version sections + version_matches = list(self.VERSION_HEADER_PATTERN.finditer(content)) + + for i, version_match in enumerate(version_matches): + version = version_match.group(1) + start_pos = version_match.end() + + # Find the end of this version section (start of next version or EOF) + if i + 1 < len(version_matches): + end_pos = version_matches[i + 1].start() + else: + end_pos = len(content) + + version_content = content[start_pos:end_pos] + version_section = self._parse_version_section(version, version_content) + self.versions.append(version_section) + + def _parse_version_section(self, version: str, content: str) -> VersionSection: + """Parse all entries within a single version section.""" + version_section = VersionSection(version) + + # Split into subsections (New Features, Bug Fixes, etc.) 
+ section_matches = list(self.SECTION_HEADER_PATTERN.finditer(content)) + + for i, section_match in enumerate(section_matches): + section_name = section_match.group(1) + + # Skip sections that should not be migrated + if section_name in ChangeType.SKIP_SECTIONS: + continue + + section_type = ChangeType.get_type(section_name) + + start_pos = section_match.end() + + # Find the end of this section (start of next section or EOF) + if i + 1 < len(section_matches): + end_pos = section_matches[i + 1].start() + else: + end_pos = len(content) + + section_content = content[start_pos:end_pos] + + # Parse entries in this section + entries = self._parse_entries(section_content, section_type) + for entry in entries: + version_section.add_entry(entry) + + return version_section + + def _parse_entries(self, section_content: str, change_type: str) -> List[ChangeEntry]: + """Parse individual entries within a section. + + Handles both: + - Bulleted entries: * text + - Numbered entries: 1. text, 2. text, etc. (older format) + """ + entries = [] + + # First try to split by bulleted entries (* prefix) + bulleted_pattern = re.compile(r'^\*\s+', re.MULTILINE) + bulleted_entries = bulleted_pattern.split(section_content) + + if len(bulleted_entries) > 1: + # Has bulleted entries + for entry_text in bulleted_entries[1:]: # Skip first empty split + entry_text = entry_text.strip() + if not entry_text or entry_text == "(No changes)": + continue + entry = self._parse_single_entry(entry_text, change_type) + if entry: + entries.append(entry) + else: + # No bulleted entries, try numbered entries (old format: "1. text", "2. text", etc.) 
+ numbered_pattern = re.compile(r'^\s{0,2}\d+\.\s+', re.MULTILINE) + if numbered_pattern.search(section_content): + # Has numbered entries + numbered_entries = numbered_pattern.split(section_content) + for entry_text in numbered_entries[1:]: # Skip first empty split + entry_text = entry_text.strip() + if not entry_text: + continue + entry = self._parse_single_entry(entry_text, change_type) + if entry: + entries.append(entry) + else: + # No standard entries found, try as paragraph + entry_text = section_content.strip() + if entry_text and entry_text != "(No changes)": + entry = self._parse_single_entry(entry_text, change_type) + if entry: + entries.append(entry) + + return entries + + def _parse_single_entry(self, entry_text: str, change_type: str) -> Optional[ChangeEntry]: + """Parse a single entry into a ChangeEntry object.""" + # Extract authors + description, authors = AuthorParser.parse_authors(entry_text) + + # Extract issues/PRs + links = IssueExtractor.extract_issues(description) + + # Remove all issue/PR IDs from the description text + # Handle multiple formats of issue references at the beginning: + + # 1. Remove leading issues with mixed projects: "LUCENE-3323,SOLR-2659,LUCENE-3329,SOLR-2666: description" + description = re.sub(r'^(?:(?:SOLR|LUCENE|INFRA)-\d+(?:\s*[,:]?\s*)?)+:\s*', '', description) + + # 2. Remove SOLR-specific issues: "SOLR-12345: description" or "SOLR-12345, SOLR-12346: description" + description = re.sub(r'^(?:SOLR-\d+(?:\s*,\s*SOLR-\d+)*\s*[:,]?\s*)+', '', description) + + # 3. Remove PR references: "PR#123: description" or "GITHUB#456: description" + description = re.sub(r'^(?:(?:PR|GITHUB)#\d+(?:\s*,\s*(?:PR|GITHUB)#\d+)*\s*[:,]?\s*)+', '', description) + + # 4. Remove parenthesized issue lists at start: "(SOLR-123, SOLR-456)" + description = re.sub(r'^\s*\((?:SOLR-\d+(?:\s*,\s*)?)+\)\s*', '', description) + description = re.sub(r'^\s*\((?:(?:SOLR|LUCENE|INFRA)-\d+(?:\s*,\s*)?)+\)\s*', '', description) + + # 5. 
Remove any remaining leading issue references
+        description = re.sub(r'^[\s,;]*(?:SOLR-\d+|LUCENE-\d+|INFRA-\d+|PR#\d+|GITHUB#\d+)[\s,:;]*', '', description)
+        while re.match(r'^[\s,;]*(?:SOLR-\d+|LUCENE-\d+|INFRA-\d+|PR#\d+|GITHUB#\d+)', description):
+            description = re.sub(r'^[\s,;]*(?:SOLR-\d+|LUCENE-\d+|INFRA-\d+|PR#\d+|GITHUB#\d+)[\s,:;]*', '', description)
+
+        description = description.strip()
+
+        # Normalize whitespace: collapse multiple newlines/spaces into single spaces
+        # This joins multi-line formatted text into a single coherent paragraph
+        description = re.sub(r'\s+', ' ', description)
+
+        # Escape HTML angle brackets to prevent markdown rendering issues
+        # Only escape < and > to avoid breaking markdown links and quotes
+        description = description.replace('<', '&lt;').replace('>', '&gt;')
+
+        if not description:
+            return None
+
+        return ChangeEntry(
+            title=description,
+            change_type=change_type,
+            authors=authors,
+            links=links,
+        )
+
+
+class YamlWriter:
+    """Writes ChangeEntry objects to YAML files."""
+
+    @staticmethod
+    def write_entry(entry: ChangeEntry, slug: str, output_dir: Path):
+        """Write a single entry to a YAML file."""
+        # Ensure output directory exists
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        filename = f"{slug}.yml"
+        filepath = output_dir / filename
+
+        # Convert entry to dictionary and write as YAML
+        entry_dict = entry.to_dict()
+
+        with open(filepath, 'w', encoding='utf-8') as f:
+            # Use custom YAML dumper for better formatting
+            yaml.dump(
+                entry_dict,
+                f,
+                default_flow_style=False,
+                sort_keys=False,
+                allow_unicode=True,
+                width=80  # Line width for better readability
+            )
+
+        return filepath
+
+
+class ReleaseDate:
+    """Fetches and manages release dates from Apache projects JSON."""
+
+    @staticmethod
+    def fetch_release_dates_and_latest() -> tuple:
+        """
+        Fetch release dates from Apache projects JSON and identify latest version.
+ + Returns: + Tuple of (version_dates_dict, latest_version_string) + Example: ({'9.9.0': '2025-07-24', ...}, '9.9.0') + """ + import urllib.request + from packaging import version as pkg_version + + version_dates = {} + latest_version = None + latest_version_obj = None + + url = "https://projects.apache.org/json/projects/solr.json" + + try: + response = urllib.request.urlopen(url, timeout=10) + data = json.loads(response.read().decode('utf-8')) + + releases = data.get('release', []) + for release in releases: + ver = release.get('revision') + created = release.get('created') + + if ver and created: + version_dates[ver] = created + + # Track the latest (highest) version + try: + ver_obj = pkg_version.parse(ver) + if latest_version_obj is None or ver_obj > latest_version_obj: + latest_version_obj = ver_obj + latest_version = ver + except Exception: + # Skip invalid version strings + pass + except Exception as e: + print(f"Warning: Could not fetch release dates: {e}", file=sys.stderr) + + return version_dates, latest_version + + +class MigrationRunner: + """Orchestrates the complete migration process.""" + + def __init__(self, changes_file_path: str, output_base_dir: str, last_released_version: Optional[str] = None): + self.changes_file_path = changes_file_path + self.output_base_dir = Path(output_base_dir) + self.parser = ChangesParser(changes_file_path) + + # Fetch release dates and latest version + self.version_dates, detected_latest = ReleaseDate.fetch_release_dates_and_latest() + + # Use provided version or detected latest + self.last_released_version = last_released_version or detected_latest + + if self.last_released_version: + print(f"Latest released version: {self.last_released_version}", file=sys.stderr) + + self.stats = { + 'versions_processed': 0, + 'entries_migrated': 0, + 'entries_skipped': 0, + 'files_created': 0, + 'release_dates_written': 0, + 'unreleased_entries': 0, + } + + def run(self): + """Execute the migration.""" + print(f"Parsing CHANGES.txt 
from: {self.changes_file_path}") + self.parser.parse() + + print(f"Found {len(self.parser.versions)} versions") + + for version_section in self.parser.versions: + self._process_version(version_section) + + self._print_summary() + + def _process_version(self, version_section: VersionSection): + """Process all entries for a single version.""" + from packaging import version as pkg_version + + # Determine if this version should go to unreleased folder + is_unreleased = False + if self.last_released_version: + try: + current_ver = pkg_version.parse(version_section.version) + latest_ver = pkg_version.parse(self.last_released_version) + is_unreleased = current_ver > latest_ver + except Exception: + # If parsing fails, treat as unreleased (conservative approach) + is_unreleased = True + + # Route to appropriate directory + if is_unreleased: + version_dir = self.output_base_dir / "unreleased" + print(f"\nProcessing version {version_section.version} (unreleased):") + self.stats['unreleased_entries'] += len(version_section.entries) + else: + version_dir = self.output_base_dir / version_section.get_directory_name() + print(f"\nProcessing version {version_section.version}:") + + print(f" Found {len(version_section.entries)} entries") + + # Write release-date.txt if we have a date for this version + if version_section.version in self.version_dates: + release_date = self.version_dates[version_section.version] + release_date_file = version_dir / "release-date.txt" + version_dir.mkdir(parents=True, exist_ok=True) + + with open(release_date_file, 'w', encoding='utf-8') as f: + f.write(release_date + '\n') + + self.stats['release_dates_written'] += 1 + print(f" Release date: {release_date}") + + entry_counter = 0 # For entries without explicit issue IDs + + for entry in version_section.entries: + # Find primary issue ID from links + issue_id = None + for link in entry.links: + if link.name.startswith('SOLR-'): + issue_id = link.name + break + + if not issue_id: + # If no SOLR issue 
found, try to use other JIRA/PR formats
+                for link in entry.links:
+                    if link.name.startswith(('LUCENE-', 'INFRA-', 'PR#', 'GITHUB#')):
+                        issue_id = link.name
+                        break
+
+            if not issue_id:
+                # No standard issue/PR found, generate a synthetic ID
+                # Use format: unknown-001, unknown-002, etc.
+                entry_counter += 1
+                synthetic_id = f"unknown-{entry_counter:03d}"
+                issue_id = synthetic_id
+
+            # Generate slug and write YAML
+            slug = SlugGenerator.generate_slug(issue_id, entry.title)
+            filepath = YamlWriter.write_entry(entry, slug, version_dir)
+
+            print(f"  ✓ {slug}.yml")
+            self.stats['entries_migrated'] += 1
+            self.stats['files_created'] += 1
+
+        self.stats['versions_processed'] += 1
+
+    def _print_summary(self):
+        """Print migration summary."""
+        print("\n" + "="*60)
+        print("Migration Summary:")
+        print(f"  Versions processed: {self.stats['versions_processed']}")
+        print(f"  Entries migrated: {self.stats['entries_migrated']}")
+        print(f"  Entries skipped: {self.stats['entries_skipped']}")
+        print(f"  Files created: {self.stats['files_created']}")
+        print(f"  Release dates written: {self.stats['release_dates_written']}")
+        if self.stats['unreleased_entries'] > 0:
+            print(f"  Unreleased entries: {self.stats['unreleased_entries']}")
+        print("="*60)
+
+
+class StdinProcessor:
+    """Process individual changelog entries from stdin and output YAML to stdout."""
+
+    @staticmethod
+    def process():
+        """
+        Read from stdin, parse individual changelog entries, and output YAML.
+
+        Ignores headers and nested structure.
+        Outputs YAML entries separated by '----' YAML separator.
+ """ + import sys + + # Read all lines from stdin + lines = sys.stdin.readlines() + + entries_yaml = [] + i = 0 + + while i < len(lines): + line = lines[i] + + # Skip empty lines and header lines (lines with only dashes or equals) + if not line.strip() or re.match(r'^[-=\s]+$', line): + i += 1 + continue + + # Check if this line starts a changelog entry (bullet point) + if line.strip().startswith('*') or line.strip().startswith('-'): + # Collect the full entry (may span multiple lines) + entry_text = line.strip()[1:].strip() # Remove bullet and leading spaces + + # Continue reading continuation lines + i += 1 + while i < len(lines): + next_line = lines[i] + # If the next line is another entry or empty, stop collecting + if (next_line.strip().startswith('*') or + next_line.strip().startswith('-') or + re.match(r'^[-=\s]+$', next_line) or + not next_line.strip()): + break + # Add to entry text + entry_text += ' ' + next_line.strip() + i += 1 + + # Parse the entry to a ChangeEntry + entry = EntryParser.parse_entry_line(entry_text) + if entry: + # Serialize to YAML + yaml_dict = { + 'title': entry.title, + 'type': entry.change_type, + } + if entry.authors: + yaml_dict['authors'] = [{'name': a.name} for a in entry.authors] + if entry.links: + yaml_dict['links'] = [ + {'name': link.name, 'url': link.url} + for link in entry.links + ] + + yaml_str = yaml.dump(yaml_dict, default_flow_style=False, sort_keys=False, allow_unicode=True) + entries_yaml.append(yaml_str.rstrip()) + else: + i += 1 + + # Output entries separated by YAML separators + for i, yaml_entry in enumerate(entries_yaml): + if i > 0: + print('----') + print(yaml_entry, end='') + if yaml_entry and not yaml_entry.endswith('\n'): + print() + + +class EntryParser: + """Parse a single changelog entry line.""" + + @staticmethod + def parse_entry_line(text: str) -> Optional[ChangeEntry]: + """ + Parse a single changelog entry line. + + Format: [ISSUE-ID: ]description (author1) (author2) ... 
+ """ + if not text.strip(): + return None + + # Extract issue links + links = IssueExtractor.extract_issues(text) + + # Remove issue IDs from text + for link in links: + # Remove markdown link format [ID](url) + text = re.sub(rf'\[{re.escape(link.name)}\]\([^)]+\)', '', text) + # Remove plain text issue IDs + text = re.sub(rf'{re.escape(link.name)}\s*:?\s*', '', text) + + text = text.strip() + + # Extract authors + text, authors = AuthorParser.parse_authors(text) + text = text.strip() + + # Escape HTML angle brackets + text = text.replace('<', '<').replace('>', '>') + + if not text: + return None + + # Default to 'other' type + change_type = 'other' + + return ChangeEntry( + title=text, + change_type=change_type, + authors=authors, + links=links, + ) + + +def main(): + """Main entry point.""" + import argparse + + parser = argparse.ArgumentParser( + description="Migrate Apache Solr CHANGES.txt to logchange YAML format" + ) + parser.add_argument( + "changes_file", + help="Path to the CHANGES.txt file to migrate. Use '-' to read individual changelog entries from stdin and output YAML to stdout" + ) + parser.add_argument( + "-o", "--output-dir", + default="changelog", + help="Directory to write changelog/ structure (default: ./changelog)" + ) + parser.add_argument( + "--last-released", + help="Last released version (e.g., 9.9.0). Versions newer than this go to unreleased/. " + "If not specified, fetches from Apache projects JSON." 
+ ) + + args = parser.parse_args() + + # Handle stdin/stdout mode + if args.changes_file == '-': + StdinProcessor.process() + return + + if not os.path.exists(args.changes_file): + print(f"Error: CHANGES.txt file not found: {args.changes_file}", file=sys.stderr) + sys.exit(1) + + runner = MigrationRunner(args.changes_file, args.output_dir, args.last_released) + runner.run() + + +if __name__ == "__main__": + main() diff --git a/dev-tools/scripts/releaseWizard.yaml b/dev-tools/scripts/releaseWizard.yaml index cd10ebc8a15b..a5493e6ddead 100644 --- a/dev-tools/scripts/releaseWizard.yaml +++ b/dev-tools/scripts/releaseWizard.yaml @@ -77,7 +77,7 @@ templates: Reason for fail is {{ reason }}. {% endif %} - This vote has {% if passed %}PASSED{% else %}FAILED{% endif %} + This vote has {% if passed %}PASSED{% else %}FAILED{% endif %} ---- {%- endmacro %} announce_solr: | @@ -102,7 +102,7 @@ templates: - Please read CHANGES.txt for a full list of {% if is_feature_release %}new features, changes and {% endif %}bugfixes: + Please read CHANGELOG.md for a full list of {% if is_feature_release %}new features, changes and {% endif %}bugfixes: announce_solr_mail: | @@ -665,12 +665,12 @@ groups: - bugfix - !Todo id: dependency_updates_changes - title: Add dependency updates to CHANGES.txt + title: Add dependency updates to changelog description: Bulk add all 'solrbot' dependency updates since last release depends: clean_git_checkout commands: !Commands root_folder: '{{ git_checkout_folder }}' - commands_text: We call out to a helper script that modifies CHANGES.txt, and then commit it. 
+ commands_text: We call out to a helper script that compiles SolrBot changes into `changelog/unreleased` confirm_each_command: true commands: - !Command @@ -680,14 +680,46 @@ groups: cmd: python3 -u dev-tools/scripts/addDepsToChanges.py --user solrbot --version {{ release_version }} tee: true - !Command - cmd: "{{ editor }} solr/CHANGES.txt" - comment: | - Verify that the modifications in CHANGES.txt under the 'Dependency Upgrades' are ok. - Look for duplicates, especially when a committer has manually added a similar entry for the same upgrade. + cmd: git add -u . && git commit -m "Add dependency updates to changelog for {{ release_version }}" && git push + logfile: dependency-changes.log + - !Todo + id: generate_changelog_release + title: Generate CHANGELOG.md for release + description: | + Generate `CHANGELOG.md` by running `logchange release` and `logchange generate`. This will prepare both a + `changelog/v{{ release_version }}` folder and the `CHANGELOG.md` file in one commit. + depends: dependency_updates_changes + commands: !Commands + root_folder: '{{ git_checkout_folder }}' + commands_text: Generate CHANGELOG.md + commands: + - !Command + cmd: git checkout {{ release_branch }} stdout: true - !Command - cmd: git add -u . 
&& git commit -m "Add dependency updates to CHANGES for {{ release_version }}" && git push - logfile: dependency-changes.log + cmd: git pull --ff-only + tee: true + - !Command + cmd: "{{ gradle_cmd }} logchangeRelease" + comment: Create `changelog/v{{ release_version }}` folder and move unreleased entries + tee: true + - !Command + cmd: "{{ gradle_cmd }} logchangeGenerate" + comment: Generate `CHANGELOG.md` in repository root + tee: true + - !Command + cmd: git add CHANGELOG.md changelog && git commit -m "Changelog for release v{{ release_version }}" && git push + comment: Commit and push changelog changes + logfile: commit_changelog.log + tee: true + - !Todo + id: persist_changelog_sha + title: Persist the SHA of the changelog commit + description: Store the current git sha + depends: generate_changelog_release + vars: + changelog_sha: '{{ current_git_rev }}' + persist_vars: changelog_sha - !Todo id: draft_release_notes title: Get a draft of the release notes in place @@ -695,7 +727,8 @@ groups: These are typically edited on the Wiki Clone a page for a previous version as a starting point for your release notes. - Edit the contents of `CHANGES.txt` into a more concise format for public consumption. + Use the contents of `CHANGELOG.md` and `major-changes-in-solr-N.adoc` as input and write a more + concise format for public consumption. Ask on dev@ for input. Ideally the timing of this request mostly coincides with the release branch creation. It's a good idea to remind the devs of this later in the release too. @@ -1689,52 +1722,43 @@ groups: cmd: git add -u . 
&& git commit -m "Add bugfix version {{ release_version }}" && git push logfile: commit-stable.log - !Todo - id: synchronize_changes - title: Synchronize CHANGES.txt + id: sync_changelog_cherry_pick + title: Cherry-pick changelog to stable and unstable branches description: | - Copy the CHANGES.txt section for this release back to the stable and unstable branches' - CHANGES.txt files, removing any duplicate entries, but only from sections for as-yet - unreleased versions; leave intact duplicate entries for already-released versions. - - There is a script to generate a regex that will match JIRAs fixed in a release: - `releasedJirasRegex.py`. The following examples will print regexes matching all JIRAs - fixed in {{ release_version }}, which can then be used to find duplicates in unreleased - version sections of the corresponding CHANGES.txt files. + Cherry-pick the changelog commit from the release branch to the stable and main branches. + This syncs both CHANGELOG.md and changelog folder changes across all active branches. + + The changelog commit was created in the previous step (generate_changelog_release) and will + be referenced here. Cherry-pick will automatically handle conflict resolution if needed. + depends: generate_changelog_release commands: !Commands root_folder: '{{ git_checkout_folder }}' - commands_text: Synchronize CHANGES.txt + commands_text: Cherry-pick changelog changes to other branches commands: - !Command - cmd: git checkout {{ release_branch }} - comment: Go to release branch - logfile: checkout-release.log + cmd: git checkout {{ stable_branch }} + comment: Checkout stable branch stdout: true - !Command - cmd: python3 -u -B dev-tools/scripts/releasedJirasRegex.py {{ release_version }} solr/CHANGES.txt + cmd: git pull --ff-only tee: true - comment: Find version regexes - !Command - cmd: git checkout main && git pull --ff-only && git clean -df && git checkout -- . 
- comment: Go to main branch - logfile: checkout-main.log + cmd: git cherry-pick {{ persist_changelog_sha.changelog_sha }} && git push + comment: Cherry-pick changelog commit + logfile: cherry_pick_changelog_stable.log + tee: true - !Command - cmd: "{{ editor }} solr/CHANGES.txt" - comment: Edit CHANGES.txt for main branch, do necessary changes + cmd: git checkout main + comment: Checkout main branch stdout: true - !Command - cmd: git add -u . && git commit -m "Sync CHANGES for {{ release_version }}" && git push - logfile: commit-main.log - - !Command - cmd: git checkout {{ stable_branch }} && git pull --ff-only && git clean -df && git checkout -- . - comment: Go to stable branch - logfile: checkout-stable.log - - !Command - cmd: "{{ editor }} solr/CHANGES.txt" - comment: Edit CHANGES.txt for stable branch, do necessary changes - stdout: true + cmd: git pull --ff-only + tee: true - !Command - cmd: git add -u . && git commit -m "Sync CHANGES for {{ release_version }}" && git push - logfile: commit-stable.log + cmd: git cherry-pick {{ persist_changelog_sha.changelog_sha }} && git push + comment: Cherry-pick changelog commit + logfile: cherry_pick_changelog_main.log + tee: true - !Todo id: increment_release_version title: Add the next version on release branch diff --git a/dev-tools/scripts/releasedJirasRegex.py b/dev-tools/scripts/releasedJirasRegex.py deleted file mode 100755 index 77780ffcaa63..000000000000 --- a/dev-tools/scripts/releasedJirasRegex.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import os -sys.path.append(os.path.dirname(__file__)) -from scriptutil import * -import argparse -import re - -# Pulls out all JIRAs mentioned at the beginning of bullet items -# under the given version in the given CHANGES.txt file -# and prints a regular expression that will match all of them -# -# Caveat: In ancient versions (Solr v1.1 and older), -# does not find Bugzilla bugs or JIRAs not mentioned at the beginning of -# bullets or numbered entries. -# -def print_released_jiras_regex(version, filename): - release_boundary_re = re.compile(r'\s*====*\s+(.*)\s+===') - version_re = re.compile(r'%s(?:$|[^-])' % version) - bullet_re = re.compile(r'\s*(?:[-*]|\d+\.(?=(?:\s|(?:SOLR)-)))(.*)') - jira_ptn = r'(?:SOLR)-\d+' - jira_re = re.compile(jira_ptn) - jira_list_ptn = r'(?:[:,/()\s]*(?:%s))+' % jira_ptn - jira_list_re = re.compile(jira_list_ptn) - more_jiras_on_next_line_re = re.compile(r'%s\s*,\s*$' % jira_list_ptn) # JIRA list with trailing comma - under_requested_version = False - requested_version_found = False - more_jiras_on_next_line = False - solr_jiras = [] - with open(filename, 'r') as changes: - for line in changes: - version_boundary = release_boundary_re.match(line) - if version_boundary is not None: - if under_requested_version: - break # No longer under the requested version - stop looking for JIRAs - else: - if version_re.search(version_boundary.group(1)): - under_requested_version = True # Start looking for JIRAs - requested_version_found = True - else: - if under_requested_version: - bullet_match = bullet_re.match(line) - if 
more_jiras_on_next_line or bullet_match is not None: - content = line if bullet_match is None else bullet_match.group(1) - jira_list_match = jira_list_re.match(content) - if jira_list_match is not None: - jira_match = jira_re.findall(jira_list_match.group(0)) - for jira in jira_match: - solr_jiras.append(jira.rsplit('-', 1)[-1]) - more_jiras_on_next_line = more_jiras_on_next_line_re.match(content) - if not requested_version_found: - raise Exception('Could not find %s in %s' % (version, filename)) - print() - if (len(solr_jiras) == 0): - print('(No JIRAs => no regex)', end='') - else: - print(r'SOLR-(?:%s)\b' % '|'.join(solr_jiras), end='') - print() - -def read_config(): - parser = argparse.ArgumentParser( - description='Prints a regex matching JIRAs fixed in the given version by parsing the given CHANGES.txt file') - parser.add_argument('version', type=Version.parse, help='Version of the form X.Y.Z') - parser.add_argument('changes', help='CHANGES.txt file to parse') - return parser.parse_args() - -def main(): - config = read_config() - print_released_jiras_regex(config.version, config.changes) - -if __name__ == '__main__': - try: - main() - except KeyboardInterrupt: - print('\nReceived Ctrl-C, exiting early') diff --git a/dev-tools/scripts/scriptutil.py b/dev-tools/scripts/scriptutil.py index c766077356f0..2318b7159f86 100644 --- a/dev-tools/scripts/scriptutil.py +++ b/dev-tools/scripts/scriptutil.py @@ -33,12 +33,12 @@ def __init__(self, major, minor, bugfix, prerelease): self.bugfix = bugfix self.prerelease = prerelease self.previous_dot_matcher = self.make_previous_matcher() - self.dot = '%d.%d.%d' % (self.major, self.minor, self.bugfix) + self.dot = '%d.%d.%d' % (self.major, self.minor, self.bugfix) self.constant = 'LUCENE_%d_%d_%d' % (self.major, self.minor, self.bugfix) @classmethod def parse(cls, value): - match = re.search(r'(\d+)\.(\d+).(\d+)(.1|.2)?', value) + match = re.search(r'(\d+)\.(\d+).(\d+)(.1|.2)?', value) if match is None: raise 
argparse.ArgumentTypeError('Version argument must be of format x.y.z(.1|.2)?') parts = [int(v) for v in match.groups()[:-1]] @@ -135,12 +135,12 @@ def run(cmd, cwd=None): except subprocess.CalledProcessError as e: print(e.output.decode('utf-8')) raise e - return output.decode('utf-8') + return output.decode('utf-8') def update_file(filename, line_re, edit): infile = open(filename, 'r') - buffer = [] - + buffer = [] + changed = False for line in infile: if not changed: @@ -244,6 +244,28 @@ def find_current_lucene_version(): return lucene_version_prop_re.search(versions_file).group(1).strip() +def extract_jira_issues_from_title(title): + """Return (cleaned_title, links) where links list unique JIRA issues found in title.""" + jira = re.compile(r'(?:SOLR|LUCENE|INFRA)-\d+') + + seen = set() + links = [ + {'name': m, 'url': f'https://issues.apache.org/jira/browse/{m}'} + for m in jira.findall(title) + if not (m in seen or seen.add(m)) + ] + + cleaned = title + # Remove variants at start or when slash-separated, then normalize whitespace + cleaned = re.sub(r'^\s*/\s*' + jira.pattern + r'[\s:]*', '', cleaned) + cleaned = re.sub(r'\s+/\s*' + jira.pattern + r'[\s:]*', ' ', cleaned) + cleaned = re.sub(r'^' + jira.pattern + r'[\s:]*', '', cleaned) + cleaned = re.sub(r'^\s*/\s*', '', cleaned).strip() + cleaned = re.sub(r'\s+', ' ', cleaned) + + return cleaned, links + + if __name__ == '__main__': print('This is only a support module, it cannot be run') sys.exit(1) diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index 9ff1e24774c6..729e97823471 100755 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -380,15 +380,19 @@ def testOpenApi(version, openApiDirUrl): raise RuntimeError('Did not see %s in %s' % expectedSpecFileName, openApiDirUrl) -def testChangesText(dir, version): - "Checks all CHANGES.txt under this dir." 
- for root, dirs, files in os.walk(dir): # pylint: disable=unused-variable +def testChangelogMd(dir, version): + "Checks CHANGELOG.md file." + changelog_path = os.path.join(dir, 'CHANGELOG.md') - # NOTE: O(N) but N should be smallish: - if 'CHANGES.txt' in files: - fullPath = '%s/CHANGES.txt' % root - #print 'CHECK %s' % fullPath - checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, False) + if not os.path.exists(changelog_path): + raise RuntimeError('CHANGELOG.md not found at %s' % changelog_path) + + with open(changelog_path, encoding='UTF-8') as f: + content = f.read() + + # Verify that the changelog contains the current version + if 'v%s' % version not in content and version not in content: + raise RuntimeError('Version %s not found in CHANGELOG.md' % version) reChangesSectionHREF = re.compile('(.*?)', re.IGNORECASE) reUnderbarNotDashHTML = re.compile(r'
  • (\s*(SOLR)_\d\d\d\d+)') @@ -612,10 +616,10 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs): in_solr_folder = [] if isSrc: in_solr_folder.extend(os.listdir(os.path.join(unpackPath, 'solr'))) - is_in_list(in_root_folder, ['LICENSE.txt', 'NOTICE.txt', 'README.md', 'CONTRIBUTING.md']) - is_in_list(in_solr_folder, ['CHANGES.txt', 'README.adoc']) + is_in_list(in_root_folder, ['LICENSE.txt', 'NOTICE.txt', 'README.md', 'CONTRIBUTING.md', 'CHANGELOG.md']) + is_in_list(in_solr_folder, ['README.adoc']) else: - is_in_list(in_root_folder, ['LICENSE.txt', 'NOTICE.txt', 'README.txt', 'CHANGES.txt']) + is_in_list(in_root_folder, ['LICENSE.txt', 'NOTICE.txt', 'README.txt', 'CHANGELOG.md']) if SOLR_NOTICE is None: SOLR_NOTICE = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read() @@ -715,7 +719,7 @@ def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs): os.chdir(unpackPath) - testChangesText('.', version) + testChangelogMd('.', version) def readSolrOutput(p, startupEvent, failureEvent, logFile): @@ -1220,4 +1224,4 @@ def smokeTest(java, baseURL, gitRevision, version, tmpDir, isSigned, local_keys, try: main() except KeyboardInterrupt: - print('Keyboard interrupt...exiting') \ No newline at end of file + print('Keyboard interrupt...exiting') diff --git a/gradle/changelog.gradle b/gradle/changelog.gradle new file mode 100644 index 000000000000..cd54cce9ad77 --- /dev/null +++ b/gradle/changelog.gradle @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +logchange { + rootPath = "." + inputDir = "changelog" + outputFile = "CHANGELOG.md" + generateChangesXml = false +} + +task writeChangelog { + description = 'Generates a change/log description file (YAML)' + doLast { + def gitUserName = 'git config user.name'.execute().text.trim() + def configuredName = providers.gradleProperty("user.name").getOrElse(gitUserName) + def githubId = providers.gradleProperty("user.githubid").getOrElse(null) + def nick = githubId ? "\n nick: ${githubId}" : "" + def asfId = providers.gradleProperty("user.asfid").getOrElse(null) + def asfIdUrl = asfId ? "\n url: https://home.apache.org/phonebook.html?uid=${asfId}" : "" + def gitBranch = 'git rev-parse --abbrev-ref HEAD'.execute().text.trim() + def jiraMatcher = gitBranch =~ /SOLR-\d+/ + def jiraRef = jiraMatcher ? jiraMatcher[0] : "SOLR-XXXX" + def jiraUrl = "https://issues.apache.org/jira/browse/${jiraRef}" + def jiraLinks = jiraMatcher ? "links:\n - name: ${jiraRef}\n url: ${jiraUrl}" : "" + def githubMatcher = gitBranch =~ /(PR|GH|GITHUB|#)?(\d+)/ + def githubRef = githubMatcher ? githubMatcher.group(2) : "" + def githubLink = githubMatcher ? 
"issues:\n - ${githubRef}" : "" + def branchWithoutJira = gitBranch.replaceFirst(/(SOLR|LUCENE|INFRA)-\d+-/, "").replaceFirst(/(PR|GH|GITHUB|#)?(\d+)/, "").replace("-", " ").capitalize() + def fileName = "changelog/unreleased/${gitBranch}.yml" + def file = new File(fileName) + file.parentFile.mkdirs() + file.text = """# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: ${branchWithoutJira} +type: other # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: ${configuredName}${nick}${asfIdUrl} +${jiraLinks} +${githubLink} +""" + + println "Generated file: ${fileName} -- open it" + } +} + +task changelog { + dependsOn writeChangelog + description = 'Generates a change/log description file (YAML)' +} + +task newChangelog { + dependsOn writeChangelog + description = 'Generates a change/log description file (YAML)' +} diff --git a/gradle/documentation/changes-to-html.gradle b/gradle/documentation/changes-to-html.gradle index af9d1b5fa9a6..0f21cfca5061 100644 --- a/gradle/documentation/changes-to-html.gradle +++ b/gradle/documentation/changes-to-html.gradle @@ -20,7 +20,7 @@ def resources = scriptResources(buildscript) configure(project(':solr:documentation')) { task changesToHtml(type: ChangesToHtmlTask) { siteDir = resources - script = file("${resources}/changes2html.pl") + script = file("${resources}/changes2html.py") } // Make the rendered HTML of changes available as a separate @@ -36,7 +36,7 @@ configure(project(':solr:documentation')) { } } -// compile changes.txt into an html file +// compile CHANGELOG.md into an html file class ChangesToHtmlTask extends DefaultTask { @Internal @@ -46,10 +46,7 @@ class ChangesToHtmlTask extends DefaultTask { String productName = productProject.name @InputFile - File changesFile = productProject.file('CHANGES.txt') - - @InputFile - File changesDoapFile = project.rootProject.file("dev-tools/doap/${productName}.rdf") + File changesFile = 
project.rootProject.file('CHANGELOG.md') @InputDirectory File siteDir @@ -58,44 +55,27 @@ class ChangesToHtmlTask extends DefaultTask { final DirectoryProperty targetDir = project.objects.directoryProperty() .fileProvider(project.providers.provider { project.file("${project.docroot}/changes") }) - @Input - def luceneDocUrl = "${-> project.luceneDocUrl }" - @InputFile def script - def loadVersions(File outfile) { - // load version properties from DOAP RDF - def prefix = "doap.${productName}".toString() - ant.xmlproperty(keeproot: false, file: changesDoapFile, collapseAttributes: false, prefix: "${prefix}") - outfile.withWriter("UTF-8") { writer -> - writer.println(ant.properties["${prefix}.Project.release.Version.revision"]) - writer.println(ant.properties["${prefix}.Project.release.Version.created"]) - } - } - - def toHtml(File versionsFile) { + def toHtml() { def output = new ByteArrayOutputStream() - - // Check if the perl executable exists - if (!perlExists()) { - logger.warn("WARNING: Perl is not installed, skipping creating Changes.html") + + // Check if Python is available + if (!pythonExists()) { + logger.warn("WARNING: Python is not installed, skipping creating Changes.html") return } def result = project.exec { - executable project.externalTool("perl") - standardInput changesFile.newInputStream() + executable "python3" standardOutput project.file("${targetDir.get().getAsFile()}/Changes.html").newOutputStream() errorOutput = output ignoreExitValue = true args += [ - "-CSD", - script, - "${productName}", - versionsFile.toString(), - luceneDocUrl.concat('/') // slash required at end by perl script + script.toString(), + changesFile.toString() ] } @@ -107,24 +87,21 @@ class ChangesToHtmlTask extends DefaultTask { @TaskAction def convert() { project.mkdir targetDir - if (changesFile.exists() && changesDoapFile.exists()) { - File versionsFile = project.file("${project.buildDir}/doap.${project.name}.changes.version.dates.csv") - loadVersions(versionsFile) - 
toHtml(versionsFile) + if (changesFile.exists()) { + toHtml() project.copy { from siteDir into targetDir include "*.css" } - versionsFile.delete() } else { - throw new GradleException("Changes file ${changesFile} or Doap file ${changesDoapFile} not found.") + throw new GradleException("Changes file ${changesFile} not found.") } } - - def perlExists() { + + def pythonExists() { try { - def process = "perl -v".execute() + def process = "python3 --version".execute() process.waitFor() return process.exitValue() == 0 } catch (Exception e) { diff --git a/gradle/documentation/changes-to-html/changes2html.pl b/gradle/documentation/changes-to-html/changes2html.pl deleted file mode 100755 index 86f316467c8b..000000000000 --- a/gradle/documentation/changes-to-html/changes2html.pl +++ /dev/null @@ -1,1049 +0,0 @@ -#!/usr/bin/perl -# -# Transforms Lucene Core's or Solr's CHANGES.txt into Changes.html -# -# Input is on STDIN, output is to STDOUT -# -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -use strict; -use warnings; - -my $jira_url_prefix = 'http://issues.apache.org/jira/browse/'; -my $github_pull_request_prefix = 'https://github.com/apache/solr/pull/'; -my $month_regex = &setup_month_regex; -my %month_nums = &setup_month_nums; -my %lucene_bugzilla_jira_map = &setup_lucene_bugzilla_jira_map; -my $title = undef; -my $release = undef; -my $reldate = undef; -my $relinfo = undef; -my $sections = undef; -my $items = undef; -my $first_relid = undef; -my $second_relid = undef; -my @releases = (); - -my @lines = ; # Get all input at once - -# -# Cmdline args: (only from Solr) -# -my $product = uc($ARGV[0]); -if ($product !~ /^(LUCENE|SOLR)$/) { - print STDERR "Unknown product name '$ARGV[0]'\n"; - exit(1); -} -my %release_dates = &setup_release_dates($ARGV[1]); -my $lucene_javadoc_url = ($product eq 'SOLR' ? $ARGV[2] : ''); # Only Solr supplies this on the cmdline -my $in_major_component_versions_section = 0; - - -# -# Parse input and build hierarchical release structure in @releases -# -for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) { - $_ = $lines[$line_num]; - unless (/\S/) { # Skip blank lines - $in_major_component_versions_section = 0; - next; - } - next if (/^\s{0,4}-{5,}\s*$/); # Skip Solr's section underlines - - unless ($title) { - if (/\S/) { - s/^[^\p{N}\p{L}]*//; # Trim leading non-alphanum chars, including BOM chars, if any - s/\s+$//; # Trim trailing whitespace - } - $title = $_; - next; - } - - if (/\s*===+\s*(.*?)\s*===+\s*/) { # New-style release headings - $release = $1; - $release =~ s/^(?:release|lucene)\s*//i; # Trim "Release " or "Lucene " prefix - ($release, $relinfo) = ($release =~ /^(\d+(?:\.(?:\d+))*(?:-(?:ALPHA|BETA))?|Trunk)\s*(.*)/i); - $relinfo =~ s/\s*:\s*$//; # Trim trailing colon - $relinfo =~ s/^\s*,\s*//; # Trim leading comma - ($reldate, $relinfo) = get_release_date($release, $relinfo); - $sections = []; - push @releases, [ $release, $reldate, $relinfo, $sections ]; - ($first_relid = 'v'.lc($release)) =~ 
s/\s+/_/g - if ($#releases == 0 or ($#releases == 1 and not ($releases[0][0]))); - ($second_relid = 'v'.lc($release)) =~ s/\s+/_/g - if ( ($#releases == 1 and $releases[0][0]) - or ($#releases == 2 and not $releases[0][0])); - $items = undef; - next; - } - - if (/^\s*([01](?:\.[0-9]{1,2}){1,2}[a-z]?(?:\s*(?:RC\d+|final))?)\s* - ((?:200[0-7]-.*|.*,.*200[0-7].*)?)$/x) { # Old-style release heading - $release = $1; - $relinfo = $2; - $relinfo =~ s/\s*:\s*$//; # Trim trailing colon - $relinfo =~ s/^\s*,\s*//; # Trim leading comma - ($reldate, $relinfo) = get_release_date($release, $relinfo); - $sections = []; - push @releases, [ $release, $reldate, $relinfo, $sections ]; - $items = undef; - next; - } - - if (m!^20\d\d[-/]\d{1,2}[-/]\d{1,2}!) { # Collect dated postscripts - my $item = $_; - my $line = ''; - while ($line_num < $#lines and ($line = $lines[++$line_num]) =~ /\S/) { - $line =~ s/^\s+//; # Trim leading whitespace - $line =~ s/\s+$//; # Trim trailing whitespace - $item .= "$line\n"; - } - push @releases, [ $item, '', '', [] ]; - next; - } - - # Section heading: no leading whitespace, initial word capitalized, - # six words or less, and no trailing punctuation, - # except colons - don't match the one otherwise matching - # non-section-name by excluding "StandardTokenizer" - if ( /^(?!.*StandardTokenizer)([A-Z]\S*(?:\s+\S+){0,5})(?[0]) { - die "Section '$heading' appears more than once under release '$releases[-1][0]'"; - } - } - push @$sections, [ $heading, $items ]; - $in_major_component_versions_section - = ($heading =~ /Versions of Major Components/i); - next; - } - - # Handle earlier releases without sections - create a headless section - unless ($items) { - $items = []; - unless (@releases) { - $sections = []; - # Make a fake release to hold pre-release sections and items - push @releases, [ undef, undef, undef, $sections ]; - } - push @$sections, [ '', $items ]; - } - - my $type; - if (@$items) { # A list item has been encountered in this section before - 
$type = $items->[0]; # 0th position of items array is list type - } else { - $type = get_list_type($_); - push @$items, $type; - } - - if ($type eq 'numbered') { # The modern items list style - # List item boundary is another numbered item or an unindented line - my $line; - my $item = $_; - $item =~ s/^(\s{0,2}\d+\.\d?\s*)//; # Trim the leading item number - my $leading_ws_width = length($1); - $item =~ s/\s+$//; # Trim trailing whitespace - $item .= "\n"; - - while ($line_num < $#lines - and ($line = $lines[++$line_num]) !~ /^(?:\s{0,2}\d+\.\s*\S|\S)/) { - $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace - $line =~ s/\s+$//; # Trim trailing whitespace - $item .= "$line\n"; - } - $item =~ s/\n+\Z/\n/; # Trim trailing blank lines - push @$items, $item; - --$line_num unless ($line_num == $#lines && $lines[$line_num] !~ /^20/); - } elsif ($type eq 'paragraph') { # List item boundary is a blank line - my $line; - my $item = $_; - $item =~ s/^(\s+)//; - my $leading_ws_width = defined($1) ? length($1) : 0; - $item =~ s/\s+$//; # Trim trailing whitespace - $item .= "\n"; - - unless ($in_major_component_versions_section) { - while ($line_num < $#lines and ($line = $lines[++$line_num]) =~ /\S/) { - $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace - $line =~ s/\s+$//; # Trim trailing whitespace - $item .= "$line\n"; - } - } else { - ++$line_num; - } - push @$items, $item; - --$line_num unless ($line_num == $#lines && $lines[$line_num] !~ /^20/); - } else { # $type is one of the bulleted types - # List item boundary is another bullet or a blank line - my $line; - my $item = $_; - $item =~ s/^(\s*\Q$type\E\s*)//; # Trim the leading bullet - my $leading_ws_width = defined($1) ? 
length($1) : 0; - $item =~ s/\s+$//; # Trim trailing whitespace - $item .= "\n"; - - while ($line_num < $#lines - and ($line = $lines[++$line_num]) !~ /^(?:\S|\s*\Q$type\E\s+)/) { - $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace - $line =~ s/\s+$//; # Trim trailing whitespace - $item .= "$line\n"; - } - push @$items, $item; - --$line_num unless ($line_num == $#lines && $lines[$line_num] !~ /^20/); - } -} - -# Recognize IDs of top level nodes of the most recent two releases, -# escaping JavaScript regex metacharacters, e.g.: "^(?:trunk|2\\\\.4\\\\.0)" -my $first_relid_regex = $first_relid; -$first_relid_regex =~ s!([.+*?{}()|^$/\[\]\\])!\\\\\\\\$1!g; -my $second_relid_regex = $second_relid; -$second_relid_regex =~ s!([.+*?{}()|^$/\[\]\\])!\\\\\\\\$1!g; -my $newer_version_regex = "^(?:$first_relid_regex|$second_relid_regex)"; - -# -# Print HTML-ified version to STDOUT -# -print<<"__HTML_HEADER__"; - - - - - $title - - - - - - - - -

    $title

    - -
    - -__HTML_HEADER__ - -my $heading; -my $relcnt = 0; -my $header = 'h2'; -my $subheader = 'h3'; - -for my $rel (@releases) { - if ($relcnt == 2) { - $header = 'h3'; - $subheader = 'h4'; - print "

    "; - print "Older Releases"; - print "

    \n"; - print "
    \n" - } - - ($release, $reldate, $relinfo, $sections) = @$rel; - - # The first section heading is undefined for the older sectionless releases - my $has_release_sections = has_release_sections($sections); - - my $relid = ''; - if ($release) { # Pre-release sections have no release ID - ++$relcnt; - ($relid = 'v'.lc($release)) =~ s/\s+/_/g; - print "<$header>"; - print "" - unless ($release =~ /^20\d\d/); - print "Release " unless ($release =~ /^trunk$|^20\d\d/i); - print "$release $relinfo"; - print " [$reldate]" unless ($reldate eq 'unknown' or not $reldate); - print "" unless ($release =~ /^20\d\d/); - print "\n"; - print "
      \n" - if ($has_release_sections); - } - - my $licnt = 0; - for my $section (@$sections) { - ($heading, $items) = @$section; - (my $sectid = lc($heading)) =~ s/\s+/_/g; - my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)"; - - my $list_item = "li"; - if ($release) { - if ($heading) { - if ($heading eq 'Detailed Change List') { - print "
    • <$subheader>$heading
    • \n"; - next; - } elsif ($has_release_sections) { - print "
    • $heading", - "   $numItemsStr\n"; - ++$licnt; - } - } - } else { # $release is not defined - print "

      $heading

      \n" if ($heading); - $list_item = "p"; - } - - my $list_type = $items->[0] || ''; - my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul'); - my $listid = $sectid ? "$relid.$sectid" : $relid; - print " <$list id=\"$listid.list\">\n" - unless (not $release or ($has_release_sections and not $heading)); - - for my $itemnum (1..$#{$items}) { - my $item = $items->[$itemnum]; - $item =~ s:&:&:g; # Escape HTML metachars, but leave tags - $item =~ s~<(?!/?code>(?:[^,]|$))~<~gi; # intact - unless followed by a comma - and - $item =~ s:(?:>:gi; # add
       wrappers for non-inline sections
      -      $item =~ s{((?:^|.*\n)\s*)(?!,)(?!.+)(.+)(?![ \t]*\S)}
      -                { 
      -                  my $prefix = $1; 
      -                  my $code = $2;
      -                  $code =~ s/\s+$//;
      -                  "$prefix
      $code>
      " - }gise; - - $item = markup_trailing_attribution($item) unless ($item =~ /\n[ ]*-/); - - $item =~ s{(.*?)(
      .*?
      )|(.*)} - { - my $uncode = undef; - my ($one,$two,$three) = ($1,$2,$3); - if (defined($two)) { - $uncode = $one || ''; - $uncode =~ s{^(.*?)(?=\n[ ]*-)} - { - my $prefix = $1; - my ($primary,$additional_work) = $prefix =~ /^(.*?)((?:\s*Additional\s+Work:\s*)?)$/si; - my $result = markup_trailing_attribution($primary); - $result .= "
      \n$additional_work
      " if ($additional_work); - $result; - }se; - $uncode =~ s{((?<=\n)[ ]*-.*\n(?:.*\n)*)} - { - my $bulleted_list = $1; - $bulleted_list - =~ s{(?:(?<=\n)|\A)[ ]*-[ ]*(.*(?:\n|\z)(?:[ ]+[^ -].*(?:\n|\z))*)} - { - qq!
    • \n! - . markup_trailing_attribution($1) - . "
    • \n" - }ge; - $bulleted_list - =~ s{(\n)(.*)} - { - qq!
        \n$1
      \n! - . markup_trailing_attribution($2 || '') - }se; - $bulleted_list; - }ge; - "$uncode$two"; - } else { - $uncode = $three || ''; - $uncode =~ s{^(.*?)(?=\n[ ]*-)} - { - my $prefix = $1; - my ($primary,$additional_work) = $prefix =~ /^(.*?)((?:\s*Additional\s+Work:\s*)?)$/si; - my $result = markup_trailing_attribution($primary); - $result .= "
      \n$additional_work
      " if ($additional_work); - $result; - }se; - $uncode =~ s{((?<=\n)[ ]*-.*\n(?:.*\n)*)} - { - my $bulleted_list = $1; - $bulleted_list - =~ s{(?:(?<=\n)|\A)[ ]*-[ ]*(.*(?:\n|\z)(?:[ ]+[^ -].*(?:\n|\z))*)} - { - qq!
    • \n! - . markup_trailing_attribution($1) - . "
    • \n" - }ge; - $bulleted_list - =~ s{(\n)(.*)} - { - qq!
        \n$1
      \n! - . markup_trailing_attribution($2 || '') - }se; - $bulleted_list; - }ge; - $uncode = markup_trailing_attribution($uncode); - $uncode; - } - }sge; - - $item =~ s:\n{2,}:\n

      \n:g; # Keep paragraph breaks - $item =~ s:

    • \n

      \n\n\n

      \n(.*)\n:

    \n$1\n

      :g; - # Link LUCENE-XXX, SOLR-XXX and INFRA-XXX to JIRA - $item =~ s{(?:${jira_url_prefix})?((?:LUCENE|SOLR|INFRA)-\d+)} - {$1}g; - $item =~ s{(issue\s*\#?\s*(\d{3,}))} # Link Issue XXX to JIRA - {$1}gi; - # Link Lucene XXX, SOLR XXX and INFRA XXX to JIRA - $item =~ s{((LUCENE|SOLR|INFRA)\s+(\d{3,}))} - {$1}gi; - # Link "[ github | gh ] pull request [ # ] X+" to Github pull request - $item =~ s{((?:(?:(?:github|gh)\s+)?pull\s+request\s*(?:\#?\s*)?|gh-|(gh|github|pr)#)(\d+))} - {$1}gi; - # Link "LUCENE_CHANGES.txt" to Lucene's same-release Changes.html - # TODO: We can no longer rely on this since Solr may have a different Lucene version - # But it will still work for pre-9.0 changes, and from 9.0 we can instead provide - # full links. - if ($product eq 'SOLR') { - $item =~ s[(LUCENE_CHANGES.txt)] - [$1]g; - } - if ($product eq 'LUCENE') { - # Find single Bugzilla issues - $item =~ s~((?i:bug|patch|issue)\s*\#?\s*(\d+)) - ~ my $issue = $1; - my $jira_issue_num = $lucene_bugzilla_jira_map{$2}; # Link to JIRA copies - $issue = qq!! - . qq!$issue [LUCENE-$jira_issue_num]! - if (defined($jira_issue_num)); - $issue; - ~gex; - # Find multiple Bugzilla issues - $item =~ s~(?<=(?i:bugs))(\s*)(\d+)(\s*(?i:\&|and)\s*)(\d+) - ~ my $leading_whitespace = $1; - my $issue_num_1 = $2; - my $interlude = $3; - my $issue_num_2 = $4; - # Link to JIRA copies - my $jira_issue_1 = $lucene_bugzilla_jira_map{$issue_num_1}; - my $issue1 - = qq!! - . qq!$issue_num_1 [LUCENE-$jira_issue_1]! - if (defined($jira_issue_1)); - my $jira_issue_2 = $lucene_bugzilla_jira_map{$issue_num_2}; - my $issue2 - = qq!! - . qq!$issue_num_2 [LUCENE-$jira_issue_2]! - if (defined($jira_issue_2)); - $leading_whitespace . $issue1 . $interlude . 
$issue2; - ~gex; - } - - # Linkify URLs, except Bugzilla links, which don't work anymore - # also ignore localhost, ..., $SOLR_PORT, 127.0.0.1 - $item =~ s~(?])(https?://(?!(?:nagoya|issues)\.apache\.org/bugzilla|localhost|\.\.\.|\$SOLR_HOST|127\.0\.0\.1)[^\s\)]+)~$1~g; - - $item =~ s~
    \s+

    \s+~~; - - print " <$list_item>$item\n"; - } - print " \n" unless (not $release or ($has_release_sections and not $heading)); - if ($release and $has_release_sections and $licnt>0) { - print "

  • \n"; - --$licnt; - } - } - print "\n" if ($release and $has_release_sections); -} -print "\n" if ($relcnt > 3); -print "\n\n"; - - -# Subroutine: markup_trailing_attribution -# -# Takes one parameter: -# -# - text possibly containing a trailing parenthesized attribution -# -# Returns one scalar: -# -# - text with the trailing attribution, if any, marked up with the color green -# -sub markup_trailing_attribution { - my $item = shift; - - # Put attributions on their own lines - this already happens if there is a preceding - my $extra_newline = ($item =~ m::) ? '' : '
    '; - # Check for trailing parenthesized attribution with no following period. - # Exclude things like "(see #3 above)" and "(use the bug number instead of xxxx)" - unless ($item =~ s{\s+(\((?![Ss]ee ) - (?!spans\b) - (?!mainly\ ) - (?!LUCENE-\d+\)) - (?!SOLR-\d+\)) - (?!user's) - (?!like\ ) - (?!r\d{6}) # subversion revision - (?!and\ ) - (?!backported\ ) - (?!in\ ) - (?!inverse\ ) - (?![Tt]he\ ) - (?!use\ the\ bug\ number) - (?!e\.?g\.?\b) - [^()"]+?\))\s*$} - {\n${extra_newline}$1}x) { - # If attribution is not found, then look for attribution with a - # trailing period, but try not to include trailing parenthesized things - # that are not attributions. - # - # Rule of thumb: if a trailing parenthesized expression with a following - # period does not contain "LUCENE-XXX", and it either has three or - # fewer words or it includes the word "via" or the phrase "updates from", - # then it is considered to be an attribution. - - $item =~ s{(\s+(\((?![Ss]ee\ ) - (?!spans\b) - (?!mainly\ ) - (?!LUCENE-\d+\)) - (?!SOLR-\d+\)) - (?!user's) - (?!like\ ) - (?!r\d{6}) # subversion revision - (?!and\ ) - (?!backported\ ) - (?!in\ ) - (?!inverse\ ) - (?![Tt]he\ ) - (?!use\ the\ bug\ number) - (?!e\.?g\.?\b) - [^()"]+?\))) - ((?:\.|(?i:\.?\s*Issue\s+\d{3,}|LUCENE-\d+)\.?)\s*)$} - { - my $subst = $1; # default: no change - my $parenthetical = $2; - my $trailing_period_and_or_issue = $3; - if ($parenthetical !~ /LUCENE-\d+/) { - my ($no_parens) = $parenthetical =~ /^\((.*)\)$/s; - my @words = grep {/\S/} split /\s+/, $no_parens; - my $commas = $no_parens =~ s/,/,/g; # count commas - my $max_words = 4 + $commas; - if ($no_parens =~ /\b(?:via|updates\s+from)\b/i || scalar(@words) <= $max_words) { - $subst = "\n${extra_newline}$parenthetical"; - } - } - $subst . 
$trailing_period_and_or_issue; - }ex; - } - return $item; -} - -# -# Subroutine: has_release_sections -# -# Takes one parameter: -# -# - The $sections array reference -# -# Returns one scalar: -# -# - A boolean indicating whether there are release sections -# -sub has_release_sections { - my $sections = shift; - my $has_release_sections = 0; - for my $section_num (0 .. $#{$sections}) { - if ($sections->[$section_num][0]) { - $has_release_sections = 1; - last; - } - } - return $has_release_sections; -} - - -# -# Subroutine: get_list_type -# -# Takes one parameter: -# -# - The first line of a sub-section/point -# -# Returns one scalar: -# -# - The list type: 'numbered'; or one of the bulleted types '-', or '.' or -# 'paragraph'. -# -sub get_list_type { - my $first_list_item_line = shift; - my $type = 'paragraph'; # Default to paragraph type - - if ($first_list_item_line =~ /^\s{0,2}\d+\.\s+\S+/) { - $type = 'numbered'; - } elsif ($first_list_item_line =~ /^\s*([-.*])\s+\S+/) { - $type = $1; - } - return $type; -} - - -# -# Subroutine: get_release_date -# -# Takes two parameters: -# -# - Release name -# - Release info, potentially including a release date -# -# Returns two scalars: -# -# - The release date, in format YYYY-MM-DD -# - The remainder of the release info (if any), with release date stripped -# -sub get_release_date { - my $release = shift; - my $relinfo = shift; - - my ($year, $month, $dom, $reldate); - - if ($relinfo) { - if ($relinfo =~ s:\s*(2\d\d\d)([-./]) - (1[012]|0?[1-9])\2 - ([12][0-9]|30|31|0?[1-9])\s*: :x) { - # YYYY-MM-DD or YYYY-M-D or YYYY-MM-D or YYYY-M-DD - $year = $1; - $month = $3; - $dom = $4; - $dom = "0$dom" if (length($dom) == 1); - $reldate = "$year-$month-$dom"; - } elsif ($relinfo =~ s:\s*(1[012]|0?[1-9])([-./]) - ([12][0-9]|30|31|0?[1-9])\2 - (2\d\d\d)\s*: :x) { - # MM-DD-YYYY or M-D-YYYY or MM-D-YYYY or M-DD-YYYY - $month = $1; - $dom = $3; - $dom = "0$dom" if (length($dom) == 1); - $year = $4; - $reldate = "$year-$month-$dom"; - 
} elsif ($relinfo =~ s:($month_regex)\s* - ([12][0-9]|30|31|0?[1-9])((st|rd|th)\.?)?,?\s* - (2\d\d\d)\s*: :x) { - # MMMMM DD, YYYY or MMMMM DDth, YYYY - $month = $month_nums{$1}; - $dom = $2; - $dom = "0$dom" if (length($dom) == 1); - $year = $5; - $reldate = "$year-$month-$dom"; - } elsif ($relinfo =~ s:([12][0-9]|30|31|0?[1-9])(\s+|[-/.]) - ($month_regex)\2 - (2\d\d\d)\s*: :x) { - # DD MMMMM YYYY - $dom = $1; - $dom = "0$dom" if (length($dom) == 1); - $month = $month_nums{$3}; - $year = $4; - $reldate = "$year-$month-$dom"; - } - } - - unless ($reldate) { # No date found in $relinfo - # Handle '1.2 RC6', which should be '1.2 final' - $release = '1.2 final' if ($release eq '1.2 RC6'); - - $reldate = ( exists($release_dates{$release}) - ? $release_dates{$release} - : 'unknown'); - } - - $relinfo =~ s/,?\s*$//; # Trim trailing comma and whitespace - - return ($reldate, $relinfo); -} - - -# -# setup_release_dates -# -# Returns a list of alternating release names and dates, for use in populating -# the %release_dates hash. -# -# Pulls release dates from the project DOAP file. -# -sub setup_release_dates { - my %release_dates = (); - my $file = shift; - open(FILE, "<$file") || die "could not open $file: $!"; - my $version_list = ; - my $created_list = ; - close(FILE); - - $version_list =~ s/^\s+|\s+$//g; - my @versions = split /\s*,\s*/, $version_list; - $created_list =~ s/^\s+|\s+$//g; - my @created = split /\s*,\s*/, $created_list; - - if (scalar(@versions) != scalar(@created)) { - die $file . " contains" . scalar(@versions) . " versions but " . scalar(@created) . 
" creation dates."; - } - my $date; - for my $pos (0..$#versions) { - $date = normalize_date($created[$pos]); - $release_dates{$versions[$pos]} = $date; - if ($versions[$pos] =~ /^([1-9]\d*\.\d+)([^.0-9].*|$)/) { - my $padded_version_name = "$1.0$2"; # Alias w/trailing ".0" - $release_dates{$padded_version_name} = $date; - } elsif ($versions[$pos] =~ /\.0(?=[^.0-9]|$)/) { - my $trimmed_version_name = $versions[$pos]; - $trimmed_version_name =~ s/\.0(?=[^.0-9]|$)//; # Alias w/o trailing ".0" - $release_dates{$trimmed_version_name} = $date; - } - } - return %release_dates; -} - -# -# normalize_date -# -# Left-zero-pads month and day-of-month to 2 digits in dates of format YYYY-(M)M-(D)D -# -sub normalize_date { - my $date = shift; - my ($year, $month, $dom) = $date =~ /^(2\d\d\d)-(\d+)-(\d+)$/; - return sprintf("%04d-%02d-%02d", $year, $month, $dom); -} - - -# -# setup_month_regex -# -# Returns a string containing a regular expression with alternations for -# the standard month representations in English. -# -sub setup_month_regex { - return '(?i:Jan(?:|\.|uary)|Feb(?:|\.|ruary)|Mar(?:|\.|ch)' - . '|Apr(?:|\.|il)|May|Jun(?:|\.|e)|Jul(?:|\.|y)|Aug(?:|\.|ust)' - . '|Sep(?:|\.|t(?:|\.|ember))|Oct(?:|\.|ober)|Nov(?:|\.|ember)' - . '|Dec(?:|\.|ember))'; -} - - -# -# setup_month_nums -# -# Returns a list of alternating English month representations and the two-digit -# month number corresponding to them, for use in populating the %month_nums -# hash. -# -sub setup_month_nums { - return ( 'Jan' => '01', 'Jan.' => '01', 'January' => '01', - 'Feb' => '02', 'Feb.' => '02', 'February' => '02', - 'Mar' => '03', 'Mar.' => '03', 'March' => '03', - 'Apr' => '04', 'Apr.' => '04', 'April' => '04', - 'May' => '05', - 'Jun' => '06', 'Jun.' => '06', 'June' => '06', - 'Jul' => '07', 'Jul.' => '07', 'July' => '07', - 'Aug' => '08', 'Aug.' => '08', 'August' => '08', - 'Sep' => '09', 'Sep.' => '09', - 'Sept' => '09', 'Sept.' => '09', 'September' => '09', - 'Oct' => '10', 'Oct.' 
=> '10', 'October' => '10', - 'Nov' => '11', 'Nov.' => '11', 'November' => '11', - 'Dec' => '12', 'Dec.' => '12', 'December' => '12' ); -} - - -# -# setup_lucene_bugzilla_jira_map -# -# Returns a list of alternating Bugzilla bug IDs and LUCENE-* JIRA issue -# numbers, for use in populating the %lucene_bugzilla_jira_map hash -# -sub setup_lucene_bugzilla_jira_map { - return ( 4049 => 1, 4102 => 2, 4105 => 3, 4254 => 4, - 4555 => 5, 4568 => 6, 4754 => 7, 5313 => 8, - 5456 => 9, 6078 => 10, 6091 => 11, 6140 => 12, - 6292 => 13, 6315 => 14, 6469 => 15, 6914 => 16, - 6968 => 17, 7017 => 18, 7019 => 19, 7088 => 20, - 7089 => 21, 7275 => 22, 7412 => 23, 7461 => 24, - 7574 => 25, 7710 => 26, 7750 => 27, 7782 => 28, - 7783 => 29, 7912 => 30, 7974 => 31, 8307 => 32, - 8525 => 33, 9015 => 34, 9110 => 35, 9347 => 36, - 9454 => 37, 9782 => 38, 9853 => 39, 9906 => 40, - 9970 => 41, 10340 => 42, 10341 => 43, 10342 => 44, - 10343 => 45, 10849 => 46, 11109 => 47, 11359 => 48, - 11636 => 49, 11918 => 50, 12137 => 51, 12273 => 52, - 12444 => 53, 12569 => 54, 12588 => 55, 12619 => 56, - 12667 => 57, 12723 => 58, 12749 => 59, 12761 => 60, - 12950 => 61, 13102 => 62, 13166 => 63, 14028 => 64, - 14355 => 65, 14373 => 66, 14412 => 67, 14485 => 68, - 14585 => 69, 14665 => 70, 14900 => 71, 15739 => 72, - 16025 => 73, 16043 => 74, 16167 => 75, 16245 => 76, - 16364 => 77, 16437 => 78, 16438 => 79, 16470 => 80, - 16677 => 81, 16719 => 82, 16730 => 83, 16816 => 84, - 16952 => 85, 17242 => 86, 17954 => 88, 18014 => 89, - 18088 => 90, 18177 => 91, 18410 => 87, 18833 => 92, - 18847 => 93, 18914 => 94, 18927 => 95, 18928 => 96, - 18929 => 97, 18931 => 98, 18932 => 99, 18933 => 100, - 18934 => 101, 19058 => 102, 19149 => 103, 19189 => 104, - 19253 => 105, 19468 => 106, 19686 => 107, 19736 => 108, - 19751 => 109, 19834 => 110, 19844 => 111, 20024 => 112, - 20081 => 113, 20123 => 114, 20196 => 115, 20283 => 116, - 20290 => 117, 20461 => 118, 20901 => 119, 21128 => 120, - 21149 => 121, 21150 => 122, 
21189 => 123, 21446 => 124, - 21921 => 126, 22344 => 128, 22469 => 130, 22987 => 131, - 23307 => 133, 23308 => 134, 23422 => 135, 23466 => 136, - 23505 => 137, 23534 => 138, 23545 => 139, 23650 => 140, - 23655 => 141, 23685 => 142, 23702 => 143, 23727 => 144, - 23730 => 145, 23750 => 146, 23754 => 147, 23770 => 148, - 23771 => 149, 23773 => 150, 23774 => 151, 23782 => 152, - 23784 => 153, 23786 => 154, 23838 => 155, 23964 => 156, - 24084 => 129, 24237 => 157, 24265 => 158, 24301 => 159, - 24370 => 160, 24665 => 161, 24786 => 162, 24902 => 163, - 24903 => 164, 24913 => 165, 25666 => 125, 25793 => 166, - 25820 => 167, 25945 => 168, 26120 => 169, 26196 => 170, - 26268 => 171, 26360 => 172, 26396 => 173, 26397 => 174, - 26624 => 175, 26634 => 176, 26666 => 177, 26702 => 178, - 26716 => 179, 26763 => 180, 26884 => 181, 26939 => 182, - 27168 => 183, 27174 => 184, 27182 => 185, 27268 => 186, - 27326 => 187, 27354 => 188, 27408 => 189, 27423 => 190, - 27433 => 191, 27491 => 192, 27587 => 193, 27626 => 194, - 27638 => 195, 27743 => 196, 27772 => 197, 27799 => 198, - 27819 => 199, 27865 => 200, 27868 => 201, 27903 => 202, - 27987 => 203, 28030 => 204, 28050 => 205, 28065 => 206, - 28074 => 207, 28108 => 208, 28181 => 209, 28182 => 210, - 28183 => 211, 28187 => 212, 28285 => 213, 28336 => 214, - 28339 => 215, 28405 => 216, 28462 => 217, 28601 => 218, - 28640 => 219, 28748 => 220, 28827 => 221, 28855 => 222, - 28856 => 223, # Clone: 28856 => 507, - 28858 => 224, 28960 => 132, 28964 => 127, 29033 => 225, - 29256 => 226, 29299 => 227, 29302 => 228, 29370 => 229, - 29432 => 230, 29548 => 231, 29749 => 232, 29756 => 233, - 29774 => 234, 29931 => 235, 29984 => 236, 30013 => 237, - 30016 => 238, 30026 => 239, 30027 => 240, 30049 => 241, - 30058 => 242, 30232 => 243, 30237 => 244, 30240 => 245, - 30242 => 246, 30265 => 247, 30327 => 248, 30330 => 249, - 30360 => 250, 30376 => 251, 30382 => 252, 30421 => 253, - 30429 => 254, 30452 => 255, 30480 => 256, 30522 => 257, - 30617 => 258, 
30621 => 259, 30628 => 260, 30629 => 261, - 30668 => 262, 30678 => 263, 30685 => 264, 30736 => 265, - 30785 => 266, 30818 => 267, 30835 => 268, 30844 => 269, - 30977 => 270, 30985 => 271, 31061 => 272, 31120 => 273, - 31149 => 274, 31174 => 275, 31240 => 276, 31241 => 277, - 31294 => 278, 31350 => 279, 31368 => 280, 31420 => 281, - 31469 => 282, 31508 => 283, 31554 => 284, 31617 => 285, - 31619 => 286, 31690 => 287, 31706 => 288, 31708 => 289, - 31746 => 290, 31747 => 291, 31748 => 292, 31784 => 293, - 31785 => 294, 31841 => 295, 31882 => 296, 31926 => 297, - 31976 => 298, 32053 => 299, 32055 => 300, 32088 => 301, - 32090 => 302, 32109 => 303, 32115 => 304, 32143 => 305, - 32167 => 306, 32171 => 307, 32192 => 308, 32227 => 309, - 32228 => 310, 32234 => 311, 32291 => 312, 32307 => 313, - 32334 => 314, 32353 => 315, 32365 => 316, 32403 => 317, - 32432 => 318, 32467 => 319, 32468 => 320, 32580 => 321, - 32626 => 322, 32674 => 323, 32687 => 324, 32712 => 325, - 32847 => 326, 32887 => 327, 32921 => 328, 32942 => 329, - 32965 => 330, 32981 => 331, 32999 => 332, 33019 => 333, - 33076 => 334, 33134 => 335, 33158 => 336, 33161 => 337, - 33197 => 338, 33239 => 339, 33389 => 340, 33395 => 341, - 33397 => 342, 33442 => 343, 33449 => 344, 33459 => 345, - 33472 => 346, 33642 => 347, 33648 => 348, 33649 => 349, - 33654 => 350, 33678 => 351, 33725 => 352, 33799 => 353, - 33820 => 354, 33835 => 355, 33848 => 356, 33851 => 357, - 33877 => 358, 33884 => 359, 33974 => 360, 34028 => 361, - 34066 => 362, 34149 => 363, 34154 => 364, 34193 => 365, - 34279 => 366, 34320 => 367, 34331 => 368, 34359 => 369, - 34407 => 370, 34408 => 371, 34447 => 372, 34453 => 373, - 34477 => 374, # Clone: 34477 => 459, - 34486 => 375, 34528 => 376, 34544 => 377, 34545 => 378, - 34563 => 379, 34570 => 380, 34585 => 381, 34629 => 382, - 34673 => 383, 34684 => 384, 34695 => 385, 34816 => 386, - 34882 => 387, 34930 => 388, 34946 => 389, 34995 => 390, - 35029 => 391, 35037 => 392, 35157 => 393, 35241 => 394, - 
35284 => 395, # Clone: 35284 => 466, - 35388 => 396, 35446 => 397, 35454 => 398, 35455 => 399, - 35456 => 400, 35468 => 401, 35491 => 402, 35518 => 403, - 35626 => 404, 35664 => 405, 35665 => 406, 35668 => 407, - 35729 => 408, 35730 => 409, 35731 => 410, 35796 => 411, - 35822 => 412, 35823 => 413, 35838 => 414, 35879 => 415, - # Clone: 35879 => 616, - 35886 => 416, 35971 => 417, 36021 => 418, 36078 => 419, - 36101 => 420, 36135 => 421, 36147 => 422, 36197 => 423, - 36219 => 424, 36241 => 425, 36242 => 426, 36292 => 427, - 36296 => 428, 36333 => 429, 36622 => 430, 36623 => 431, - 36628 => 432); -} - -1; diff --git a/gradle/documentation/changes-to-html/changes2html.py b/gradle/documentation/changes-to-html/changes2html.py new file mode 100755 index 000000000000..ce302be11bfb --- /dev/null +++ b/gradle/documentation/changes-to-html/changes2html.py @@ -0,0 +1,576 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +""" +Transforms Solr's CHANGELOG.md into Changes.html + +Input is from CHANGELOG.md, output is to STDOUT +""" + +import sys +import re +from pathlib import Path + + +class ChangelogParser: + """Parse CHANGELOG.md generated by logchange""" + + RELEASE_PATTERN = re.compile(r'^\[(\d+(?:\.\d+)*)\]\s*-\s*(.+)$') + SECTION_PATTERN = re.compile(r'^###\s+(\w+(?:\s+\w+)*)\s*(?:\(\d+\s+changes?\))?') + ITEM_PATTERN = re.compile(r'^###|^\[|^- ') + + def __init__(self): + self.title = "Solr Changelog" + self.releases = [] + self.preamble = None + + def _save_section(self, current_release, current_section, current_items): + """Save current section to release if valid""" + if current_release and current_section and current_items: + current_release['sections'].append({ + 'name': current_section, + 'items': current_items + }) + + def parse(self, content): + """Parse CHANGELOG.md content""" + lines = content.split('\n') + current_release = None + current_section = None + current_items = [] + i = 0 + + while i < len(lines): + line = lines[i] + stripped = line.strip() + + # Skip HTML comments + if stripped.startswith(''): + i += 1 + continue + + # Extract preamble (text before first release) + if not current_release and not self.preamble and stripped and not stripped.startswith('['): + self.preamble = stripped + i += 1 + continue + + # Match release header: [9.9.0] - 2025-07-24 + match = self.RELEASE_PATTERN.match(line) + if match: + self._save_section(current_release, current_section, current_items) + if current_release: + self.releases.append(current_release) + + current_release = { + 'version': match.group(1), + 'date': match.group(2).strip(), + 'sections': [] + } + current_section = None + current_items = [] + i += 1 + continue + + # Match section header: ### Added (9 changes) + match = self.SECTION_PATTERN.match(line) + if match and current_release: + self._save_section(current_release, current_section, current_items) + current_section = match.group(1) + current_items = [] 
+ i += 1 + continue + + # Match list item + if line.startswith('- ') and current_release: + item_text = line[2:] + i += 1 + # Collect continuation lines + while i < len(lines) and not self.ITEM_PATTERN.match(lines[i]): + if lines[i].strip(): + item_text += ' ' + lines[i].strip() + i += 1 + current_items.append(item_text) + continue + + i += 1 + + # Save last section and release + self._save_section(current_release, current_section, current_items) + if current_release: + self.releases.append(current_release) + + +class HTMLGenerator: + """Generate HTML from parsed changelog""" + + JIRA_URL_PREFIX = 'https://issues.apache.org/jira/browse/' + GITHUB_PR_PREFIX = 'https://github.com/apache/solr/pull/' + GITHUB_ISSUE_PREFIX = 'https://github.com/apache/solr/issues/' + + def __init__(self, title="Solr Changelog"): + self.title = title + self.first_relid = None + self.second_relid = None + # Issue extraction patterns: (pattern, prefix, format_string) + self.issue_patterns = [ + (r'\[([A-Z]+-\d+)\]\(https://issues\.apache\.org/jira/browse/\1\)', + self.JIRA_URL_PREFIX, '{0}'), + (r'\[PR#(\d+)\]\(https://github\.com/apache/solr/pull/\1\)', + self.GITHUB_PR_PREFIX, 'PR#{0}'), + (r'\[GITHUB#(\d+)\]\(https://github\.com/apache/solr/issues/\1\)', + self.GITHUB_ISSUE_PREFIX, 'GITHUB#{0}') + ] + + def extract_issue_from_text(self, text): + """ + Extract the first JIRA/GitHub issue from markdown text. + Returns (issue_link_html, text_without_issue) + """ + for pattern, url_prefix, label_fmt in self.issue_patterns: + match = re.search(pattern, text) + if match: + issue_id = match.group(1) + label = label_fmt.format(issue_id) + issue_html = f'{label}' + text_without = (text[:match.start()] + text[match.end():]).strip() + return issue_html, text_without + return None, text + + def extract_authors(self, text): + """Extract authors from trailing parentheses""" + # Match (author1) (author2) ... 
at the end + match = re.search(r'\s*(\([^)]+(?:\)\s*\([^)]+)*\))\s*$', text) + if match: + authors_text = match.group(1) + text_without_authors = text[:match.start()].strip() + + # Parse individual authors + authors = re.findall(r'\(([^)]+)\)', authors_text) + authors_list = [] + for author_group in authors: + # Split by comma or "and" + for author in re.split(r',\s*|\s+and\s+', author_group): + author = author.strip() + if author: + authors_list.append(author) + + return authors_list, text_without_authors + return None, text + + def format_changelog_item(self, item_text): + """ + Format a changelog item from markdown to HTML + Format: [ISSUE](url) description (author1) (author2) + Output: ISSUE: description
    (authors) + """ + # Extract the issue + issue_html, text_after_issue = self.extract_issue_from_text(item_text) + + if not issue_html: + return self.linkify_remaining_text(item_text) + + # Extract authors and clean description + authors_list, description = self.extract_authors(text_after_issue) + description = re.sub(r'^[:\s]+', '', description).strip() + + # Build HTML + html = f'{issue_html}: {self.escape_html(description)}' + if authors_list: + html += f'
    ({self.escape_html(", ".join(authors_list))})' + return html + + def linkify_remaining_text(self, text): + """Linkify URLs and remaining JIRA references""" + text = self.escape_html(text) + + # Link remaining JIRA issues + text = re.sub( + r'([A-Z]+-\d+)', + lambda m: f'{m.group(1)}', + text + ) + + # Linkify URLs + text = re.sub( + r'(?])(https?://[^\s\)]+)', + lambda m: f'{m.group(1)}', + text + ) + + return text + + def convert_markdown_links(self, text): + """ + Convert markdown links [text](url) to HTML links text + Also linkifies plain HTTP/HTTPS URLs + Also escapes HTML in plain text portions + """ + placeholders = {} + placeholder_counter = [0] + + def protect_with_placeholder(content): + placeholder = f"__PLACEHOLDER_{placeholder_counter[0]}__" + placeholders[placeholder] = content + placeholder_counter[0] += 1 + return placeholder + + # Pattern: [text](url) + def replace_markdown_link(match): + link_text = match.group(1) + link_url = match.group(2) + html_link = f'{self.escape_html(link_text)}' + return protect_with_placeholder(html_link) + + # Replace all markdown links first + result = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', replace_markdown_link, text) + + # Now handle plain URLs + def replace_url(match): + url = match.group(1) + html_link = f'{url}' + return protect_with_placeholder(html_link) + + # Match HTTP/HTTPS URLs not already in links + result = re.sub(r'(?])(https?://[^\s\)]+)', replace_url, result) + + # Escape HTML in remaining text + result = self.escape_html(result) + + # Restore the protected tags + for placeholder, tag in placeholders.items(): + result = result.replace(placeholder, tag) + + return result + + def escape_html(self, text): + """Escape HTML angle brackets to prevent rendering issues""" + # Only escape < and > to avoid breaking markdown links and quotes + text = text.replace('<', '<') + text = text.replace('>', '>') + return text + + def generate_header(self, preamble=None): + """Generate HTML header""" + first_relid_regex = 
re.escape(self.first_relid or 'trunk') + first_relid_regex = first_relid_regex.replace('\\', '\\\\') + second_relid_regex = re.escape(self.second_relid or '') + second_relid_regex = second_relid_regex.replace('\\', '\\\\') + + newer_version_regex = f"^(?:{first_relid_regex}" + if self.second_relid: + newer_version_regex += f"|{second_relid_regex}" + newer_version_regex += ")" + + html = f''' + + + + Apache Solr Release Notes + + + + + + + + +

    Apache Solr Release Notes

    + +
    + +''' + # Add preamble if present + if preamble: + # Convert markdown links to HTML links + preamble_html = self.convert_markdown_links(preamble) + html += f'

    {preamble_html}

    \n\n' + + return html + + def _format_section(self, relid, section_name, items): + """Format a single section with items""" + sectid = section_name.lower().replace(' ', '_') + html = [f'
  • ' + f'{self.escape_html(section_name)}'] + html.append(f'   ({len(items)})\n') + html.append(f'
      \n') + for item in items: + html.append(f'
    • {self.format_changelog_item(item)}
    • \n') + html.append('
    \n') + return ''.join(html) + + def generate_releases(self, releases): + """Generate HTML for releases""" + html = [] + relcnt = 0 + + for release in releases: + version = release.get('version') + if not version: + continue + + relcnt += 1 + if relcnt == 3: + html.append('

    Older Releases

    \n') + html.append('
    \n') + + header = 'h3' if relcnt > 2 else 'h2' + relid = f'v{version}'.replace(' ', '_').lower() + date = release.get('date', '') + + # Build release header + html.append(f'<{header}>' + f'Release {self.escape_html(version)}') + if date: + html.append(f' [{self.escape_html(date)}]') + html.append(f'\n') + html.append(f'
      \n') + + # Render sections + for section in release.get('sections', []): + if section.get('name'): + html.append(self._format_section(relid, section['name'], section.get('items', []))) + + html.append('
    \n') + + if relcnt > 2: + html.append('
    \n') + + return ''.join(html) + + def generate(self, releases, title, preamble=None): + """Generate complete HTML""" + self.title = title or "Solr Changelog" + + # Determine first and second release IDs for collapsing + if releases: + self.first_relid = f'v{releases[0].get("version", "trunk")}'.replace(' ', '_').lower() + if len(releases) > 1: + self.second_relid = f'v{releases[1].get("version", "trunk")}'.replace(' ', '_').lower() + else: + self.second_relid = self.first_relid + + html_parts = [ + self.generate_header(preamble), + self.generate_releases(releases), + '\n\n' + ] + + return ''.join(html_parts) + + +def main(): + """Main entry point""" + if len(sys.argv) < 2: + # Try to read from CHANGELOG.md in current directory + changelog_file = Path('CHANGELOG.md') + if not changelog_file.exists(): + print("Usage: changes2html.py ", file=sys.stderr) + sys.exit(1) + else: + changelog_file = Path(sys.argv[1]) + + if not changelog_file.exists(): + print(f"Error: {changelog_file} not found", file=sys.stderr) + sys.exit(1) + + # Read changelog + with open(changelog_file, 'r', encoding='utf-8') as f: + content = f.read() + + # Parse + parser = ChangelogParser() + parser.parse(content) + + # Generate HTML + generator = HTMLGenerator() + html = generator.generate(parser.releases, parser.title, parser.preamble) + + # Output + print(html) + + +if __name__ == '__main__': + main() diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 9c77912fcb92..3c9b0a9b6d2b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -79,6 +79,7 @@ cuvs-lucene = "25.10.0" cybozulabs-langdetect = "1.1-20120112" decompose = "3.3.0" diffplug-spotless = "7.2.1" +# @keep Use for dockerfile JRE version dockerfile-baseimage-java = "25" dropwizard-metrics = "4.2.26" eclipse-ecj = "3.39.0" @@ -162,6 +163,7 @@ langchain4j = "0.35.0" link-checker = "1.4.2" littlerobots-versioncatalogupdate = "0.8.5" lmax-disruptor = "3.4.4" +logchange = "1.19.10" ltgt-errorprone = 
"3.1.0" mockito = "5.19.0" morethan-jmhreport = "0.9.6" @@ -217,6 +219,7 @@ jetbrains-compose = { id = "org.jetbrains.compose", version.ref = "compose" } kotlin-multiplatform = { id = "org.jetbrains.kotlin.multiplatform", version.ref = "kotlin" } kotlin-serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "kotlin" } littlerobots-versioncatalogupdate = { id = "nl.littlerobots.version-catalog-update", version.ref = "littlerobots-versioncatalogupdate" } +logchange = { id = "dev.logchange", version.ref = "logchange" } ltgt-errorprone = { id = "net.ltgt.errorprone", version.ref = "ltgt-errorprone" } morethan-jmhreport = { id = "io.morethan.jmhreport", version.ref = "morethan-jmhreport" } nodegradle-node = { id = "com.github.node-gradle.node", version.ref = "nodegradle-node" } diff --git a/gradle/template.gradle.properties b/gradle/template.gradle.properties index a159701f4a83..b6ae69b85e82 100644 --- a/gradle/template.gradle.properties +++ b/gradle/template.gradle.properties @@ -124,3 +124,8 @@ production=false # may not support compilation of the UI module, so they can turn it off via this # parameter. disableUiModule=false + +# Changelog generator settings (./gradlew writeChangelog, see dev-docs/changelog.adoc) +#user.name= +#user.githubid= +#user.asfid= diff --git a/gradle/validation/rat-sources.gradle b/gradle/validation/rat-sources.gradle index 37dde589a529..8f87dcb065db 100644 --- a/gradle/validation/rat-sources.gradle +++ b/gradle/validation/rat-sources.gradle @@ -96,6 +96,11 @@ allprojects { exclude "dev-tools/scripts/README.md" exclude "dev-tools/scripts/create_line_file_docs.py" + // Exclude new CHANGELOG and version-summary for each release + exclude "CHANGELOG.md" + exclude "changelog/**/version-summary.md" + exclude "changelog/.templates/*.md" + // The root project also includes patterns for the include composite // projects. Include their sources in the scan. 
include "build-tools/build-infra/src/**" diff --git a/solr/licenses/README.committers.txt b/solr/licenses/README.committers.txt index 17dab04632b8..2942a6c8953e 100644 --- a/solr/licenses/README.committers.txt +++ b/solr/licenses/README.committers.txt @@ -29,7 +29,7 @@ what changed with respect to licensing in the commit diff. --- -Any changes made to this directory should be noted in CHANGES.txt, +Any changes made to this directory should be noted in the changelog, along with the specific version information. If the version is a "snapshot" of another Apache project, include the SVN revision number. diff --git a/solr/packaging/build.gradle b/solr/packaging/build.gradle index 2c21f928d0b5..8d0e8723b617 100644 --- a/solr/packaging/build.gradle +++ b/solr/packaging/build.gradle @@ -80,13 +80,13 @@ distributions { from(rootDir, { include "LICENSE.txt" include "NOTICE.txt" + include "CHANGELOG.md" }) from(project(":solr").projectDir, { include "bin/**" include "licenses/**" exclude "licenses/README.committers.txt" - include "CHANGES.txt" }) from(projectDir, { diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/solr-upgrade-notes.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/solr-upgrade-notes.adoc index 890a2a98ffa9..061aea238e4a 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/solr-upgrade-notes.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/solr-upgrade-notes.adoc @@ -27,7 +27,7 @@ These notes highlight the important changes that may impact the largest number o It is not a comprehensive list of all changes to Solr in any release. When planning your Solr upgrade, consider the customizations to -your system and review the {solr-javadocs}/changes/Changes.html[`CHANGES.txt`] +your system and review the {solr-javadocs}/changes/Changes.html[`CHANGELOG.md`] file found in your Solr package. That file includes all the changes and updates that may affect your existing implementation. 
@@ -67,7 +67,7 @@ Rolling upgrades from Solr 8 to Solr 9 require first upgrading the cluster to ve == Upgrading from Pre-8.x Versions -Users upgrading from versions prior to 8.x are strongly encouraged to consult {solr-javadocs}/changes/Changes.html[`CHANGES.txt`] for the details of _all_ changes since the version they are upgrading from. +Users upgrading from versions prior to 8.x are strongly encouraged to consult {solr-javadocs}/changes/Changes.html[`CHANGELOG.md`] for the details of _all_ changes since the version they are upgrading from. The upgrade from 7.x to 8.0 introduced several *major* changes that you should be aware of before upgrading. Please do a thorough review of the section xref:major-changes-in-solr-8.adoc[] before starting your upgrade. diff --git a/solr/test-framework/src/java/org/apache/solr/util/ExternalPaths.java b/solr/test-framework/src/java/org/apache/solr/util/ExternalPaths.java index 9d5f82a24089..940ec58b9e94 100644 --- a/solr/test-framework/src/java/org/apache/solr/util/ExternalPaths.java +++ b/solr/test-framework/src/java/org/apache/solr/util/ExternalPaths.java @@ -19,6 +19,7 @@ import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.solr.common.SolrException; /** * Some tests need to reach outside the classpath to get certain resources (e.g. the example @@ -82,13 +83,14 @@ static Path determineSourceHome() { } Path base = file.toAbsolutePath(); - while (!Files.exists(base.resolve("solr/CHANGES.txt")) && null != base) { + while (!Files.exists(base.resolve("solr/test-framework/build.gradle")) && null != base) { base = base.getParent(); } return (null == base) ? null : base.resolve("solr/").toAbsolutePath(); } catch (Exception e) { // all bets are off - return null; + throw new SolrException( + SolrException.ErrorCode.SERVER_ERROR, "Failed to determine source home", e); } } }