diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 36146d46aa4..d337ec797cf 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -31,6 +31,9 @@ on: - 'dev/tasks/**' - 'docker-compose.yml' +env: + ARCHERY_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true @@ -52,9 +55,7 @@ jobs: fetch-depth: 0 - name: Git Fixup shell: bash - run: | - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - git branch $DEFAULT_BRANCH origin/$DEFAULT_BRANCH || true + run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 4a5461752b3..4cec82619d0 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -31,15 +31,29 @@ permissions: jobs: crossbow: name: Listen! 
- if: startsWith(github.event.comment.body, '@github-actions crossbow') + if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '@github-actions crossbow')}} runs-on: ubuntu-latest steps: + - name: Get PR SHA + id: sha + uses: actions/github-script@v4 + with: + result-encoding: string + script: | + const { owner, repo, number } = context.issue; + const pr = await github.pulls.get({ + owner, + repo, + pull_number: number, + }); + return pr.data.head.sha - name: Checkout Arrow uses: actions/checkout@v3 with: path: arrow # fetch the tags for version number generation fetch-depth: 0 + ref: ${{ steps.sha.outputs.result }} - name: Set up Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 2c5ffb084c0..0e6351c523e 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -85,7 +85,10 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration + run: > + archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e + ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: diff --git a/.travis.yml b/.travis.yml index a15822b4a74..a96e07f0c43 100644 --- a/.travis.yml +++ b/.travis.yml @@ -184,6 +184,7 @@ install: - sudo -H pip3 install -e dev/archery[docker] script: + - export ARCHERY_DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) - | archery docker run \ ${DOCKER_RUN_ARGS} \ diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh index eb9c4e3dd42..8d712a88a6a 100755 --- a/ci/scripts/install_dask.sh +++ b/ci/scripts/install_dask.sh @@ -26,7 +26,7 @@ fi dask=$1 -if [ "${dask}" = "master" ]; then +if [ "${dask}" = 
"upstream_devel" ]; then pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] elif [ "${dask}" = "latest" ]; then pip install dask[dataframe] diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh index 5aca65f825a..be29e5da554 100755 --- a/ci/scripts/install_pandas.sh +++ b/ci/scripts/install_pandas.sh @@ -35,7 +35,7 @@ else pip install numpy==${numpy} fi -if [ "${pandas}" = "master" ]; then +if [ "${pandas}" = "upstream_devel" ]; then pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation elif [ "${pandas}" = "nightly" ]; then pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index f76e326a8e1..105a64c0603 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -529,7 +529,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, help="Hide counters field in diff report.") @click.argument("contender", metavar="[", default=ArrowSources.WORKSPACE, required=False) -@click.argument("baseline", metavar="[]]", default="origin/master", +@click.argument("baseline", metavar="[]]", default="origin/HEAD", required=False) @click.pass_context def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, @@ -542,7 +542,8 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, The caller can optionally specify both the contender and the baseline. If unspecified, the contender will default to the current workspace (like git) - and the baseline will default to master. + and the baseline will default to the mainline development branch (i.e. + default git branch). Each target (contender or baseline) can either be a git revision (commit, tag, special values like HEAD) or a cmake build directory. 
This @@ -559,16 +560,18 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, Examples: \b - # Compare workspace (contender) with master (baseline) + # Compare workspace (contender) against the mainline development branch + # (baseline) \b archery benchmark diff \b - # Compare master (contender) with latest version (baseline) + # Compare the mainline development branch (contender) against the latest + # version (baseline) \b export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1) \b - archery benchmark diff master "$LAST" + archery benchmark diff "$LAST" \b # Compare g++7 (contender) with clang++-8 (baseline) builds diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 456942173e7..58aed560329 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -96,7 +96,7 @@ def check_config(obj, config_path): 'locally. Examples: https://github.com/apache/arrow or ' 'https://github.com/kszucs/arrow.') @click.option('--arrow-branch', '-b', default=None, - help='Give the branch name explicitly, e.g. master, ARROW-1949.') + help='Give the branch name explicitly, e.g. ARROW-1949.') @click.option('--arrow-sha', '-t', default=None, help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' 'apache-arrow-0.11.1.') @@ -157,7 +157,7 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, @crossbow.command() -@click.option('--base-branch', default="master", +@click.option('--base-branch', default=None, help='Set base branch for the PR.') @click.option('--create-pr', is_flag=True, default=False, help='Create GitHub Pull Request') @@ -192,6 +192,12 @@ def verify_release_candidate(obj, base_branch, create_pr, # Redefine Arrow repo to use the correct arrow remote. 
arrow = Repo(path=obj['arrow'].path, remote_url=remote) + + # Default value for base_branch is the repository's default branch name + if base_branch is None: + # Get the default branch name from the repository + base_branch = arrow.default_branch_name + response = arrow.github_pr(title=pr_title, head=head_branch, base=base_branch, body=pr_body, github_token=obj['queue'].github_token, @@ -225,7 +231,7 @@ def verify_release_candidate(obj, base_branch, create_pr, 'locally. Examples: https://github.com/apache/arrow or ' 'https://github.com/kszucs/arrow.') @click.option('--arrow-branch', '-b', default=None, - help='Give the branch name explicitly, e.g. master, ARROW-1949.') + help='Give the branch name explicitly, e.g. ARROW-1949.') @click.option('--arrow-sha', '-t', default=None, help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' 'apache-arrow-0.11.1.') diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index a83c190d121..553b2d69216 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -28,6 +28,7 @@ from io import StringIO from pathlib import Path from datetime import date +import warnings import jinja2 from ruamel.yaml import YAML @@ -133,7 +134,7 @@ def format_all(items, pattern): # configurations for setting up branch skipping # - appveyor has a feature to skip builds without an appveyor.yml -# - travis reads from the master branch and applies the rules +# - travis reads from the default branch and applies the rules # - circle requires the configuration to be present on all branch, even ones # that are configured to be skipped # - azure skips branches without azure-pipelines.yml by default @@ -361,6 +362,29 @@ def signature(self): return pygit2.Signature(self.user_name, self.user_email, int(time.time())) + @property + def default_branch_name(self): + default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") + + if default_branch_name is None: + try: + ref_obj = 
self.repo.references["refs/remotes/origin/HEAD"] + target_name = ref_obj.target + target_name_tokenized = target_name.split("/") + default_branch_name = target_name_tokenized[-1] + except KeyError: + # TODO: ARROW-18011 to track changing the hard coded default + # value from "master" to "main". + default_branch_name = "master" + warnings.warn('Unable to determine default branch name: ' + 'ARCHERY_DEFAULT_BRANCH environment variable is ' + 'not set. Git repository does not contain a ' + '\'refs/remotes/origin/HEAD\' reference. Setting ' + 'the default branch name to ' + + default_branch_name, RuntimeWarning) + + return default_branch_name + def create_tree(self, files): builder = self.repo.TreeBuilder() @@ -382,7 +406,7 @@ def create_commit(self, files, parents=None, message='', if parents is None: # by default use the main branch as the base of the new branch # required to reuse github actions cache across crossbow tasks - commit, _ = self.repo.resolve_refish("master") + commit, _ = self.repo.resolve_refish(self.default_branch_name) parents = [commit.id] tree_id = self.create_tree(files) @@ -546,8 +570,10 @@ def github_overwrite_release_assets(self, tag_name, target_commitish, 'Unsupported upload method {}'.format(method) ) - def github_pr(self, title, head=None, base="master", body=None, + def github_pr(self, title, head=None, base=None, body=None, github_token=None, create=False): + # default_branch_name is a property (not a method): read it, don't call it + base = self.default_branch_name if base is None else base github_token = github_token or self.github_token repo = self.as_github_repo(github_token=github_token) if create: @@ -1289,11 +1315,15 @@ def validate(self): 'is: `{}`'.format(task_name, str(e)) ) + # Get the default branch name from the repository + arrow_source_dir = ArrowSources.find() + repo = Repo(arrow_source_dir.path) + # validate that the defined tasks are renderable, in order to to that # define the required object with dummy data target = Target(
head='e279a7e06e61c14868ca7d71dea795420aea6539', - branch='master', + branch=repo.default_branch_name, remote='https://github.com/apache/arrow', version='1.0.0dev123', r_version='0.13.0.100000123', diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index c7b42c094f6..6f571f27bff 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -217,7 +217,8 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, PYTHON=3.8 archery docker run conda-python # disable the cache only for the leaf image - PANDAS=master archery docker run --no-leaf-cache conda-python-pandas + PANDAS=upstream_devel archery docker run --no-leaf-cache + conda-python-pandas # entirely skip building the image archery docker run --no-pull --no-build conda-python diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index bc25738becf..386b7c2bdae 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -259,12 +259,12 @@ def test_arrow_example_validation_passes(arrow_compose_path): def test_compose_default_params_and_env(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( UBUNTU='18.04', - DASK='master' + DASK='upstream_devel' )) assert compose.config.dotenv == arrow_compose_env assert compose.config.params == { 'UBUNTU': '18.04', - 'DASK': 'master', + 'DASK': 'upstream_devel', } @@ -492,7 +492,7 @@ def test_compose_push(arrow_compose_path): def test_compose_error(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( PYTHON='3.8', - PANDAS='master' + PANDAS='upstream_devel' )) error = subprocess.CalledProcessError(99, []) @@ -503,7 +503,7 @@ def test_compose_error(arrow_compose_path): exception_message = str(exc.value) assert "exited with a non-zero exit code 99" in exception_message assert "PANDAS: latest" in exception_message - assert "export 
PANDAS=master" in exception_message + assert "export PANDAS=upstream_devel" in exception_message def test_image_with_gpu(arrow_compose_path): diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 2c775c7506b..d9c85740093 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -18,8 +18,9 @@ from abc import abstractmethod from collections import defaultdict import functools -import re +import os import pathlib +import re import shelve import warnings @@ -361,6 +362,45 @@ def commits(self): commit_range = f"{lower}..{upper}" return list(map(Commit, self.repo.iter_commits(commit_range))) + @cached_property + def default_branch(self): + default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") + + if default_branch_name is None: + try: + # Set up repo object + arrow = ArrowSources.find() + repo = Repo(arrow.path) + origin = repo.remotes["origin"] + origin_refs = origin.refs + + # Get git.RemoteReference object to origin/HEAD + origin_head = origin_refs["HEAD"] + + # Get git.RemoteReference object to origin/main or + # origin/master + origin_head_reference = origin_head.reference + + # Get string value of remote head reference, should return + # "origin/main" or "origin/master" + origin_head_name = origin_head_reference.name + origin_head_name_tokenized = origin_head_name.split("/") + + # The last token is the default branch name + default_branch_name = origin_head_name_tokenized[-1] + except (KeyError, IndexError): + # TODO: ARROW-18011 to track changing the hard coded default + # value from "master" to "main". + default_branch_name = "master" + warnings.warn('Unable to determine default branch name: ' + 'ARCHERY_DEFAULT_BRANCH environment variable is ' + 'not set. Git repository does not contain a ' + '\'refs/remotes/origin/HEAD\' reference.
Setting ' + 'the default branch name to ' + + default_branch_name, RuntimeWarning) + + return default_branch_name + def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from # jira and add it to the issues @@ -422,9 +462,9 @@ def changelog(self): return JiraChangelog(release=self, categories=categories) def commits_to_pick(self, exclude_already_applied=True): - # collect commits applied on the main branch since the root of the + # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) - commit_range = f"{self.previous.tag}..master" + commit_range = f"{self.previous.tag}..{self.default_branch}" # keeping the original order of the commits helps to minimize the merge # conflicts during cherry-picks @@ -476,7 +516,7 @@ def branch(self): @property def base_branch(self): - return "master" + return self.default_branch @cached_property def siblings(self): diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index bdf53ff1dac..8cbb7615b7b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1492,7 +1492,7 @@ tasks: ("3.7", "latest", "latest", False), ("3.8", "latest", "latest", False), ("3.8", "nightly", "nightly", False), - ("3.9", "master", "nightly", False)] %} + ("3.9", "upstream_devel", "nightly", False)] %} test-conda-python-{{ python_version }}-pandas-{{ pandas_version }}: ci: github template: docker-tests/github.linux.yml @@ -1512,7 +1512,7 @@ tasks: image: conda-python-pandas {% endfor %} -{% for dask_version in ["latest", "master"] %} +{% for dask_version in ["latest", "upstream_devel"] %} test-conda-python-3.9-dask-{{ dask_version }}: ci: github template: docker-tests/github.linux.yml diff --git a/docs/source/developers/continuous_integration/docker.rst b/docs/source/developers/continuous_integration/docker.rst index 7035dfe9ec6..49061f5b847 100644 --- a/docs/source/developers/continuous_integration/docker.rst +++
b/docs/source/developers/continuous_integration/docker.rst @@ -85,13 +85,13 @@ where the leaf image is ``conda-python-pandas``. .. code:: bash - PANDAS=master archery docker run --no-leaf-cache conda-python-pandas + PANDAS=upstream_devel archery docker run --no-leaf-cache conda-python-pandas Which translates to: .. code:: bash - export PANDAS=master + export PANDAS=upstream_devel docker-compose pull --ignore-pull-failures conda-cpp docker-compose pull --ignore-pull-failures conda-python docker-compose build conda-cpp @@ -102,7 +102,7 @@ Which translates to: Note that it doesn't pull the conda-python-pandas image and disable the cache when building it. -``PANDAS`` is a `build parameter `_, see the +``PANDAS`` is a :ref:`build parameter <docker-build-parameters>`, see the defaults in the .env file. **To entirely skip building the image:** @@ -178,6 +178,7 @@ image when building Glib, Ruby, R and Python bindings. This reduces duplication and streamlines maintenance, but makes the docker-compose configuration more complicated. +.. _docker-build-parameters: Docker Build Parameters ~~~~~~~~~~~~~~~~~~~~~~~