From 74906f88b375a25c7ee711244072257325fc44fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 19 Oct 2022 15:51:41 +0200 Subject: [PATCH 01/48] Fake commit --- .github/workflows/comment_bot.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 4a5461752b3..4cec82619d0 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -31,15 +31,29 @@ permissions: jobs: crossbow: name: Listen! - if: startsWith(github.event.comment.body, '@github-actions crossbow') + if: ${{ github.event.issue.pull_request && startsWith(github.event.comment.body, '@github-actions crossbow')}} runs-on: ubuntu-latest steps: + - name: Get PR SHA + id: sha + uses: actions/github-script@v4 + with: + result-encoding: string + script: | + const { owner, repo, number } = context.issue; + const pr = await github.pulls.get({ + owner, + repo, + pull_number: number, + }); + return pr.data.head.sha - name: Checkout Arrow uses: actions/checkout@v3 with: path: arrow # fetch the tags for version number generation fetch-depth: 0 + ref: ${{ steps.sha.outputs.result }} - name: Set up Python uses: actions/setup-python@v4 with: From b0c8cbde00bac5f3007be74d37b220b6d4ca28ec Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 31 Aug 2022 10:53:27 -0400 Subject: [PATCH 02/48] Dask and Pandas repositories now use default branches named 'main'. 
--- dev/archery/archery/docker/tests/test_docker.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index bc25738becf..fa6b59f936d 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -259,12 +259,12 @@ def test_arrow_example_validation_passes(arrow_compose_path): def test_compose_default_params_and_env(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( UBUNTU='18.04', - DASK='master' + DASK='main' )) assert compose.config.dotenv == arrow_compose_env assert compose.config.params == { 'UBUNTU': '18.04', - 'DASK': 'master', + 'DASK': 'main', } @@ -492,7 +492,7 @@ def test_compose_push(arrow_compose_path): def test_compose_error(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( PYTHON='3.8', - PANDAS='master' + PANDAS='main' )) error = subprocess.CalledProcessError(99, []) @@ -503,7 +503,7 @@ def test_compose_error(arrow_compose_path): exception_message = str(exc.value) assert "exited with a non-zero exit code 99" in exception_message assert "PANDAS: latest" in exception_message - assert "export PANDAS=master" in exception_message + assert "export PANDAS=main" in exception_message def test_image_with_gpu(arrow_compose_path): From 09eb082dc0279f8481e86cd04b6e6ebcc318e9b1 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Wed, 31 Aug 2022 11:05:02 -0400 Subject: [PATCH 03/48] Replace 'master' with 'default' in comment about Travis CI default behavior. 
--- dev/archery/archery/crossbow/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index a83c190d121..b64a6f733ae 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -133,7 +133,7 @@ def format_all(items, pattern): # configurations for setting up branch skipping # - appveyor has a feature to skip builds without an appveyor.yml -# - travis reads from the master branch and applies the rules +# - travis reads from the default branch and applies the rules # - circle requires the configuration to be present on all branch, even ones # that are configured to be skipped # - azure skips branches without azure-pipelines.yml by default From 363c7b358a1dc7cf264ed7103037064576dd6870 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Wed, 31 Aug 2022 11:13:16 -0400 Subject: [PATCH 04/48] Remove mention of "master" from help text for --arrow-branch crossbow CLI option. --- dev/archery/archery/crossbow/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 456942173e7..55a2338a280 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -96,7 +96,7 @@ def check_config(obj, config_path): 'locally. Examples: https://github.com/apache/arrow or ' 'https://github.com/kszucs/arrow.') @click.option('--arrow-branch', '-b', default=None, - help='Give the branch name explicitly, e.g. master, ARROW-1949.') + help='Give the branch name explicitly, e.g. ARROW-1949.') @click.option('--arrow-sha', '-t', default=None, help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' 'apache-arrow-0.11.1.') @@ -225,7 +225,7 @@ def verify_release_candidate(obj, base_branch, create_pr, 'locally. 
Examples: https://github.com/apache/arrow or ' 'https://github.com/kszucs/arrow.') @click.option('--arrow-branch', '-b', default=None, - help='Give the branch name explicitly, e.g. master, ARROW-1949.') + help='Give the branch name explicitly, e.g. ARROW-1949.') @click.option('--arrow-sha', '-t', default=None, help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' 'apache-arrow-0.11.1.') From d5fa3d4d7615554a701d172e7fdba5d0ed9c40df Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 31 Aug 2022 11:30:52 -0400 Subject: [PATCH 05/48] Pandas repository uses 'main' as the default branch. --- dev/archery/archery/docker/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index c7b42c094f6..565efed05c5 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -217,7 +217,7 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, PYTHON=3.8 archery docker run conda-python # disable the cache only for the leaf image - PANDAS=master archery docker run --no-leaf-cache conda-python-pandas + PANDAS=main archery docker run --no-leaf-cache conda-python-pandas # entirely skip building the image archery docker run --no-pull --no-build conda-python From 4a4f987ca1bcd6336d56398ada5920c36cc8e3ce Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 31 Aug 2022 11:40:28 -0400 Subject: [PATCH 06/48] Add base_branch property to Release object and modify commits_to_pick to use the property. 
--- dev/archery/archery/release/core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 2c775c7506b..00981bbae59 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -361,6 +361,10 @@ def commits(self): commit_range = f"{lower}..{upper}" return list(map(Commit, self.repo.iter_commits(commit_range))) + @property + def base_branch(self): + return "master" + def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from # jira and add it to the issues @@ -422,9 +426,9 @@ def changelog(self): return JiraChangelog(release=self, categories=categories) def commits_to_pick(self, exclude_already_applied=True): - # collect commits applied on the main branch since the root of the + # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) - commit_range = f"{self.previous.tag}..master" + commit_range = f"{self.previous.tag}..{self.base_branch}" # keeping the original order of the commits helps to minimize the merge # conflicts during cherry-picks From ef4342d129e816d96a8d788674070ce9ed280f99 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 1 Sep 2022 10:56:02 -0400 Subject: [PATCH 07/48] Modify 'archery' command line interface to reference the mainline development branch generically. 
--- dev/archery/archery/cli.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index f76e326a8e1..677d935da67 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -529,7 +529,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, help="Hide counters field in diff report.") @click.argument("contender", metavar="[", default=ArrowSources.WORKSPACE, required=False) -@click.argument("baseline", metavar="[]]", default="origin/master", +@click.argument("baseline", metavar="[]]", default="origin/HEAD", required=False) @click.pass_context def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, @@ -542,7 +542,8 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, The caller can optionally specify both the contender and the baseline. If unspecified, the contender will default to the current workspace (like git) - and the baseline will default to master. + and the baseline will default to the mainline development branch (i.e. + default git branch). Each target (contender or baseline) can either be a git revision (commit, tag, special values like HEAD) or a cmake build directory. 
This @@ -559,12 +560,12 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, Examples: \b - # Compare workspace (contender) with master (baseline) + # Compare workspace (contender) against the mainline development branch (baseline) \b archery benchmark diff \b - # Compare master (contender) with latest version (baseline) + # Compare the mainline development branch (contender) against the latest version (baseline) \b export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1) \b From 0fcd9026eb8a8621ec5b0d949833c9d1bac7fdd3 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 8 Sep 2022 16:25:26 -0400 Subject: [PATCH 08/48] Dynamically compute the default branch name for archery and crossbow core/cli files --- dev/archery/archery/crossbow/cli.py | 7 ++++++- dev/archery/archery/crossbow/core.py | 19 ++++++++++++++++--- dev/archery/archery/release/core.py | 27 +++++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 55a2338a280..eb12fad25ef 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,8 +16,10 @@ # under the License. 
from pathlib import Path +from pickle import TRUE import time import sys +import pygit2 import click @@ -155,9 +157,12 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) +# Get the default branch name from the repository +arrow_source_dir = ArrowSources.find() +repo = Repo(arrow_source_dir.path) @crossbow.command() -@click.option('--base-branch', default="master", +@click.option('--base-branch', default=repo.default_branch_name, help='Set base branch for the PR.') @click.option('--create-pr', is_flag=True, default=False, help='Create GitHub Pull Request') diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index b64a6f733ae..ce03ac8795b 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -361,6 +361,13 @@ def signature(self): return pygit2.Signature(self.user_name, self.user_email, int(time.time())) + @property + def default_branch_name(self): + ref_obj = self.repo.references["refs/remotes/origin/HEAD"] + target_name = ref_obj.target + target_name_tokenized = target_name.split("/") + return target_name_tokenized[-1] + def create_tree(self, files): builder = self.repo.TreeBuilder() @@ -382,7 +389,7 @@ def create_commit(self, files, parents=None, message='', if parents is None: # by default use the main branch as the base of the new branch # required to reuse github actions cache across crossbow tasks - commit, _ = self.repo.resolve_refish("master") + commit, _ = self.repo.resolve_refish(self.default_branch_name) parents = [commit.id] tree_id = self.create_tree(files) @@ -546,8 +553,10 @@ def github_overwrite_release_assets(self, tag_name, target_commitish, 'Unsupported upload method {}'.format(method) ) - def github_pr(self, title, head=None, base="master", body=None, + def github_pr(self, title, head=None, base=None, body=None, github_token=None, create=False): + # Default value 
for base is the default_branch name() + base = self.default_branch_name() if base is None else base github_token = github_token or self.github_token repo = self.as_github_repo(github_token=github_token) if create: @@ -1289,11 +1298,15 @@ def validate(self): 'is: `{}`'.format(task_name, str(e)) ) + # Get the default branch name from the repository + arrow_source_dir = ArrowSources.find() + repo = Repo(arrow_source_dir.path) + # validate that the defined tasks are renderable, in order to to that # define the required object with dummy data target = Target( head='e279a7e06e61c14868ca7d71dea795420aea6539', - branch='master', + branch=repo.default_branch_name, remote='https://github.com/apache/arrow', version='1.0.0dev123', r_version='0.13.0.100000123', diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 00981bbae59..c15ec8ffbc3 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -226,6 +226,27 @@ def url(self): def title(self): return self._title +class DefaultBranchName(object): + def __new__(self): + if not hasattr(self, 'instance'): + self.instance = super(DefaultBranchName, self).__new__(self) + arrow = ArrowSources.find() + arrow = ArrowSources.find() + repo = Repo(arrow.path) + remotes = repo.remotes + origin = repo.remotes["origin"] + origin_refs = origin.refs + origin_head = origin_refs["HEAD"] # git.RemoteReference object to origin/HEAD + origin_head_reference = origin_head.reference # git.RemoteReference object to origin/main + origin_head_name = origin_head_reference.name # Should return "origin/main" or "origin/master" + origin_head_name_tokenized = origin_head_name.split("/") + self.default_branch_name = origin_head_name_tokenized[-1] + return self.instance + + @property + def value(self): + return self.default_branch_name + class Release: @@ -363,7 +384,8 @@ def commits(self): @property def base_branch(self): - return "master" + default_branch_name = DefaultBranchName() + 
return default_branch_name.value() def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from @@ -480,7 +502,8 @@ def branch(self): @property def base_branch(self): - return "master" + default_branch_name = DefaultBranchName() + return default_branch_name.value() @cached_property def siblings(self): From 7cc70a74e2dd6ede659f30f89dcac9c294227ecd Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 8 Sep 2022 16:30:46 -0400 Subject: [PATCH 09/48] Performed python linting --- dev/archery/archery/cli.py | 6 ++++-- dev/archery/archery/crossbow/cli.py | 4 ++-- dev/archery/archery/release/core.py | 11 +++++++---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 677d935da67..3f608872741 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -560,12 +560,14 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, Examples: \b - # Compare workspace (contender) against the mainline development branch (baseline) + # Compare workspace (contender) against the mainline development branch + # (baseline) \b archery benchmark diff \b - # Compare the mainline development branch (contender) against the latest version (baseline) + # Compare the mainline development branch (contender) against the latest + # version (baseline) \b export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1) \b diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index eb12fad25ef..b7390ef1417 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,10 +16,8 @@ # under the License. 
from pathlib import Path -from pickle import TRUE import time import sys -import pygit2 import click @@ -157,10 +155,12 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) + # Get the default branch name from the repository arrow_source_dir = ArrowSources.find() repo = Repo(arrow_source_dir.path) + @crossbow.command() @click.option('--base-branch', default=repo.default_branch_name, help='Set base branch for the PR.') diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index c15ec8ffbc3..b95a90b6d9c 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -226,6 +226,7 @@ def url(self): def title(self): return self._title + class DefaultBranchName(object): def __new__(self): if not hasattr(self, 'instance'): @@ -233,12 +234,14 @@ def __new__(self): arrow = ArrowSources.find() arrow = ArrowSources.find() repo = Repo(arrow.path) - remotes = repo.remotes origin = repo.remotes["origin"] origin_refs = origin.refs - origin_head = origin_refs["HEAD"] # git.RemoteReference object to origin/HEAD - origin_head_reference = origin_head.reference # git.RemoteReference object to origin/main - origin_head_name = origin_head_reference.name # Should return "origin/main" or "origin/master" + # git.RemoteReference object to origin/HEAD + origin_head = origin_refs["HEAD"] + # git.RemoteReference object to origin/main + origin_head_reference = origin_head.reference + # Should return "origin/main" or "origin/master" + origin_head_name = origin_head_reference.name origin_head_name_tokenized = origin_head_name.split("/") self.default_branch_name = origin_head_name_tokenized[-1] return self.instance From 20d6c223aea6bc8fc349ab2720c8ebe5a74ee96e Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 14:55:29 -0400 Subject: [PATCH 10/48] remove duplicate code --- dev/archery/archery/release/core.py | 1 - 1 
file changed, 1 deletion(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index b95a90b6d9c..997ed76417b 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -232,7 +232,6 @@ def __new__(self): if not hasattr(self, 'instance'): self.instance = super(DefaultBranchName, self).__new__(self) arrow = ArrowSources.find() - arrow = ArrowSources.find() repo = Repo(arrow.path) origin = repo.remotes["origin"] origin_refs = origin.refs From f850d52160911a40c14563a1a6aa8db809581946 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 14:56:45 -0400 Subject: [PATCH 11/48] Print debugging info for default_branch_name Repo class function --- dev/archery/archery/crossbow/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index ce03ac8795b..175f59ea016 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -363,6 +363,10 @@ def signature(self): @property def default_branch_name(self): + for remote in self.repo.remotes: + print(remote.name) + for ref in self.repo.references.objects: + print(ref.target) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From 5ff60d757b7ceb963079eb7b8e96460d9ad55686 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:04:28 -0400 Subject: [PATCH 12/48] Print more debugging info --- dev/archery/archery/crossbow/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 175f59ea016..b144f16580c 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -365,8 +365,9 @@ def signature(self): def default_branch_name(self): for remote in self.repo.remotes: print(remote.name) - for ref in 
self.repo.references.objects: - print(ref.target) + # for ref in self.repo.references.objects: + # print(ref.target) + print(self.repo.resolve_refish('origin/HEAD')) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From ec4bdf71ccb3eaf13bb3fb4969787b17328b5aa8 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:08:49 -0400 Subject: [PATCH 13/48] Remove resolve_refish print command --- dev/archery/archery/crossbow/core.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index b144f16580c..0e77bccc85d 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -363,11 +363,15 @@ def signature(self): @property def default_branch_name(self): + print("Remotes:") for remote in self.repo.remotes: print(remote.name) - # for ref in self.repo.references.objects: - # print(ref.target) - print(self.repo.resolve_refish('origin/HEAD')) + for ref in self.repo.references.objects: + print("ref.target" + ref.target) + print("ref.raw_target" + ref.raw_target) + print("ref.shorthand" + ref.shorthand) + print("ref.raw_shorthand" + ref.raw_shorthand) + print("ref.name" + ref.name) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From 8dd439e8ab2906e8a617398aef231d2c4ff7ac51 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:11:31 -0400 Subject: [PATCH 14/48] Add new line between reference object details --- dev/archery/archery/crossbow/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 0e77bccc85d..86851ff7080 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -367,11 +367,12 @@ def 
default_branch_name(self): for remote in self.repo.remotes: print(remote.name) for ref in self.repo.references.objects: - print("ref.target" + ref.target) - print("ref.raw_target" + ref.raw_target) - print("ref.shorthand" + ref.shorthand) - print("ref.raw_shorthand" + ref.raw_shorthand) - print("ref.name" + ref.name) + print(ref.target) + print(ref.raw_target) + print(ref.shorthand) + print(ref.raw_shorthand) + print(ref.name) + print("\n") ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From 7732c593c3ed2bb82263080795ee47ace2b5687d Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:30:21 -0400 Subject: [PATCH 15/48] Print branches --- dev/archery/archery/crossbow/core.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 86851ff7080..ce110ae195a 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -366,13 +366,15 @@ def default_branch_name(self): print("Remotes:") for remote in self.repo.remotes: print(remote.name) - for ref in self.repo.references.objects: - print(ref.target) - print(ref.raw_target) - print(ref.shorthand) - print(ref.raw_shorthand) - print(ref.name) - print("\n") + # for ref in self.repo.references.objects: + # print(ref.target) + # print(ref.raw_target) + # print(ref.shorthand) + # print(ref.raw_shorthand) + # print(ref.name) + # print("\n") + for branch in self.repo.branches: + print(branch) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From 3081853f269c3d08be3a9d5890563cd85a55bbf9 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 10:09:00 -0400 Subject: [PATCH 16/48] Use environment variable, DEFAULT_BRANCH, that is set in the yml file. 
--- dev/archery/archery/crossbow/core.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index ce110ae195a..a1cedd2379d 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -363,9 +363,11 @@ def signature(self): @property def default_branch_name(self): - print("Remotes:") - for remote in self.repo.remotes: - print(remote.name) + default_branch_name = os.getenv("DEFAULT_BRANCH") + print("**********default_branch_name" + default_branch_name) + # print("Remotes:") + # for remote in self.repo.remotes: + # print(remote.name) # for ref in self.repo.references.objects: # print(ref.target) # print(ref.raw_target) @@ -373,12 +375,13 @@ def default_branch_name(self): # print(ref.raw_shorthand) # print(ref.name) # print("\n") - for branch in self.repo.branches: - print(branch) - ref_obj = self.repo.references["refs/remotes/origin/HEAD"] - target_name = ref_obj.target - target_name_tokenized = target_name.split("/") - return target_name_tokenized[-1] + # for branch in self.repo.branches: + # print(branch) + # ref_obj = self.repo.references["refs/remotes/origin/HEAD"] + # target_name = ref_obj.target + # target_name_tokenized = target_name.split("/") + # return target_name_tokenized[-1] + return default_branch_name def create_tree(self, files): builder = self.repo.TreeBuilder() From fd6299689afdd25d88bee5bc60966122e8aa2b55 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 10:12:51 -0400 Subject: [PATCH 17/48] Remove string concatenation, types incompatible --- dev/archery/archery/crossbow/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index a1cedd2379d..1ad0bb8633c 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -364,7 +364,7 @@ def signature(self): @property 
def default_branch_name(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - print("**********default_branch_name" + default_branch_name) + print(default_branch_name) # print("Remotes:") # for remote in self.repo.remotes: # print(remote.name) From eb36ffbe4e90fa44398398bba1be344ff00f616d Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:15:22 -0400 Subject: [PATCH 18/48] Enable both CI workflows and local repository workflows for getting default branch in archery/crossbow/core.py --- dev/archery/archery/crossbow/core.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 1ad0bb8633c..688af5d55ff 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -364,23 +364,16 @@ def signature(self): @property def default_branch_name(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - print(default_branch_name) - # print("Remotes:") - # for remote in self.repo.remotes: - # print(remote.name) - # for ref in self.repo.references.objects: - # print(ref.target) - # print(ref.raw_target) - # print(ref.shorthand) - # print(ref.raw_shorthand) - # print(ref.name) - # print("\n") - # for branch in self.repo.branches: - # print(branch) - # ref_obj = self.repo.references["refs/remotes/origin/HEAD"] - # target_name = ref_obj.target - # target_name_tokenized = target_name.split("/") - # return target_name_tokenized[-1] + + if default_branch_name == None: + try: + ref_obj = self.repo.references["refs/remotes/origin/HEAD"] + target_name = ref_obj.target + target_name_tokenized = target_name.split("/") + default_branch_name = target_name_tokenized[-1] + except: + raise RuntimeError('DEFAULT_BRANCH environment variable is not set. 
Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + return default_branch_name def create_tree(self, files): From 0e8c8ea95dd649793add1e3beef2d5b92475b726 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:26:43 -0400 Subject: [PATCH 19/48] Enable both CI workflows and local repository workflows for getting default branch in archery/release/core.py --- dev/archery/archery/release/core.py | 42 ++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 997ed76417b..15a952d22f7 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -22,6 +22,7 @@ import pathlib import shelve import warnings +import os from git import Repo from jira import JIRA @@ -231,18 +232,35 @@ class DefaultBranchName(object): def __new__(self): if not hasattr(self, 'instance'): self.instance = super(DefaultBranchName, self).__new__(self) - arrow = ArrowSources.find() - repo = Repo(arrow.path) - origin = repo.remotes["origin"] - origin_refs = origin.refs - # git.RemoteReference object to origin/HEAD - origin_head = origin_refs["HEAD"] - # git.RemoteReference object to origin/main - origin_head_reference = origin_head.reference - # Should return "origin/main" or "origin/master" - origin_head_name = origin_head_reference.name - origin_head_name_tokenized = origin_head_name.split("/") - self.default_branch_name = origin_head_name_tokenized[-1] + + default_branch_name = os.getenv("DEFAULT_BRANCH") + + if default_branch_name == None: + try: + # Set up repo object + arrow = ArrowSources.find() + repo = Repo(arrow.path) + origin = repo.remotes["origin"] + origin_refs = origin.refs + + # git.RemoteReference object to origin/HEAD + origin_head = origin_refs["HEAD"] + + # git.RemoteReference object to origin/main or origin/master + origin_head_reference = origin_head.reference + + # Should return "origin/main" or 
"origin/master" + origin_head_name = origin_head_reference.name + origin_head_name_tokenized = origin_head_name.split("/") + + # The last token is the default branch name + default_branch_name = origin_head_name_tokenized[-1] + except: + raise RuntimeError('DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + + # Set default branch as class property + self.default_branch_name = default_branch_name + return self.instance @property From c0d5ea22bb08e1d5645161b77c0607da841e5ef9 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:39:40 -0400 Subject: [PATCH 20/48] Add DEFAULT_BRANCH environment variable to archery.yml test step for unittests --- .github/workflows/archery.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 36146d46aa4..696c4242a55 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -65,7 +65,9 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -v archery + run: | + DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + pytest -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 07226f9afb85652c331c2f8304076c3ad58bd56d Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:47:02 -0400 Subject: [PATCH 21/48] Set workflow-wide environment variable, DEFAULT_BRANCH, for archery.yml --- .github/workflows/archery.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 696c4242a55..7e32a262281 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -31,6 +31,9 @@ on: - 'dev/tasks/**' - 'docker-compose.yml' +env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + concurrency: 
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true @@ -65,9 +68,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: | - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - pytest -v archery + run: pytest -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 99938f70139fbe0c76f8ae843396719ec9fe9466 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:49:35 -0400 Subject: [PATCH 22/48] Print reason for skipping tests --- .github/workflows/archery.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 7e32a262281..5d300d5ccee 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -68,7 +68,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -v archery + run: pytest -rs -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 63ecf94affc628fd9d23c0f42cb906d8a744ab3e Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:53:53 -0400 Subject: [PATCH 23/48] Add 'enable-integration' flag to ensure crossbowcli tests run --- .github/workflows/archery.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 5d300d5ccee..2dad44cd5bc 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -68,7 +68,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -rs -v archery + run: pytest -rs --enable-integration -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 
1f0138e290b9daa671cc83464a2912d62312b117 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 12:09:20 -0400 Subject: [PATCH 24/48] Factor out GitFixup step DEFAULT_BRANCH value --- .github/workflows/archery.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 2dad44cd5bc..5e4d1764a6a 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -55,9 +55,7 @@ jobs: fetch-depth: 0 - name: Git Fixup shell: bash - run: | - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - git branch $DEFAULT_BRANCH origin/$DEFAULT_BRANCH || true + run: git branch $DEFAULT_BRANCH origin/$DEFAULT_BRANCH || true - name: Setup Python uses: actions/setup-python@v4 with: @@ -68,7 +66,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -rs --enable-integration -v archery + run: pytest -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 2e0740170d0cfea5576a6236e7cdee4bc75faabd Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 15 Sep 2022 10:04:30 -0400 Subject: [PATCH 25/48] Run python linting --- dev/archery/archery/cli.py | 2 +- dev/archery/archery/crossbow/core.py | 3 ++- dev/archery/archery/release/core.py | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 3f608872741..105a64c0603 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -571,7 +571,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, \b export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1) \b - archery benchmark diff master "$LAST" + archery benchmark diff "$LAST" \b # Compare g++7 (contender) with clang++-8 (baseline) builds diff --git a/dev/archery/archery/crossbow/core.py 
b/dev/archery/archery/crossbow/core.py index 688af5d55ff..603dfdbd4f0 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -372,7 +372,8 @@ def default_branch_name(self): target_name_tokenized = target_name.split("/") default_branch_name = target_name_tokenized[-1] except: - raise RuntimeError('DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + raise RuntimeError( + 'DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 15a952d22f7..131be01a5b1 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -237,7 +237,7 @@ def __new__(self): if default_branch_name == None: try: - # Set up repo object + # Set up repo object arrow = ArrowSources.find() repo = Repo(arrow.path) origin = repo.remotes["origin"] @@ -256,11 +256,12 @@ def __new__(self): # The last token is the default branch name default_branch_name = origin_head_name_tokenized[-1] except: - raise RuntimeError('DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + raise RuntimeError( + 'DEFAULT_BRANCH environment variable is not set. 
Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') # Set default branch as class property self.default_branch_name = default_branch_name - + return self.instance @property From 96e6535c9257f5778664d33e541d6f8082b87de9 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Mon, 19 Sep 2022 14:26:30 -0400 Subject: [PATCH 26/48] Address bare except and line lengths --- dev/archery/archery/crossbow/core.py | 8 +++++--- dev/archery/archery/release/core.py | 16 ++++++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 603dfdbd4f0..d7d54b146a4 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -365,15 +365,17 @@ def signature(self): def default_branch_name(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - if default_branch_name == None: + if default_branch_name is None: try: ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") default_branch_name = target_name_tokenized[-1] - except: + except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + 'DEFAULT_BRANCH environment variable is not set. 
Git ' + 'repository does not contain a ' + '\'refs/remotes/origin/HEAD\' reference.') return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 131be01a5b1..83fe659baa0 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -235,7 +235,7 @@ def __new__(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - if default_branch_name == None: + if default_branch_name is None: try: # Set up repo object arrow = ArrowSources.find() @@ -243,21 +243,25 @@ def __new__(self): origin = repo.remotes["origin"] origin_refs = origin.refs - # git.RemoteReference object to origin/HEAD + # Get git.RemoteReference object to origin/HEAD origin_head = origin_refs["HEAD"] - # git.RemoteReference object to origin/main or origin/master + # Get git.RemoteReference object to origin/main or + # origin/master origin_head_reference = origin_head.reference - # Should return "origin/main" or "origin/master" + # Get string value of remote head reference, should return + # "origin/main" or "origin/master" origin_head_name = origin_head_reference.name origin_head_name_tokenized = origin_head_name.split("/") # The last token is the default branch name default_branch_name = origin_head_name_tokenized[-1] - except: + except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + 'DEFAULT_BRANCH environment variable is not set. Git ' + 'repository does not contain a' + '\'refs/remotes/origin/HEAD\' reference.') # Set default branch as class property self.default_branch_name = default_branch_name From c66f3f10a988921dd20abf61c80ad4b3170a5391 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Mon, 19 Sep 2022 17:09:56 -0400 Subject: [PATCH 27/48] Add context to error message when obtaining default branch name. 
--- dev/archery/archery/crossbow/core.py | 6 +++--- dev/archery/archery/release/core.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index d7d54b146a4..02228cc66ee 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -373,9 +373,9 @@ def default_branch_name(self): default_branch_name = target_name_tokenized[-1] except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git ' - 'repository does not contain a ' - '\'refs/remotes/origin/HEAD\' reference.') + 'Unable to determine default branch name: DEFAULT_BRANCH ' + 'environment variable is not set. Git repository does not ' + 'contain a \'refs/remotes/origin/HEAD\' reference.') return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 83fe659baa0..e242970bfcf 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -259,9 +259,9 @@ def __new__(self): default_branch_name = origin_head_name_tokenized[-1] except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git ' - 'repository does not contain a' - '\'refs/remotes/origin/HEAD\' reference.') + 'Unable to determine default branch name: DEFAULT_BRANCH ' + 'environment variable is not set. 
Git repository does not ' + 'contain a \'refs/remotes/origin/HEAD\' reference.') # Set default branch as class property self.default_branch_name = default_branch_name From 09d898f62e3e10f992208172c2dd3f6a9b9a12af Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 14:12:12 -0400 Subject: [PATCH 28/48] add debugging print statement in archery/archery/release/core.py commits_to_pick() --- dev/archery/archery/release/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index e242970bfcf..1ded212822b 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -475,6 +475,7 @@ def changelog(self): def commits_to_pick(self, exclude_already_applied=True): # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) + print(self.base_branch) commit_range = f"{self.previous.tag}..{self.base_branch}" # keeping the original order of the commits helps to minimize the merge From 873cd8c312c8386570d0fc68652d3fd443b54abe Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 14:37:28 -0400 Subject: [PATCH 29/48] Debugging statements for DefaultBranchName constructor --- dev/archery/archery/release/core.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 1ded212822b..e4578ee6679 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -236,12 +236,21 @@ def __new__(self): default_branch_name = os.getenv("DEFAULT_BRANCH") if default_branch_name is None: + print("default_branch_name could not be determined by the environment variable") try: # Set up repo object arrow = ArrowSources.find() repo = Repo(arrow.path) origin = repo.remotes["origin"] origin_refs = origin.refs + + print("repo.remotes:") + for remote in repo.remotes: + print(remote) + + print("origin.refs:") 
+ for ref in origin.refs: + print(ref) # Get git.RemoteReference object to origin/HEAD origin_head = origin_refs["HEAD"] @@ -250,6 +259,8 @@ def __new__(self): # origin/master origin_head_reference = origin_head.reference + print(origin_head_reference) + # Get string value of remote head reference, should return # "origin/main" or "origin/master" origin_head_name = origin_head_reference.name From 9b94745108388bde051ed8ceaa0a73887e06da76 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 15:50:48 -0400 Subject: [PATCH 30/48] Remove base_branch property of Release, instead add default_branch_property --- dev/archery/archery/release/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index e4578ee6679..872fa91399e 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -243,7 +243,7 @@ def __new__(self): repo = Repo(arrow.path) origin = repo.remotes["origin"] origin_refs = origin.refs - + print("repo.remotes:") for remote in repo.remotes: print(remote) @@ -419,7 +419,7 @@ def commits(self): return list(map(Commit, self.repo.iter_commits(commit_range))) @property - def base_branch(self): + def default_branch(self): default_branch_name = DefaultBranchName() return default_branch_name.value() @@ -486,8 +486,8 @@ def changelog(self): def commits_to_pick(self, exclude_already_applied=True): # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) - print(self.base_branch) - commit_range = f"{self.previous.tag}..{self.base_branch}" + print(self.default_branch) + commit_range = f"{self.previous.tag}..{self.default_branch}" # keeping the original order of the commits helps to minimize the merge # conflicts during cherry-picks From a6edc7f1d579ee2ca8e28e4b9f50b38d604b2e18 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 17:09:32 -0400 Subject: [PATCH 
31/48] Use separate function for computing default branch. --- dev/archery/archery/release/core.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 872fa91399e..8896a04161a 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -236,7 +236,6 @@ def __new__(self): default_branch_name = os.getenv("DEFAULT_BRANCH") if default_branch_name is None: - print("default_branch_name could not be determined by the environment variable") try: # Set up repo object arrow = ArrowSources.find() @@ -244,14 +243,6 @@ def __new__(self): origin = repo.remotes["origin"] origin_refs = origin.refs - print("repo.remotes:") - for remote in repo.remotes: - print(remote) - - print("origin.refs:") - for ref in origin.refs: - print(ref) - # Get git.RemoteReference object to origin/HEAD origin_head = origin_refs["HEAD"] @@ -259,8 +250,6 @@ def __new__(self): # origin/master origin_head_reference = origin_head.reference - print(origin_head_reference) - # Get string value of remote head reference, should return # "origin/main" or "origin/master" origin_head_name = origin_head_reference.name @@ -420,8 +409,8 @@ def commits(self): @property def default_branch(self): - default_branch_name = DefaultBranchName() - return default_branch_name.value() + dbn = DefaultBranchName() + return dbn.value def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from @@ -486,7 +475,6 @@ def changelog(self): def commits_to_pick(self, exclude_already_applied=True): # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) - print(self.default_branch) commit_range = f"{self.previous.tag}..{self.default_branch}" # keeping the original order of the commits helps to minimize the merge From ba7fc6f9c2e40e821173d8f501dcd1bc13b3e655 Mon Sep 17 00:00:00 2001 From: Fiona 
La Date: Tue, 20 Sep 2022 17:29:17 -0400 Subject: [PATCH 32/48] Refactor the default branch code to be calculated within Release class --- dev/archery/archery/release/core.py | 83 ++++++++++++----------------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 8896a04161a..20cb47a2eb4 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -228,51 +228,6 @@ def title(self): return self._title -class DefaultBranchName(object): - def __new__(self): - if not hasattr(self, 'instance'): - self.instance = super(DefaultBranchName, self).__new__(self) - - default_branch_name = os.getenv("DEFAULT_BRANCH") - - if default_branch_name is None: - try: - # Set up repo object - arrow = ArrowSources.find() - repo = Repo(arrow.path) - origin = repo.remotes["origin"] - origin_refs = origin.refs - - # Get git.RemoteReference object to origin/HEAD - origin_head = origin_refs["HEAD"] - - # Get git.RemoteReference object to origin/main or - # origin/master - origin_head_reference = origin_head.reference - - # Get string value of remote head reference, should return - # "origin/main" or "origin/master" - origin_head_name = origin_head_reference.name - origin_head_name_tokenized = origin_head_name.split("/") - - # The last token is the default branch name - default_branch_name = origin_head_name_tokenized[-1] - except KeyError: - raise RuntimeError( - 'Unable to determine default branch name: DEFAULT_BRANCH ' - 'environment variable is not set. 
Git repository does not ' - 'contain a \'refs/remotes/origin/HEAD\' reference.') - - # Set default branch as class property - self.default_branch_name = default_branch_name - - return self.instance - - @property - def value(self): - return self.default_branch_name - - class Release: def __new__(self, version, jira=None, repo=None): @@ -407,10 +362,39 @@ def commits(self): commit_range = f"{lower}..{upper}" return list(map(Commit, self.repo.iter_commits(commit_range))) - @property + @cached_property def default_branch(self): - dbn = DefaultBranchName() - return dbn.value + default_branch_name = os.getenv("DEFAULT_BRANCH") + + if default_branch_name is None: + try: + # Set up repo object + arrow = ArrowSources.find() + repo = Repo(arrow.path) + origin = repo.remotes["origin"] + origin_refs = origin.refs + + # Get git.RemoteReference object to origin/HEAD + origin_head = origin_refs["HEAD"] + + # Get git.RemoteReference object to origin/main or + # origin/master + origin_head_reference = origin_head.reference + + # Get string value of remote head reference, should return + # "origin/main" or "origin/master" + origin_head_name = origin_head_reference.name + origin_head_name_tokenized = origin_head_name.split("/") + + # The last token is the default branch name + default_branch_name = origin_head_name_tokenized[-1] + except KeyError: + raise RuntimeError( + 'Unable to determine default branch name: DEFAULT_BRANCH ' + 'environment variable is not set. 
Git repository does not ' + 'contain a \'refs/remotes/origin/HEAD\' reference.') + + return default_branch_name def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from @@ -527,8 +511,7 @@ def branch(self): @property def base_branch(self): - default_branch_name = DefaultBranchName() - return default_branch_name.value() + return self.default_branch @cached_property def siblings(self): From c0979987e7f340a1101a69fe911fa3660dab8281 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 21 Sep 2022 16:28:00 -0400 Subject: [PATCH 33/48] Add DEFAULT_BRANCH environment variable to Execute Docker Build step in integration.yml --- .github/workflows/integration.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 2c5ffb084c0..4380dec782d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -50,6 +50,11 @@ permissions: env: DOCKER_VOLUME_PREFIX: ".docker/" +<<<<<<< HEAD +======= + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} +>>>>>>> 9f0281356 (Only add DEFAULT_BRANCH environment variable to archery docker command in integration.yml) jobs: @@ -85,6 +90,7 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' From 4139d8225ad7c93a374b3a42fbbfaf0b0100321d Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 21 Sep 2022 16:31:08 -0400 Subject: [PATCH 34/48] In integration.yml, merge edits from default branch and current feature branch. 
--- .github/workflows/integration.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 4380dec782d..3e6d0d14c78 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -87,11 +87,18 @@ jobs: - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build +<<<<<<< HEAD env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration +======= + run: > + archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e + DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + conda-integration +>>>>>>> 2600c98a1 (In integration.yml, fix multi-line symbol.) - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: From 7d6ab6201105e13e4294118f98bec075e5a9ecf1 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 22 Sep 2022 10:29:13 -0400 Subject: [PATCH 35/48] Add DEFAULT_BRANCH env var for archery docker run command in .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a15822b4a74..4a957b0ce78 100644 --- a/.travis.yml +++ b/.travis.yml @@ -187,6 +187,7 @@ script: - | archery docker run \ ${DOCKER_RUN_ARGS} \ + -e DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ --volume ${PWD}/build:/build \ ${DOCKER_IMAGE_ID} From 7e1d68dd19f40b5be6d3ff642deeeb3cbce43f5c Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 23 Sep 2022 11:03:03 -0400 Subject: [PATCH 36/48] Use git command to get default branch name in .travis.yml --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4a957b0ce78..b2caa04ba81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -185,9 +185,10 @@ install: 
script: - | + GITHUB_DEFAULT_BRANCH_NAME=git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@ archery docker run \ ${DOCKER_RUN_ARGS} \ - -e DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ + -e DEFAULT_BRANCH=${GITHUB_DEFAULT_BRANCH_NAME} \ --volume ${PWD}/build:/build \ ${DOCKER_IMAGE_ID} From d587402b83eedb67d55d92b4874a5ca52be8fbef Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 23 Sep 2022 12:31:07 -0400 Subject: [PATCH 37/48] Fix integration.yml merge --- .github/workflows/integration.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 3e6d0d14c78..e7cd861cd31 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -50,11 +50,6 @@ permissions: env: DOCKER_VOLUME_PREFIX: ".docker/" -<<<<<<< HEAD -======= - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} ->>>>>>> 9f0281356 (Only add DEFAULT_BRANCH environment variable to archery docker command in integration.yml) jobs: @@ -87,18 +82,13 @@ jobs: - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build -<<<<<<< HEAD env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} - run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration -======= run: > archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e DEFAULT_BRANCH=${{ github.event.repository.default_branch }} conda-integration ->>>>>>> 2600c98a1 (In integration.yml, fix multi-line symbol.) 
- name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: From d944930e96f86c6c691e5e9725ce930220510021 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 23 Sep 2022 15:22:16 -0400 Subject: [PATCH 38/48] Set and export the DEFAULT_BRANCH env var for the archery command. --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b2caa04ba81..43fcc679066 100644 --- a/.travis.yml +++ b/.travis.yml @@ -184,11 +184,10 @@ install: - sudo -H pip3 install -e dev/archery[docker] script: + - export DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) - | - GITHUB_DEFAULT_BRANCH_NAME=git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@ archery docker run \ ${DOCKER_RUN_ARGS} \ - -e DEFAULT_BRANCH=${GITHUB_DEFAULT_BRANCH_NAME} \ --volume ${PWD}/build:/build \ ${DOCKER_IMAGE_ID} From 6a2dccd0c48b820efcb50a2f1c14c589dd5b5304 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 13:30:02 -0400 Subject: [PATCH 39/48] Remove computation for default branch name from module loading step in archery/archery/crossbow/cli.py --- dev/archery/archery/crossbow/cli.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index b7390ef1417..ba5136ca15b 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -155,14 +155,8 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) - -# Get the default branch name from the repository -arrow_source_dir = ArrowSources.find() -repo = Repo(arrow_source_dir.path) - - @crossbow.command() -@click.option('--base-branch', default=repo.default_branch_name, +@click.option('--base-branch', default=None, help='Set base branch for the PR.') @click.option('--create-pr', 
is_flag=True, default=False, help='Create GitHub Pull Request') @@ -195,6 +189,13 @@ def verify_release_candidate(obj, base_branch, create_pr, # The verify-release-candidate command will create a PR (or find one) # and add the verify-rc* comment to trigger the verify tasks + # Default value for base_branch is the repository's default branch name + if base_branch is None: + # Get the default branch name from the repository + arrow_source_dir = ArrowSources.find() + repo = Repo(arrow_source_dir.path) + base_branch = repo.default_branch_name + # Redefine Arrow repo to use the correct arrow remote. arrow = Repo(path=obj['arrow'].path, remote_url=remote) response = arrow.github_pr(title=pr_title, head=head_branch, From dc6e939d3ee9d48dc5300bb1774b02dcedc5780a Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:02:31 -0400 Subject: [PATCH 40/48] Removing error if default branch cannot be determined, default to 'master' for now in dev/archery/archery/crossbow/core.py --- dev/archery/archery/crossbow/core.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 02228cc66ee..13dbaebcc28 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -28,6 +28,7 @@ from io import StringIO from pathlib import Path from datetime import date +import warnings import jinja2 from ruamel.yaml import YAML @@ -372,10 +373,14 @@ def default_branch_name(self): target_name_tokenized = target_name.split("/") default_branch_name = target_name_tokenized[-1] except KeyError: - raise RuntimeError( - 'Unable to determine default branch name: DEFAULT_BRANCH ' - 'environment variable is not set. Git repository does not ' - 'contain a \'refs/remotes/origin/HEAD\' reference.') + # TODO: ARROW-18011 to track changing the hard coded default + # value from "master" to "main". 
+ default_branch_name = "master" + warnings.warn('Unable to determine default branch name: ' + 'DEFAULT_BRANCH environment variable is not set. Git ' + 'repository does not contain a \'refs/remotes/origin/HEAD\'' + ' reference. Setting the default branch name to ' + + default_branch_name, RuntimeWarning) return default_branch_name From 3bd8a2e9649b6efd095b63d2af1fee8c9092abdf Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:05:34 -0400 Subject: [PATCH 41/48] Alphabetize the standard library imports in dev/archery/archery/release/core.py --- dev/archery/archery/release/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 20cb47a2eb4..2786f96a663 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -18,11 +18,11 @@ from abc import abstractmethod from collections import defaultdict import functools -import re +import os import pathlib +import re import shelve import warnings -import os from git import Repo from jira import JIRA From c1670e6418a3b345b934bc4bcfd0eb6a8575eeec Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:26:23 -0400 Subject: [PATCH 42/48] Remove error in the case that the default branch name could not be determined, default to 'master' in dev/archery/archery/release/core.py --- dev/archery/archery/release/core.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 2786f96a663..a34a11cc3e6 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -389,10 +389,14 @@ def default_branch(self): # The last token is the default branch name default_branch_name = origin_head_name_tokenized[-1] except KeyError: - raise RuntimeError( - 'Unable to determine default branch name: DEFAULT_BRANCH ' - 'environment variable is not set. 
Git repository does not ' - 'contain a \'refs/remotes/origin/HEAD\' reference.') + # TODO: ARROW-18011 to track changing the hard coded default + # value from "master" to "main". + default_branch_name = "master" + warnings.warn('Unable to determine default branch name: ' + 'DEFAULT_BRANCH environment variable is not set. Git ' + 'repository does not contain a \'refs/remotes/origin/HEAD\'' + ' reference. Setting the default branch name to ' + + default_branch_name, RuntimeWarning) return default_branch_name From a7793a05dd547dce913877140ff6a0edfb47c472 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:42:39 -0400 Subject: [PATCH 43/48] Rename DEFAULT_BRANCH env var to ARCHERY_DEFAULT_BRANCH --- .github/workflows/archery.yml | 4 ++-- .github/workflows/integration.yml | 2 +- .travis.yml | 2 +- dev/archery/archery/crossbow/cli.py | 1 + dev/archery/archery/crossbow/core.py | 11 ++++++----- dev/archery/archery/release/core.py | 11 ++++++----- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 5e4d1764a6a..d337ec797cf 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -32,7 +32,7 @@ on: - 'docker-compose.yml' env: - DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + ARCHERY_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} @@ -55,7 +55,7 @@ jobs: fetch-depth: 0 - name: Git Fixup shell: bash - run: git branch $DEFAULT_BRANCH origin/$DEFAULT_BRANCH || true + run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index e7cd861cd31..0e6351c523e 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -87,7 +87,7 @@ jobs: 
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: > archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' diff --git a/.travis.yml b/.travis.yml index 43fcc679066..a96e07f0c43 100644 --- a/.travis.yml +++ b/.travis.yml @@ -184,7 +184,7 @@ install: - sudo -H pip3 install -e dev/archery[docker] script: - - export DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) + - export ARCHERY_DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) - | archery docker run \ ${DOCKER_RUN_ARGS} \ diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index ba5136ca15b..ec8eab2a4b5 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -155,6 +155,7 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) + @crossbow.command() @click.option('--base-branch', default=None, help='Set base branch for the PR.') diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 13dbaebcc28..553b2d69216 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -364,7 +364,7 @@ def signature(self): @property def default_branch_name(self): - default_branch_name = os.getenv("DEFAULT_BRANCH") + default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") if default_branch_name is None: try: @@ -377,10 +377,11 @@ def default_branch_name(self): # value from "master" to "main". default_branch_name = "master" warnings.warn('Unable to determine default branch name: ' - 'DEFAULT_BRANCH environment variable is not set. 
Git ' - 'repository does not contain a \'refs/remotes/origin/HEAD\'' - ' reference. Setting the default branch name to ' + - default_branch_name, RuntimeWarning) + 'ARCHERY_DEFAULT_BRANCH environment variable is ' + 'not set. Git repository does not contain a ' + '\'refs/remotes/origin/HEAD\' reference. Setting ' + 'the default branch name to ' + + default_branch_name, RuntimeWarning) return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index a34a11cc3e6..d9c85740093 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -364,7 +364,7 @@ def commits(self): @cached_property def default_branch(self): - default_branch_name = os.getenv("DEFAULT_BRANCH") + default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") if default_branch_name is None: try: @@ -393,10 +393,11 @@ def default_branch(self): # value from "master" to "main". default_branch_name = "master" warnings.warn('Unable to determine default branch name: ' - 'DEFAULT_BRANCH environment variable is not set. Git ' - 'repository does not contain a \'refs/remotes/origin/HEAD\'' - ' reference. Setting the default branch name to ' + - default_branch_name, RuntimeWarning) + 'ARCHERY_DEFAULT_BRANCH environment variable is ' + 'not set. Git repository does not contain a ' + '\'refs/remotes/origin/HEAD\' reference.
Setting ' + 'the default branch name to ' + + default_branch_name, RuntimeWarning) return default_branch_name From 40c6390e5229a32af5551f54dc07f99e95c17806 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 13 Oct 2022 15:40:05 -0400 Subject: [PATCH 44/48] Reuse arrow Repo object for getting the default branch name, if needed --- dev/archery/archery/crossbow/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index ec8eab2a4b5..d9ab7a33841 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -190,15 +190,14 @@ def verify_release_candidate(obj, base_branch, create_pr, # The verify-release-candidate command will create a PR (or find one) # and add the verify-rc* comment to trigger the verify tasks + # Redefine Arrow repo to use the correct arrow remote. + arrow = Repo(path=obj['arrow'].path, remote_url=remote) + # Default value for base_branch is the repository's default branch name if base_branch is None: # Get the default branch name from the repository - arrow_source_dir = ArrowSources.find() - repo = Repo(arrow_source_dir.path) - base_branch = repo.default_branch_name - - # Redefine Arrow repo to use the correct arrow remote.
- arrow = Repo(path=obj['arrow'].path, remote_url=remote) + base_branch = arrow.default_branch_name + response = arrow.github_pr(title=pr_title, head=head_branch, base=base_branch, body=pr_body, github_token=obj['queue'].github_token, From 3775c65e308a9a24df45f4516dee5fca8c4f98e3 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 13 Oct 2022 16:34:10 -0400 Subject: [PATCH 45/48] Update the dask and pandas install scripts to use default branch computed by Git rather than hard-coded defaults --- ci/scripts/install_dask.sh | 5 ++++- ci/scripts/install_pandas.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh index eb9c4e3dd42..50be7b38a6c 100755 --- a/ci/scripts/install_dask.sh +++ b/ci/scripts/install_dask.sh @@ -26,7 +26,10 @@ fi dask=$1 -if [ "${dask}" = "master" ]; then +# Get Git default branch name +DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" + +if [ "${dask}" = "${DEFAULT_BRANCH}" ]; then pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] elif [ "${dask}" = "latest" ]; then pip install dask[dataframe] diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh index 5aca65f825a..135f60edf67 100755 --- a/ci/scripts/install_pandas.sh +++ b/ci/scripts/install_pandas.sh @@ -35,7 +35,10 @@ else pip install numpy==${numpy} fi -if [ "${pandas}" = "master" ]; then +# Get Git default branch name +DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" + +if [ "${pandas}" = "${DEFAULT_BRANCH}" ]; then pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation elif [ "${pandas}" = "nightly" ]; then pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas From 669a14eff38834515d866e2e3d772678bc4a8783 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 14 Oct 2022 14:56:49 -0400 Subject: [PATCH 46/48] Change the flag for indicating upstream 
development version of Pandas and Dask to 'upstream_devel', and update the documentation. --- ci/scripts/install_dask.sh | 5 +---- ci/scripts/install_pandas.sh | 5 +---- dev/archery/archery/docker/tests/test_docker.py | 8 ++++---- docs/source/developers/continuous_integration/docker.rst | 7 ++++--- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh index 50be7b38a6c..8d712a88a6a 100755 --- a/ci/scripts/install_dask.sh +++ b/ci/scripts/install_dask.sh @@ -26,10 +26,7 @@ fi dask=$1 -# Get Git default branch name -DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" - -if [ "${dask}" = "${DEFAULT_BRANCH}" ]; then +if [ "${dask}" = "upstream_devel" ]; then pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] elif [ "${dask}" = "latest" ]; then pip install dask[dataframe] diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh index 135f60edf67..be29e5da554 100755 --- a/ci/scripts/install_pandas.sh +++ b/ci/scripts/install_pandas.sh @@ -35,10 +35,7 @@ else pip install numpy==${numpy} fi -# Get Git default branch name -DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" - -if [ "${pandas}" = "${DEFAULT_BRANCH}" ]; then +if [ "${pandas}" = "upstream_devel" ]; then pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation elif [ "${pandas}" = "nightly" ]; then pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index fa6b59f936d..386b7c2bdae 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -259,12 +259,12 @@ def test_arrow_example_validation_passes(arrow_compose_path): def test_compose_default_params_and_env(arrow_compose_path): compose = DockerCompose(arrow_compose_path, 
params=dict( UBUNTU='18.04', - DASK='main' + DASK='upstream_devel' )) assert compose.config.dotenv == arrow_compose_env assert compose.config.params == { 'UBUNTU': '18.04', - 'DASK': 'main', + 'DASK': 'upstream_devel', } @@ -492,7 +492,7 @@ def test_compose_push(arrow_compose_path): def test_compose_error(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( PYTHON='3.8', - PANDAS='main' + PANDAS='upstream_devel' )) error = subprocess.CalledProcessError(99, []) @@ -503,7 +503,7 @@ def test_compose_error(arrow_compose_path): exception_message = str(exc.value) assert "exited with a non-zero exit code 99" in exception_message assert "PANDAS: latest" in exception_message - assert "export PANDAS=main" in exception_message + assert "export PANDAS=upstream_devel" in exception_message def test_image_with_gpu(arrow_compose_path): diff --git a/docs/source/developers/continuous_integration/docker.rst b/docs/source/developers/continuous_integration/docker.rst index 7035dfe9ec6..49061f5b847 100644 --- a/docs/source/developers/continuous_integration/docker.rst +++ b/docs/source/developers/continuous_integration/docker.rst @@ -85,13 +85,13 @@ where the leaf image is ``conda-python-pandas``. .. code:: bash - PANDAS=master archery docker run --no-leaf-cache conda-python-pandas + PANDAS=upstream_devel archery docker run --no-leaf-cache conda-python-pandas Which translates to: .. code:: bash - export PANDAS=master + export PANDAS=upstream_devel docker-compose pull --ignore-pull-failures conda-cpp docker-compose pull --ignore-pull-failures conda-python docker-compose build conda-cpp @@ -102,7 +102,7 @@ Which translates to: Note that it doesn't pull the conda-python-pandas image and disable the cache when building it. -``PANDAS`` is a `build parameter `_, see the +``PANDAS`` is a :ref:`build parameter <docker-build-parameters>`, see the defaults in the .env file. **To entirely skip building the image:** @@ -178,6 +178,7 @@ image when building Glib, Ruby, R and Python bindings.
This reduces duplication and streamlines maintenance, but makes the docker-compose configuration more complicated. +.. _docker-build-parameters: Docker Build Parameters ~~~~~~~~~~~~~~~~~~~~~~~ From ddce92e596f2ca8180418c547be01da189bdcc2c Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 14 Oct 2022 15:09:38 -0400 Subject: [PATCH 47/48] Run python linting --- dev/archery/archery/crossbow/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index d9ab7a33841..58aed560329 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -197,7 +197,7 @@ def verify_release_candidate(obj, base_branch, create_pr, if base_branch is None: # Get the default branch name from the repository base_branch = arrow.default_branch_name - + response = arrow.github_pr(title=pr_title, head=head_branch, base=base_branch, body=pr_body, github_token=obj['queue'].github_token, From f884601631bee67b2d0926ba940a477e6358fcff Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 19 Oct 2022 09:16:54 -0400 Subject: [PATCH 48/48] Update Dask and Pandas version flag in tasks.yml and dev/archery/archery/docker/cli.py --- dev/archery/archery/docker/cli.py | 3 ++- dev/tasks/tasks.yml | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index 565efed05c5..6f571f27bff 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -217,7 +217,8 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, PYTHON=3.8 archery docker run conda-python # disable the cache only for the leaf image - PANDAS=main archery docker run --no-leaf-cache conda-python-pandas + PANDAS=upstream_devel archery docker run --no-leaf-cache + conda-python-pandas # entirely skip building the image archery docker run --no-pull --no-build conda-python diff --git 
a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index bdf53ff1dac..8cbb7615b7b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1492,7 +1492,7 @@ tasks: ("3.7", "latest", "latest", False), ("3.8", "latest", "latest", False), ("3.8", "nightly", "nightly", False), - ("3.9", "master", "nightly", False)] %} + ("3.9", "upstream_devel", "nightly", False)] %} test-conda-python-{{ python_version }}-pandas-{{ pandas_version }}: ci: github template: docker-tests/github.linux.yml @@ -1512,7 +1512,7 @@ tasks: image: conda-python-pandas {% endfor %} -{% for dask_version in ["latest", "master"] %} +{% for dask_version in ["latest", "upstream_devel"] %} test-conda-python-3.9-dask-{{ dask_version }}: ci: github template: docker-tests/github.linux.yml