From b35ffbd7935800d0a57ae04ea9a0fc0d6c1c19e3 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 31 Aug 2022 10:53:27 -0400 Subject: [PATCH 01/57] Dask and Pandas repositories now use default branches named 'main'. --- dev/archery/archery/docker/tests/test_docker.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index bc25738becf..fa6b59f936d 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -259,12 +259,12 @@ def test_arrow_example_validation_passes(arrow_compose_path): def test_compose_default_params_and_env(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( UBUNTU='18.04', - DASK='master' + DASK='main' )) assert compose.config.dotenv == arrow_compose_env assert compose.config.params == { 'UBUNTU': '18.04', - 'DASK': 'master', + 'DASK': 'main', } @@ -492,7 +492,7 @@ def test_compose_push(arrow_compose_path): def test_compose_error(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( PYTHON='3.8', - PANDAS='master' + PANDAS='main' )) error = subprocess.CalledProcessError(99, []) @@ -503,7 +503,7 @@ def test_compose_error(arrow_compose_path): exception_message = str(exc.value) assert "exited with a non-zero exit code 99" in exception_message assert "PANDAS: latest" in exception_message - assert "export PANDAS=master" in exception_message + assert "export PANDAS=main" in exception_message def test_image_with_gpu(arrow_compose_path): From ed1bbf2e0907face54fbf7456ae9aa0553450d80 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Wed, 31 Aug 2022 11:05:02 -0400 Subject: [PATCH 02/57] Replace 'master' with 'default' in comment about Travis CI default behavior. 
--- dev/archery/archery/crossbow/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index a83c190d121..b64a6f733ae 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -133,7 +133,7 @@ def format_all(items, pattern): # configurations for setting up branch skipping # - appveyor has a feature to skip builds without an appveyor.yml -# - travis reads from the master branch and applies the rules +# - travis reads from the default branch and applies the rules # - circle requires the configuration to be present on all branch, even ones # that are configured to be skipped # - azure skips branches without azure-pipelines.yml by default From 53271cd502e0ef53077e22f8214b1dbb87658484 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Wed, 31 Aug 2022 11:13:16 -0400 Subject: [PATCH 03/57] Remove mention of "master" from help text for --arrow-branch crossbow CLI option. --- dev/archery/archery/crossbow/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 456942173e7..55a2338a280 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -96,7 +96,7 @@ def check_config(obj, config_path): 'locally. Examples: https://github.com/apache/arrow or ' 'https://github.com/kszucs/arrow.') @click.option('--arrow-branch', '-b', default=None, - help='Give the branch name explicitly, e.g. master, ARROW-1949.') + help='Give the branch name explicitly, e.g. ARROW-1949.') @click.option('--arrow-sha', '-t', default=None, help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' 'apache-arrow-0.11.1.') @@ -225,7 +225,7 @@ def verify_release_candidate(obj, base_branch, create_pr, 'locally. 
Examples: https://github.com/apache/arrow or ' 'https://github.com/kszucs/arrow.') @click.option('--arrow-branch', '-b', default=None, - help='Give the branch name explicitly, e.g. master, ARROW-1949.') + help='Give the branch name explicitly, e.g. ARROW-1949.') @click.option('--arrow-sha', '-t', default=None, help='Set commit SHA or Tag name explicitly, e.g. f67a515, ' 'apache-arrow-0.11.1.') From 9cad4f8fc3ee27307087e68c46a28f81eb8f7adb Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 31 Aug 2022 11:30:52 -0400 Subject: [PATCH 04/57] Pandas repository uses 'main' as the default branch. --- dev/archery/archery/docker/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index c7b42c094f6..565efed05c5 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -217,7 +217,7 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, PYTHON=3.8 archery docker run conda-python # disable the cache only for the leaf image - PANDAS=master archery docker run --no-leaf-cache conda-python-pandas + PANDAS=main archery docker run --no-leaf-cache conda-python-pandas # entirely skip building the image archery docker run --no-pull --no-build conda-python From f405a02ecb46f3a7d7664cf7ef4905e78a2c03f3 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 31 Aug 2022 11:40:28 -0400 Subject: [PATCH 05/57] Add base_branch property to Release object and modify commits_to_pick to use the property. 
--- dev/archery/archery/release/core.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 2c775c7506b..00981bbae59 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -361,6 +361,10 @@ def commits(self): commit_range = f"{lower}..{upper}" return list(map(Commit, self.repo.iter_commits(commit_range))) + @property + def base_branch(self): + return "master" + def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from # jira and add it to the issues @@ -422,9 +426,9 @@ def changelog(self): return JiraChangelog(release=self, categories=categories) def commits_to_pick(self, exclude_already_applied=True): - # collect commits applied on the main branch since the root of the + # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) - commit_range = f"{self.previous.tag}..master" + commit_range = f"{self.previous.tag}..{self.base_branch}" # keeping the original order of the commits helps to minimize the merge # conflicts during cherry-picks From 393c5de8b3ba81f20621dde0054da556bc436aa5 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 1 Sep 2022 10:56:02 -0400 Subject: [PATCH 06/57] Modify 'archery' command line interface to reference the mainline development branch generically. 
--- dev/archery/archery/cli.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index f76e326a8e1..677d935da67 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -529,7 +529,7 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras, help="Hide counters field in diff report.") @click.argument("contender", metavar="[", default=ArrowSources.WORKSPACE, required=False) -@click.argument("baseline", metavar="[]]", default="origin/master", +@click.argument("baseline", metavar="[]]", default="origin/HEAD", required=False) @click.pass_context def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, @@ -542,7 +542,8 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, The caller can optionally specify both the contender and the baseline. If unspecified, the contender will default to the current workspace (like git) - and the baseline will default to master. + and the baseline will default to the mainline development branch (i.e. + default git branch). Each target (contender or baseline) can either be a git revision (commit, tag, special values like HEAD) or a cmake build directory. 
This @@ -559,12 +560,12 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, Examples: \b - # Compare workspace (contender) with master (baseline) + # Compare workspace (contender) against the mainline development branch (baseline) \b archery benchmark diff \b - # Compare master (contender) with latest version (baseline) + # Compare the mainline development branch (contender) against the latest version (baseline) \b export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1) \b From 8bc7abead6d01bc336e40166951b1ac7f50cc31c Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 8 Sep 2022 16:25:26 -0400 Subject: [PATCH 07/57] Dynamically compute the default branch name for archery and crossbow core/cli files --- dev/archery/archery/crossbow/cli.py | 7 ++++++- dev/archery/archery/crossbow/core.py | 19 ++++++++++++++++--- dev/archery/archery/release/core.py | 27 +++++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 6 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index 55a2338a280..eb12fad25ef 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,8 +16,10 @@ # under the License. 
from pathlib import Path +from pickle import TRUE import time import sys +import pygit2 import click @@ -155,9 +157,12 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) +# Get the default branch name from the repository +arrow_source_dir = ArrowSources.find() +repo = Repo(arrow_source_dir.path) @crossbow.command() -@click.option('--base-branch', default="master", +@click.option('--base-branch', default=repo.default_branch_name, help='Set base branch for the PR.') @click.option('--create-pr', is_flag=True, default=False, help='Create GitHub Pull Request') diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index b64a6f733ae..ce03ac8795b 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -361,6 +361,13 @@ def signature(self): return pygit2.Signature(self.user_name, self.user_email, int(time.time())) + @property + def default_branch_name(self): + ref_obj = self.repo.references["refs/remotes/origin/HEAD"] + target_name = ref_obj.target + target_name_tokenized = target_name.split("/") + return target_name_tokenized[-1] + def create_tree(self, files): builder = self.repo.TreeBuilder() @@ -382,7 +389,7 @@ def create_commit(self, files, parents=None, message='', if parents is None: # by default use the main branch as the base of the new branch # required to reuse github actions cache across crossbow tasks - commit, _ = self.repo.resolve_refish("master") + commit, _ = self.repo.resolve_refish(self.default_branch_name) parents = [commit.id] tree_id = self.create_tree(files) @@ -546,8 +553,10 @@ def github_overwrite_release_assets(self, tag_name, target_commitish, 'Unsupported upload method {}'.format(method) ) - def github_pr(self, title, head=None, base="master", body=None, + def github_pr(self, title, head=None, base=None, body=None, github_token=None, create=False): + # Default value 
for base is the default_branch name() + base = self.default_branch_name() if base is None else base github_token = github_token or self.github_token repo = self.as_github_repo(github_token=github_token) if create: @@ -1289,11 +1298,15 @@ def validate(self): 'is: `{}`'.format(task_name, str(e)) ) + # Get the default branch name from the repository + arrow_source_dir = ArrowSources.find() + repo = Repo(arrow_source_dir.path) + # validate that the defined tasks are renderable, in order to to that # define the required object with dummy data target = Target( head='e279a7e06e61c14868ca7d71dea795420aea6539', - branch='master', + branch=repo.default_branch_name, remote='https://github.com/apache/arrow', version='1.0.0dev123', r_version='0.13.0.100000123', diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 00981bbae59..c15ec8ffbc3 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -226,6 +226,27 @@ def url(self): def title(self): return self._title +class DefaultBranchName(object): + def __new__(self): + if not hasattr(self, 'instance'): + self.instance = super(DefaultBranchName, self).__new__(self) + arrow = ArrowSources.find() + arrow = ArrowSources.find() + repo = Repo(arrow.path) + remotes = repo.remotes + origin = repo.remotes["origin"] + origin_refs = origin.refs + origin_head = origin_refs["HEAD"] # git.RemoteReference object to origin/HEAD + origin_head_reference = origin_head.reference # git.RemoteReference object to origin/main + origin_head_name = origin_head_reference.name # Should return "origin/main" or "origin/master" + origin_head_name_tokenized = origin_head_name.split("/") + self.default_branch_name = origin_head_name_tokenized[-1] + return self.instance + + @property + def value(self): + return self.default_branch_name + class Release: @@ -363,7 +384,8 @@ def commits(self): @property def base_branch(self): - return "master" + default_branch_name = DefaultBranchName() + 
return default_branch_name.value() def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from @@ -480,7 +502,8 @@ def branch(self): @property def base_branch(self): - return "master" + default_branch_name = DefaultBranchName() + return default_branch_name.value() @cached_property def siblings(self): From 80a9bb485b9f0e201d83a39994fdb0a8a43b367b Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 8 Sep 2022 16:30:46 -0400 Subject: [PATCH 08/57] Performed python linting --- dev/archery/archery/cli.py | 6 ++++-- dev/archery/archery/crossbow/cli.py | 4 ++-- dev/archery/archery/release/core.py | 11 +++++++---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 677d935da67..3f608872741 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -560,12 +560,14 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, Examples: \b - # Compare workspace (contender) against the mainline development branch (baseline) + # Compare workspace (contender) against the mainline development branch + # (baseline) \b archery benchmark diff \b - # Compare the mainline development branch (contender) against the latest version (baseline) + # Compare the mainline development branch (contender) against the latest + # version (baseline) \b export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1) \b diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index eb12fad25ef..b7390ef1417 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -16,10 +16,8 @@ # under the License. 
from pathlib import Path -from pickle import TRUE import time import sys -import pygit2 import click @@ -157,10 +155,12 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) + # Get the default branch name from the repository arrow_source_dir = ArrowSources.find() repo = Repo(arrow_source_dir.path) + @crossbow.command() @click.option('--base-branch', default=repo.default_branch_name, help='Set base branch for the PR.') diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index c15ec8ffbc3..b95a90b6d9c 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -226,6 +226,7 @@ def url(self): def title(self): return self._title + class DefaultBranchName(object): def __new__(self): if not hasattr(self, 'instance'): @@ -233,12 +234,14 @@ def __new__(self): arrow = ArrowSources.find() arrow = ArrowSources.find() repo = Repo(arrow.path) - remotes = repo.remotes origin = repo.remotes["origin"] origin_refs = origin.refs - origin_head = origin_refs["HEAD"] # git.RemoteReference object to origin/HEAD - origin_head_reference = origin_head.reference # git.RemoteReference object to origin/main - origin_head_name = origin_head_reference.name # Should return "origin/main" or "origin/master" + # git.RemoteReference object to origin/HEAD + origin_head = origin_refs["HEAD"] + # git.RemoteReference object to origin/main + origin_head_reference = origin_head.reference + # Should return "origin/main" or "origin/master" + origin_head_name = origin_head_reference.name origin_head_name_tokenized = origin_head_name.split("/") self.default_branch_name = origin_head_name_tokenized[-1] return self.instance From 3fc0a12273a12018c1594c747892d8274b8e2e5d Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 14:55:29 -0400 Subject: [PATCH 09/57] remove duplicate code --- dev/archery/archery/release/core.py | 1 - 1 
file changed, 1 deletion(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index b95a90b6d9c..997ed76417b 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -232,7 +232,6 @@ def __new__(self): if not hasattr(self, 'instance'): self.instance = super(DefaultBranchName, self).__new__(self) arrow = ArrowSources.find() - arrow = ArrowSources.find() repo = Repo(arrow.path) origin = repo.remotes["origin"] origin_refs = origin.refs From 4a44b573d9366b6cfee26ee7560b824e78cd1de5 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 14:56:45 -0400 Subject: [PATCH 10/57] Print debugging info for default_branch_name Repo class function --- dev/archery/archery/crossbow/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index ce03ac8795b..175f59ea016 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -363,6 +363,10 @@ def signature(self): @property def default_branch_name(self): + for remote in self.repo.remotes: + print(remote.name) + for ref in self.repo.references.objects: + print(ref.target) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From f0ec4cb0bde0ba9dd4275a67279820d14e9b0011 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:04:28 -0400 Subject: [PATCH 11/57] Print more debugging info --- dev/archery/archery/crossbow/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 175f59ea016..b144f16580c 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -365,8 +365,9 @@ def signature(self): def default_branch_name(self): for remote in self.repo.remotes: print(remote.name) - for ref in 
self.repo.references.objects: - print(ref.target) + # for ref in self.repo.references.objects: + # print(ref.target) + print(self.repo.resolve_refish('origin/HEAD')) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From 47b17341ba78ade8c0f44dfff7bb71684726bf3b Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:08:49 -0400 Subject: [PATCH 12/57] Remove resolve_refish print command --- dev/archery/archery/crossbow/core.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index b144f16580c..0e77bccc85d 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -363,11 +363,15 @@ def signature(self): @property def default_branch_name(self): + print("Remotes:") for remote in self.repo.remotes: print(remote.name) - # for ref in self.repo.references.objects: - # print(ref.target) - print(self.repo.resolve_refish('origin/HEAD')) + for ref in self.repo.references.objects: + print("ref.target" + ref.target) + print("ref.raw_target" + ref.raw_target) + print("ref.shorthand" + ref.shorthand) + print("ref.raw_shorthand" + ref.raw_shorthand) + print("ref.name" + ref.name) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From 61a184ef02171843b409c479f834cbd0a0f0496b Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:11:31 -0400 Subject: [PATCH 13/57] Add new line between reference object details --- dev/archery/archery/crossbow/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 0e77bccc85d..86851ff7080 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -367,11 +367,12 @@ def 
default_branch_name(self): for remote in self.repo.remotes: print(remote.name) for ref in self.repo.references.objects: - print("ref.target" + ref.target) - print("ref.raw_target" + ref.raw_target) - print("ref.shorthand" + ref.shorthand) - print("ref.raw_shorthand" + ref.raw_shorthand) - print("ref.name" + ref.name) + print(ref.target) + print(ref.raw_target) + print(ref.shorthand) + print(ref.raw_shorthand) + print(ref.name) + print("\n") ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From 7d62f4f70ee8160c8d23b5979de1b8622da936d7 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 13 Sep 2022 15:30:21 -0400 Subject: [PATCH 14/57] Print branches --- dev/archery/archery/crossbow/core.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 86851ff7080..ce110ae195a 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -366,13 +366,15 @@ def default_branch_name(self): print("Remotes:") for remote in self.repo.remotes: print(remote.name) - for ref in self.repo.references.objects: - print(ref.target) - print(ref.raw_target) - print(ref.shorthand) - print(ref.raw_shorthand) - print(ref.name) - print("\n") + # for ref in self.repo.references.objects: + # print(ref.target) + # print(ref.raw_target) + # print(ref.shorthand) + # print(ref.raw_shorthand) + # print(ref.name) + # print("\n") + for branch in self.repo.branches: + print(branch) ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") From dbee344fe5a4c734a72b8972ef880eb30a13b792 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 10:09:00 -0400 Subject: [PATCH 15/57] Use environment variable, DEFAULT_BRANCH, that is set in the yml file. 
--- dev/archery/archery/crossbow/core.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index ce110ae195a..a1cedd2379d 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -363,9 +363,11 @@ def signature(self): @property def default_branch_name(self): - print("Remotes:") - for remote in self.repo.remotes: - print(remote.name) + default_branch_name = os.getenv("DEFAULT_BRANCH") + print("**********default_branch_name" + default_branch_name) + # print("Remotes:") + # for remote in self.repo.remotes: + # print(remote.name) # for ref in self.repo.references.objects: # print(ref.target) # print(ref.raw_target) @@ -373,12 +375,13 @@ def default_branch_name(self): # print(ref.raw_shorthand) # print(ref.name) # print("\n") - for branch in self.repo.branches: - print(branch) - ref_obj = self.repo.references["refs/remotes/origin/HEAD"] - target_name = ref_obj.target - target_name_tokenized = target_name.split("/") - return target_name_tokenized[-1] + # for branch in self.repo.branches: + # print(branch) + # ref_obj = self.repo.references["refs/remotes/origin/HEAD"] + # target_name = ref_obj.target + # target_name_tokenized = target_name.split("/") + # return target_name_tokenized[-1] + return default_branch_name def create_tree(self, files): builder = self.repo.TreeBuilder() From 6d353c0f444d19a29193c3f247695386e21a6527 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 10:12:51 -0400 Subject: [PATCH 16/57] Remove string concatenation, types incompatible --- dev/archery/archery/crossbow/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index a1cedd2379d..1ad0bb8633c 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -364,7 +364,7 @@ def signature(self): @property 
def default_branch_name(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - print("**********default_branch_name" + default_branch_name) + print(default_branch_name) # print("Remotes:") # for remote in self.repo.remotes: # print(remote.name) From 8dc8409d795142517fd98ba318e201eb9b71c9dc Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:15:22 -0400 Subject: [PATCH 17/57] Enable both CI workflows and local repository workflows for getting default branch in archery/crossbow/core.py --- dev/archery/archery/crossbow/core.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 1ad0bb8633c..688af5d55ff 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -364,23 +364,16 @@ def signature(self): @property def default_branch_name(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - print(default_branch_name) - # print("Remotes:") - # for remote in self.repo.remotes: - # print(remote.name) - # for ref in self.repo.references.objects: - # print(ref.target) - # print(ref.raw_target) - # print(ref.shorthand) - # print(ref.raw_shorthand) - # print(ref.name) - # print("\n") - # for branch in self.repo.branches: - # print(branch) - # ref_obj = self.repo.references["refs/remotes/origin/HEAD"] - # target_name = ref_obj.target - # target_name_tokenized = target_name.split("/") - # return target_name_tokenized[-1] + + if default_branch_name == None: + try: + ref_obj = self.repo.references["refs/remotes/origin/HEAD"] + target_name = ref_obj.target + target_name_tokenized = target_name.split("/") + default_branch_name = target_name_tokenized[-1] + except: + raise RuntimeError('DEFAULT_BRANCH environment variable is not set. 
Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + return default_branch_name def create_tree(self, files): From 058226c64ac1f879d114f370f6e6da5bb5b21a0f Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:26:43 -0400 Subject: [PATCH 18/57] Enable both CI workflows and local repository workflows for getting default branch in archery/release/core.py --- dev/archery/archery/release/core.py | 42 ++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 997ed76417b..15a952d22f7 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -22,6 +22,7 @@ import pathlib import shelve import warnings +import os from git import Repo from jira import JIRA @@ -231,18 +232,35 @@ class DefaultBranchName(object): def __new__(self): if not hasattr(self, 'instance'): self.instance = super(DefaultBranchName, self).__new__(self) - arrow = ArrowSources.find() - repo = Repo(arrow.path) - origin = repo.remotes["origin"] - origin_refs = origin.refs - # git.RemoteReference object to origin/HEAD - origin_head = origin_refs["HEAD"] - # git.RemoteReference object to origin/main - origin_head_reference = origin_head.reference - # Should return "origin/main" or "origin/master" - origin_head_name = origin_head_reference.name - origin_head_name_tokenized = origin_head_name.split("/") - self.default_branch_name = origin_head_name_tokenized[-1] + + default_branch_name = os.getenv("DEFAULT_BRANCH") + + if default_branch_name == None: + try: + # Set up repo object + arrow = ArrowSources.find() + repo = Repo(arrow.path) + origin = repo.remotes["origin"] + origin_refs = origin.refs + + # git.RemoteReference object to origin/HEAD + origin_head = origin_refs["HEAD"] + + # git.RemoteReference object to origin/main or origin/master + origin_head_reference = origin_head.reference + + # Should return "origin/main" or 
"origin/master" + origin_head_name = origin_head_reference.name + origin_head_name_tokenized = origin_head_name.split("/") + + # The last token is the default branch name + default_branch_name = origin_head_name_tokenized[-1] + except: + raise RuntimeError('DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + + # Set default branch as class property + self.default_branch_name = default_branch_name + return self.instance @property From e6ecbd4ba4dd83a599561bdd6e840eefe46a434d Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:39:40 -0400 Subject: [PATCH 19/57] Add DEFAULT_BRANCH environment variable to archery.yml test step for unittests --- .github/workflows/archery.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 36146d46aa4..696c4242a55 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -65,7 +65,9 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -v archery + run: | + DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + pytest -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 9fbf6a9bd221bb5b6991c108f5962483dbb4b7d4 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:47:02 -0400 Subject: [PATCH 20/57] Set workflow-wide environment variable, DEFAULT_BRANCH, for archery.yml --- .github/workflows/archery.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 696c4242a55..7e32a262281 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -31,6 +31,9 @@ on: - 'dev/tasks/**' - 'docker-compose.yml' +env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + concurrency: 
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true @@ -65,9 +68,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: | - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - pytest -v archery + run: pytest -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 07d24862f629cc05d7a75109db8e3471ec234891 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:49:35 -0400 Subject: [PATCH 21/57] Print reason for skipping tests --- .github/workflows/archery.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 7e32a262281..5d300d5ccee 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -68,7 +68,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -v archery + run: pytest -rs -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 0227965a9b0e66cb7cd96a0ccbcc611d514dd95e Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 11:53:53 -0400 Subject: [PATCH 22/57] Add 'enable-integration' flag to ensure crossbowcli tests run --- .github/workflows/archery.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 5d300d5ccee..2dad44cd5bc 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -68,7 +68,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -rs -v archery + run: pytest -rs --enable-integration -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 
9c00e3dc89ca7580764a1e68999563024b8d7bf7 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 14 Sep 2022 12:09:20 -0400 Subject: [PATCH 23/57] Factor out GitFixup step DEFAULT_BRANCH value --- .github/workflows/archery.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 2dad44cd5bc..5e4d1764a6a 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -55,9 +55,7 @@ jobs: fetch-depth: 0 - name: Git Fixup shell: bash - run: | - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - git branch $DEFAULT_BRANCH origin/$DEFAULT_BRANCH || true + run: git branch $DEFAULT_BRANCH origin/$DEFAULT_BRANCH || true - name: Setup Python uses: actions/setup-python@v4 with: @@ -68,7 +66,7 @@ jobs: run: pip install pytest responses -e dev/archery[all] - name: Archery Unittests working-directory: dev/archery - run: pytest -rs --enable-integration -v archery + run: pytest -v archery - name: Archery Docker Validation run: archery docker check-config - name: Crossbow Check Config From 64dbf1820c46bf6ac6962c2f16564080b2c5fa73 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 15 Sep 2022 10:04:30 -0400 Subject: [PATCH 24/57] Run python linting --- dev/archery/archery/cli.py | 2 +- dev/archery/archery/crossbow/core.py | 3 ++- dev/archery/archery/release/core.py | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 3f608872741..105a64c0603 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -571,7 +571,7 @@ def benchmark_diff(ctx, src, preserve, output, language, cmake_extras, \b export LAST=$(git tag -l "apache-arrow-[0-9]*" | sort -rV | head -1) \b - archery benchmark diff master "$LAST" + archery benchmark diff "$LAST" \b # Compare g++7 (contender) with clang++-8 (baseline) builds diff --git a/dev/archery/archery/crossbow/core.py 
b/dev/archery/archery/crossbow/core.py index 688af5d55ff..603dfdbd4f0 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -372,7 +372,8 @@ def default_branch_name(self): target_name_tokenized = target_name.split("/") default_branch_name = target_name_tokenized[-1] except: - raise RuntimeError('DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + raise RuntimeError( + 'DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 15a952d22f7..131be01a5b1 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -237,7 +237,7 @@ def __new__(self): if default_branch_name == None: try: - # Set up repo object + # Set up repo object arrow = ArrowSources.find() repo = Repo(arrow.path) origin = repo.remotes["origin"] @@ -256,11 +256,12 @@ def __new__(self): # The last token is the default branch name default_branch_name = origin_head_name_tokenized[-1] except: - raise RuntimeError('DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + raise RuntimeError( + 'DEFAULT_BRANCH environment variable is not set. 
Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') # Set default branch as class property self.default_branch_name = default_branch_name - + return self.instance @property From b6173dd027a7f3d0adaad8091b206ff523384e5d Mon Sep 17 00:00:00 2001 From: Fiona La Date: Mon, 19 Sep 2022 14:26:30 -0400 Subject: [PATCH 25/57] Address bare except and line lengths --- dev/archery/archery/crossbow/core.py | 8 +++++--- dev/archery/archery/release/core.py | 16 ++++++++++------ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 603dfdbd4f0..d7d54b146a4 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -365,15 +365,17 @@ def signature(self): def default_branch_name(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - if default_branch_name == None: + if default_branch_name is None: try: ref_obj = self.repo.references["refs/remotes/origin/HEAD"] target_name = ref_obj.target target_name_tokenized = target_name.split("/") default_branch_name = target_name_tokenized[-1] - except: + except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + 'DEFAULT_BRANCH environment variable is not set. 
Git ' + 'repository does not contain a ' + '\'refs/remotes/origin/HEAD\' reference.') return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 131be01a5b1..83fe659baa0 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -235,7 +235,7 @@ def __new__(self): default_branch_name = os.getenv("DEFAULT_BRANCH") - if default_branch_name == None: + if default_branch_name is None: try: # Set up repo object arrow = ArrowSources.find() @@ -243,21 +243,25 @@ def __new__(self): origin = repo.remotes["origin"] origin_refs = origin.refs - # git.RemoteReference object to origin/HEAD + # Get git.RemoteReference object to origin/HEAD origin_head = origin_refs["HEAD"] - # git.RemoteReference object to origin/main or origin/master + # Get git.RemoteReference object to origin/main or + # origin/master origin_head_reference = origin_head.reference - # Should return "origin/main" or "origin/master" + # Get string value of remote head reference, should return + # "origin/main" or "origin/master" origin_head_name = origin_head_reference.name origin_head_name_tokenized = origin_head_name.split("/") # The last token is the default branch name default_branch_name = origin_head_name_tokenized[-1] - except: + except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git repository does not contain \'refs/remotes/origin/HEAD\' reference.') + 'DEFAULT_BRANCH environment variable is not set. Git ' + 'repository does not contain a' + '\'refs/remotes/origin/HEAD\' reference.') # Set default branch as class property self.default_branch_name = default_branch_name From 5bdf34802acbd3eab7276a6d066b74123e690f42 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Mon, 19 Sep 2022 17:09:56 -0400 Subject: [PATCH 26/57] Add context to error message when obtaining default branch name. 
--- dev/archery/archery/crossbow/core.py | 6 +++--- dev/archery/archery/release/core.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index d7d54b146a4..02228cc66ee 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -373,9 +373,9 @@ def default_branch_name(self): default_branch_name = target_name_tokenized[-1] except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git ' - 'repository does not contain a ' - '\'refs/remotes/origin/HEAD\' reference.') + 'Unable to determine default branch name: DEFAULT_BRANCH ' + 'environment variable is not set. Git repository does not ' + 'contain a \'refs/remotes/origin/HEAD\' reference.') return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 83fe659baa0..e242970bfcf 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -259,9 +259,9 @@ def __new__(self): default_branch_name = origin_head_name_tokenized[-1] except KeyError: raise RuntimeError( - 'DEFAULT_BRANCH environment variable is not set. Git ' - 'repository does not contain a' - '\'refs/remotes/origin/HEAD\' reference.') + 'Unable to determine default branch name: DEFAULT_BRANCH ' + 'environment variable is not set. 
Git repository does not ' + 'contain a \'refs/remotes/origin/HEAD\' reference.') # Set default branch as class property self.default_branch_name = default_branch_name From d9c2902ec79151efd48f639e5a57c998d64ca000 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 14:12:12 -0400 Subject: [PATCH 27/57] add debugging print statement in archery/archery/release/core.py commits_to_pick() --- dev/archery/archery/release/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index e242970bfcf..1ded212822b 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -475,6 +475,7 @@ def changelog(self): def commits_to_pick(self, exclude_already_applied=True): # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) + print(self.base_branch) commit_range = f"{self.previous.tag}..{self.base_branch}" # keeping the original order of the commits helps to minimize the merge From da9ee2ff1b8cc5e222d17af4082bc6425410f16a Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 14:37:28 -0400 Subject: [PATCH 28/57] Debugging statements for DefaultBranchName constructor --- dev/archery/archery/release/core.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 1ded212822b..e4578ee6679 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -236,12 +236,21 @@ def __new__(self): default_branch_name = os.getenv("DEFAULT_BRANCH") if default_branch_name is None: + print("default_branch_name could not be determined by the environment variable") try: # Set up repo object arrow = ArrowSources.find() repo = Repo(arrow.path) origin = repo.remotes["origin"] origin_refs = origin.refs + + print("repo.remotes:") + for remote in repo.remotes: + print(remote) + + print("origin.refs:") 
+ for ref in origin.refs: + print(ref) # Get git.RemoteReference object to origin/HEAD origin_head = origin_refs["HEAD"] @@ -250,6 +259,8 @@ def __new__(self): # origin/master origin_head_reference = origin_head.reference + print(origin_head_reference) + # Get string value of remote head reference, should return # "origin/main" or "origin/master" origin_head_name = origin_head_reference.name From 5d32d6c4fbb91484fa657cb52cf24157fe66cf41 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 15:50:48 -0400 Subject: [PATCH 29/57] Remove base_branch property of Release, instead add default_branch_property --- dev/archery/archery/release/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index e4578ee6679..872fa91399e 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -243,7 +243,7 @@ def __new__(self): repo = Repo(arrow.path) origin = repo.remotes["origin"] origin_refs = origin.refs - + print("repo.remotes:") for remote in repo.remotes: print(remote) @@ -419,7 +419,7 @@ def commits(self): return list(map(Commit, self.repo.iter_commits(commit_range))) @property - def base_branch(self): + def default_branch(self): default_branch_name = DefaultBranchName() return default_branch_name.value() @@ -486,8 +486,8 @@ def changelog(self): def commits_to_pick(self, exclude_already_applied=True): # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) - print(self.base_branch) - commit_range = f"{self.previous.tag}..{self.base_branch}" + print(self.default_branch) + commit_range = f"{self.previous.tag}..{self.default_branch}" # keeping the original order of the commits helps to minimize the merge # conflicts during cherry-picks From bf4fe84c3ac0ffa3505a80ee515ee75114052f41 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 20 Sep 2022 17:09:32 -0400 Subject: [PATCH 
30/57] Use separate function for computing default branch. --- dev/archery/archery/release/core.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 872fa91399e..8896a04161a 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -236,7 +236,6 @@ def __new__(self): default_branch_name = os.getenv("DEFAULT_BRANCH") if default_branch_name is None: - print("default_branch_name could not be determined by the environment variable") try: # Set up repo object arrow = ArrowSources.find() @@ -244,14 +243,6 @@ def __new__(self): origin = repo.remotes["origin"] origin_refs = origin.refs - print("repo.remotes:") - for remote in repo.remotes: - print(remote) - - print("origin.refs:") - for ref in origin.refs: - print(ref) - # Get git.RemoteReference object to origin/HEAD origin_head = origin_refs["HEAD"] @@ -259,8 +250,6 @@ def __new__(self): # origin/master origin_head_reference = origin_head.reference - print(origin_head_reference) - # Get string value of remote head reference, should return # "origin/main" or "origin/master" origin_head_name = origin_head_reference.name @@ -420,8 +409,8 @@ def commits(self): @property def default_branch(self): - default_branch_name = DefaultBranchName() - return default_branch_name.value() + dbn = DefaultBranchName() + return dbn.value def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from @@ -486,7 +475,6 @@ def changelog(self): def commits_to_pick(self, exclude_already_applied=True): # collect commits applied on the default branch since the root of the # maintenance branch (the previous major release) - print(self.default_branch) commit_range = f"{self.previous.tag}..{self.default_branch}" # keeping the original order of the commits helps to minimize the merge From 9d4e46e4db47e978bf1582fe3325dde75a9a0c46 Mon Sep 17 00:00:00 2001 From: Fiona 
La Date: Tue, 20 Sep 2022 17:29:17 -0400 Subject: [PATCH 31/57] Refactor the default branch code to be calculated within Release class --- dev/archery/archery/release/core.py | 83 ++++++++++++----------------- 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 8896a04161a..20cb47a2eb4 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -228,51 +228,6 @@ def title(self): return self._title -class DefaultBranchName(object): - def __new__(self): - if not hasattr(self, 'instance'): - self.instance = super(DefaultBranchName, self).__new__(self) - - default_branch_name = os.getenv("DEFAULT_BRANCH") - - if default_branch_name is None: - try: - # Set up repo object - arrow = ArrowSources.find() - repo = Repo(arrow.path) - origin = repo.remotes["origin"] - origin_refs = origin.refs - - # Get git.RemoteReference object to origin/HEAD - origin_head = origin_refs["HEAD"] - - # Get git.RemoteReference object to origin/main or - # origin/master - origin_head_reference = origin_head.reference - - # Get string value of remote head reference, should return - # "origin/main" or "origin/master" - origin_head_name = origin_head_reference.name - origin_head_name_tokenized = origin_head_name.split("/") - - # The last token is the default branch name - default_branch_name = origin_head_name_tokenized[-1] - except KeyError: - raise RuntimeError( - 'Unable to determine default branch name: DEFAULT_BRANCH ' - 'environment variable is not set. 
Git repository does not ' - 'contain a \'refs/remotes/origin/HEAD\' reference.') - - # Set default branch as class property - self.default_branch_name = default_branch_name - - return self.instance - - @property - def value(self): - return self.default_branch_name - - class Release: def __new__(self, version, jira=None, repo=None): @@ -407,10 +362,39 @@ def commits(self): commit_range = f"{lower}..{upper}" return list(map(Commit, self.repo.iter_commits(commit_range))) - @property + @cached_property def default_branch(self): - dbn = DefaultBranchName() - return dbn.value + default_branch_name = os.getenv("DEFAULT_BRANCH") + + if default_branch_name is None: + try: + # Set up repo object + arrow = ArrowSources.find() + repo = Repo(arrow.path) + origin = repo.remotes["origin"] + origin_refs = origin.refs + + # Get git.RemoteReference object to origin/HEAD + origin_head = origin_refs["HEAD"] + + # Get git.RemoteReference object to origin/main or + # origin/master + origin_head_reference = origin_head.reference + + # Get string value of remote head reference, should return + # "origin/main" or "origin/master" + origin_head_name = origin_head_reference.name + origin_head_name_tokenized = origin_head_name.split("/") + + # The last token is the default branch name + default_branch_name = origin_head_name_tokenized[-1] + except KeyError: + raise RuntimeError( + 'Unable to determine default branch name: DEFAULT_BRANCH ' + 'environment variable is not set. 
Git repository does not ' + 'contain a \'refs/remotes/origin/HEAD\' reference.') + + return default_branch_name def curate(self, minimal=False): # handle commits with parquet issue key specially and query them from @@ -527,8 +511,7 @@ def branch(self): @property def base_branch(self): - default_branch_name = DefaultBranchName() - return default_branch_name.value() + return self.default_branch @cached_property def siblings(self): From a53db9008345d6e5514bd30299bf5973ab918809 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 21 Sep 2022 16:28:00 -0400 Subject: [PATCH 32/57] Add DEFAULT_BRANCH environment variable to Execute Docker Build step in integration.yml --- .github/workflows/integration.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 2c5ffb084c0..4380dec782d 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -50,6 +50,11 @@ permissions: env: DOCKER_VOLUME_PREFIX: ".docker/" +<<<<<<< HEAD +======= + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} +>>>>>>> 9f0281356 (Only add DEFAULT_BRANCH environment variable to archery docker command in integration.yml) jobs: @@ -85,6 +90,7 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' From 61cce184623d6111b2cfb05f17bd4646ddfa21e6 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 21 Sep 2022 16:31:08 -0400 Subject: [PATCH 33/57] In integration.yml, merge edits from default branch and current feature branch. 
--- .github/workflows/integration.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 4380dec782d..3e6d0d14c78 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -87,11 +87,18 @@ jobs: - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build +<<<<<<< HEAD env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration +======= + run: > + archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e + DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + conda-integration +>>>>>>> 2600c98a1 (In integration.yml, fix multi-line symbol.) - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: From 1ccd7569b06d47bc2f6d6cdfbda4a6369a0a494c Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 22 Sep 2022 10:29:13 -0400 Subject: [PATCH 34/57] Add DEFAULT_BRANCH env var for archery docker run command in .travis.yml --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a15822b4a74..4a957b0ce78 100644 --- a/.travis.yml +++ b/.travis.yml @@ -187,6 +187,7 @@ script: - | archery docker run \ ${DOCKER_RUN_ARGS} \ + -e DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ --volume ${PWD}/build:/build \ ${DOCKER_IMAGE_ID} From 07d1760f32cdcdc22d9cec836b177b018034bc27 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 23 Sep 2022 11:03:03 -0400 Subject: [PATCH 35/57] Use git command to get default branch name in .travis.yml --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4a957b0ce78..b2caa04ba81 100644 --- a/.travis.yml +++ b/.travis.yml @@ -185,9 +185,10 @@ install: 
script: - | + GITHUB_DEFAULT_BRANCH_NAME=git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@ archery docker run \ ${DOCKER_RUN_ARGS} \ - -e DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ + -e DEFAULT_BRANCH=${GITHUB_DEFAULT_BRANCH_NAME} \ --volume ${PWD}/build:/build \ ${DOCKER_IMAGE_ID} From db35f6069a84d6fbba22b46df1916ebfff04997c Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 23 Sep 2022 12:31:07 -0400 Subject: [PATCH 36/57] Fix integration.yml merge --- .github/workflows/integration.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 3e6d0d14c78..e7cd861cd31 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -50,11 +50,6 @@ permissions: env: DOCKER_VOLUME_PREFIX: ".docker/" -<<<<<<< HEAD -======= - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} ->>>>>>> 9f0281356 (Only add DEFAULT_BRANCH environment variable to archery docker command in integration.yml) jobs: @@ -87,18 +82,13 @@ jobs: - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build -<<<<<<< HEAD env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} - run: archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 conda-integration -======= run: > archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e DEFAULT_BRANCH=${{ github.event.repository.default_branch }} conda-integration ->>>>>>> 2600c98a1 (In integration.yml, fix multi-line symbol.) 
- name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: From ddd6ae80421323a78bf94714fb1bddf8f4862c1b Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 23 Sep 2022 15:22:16 -0400 Subject: [PATCH 37/57] Set and export the DEFAULT_BRANCH env var for the archery command. --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index b2caa04ba81..43fcc679066 100644 --- a/.travis.yml +++ b/.travis.yml @@ -184,11 +184,10 @@ install: - sudo -H pip3 install -e dev/archery[docker] script: + - export DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) - | - GITHUB_DEFAULT_BRANCH_NAME=git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@ archery docker run \ ${DOCKER_RUN_ARGS} \ - -e DEFAULT_BRANCH=${GITHUB_DEFAULT_BRANCH_NAME} \ --volume ${PWD}/build:/build \ ${DOCKER_IMAGE_ID} From 0db921867d7745b912c5456a7cc4a0572e5ea780 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 13:30:02 -0400 Subject: [PATCH 38/57] Remove computation for default branch name from module loading step in archery/archery/crossbow/cli.py --- dev/archery/archery/crossbow/cli.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index b7390ef1417..ba5136ca15b 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -155,14 +155,8 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) - -# Get the default branch name from the repository -arrow_source_dir = ArrowSources.find() -repo = Repo(arrow_source_dir.path) - - @crossbow.command() -@click.option('--base-branch', default=repo.default_branch_name, +@click.option('--base-branch', default=None, help='Set base branch for the PR.') @click.option('--create-pr', 
is_flag=True, default=False, help='Create GitHub Pull Request') @@ -195,6 +189,13 @@ def verify_release_candidate(obj, base_branch, create_pr, # The verify-release-candidate command will create a PR (or find one) # and add the verify-rc* comment to trigger the verify tasks + # Default value for base_branch is the repository's default branch name + if base_branch is None: + # Get the default branch name from the repository + arrow_source_dir = ArrowSources.find() + repo = Repo(arrow_source_dir.path) + base_branch = repo.default_branch_name + # Redefine Arrow repo to use the correct arrow remote. arrow = Repo(path=obj['arrow'].path, remote_url=remote) response = arrow.github_pr(title=pr_title, head=head_branch, From 3476ef5fc158e5834e8923722efcd0e2b05c42e7 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:02:31 -0400 Subject: [PATCH 39/57] Removing error if default branch cannot be determined, default to 'master' for now in dev/archery/archery/crossbow/core.py --- dev/archery/archery/crossbow/core.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 02228cc66ee..13dbaebcc28 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -28,6 +28,7 @@ from io import StringIO from pathlib import Path from datetime import date +import warnings import jinja2 from ruamel.yaml import YAML @@ -372,10 +373,14 @@ def default_branch_name(self): target_name_tokenized = target_name.split("/") default_branch_name = target_name_tokenized[-1] except KeyError: - raise RuntimeError( - 'Unable to determine default branch name: DEFAULT_BRANCH ' - 'environment variable is not set. Git repository does not ' - 'contain a \'refs/remotes/origin/HEAD\' reference.') + # TODO: ARROW-18011 to track changing the hard coded default + # value from "master" to "main". 
+ default_branch_name = "master" + warnings.warn('Unable to determine default branch name: ' + 'DEFAULT_BRANCH environment variable is not set. Git ' + 'repository does not contain a \'refs/remotes/origin/HEAD\'' + ' reference. Setting the default branch name to ' + + default_branch_name, RuntimeWarning) return default_branch_name From 2bd3535a77bdfa52ef5ffa0046ebfa5e4cc5abbf Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:05:34 -0400 Subject: [PATCH 40/57] Alphabetize the standard library imports in dev/archery/archery/release/core.py --- dev/archery/archery/release/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 20cb47a2eb4..2786f96a663 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -18,11 +18,11 @@ from abc import abstractmethod from collections import defaultdict import functools -import re +import os import pathlib +import re import shelve import warnings -import os from git import Repo from jira import JIRA From df341b6e90af76ce054a678405e05173647ce3e3 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:26:23 -0400 Subject: [PATCH 41/57] Remove error in the case that the default branch name could not be determined, default to 'master' in dev/archery/archery/release/core.py --- dev/archery/archery/release/core.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index 2786f96a663..a34a11cc3e6 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -389,10 +389,14 @@ def default_branch(self): # The last token is the default branch name default_branch_name = origin_head_name_tokenized[-1] except KeyError: - raise RuntimeError( - 'Unable to determine default branch name: DEFAULT_BRANCH ' - 'environment variable is not set. 
Git repository does not ' - 'contain a \'refs/remotes/origin/HEAD\' reference.') + # TODO: ARROW-18011 to track changing the hard coded default + # value from "master" to "main". + default_branch_name = "master" + warnings.warn('Unable to determine default branch name: ' + 'DEFAULT_BRANCH environment variable is not set. Git ' + 'repository does not contain a \'refs/remotes/origin/HEAD\'' + ' reference. Setting the default branch name to ' + + default_branch_name, RuntimeWarning) return default_branch_name From a291583757128d29fae3f0635ad8a2d9792c455f Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 12 Oct 2022 14:42:39 -0400 Subject: [PATCH 42/57] Rename DEFAULT_BRANCH env var to ARCHERY_DEFAULT_BRANCH --- .github/workflows/archery.yml | 4 ++-- .github/workflows/integration.yml | 2 +- .travis.yml | 2 +- dev/archery/archery/crossbow/cli.py | 1 + dev/archery/archery/crossbow/core.py | 11 ++++++----- dev/archery/archery/release/core.py | 11 ++++++----- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index 5e4d1764a6a..d337ec797cf 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -32,7 +32,7 @@ on: - 'docker-compose.yml' env: - DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + ARCHERY_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} @@ -55,7 +55,7 @@ jobs: fetch-depth: 0 - name: Git Fixup shell: bash - run: git branch $DEFAULT_BRANCH origin/$DEFAULT_BRANCH || true + run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index e7cd861cd31..0e6351c523e 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -87,7 +87,7 @@ jobs:
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: > archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e - DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' diff --git a/.travis.yml b/.travis.yml index 43fcc679066..a96e07f0c43 100644 --- a/.travis.yml +++ b/.travis.yml @@ -184,7 +184,7 @@ install: - sudo -H pip3 install -e dev/archery[docker] script: - - export DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) + - export ARCHERY_DEFAULT_BRANCH=$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@) - | archery docker run \ ${DOCKER_RUN_ARGS} \ diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index ba5136ca15b..ec8eab2a4b5 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -155,6 +155,7 @@ def submit(obj, tasks, groups, params, job_prefix, config_path, arrow_version, queue.push() click.echo('Pushed job identifier is: `{}`'.format(job.branch)) + @crossbow.command() @click.option('--base-branch', default=None, help='Set base branch for the PR.') diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 13dbaebcc28..553b2d69216 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -364,7 +364,7 @@ def signature(self): @property def default_branch_name(self): - default_branch_name = os.getenv("DEFAULT_BRANCH") + default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") if default_branch_name is None: try: @@ -377,10 +377,11 @@ def default_branch_name(self): # value from "master" to "main". default_branch_name = "master" warnings.warn('Unable to determine default branch name: ' - 'DEFAULT_BRANCH environment variable is not set. 
Git ' - 'repository does not contain a \'refs/remotes/origin/HEAD\'' - ' reference. Setting the default branch name to ' + - default_branch_name, RuntimeWarning) + 'ARCHERY_DEFAULT_BRANCH environment variable is ' + 'not set. Git repository does not contain a ' + '\'refs/remotes/origin/HEAD\'reference. Setting ' + 'the default branch name to' + + default_branch_name, RuntimeWarning) return default_branch_name diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index a34a11cc3e6..d9c85740093 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -364,7 +364,7 @@ def commits(self): @cached_property def default_branch(self): - default_branch_name = os.getenv("DEFAULT_BRANCH") + default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") if default_branch_name is None: try: @@ -393,10 +393,11 @@ def default_branch(self): # value from "master" to "main". default_branch_name = "master" warnings.warn('Unable to determine default branch name: ' - 'DEFAULT_BRANCH environment variable is not set. Git ' - 'repository does not contain a \'refs/remotes/origin/HEAD\'' - ' reference. Setting the default branch name to ' + - default_branch_name, RuntimeWarning) + 'ARCHERY_DEFAULT_BRANCH environment variable is ' + 'not set. Git repository does not contain a ' + '\'refs/remotes/origin/HEAD\'reference. 
Setting ' + 'the default branch name to' + + default_branch_name, RuntimeWarning) return default_branch_name From 694106e7ff15570dcd0d715a5d64a93476a03123 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 13 Oct 2022 15:40:05 -0400 Subject: [PATCH 43/57] Reuse arrow Repo object for getting the default branch name, if needed --- dev/archery/archery/crossbow/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index ec8eab2a4b5..d9ab7a33841 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -190,15 +190,14 @@ def verify_release_candidate(obj, base_branch, create_pr, # The verify-release-candidate command will create a PR (or find one) # and add the verify-rc* comment to trigger the verify tasks + # Redefine Arrow repo to use the correct arrow remote. + arrow = Repo(path=obj['arrow'].path, remote_url=remote) + # Default value for base_branch is the repository's default branch name if base_branch is None: # Get the default branch name from the repository - arrow_source_dir = ArrowSources.find() - repo = Repo(arrow_source_dir.path) - base_branch = repo.default_branch_name - - # Redefine Arrow repo to use the correct arrow remote. 
- arrow = Repo(path=obj['arrow'].path, remote_url=remote) + base_branch = arrow.default_branch_name + response = arrow.github_pr(title=pr_title, head=head_branch, base=base_branch, body=pr_body, github_token=obj['queue'].github_token, From 4b7288d04d1b9b937f2e8fde204a77f12699f98e Mon Sep 17 00:00:00 2001 From: Fiona La Date: Thu, 13 Oct 2022 16:34:10 -0400 Subject: [PATCH 44/57] Update the dask and pandas install scripts to use default branch computed by Git rather than hard-coded defaults --- ci/scripts/install_dask.sh | 5 ++++- ci/scripts/install_pandas.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh index eb9c4e3dd42..50be7b38a6c 100755 --- a/ci/scripts/install_dask.sh +++ b/ci/scripts/install_dask.sh @@ -26,7 +26,10 @@ fi dask=$1 -if [ "${dask}" = "master" ]; then +# Get Git default branch name +DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" + +if [ "${dask}" = "${DEFAULT_BRANCH}" ]; then pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] elif [ "${dask}" = "latest" ]; then pip install dask[dataframe] diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh index 5aca65f825a..135f60edf67 100755 --- a/ci/scripts/install_pandas.sh +++ b/ci/scripts/install_pandas.sh @@ -35,7 +35,10 @@ else pip install numpy==${numpy} fi -if [ "${pandas}" = "master" ]; then +# Get Git default branch name +DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" + +if [ "${pandas}" = "${DEFAULT_BRANCH}" ]; then pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation elif [ "${pandas}" = "nightly" ]; then pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas From 1cee9a3317e3a0970092c87e040e0f9ca1962fbd Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 14 Oct 2022 14:56:49 -0400 Subject: [PATCH 45/57] Change the flag for indicating upstream 
development version of Pandas and Dask to 'upstream_devel', and update the documentation. --- ci/scripts/install_dask.sh | 5 +---- ci/scripts/install_pandas.sh | 5 +---- dev/archery/archery/docker/tests/test_docker.py | 8 ++++---- docs/source/developers/continuous_integration/docker.rst | 7 ++++--- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/ci/scripts/install_dask.sh b/ci/scripts/install_dask.sh index 50be7b38a6c..8d712a88a6a 100755 --- a/ci/scripts/install_dask.sh +++ b/ci/scripts/install_dask.sh @@ -26,10 +26,7 @@ fi dask=$1 -# Get Git default branch name -DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" - -if [ "${dask}" = "${DEFAULT_BRANCH}" ]; then +if [ "${dask}" = "upstream_devel" ]; then pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] elif [ "${dask}" = "latest" ]; then pip install dask[dataframe] diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh index 135f60edf67..be29e5da554 100755 --- a/ci/scripts/install_pandas.sh +++ b/ci/scripts/install_pandas.sh @@ -35,10 +35,7 @@ else pip install numpy==${numpy} fi -# Get Git default branch name -DEFAULT_BRANCH="$(git rev-parse --abbrev-ref origin/HEAD | sed s@origin/@@)" - -if [ "${pandas}" = "${DEFAULT_BRANCH}" ]; then +if [ "${pandas}" = "upstream_devel" ]; then pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation elif [ "${pandas}" = "nightly" ]; then pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas diff --git a/dev/archery/archery/docker/tests/test_docker.py b/dev/archery/archery/docker/tests/test_docker.py index fa6b59f936d..386b7c2bdae 100644 --- a/dev/archery/archery/docker/tests/test_docker.py +++ b/dev/archery/archery/docker/tests/test_docker.py @@ -259,12 +259,12 @@ def test_arrow_example_validation_passes(arrow_compose_path): def test_compose_default_params_and_env(arrow_compose_path): compose = DockerCompose(arrow_compose_path, 
params=dict( UBUNTU='18.04', - DASK='main' + DASK='upstream_devel' )) assert compose.config.dotenv == arrow_compose_env assert compose.config.params == { 'UBUNTU': '18.04', - 'DASK': 'main', + 'DASK': 'upstream_devel', } @@ -492,7 +492,7 @@ def test_compose_push(arrow_compose_path): def test_compose_error(arrow_compose_path): compose = DockerCompose(arrow_compose_path, params=dict( PYTHON='3.8', - PANDAS='main' + PANDAS='upstream_devel' )) error = subprocess.CalledProcessError(99, []) @@ -503,7 +503,7 @@ def test_compose_error(arrow_compose_path): exception_message = str(exc.value) assert "exited with a non-zero exit code 99" in exception_message assert "PANDAS: latest" in exception_message - assert "export PANDAS=main" in exception_message + assert "export PANDAS=upstream_devel" in exception_message def test_image_with_gpu(arrow_compose_path): diff --git a/docs/source/developers/continuous_integration/docker.rst b/docs/source/developers/continuous_integration/docker.rst index 7035dfe9ec6..49061f5b847 100644 --- a/docs/source/developers/continuous_integration/docker.rst +++ b/docs/source/developers/continuous_integration/docker.rst @@ -85,13 +85,13 @@ where the leaf image is ``conda-python-pandas``. .. code:: bash - PANDAS=master archery docker run --no-leaf-cache conda-python-pandas + PANDAS=upstream_devel archery docker run --no-leaf-cache conda-python-pandas Which translates to: .. code:: bash - export PANDAS=master + export PANDAS=upstream_devel docker-compose pull --ignore-pull-failures conda-cpp docker-compose pull --ignore-pull-failures conda-python docker-compose build conda-cpp @@ -102,7 +102,7 @@ Which translates to: Note that it doesn't pull the conda-python-pandas image and disable the cache when building it. -``PANDAS`` is a `build parameter `_, see the +``PANDAS`` is a :ref:`build parameter `, see the defaults in the .env file. **To entirely skip building the image:** @@ -178,6 +178,7 @@ image when building Glib, Ruby, R and Python bindings. 
This reduces duplication and streamlines maintenance, but makes the docker-compose configuration more complicated. +.. _docker-build-parameters: Docker Build Parameters ~~~~~~~~~~~~~~~~~~~~~~~ From 19fb6c3966f336234485ef039c6339ad00d64918 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 14 Oct 2022 15:09:38 -0400 Subject: [PATCH 46/57] Run python linting --- dev/archery/archery/crossbow/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/cli.py b/dev/archery/archery/crossbow/cli.py index d9ab7a33841..58aed560329 100644 --- a/dev/archery/archery/crossbow/cli.py +++ b/dev/archery/archery/crossbow/cli.py @@ -197,7 +197,7 @@ def verify_release_candidate(obj, base_branch, create_pr, if base_branch is None: # Get the default branch name from the repository base_branch = arrow.default_branch_name - + response = arrow.github_pr(title=pr_title, head=head_branch, base=base_branch, body=pr_body, github_token=obj['queue'].github_token, From 402a0545815709645e9c4ca4e1a86e0ddb1a606c Mon Sep 17 00:00:00 2001 From: Fiona La Date: Wed, 19 Oct 2022 09:16:54 -0400 Subject: [PATCH 47/57] Update Dask and Pandas version flag in tasks.yml and dev/archery/archery/docker/cli.py --- dev/archery/archery/docker/cli.py | 3 ++- dev/tasks/tasks.yml | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index 565efed05c5..6f571f27bff 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -217,7 +217,8 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, PYTHON=3.8 archery docker run conda-python # disable the cache only for the leaf image - PANDAS=main archery docker run --no-leaf-cache conda-python-pandas + PANDAS=upstream_devel archery docker run --no-leaf-cache + conda-python-pandas # entirely skip building the image archery docker run --no-pull --no-build conda-python diff --git 
a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index bdf53ff1dac..8cbb7615b7b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1492,7 +1492,7 @@ tasks: ("3.7", "latest", "latest", False), ("3.8", "latest", "latest", False), ("3.8", "nightly", "nightly", False), - ("3.9", "master", "nightly", False)] %} + ("3.9", "upstream_devel", "nightly", False)] %} test-conda-python-{{ python_version }}-pandas-{{ pandas_version }}: ci: github template: docker-tests/github.linux.yml @@ -1512,7 +1512,7 @@ tasks: image: conda-python-pandas {% endfor %} -{% for dask_version in ["latest", "master"] %} +{% for dask_version in ["latest", "upstream_devel"] %} test-conda-python-3.9-dask-{{ dask_version }}: ci: github template: docker-tests/github.linux.yml From 4551b59699dbb032df175992ada804cbeeb3500d Mon Sep 17 00:00:00 2001 From: lafiona Date: Fri, 21 Oct 2022 11:47:29 -0400 Subject: [PATCH 48/57] Update .github/workflows/integration.yml Co-authored-by: Sutou Kouhei --- .github/workflows/integration.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 0e6351c523e..c6a39a8306b 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -86,9 +86,10 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: > - archery docker run -e ARCHERY_INTEGRATION_WITH_RUST=1 -e - ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - conda-integration + archery docker run + -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} + -e ARCHERY_INTEGRATION_WITH_RUST=1 + conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: From 362c9b8dc2195328351f3a850abf6900d1437705 Mon Sep 17 00:00:00 2001 From: lafiona Date: Fri, 21 Oct 2022 11:48:33 -0400 Subject: [PATCH 49/57] Update 
dev/archery/archery/release/core.py error message to include space. Co-authored-by: Sutou Kouhei --- dev/archery/archery/release/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index d9c85740093..ae16e9b30a4 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -396,7 +396,7 @@ def default_branch(self): 'ARCHERY_DEFAULT_BRANCH environment variable is ' 'not set. Git repository does not contain a ' '\'refs/remotes/origin/HEAD\'reference. Setting ' - 'the default branch name to' + + 'the default branch name to ' + default_branch_name, RuntimeWarning) return default_branch_name From 4be1f7944d43e3ce527bc2dc9f2c991902dca045 Mon Sep 17 00:00:00 2001 From: lafiona Date: Fri, 21 Oct 2022 11:49:47 -0400 Subject: [PATCH 50/57] Update dev/archery/archery/crossbow/core.py to add space in error message. Co-authored-by: Sutou Kouhei --- dev/archery/archery/crossbow/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 553b2d69216..79868610a93 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -380,7 +380,7 @@ def default_branch_name(self): 'ARCHERY_DEFAULT_BRANCH environment variable is ' 'not set. Git repository does not contain a ' '\'refs/remotes/origin/HEAD\'reference. 
Setting ' - 'the default branch name to' + + 'the default branch name to ' + default_branch_name, RuntimeWarning) return default_branch_name From 404a12209cd95edcf7a19dfccc3176bc50902f44 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Fri, 21 Oct 2022 11:59:28 -0400 Subject: [PATCH 51/57] Remove () for accessing computed property, default_branch_name --- dev/archery/archery/crossbow/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index 79868610a93..07e9e7ebf5f 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -572,8 +572,8 @@ def github_overwrite_release_assets(self, tag_name, target_commitish, def github_pr(self, title, head=None, base=None, body=None, github_token=None, create=False): - # Default value for base is the default_branch name() - base = self.default_branch_name() if base is None else base + # Default value for base is the default_branch_name + base = self.default_branch_name if base is None else base github_token = github_token or self.github_token repo = self.as_github_repo(github_token=github_token) if create: From 5fd786a2410ae7d4cfe7ac5d8577332eec97aa59 Mon Sep 17 00:00:00 2001 From: lafiona Date: Mon, 24 Oct 2022 14:36:05 -0400 Subject: [PATCH 52/57] Update .github/workflows/integration.yml indentation Co-authored-by: Sutou Kouhei --- .github/workflows/integration.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index c6a39a8306b..04c24f12871 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -87,9 +87,9 @@ jobs: ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: > archery docker run - -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - -e ARCHERY_INTEGRATION_WITH_RUST=1 - conda-integration + -e ARCHERY_DEFAULT_BRANCH=${{ 
github.event.repository.default_branch }} + -e ARCHERY_INTEGRATION_WITH_RUST=1 + conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' env: From 7bfd4e20eb4484cc3cc506e0ec11024760f492fa Mon Sep 17 00:00:00 2001 From: lafiona Date: Tue, 25 Oct 2022 15:50:13 -0400 Subject: [PATCH 53/57] Update .github/workflows/integration.yml Co-authored-by: Antoine Pitrou --- .github/workflows/integration.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 04c24f12871..e94eb764fd6 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -86,9 +86,9 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: > - archery docker run - -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} - -e ARCHERY_INTEGRATION_WITH_RUST=1 + archery docker run \ + -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ + -e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration - name: Docker Push if: success() && github.event_name == 'push' && github.repository == 'apache/arrow' From 37f29e44e89391f31164ea3ed5b4fb32d1d88ba8 Mon Sep 17 00:00:00 2001 From: lafiona Date: Tue, 25 Oct 2022 15:50:44 -0400 Subject: [PATCH 54/57] Update dev/archery/archery/docker/cli.py Co-authored-by: Antoine Pitrou --- dev/archery/archery/docker/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/docker/cli.py b/dev/archery/archery/docker/cli.py index 6f571f27bff..42caecd7427 100644 --- a/dev/archery/archery/docker/cli.py +++ b/dev/archery/archery/docker/cli.py @@ -217,8 +217,8 @@ def docker_run(obj, image, command, *, env, user, force_pull, force_build, PYTHON=3.8 archery docker run conda-python # disable the cache only for the leaf image - PANDAS=upstream_devel archery docker run 
--no-leaf-cache - conda-python-pandas + PANDAS=upstream_devel archery docker run --no-leaf-cache \ + conda-python-pandas # entirely skip building the image archery docker run --no-pull --no-build conda-python From edaf2c01390461f243638f957786188a2d83ab12 Mon Sep 17 00:00:00 2001 From: lafiona Date: Tue, 25 Oct 2022 15:51:04 -0400 Subject: [PATCH 55/57] Update docs/source/developers/continuous_integration/docker.rst Co-authored-by: Antoine Pitrou --- docs/source/developers/continuous_integration/docker.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/developers/continuous_integration/docker.rst b/docs/source/developers/continuous_integration/docker.rst index 49061f5b847..50ee6551787 100644 --- a/docs/source/developers/continuous_integration/docker.rst +++ b/docs/source/developers/continuous_integration/docker.rst @@ -179,6 +179,7 @@ This reduces duplication and streamlines maintenance, but makes the docker-compose configuration more complicated. .. _docker-build-parameters: + Docker Build Parameters ~~~~~~~~~~~~~~~~~~~~~~~ From fb3d142113ef81870777ec20b87adc37f70f5596 Mon Sep 17 00:00:00 2001 From: lafiona Date: Tue, 25 Oct 2022 15:51:15 -0400 Subject: [PATCH 56/57] Update docs/source/developers/continuous_integration/docker.rst Co-authored-by: Antoine Pitrou --- docs/source/developers/continuous_integration/docker.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/developers/continuous_integration/docker.rst b/docs/source/developers/continuous_integration/docker.rst index 50ee6551787..49cbffe5a42 100644 --- a/docs/source/developers/continuous_integration/docker.rst +++ b/docs/source/developers/continuous_integration/docker.rst @@ -103,7 +103,7 @@ Note that it doesn't pull the conda-python-pandas image and disable the cache when building it. ``PANDAS`` is a :ref:`build parameter `, see the -defaults in the .env file. +defaults in the ``.env`` file. 
**To entirely skip building the image:** From 374f5401f062d0a0247154e6833baf309cee9947 Mon Sep 17 00:00:00 2001 From: Fiona La Date: Tue, 25 Oct 2022 16:29:13 -0400 Subject: [PATCH 57/57] Factor out repo object set up lines from try block --- dev/archery/archery/release/core.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/dev/archery/archery/release/core.py b/dev/archery/archery/release/core.py index ae16e9b30a4..03eceb80a10 100644 --- a/dev/archery/archery/release/core.py +++ b/dev/archery/archery/release/core.py @@ -367,18 +367,18 @@ def default_branch(self): default_branch_name = os.getenv("ARCHERY_DEFAULT_BRANCH") if default_branch_name is None: - try: - # Set up repo object - arrow = ArrowSources.find() - repo = Repo(arrow.path) - origin = repo.remotes["origin"] - origin_refs = origin.refs + # Set up repo object + arrow = ArrowSources.find() + repo = Repo(arrow.path) + origin = repo.remotes["origin"] + origin_refs = origin.refs + try: # Get git.RemoteReference object to origin/HEAD + # If the reference does not exist, a KeyError will be thrown origin_head = origin_refs["HEAD"] - # Get git.RemoteReference object to origin/main or - # origin/master + # Get git.RemoteReference object to origin/default-branch-name origin_head_reference = origin_head.reference # Get string value of remote head reference, should return @@ -389,6 +389,7 @@ def default_branch(self): # The last token is the default branch name default_branch_name = origin_head_name_tokenized[-1] except KeyError: + # Use a hard-coded default value to set default_branch_name # TODO: ARROW-18011 to track changing the hard coded default # value from "master" to "main". default_branch_name = "master"