From 1fc4590bf16d09f0ad012ec4a1c527a3b827c8fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 12 Jul 2023 14:46:21 +0200 Subject: [PATCH 1/3] GH-36634: [Dev] Ensure merge script goes over all pages when requesting info from GitHub --- dev/merge_arrow_pr.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 90b2e9b034e..2c49aa98722 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -78,7 +78,26 @@ def get_json(url, headers=None): response = requests.get(url, headers=headers) if response.status_code != 200: raise ValueError(response.json()) - return response.json() + # GitHub returns a link header with the next, previous, last + # page if there is pagination on the response. See: + # https://docs.github.com/en/rest/guides/using-pagination-in-the-rest-api#using-link-headers + next_responses = None + if "link" in response.headers: + links = response.headers['link'].split(', ') + next_url = None + for link in links: + if 'rel="next"' in link: + # Format: '<url>; rel="next"' + next_url = link.split(";")[0][1:-1] + if next_url: + next_responses = get_json(next_url, headers) + ret_val = response.json() + if next_responses: + if isinstance(ret_val, list): + ret_val.extend(next_responses) + else: + raise ValueError('GitHub response was paginated and is not a list') + return ret_val def run_cmd(cmd): From 11eba03df01906b0a56ecdb7aea2a3cbe89ae53a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 13 Jul 2023 12:58:42 +0200 Subject: [PATCH 2/3] Update dev/merge_arrow_pr.py Co-authored-by: Sutou Kouhei --- dev/merge_arrow_pr.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 2c49aa98722..839687e0761 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -84,13 +84,11 @@ def get_json(url, headers=None): next_responses = None if "link" in
response.headers: links = response.headers['link'].split(', ') - next_url = None for link in links: if 'rel="next"' in link: # Format: '<url>; rel="next"' next_url = link.split(";")[0][1:-1] - if next_url: - next_responses = get_json(next_url, headers) + next_responses = get_json(next_url, headers) ret_val = response.json() if next_responses: if isinstance(ret_val, list): From a0c2261127a703e2f68f9b10e0559d8ffd39a98b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 13 Jul 2023 13:02:58 +0200 Subject: [PATCH 3/3] Add better variable naming as suggested on code review --- dev/merge_arrow_pr.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 839687e0761..0f36a5ba902 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -89,13 +89,13 @@ def get_json(url, headers=None): # Format: '<url>; rel="next"' next_url = link.split(";")[0][1:-1] next_responses = get_json(next_url, headers) - ret_val = response.json() + responses = response.json() if next_responses: - if isinstance(ret_val, list): - ret_val.extend(next_responses) + if isinstance(responses, list): + responses.extend(next_responses) else: raise ValueError('GitHub response was paginated and is not a list') - return ret_val + return responses def run_cmd(cmd):