From 2da4f0fd89ed29a7846069734ee25b5d7c9378d1 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Thu, 16 Jan 2020 17:27:48 -0800 Subject: [PATCH 1/2] HBASE-22853 Git/Jira Release Audit Tool This is an application for performing an audit between the histories on our git branches and the `fixVersion` field set on issues in JIRA. It does this by building a Sqlite database from the commits found on each git branch, identifying Jira IDs and release tags, and then requesting information about those issues from Jira. Once both sources have been collected, queries can be performed against the database to look for discrepancies between the sources of truth (and, possibly, bugs in this script). --- .editorconfig | 4 + dev-support/git-jira-release-audit/.gitignore | 4 + dev-support/git-jira-release-audit/README.md | 186 ++++++++ .../fallback_actions.csv | 227 +++++++++ .../git_jira_release_audit.py | 440 ++++++++++++++++++ .../git-jira-release-audit/requirements.txt | 39 ++ 6 files changed, 900 insertions(+) create mode 100644 dev-support/git-jira-release-audit/.gitignore create mode 100644 dev-support/git-jira-release-audit/README.md create mode 100644 dev-support/git-jira-release-audit/fallback_actions.csv create mode 100644 dev-support/git-jira-release-audit/git_jira_release_audit.py create mode 100644 dev-support/git-jira-release-audit/requirements.txt diff --git a/.editorconfig b/.editorconfig index 011673016c26..aa6adaa77214 100644 --- a/.editorconfig +++ b/.editorconfig @@ -691,3 +691,7 @@ ij_ruby_spaces_around_range_operators = false ij_ruby_spaces_around_relational_operators = true ij_ruby_spaces_within_array_initializer_braces = true ij_ruby_spaces_within_braces = false + +[*.py] +indent_size = 4 +tab_width = 4 diff --git a/dev-support/git-jira-release-audit/.gitignore b/dev-support/git-jira-release-audit/.gitignore new file mode 100644 index 000000000000..ddab63162421 --- /dev/null +++ b/dev-support/git-jira-release-audit/.gitignore @@ -0,0 +1,4 @@ +*.db +*.log 
+*.svg +venv diff --git a/dev-support/git-jira-release-audit/README.md b/dev-support/git-jira-release-audit/README.md new file mode 100644 index 000000000000..405a371ad0cd --- /dev/null +++ b/dev-support/git-jira-release-audit/README.md @@ -0,0 +1,186 @@ + + +# Git / JIRA Release Audit + +This is an application for performing an audit between the histories on our git +branches and the `fixVersion` field set on issues in JIRA. It does this by +building a Sqlite database from the commits found on each git branch, +identifying Jira IDs and release tags, and then requesting information about +those issues from Jira. Once both sources have been collected, queries can be +performed against the database to look for discrepancies between the sources of +truth (and, possibly, bugs in this script). + +## Setup + +The system prerequisites are Python3 with VirtualEnv available and Sqlite. Also, +you'll need the content of this directory and a local checkout of the git repository. + +Build a VirtualEnv with the script's dependencies with + +```shell script +$ python3 --version +Python 3.7.6 +$ python3 -m venv ./venv +$ ./venv/bin/pip install -r ./requirements.txt +... +Successfully installed... +``` + +## Usage + +The tool provides basic help docs. + +```shell script +$ ./venv/bin/python ./git_jira_release_audit.py --help +usage: git_jira_release_audit.py [-h] [--db-path DB_PATH] + [--git-repo-path GIT_REPO_PATH] + [--remote-name REMOTE_NAME] + [--development-branch DEVELOPMENT_BRANCH] + [--development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION] + [--release-line-regexp RELEASE_LINE_REGEXP] + [--fallback-actions-path FALLBACK_ACTIONS_PATH] + [--jira-url JIRA_URL] --branch-1-fix-version + BRANCH_1_FIX_VERSION --branch-2-fix-version + BRANCH_2_FIX_VERSION + +optional arguments: + -h, --help show this help message and exit + --db-path DB_PATH Path to the database file, or leave unspecified for a + transient db. 
+ --git-repo-path GIT_REPO_PATH + Path to the git repo, or leave unspecified to infer + from the current file's path. + --remote-name REMOTE_NAME + The name of the git remote to use when identifying + branches. + --development-branch DEVELOPMENT_BRANCH + The name of the branch from which all release lines + originate. + --development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION + The Jira fixVersion used to indicate an issue is + committed to the development branch. + --release-line-regexp RELEASE_LINE_REGEXP + A regexp used to identify release lines. + --fallback-actions-path FALLBACK_ACTIONS_PATH + Path to a file containing a cache of user input. + --jira-url JIRA_URL A URL locating the target JIRA instance. + --branch-1-fix-version BRANCH_1_FIX_VERSION + The Jira fixVersion used to indicate an issue is + committed to the specified release line branch + --branch-2-fix-version BRANCH_2_FIX_VERSION + The Jira fixVersion used to indicate an issue is + committed to the specified release line branch +``` + +Example Run: + +```shell script +$ ./venv/bin/python3 ./git_jira_release_audit.py \ + --db-path=audit.db \ + --remote-name=apache-rw \ + --development-branch-fix-version=3.0.0 \ + --branch-1-fix-version=1.5.0 \ + --branch-2-fix-version=2.3.0 +INFO:root:apache-rw/branch-1 has 4046 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.0 has 1433 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.1 has 2111 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.2 has 2738 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.3 has 3287 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.4 has 3912 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. 
+INFO:root:apache-rw/branch-2 has 3080 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. +INFO:root:apache-rw/branch-2.0 has 2194 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. +INFO:root:apache-rw/branch-2.1 has 2705 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. +INFO:root:apache-rw/branch-2.2 has 2927 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. +INFO:root:retrieving 5653 jira_ids from the issue tracker + +apache-rw/branch-1 100%|██████████████████████████████████████████████████████| 4046/4046 [08:23<00:00, 8.04 commit/s] +apache-rw/branch-1.0 100%|████████████████████████████████████████████████████| 1433/1433 [03:49<00:00, 6.26 commit/s] +apache-rw/branch-1.1 100%|████████████████████████████████████████████████████| 2111/2111 [05:16<00:00, 6.68 commit/s] +apache-rw/branch-1.2 100%|████████████████████████████████████████████████████| 2738/2738 [06:26<00:00, 7.10 commit/s] +apache-rw/branch-1.3 100%|████████████████████████████████████████████████████| 3287/3287 [07:21<00:00, 7.46 commit/s] +apache-rw/branch-1.4 100%|████████████████████████████████████████████████████| 3912/3912 [08:08<00:00, 8.02 commit/s] +apache-rw/branch-2 100%|█████████████████████████████████████████████████████| 3080/3080 [03:29<00:00, 14.74 commit/s] +apache-rw/branch-2.0 100%|████████████████████████████████████████████████████| 2194/2194 [04:56<00:00, 7.42 commit/s] +apache-rw/branch-2.1 100%|███████████████████████████████████████████████████| 2705/2705 [03:17<00:00, 13.75 commit/s] +apache-rw/branch-2.2 100%|███████████████████████████████████████████████████| 2927/2927 [03:28<00:00, 14.09 commit/s] +fetch from Jira 100%|█████████████████████████████████████████████████████████| 5653/5653 [00:58<00:00, 98.29 issue/s] +``` + +With a populated database, query with sqlite: + +```shell script +$ sqlite3 audit.db +SQLite version 3.24.0 2018-06-04 14:10:15 +Enter ".help" for usage hints. 
+sqlite> -- count the number of distinct issues (jira_ids) on each release branch +sqlite> select count(distinct jira_id), branch from git_commits group by branch; +3406|apache-rw/branch-1 +1189|apache-rw/branch-1.0 +1728|apache-rw/branch-1.1 +2289|apache-rw/branch-1.2 +2779|apache-rw/branch-1.3 +3277|apache-rw/branch-1.4 +2666|apache-rw/branch-2 +1809|apache-rw/branch-2.0 +2289|apache-rw/branch-2.1 +2511|apache-rw/branch-2.2 + +sqlite> -- count the number of issues that will be in 2.3.0 that have not been released on any earlier +sqlite> -- version. +sqlite> select count(1) from ( + select distinct jira_id from git_commits where branch = 'apache-rw/branch-2' except + select distinct jira_id from git_commits where branch in + ('apache-rw/branch-2.0', 'apache-rw/branch-2.1', 'apache-rw/branch-2.2')); +169 + +sqlite> -- find the issues for which the git commit record and JIRA fixVersion disagree +sqlite> select g.jira_id, g.git_tag, j.fix_version + from git_commits g + inner join jira_versions j + on g.jira_id = j.jira_id + and g.branch = 'apache-rw/branch-2.2' + and g.git_tag is not null + and j.fix_version like '2.2.%' + and g.git_tag != j.fix_version; +HBASE-22941|2.2.2|2.2.1 + +sqlite> -- show jira non-1.x fixVersions for all issues on branch-2 but not on any +sqlite> -- branch-2.x release branch; i.e., issues that are missing a fixVersion or +sqlite> -- are marked for a release other than (3.0.0, 2.3.0) +sqlite> select g.jira_id, j.fix_version +from ( + select distinct jira_id from git_commits where branch = 'apache-rw/branch-2' except + select distinct jira_id from git_commits where branch in + (select distinct branch from git_commits where branch like 'apache-rw/branch-2.%')) g +left join jira_versions j + on g.jira_id = j.jira_id + and j.fix_version not like '1.%' +where ( + j.fix_version is null + OR j.fix_version not in ('3.0.0', '2.3.0')) +order by g.jira_id desc; +HBASE-23683|2.2.4 +HBASE-23032|connector-1.0.1 +HBASE-23032|hbase-filesystem-1.0.0-alpha2 
+HBASE-22405|2.2.0 +HBASE-22360|2.2.0 +HBASE-22321| +HBASE-22283|2.2.0 +``` diff --git a/dev-support/git-jira-release-audit/fallback_actions.csv b/dev-support/git-jira-release-audit/fallback_actions.csv new file mode 100644 index 000000000000..7202fe8ad594 --- /dev/null +++ b/dev-support/git-jira-release-audit/fallback_actions.csv @@ -0,0 +1,227 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +hexsha,action,jira_id +0057cd8ca7ff09ed6b794af71df301c5c47487f4,SKIP, +022f30ce0dd3dd931f6045c6778e194ef5c41f7a,SKIP, +0505072c5182841ad1a28d798527c69bcc3348f0,SKIP, +05cb051423953b913156e4950b67f3d9b28ada5f,REVERT,HBASE-14391 +05f8e94191ef6a63baadf56d6114d7d0317796f2,SKIP, +0791b878422eadf00b55076338f09bf059f39f0c,SKIP, +07f9f3d38cf4d0d01044ab28d90a50a1a009f6b8,SKIP, +10f00547627076d79d77cf58dd2deaece2287084,ADD,HBASE-22330 +10f3b77748a02a2c11635c33964929c0474e890d,SKIP, +1404d5a97331ecc63db53971f5cb7329cb40ce67,ADD,HBASE-15203 +14a869828fe481697d29b2d6e4135e8026039a38,SKIP, +1546613e76b1013a08ebc179c2c22bfeb44f3a4a,SKIP, +156a900eec5e53ff33ad77f2de0714f52f7d1fa6,SKIP, +15bb234d511468b468fa941d20911f2b04919e57,SKIP, +178b675b792b4e9d3ddabd625a79042231a6fc91,ADD,HBASE-17959 +18ca1fcb560236b8c9a0ca057f163ebdaab89f0e,SKIP, +190d189fefa9a40a2724d263e333021f18b409d9,SKIP, +19be2cfa86ad287b798768f16b988771169950ed,SKIP, +1a305bb4848ebcda2bd7c0df8f2f9c03ddf5b471,SKIP, +1b3557649c9ee682c7f135ca52a0e3cd10cb9219,SKIP, +1c46250bef9ef9be9c255d61bda69ff7792ed551,SKIP, +1cb7d0e82ad64f37fbd6de950b74081b0d5eddf3,SKIP, +1eaef185327171b3dd3edb303e08cfe85186e745,SKIP, +1eb8ac6fe9dd0c15cdb52f66ced4136316c06465,SKIP, +2068804d7510e8c1f822b5db3cd4585455f6e7e7,SKIP, +214d33e0f472793a51b7f5371012c7c86bc97ee3,SKIP, +2447d87b56ad087dcf92c1245a34d26ffe59a4bc,SKIP, +24d6a65a12e05bceae2d4355b918dd2a773b4ee2,SKIP, +254af5a3210e5b3cec09b73aa2bff31a1d9e3d80,SKIP, +259d12f7397679c6b0d0a4788e5a37f65fd49f20,SKIP, +267bce0590c39570ddb935921e34bda35e3aa44c,SKIP, +278828333c44493ccbaa7db26a788b2756632034,SKIP, +288794d68ba5bd4d1fd8d5c315cee972019dcb3d,ADD,HBASE-22330 +28f07451a5dddf0ab3988b32b8672654fdbc5b58,SKIP, +2ba542d74c2d9e78332c8c94289d1295752d8072,SKIP, +2cb64fb467bc663a507da9c00a669e618ae90a2f,ADD,HBASE-18945 +2e4544a8b00766248c998850f8907511b8bae240,SKIP, +2e63f882c85fb5804aafff5d92503eca60c0820d,SKIP, +2ebd80499473bbac3eac083806211ec03e084db7,SKIP, 
+31b9096034e19171989fd5b76313e7e0f1a9a12a,SKIP, +31d37fb904c4fcd77e79f9df8db155c5a3d1d8ed,SKIP, +31fe5dbf6b3a261f2c902d0fd6b82bf6c7ecf954,REVERT,HBASE-19685 +31fe5dbf6b3a261f2c902d0fd6b82bf6c7ecf954,SKIP, +34e97f9c08d97b38be9a8f7dda6214d7ae9c6ea8,SKIP, +34ecc75d1669e330c78c3e9b832eca0abf57902d,SKIP, +34fe1f5fd762e4ead3b0e2e820c360796939b315,SKIP, +37d46fcf85da772a06da29d9add8a0652330f6c5,SKIP, +38e2dbc503a7f9ef929ff11b615157f0ee79916c,SKIP, +3966d0fee6c9803cf567ef76d91855a1eaad621d,SKIP, +399b3e9d1bc68c2709565f0a1a719a9a66999564,SKIP, +39a4c56690eeeb2bb5ffaa0f3c8f6759b4fb3fb2,SKIP, +3a11028cdfc6e44576069bed452a0ed10c860db1,SKIP, +3b73ebb7b8975e18c67c24c258fbc061614bb7f2,SKIP, +3c7a349c2eab74a76c06b66df2e2d14ea7681f95,SKIP, +3dcb03947ce9cb1825167784992e689a23847351,ADD,HBASE-18290 +3dd55fa0c00b0f4d04d91ce2a2feb20aea3b8904,SKIP, +4098224c8f227cdf9ec0db5f96585ea3c64ef91a,SKIP, +431b8a5383b894381583bbb9ceef5911911b705c,SKIP, +44d2a9bc1c88f6eb8cf45c9b8a4c37268d540694,SKIP, +451f2fec06617372430573ec64463b39b20833d0,SKIP, +469d6bf457c2c4d8ebe10c1e39004a6b9d907112,SKIP, +46ee6e0fccf6dd8840782c0eb824640e59068a8b,ADD,HBASE-17959 +482d413e0abbabcb0afcdeab7c8ad761218e1df2,SKIP, +48492ec7fd72a89ac67b2ef834ccfa8021fbadd5,REVERT,HBASE-15965 +489dd6427a499d09fb8cde4fbdd46303f0a57b20,REVERT,HBASE-14391 +48d9d27d6840ccd2d8812ffc78fa08e20a460755,SKIP, +492db89d42e490dff0b521f0b1d623d1ac7af9f4,SKIP, +493a4cde31299711de65e2ebbd687791d9bcbb68,SKIP, +49622cb7bbd4382c1ac9397a0158f7d7e85ec97f,SKIP, +49fab7df80c969db4f35ed911a2cd81c3b50928f,ADD,HBASE-19049 +4a40b2e5751702dcde7ab5c58ded54ac79af6178,SKIP, +4bb95edbd91cf6fce001204ada0ba20b33a5a110,SKIP, +4d7e5992cfe949e7aa8c1326ce247011af14a6fd,SKIP, +4e3a750b00d26dada9ba4dec000c895d8507a000,SKIP, +4eb84651a2b6d02d2074143308cef5d0f4b856a3,SKIP, +4f5b22bc19cb8d24ced5d42ebd9794cfd83bae85,SKIP, +54337870eda5649ab7bb81ed01c9dd25d59204f2,SKIP, +58ab201be341f02829286f036a7401d0806eb999,SKIP, +5a16c15d7f51087a50511a2e0730f547c97a033f,SKIP, 
+5b5ff1d8b2cc43f78acaf9bc960be382dc6c34f7,SKIP, +5fa15dd7488433ea610ff5e92161409d20565690,SKIP, +67404e7e89072b9be892a81cc9ba1bfe8d6aeb7f,SKIP, +676fb753d996b60772284393ac3581b47c7a8afa,ADD,HBASE-12976 +6817a7b131e47a96a354438c2c6ad0fbe6878a28,SKIP, +691efc60f705de50055bf5c44911128648535110,SKIP, +694e79a67e84d0c5e4f23b4abe7d27bb5fb8ce37,SKIP, +69c99da70a5ed973e7d7d798525013d1492835bf,SKIP, +6a974fe826a31888b0d00cf30f7f38983485740f,SKIP, +6b37ae3d77e68458cae385b11163ac5108af7655,SKIP, +6b54917d520d32d00f5b4e9420e0d4894aaa34e8,SKIP, +6cf647a0dfd696580c1d841e245d563beca451dd,SKIP, +6e376b900e125f71a71fd2a25c3ad08057b97f73,SKIP, +719993e0fe2b132b75a3689267ae4adff364b6aa,SKIP, +71ed7033675149956de855b6782e1e22fc908dc8,SKIP, +7242650afd466df511ba2d4cfa34f6d082cb1004,SKIP, +72bd7dfdc91f5cff28e1f909f395128132da72d6,SKIP, +73ec3fdd5c64354ae8339baceed4ed0de229712d,REVERT,HBASE-14391 +7547426705b462d8afc0fffd26c1e4c0e911360a,ADD,HBASE-11951 +75e7714d2057917523bb66464de921f180099f71,ADD,HBASE-20004 +75febcea89c907a7daf70c0a06b92803accc3799,REVERT,HBASE-18843 +764adaad3489913ec5bbdfa5526c4ab5a710dfaf,SKIP, +76d067e9d88dd40095a9cb83fe7ee87c9135a8cd,SKIP, +786418c8833fd0b9bbeb67482e3fb97c06c541fc,SKIP, +797a352763110413c4e806770ca13c74ef2a13ea,ADD,HBASE-20004 +79d927c34eb17828a1b9235df984d6d966c68c38,SKIP, +7a16acc881bfd6dc15d74c424f688dcd068bd4b0,SKIP, +7a9475e6ac55bd13fd492014c15d7b0ffb403b2c,SKIP, +7c97acf6e345023f043964d023816d5b3329dde9,ADD,HBASE-16209 +7ea18e20680e86c200cbebc885ff91cfc1f72fac,SKIP, +80971f83d307ab661d830f1a2196729411873906,SKIP, +80d1f62cf7eaaeea569fe5a2e4a91fc270e7bc1f,SKIP, +829e6383d52e7a98947a4b2bdaa0b7e756bc6bfc,SKIP, +834488d435fb59d5cb2b0ed7f09b8b1e70d7e327,SKIP, +86242e1f55da7df6a2119389897d11356e6bbc2a,SKIP, +8670fb3339accf149d098552f523e9c14b90c941,SKIP, +880c7c35fc50f28ec3e072a4c62a348fc964e9e0,SKIP, +88ff206c57fac513b3c5442fd4369ced416279da,SKIP, +8aa1214a1722ba491d52cbbfab1b39cbd0eddeea,SKIP, +8ae29677767db1ac7a29c30143249a6ce2c50537,SKIP, 
+8e2800f50401c37dcb921533cff62b40efd7e8d6,SKIP, +8ef87ce4343e80321fcfd99594372759557c90f2,SKIP, +9213d7194ede5b723bc817a9bb634679ee3ce5c1,SKIP, +930f68c0b976a600066b838283a0f3dce050256f,SKIP, +962d7e9bf06f4e2e569ba34acae6203b4deef778,ADD,HBASE-19074 +97d7b3572cc661a8d31f82b9c567d7a75b9eef95,SKIP, +99e18fed23a2a476514fa4bd500b07a8d913e330,SKIP, +9b65c7a26d2d200d740d1cb6aed6c5e73e829dc1,SKIP, +9daafb67c158ab69acf8a5090e12925d356ff945,SKIP, +9e3b28804d28ad586d12df24a2e5417c25cff857,SKIP, +9e68719014a62f37e7559329e44e2df49738ef6c,SKIP, +9ecd8589c755d91fa6c374bd8d7ffc3260d59a5d,SKIP, +9fb4bfec813e8d962ca6d4934a226c53801bec76,SKIP, +9ff10759c10c93ec27cc5d2b9b151729954e75f6,SKIP, +a05cef75c4b33171ab29d89d0fbb0fbbc11d6d39,SKIP, +a312705dbc8e6d604adcc874526294c72b8ff580,SKIP, +a67481209f5d315f06e3a6910fa44493e398210f,REVERT,HBASE-16840 +a72d40694116d84454f480c961c1cc1f5d7e1deb,SKIP, +a80799a3bc73513393f764df330704ad688140e8,SKIP, +aa8a9997792b686a606e8ada2cd34fb9ad895bc0,SKIP, +aaeb488f43a9e79655275ddb481ba970b49d1173,SKIP, +ac9035db199902533c07d80f384ae29c115d3ad5,SKIP, +ad2064d8a5ff57d021852c3210a30c5f58eaa43c,SKIP, +ad885a0baae21b943ffebef168c65650f8317023,SKIP, +adec117e47a2ca503458954d6877667d877890fd,SKIP, +ae95b1f215a120890de5454739651911749057ca,SKIP, +b182030d48dcc89d8c26b98f2a58d7909957ea49,SKIP, +b3d55441b8174c704ada4585603f6bcfca298843,SKIP, +b65231d04dbc565a578ce928e809aa51f5439857,SKIP, +b6549007b313e8f3aa993d5c1ebd29c84ccb7b7b,SKIP, +b6d4fc955fe0fc41f5225f1cc2e3e4b92029251c,SKIP, +b9f5c6b065ebd572193c1fdc9d38557320b42fe6,SKIP, +bcadcef21048e4764f7ae8dec3ce52884f20c02c,SKIP, +bcdc56ac76e4a26e53faa8301a441e94ee8614d7,SKIP, +bd2c03dc7df600fe481ba7f2fed958deb18f5291,SKIP, +bd4e14db07ea32a45c3ef734e06d195a405da67c,SKIP, +bd4eba2b53b7af738fd9584511d737c4393d0855,SKIP, +bef0616ef33306afca3060b96c2cba5f9762035d,SKIP, +c100fb835a54be6002fe9704349e726f27b15b7a,SKIP, +c71da858ada94e1b93065f0b7caf3558942bc4da,SKIP, 
+c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248 +c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248 +c97905a962b88a0c68ca8a51c2e507daec81ca6d,SKIP, +c9f506a2973e0acbd0d2df7b9353c9291f6c94a8,SKIP, +cbb86942eda4b65ddfc5ec436c78a04e5dd21631,SKIP, +cbdc9fcb8a705f4e5ee28a917a335c6f1ef5df42,SKIP, +ccee3d8dd59dfb181d577b5df483632722db01b1,SKIP, +cd3628d529677852f100da6d010d4c6f76380b84,SKIP, +ce6a6014daded424d9460f7de4eadae169f52683,SKIP, +cf1ccc30909bfb04326415e5a648605759d57360,SKIP, +cf45c8d30a4d9810cd676b2a1a348141c4e27eeb,SKIP, +d14e335edc9c22c30827bc75e73b5303ca64ee0d,SKIP, +d32230d0b5a4706b625cc7ac7ee7d28f44bd7b85,SKIP, +d524768528cd15151ba1ebb82e32609da5308128,SKIP, +d5a1b276270a1d41f21badd5b85d9502f8f9f415,SKIP, +d6e85b0511396b3221cc7f495eaee5bbacc42afd,SKIP, +d91908b0d46156fa364ba11d476b9cdbc01d0411,SKIP, +da619282469c65dcf6bee06783c4246a24a1517c,SKIP, +da8bcabb99ee5a9a35efd114aa45292616ca3c70,SKIP, +dfb1af48927a66aa5baa5b182e84327770b3c6c9,SKIP, +e075492b4dac5c347b7f6b2e5318e2967b95b18b,SKIP, +e08277ac8fe466bf63f6fc342256ab7b8d41243a,SKIP, +e0f80766931fc1d8f652c0dda844cb1cc11c9598,SKIP, +e1eb914f21305ea0e2e8a784a187efd11d0d8ca0,SKIP, +e2d48f41c5d11b9d2478af7f506dcc749025da82,SKIP, +e40fcee6b54712b76d702af6937c3320c60df2b9,SKIP, +e501fe1a296be8fec0890e7e15414683aa3d933b,SKIP, +e5349d589c000e395e12340e003aa9e2153afea6,SKIP, +e5fb8214b2bfd6396539a4e8b6cf5f3cc5e9c06f,REVERT,HBASE-21874 +e8e45ef8f2fb91a870399636b492d5cee58a4c39,SKIP, +e92a147e1961366e36a39577816994566e1e21c5,SKIP, +eacf3cb29641af1a68978d9bd7654f643a3aa3a1,SKIP, +ec251bdd3649de7f30ece914c7930498e642527e,SKIP, +ec39dc8c149b9f89a91596d57d27de812973f0a9,SKIP, +ed520133d6dbb47a40f1883a56460582732f863a,SKIP, +ed62e08786273587378b86278fae452dfc817dfb,SKIP, +ee30872dcf6dc2a1c6e90440e9e4ecd6397a1275,SKIP, +f0541fceed8d3ce13da3da005bbbbe3c5c5cc557,SKIP, +f0b1c4279eaf09d255336d1de9c2bc2b5d726e70,SKIP, +f4acc47e2debb3d3d87c05436d940ef2fdfe0be3,SKIP, 
+f6095adea64912deaebfaf2a6a5881b820d315b2,SKIP, +f61f02b2b24af39545cc2754cfbc25122da60651,SKIP, +f6d6bf59faa2a4a0767480af7658e4a844fd186f,SKIP, +fab0b2e60385fca20021f74335a9c3d36368f621,SKIP, +fb9be046aefb2e0b6e832dd00bc44a38ee62ab1f,SKIP, +fc2ef413fab50d4375318fbd667051fd02f085f2,SKIP, +fd5c5fb3887914183a1510f5972e50d9365e02f5,SKIP, +fe84833ea22c30b68022203132706ebb1e526852,SKIP, +fe9e7483a316df9f5a62e9c215bcedcfd65c5f12,SKIP, +ffcd4d424f69b4ecac1bd9f5980c14bb4b61a3fa,ADD,HBASE-13796 diff --git a/dev-support/git-jira-release-audit/git_jira_release_audit.py b/dev-support/git-jira-release-audit/git_jira_release_audit.py new file mode 100644 index 000000000000..3702b7023b24 --- /dev/null +++ b/dev-support/git-jira-release-audit/git_jira_release_audit.py @@ -0,0 +1,440 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Build a database from git commit histories. Can be used to audit git vs. jira. For usage, +# see README.md. 
import argparse
import csv
import enum
import logging
import pathlib
import re
import sqlite3
import time

import enlighten
import git
import jira


class _DB:
    """Thin wrapper around the Sqlite database that stores the audit data.

    Two tables are maintained: ``git_commits`` (one row per jira_id/branch/sha, with an
    optional release tag) and ``jira_versions`` (one row per jira_id/fixVersion).
    """

    class Action(enum.Enum):
        """What to do with a commit once its summary line has been interpreted."""
        ADD = 'ADD'
        REVERT = 'REVERT'
        SKIP = 'SKIP'

    def __init__(self, db_path, **_kwargs):
        """Open ``db_path`` and (re)create both tables.

        Any existing ``git_commits`` / ``jira_versions`` tables are dropped first, so every
        run starts from an empty database. Extra keyword arguments are accepted and ignored
        so that the parsed command-line namespace can be splatted in directly.
        """
        self._conn = sqlite3.connect(db_path)
        for table in 'git_commits', 'jira_versions':
            # table names come from the constant tuple above, never from user input.
            self._conn.execute("DROP TABLE IF EXISTS %s" % table)
        self._conn.execute("""
        CREATE TABLE IF NOT EXISTS "git_commits"(
          jira_id TEXT NOT NULL,
          branch TEXT NOT NULL,
          git_sha TEXT NOT NULL,
          git_tag TEXT,
          CONSTRAINT pk PRIMARY KEY (jira_id, branch, git_sha)
        );""")
        self._conn.execute("""
        CREATE TABLE IF NOT EXISTS "jira_versions"(
          jira_id TEXT NOT NULL,
          fix_version TEXT NOT NULL,
          CONSTRAINT pk PRIMARY KEY (jira_id, fix_version)
        );""")
        self._conn.commit()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Closing does not commit; callers are expected to have called flush_commits().
        self._conn.close()

    @property
    def conn(self):
        """The underlying ``sqlite3.Connection``."""
        return self._conn

    def apply_commit(self, action, jira_id, branch, git_sha):
        """Record the effect of a single commit on ``branch``.

        ``ADD`` inserts a (jira_id, branch, git_sha) row; ``REVERT`` deletes every row for
        that jira_id on the branch; any other action (i.e. ``SKIP``) is a no-op. The change
        is not committed until :meth:`flush_commits` is called.
        """
        if action == _DB.Action.ADD:
            self._conn.execute(
                "INSERT INTO git_commits(jira_id, branch, git_sha) VALUES (upper(?),?,?)",
                (jira_id, branch, git_sha))
        elif action == _DB.Action.REVERT:
            self._conn.execute("""
            DELETE FROM git_commits WHERE
              jira_id=upper(?)
              AND branch=?
            """, (jira_id.upper(), branch))

    def flush_commits(self):
        """Commit the pending transaction."""
        self._conn.commit()

    def apply_git_tag(self, branch, git_sha, git_tag):
        """Set ``git_tag`` on every row matching ``branch`` and ``git_sha``."""
        self._conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?",
                           (git_tag, branch, git_sha))

    def apply_fix_version(self, jira_id, fix_version):
        """Insert one (jira_id, fix_version) row; the jira_id is upper-cased by Sqlite."""
        self._conn.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)",
                           (jira_id, fix_version))

    def unique_jira_ids_from_git(self):
        """Return the list of distinct jira_ids currently present in ``git_commits``."""
        results = self._conn.execute("SELECT distinct jira_id FROM git_commits").fetchall()
        return [x[0] for x in results]

    def backup(self, target):
        """Copy the whole database into the Sqlite file at ``target``."""
        dst = sqlite3.connect(target)
        try:
            with dst:
                self._conn.backup(dst)
        finally:
            # make sure the destination handle is released even if the backup fails.
            dst.close()


class _RepoReader:
    """Walks git history and translates commit summaries into ``_DB`` actions."""

    _extract_release_tag_pattern = re.compile(r'^rel/(\d+\.\d+\.\d+)(\^0)?$', re.IGNORECASE)
    # Summaries matching any of these are release-management housekeeping and are ignored.
    _skip_patterns = [
        re.compile(r'^preparing development version.+', re.IGNORECASE),
        re.compile(r'^preparing hbase release.+', re.IGNORECASE),
        re.compile(r'^\s*updated? pom.xml version (for|to) .+', re.IGNORECASE),
        re.compile(r'^\s*updated? chang', re.IGNORECASE),
        re.compile(r'^\s*updated? (book|docs|documentation)', re.IGNORECASE),
        re.compile(r'^\s*updating (docs|changes).+', re.IGNORECASE),
        re.compile(r'^\s*bump (pom )?versions?', re.IGNORECASE),
        re.compile(r'^\s*updated? (version|poms|changes).+', re.IGNORECASE),
    ]
    _identify_leading_jira_id_pattern = re.compile(r'^[\s\[]*(hbase-\d+)', re.IGNORECASE)
    _identify_backport_jira_id_patterns = [
        re.compile(r'^backport "(.+)".*', re.IGNORECASE),
        re.compile(r'^backport (.+)', re.IGNORECASE),
    ]
    _identify_revert_jira_id_pattern = re.compile(r'^revert:? "(.+)"', re.IGNORECASE)
    _identify_revert_revert_jira_id_pattern = re.compile(
        '^revert "revert "(.+)"\\.?"\\.?', re.IGNORECASE)
    _identify_amend_jira_id_pattern = re.compile(r'^amend (.+)', re.IGNORECASE)

    def __init__(self, db, fallback_actions_path, remote_name, development_branch,
                 release_line_regexp, git_repo_path=None, **_kwargs):
        """
        :param db: the ``_DB`` instance to populate.
        :param fallback_actions_path: CSV of hand-curated actions for commits whose summary
            cannot be interpreted automatically.
        :param remote_name: git remote whose refs are inspected.
        :param development_branch: branch from which all release lines originate.
        :param release_line_regexp: regexp (without the remote prefix) identifying release
            line branches.
        :param git_repo_path: where to start looking for the git repo; ``None`` means the
            directory containing this file.
        """
        self._db = db
        self._repo = _RepoReader._open_repo(git_repo_path)
        self._fallback_actions = _RepoReader._load_fallback_actions(fallback_actions_path)
        self._remote_name = remote_name
        self._development_branch = development_branch
        self._release_line_regexp = release_line_regexp

    @property
    def repo(self):
        return self._repo

    @property
    def remote_name(self):
        return self._remote_name

    @property
    def development_branch_ref(self):
        """The remote ref of the development branch; ``IndexError`` if it does not exist."""
        refs = self.repo.remote(self._remote_name).refs
        return [ref for ref in refs
                if ref.name == '%s/%s' % (self._remote_name, self._development_branch)][0]

    @property
    def release_line_refs(self):
        """Remote refs whose name matches ``<remote>/<release_line_regexp>``."""
        refs = self.repo.remote(self._remote_name).refs
        pattern = re.compile('%s/%s' % (self._remote_name, self._release_line_regexp))
        return [ref for ref in refs if pattern.match(ref.name)]

    @property
    def release_branch_refs(self):
        """Remote refs named ``<release line>.<something>``, for any known release line."""
        refs = self.repo.remote(self._remote_name).refs
        release_line_refs = self.release_line_refs
        return [ref for ref in refs
                if any(ref.name.startswith(release_line.name + '.')
                       for release_line in release_line_refs)]

    @staticmethod
    def _open_repo(git_repo_path=None):
        """Open the git repo containing ``git_repo_path`` (default: this file).

        A path that points at a regular file is replaced by its parent directory before the
        upward search for the repository root begins.
        """
        start = pathlib.Path(git_repo_path or __file__).absolute()
        if start.is_file():
            start = start.parent
        return git.Repo(start, search_parent_directories=True)

    def identify_least_common_commit(self, ref_a, ref_b):
        """Return the first merge base of the two refs, or raise if there is none."""
        commits = self._repo.merge_base(ref_a, ref_b, "--all")
        if commits:
            return commits[0]
        raise Exception("could not identify merge base between %s, %s" % (ref_a, ref_b))

    @staticmethod
    def _skip(summary):
        """True when ``summary`` matches one of the housekeeping patterns."""
        return any(p.match(summary) for p in _RepoReader._skip_patterns)

    @staticmethod
    def _identify_leading_jira_id(summary):
        """Return the ``hbase-NNN`` id at the start of ``summary`` (as written), or None."""
        match = _RepoReader._identify_leading_jira_id_pattern.match(summary)
        if match:
            return match.groups()[0]
        return None

    @staticmethod
    def _identify_backport_jira_id(summary):
        """Return the jira id named by a ``backport ...`` summary, or None."""
        for pattern in _RepoReader._identify_backport_jira_id_patterns:
            match = pattern.match(summary)
            if match:
                return _RepoReader._identify_leading_jira_id(match.groups()[0])
        return None

    @staticmethod
    def _identify_revert_jira_id(summary):
        """Return the jira id named by a ``revert "..."`` summary, or None."""
        match = _RepoReader._identify_revert_jira_id_pattern.match(summary)
        if match:
            return _RepoReader._identify_leading_jira_id(match.groups()[0])
        return None

    @staticmethod
    def _identify_revert_revert_jira_id(summary):
        """Return the jira id named by a ``revert "revert "..."" `` summary, or None."""
        match = _RepoReader._identify_revert_revert_jira_id_pattern.match(summary)
        if match:
            return _RepoReader._identify_leading_jira_id(match.groups()[0])
        return None

    @staticmethod
    def _identify_amend_jira_id(summary):
        """Return the jira id named by an ``amend ...`` summary, or None."""
        match = _RepoReader._identify_amend_jira_id_pattern.match(summary)
        if match:
            return _RepoReader._identify_leading_jira_id(match.groups()[0])
        return None

    @staticmethod
    def _action_jira_id_for(summary):
        """Interpret ``summary`` as an ``(action, jira_id)`` pair, or None if unrecognised.

        The order of the checks matters: a revert-of-a-revert only reaches its own check
        because the plain revert check fails to find a leading jira id inside the quoted
        ``Revert "..."`` text.
        """
        jira_id = _RepoReader._identify_leading_jira_id(summary)
        if jira_id:
            return _DB.Action.ADD, jira_id
        jira_id = _RepoReader._identify_backport_jira_id(summary)
        if jira_id:
            return _DB.Action.ADD, jira_id
        jira_id = _RepoReader._identify_revert_jira_id(summary)
        if jira_id:
            return _DB.Action.REVERT, jira_id
        jira_id = _RepoReader._identify_revert_revert_jira_id(summary)
        if jira_id:
            return _DB.Action.ADD, jira_id
        jira_id = _RepoReader._identify_amend_jira_id(summary)
        if jira_id:
            return _DB.Action.ADD, jira_id
        return None

    def _extract_release_tag(self, commit):
        """Return the ``x.y.z`` version if ``commit`` is itself named by a ``rel/x.y.z`` tag.

        Only the tagged commit matches; commits leading up to it are tagged retroactively
        by the caller via ``_set_release_tag``.
        """
        names = self._repo.git.name_rev(commit, tags=True, refs='rel/*')
        for name in names.split(' '):
            match = _RepoReader._extract_release_tag_pattern.match(name)
            if match:
                return match.groups()[0]
        return None

    def _set_release_tag(self, branch, tag, shas):
        """Apply ``tag`` to every sha in ``shas`` on ``branch``, committing every 50 rows."""
        cnt = 0
        for sha in shas:
            self._db.apply_git_tag(branch, sha, tag)
            cnt += 1
            if cnt % 50 == 0:
                self._db.flush_commits()
        self._db.flush_commits()

    def _resolve_ambiguity(self, commit):
        """Look ``commit`` up in the fallback actions; unknown commits are logged and skipped."""
        if commit.hexsha not in self._fallback_actions:
            logging.warning('Unable to resolve action for %s: %s', commit.hexsha, commit.summary)
            return _DB.Action.SKIP, None
        action, jira_id = self._fallback_actions[commit.hexsha]
        if not jira_id:
            # the CSV leaves jira_id empty for SKIP rows; normalise '' to None.
            jira_id = None
        return _DB.Action[action], jira_id

    def _row_generator(self, branch, commit):
        """Return the ``apply_commit`` argument tuple for ``commit``, or None to ignore it."""
        if _RepoReader._skip(commit.summary):
            return None
        result = _RepoReader._action_jira_id_for(commit.summary)
        if not result:
            result = self._resolve_ambiguity(commit)
        if not result:
            raise Exception('Cannot resolve action for %s: %s' % (commit.hexsha, commit.summary))
        action, jira_id = result
        return action, jira_id, branch, commit.hexsha

    def populate_db_release_branch(self, origin_commit, release_branch):
        """Replay every commit between ``origin_commit`` and ``release_branch`` into the db.

        Commits are visited oldest-first. Whenever a release-tagged commit is reached, the
        tag is applied to it and to all commits seen since the previous release tag.
        Relies on the module-level ``manager`` progress-bar manager having been set.
        """
        commits = list(self._repo.iter_commits(
            "%s...%s" % (origin_commit.hexsha, release_branch), reverse=True))
        logging.info("%s has %d commits since its origin at %s.", release_branch, len(commits),
                     origin_commit)
        counter = manager.counter(total=len(commits), desc=release_branch, unit='commit')
        commits_since_release = list()
        cnt = 0
        for commit in counter(commits):
            row = self._row_generator(release_branch, commit)
            if row:
                self._db.apply_commit(*row)
            cnt += 1
            if cnt % 50 == 0:
                self._db.flush_commits()
            commits_since_release.append(commit.hexsha)
            tag = self._extract_release_tag(commit)
            if tag:
                self._set_release_tag(release_branch, tag, commits_since_release)
                commits_since_release = list()
        self._db.flush_commits()

    @staticmethod
    def _load_fallback_actions(file):
        """Load the fallback-actions CSV into ``{hexsha: (action, jira_id)}``.

        Lines starting with ``#`` are treated as comments. A missing file yields an empty
        mapping. When a hexsha appears more than once, the last row wins.
        """
        if not pathlib.Path(file).exists():
            return dict()
        # newline='' is what the csv module asks for when handed a file object.
        with open(file, 'r', newline='') as handle:
            reader = csv.DictReader(line for line in handle if not line.startswith('#'))
            return {row['hexsha']: (row['action'], row['jira_id']) for row in reader}


class _JiraReader:
    """Fetches ``fixVersions`` from Jira for the issues discovered in git."""

    def __init__(self, db, jira_url, **_kwargs):
        self._db = db
        self.client = jira.JIRA(jira_url)
        self.throttle_time_in_sec = 1

    def _fetch_fix_versions(self, jira_id):
        """Return the fixVersion names of a single issue."""
        val = self.client.issue(jira_id, fields='fixVersions')
        return [version.name for version in val.fields.fixVersions]

    def _fetch_fix_versions_throttled(self, jira_id):
        """Like ``_fetch_fix_versions`` but sleeps ``throttle_time_in_sec`` afterwards."""
        val = self._fetch_fix_versions(jira_id)
        time.sleep(self.throttle_time_in_sec)
        return val

    def populate_db(self):
        """Query Jira, 50 issues per request, and store every fixVersion found.

        Relies on the module-level ``manager`` progress-bar manager having been set.
        """
        jira_ids = self._db.unique_jira_ids_from_git()
        logging.info("retrieving %s jira_ids from the issue tracker", len(jira_ids))
        counter = manager.counter(total=len(jira_ids), desc='fetch from Jira', unit='issue')
        chunk_size = 50
        chunks = [jira_ids[i:i + chunk_size] for i in range(0, len(jira_ids), chunk_size)]

        cnt = 0
        for chunk in chunks:
            query = "key in (" + ",".join([("'" + jira_id + "'") for jira_id in chunk]) + ")"
            results = self.client.search_issues(jql_str=query, maxResults=chunk_size,
                                                fields='fixVersions')
            for result in results:
                jira_id = result.key
                fix_versions = [version.name for version in result.fields.fixVersions]
                for fix_version in fix_versions:
                    self._db.apply_fix_version(jira_id, fix_version)
                    cnt += 1
                    # commit once per 50 rows (the original `if cnt % 50:` committed on
                    # every row *except* multiples of 50).
                    if cnt % 50 == 0:
                        self._db.flush_commits()
            counter.update(incr=len(chunk))
        self._db.flush_commits()


class Auditor:
    """Drives the two population passes and owns the command-line definition."""

    def __init__(self, repo_reader, jira_reader, db, **_kwargs):
        self._repo_reader = repo_reader
        self._jira_reader = jira_reader
        self._db = db

    def populate_db_from_git(self):
        """Populate ``git_commits`` for every release line and each of its release branches."""
        for release_line in self._repo_reader.release_line_refs:
            branch_origin = self._repo_reader.identify_least_common_commit(
                self._repo_reader.development_branch_ref.name, release_line.name)
            self._repo_reader.populate_db_release_branch(branch_origin, release_line.name)
            # Match on "<line>." so that e.g. branch-1 does not also claim branch-10.x.
            release_branch_prefix = release_line.name + '.'
            for release_branch in self._repo_reader.release_branch_refs:
                if not release_branch.name.startswith(release_branch_prefix):
                    continue
                self._repo_reader.populate_db_release_branch(branch_origin, release_branch.name)

    def populate_db_from_jira(self):
        """Populate ``jira_versions`` for every issue found in git."""
        self._jira_reader.populate_db()

    @staticmethod
    def build_first_pass_parser():
        """Build the parser for options that do not depend on the repo's branches.

        Help is disabled here so that ``-h`` falls through to the second-pass parser, which
        knows the complete set of options.
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument(
            '--db-path',
            help='Path to the database file, or leave unspecified for a transient db.',
            default=':memory:')
        parser.add_argument(
            '--git-repo-path',
            help='Path to the git repo, or leave unspecified to infer from the current'
                 + ' file\'s path.',
            default=__file__)
        parser.add_argument(
            '--remote-name',
            help='The name of the git remote to use when identifying branches.',
            default='origin')
        parser.add_argument(
            '--development-branch',
            help='The name of the branch from which all release lines originate.',
            default='master')
        parser.add_argument(
            '--development-branch-fix-version',
            help='The Jira fixVersion used to indicate an issue is committed to the development '
                 + 'branch.',
            default='3.0.0')
        parser.add_argument(
            '--release-line-regexp',
            help='A regexp used to identify release lines.',
            default=r'branch-\d+$')
        parser.add_argument(
            '--fallback-actions-path',
            help='Path to a file containing a cache of user input.',
            default='fallback_actions.csv')
        parser.add_argument(
            '--jira-url',
            help='A URL locating the target JIRA instance.',
            default='https://issues.apache.org/jira')
        return parser

    @staticmethod
    def build_second_pass_parser(repo_reader, parent_parser):
        """Extend ``parent_parser`` with one required ``--<branch>-fix-version`` per release line."""
        parser = argparse.ArgumentParser(parents=[parent_parser])
        for release_line in repo_reader.release_line_refs:
            name = release_line.name
            parser.add_argument(
                # strip the "<remote>/" prefix from the ref name.
                '--%s-fix-version' % name[len(repo_reader.remote_name) + 1:],
                help='The Jira fixVersion used to indicate an issue is committed to the specified '
                     + 'release line branch',
                required=True)
        return parser


# enlighten progress-bar manager; assigned in main() and read by the reader classes.
manager = None


def main():
    """Parse arguments in two passes, then populate the database from git and Jira."""
    global manager

    first_pass_parser = Auditor.build_first_pass_parser()
    known_args, extras = first_pass_parser.parse_known_args()
    known_args = vars(known_args)
    with _DB(**known_args) as db:
        logging.basicConfig(level=logging.INFO)
        repo_reader = _RepoReader(db, **known_args)
        jira_reader = _JiraReader(db, **known_args)
        second_pass_parser = Auditor.build_second_pass_parser(repo_reader, first_pass_parser)
        args = second_pass_parser.parse_args(extras)
        auditor = Auditor(repo_reader, jira_reader, db, **vars(args))
        with enlighten.get_manager() as manager:
            auditor.populate_db_from_git()
            auditor.populate_db_from_jira()


if __name__ == '__main__':
    main()
+# +blessed==1.17.0 +certifi==2019.11.28 +cffi==1.13.2 +chardet==3.0.4 +cryptography==2.8 +defusedxml==0.6.0 +enlighten==1.4.0 +gitdb2==2.0.6 +GitPython==3.0.5 +idna==2.8 +jira==2.0.0 +oauthlib==3.1.0 +pbr==5.4.4 +pycparser==2.19 +PyJWT==1.7.1 +requests==2.22.0 +requests-oauthlib==1.3.0 +requests-toolbelt==0.9.1 +six==1.14.0 +smmap2==2.0.5 +urllib3==1.25.8 +wcwidth==0.1.8 From 18f432869466767b77d8f6fcc6c1342e4e53f886 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Mon, 27 Jan 2020 10:10:42 -0800 Subject: [PATCH 2/2] PR Feedback --- dev-support/git-jira-release-audit/README.md | 4 +- .../fallback_actions.csv | 4 + .../git_jira_release_audit.py | 137 +++++++++++++++--- 3 files changed, 124 insertions(+), 21 deletions(-) diff --git a/dev-support/git-jira-release-audit/README.md b/dev-support/git-jira-release-audit/README.md index 405a371ad0cd..dd815c503c32 100644 --- a/dev-support/git-jira-release-audit/README.md +++ b/dev-support/git-jira-release-audit/README.md @@ -32,7 +32,7 @@ truth (and, possibly, bugs in this script). The system prerequisites are Python3 with VirtualEnv available and Sqlite. Also, you'll need the content of this directory and a local checkout of git repository. -Build a VirtualEnv with the script's dependencies with +Build a VirtualEnv with the script's dependencies with: ```shell script $ python3 --version @@ -79,7 +79,7 @@ optional arguments: --release-line-regexp RELEASE_LINE_REGEXP A regexp used to identify release lines. --fallback-actions-path FALLBACK_ACTIONS_PATH - Path to a file containing a cache of user input. + Path to a file containing _DB.Actions applicable to specific git shas. --jira-url JIRA_URL A URL locating the target JIRA instance. 
--branch-1-fix-version BRANCH_1_FIX_VERSION The Jira fixVersion used to indicate an issue is diff --git a/dev-support/git-jira-release-audit/fallback_actions.csv b/dev-support/git-jira-release-audit/fallback_actions.csv index 7202fe8ad594..e5faaf0916cb 100644 --- a/dev-support/git-jira-release-audit/fallback_actions.csv +++ b/dev-support/git-jira-release-audit/fallback_actions.csv @@ -15,6 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# This file describes 'Actions' that should be taken for the specified git commit. As new commits +# land in the repo that have commit messages that are not recognized by the regular expressions +# in the application, this file provides overrides on a per-sha basis. +# hexsha,action,jira_id 0057cd8ca7ff09ed6b794af71df301c5c47487f4,SKIP, 022f30ce0dd3dd931f6045c6778e194ef5c41f7a,SKIP, diff --git a/dev-support/git-jira-release-audit/git_jira_release_audit.py b/dev-support/git-jira-release-audit/git_jira_release_audit.py index 3702b7023b24..9c388847da2e 100644 --- a/dev-support/git-jira-release-audit/git_jira_release_audit.py +++ b/dev-support/git-jira-release-audit/git_jira_release_audit.py @@ -18,22 +18,36 @@ # # Build a database from git commit histories. Can be used to audit git vs. jira. For usage, # see README.md. +"""An application to assist Release Managers with ensuring that histories in Git and fixVersions in +JIRA are in agreement. See README.md for a detailed explanation. +""" import argparse import csv -import enlighten import enum -import git -import jira import logging import pathlib import re import sqlite3 import time +import enlighten +import git +import jira + class _DB: + """Manages an instance of Sqlite on behalf of the application. + + Args: + db_path (str): Path to the Sqlite database file. ':memory:' for an ephemeral database. + **_kwargs: Convenience for CLI argument parsing. Ignored. 
+ + Attributes: + conn (:obj:`sqlite3.Connection`): The underlying connection object. + """ class Action(enum.Enum): + """Describes an action to be taken against the database.""" ADD = 'ADD' REVERT = 'REVERT' SKIP = 'SKIP' @@ -66,9 +80,18 @@ def __exit__(self, exc_type, exc_val, exc_tb): @property def conn(self): + """:obj:`sqlite3.Connection`: Underlying database handle.""" return self._conn def apply_commit(self, action, jira_id, branch, git_sha): + """Apply an edit to the commits database. + + Args: + action (:obj:`_DB.Action`): The action to execute. + jira_id (str): The applicable Issue ID from JIRA. + branch (str): The name of the git branch from which the commit originates. + git_sha (str): The commit's SHA. + """ if action == _DB.Action.ADD: self._conn.execute( "INSERT INTO git_commits(jira_id, branch, git_sha) VALUES (upper(?),?,?)", @@ -81,21 +104,42 @@ def apply_commit(self, action, jira_id, branch, git_sha): """, (jira_id.upper(), branch)) def flush_commits(self): + """Commit any pending changes to the database.""" self._conn.commit() def apply_git_tag(self, branch, git_sha, git_tag): + """Annotate a commit in the commits database as being a part of the specified release. + + Args: + branch (str): The name of the git branch from which the commit originates. + git_sha (str): The commit's SHA. + git_tag (str): The first release tag following the commit. + """ self._conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?", (git_tag, branch, git_sha)) def apply_fix_version(self, jira_id, fix_version): + """Annotate a Jira issue in the jira database as being part of the specified release + version. + + Args: + jira_id (str): The applicable Issue ID from JIRA. + fix_version (str): The annotated `fixVersion` as seen in JIRA.
+ """ self._conn.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)", (jira_id, fix_version)) def unique_jira_ids_from_git(self): + """Query the commits database for the population of Jira Issue IDs.""" results = self._conn.execute("SELECT distinct jira_id FROM git_commits").fetchall() return [x[0] for x in results] def backup(self, target): + """Write a copy of the database to the `target` destination. + + Args: + target (str): The backup target, a filesystem path. + """ dst = sqlite3.connect(target) with dst: self._conn.backup(dst) @@ -103,6 +147,21 @@ def backup(self, target): class _RepoReader: + """This class interacts with the git repo, and encapsulates actions specific to HBase's git + history. + + Args: + db (:obj:`_DB`): A handle to the database manager. + fallback_actions_path (str): Path to the file containing sha-specific actions + (see README.md). + remote_name (str): The name of the remote to query for branches and histories + (i.e., "origin"). + development_branch (str): The name of the branch on which active development occurs + (i.e., "master"). + release_line_regexp (str): Filter criteria used to select "release line" branches (such + as "branch-1," "branch-2," &c.). + **_kwargs: Convenience for CLI argument parsing. Ignored. 
+ """ _extract_release_tag_pattern = re.compile(r'^rel/(\d+\.\d+\.\d+)(\^0)?$', re.IGNORECASE) _skip_patterns = [ re.compile(r'^preparing development version.+', re.IGNORECASE), @@ -135,26 +194,33 @@ def __init__(self, db, fallback_actions_path, remote_name, development_branch, @property def repo(self): + """:obj:`git.repo.base.Repo`: Underlying Repo handle.""" return self._repo @property def remote_name(self): + """str: The name of the remote used for querying branches and histories.""" return self._remote_name @property def development_branch_ref(self): + """:obj:`git.refs.reference.Reference`: The git branch where active development occurs.""" refs = self.repo.remote(self._remote_name).refs return [ref for ref in refs if ref.name == '%s/%s' % (self._remote_name, self._development_branch)][0] @property def release_line_refs(self): + """:obj:`list` of :obj:`git.refs.reference.Reference`: The git branches identified as + "release lines", i.e., "branch-2".""" refs = self.repo.remote(self._remote_name).refs pattern = re.compile('%s/%s' % (self._remote_name, self._release_line_regexp)) return [ref for ref in refs if pattern.match(ref.name)] @property def release_branch_refs(self): + """:obj:`list` of :obj:`git.refs.reference.Reference`: The git branches identified as + "release branches", i.e., "branch-2.2".""" refs = self.repo.remote(self._remote_name).refs release_line_refs = self.release_line_refs return [ref for ref in refs @@ -166,6 +232,9 @@ def _open_repo(): return git.Repo(pathlib.Path(__file__).parent.absolute(), search_parent_directories=True) def identify_least_common_commit(self, ref_a, ref_b): + """Given a pair of references, attempt to identify the commit that they have in common, + i.e., the commit at which a "release branch" originates from a "release line" branch. 
+ """ commits = self._repo.merge_base(ref_a, ref_b, "--all") if commits: return commits[0] @@ -251,7 +320,7 @@ def _set_release_tag(self, branch, tag, shas): def _resolve_ambiguity(self, commit): if commit.hexsha not in self._fallback_actions: - logging.warning('Unable to resolve action for %s: %s' % (commit.hexsha, commit.summary)) + logging.warning('Unable to resolve action for %s: %s', commit.hexsha, commit.summary) return _DB.Action.SKIP, None action, jira_id = self._fallback_actions[commit.hexsha] if not jira_id: @@ -270,12 +339,20 @@ def _row_generator(self, branch, commit): return action, jira_id, branch, commit.hexsha def populate_db_release_branch(self, origin_commit, release_branch): - global manager + """List all commits on `release_branch` since `origin_commit`, recording them as + observations in the commits database. + + Args: + origin_commit (:obj:`git.objects.commit.Commit`): The sha of the first commit to + consider. + release_branch (str): The name of the ref whose history is to be parsed. + """ + global MANAGER commits = list(self._repo.iter_commits( "%s...%s" % (origin_commit.hexsha, release_branch), reverse=True)) logging.info("%s has %d commits since its origin at %s.", release_branch, len(commits), origin_commit) - counter = manager.counter(total=len(commits), desc=release_branch, unit='commit') + counter = MANAGER.counter(total=len(commits), desc=release_branch, unit='commit') commits_since_release = list() cnt = 0 for commit in counter(commits): @@ -305,6 +382,13 @@ def _load_fallback_actions(file): class _JiraReader: + """This class interacts with the Jira instance. + + Args: + db (:obj:`_DB`): A handle to the database manager. + jira_url (str): URL of the Jira instance to query. + **_kwargs: Convenience for CLI argument parsing. Ignored. 
+ """ def __init__(self, db, jira_url, **_kwargs): self._db = db self.client = jira.JIRA(jira_url) @@ -320,10 +404,12 @@ def _fetch_fix_versions_throttled(self, jira_id): return val def populate_db(self): - global manager + """Query Jira for issue IDs found in the commits database, writing them to the jira + database.""" + global MANAGER jira_ids = self._db.unique_jira_ids_from_git() logging.info("retrieving %s jira_ids from the issue tracker", len(jira_ids)) - counter = manager.counter(total=len(jira_ids), desc='fetch from Jira', unit='issue') + counter = MANAGER.counter(total=len(jira_ids), desc='fetch from Jira', unit='issue') chunk_size = 50 chunks = [jira_ids[i:i + chunk_size] for i in range(0, len(jira_ids), chunk_size)] @@ -345,12 +431,23 @@ def populate_db(self): class Auditor: + """This class builds databases from git and Jira, making it possible to audit the two for + discrepancies. At some point, it will provide pre-canned audit queries against those databases. + It is the entrypoint to this application. + + Args: + repo_reader (:obj:`_RepoReader`): An instance of the `_RepoReader`. + jira_reader (:obj:`_JiraReader`): An instance of the `JiraReader`. + db (:obj:`_DB`): A handle to the database manager. + **_kwargs: Convenience for CLI argument parsing. Ignored. 
+ """ def __init__(self, repo_reader, jira_reader, db, **_kwargs): self._repo_reader = repo_reader self._jira_reader = jira_reader self._db = db def populate_db_from_git(self): + """Process the git repository, populating the commits database.""" for release_line in self._repo_reader.release_line_refs: branch_origin = self._repo_reader.identify_least_common_commit( self._repo_reader.development_branch_ref.name, release_line.name) @@ -361,10 +458,12 @@ def populate_db_from_git(self): self._repo_reader.populate_db_release_branch(branch_origin, release_branch.name) def populate_db_from_jira(self): + """Process the Jira issues identified by the commits database, populating the jira + database.""" self._jira_reader.populate_db() @staticmethod - def build_first_pass_parser(): + def _build_first_pass_parser(): parser = argparse.ArgumentParser(add_help=False) parser.add_argument( '--db-path', @@ -373,7 +472,7 @@ def build_first_pass_parser(): parser.add_argument( '--git-repo-path', help='Path to the git repo, or leave unspecified to infer from the current' - + ' file\'s path.', + + ' file\'s path.', default=__file__) parser.add_argument( '--remote-name', @@ -386,7 +485,7 @@ def build_first_pass_parser(): parser.add_argument( '--development-branch-fix-version', help='The Jira fixVersion used to indicate an issue is committed to the development ' - + 'branch.', + + 'branch.', default='3.0.0') parser.add_argument( '--release-line-regexp', @@ -394,7 +493,7 @@ def build_first_pass_parser(): default=r'branch-\d+$') parser.add_argument( '--fallback-actions-path', - help='Path to a file containing a cache of user input.', + help='Path to a file containing _DB.Actions applicable to specific git shas.', default='fallback_actions.csv') parser.add_argument( '--jira-url', @@ -403,35 +502,35 @@ def build_first_pass_parser(): return parser @staticmethod - def build_second_pass_parser(repo_reader, parent_parser): + def _build_second_pass_parser(repo_reader, parent_parser): parser = 
argparse.ArgumentParser(parents=[parent_parser]) for release_line in repo_reader.release_line_refs: name = release_line.name parser.add_argument( '--%s-fix-version' % name[len(repo_reader.remote_name) + 1:], help='The Jira fixVersion used to indicate an issue is committed to the specified ' - + 'release line branch', + + 'release line branch', required=True) return parser -manager = None +MANAGER = None def main(): - global manager + global MANAGER - first_pass_parser = Auditor.build_first_pass_parser() + first_pass_parser = Auditor._build_first_pass_parser() known_args, extras = first_pass_parser.parse_known_args() known_args = vars(known_args) with _DB(**known_args) as db: logging.basicConfig(level=logging.INFO) repo_reader = _RepoReader(db, **known_args) jira_reader = _JiraReader(db, **known_args) - second_pass_parser = Auditor.build_second_pass_parser(repo_reader, first_pass_parser) + second_pass_parser = Auditor._build_second_pass_parser(repo_reader, first_pass_parser) args = second_pass_parser.parse_args(extras) auditor = Auditor(repo_reader, jira_reader, db, **vars(args)) - with enlighten.get_manager() as manager: + with enlighten.get_manager() as MANAGER: auditor.populate_db_from_git() auditor.populate_db_from_jira()