diff --git a/.editorconfig b/.editorconfig index 011673016c26..aa6adaa77214 100644 --- a/.editorconfig +++ b/.editorconfig @@ -691,3 +691,7 @@ ij_ruby_spaces_around_range_operators = false ij_ruby_spaces_around_relational_operators = true ij_ruby_spaces_within_array_initializer_braces = true ij_ruby_spaces_within_braces = false + +[*.py] +indent_size = 4 +tab_width = 4 diff --git a/dev-support/git-jira-release-audit/.gitignore b/dev-support/git-jira-release-audit/.gitignore new file mode 100644 index 000000000000..ddab63162421 --- /dev/null +++ b/dev-support/git-jira-release-audit/.gitignore @@ -0,0 +1,4 @@ +*.db +*.log +*.svg +venv diff --git a/dev-support/git-jira-release-audit/README.md b/dev-support/git-jira-release-audit/README.md new file mode 100644 index 000000000000..dd815c503c32 --- /dev/null +++ b/dev-support/git-jira-release-audit/README.md @@ -0,0 +1,186 @@ + + +# Git / JIRA Release Audit + +This is an application for performing an audit between the histories on our git +branches and the `fixVersion` field set on issues in JIRA. It does this by +building a Sqlite database from the commits found on each git branch, +identifying Jira IDs and release tags, and then requesting information about +those issues from Jira. Once both sources have been collected, queries can be +performed against the database to look for discrepancies between the sources of +truth (and, possibly, bugs in this script). + +## Setup + +The system prerequisites are Python3 with VirtualEnv available and Sqlite. Also, +you'll need the content of this directory and a local checkout of the git repository. + +Build a VirtualEnv with the script's dependencies with: + +```shell script +$ python3 --version +Python 3.7.6 +$ python3 -m venv ./venv +$ ./venv/bin/pip install -r ./requirements.txt +... +Successfully installed... +``` + +## Usage + +The tool provides basic help docs. 
+ +```shell script +$ ./venv/bin/python ./git_jira_release_audit.py --help +usage: git_jira_release_audit.py [-h] [--db-path DB_PATH] + [--git-repo-path GIT_REPO_PATH] + [--remote-name REMOTE_NAME] + [--development-branch DEVELOPMENT_BRANCH] + [--development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION] + [--release-line-regexp RELEASE_LINE_REGEXP] + [--fallback-actions-path FALLBACK_ACTIONS_PATH] + [--jira-url JIRA_URL] --branch-1-fix-version + BRANCH_1_FIX_VERSION --branch-2-fix-version + BRANCH_2_FIX_VERSION + +optional arguments: + -h, --help show this help message and exit + --db-path DB_PATH Path to the database file, or leave unspecified for a + transient db. + --git-repo-path GIT_REPO_PATH + Path to the git repo, or leave unspecified to infer + from the current file's path. + --remote-name REMOTE_NAME + The name of the git remote to use when identifying + branches. + --development-branch DEVELOPMENT_BRANCH + The name of the branch from which all release lines + originate. + --development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION + The Jira fixVersion used to indicate an issue is + committed to the development branch. + --release-line-regexp RELEASE_LINE_REGEXP + A regexp used to identify release lines. + --fallback-actions-path FALLBACK_ACTIONS_PATH + Path to a file containing _DB.Actions applicable to specific git shas. + --jira-url JIRA_URL A URL locating the target JIRA instance. 
+ --branch-1-fix-version BRANCH_1_FIX_VERSION + The Jira fixVersion used to indicate an issue is + committed to the specified release line branch + --branch-2-fix-version BRANCH_2_FIX_VERSION + The Jira fixVersion used to indicate an issue is + committed to the specified release line branch +``` + +Example Run: + +```shell script +$ ./venv/bin/python3 ./git_jira_release_audit.py \ + --db-path=audit.db \ + --remote-name=apache-rw \ + --development-branch-fix-version=3.0.0 \ + --branch-1-fix-version=1.5.0 \ + --branch-2-fix-version=2.3.0 +INFO:root:apache-rw/branch-1 has 4046 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.0 has 1433 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.1 has 2111 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.2 has 2738 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.3 has 3287 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-1.4 has 3912 commits since its origin at 0167558eb31ff48308d592ef70b6d005ba6d21fb. +INFO:root:apache-rw/branch-2 has 3080 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. +INFO:root:apache-rw/branch-2.0 has 2194 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. +INFO:root:apache-rw/branch-2.1 has 2705 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. +INFO:root:apache-rw/branch-2.2 has 2927 commits since its origin at 0d0c330401ade938bf934aafd79ec23705edcc60. 
+INFO:root:retrieving 5653 jira_ids from the issue tracker + +apache-rw/branch-1 100%|██████████████████████████████████████████████████████| 4046/4046 [08:23<00:00, 8.04 commit/s] +apache-rw/branch-1.0 100%|████████████████████████████████████████████████████| 1433/1433 [03:49<00:00, 6.26 commit/s] +apache-rw/branch-1.1 100%|████████████████████████████████████████████████████| 2111/2111 [05:16<00:00, 6.68 commit/s] +apache-rw/branch-1.2 100%|████████████████████████████████████████████████████| 2738/2738 [06:26<00:00, 7.10 commit/s] +apache-rw/branch-1.3 100%|████████████████████████████████████████████████████| 3287/3287 [07:21<00:00, 7.46 commit/s] +apache-rw/branch-1.4 100%|████████████████████████████████████████████████████| 3912/3912 [08:08<00:00, 8.02 commit/s] +apache-rw/branch-2 100%|█████████████████████████████████████████████████████| 3080/3080 [03:29<00:00, 14.74 commit/s] +apache-rw/branch-2.0 100%|████████████████████████████████████████████████████| 2194/2194 [04:56<00:00, 7.42 commit/s] +apache-rw/branch-2.1 100%|███████████████████████████████████████████████████| 2705/2705 [03:17<00:00, 13.75 commit/s] +apache-rw/branch-2.2 100%|███████████████████████████████████████████████████| 2927/2927 [03:28<00:00, 14.09 commit/s] +fetch from Jira 100%|█████████████████████████████████████████████████████████| 5653/5653 [00:58<00:00, 98.29 issue/s] +``` + +With a populated database, query with sqlite: + +```shell script +$ sqlite3 audit.db +SQLite version 3.24.0 2018-06-04 14:10:15 +Enter ".help" for usage hints. 
+sqlite> -- count the number of distinct Jira issues committed to each release branch +sqlite> select count(distinct jira_id), branch from git_commits group by branch; +3406|apache-rw/branch-1 +1189|apache-rw/branch-1.0 +1728|apache-rw/branch-1.1 +2289|apache-rw/branch-1.2 +2779|apache-rw/branch-1.3 +3277|apache-rw/branch-1.4 +2666|apache-rw/branch-2 +1809|apache-rw/branch-2.0 +2289|apache-rw/branch-2.1 +2511|apache-rw/branch-2.2 + +sqlite> -- count the number of issues that will be in 2.3.0 that have not been released on any earlier +sqlite> -- version. +sqlite> select count(1) from ( + select distinct jira_id from git_commits where branch = 'apache-rw/branch-2' except + select distinct jira_id from git_commits where branch in + ('apache-rw/branch-2.0', 'apache-rw/branch-2.1', 'apache-rw/branch-2.2')); +169 + +sqlite> -- find the issues for which the git commit record and JIRA fixVersion disagree +sqlite> select g.jira_id, g.git_tag, j.fix_version + from git_commits g + inner join jira_versions j + on g.jira_id = j.jira_id + and g.branch = 'apache-rw/branch-2.2' + and g.git_tag is not null + and j.fix_version like '2.2.%' + and g.git_tag != j.fix_version; +HBASE-22941|2.2.2|2.2.1 + +sqlite> -- show jira non-1.x fixVersions for all issues on branch-2 but not on any +sqlite> -- branch-2.x release branch; i.e., issues that are missing a fixVersion or +sqlite> -- are marked for a release other than (3.0.0, 2.3.0) +sqlite> select g.jira_id, j.fix_version +from ( + select distinct jira_id from git_commits where branch = 'apache-rw/branch-2' except + select distinct jira_id from git_commits where branch in + (select distinct branch from git_commits where branch like 'apache-rw/branch-2.%')) g +left join jira_versions j + on g.jira_id = j.jira_id + and j.fix_version not like '1.%' +where ( + j.fix_version is null + OR j.fix_version not in ('3.0.0', '2.3.0')) +order by g.jira_id desc; +HBASE-23683|2.2.4 +HBASE-23032|connector-1.0.1 +HBASE-23032|hbase-filesystem-1.0.0-alpha2 
+HBASE-22405|2.2.0 +HBASE-22360|2.2.0 +HBASE-22321| +HBASE-22283|2.2.0 +``` diff --git a/dev-support/git-jira-release-audit/fallback_actions.csv b/dev-support/git-jira-release-audit/fallback_actions.csv new file mode 100644 index 000000000000..e5faaf0916cb --- /dev/null +++ b/dev-support/git-jira-release-audit/fallback_actions.csv @@ -0,0 +1,231 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This file describes 'Actions' that should be taken for the specified git commit. As new commits +# land in the repo that have commit messages that are not recognized by the regular expressions +# in the application, this file provides overrides on a per-sha basis. 
+# +hexsha,action,jira_id +0057cd8ca7ff09ed6b794af71df301c5c47487f4,SKIP, +022f30ce0dd3dd931f6045c6778e194ef5c41f7a,SKIP, +0505072c5182841ad1a28d798527c69bcc3348f0,SKIP, +05cb051423953b913156e4950b67f3d9b28ada5f,REVERT,HBASE-14391 +05f8e94191ef6a63baadf56d6114d7d0317796f2,SKIP, +0791b878422eadf00b55076338f09bf059f39f0c,SKIP, +07f9f3d38cf4d0d01044ab28d90a50a1a009f6b8,SKIP, +10f00547627076d79d77cf58dd2deaece2287084,ADD,HBASE-22330 +10f3b77748a02a2c11635c33964929c0474e890d,SKIP, +1404d5a97331ecc63db53971f5cb7329cb40ce67,ADD,HBASE-15203 +14a869828fe481697d29b2d6e4135e8026039a38,SKIP, +1546613e76b1013a08ebc179c2c22bfeb44f3a4a,SKIP, +156a900eec5e53ff33ad77f2de0714f52f7d1fa6,SKIP, +15bb234d511468b468fa941d20911f2b04919e57,SKIP, +178b675b792b4e9d3ddabd625a79042231a6fc91,ADD,HBASE-17959 +18ca1fcb560236b8c9a0ca057f163ebdaab89f0e,SKIP, +190d189fefa9a40a2724d263e333021f18b409d9,SKIP, +19be2cfa86ad287b798768f16b988771169950ed,SKIP, +1a305bb4848ebcda2bd7c0df8f2f9c03ddf5b471,SKIP, +1b3557649c9ee682c7f135ca52a0e3cd10cb9219,SKIP, +1c46250bef9ef9be9c255d61bda69ff7792ed551,SKIP, +1cb7d0e82ad64f37fbd6de950b74081b0d5eddf3,SKIP, +1eaef185327171b3dd3edb303e08cfe85186e745,SKIP, +1eb8ac6fe9dd0c15cdb52f66ced4136316c06465,SKIP, +2068804d7510e8c1f822b5db3cd4585455f6e7e7,SKIP, +214d33e0f472793a51b7f5371012c7c86bc97ee3,SKIP, +2447d87b56ad087dcf92c1245a34d26ffe59a4bc,SKIP, +24d6a65a12e05bceae2d4355b918dd2a773b4ee2,SKIP, +254af5a3210e5b3cec09b73aa2bff31a1d9e3d80,SKIP, +259d12f7397679c6b0d0a4788e5a37f65fd49f20,SKIP, +267bce0590c39570ddb935921e34bda35e3aa44c,SKIP, +278828333c44493ccbaa7db26a788b2756632034,SKIP, +288794d68ba5bd4d1fd8d5c315cee972019dcb3d,ADD,HBASE-22330 +28f07451a5dddf0ab3988b32b8672654fdbc5b58,SKIP, +2ba542d74c2d9e78332c8c94289d1295752d8072,SKIP, +2cb64fb467bc663a507da9c00a669e618ae90a2f,ADD,HBASE-18945 +2e4544a8b00766248c998850f8907511b8bae240,SKIP, +2e63f882c85fb5804aafff5d92503eca60c0820d,SKIP, +2ebd80499473bbac3eac083806211ec03e084db7,SKIP, 
+31b9096034e19171989fd5b76313e7e0f1a9a12a,SKIP, +31d37fb904c4fcd77e79f9df8db155c5a3d1d8ed,SKIP, +31fe5dbf6b3a261f2c902d0fd6b82bf6c7ecf954,REVERT,HBASE-19685 +31fe5dbf6b3a261f2c902d0fd6b82bf6c7ecf954,SKIP, +34e97f9c08d97b38be9a8f7dda6214d7ae9c6ea8,SKIP, +34ecc75d1669e330c78c3e9b832eca0abf57902d,SKIP, +34fe1f5fd762e4ead3b0e2e820c360796939b315,SKIP, +37d46fcf85da772a06da29d9add8a0652330f6c5,SKIP, +38e2dbc503a7f9ef929ff11b615157f0ee79916c,SKIP, +3966d0fee6c9803cf567ef76d91855a1eaad621d,SKIP, +399b3e9d1bc68c2709565f0a1a719a9a66999564,SKIP, +39a4c56690eeeb2bb5ffaa0f3c8f6759b4fb3fb2,SKIP, +3a11028cdfc6e44576069bed452a0ed10c860db1,SKIP, +3b73ebb7b8975e18c67c24c258fbc061614bb7f2,SKIP, +3c7a349c2eab74a76c06b66df2e2d14ea7681f95,SKIP, +3dcb03947ce9cb1825167784992e689a23847351,ADD,HBASE-18290 +3dd55fa0c00b0f4d04d91ce2a2feb20aea3b8904,SKIP, +4098224c8f227cdf9ec0db5f96585ea3c64ef91a,SKIP, +431b8a5383b894381583bbb9ceef5911911b705c,SKIP, +44d2a9bc1c88f6eb8cf45c9b8a4c37268d540694,SKIP, +451f2fec06617372430573ec64463b39b20833d0,SKIP, +469d6bf457c2c4d8ebe10c1e39004a6b9d907112,SKIP, +46ee6e0fccf6dd8840782c0eb824640e59068a8b,ADD,HBASE-17959 +482d413e0abbabcb0afcdeab7c8ad761218e1df2,SKIP, +48492ec7fd72a89ac67b2ef834ccfa8021fbadd5,REVERT,HBASE-15965 +489dd6427a499d09fb8cde4fbdd46303f0a57b20,REVERT,HBASE-14391 +48d9d27d6840ccd2d8812ffc78fa08e20a460755,SKIP, +492db89d42e490dff0b521f0b1d623d1ac7af9f4,SKIP, +493a4cde31299711de65e2ebbd687791d9bcbb68,SKIP, +49622cb7bbd4382c1ac9397a0158f7d7e85ec97f,SKIP, +49fab7df80c969db4f35ed911a2cd81c3b50928f,ADD,HBASE-19049 +4a40b2e5751702dcde7ab5c58ded54ac79af6178,SKIP, +4bb95edbd91cf6fce001204ada0ba20b33a5a110,SKIP, +4d7e5992cfe949e7aa8c1326ce247011af14a6fd,SKIP, +4e3a750b00d26dada9ba4dec000c895d8507a000,SKIP, +4eb84651a2b6d02d2074143308cef5d0f4b856a3,SKIP, +4f5b22bc19cb8d24ced5d42ebd9794cfd83bae85,SKIP, +54337870eda5649ab7bb81ed01c9dd25d59204f2,SKIP, +58ab201be341f02829286f036a7401d0806eb999,SKIP, +5a16c15d7f51087a50511a2e0730f547c97a033f,SKIP, 
+5b5ff1d8b2cc43f78acaf9bc960be382dc6c34f7,SKIP, +5fa15dd7488433ea610ff5e92161409d20565690,SKIP, +67404e7e89072b9be892a81cc9ba1bfe8d6aeb7f,SKIP, +676fb753d996b60772284393ac3581b47c7a8afa,ADD,HBASE-12976 +6817a7b131e47a96a354438c2c6ad0fbe6878a28,SKIP, +691efc60f705de50055bf5c44911128648535110,SKIP, +694e79a67e84d0c5e4f23b4abe7d27bb5fb8ce37,SKIP, +69c99da70a5ed973e7d7d798525013d1492835bf,SKIP, +6a974fe826a31888b0d00cf30f7f38983485740f,SKIP, +6b37ae3d77e68458cae385b11163ac5108af7655,SKIP, +6b54917d520d32d00f5b4e9420e0d4894aaa34e8,SKIP, +6cf647a0dfd696580c1d841e245d563beca451dd,SKIP, +6e376b900e125f71a71fd2a25c3ad08057b97f73,SKIP, +719993e0fe2b132b75a3689267ae4adff364b6aa,SKIP, +71ed7033675149956de855b6782e1e22fc908dc8,SKIP, +7242650afd466df511ba2d4cfa34f6d082cb1004,SKIP, +72bd7dfdc91f5cff28e1f909f395128132da72d6,SKIP, +73ec3fdd5c64354ae8339baceed4ed0de229712d,REVERT,HBASE-14391 +7547426705b462d8afc0fffd26c1e4c0e911360a,ADD,HBASE-11951 +75e7714d2057917523bb66464de921f180099f71,ADD,HBASE-20004 +75febcea89c907a7daf70c0a06b92803accc3799,REVERT,HBASE-18843 +764adaad3489913ec5bbdfa5526c4ab5a710dfaf,SKIP, +76d067e9d88dd40095a9cb83fe7ee87c9135a8cd,SKIP, +786418c8833fd0b9bbeb67482e3fb97c06c541fc,SKIP, +797a352763110413c4e806770ca13c74ef2a13ea,ADD,HBASE-20004 +79d927c34eb17828a1b9235df984d6d966c68c38,SKIP, +7a16acc881bfd6dc15d74c424f688dcd068bd4b0,SKIP, +7a9475e6ac55bd13fd492014c15d7b0ffb403b2c,SKIP, +7c97acf6e345023f043964d023816d5b3329dde9,ADD,HBASE-16209 +7ea18e20680e86c200cbebc885ff91cfc1f72fac,SKIP, +80971f83d307ab661d830f1a2196729411873906,SKIP, +80d1f62cf7eaaeea569fe5a2e4a91fc270e7bc1f,SKIP, +829e6383d52e7a98947a4b2bdaa0b7e756bc6bfc,SKIP, +834488d435fb59d5cb2b0ed7f09b8b1e70d7e327,SKIP, +86242e1f55da7df6a2119389897d11356e6bbc2a,SKIP, +8670fb3339accf149d098552f523e9c14b90c941,SKIP, +880c7c35fc50f28ec3e072a4c62a348fc964e9e0,SKIP, +88ff206c57fac513b3c5442fd4369ced416279da,SKIP, +8aa1214a1722ba491d52cbbfab1b39cbd0eddeea,SKIP, +8ae29677767db1ac7a29c30143249a6ce2c50537,SKIP, 
+8e2800f50401c37dcb921533cff62b40efd7e8d6,SKIP, +8ef87ce4343e80321fcfd99594372759557c90f2,SKIP, +9213d7194ede5b723bc817a9bb634679ee3ce5c1,SKIP, +930f68c0b976a600066b838283a0f3dce050256f,SKIP, +962d7e9bf06f4e2e569ba34acae6203b4deef778,ADD,HBASE-19074 +97d7b3572cc661a8d31f82b9c567d7a75b9eef95,SKIP, +99e18fed23a2a476514fa4bd500b07a8d913e330,SKIP, +9b65c7a26d2d200d740d1cb6aed6c5e73e829dc1,SKIP, +9daafb67c158ab69acf8a5090e12925d356ff945,SKIP, +9e3b28804d28ad586d12df24a2e5417c25cff857,SKIP, +9e68719014a62f37e7559329e44e2df49738ef6c,SKIP, +9ecd8589c755d91fa6c374bd8d7ffc3260d59a5d,SKIP, +9fb4bfec813e8d962ca6d4934a226c53801bec76,SKIP, +9ff10759c10c93ec27cc5d2b9b151729954e75f6,SKIP, +a05cef75c4b33171ab29d89d0fbb0fbbc11d6d39,SKIP, +a312705dbc8e6d604adcc874526294c72b8ff580,SKIP, +a67481209f5d315f06e3a6910fa44493e398210f,REVERT,HBASE-16840 +a72d40694116d84454f480c961c1cc1f5d7e1deb,SKIP, +a80799a3bc73513393f764df330704ad688140e8,SKIP, +aa8a9997792b686a606e8ada2cd34fb9ad895bc0,SKIP, +aaeb488f43a9e79655275ddb481ba970b49d1173,SKIP, +ac9035db199902533c07d80f384ae29c115d3ad5,SKIP, +ad2064d8a5ff57d021852c3210a30c5f58eaa43c,SKIP, +ad885a0baae21b943ffebef168c65650f8317023,SKIP, +adec117e47a2ca503458954d6877667d877890fd,SKIP, +ae95b1f215a120890de5454739651911749057ca,SKIP, +b182030d48dcc89d8c26b98f2a58d7909957ea49,SKIP, +b3d55441b8174c704ada4585603f6bcfca298843,SKIP, +b65231d04dbc565a578ce928e809aa51f5439857,SKIP, +b6549007b313e8f3aa993d5c1ebd29c84ccb7b7b,SKIP, +b6d4fc955fe0fc41f5225f1cc2e3e4b92029251c,SKIP, +b9f5c6b065ebd572193c1fdc9d38557320b42fe6,SKIP, +bcadcef21048e4764f7ae8dec3ce52884f20c02c,SKIP, +bcdc56ac76e4a26e53faa8301a441e94ee8614d7,SKIP, +bd2c03dc7df600fe481ba7f2fed958deb18f5291,SKIP, +bd4e14db07ea32a45c3ef734e06d195a405da67c,SKIP, +bd4eba2b53b7af738fd9584511d737c4393d0855,SKIP, +bef0616ef33306afca3060b96c2cba5f9762035d,SKIP, +c100fb835a54be6002fe9704349e726f27b15b7a,SKIP, +c71da858ada94e1b93065f0b7caf3558942bc4da,SKIP, 
+c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248 +c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248 +c97905a962b88a0c68ca8a51c2e507daec81ca6d,SKIP, +c9f506a2973e0acbd0d2df7b9353c9291f6c94a8,SKIP, +cbb86942eda4b65ddfc5ec436c78a04e5dd21631,SKIP, +cbdc9fcb8a705f4e5ee28a917a335c6f1ef5df42,SKIP, +ccee3d8dd59dfb181d577b5df483632722db01b1,SKIP, +cd3628d529677852f100da6d010d4c6f76380b84,SKIP, +ce6a6014daded424d9460f7de4eadae169f52683,SKIP, +cf1ccc30909bfb04326415e5a648605759d57360,SKIP, +cf45c8d30a4d9810cd676b2a1a348141c4e27eeb,SKIP, +d14e335edc9c22c30827bc75e73b5303ca64ee0d,SKIP, +d32230d0b5a4706b625cc7ac7ee7d28f44bd7b85,SKIP, +d524768528cd15151ba1ebb82e32609da5308128,SKIP, +d5a1b276270a1d41f21badd5b85d9502f8f9f415,SKIP, +d6e85b0511396b3221cc7f495eaee5bbacc42afd,SKIP, +d91908b0d46156fa364ba11d476b9cdbc01d0411,SKIP, +da619282469c65dcf6bee06783c4246a24a1517c,SKIP, +da8bcabb99ee5a9a35efd114aa45292616ca3c70,SKIP, +dfb1af48927a66aa5baa5b182e84327770b3c6c9,SKIP, +e075492b4dac5c347b7f6b2e5318e2967b95b18b,SKIP, +e08277ac8fe466bf63f6fc342256ab7b8d41243a,SKIP, +e0f80766931fc1d8f652c0dda844cb1cc11c9598,SKIP, +e1eb914f21305ea0e2e8a784a187efd11d0d8ca0,SKIP, +e2d48f41c5d11b9d2478af7f506dcc749025da82,SKIP, +e40fcee6b54712b76d702af6937c3320c60df2b9,SKIP, +e501fe1a296be8fec0890e7e15414683aa3d933b,SKIP, +e5349d589c000e395e12340e003aa9e2153afea6,SKIP, +e5fb8214b2bfd6396539a4e8b6cf5f3cc5e9c06f,REVERT,HBASE-21874 +e8e45ef8f2fb91a870399636b492d5cee58a4c39,SKIP, +e92a147e1961366e36a39577816994566e1e21c5,SKIP, +eacf3cb29641af1a68978d9bd7654f643a3aa3a1,SKIP, +ec251bdd3649de7f30ece914c7930498e642527e,SKIP, +ec39dc8c149b9f89a91596d57d27de812973f0a9,SKIP, +ed520133d6dbb47a40f1883a56460582732f863a,SKIP, +ed62e08786273587378b86278fae452dfc817dfb,SKIP, +ee30872dcf6dc2a1c6e90440e9e4ecd6397a1275,SKIP, +f0541fceed8d3ce13da3da005bbbbe3c5c5cc557,SKIP, +f0b1c4279eaf09d255336d1de9c2bc2b5d726e70,SKIP, +f4acc47e2debb3d3d87c05436d940ef2fdfe0be3,SKIP, 
+f6095adea64912deaebfaf2a6a5881b820d315b2,SKIP, +f61f02b2b24af39545cc2754cfbc25122da60651,SKIP, +f6d6bf59faa2a4a0767480af7658e4a844fd186f,SKIP, +fab0b2e60385fca20021f74335a9c3d36368f621,SKIP, +fb9be046aefb2e0b6e832dd00bc44a38ee62ab1f,SKIP, +fc2ef413fab50d4375318fbd667051fd02f085f2,SKIP, +fd5c5fb3887914183a1510f5972e50d9365e02f5,SKIP, +fe84833ea22c30b68022203132706ebb1e526852,SKIP, +fe9e7483a316df9f5a62e9c215bcedcfd65c5f12,SKIP, +ffcd4d424f69b4ecac1bd9f5980c14bb4b61a3fa,ADD,HBASE-13796 diff --git a/dev-support/git-jira-release-audit/git_jira_release_audit.py b/dev-support/git-jira-release-audit/git_jira_release_audit.py new file mode 100644 index 000000000000..9c388847da2e --- /dev/null +++ b/dev-support/git-jira-release-audit/git_jira_release_audit.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Build a database from git commit histories. Can be used to audit git vs. jira. For usage, +# see README.md. +"""An application to assist Release Managers with ensuring that histories in Git and fixVersions in +JIRA are in agreement. See README.md for a detailed explanation. 
+""" + +import argparse +import csv +import enum +import logging +import pathlib +import re +import sqlite3 +import time + +import enlighten +import git +import jira + + +class _DB: + """Manages an instance of Sqlite on behalf of the application. + + Args: + db_path (str): Path to the Sqlite database file. ':memory:' for an ephemeral database. + **_kwargs: Convenience for CLI argument parsing. Ignored. + + Attributes: + conn (:obj:`sqlite3.Connection`): The underlying connection object. + """ + class Action(enum.Enum): + """Describes an action to be taken against the database.""" + ADD = 'ADD' + REVERT = 'REVERT' + SKIP = 'SKIP' + + def __init__(self, db_path, **_kwargs): + self._conn = sqlite3.connect(db_path) + for table in 'git_commits', 'jira_versions': + self._conn.execute("DROP TABLE IF EXISTS %s" % table) + self._conn.execute(""" + CREATE TABLE IF NOT EXISTS "git_commits"( + jira_id TEXT NOT NULL, + branch TEXT NOT NULL, + git_sha TEXT NOT NULL, + git_tag TEXT, + CONSTRAINT pk PRIMARY KEY (jira_id, branch, git_sha) + );""") + self._conn.execute(""" + CREATE TABLE IF NOT EXISTS "jira_versions"( + jira_id TEXT NOT NULL, + fix_version TEXT NOT NULL, + CONSTRAINT pk PRIMARY KEY (jira_id, fix_version) + );""") + self._conn.commit() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._conn.close() + + @property + def conn(self): + """:obj:`sqlite3.Connection`: Underlying database handle.""" + return self._conn + + def apply_commit(self, action, jira_id, branch, git_sha): + """Apply an edit to the commits database. + + Args: + action (:obj:`_DB.Action`): The action to execute. + jira_id (str): The applicable Issue ID from JIRA. + branch (str): The name of the git branch from which the commit originates. + git_sha (str): The commit's SHA. 
+ """ + if action == _DB.Action.ADD: + self._conn.execute( + "INSERT INTO git_commits(jira_id, branch, git_sha) VALUES (upper(?),?,?)", + (jira_id, branch, git_sha)) + elif action == _DB.Action.REVERT: + self._conn.execute(""" + DELETE FROM git_commits WHERE + jira_id=upper(?) + AND branch=? + """, (jira_id.upper(), branch)) + + def flush_commits(self): + """Commit any pending changes to the database.""" + self._conn.commit() + + def apply_git_tag(self, branch, git_sha, git_tag): + """Annotate a commit in the commits database as being a part of the specified release. + + Args: + branch (str): The name of the git branch from which the commit originates. + git_sha (str): The commit's SHA. + git_tag (str): The first release tag following the commit. + """ + self._conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?", + (git_tag, branch, git_sha)) + + def apply_fix_version(self, jira_id, fix_version): + """Annotate a Jira issue in the jira database as being part of the specified release + version. + + Args: + jira_id (str): The applicable Issue ID from JIRA. + fix_version (str): The annotated `fixVersion` as seen in JIRA. + """ + self._conn.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)", + (jira_id, fix_version)) + + def unique_jira_ids_from_git(self): + """Query the commits database for the population of Jira Issue IDs.""" + results = self._conn.execute("SELECT distinct jira_id FROM git_commits").fetchall() + return [x[0] for x in results] + + def backup(self, target): + """Write a copy of the database to the `target` destination. + + Args: + target (str): The backup target, a filesystem path. + """ + dst = sqlite3.connect(target) + with dst: + self._conn.backup(dst) + dst.close() + + +class _RepoReader: + """This class interacts with the git repo, and encapsulates actions specific to HBase's git + history. + + Args: + db (:obj:`_DB`): A handle to the database manager. 
+ fallback_actions_path (str): Path to the file containing sha-specific actions + (see README.md). + remote_name (str): The name of the remote to query for branches and histories + (i.e., "origin"). + development_branch (str): The name of the branch on which active development occurs + (i.e., "master"). + release_line_regexp (str): Filter criteria used to select "release line" branches (such + as "branch-1," "branch-2," &c.). + **_kwargs: Convenience for CLI argument parsing. Ignored. + """ + _extract_release_tag_pattern = re.compile(r'^rel/(\d+\.\d+\.\d+)(\^0)?$', re.IGNORECASE) + _skip_patterns = [ + re.compile(r'^preparing development version.+', re.IGNORECASE), + re.compile(r'^preparing hbase release.+', re.IGNORECASE), + re.compile(r'^\s*updated? pom.xml version (for|to) .+', re.IGNORECASE), + re.compile(r'^\s*updated? chang', re.IGNORECASE), + re.compile(r'^\s*updated? (book|docs|documentation)', re.IGNORECASE), + re.compile(r'^\s*updating (docs|changes).+', re.IGNORECASE), + re.compile(r'^\s*bump (pom )?versions?', re.IGNORECASE), + re.compile(r'^\s*updated? (version|poms|changes).+', re.IGNORECASE), + ] + _identify_leading_jira_id_pattern = re.compile(r'^[\s\[]*(hbase-\d+)', re.IGNORECASE) + _identify_backport_jira_id_patterns = [ + re.compile(r'^backport "(.+)".*', re.IGNORECASE), + re.compile(r'^backport (.+)', re.IGNORECASE), + ] + _identify_revert_jira_id_pattern = re.compile(r'^revert:? 
"(.+)"', re.IGNORECASE) + _identify_revert_revert_jira_id_pattern = re.compile( + '^revert "revert "(.+)"\\.?"\\.?', re.IGNORECASE) + _identify_amend_jira_id_pattern = re.compile(r'^amend (.+)', re.IGNORECASE) + + def __init__(self, db, fallback_actions_path, remote_name, development_branch, + release_line_regexp, **_kwargs): + self._db = db + self._repo = _RepoReader._open_repo() + self._fallback_actions = _RepoReader._load_fallback_actions(fallback_actions_path) + self._remote_name = remote_name + self._development_branch = development_branch + self._release_line_regexp = release_line_regexp + + @property + def repo(self): + """:obj:`git.repo.base.Repo`: Underlying Repo handle.""" + return self._repo + + @property + def remote_name(self): + """str: The name of the remote used for querying branches and histories.""" + return self._remote_name + + @property + def development_branch_ref(self): + """:obj:`git.refs.reference.Reference`: The git branch where active development occurs.""" + refs = self.repo.remote(self._remote_name).refs + return [ref for ref in refs + if ref.name == '%s/%s' % (self._remote_name, self._development_branch)][0] + + @property + def release_line_refs(self): + """:obj:`list` of :obj:`git.refs.reference.Reference`: The git branches identified as + "release lines", i.e., "branch-2".""" + refs = self.repo.remote(self._remote_name).refs + pattern = re.compile('%s/%s' % (self._remote_name, self._release_line_regexp)) + return [ref for ref in refs if pattern.match(ref.name)] + + @property + def release_branch_refs(self): + """:obj:`list` of :obj:`git.refs.reference.Reference`: The git branches identified as + "release branches", i.e., "branch-2.2".""" + refs = self.repo.remote(self._remote_name).refs + release_line_refs = self.release_line_refs + return [ref for ref in refs + if any([ref.name.startswith(release_line.name + '.') + for release_line in release_line_refs])] + + @staticmethod + def _open_repo(): + return 
git.Repo(pathlib.Path(__file__).parent.absolute(), search_parent_directories=True) + + def identify_least_common_commit(self, ref_a, ref_b): + """Given a pair of references, attempt to identify the commit that they have in common, + i.e., the commit at which a "release branch" originates from a "release line" branch. + """ + commits = self._repo.merge_base(ref_a, ref_b, "--all") + if commits: + return commits[0] + raise Exception("could not identify merge base between %s, %s" % (ref_a, ref_b)) + + @staticmethod + def _skip(summary): + return any([p.match(summary) for p in _RepoReader._skip_patterns]) + + @staticmethod + def _identify_leading_jira_id(summary): + match = _RepoReader._identify_leading_jira_id_pattern.match(summary) + if match: + return match.groups()[0] + return None + + @staticmethod + def _identify_backport_jira_id(summary): + for pattern in _RepoReader._identify_backport_jira_id_patterns: + match = pattern.match(summary) + if match: + return _RepoReader._identify_leading_jira_id(match.groups()[0]) + return None + + @staticmethod + def _identify_revert_jira_id(summary): + match = _RepoReader._identify_revert_jira_id_pattern.match(summary) + if match: + return _RepoReader._identify_leading_jira_id(match.groups()[0]) + return None + + @staticmethod + def _identify_revert_revert_jira_id(summary): + match = _RepoReader._identify_revert_revert_jira_id_pattern.match(summary) + if match: + return _RepoReader._identify_leading_jira_id(match.groups()[0]) + return None + + @staticmethod + def _identify_amend_jira_id(summary): + match = _RepoReader._identify_amend_jira_id_pattern.match(summary) + if match: + return _RepoReader._identify_leading_jira_id(match.groups()[0]) + return None + + @staticmethod + def _action_jira_id_for(summary): + jira_id = _RepoReader._identify_leading_jira_id(summary) + if jira_id: + return _DB.Action.ADD, jira_id + jira_id = _RepoReader._identify_backport_jira_id(summary) + if jira_id: + return _DB.Action.ADD, jira_id + jira_id = 
_RepoReader._identify_revert_jira_id(summary) + if jira_id: + return _DB.Action.REVERT, jira_id + jira_id = _RepoReader._identify_revert_revert_jira_id(summary) + if jira_id: + return _DB.Action.ADD, jira_id + jira_id = _RepoReader._identify_amend_jira_id(summary) + if jira_id: + return _DB.Action.ADD, jira_id + return None + + def _extract_release_tag(self, commit): + """works for extracting the tag, but need a way to retro-actively tag + commits we've already seen.""" + names = self._repo.git.name_rev(commit, tags=True, refs='rel/*') + for name in names.split(' '): + match = _RepoReader._extract_release_tag_pattern.match(name) + if match: + return match.groups()[0] + return None + + def _set_release_tag(self, branch, tag, shas): + cnt = 0 + for sha in shas: + self._db.apply_git_tag(branch, sha, tag) + cnt += 1 + if cnt % 50 == 0: + self._db.flush_commits() + self._db.flush_commits() + + def _resolve_ambiguity(self, commit): + if commit.hexsha not in self._fallback_actions: + logging.warning('Unable to resolve action for %s: %s', commit.hexsha, commit.summary) + return _DB.Action.SKIP, None + action, jira_id = self._fallback_actions[commit.hexsha] + if not jira_id: + jira_id = None + return _DB.Action[action], jira_id + + def _row_generator(self, branch, commit): + if _RepoReader._skip(commit.summary): + return None + result = _RepoReader._action_jira_id_for(commit.summary) + if not result: + result = self._resolve_ambiguity(commit) + if not result: + raise Exception('Cannot resolve action for %s: %s' % (commit.hexsha, commit.summary)) + action, jira_id = result + return action, jira_id, branch, commit.hexsha + + def populate_db_release_branch(self, origin_commit, release_branch): + """List all commits on `release_branch` since `origin_commit`, recording them as + observations in the commits database. + + Args: + origin_commit (:obj:`git.objects.commit.Commit`): The sha of the first commit to + consider. 
class _JiraReader:
    """This class interacts with the Jira instance.

    Args:
        db (:obj:`_DB`): A handle to the database manager.
        jira_url (str): URL of the Jira instance to query.
        **_kwargs: Convenience for CLI argument parsing. Ignored.
    """
    def __init__(self, db, jira_url, **_kwargs):
        self._db = db
        self.client = jira.JIRA(jira_url)
        # Pause, in seconds, applied after each throttled single-issue fetch.
        self.throttle_time_in_sec = 1

    def _fetch_fix_versions(self, jira_id):
        """Return the names of the fixVersions set on the issue `jira_id`."""
        val = self.client.issue(jira_id, fields='fixVersions')
        return [version.name for version in val.fields.fixVersions]

    def _fetch_fix_versions_throttled(self, jira_id):
        """Same as `_fetch_fix_versions`, then sleep `throttle_time_in_sec` seconds."""
        val = self._fetch_fix_versions(jira_id)
        time.sleep(self.throttle_time_in_sec)
        return val

    def populate_db(self):
        """Query Jira for issue IDs found in the commits database, writing them to the jira
        database.

        Issues are requested in chunks of 50 using a ``key in (...)`` JQL query. Every
        fixVersion of every returned issue is recorded through `self._db.apply_fix_version`.
        The module-level `MANAGER` must already hold a progress-bar manager, because it is
        used to create the progress counter.
        """
        global MANAGER
        jira_ids = self._db.unique_jira_ids_from_git()
        logging.info("retrieving %s jira_ids from the issue tracker", len(jira_ids))
        counter = MANAGER.counter(total=len(jira_ids), desc='fetch from Jira', unit='issue')
        chunk_size = 50
        chunks = [jira_ids[i:i + chunk_size] for i in range(0, len(jira_ids), chunk_size)]

        cnt = 0
        for chunk in chunks:
            query = "key in (" + ",".join([("'" + jira_id + "'") for jira_id in chunk]) + ")"
            results = self.client.search_issues(jql_str=query, maxResults=chunk_size,
                                                fields='fixVersions')
            for result in results:
                jira_id = result.key
                fix_versions = [version.name for version in result.fields.fixVersions]
                for fix_version in fix_versions:
                    self._db.apply_fix_version(jira_id, fix_version)
                    cnt += 1
                    # Flush once per 50 recorded rows. A bare `cnt % 50` is truthy for every
                    # count that is NOT a multiple of 50, which flushed on almost every row.
                    if cnt % 50 == 0:
                        self._db.flush_commits()
            counter.update(incr=len(chunk))
        # Persist whatever remains after the last full batch.
        self._db.flush_commits()
def populate_db_from_git(self):
    """Process the git repository, populating the commits database.

    For each release line, the commit it shares with the development branch is
    looked up and the release line's history is recorded from that commit. The
    same is then done for every release branch whose name starts with the
    release line's name, using the same origin commit.
    """
    reader = self._repo_reader
    for line_ref in reader.release_line_refs:
        line_name = line_ref.name
        origin = reader.identify_least_common_commit(
            reader.development_branch_ref.name, line_name)
        reader.populate_db_release_branch(origin, line_name)
        # Lazily keep only the release branches cut from this release line.
        matching = (ref for ref in reader.release_branch_refs
                    if ref.name.startswith(line_name))
        for branch_ref in matching:
            reader.populate_db_release_branch(origin, branch_ref.name)
def main():
    """Entry point: parse the CLI in two passes, then populate the database from git and Jira.

    The first pass reads the options that do not depend on the repository and
    leaves the rest unparsed. The second-pass parser is built from the repo
    reader and the first-pass parser, and parses those remaining arguments.
    """
    global MANAGER

    base_parser = Auditor._build_first_pass_parser()
    parsed, remaining = base_parser.parse_known_args()
    options = vars(parsed)
    with _DB(**options) as db:
        logging.basicConfig(level=logging.INFO)
        repo_reader = _RepoReader(db, **options)
        jira_reader = _JiraReader(db, **options)
        full_parser = Auditor._build_second_pass_parser(repo_reader, base_parser)
        final_options = vars(full_parser.parse_args(remaining))
        auditor = Auditor(repo_reader, jira_reader, db, **final_options)
        # The manager is bound to the module-level MANAGER, which the readers
        # use to create their progress counters.
        with enlighten.get_manager() as MANAGER:
            auditor.populate_db_from_git()
            auditor.populate_db_from_jira()
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +blessed==1.17.0 +certifi==2019.11.28 +cffi==1.13.2 +chardet==3.0.4 +cryptography==2.8 +defusedxml==0.6.0 +enlighten==1.4.0 +gitdb2==2.0.6 +GitPython==3.0.5 +idna==2.8 +jira==2.0.0 +oauthlib==3.1.0 +pbr==5.4.4 +pycparser==2.19 +PyJWT==1.7.1 +requests==2.22.0 +requests-oauthlib==1.3.0 +requests-toolbelt==0.9.1 +six==1.14.0 +smmap2==2.0.5 +urllib3==1.25.8 +wcwidth==0.1.8