diff --git a/.github/workflows/dev_ecr_push.yml b/.github/workflows/dev_ecr_push.yml new file mode 100644 index 0000000..4a1c5e4 --- /dev/null +++ b/.github/workflows/dev_ecr_push.yml @@ -0,0 +1,38 @@ +name: dev ECR push +on: + push: + branches: + - main +# Set defaults +defaults: + run: + shell: bash + +env: + AWS_REGION: "us-east-1" + AWS_ACCOUNT_ID: "222053980223" + IAM_ROLE: "ppod-gha-dev" + +jobs: + deploy: + name: Deploy dev build + runs-on: ubuntu-latest + # These permissions are needed to interact with GitHub's OIDC Token endpoint. + permissions: + id-token: write + contents: read + + steps: + - uses: actions/checkout@v2 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: arn:aws:iam::${{ env.AWS_ACCOUNT_ID }}:role/${{ env.IAM_ROLE }} + aws-region: ${{ env.AWS_REGION }} + + - name: Build image + run: make dist-dev + - name: Push image + run: make publish-dev + - name: Update lambda function + run: make update-lambda-dev \ No newline at end of file diff --git a/Makefile b/Makefile index 7d0b32b..f9058ee 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ update: install ## Update all Python dependencies ### Test commands ### test: ## Run tests and print a coverage report - pipenv run coverage run --source=. -m pytest + pipenv run coverage run --include=ppod.py -m pytest pipenv run coverage report -m coveralls: test @@ -53,7 +53,7 @@ publish-dev: dist-dev ## Build, tag and push docker push $(ECR_REGISTRY_DEV)/ppod-dev:latest docker push $(ECR_REGISTRY_DEV)/ppod-dev:`git describe --always` -update-format-lambda-dev: ## Updates the lambda with whatever is the most recent image in the ecr +update-lambda-dev: ## Updates the lambda with whatever is the most recent image in the ecr aws lambda update-function-code \ --function-name ppod-dev \ --image-uri $(shell aws sts get-caller-identity --query Account --output text).dkr.ecr.us-east-1.amazonaws.com/ppod-dev:latest diff --git a/Pipfile b/Pipfile index 590da12..25ea7a9 100644 --- a/Pipfile +++ b/Pipfile @@ -4,7 +4,9 @@ verify_ssl = true name = "pypi" [packages] +boto3 = "*" sentry-sdk = "*" +smart-open = "*" [dev-packages] bandit = "*" @@ -12,6 +14,7 @@ black = "*" coverage = "*" flake8 = "*" isort = "*" +moto = "*" mypy = "*" pytest = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 5be7b53..ecd0d06 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "deace4d7b00d7e265c99030fdc88b9eeea820f245701f01153a510446ff93e8c" + "sha256": "e2dba0a30b638806dfc2b40f8074796e417d9ab7332f00470b7255aaf3329ade" }, "pipfile-spec": 6, "requires": { @@ -16,6 +16,22 @@ ] }, "default": { + "boto3": { + "hashes": [ + "sha256:4408cf07340d29d7a9c8d32cf71b1c54f86b768b2145d341d2698c1e467d7d32", + "sha256:7889c3a07171b8a43468a8644d7c95948dc9e1389c4aac2b689a428ee1a98300" + ], + "index": "pypi", + "version": "==1.23.2" + }, + "botocore": { + "hashes": [ + "sha256:16b9d523a19d61b0edc80ef2253f9130165bad473b1b5707027f10975a8d5467", + "sha256:1977f2ad6b6263f4dd9e8b784e69b194988f16d6bd90c4eede15964f4eecf878" + ], + "markers": "python_version >= '3.6'", + "version": "==1.26.2" + }, "certifi": { "hashes": [ "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", @@ -23,6 +39,30 @@ ], "version": "==2021.10.8" }, + "jmespath": { + "hashes": [ + "sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e", + "sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.0" + }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "s3transfer": { + "hashes": [ + "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971", + "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed" + ], + "markers": "python_version >= '3.6'", + "version": "==0.5.2" + }, "sentry-sdk": { "hashes": [ "sha256:259535ba66933eacf85ab46524188c84dcb4c39f40348455ce15e2c0aca68863", @@ -31,6 +71,22 @@ "index": "pypi", "version": "==1.5.12" }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "smart-open": { + "hashes": [ + "sha256:94afbd5058a45d4fdc4f859ed158b46054cb5ca1c019d76f6f8a60495f662129", + "sha256:d60106b96f0bcaedf5f1cd46ff5524a1c3d02d5653425618bb0fa66e158d22b0" + ], + "index": "pypi", + "version": "==6.0.0" + }, "urllib3": { "hashes": [ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", @@ -86,6 +142,92 @@ "index": "pypi", "version": "==22.3.0" }, + "boto3": { + "hashes": [ + "sha256:4408cf07340d29d7a9c8d32cf71b1c54f86b768b2145d341d2698c1e467d7d32", + "sha256:7889c3a07171b8a43468a8644d7c95948dc9e1389c4aac2b689a428ee1a98300" + ], + "index": "pypi", + "version": "==1.23.2" + }, + "botocore": { + "hashes": [ + "sha256:16b9d523a19d61b0edc80ef2253f9130165bad473b1b5707027f10975a8d5467", + "sha256:1977f2ad6b6263f4dd9e8b784e69b194988f16d6bd90c4eede15964f4eecf878" + ], + "markers": "python_version >= '3.6'", + "version": "==1.26.2" + }, + "certifi": { + "hashes": [ + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + ], + "version": "==2021.10.8" + }, + "cffi": { + "hashes": [ + "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3", + "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2", + "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636", + "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20", + "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728", + "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27", + "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66", + "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443", + "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0", + "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7", + "sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39", + "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605", + "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a", + "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37", + "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029", + "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139", + "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc", + "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df", + "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14", + "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880", + "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2", + "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a", + "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e", + "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474", + "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024", + "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8", + "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0", + "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e", + "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a", + "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e", + "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032", + "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6", + "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e", + "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b", + "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e", + "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954", + "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962", + "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c", + "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4", + "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55", + "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962", + "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023", + "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c", + "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6", + "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8", + "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382", + "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7", + "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc", + "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997", + "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796" + ], + "version": "==1.15.0" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3'", + "version": "==2.0.12" + }, "click": { "hashes": [ "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", @@ -141,6 +283,34 @@ "index": "pypi", "version": "==6.3.3" }, + "cryptography": { + "hashes": [ + "sha256:093cb351031656d3ee2f4fa1be579a8c69c754cf874206be1d4cf3b542042804", + "sha256:0cc20f655157d4cfc7bada909dc5cc228211b075ba8407c46467f63597c78178", + "sha256:1b9362d34363f2c71b7853f6251219298124aa4cc2075ae2932e64c91a3e2717", + "sha256:1f3bfbd611db5cb58ca82f3deb35e83af34bb8cf06043fa61500157d50a70982", + "sha256:2bd1096476aaac820426239ab534b636c77d71af66c547b9ddcd76eb9c79e004", + "sha256:31fe38d14d2e5f787e0aecef831457da6cec68e0bb09a35835b0b44ae8b988fe", + "sha256:3b8398b3d0efc420e777c40c16764d6870bcef2eb383df9c6dbb9ffe12c64452", + "sha256:3c81599befb4d4f3d7648ed3217e00d21a9341a9a688ecdd615ff72ffbed7336", + "sha256:419c57d7b63f5ec38b1199a9521d77d7d1754eb97827bbb773162073ccd8c8d4", + "sha256:46f4c544f6557a2fefa7ac8ac7d1b17bf9b647bd20b16decc8fbcab7117fbc15", + "sha256:471e0d70201c069f74c837983189949aa0d24bb2d751b57e26e3761f2f782b8d", + "sha256:59b281eab51e1b6b6afa525af2bd93c16d49358404f814fe2c2410058623928c", + "sha256:731c8abd27693323b348518ed0e0705713a36d79fdbd969ad968fbef0979a7e0", + "sha256:95e590dd70642eb2079d280420a888190aa040ad20f19ec8c6e097e38aa29e06", + "sha256:a68254dd88021f24a68b613d8c51d5c5e74d735878b9e32cc0adf19d1f10aaf9", + "sha256:a7d5137e556cc0ea418dca6186deabe9129cee318618eb1ffecbd35bee55ddc1", + "sha256:aeaba7b5e756ea52c8861c133c596afe93dd716cbcacae23b80bc238202dc023", + "sha256:dc26bb134452081859aa21d4990474ddb7e863aa39e60d1592800a8865a702de", + "sha256:e53258e69874a306fcecb88b7534d61820db8a98655662a3dd2ec7f1afd9132f", + "sha256:ef15c2df7656763b4ff20a9bc4381d8352e6640cfeb95c2972c38ef508e75181", + "sha256:f224ad253cc9cea7568f49077007d2263efa57396a2f2f78114066fd54b5c68e", + "sha256:f8ec91983e638a9bcd75b39f1396e5c0dc2330cbd9ce4accefe68717e6779e0a" + ], + "markers": "python_version >= '3.6'", + "version": "==37.0.2" + }, "flake8": { "hashes": [ "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d", @@ -165,6 +335,14 @@ "markers": "python_version >= '3.7'", "version": "==3.1.27" }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" + }, "iniconfig": { "hashes": [ "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", @@ -180,6 +358,68 @@ "index": "pypi", "version": "==5.10.1" }, + "jinja2": { + "hashes": [ + "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", + "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61" + ], + "markers": "python_version >= '3.7'", + "version": "==3.1.2" + }, + "jmespath": { + "hashes": [ + "sha256:a490e280edd1f57d6de88636992d05b71e97d69a26a19f058ecf7d304474bf5e", + "sha256:e8dcd576ed616f14ec02eed0005c85973b5890083313860136657e24784e4c04" + ], + "markers": "python_version >= '3.7'", + "version": "==1.0.0" + }, + "markupsafe": { + "hashes": [ + "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003", + "sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88", + "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5", + "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7", + "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a", + "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603", + "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1", + "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135", + "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247", + "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6", + "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601", + "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77", + "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02", + "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e", + "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63", + "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f", + "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980", + "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b", + "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812", + "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff", + "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96", + "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1", + "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925", + "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a", + "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6", + "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e", + "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f", + "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4", + "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f", + "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3", + "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c", + "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a", + "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417", + "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a", + "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a", + "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37", + "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452", + "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933", + "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", + "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.1" + }, "mccabe": { "hashes": [ "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", @@ -187,6 +427,14 @@ ], "version": "==0.6.1" }, + "moto": { + "hashes": [ + "sha256:8928ec168e5fd88b1127413b2fa570a80d45f25182cdad793edd208d07825269", + "sha256:ba683e70950b6579189bc12d74c1477aa036c090c6ad8b151a22f5896c005113" + ], + "index": "pypi", + "version": "==3.1.9" + }, "mypy": { "hashes": [ "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d", @@ -278,6 +526,13 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==2.8.0" }, + "pycparser": { + "hashes": [ + "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9", + "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" + ], + "version": "==2.21" + }, "pyflakes": { "hashes": [ "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c", @@ -302,6 +557,21 @@ "index": "pypi", "version": "==7.1.2" }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "pytz": { + "hashes": [ + "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7", + "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c" + ], + "version": "==2022.1" + }, "pyyaml": { "hashes": [ "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", @@ -341,6 +611,38 @@ "markers": "python_version >= '3.6'", "version": "==6.0" }, + "requests": { + "hashes": [ + "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", + "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==2.27.1" + }, + "responses": { + "hashes": [ + "sha256:18831bc2d72443b67664d98038374a6fa1f27eaaff4dd9a7d7613723416fea3c", + "sha256:644905bc4fb8a18fa37e3882b2ac05e610fe8c2f967d327eed669e314d94a541" + ], + "markers": "python_version >= '3.7'", + "version": "==0.20.0" + }, + "s3transfer": { + "hashes": [ + "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971", + "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed" + ], + "markers": "python_version >= '3.6'", + "version": "==0.5.2" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, "smmap": { "hashes": [ "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94", @@ -362,7 +664,7 @@ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_version >= '3.7'", + "markers": "python_version < '3.11'", "version": "==2.0.1" }, "typing-extensions": { @@ -372,6 +674,30 @@ ], "markers": "python_version < '3.10'", "version": "==4.2.0" + }, + "urllib3": { + "hashes": [ + "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", + "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.9" + }, + "werkzeug": { + "hashes": [ + "sha256:1ce08e8093ed67d638d63879fd1ba3735817f7a80de3674d293f5984f25fb6e6", + "sha256:72a4b735692dd3135217911cbeaa1be5fa3f62bffb8745c5215420a03dc55255" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.2" + }, + "xmltodict": { + "hashes": [ + "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56", + "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852" + ], + "markers": "python_version >= '3.4'", + "version": "==0.13.0" } } } diff --git a/README.md b/README.md index abf407a..8111a57 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,14 @@ make lint ``` ## Required ENV +`BUCKET` = The bucket containing the compressed MARCXML files to be submitted to POD. + `SENTRY_DSN` = If set to a valid Sentry DSN, enables Sentry exception monitoring. This is not needed for local development. `WORKSPACE` = Set to `dev` for local development, this will be set to `stage` and `prod` in those environments by Terraform. ### To run locally +NOTE: These instructions for running locally don't currently work and functionality has to be verified in our dev AWS account. - Build the container: ```bash docker build -t ppod . diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..c8c6330 --- /dev/null +++ b/conftest.py @@ -0,0 +1,45 @@ +import os + +import boto3 +import pytest +from moto import mock_s3 + + +@pytest.fixture(scope="session") +def aws_credentials(): + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + + +@pytest.fixture() +def request_data_matching_file(): + request_data = {"filename-prefix": "upload/"} + yield request_data + + +@pytest.fixture(scope="session") +def mocked_s3(aws_credentials): + with mock_s3(): + s3 = boto3.client("s3", region_name="us-east-1") + s3.create_bucket(Bucket="ppod") + s3.put_object( + Body=open("fixtures/pod.tar.gz", "rb"), + Bucket="ppod", + Key="upload/pod.tar.gz", + ) + s3.create_bucket(Bucket="no_files") + s3.create_bucket(Bucket="a_lot_of_files") + for i in range(1001): + s3.put_object( + Body=str(i), + Bucket="a_lot_of_files", + Key=f"upload/{i}.txt", + ) + yield s3 + + +@pytest.fixture(autouse=True) +def test_env(): + os.environ = {"WORKSPACE": "test", "BUCKET": "ppod"} + yield diff --git a/fixtures/pod.tar.gz b/fixtures/pod.tar.gz new file mode 100644 index 0000000..28ac0d1 Binary files /dev/null and b/fixtures/pod.tar.gz differ diff --git a/fixtures/pod.xml b/fixtures/pod.xml new file mode 100644 index 0000000..590d695 --- /dev/null +++ b/fixtures/pod.xml @@ -0,0 +1,264 @@ + + + + 01168nam 2200385Ia 4500 + 20210714130252.0 + 990603s1999 ne a 000 0 eng d + 990008915090106761 + + 9071570231 (pbk.) + + + (MCM)000891509 + + + (MCM)000891509MIT01 + + + (OCoLC)41479068 + + + FBR + FBR + MYG + + + MYGG + + + NA2750 + .M34 1999 + + + 720.1 + 21 + + + Magyar, P\xc3\xa9ter. + + + Thought palaces / + Peter Magyar. + + + Amsterdam : + Architectura & Natura Press, + c1999. + + + 333 p. : + ill. ; 24 cm. + + + text + txt + rdacontent + + + unspecified + z + rdamedia + + + unspecified + zu + rdacarrier + + + Includes bibliographical references (p. 16). + + + 10891509 + + + Architectural design. + + + Architecture + Philosophy. + + + Thought palaces + 2000005118 200006007 + + + RTC + STACK + NA2750.M34 1999 + 22527227150006761 + + + 1 + 0 + STACK + false + BOOK + 39080015616003 + STACK + 23527227120006761 + 01 + NA2750.M34 1999 + RTC + RTC + + + + 02007nam 2200481 4500 + 20210822074144.0 + 740426s1974 nyu b 101 0 eng + 990000101910106761 + + 73088720 + + + 0914362097 + + + (MCM)000010191 + + + (MCM)000010191MIT01 + + + (OCoLC)00902533 + + + DLC + DLC + MYG + + + a-cc--- + + + MYGH + [769195] + MYGR + [758319] + + + R601 + .M43 + + + R601.M43 + + + Medicine and society in China; + report of a conference sponsored jointly by the National Library of Medicine and the Josiah Macy, Jr. Foundation. Edited by John Z. Bowers and Elizabeth F. Purcell. + + + New York, + Josiah Macy, Jr. Foundation + [1974] + + + vii, 176 p. + 23 cm. + + + text + txt + rdacontent + + + unmediated + n + rdamedia + + + volume + nc + rdacarrier + + + The Macy Foundation series on medicine and public health in China + + + committed to retain + 20170930 + 20421231 + HathiTrust + https://www.hathitrust.org/shared%5Fprint%5Fprogram + MCM + + + Includes bibliographical references. + + + Medicine + China + History + Congresses. + + + Public health + China + Congresses. + + + Conference papers and proceedings. + lcgft + + + Bowers, John Z., + 1913-1993. + + + Purcell, Elizabeth. + + + National Library of Medicine (U.S.) + + + Josiah Macy, Jr. Foundation. + + + Conference on Medicine and Society in China + (1973) + + + Macy Foundation series on medicine and public health in China. + + + LSA + OCC + R601.M43 + 22527225770006761 + + + HUM + STACK + R601.M43 + 22527225790006761 + + + 1 + 0 + STACK + false + BOOK + 39080000528593 + STACK + 23527225780006761 + 01 + R601.M43 + HUM + HUM + + + 1 + 0 + OCC + false + BOOK + 39080019409462 + OCC + 23527225760006761 + 15 + R601.M43 + LSA + LSA + + + \ No newline at end of file diff --git a/ppod.py b/ppod.py index 8d1a7aa..e10136d 100644 --- a/ppod.py +++ b/ppod.py @@ -1,22 +1,56 @@ import logging import os +import tarfile +from typing import IO, Generator, Optional import sentry_sdk +import smart_open +from boto3 import client -def lambda_handler(event: dict, context: object) -> str: +def lambda_handler(event: dict, context: object) -> dict: logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) - env = os.environ["WORKSPACE"] - logger.info( - "Running ppod with env=%s and log level=%s,", - env, - os.getenv("LOGGING_LEVEL", "DEBUG").upper(), - ) if sentry_dsn := os.getenv("SENTRY_DSN"): sentry_sdk.init(sentry_dsn, environment=env) logger.info( "Sentry DSN found, exceptions will be sent to Sentry with env=%s", env ) - return "lambda" + file_count = 0 + bucket = os.environ["BUCKET"] + s3_files = filter_files_in_bucket( + bucket, + event["filename-prefix"], + ) + for s3_file in s3_files: + logger.info("Processing file: %s", s3_file) + s3_file_content = smart_open.open(f"s3://{bucket}/{s3_file}", "rb") + files = extract_files_from_tar(s3_file_content) + for file in files: + file # do a thing + file_count += 1 + return {"files-processed": file_count} + + +def extract_files_from_tar( + tar_file: IO[bytes], +) -> Generator[Optional[IO[bytes]], None, None]: + with tarfile.open(fileobj=tar_file) as tar: + for member in tar.getmembers(): + file = tar.extractfile(member) + yield file + + +def filter_files_in_bucket(bucket: str, prefix: str) -> Generator[str, None, None]: + """Retrieve files in the specified bucket with the specified prefix.""" + s3_client = client("s3", region_name="us-east-1") + paginator = s3_client.get_paginator("list_objects_v2") + pages = paginator.paginate(Bucket=bucket, Prefix=prefix) + try: + for s3_object in ( + s3_object for page in pages for s3_object in page["Contents"] + ): + yield s3_object["Key"] + except KeyError: + raise KeyError(f"No files retrieved from {bucket} with prefix {prefix}") diff --git a/requirements.txt b/requirements.txt index 7b35692..3fc8f3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,13 @@ # -i https://pypi.org/simple +boto3==1.23.2 +botocore==1.26.2; python_version >= '3.6' certifi==2021.10.8 +jmespath==1.0.0; python_version >= '3.7' +python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +s3transfer==0.5.2; python_version >= '3.6' sentry-sdk==1.5.12 +six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +smart-open==6.0.0 urllib3==1.26.9; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4' diff --git a/setup.cfg b/setup.cfg index 6413b09..6f4ab0e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,4 +3,15 @@ max-line-length = 90 extend-ignore = E203 [isort] -profile = black \ No newline at end of file +profile = black + +[mypy] + +[mypy-boto3.*] +ignore_missing_imports = True + +[mypy-moto.*] +ignore_missing_imports = True + +[mypy-smart_open.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/test_ppod.py b/test_ppod.py index ee6b46f..36f7531 100644 --- a/test_ppod.py +++ b/test_ppod.py @@ -1,29 +1,72 @@ import logging -from ppod import lambda_handler +import pytest - -def test_ppod_expected_output(caplog, monkeypatch): - monkeypatch.setenv("WORKSPACE", "test") - caplog.set_level(logging.INFO) - output = lambda_handler({}, {}) - assert output == "lambda" +from ppod import extract_files_from_tar, filter_files_in_bucket, lambda_handler -def test_ppod_configures_sentry_if_dsn_present(caplog, monkeypatch): - monkeypatch.setenv("WORKSPACE", "test") +def test_ppod_configures_sentry_if_dsn_present( + caplog, monkeypatch, mocked_s3, request_data_matching_file +): monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456") caplog.set_level(logging.INFO) - lambda_handler({}, {}) + lambda_handler(request_data_matching_file, {}) assert ( "Sentry DSN found, exceptions will be sent to Sentry with env=test" in caplog.text ) -def test_webhook_doesnt_configure_sentry_if_dsn_not_present(caplog, monkeypatch): - monkeypatch.setenv("WORKSPACE", "test") +def test_ppod_doesnt_configure_sentry_if_dsn_not_present( + caplog, monkeypatch, mocked_s3, request_data_matching_file +): monkeypatch.delenv("SENTRY_DSN", raising=False) caplog.set_level(logging.INFO) - lambda_handler({}, {}) + lambda_handler(request_data_matching_file, {}) assert "Sentry DSN found" not in caplog.text + + +def test_ppod_matching_files(mocked_s3, request_data_matching_file): + output = lambda_handler(request_data_matching_file, {}) + assert output == {"files-processed": 1} + + +def test_ppod_no_files_raises_exception( + monkeypatch, mocked_s3, request_data_matching_file +): + monkeypatch.setenv("BUCKET", "no_files") + with pytest.raises(KeyError): + lambda_handler(request_data_matching_file, {}) + + +def test_ppod_no_matching_files_raises_exception(mocked_s3): + request_data = {"filename-prefix": "download/"} + with pytest.raises(KeyError): + lambda_handler(request_data, {}) + + +def test_extract_files_from_tar(): + files = extract_files_from_tar(open("fixtures/pod.tar.gz", "rb")) + assert next(files).read() == open("fixtures/pod.xml", "rb").read() + + +def test_filter_files_in_bucket_with_1001_matching_file(mocked_s3): + files = filter_files_in_bucket("a_lot_of_files", "upload/") + assert len(list(files)) == 1001 + + +def test_filter_files_in_bucket_with_matching_file(mocked_s3): + files = filter_files_in_bucket("ppod", "upload/") + assert next(files) == "upload/pod.tar.gz" + + +def test_filter_files_in_bucket_with_no_file(mocked_s3): + with pytest.raises(KeyError): + files = filter_files_in_bucket("no_files", "upload/") + next(files) + + +def test_filter_files_in_bucket_without_matching_file(mocked_s3): + with pytest.raises(KeyError): + files = filter_files_in_bucket("ppod", "download/") + next(files)