From d693f6459e674beeefa2bf6060f783b04adc554b Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Tue, 31 May 2022 13:50:12 -0400 Subject: [PATCH 1/4] ENSY-71-post-files-to-pod Why these changes are being introduced: * ppod needs to post MARC records to POD streams using an access token How this addresses that need: * Add mocked_pod and pod_response fixtures and update test_env fixture with new variables * Add requests module and related dependencies to Pipfile * Add post_files_to_pod function * Add unit tests for new function * Update README.md to include new envs Side effects of this change: * New envs required to run the code Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/ENSY-71 --- Pipfile | 3 + Pipfile.lock | 181 ++++++++++++++++--------- README.md | 6 + conftest.py | 23 +++- fixtures/pod_response.html | 271 +++++++++++++++++++++++++++++++++++++ ppod.py | 35 ++++- test_ppod.py | 34 ++++- 7 files changed, 480 insertions(+), 73 deletions(-) create mode 100644 fixtures/pod_response.html diff --git a/Pipfile b/Pipfile index 25ea7a9..9c49b2f 100644 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,8 @@ name = "pypi" boto3 = "*" sentry-sdk = "*" smart-open = "*" +requests = "*" +types-requests = "*" [dev-packages] bandit = "*" @@ -17,6 +19,7 @@ isort = "*" moto = "*" mypy = "*" pytest = "*" +requests-mock = "*" [requires] python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock index ecd0d06..2f71b4f 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "e2dba0a30b638806dfc2b40f8074796e417d9ab7332f00470b7255aaf3329ade" + "sha256": "a4f834b2601ab590f4df2fd22c19324699e92ef5bfc92bad98a2d25e48c3de08" }, "pipfile-spec": 6, "requires": { @@ -18,26 +18,43 @@ "default": { "boto3": { "hashes": [ - "sha256:4408cf07340d29d7a9c8d32cf71b1c54f86b768b2145d341d2698c1e467d7d32", - "sha256:7889c3a07171b8a43468a8644d7c95948dc9e1389c4aac2b689a428ee1a98300" + "sha256:3fb956d097105a0fb98c29a622ff233fa8de68519aabd7088d7ffd36dfc33214", + 
"sha256:b59a210fa6a87f0c755b40403ffc66b9b285680bbc5ad5245cf167e2def33620" ], "index": "pypi", - "version": "==1.23.2" + "version": "==1.23.7" }, "botocore": { "hashes": [ - "sha256:16b9d523a19d61b0edc80ef2253f9130165bad473b1b5707027f10975a8d5467", - "sha256:1977f2ad6b6263f4dd9e8b784e69b194988f16d6bd90c4eede15964f4eecf878" + "sha256:0f4a467188644382856e96e85bff0b453442d5cf0c0f554154571a6e2468a005", + "sha256:9f8d5e8d65b24d97fcb7804b84831e5627fceb52707167d2f496477675c98ded" ], "markers": "python_version >= '3.6'", - "version": "==1.26.2" + "version": "==1.26.7" }, "certifi": { "hashes": [ - "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", - "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7", + "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a" ], - "version": "==2021.10.8" + "markers": "python_version >= '3.6'", + "version": "==2022.5.18.1" + }, + "charset-normalizer": { + "hashes": [ + "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", + "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" + ], + "markers": "python_version >= '3'", + "version": "==2.0.12" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3'", + "version": "==3.3" }, "jmespath": { "hashes": [ @@ -55,6 +72,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.8.2" }, + "requests": { + "hashes": [ + "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", + "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" + ], + "index": "pypi", + "version": "==2.27.1" + }, "s3transfer": { "hashes": [ 
"sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971", @@ -87,6 +112,21 @@ "index": "pypi", "version": "==6.0.0" }, + "types-requests": { + "hashes": [ + "sha256:6d8463ffe1f6edcf2e5361740a6140e7a16d427c267d83c7c1d3d1298f4e67c5", + "sha256:d618d9809fa32f514cf17cea8460814da671c56366fb1c908accca8bf183112b" + ], + "index": "pypi", + "version": "==2.27.27" + }, + "types-urllib3": { + "hashes": [ + "sha256:2a2578e4b36341ccd240b00fccda9826988ff0589a44ba4a664bbd69ef348d27", + "sha256:5d2388aa76395b1e3999ff789ea5b3283677dad8e9bcf3d9117ba19271fd35d9" + ], + "version": "==1.26.14" + }, "urllib3": { "hashes": [ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", @@ -144,26 +184,27 @@ }, "boto3": { "hashes": [ - "sha256:4408cf07340d29d7a9c8d32cf71b1c54f86b768b2145d341d2698c1e467d7d32", - "sha256:7889c3a07171b8a43468a8644d7c95948dc9e1389c4aac2b689a428ee1a98300" + "sha256:3fb956d097105a0fb98c29a622ff233fa8de68519aabd7088d7ffd36dfc33214", + "sha256:b59a210fa6a87f0c755b40403ffc66b9b285680bbc5ad5245cf167e2def33620" ], "index": "pypi", - "version": "==1.23.2" + "version": "==1.23.7" }, "botocore": { "hashes": [ - "sha256:16b9d523a19d61b0edc80ef2253f9130165bad473b1b5707027f10975a8d5467", - "sha256:1977f2ad6b6263f4dd9e8b784e69b194988f16d6bd90c4eede15964f4eecf878" + "sha256:0f4a467188644382856e96e85bff0b453442d5cf0c0f554154571a6e2468a005", + "sha256:9f8d5e8d65b24d97fcb7804b84831e5627fceb52707167d2f496477675c98ded" ], "markers": "python_version >= '3.6'", - "version": "==1.26.2" + "version": "==1.26.7" }, "certifi": { "hashes": [ - "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", - "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" + "sha256:9c5705e395cd70084351dd8ad5c41e65655e08ce46f2ec9cf6c2c08390f71eb7", + "sha256:f1d53542ee8cbedbe2118b5686372fb33c297fcd6379b050cca0ef13a597382a" ], - "version": "==2021.10.8" + "markers": "python_version >= '3.6'", + "version": "==2022.5.18.1" }, "cffi": { 
"hashes": [ @@ -238,50 +279,50 @@ }, "coverage": { "hashes": [ - "sha256:06f54765cdbce99901871d50fe9f41d58213f18e98b170a30ca34f47de7dd5e8", - "sha256:114944e6061b68a801c5da5427b9173a0dd9d32cd5fcc18a13de90352843737d", - "sha256:1414e8b124611bf4df8d77215bd32cba6e3425da8ce9c1f1046149615e3a9a31", - "sha256:2781c43bffbbec2b8867376d4d61916f5e9c4cc168232528562a61d1b4b01879", - "sha256:2ab88a01cd180b5640ccc9c47232e31924d5f9967ab7edd7e5c91c68eee47a69", - "sha256:338c417613f15596af9eb7a39353b60abec9d8ce1080aedba5ecee6a5d85f8d3", - "sha256:3401b0d2ed9f726fadbfa35102e00d1b3547b73772a1de5508ef3bdbcb36afe7", - "sha256:462105283de203df8de58a68c1bb4ba2a8a164097c2379f664fa81d6baf94b81", - "sha256:4cd696aa712e6cd16898d63cf66139dc70d998f8121ab558f0e1936396dbc579", - "sha256:4d06380e777dd6b35ee936f333d55b53dc4a8271036ff884c909cf6e94be8b6c", - "sha256:61f4fbf3633cb0713437291b8848634ea97f89c7e849c2be17a665611e433f53", - "sha256:6d4a6f30f611e657495cc81a07ff7aa8cd949144e7667c5d3e680d73ba7a70e4", - "sha256:6f5fee77ec3384b934797f1873758f796dfb4f167e1296dc00f8b2e023ce6ee9", - "sha256:75b5dbffc334e0beb4f6c503fb95e6d422770fd2d1b40a64898ea26d6c02742d", - "sha256:7835f76a081787f0ca62a53504361b3869840a1620049b56d803a8cb3a9eeea3", - "sha256:79bf405432428e989cad7b8bc60581963238f7645ae8a404f5dce90236cc0293", - "sha256:8329635c0781927a2c6ae068461e19674c564e05b86736ab8eb29c420ee7dc20", - "sha256:8586b177b4407f988731eb7f41967415b2197f35e2a6ee1a9b9b561f6323c8e9", - "sha256:892e7fe32191960da559a14536768a62e83e87bbb867e1b9c643e7e0fbce2579", - "sha256:91502bf27cbd5c83c95cfea291ef387469f2387508645602e1ca0fd8a4ba7548", - "sha256:93b16b08f94c92cab88073ffd185070cdcb29f1b98df8b28e6649145b7f2c90d", - "sha256:9c9441d57b0963cf8340268ad62fc83de61f1613034b79c2b1053046af0c5284", - "sha256:ad8f9068f5972a46d50fe5f32c09d6ee11da69c560fcb1b4c3baea246ca4109b", - "sha256:afb03f981fadb5aed1ac6e3dd34f0488e1a0875623d557b6fad09b97a942b38a", - "sha256:b5ba058610e8289a07db2a57bce45a1793ec0d3d11db28c047aae2aa1a832572", - 
"sha256:baa8be8aba3dd1e976e68677be68a960a633a6d44c325757aefaa4d66175050f", - "sha256:c06455121a089252b5943ea682187a4e0a5cf0a3fb980eb8e7ce394b144430a9", - "sha256:c1a9942e282cc9d3ed522cd3e3cab081149b27ea3bda72d6f61f84eaf88c1a63", - "sha256:c488db059848702aff30aa1d90ef87928d4e72e4f00717343800546fdbff0a94", - "sha256:cb5311d6ccbd22578c80028c5e292a7ab9adb91bd62c1982087fad75abe2e63d", - "sha256:cbe91bc84be4e5ef0b1480d15c7b18e29c73bdfa33e07d3725da7d18e1b0aff2", - "sha256:cc692c9ee18f0dd3214843779ba6b275ee4bb9b9a5745ba64265bce911aefd1a", - "sha256:cc972d829ad5ef4d4c5fcabd2bbe2add84ce8236f64ba1c0c72185da3a273130", - "sha256:ceb6534fcdfb5c503affb6b1130db7b5bfc8a0f77fa34880146f7a5c117987d0", - "sha256:d522f1dc49127eab0bfbba4e90fa068ecff0899bbf61bf4065c790ddd6c177fe", - "sha256:db094a6a4ae6329ed322a8973f83630b12715654c197dd392410400a5bfa1a73", - "sha256:df32ee0f4935a101e4b9a5f07b617d884a531ed5666671ff6ac66d2e8e8246d8", - "sha256:e5af1feee71099ae2e3b086ec04f57f9950e1be9ecf6c420696fea7977b84738", - "sha256:e814a4a5a1d95223b08cdb0f4f57029e8eab22ffdbae2f97107aeef28554517e", - "sha256:f8cabc5fd0091976ab7b020f5708335033e422de25e20ddf9416bdce2b7e07d8", - "sha256:fbc86ae8cc129c801e7baaafe3addf3c8d49c9c1597c44bdf2d78139707c3c62" + "sha256:00c8544510f3c98476bbd58201ac2b150ffbcce46a8c3e4fb89ebf01998f806a", + "sha256:016d7f5cf1c8c84f533a3c1f8f36126fbe00b2ec0ccca47cc5731c3723d327c6", + "sha256:03014a74023abaf5a591eeeaf1ac66a73d54eba178ff4cb1fa0c0a44aae70383", + "sha256:033ebec282793bd9eb988d0271c211e58442c31077976c19c442e24d827d356f", + "sha256:21e6686a95025927775ac501e74f5940cdf6fe052292f3a3f7349b0abae6d00f", + "sha256:26f8f92699756cb7af2b30720de0c5bb8d028e923a95b6d0c891088025a1ac8f", + "sha256:2e76bd16f0e31bc2b07e0fb1379551fcd40daf8cdf7e24f31a29e442878a827c", + "sha256:341e9c2008c481c5c72d0e0dbf64980a4b2238631a7f9780b0fe2e95755fb018", + "sha256:3cfd07c5889ddb96a401449109a8b97a165be9d67077df6802f59708bfb07720", + "sha256:4002f9e8c1f286e986fe96ec58742b93484195defc01d5cc7809b8f7acb5ece3", 
+ "sha256:50ed480b798febce113709846b11f5d5ed1e529c88d8ae92f707806c50297abf", + "sha256:543e172ce4c0de533fa892034cce260467b213c0ea8e39da2f65f9a477425211", + "sha256:5a78cf2c43b13aa6b56003707c5203f28585944c277c1f3f109c7b041b16bd39", + "sha256:5cd698341626f3c77784858427bad0cdd54a713115b423d22ac83a28303d1d95", + "sha256:60c2147921da7f4d2d04f570e1838db32b95c5509d248f3fe6417e91437eaf41", + "sha256:62d382f7d77eeeaff14b30516b17bcbe80f645f5cf02bb755baac376591c653c", + "sha256:69432946f154c6add0e9ede03cc43b96e2ef2733110a77444823c053b1ff5166", + "sha256:727dafd7f67a6e1cad808dc884bd9c5a2f6ef1f8f6d2f22b37b96cb0080d4f49", + "sha256:742fb8b43835078dd7496c3c25a1ec8d15351df49fb0037bffb4754291ef30ce", + "sha256:750e13834b597eeb8ae6e72aa58d1d831b96beec5ad1d04479ae3772373a8088", + "sha256:7b546cf2b1974ddc2cb222a109b37c6ed1778b9be7e6b0c0bc0cf0438d9e45a6", + "sha256:83bd142cdec5e4a5c4ca1d4ff6fa807d28460f9db919f9f6a31babaaa8b88426", + "sha256:8d2e80dd3438e93b19e1223a9850fa65425e77f2607a364b6fd134fcd52dc9df", + "sha256:9229d074e097f21dfe0643d9d0140ee7433814b3f0fc3706b4abffd1e3038632", + "sha256:968ed5407f9460bd5a591cefd1388cc00a8f5099de9e76234655ae48cfdbe2c3", + "sha256:9c82f2cd69c71698152e943f4a5a6b83a3ab1db73b88f6e769fabc86074c3b08", + "sha256:a00441f5ea4504f5abbc047589d09e0dc33eb447dc45a1a527c8b74bfdd32c65", + "sha256:a022394996419142b33a0cf7274cb444c01d2bb123727c4bb0b9acabcb515dea", + "sha256:af5b9ee0fc146e907aa0f5fb858c3b3da9199d78b7bb2c9973d95550bd40f701", + "sha256:b5578efe4038be02d76c344007b13119b2b20acd009a88dde8adec2de4f630b5", + "sha256:b84ab65444dcc68d761e95d4d70f3cfd347ceca5a029f2ffec37d4f124f61311", + "sha256:c53ad261dfc8695062fc8811ac7c162bd6096a05a19f26097f411bdf5747aee7", + "sha256:cc173f1ce9ffb16b299f51c9ce53f66a62f4d975abe5640e976904066f3c835d", + "sha256:d548edacbf16a8276af13063a2b0669d58bbcfca7c55a255f84aac2870786a61", + "sha256:d55fae115ef9f67934e9f1103c9ba826b4c690e4c5bcf94482b8b2398311bf9c", + 
"sha256:d8099ea680201c2221f8468c372198ceba9338a5fec0e940111962b03b3f716a", + "sha256:e35217031e4b534b09f9b9a5841b9344a30a6357627761d4218818b865d45055", + "sha256:e4f52c272fdc82e7c65ff3f17a7179bc5f710ebc8ce8a5cadac81215e8326740", + "sha256:e637ae0b7b481905358624ef2e81d7fb0b1af55f5ff99f9ba05442a444b11e45", + "sha256:eef5292b60b6de753d6e7f2d128d5841c7915fb1e3321c3a1fe6acfe76c38052", + "sha256:fb45fe08e1abc64eb836d187b20a59172053999823f7f6ef4f18a819c44ba16f" ], "index": "pypi", - "version": "==6.3.3" + "version": "==6.4" }, "cryptography": { "hashes": [ @@ -429,11 +470,11 @@ }, "moto": { "hashes": [ - "sha256:8928ec168e5fd88b1127413b2fa570a80d45f25182cdad793edd208d07825269", - "sha256:ba683e70950b6579189bc12d74c1477aa036c090c6ad8b151a22f5896c005113" + "sha256:d69bc53850e3a5fb14ec92beb2b11e94ed967900d3961297a901b0b91b227232", + "sha256:f3b5f77780ed7a852670b4079931d8cd397983f631de7f9b09b81747a4bd56cd" ], "index": "pypi", - "version": "==3.1.9" + "version": "==3.1.10" }, "mypy": { "hashes": [ @@ -616,9 +657,17 @@ "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "index": "pypi", "version": "==2.27.1" }, + "requests-mock": { + "hashes": [ + "sha256:0a2d38a117c08bb78939ec163522976ad59a6b7fdd82b709e23bb98004a44970", + "sha256:8d72abe54546c1fc9696fa1516672f1031d72a55a1d66c85184f972a24ba0eba" + ], + "index": "pypi", + "version": "==1.9.3" + }, "responses": { "hashes": [ "sha256:18831bc2d72443b67664d98038374a6fa1f27eaaff4dd9a7d7613723416fea3c", @@ -672,7 +721,7 @@ "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708", "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376" ], - "markers": "python_version < '3.10'", + "markers": "python_version >= '3.7'", "version": "==4.2.0" }, "urllib3": { diff --git a/README.md b/README.md index 
8111a57..5b55bf4 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,16 @@ make lint ``` ## Required ENV +`ACCESS_TOKEN` : The POD access token used to authenticate uploads. The access tokens can be found on the `Manage Organization` page. + `BUCKET` = The bucket containing the compressed MARCXML files to be submitted to POD. +`POD_URL` = The POD URL which includes the organization code: `https://pod.stanford.edu/organizations/{Organization Code}/uploads?stream=` + `SENTRY_DSN` = If set to a valid Sentry DSN, enables Sentry exception monitoring. This is not needed for local development. +`STREAM` = The POD stream to use when posting MARCXML records. + `WORKSPACE` = Set to `dev` for local development, this will be set to `stage` and `prod` in those environments by Terraform. ### To run locally diff --git a/conftest.py b/conftest.py index 10e70a8..98fb5d4 100644 --- a/conftest.py +++ b/conftest.py @@ -2,6 +2,7 @@ import boto3 import pytest +import requests_mock from moto import mock_s3 @@ -18,6 +19,20 @@ def request_data_matching_file(): yield request_data +@pytest.fixture() +def mocked_pod(): + with requests_mock.Mocker() as m, open( + "fixtures/pod_response.html", "r" + ) as pod_response: + request_headers = {"Authorization": "Bearer 1234abcd"} + m.post( + "http://example.example/organizations/ORG/uploads?stream=default", + text=pod_response.read(), + request_headers=request_headers, + ) + yield m + + @pytest.fixture(scope="session") def mocked_s3(aws_credentials): with mock_s3(): @@ -50,5 +65,11 @@ def mocked_s3(aws_credentials): @pytest.fixture(autouse=True) def test_env(): - os.environ = {"WORKSPACE": "test", "BUCKET": "ppod"} + os.environ = { + "ACCESS_TOKEN": "1234abcd", + "BUCKET": "ppod", + "POD_URL": "http://example.example/organizations/ORG/uploads?stream=", + "STREAM": "default", + "WORKSPACE": "test", + } yield diff --git a/fixtures/pod_response.html b/fixtures/pod_response.html new file mode 100644 index 0000000..460ae74 --- /dev/null +++ 
b/fixtures/pod_response.html @@ -0,0 +1,271 @@ + + + + POD Aggregator + + + + + + + + + + + + + + + +
+ + +
+
+
+ +

+ MIT +

+
+ MARC code: MCM +
+ +
+ +
+
+ +
+
+
+ +
+
+ + +
+ + +
+ + + +
+

Upload: 2022-05-31T15:46:31Z

+ + + +
+ +

+ Part of stream: Testing 04-2022 +

+ +

Files

+ +
    +
  • + + Valid MARC (adds, updates, or deletes) +
  • +
  • + + Deletes (text file) +
  • +
  • + + Invalid MARC +
  • +
  • + + Neither MARC nor Deletes +
  • +
  • + + Unknown +
  • +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
StatusFileDate createdSizeContent typeRecordsActions
+ + success + + bytesio_test.xml + 5.35 KBapplication/marcxml+xml1 + + + + + Profile + +
+ + +
+ +
+ + + + + + + \ No newline at end of file diff --git a/ppod.py b/ppod.py index 9cfebf3..19ad84c 100644 --- a/ppod.py +++ b/ppod.py @@ -4,6 +4,7 @@ from io import BytesIO from typing import IO, Generator, Optional +import requests import sentry_sdk import smart_open from boto3 import client @@ -18,8 +19,12 @@ def lambda_handler(event: dict, context: object) -> dict: logger.info( "Sentry DSN found, exceptions will be sent to Sentry with env=%s", env ) - file_count = 0 + bucket = os.environ["BUCKET"] + url = os.environ["POD_URL"] + os.environ["STREAM"] + headers = {"Authorization": f'Bearer {os.environ["ACCESS_TOKEN"]}'} + + file_count = 0 s3_files = filter_files_in_bucket( bucket, event["filename-prefix"], @@ -30,8 +35,12 @@ def lambda_handler(event: dict, context: object) -> dict: xml_files = extract_files_from_tar(s3_file_content) for xml_file in xml_files: if xml_file: - add_namespaces_to_alma_marcxml(xml_file) - # post modified_xml to POD + modified_xml = add_namespaces_to_alma_marcxml(xml_file) + pod_file_name = os.path.basename(s3_file).replace("tar.gz", "xml") + response = post_files_to_pod( + url, headers, pod_file_name, modified_xml + ) + logger.info("%s: %s", pod_file_name, response) file_count += 1 else: raise ValueError(f"No files extracted from {s3_file}") @@ -84,3 +93,23 @@ def filter_files_in_bucket(bucket: str, prefix: str) -> Generator[str, None, Non yield s3_object["Key"] except KeyError: raise KeyError(f"No files retrieved from {bucket} with prefix {prefix}") + + +def post_files_to_pod( + url: str, headers: dict, pod_file_name: str, file_content: BytesIO +) -> requests.Response: + """Post file content to POD with the specified file name.""" + files = { + "upload[files][]": ( + pod_file_name, + file_content, + "application/xml", + ), + } + response = requests.post( + url, + headers=headers, + files=files, + ) + response.raise_for_status() + return response diff --git a/test_ppod.py b/test_ppod.py index fb29dc7..a9fa788 100644 --- a/test_ppod.py +++ 
b/test_ppod.py @@ -1,17 +1,19 @@ import logging import pytest +import requests from ppod import ( add_namespaces_to_alma_marcxml, extract_files_from_tar, filter_files_in_bucket, lambda_handler, + post_files_to_pod, ) def test_ppod_configures_sentry_if_dsn_present( - caplog, monkeypatch, mocked_s3, request_data_matching_file + caplog, monkeypatch, mocked_pod, mocked_s3, request_data_matching_file ): monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456") caplog.set_level(logging.INFO) @@ -23,7 +25,7 @@ def test_ppod_configures_sentry_if_dsn_present( def test_ppod_doesnt_configure_sentry_if_dsn_not_present( - caplog, monkeypatch, mocked_s3, request_data_matching_file + caplog, monkeypatch, mocked_pod, mocked_s3, request_data_matching_file ): monkeypatch.delenv("SENTRY_DSN", raising=False) caplog.set_level(logging.INFO) @@ -31,7 +33,7 @@ def test_ppod_doesnt_configure_sentry_if_dsn_not_present( assert "Sentry DSN found" not in caplog.text -def test_ppod_matching_files(mocked_s3, request_data_matching_file): +def test_ppod_matching_files(mocked_pod, mocked_s3, request_data_matching_file): output = lambda_handler(request_data_matching_file, {}) assert output == {"files_processed": 1} @@ -99,3 +101,29 @@ def test_filter_files_in_bucket_without_matching_file(mocked_s3): with pytest.raises(KeyError): files = filter_files_in_bucket("ppod", "download/") next(files) + + +def test_post_files_to_pod_success(mocked_pod): + with open("fixtures/pod_with_namespaces.xml", "rb") as pod_with_namespaces, open( + "fixtures/pod_response.html", "r" + ) as pod_response: + response = post_files_to_pod( + "http://example.example/organizations/ORG/uploads?stream=default", + {"Authorization": "Bearer 1234abcd"}, + "pod_file", + pod_with_namespaces, + ) + assert response.status_code == 200 + assert response.text == pod_response.read() + + +def test_post_files_to_pod_bad_url_raises_error(): + with pytest.raises(requests.exceptions.ConnectionError), open( + 
"fixtures/pod_with_namespaces.xml", "rb" + ) as pod_with_namespaces: + post_files_to_pod( + "http://example.example/organizations/ORG/uploads?stream=default", + {"Authorization": "Bearer 1234abcd"}, + "pod_file", + pod_with_namespaces, + ) From 397dc13bf3fcc4d4604e3acf5a7f1dae800c0280 Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Wed, 1 Jun 2022 10:26:37 -0400 Subject: [PATCH 2/4] Updates based on discussion in PR #4 * Rename fixtures for consistency and accuracy * Add marcxml and mocked_ssm fixtures * Update mocked_pod fixture * Remove pod response fixture due to lack of use response text in the app * Rename function to post_file_to_pod * Correct file type in post_file_to_pod * Update log message after posting a file * Shift types-requests to dev in Pipfile * Update tests with new fixtures --- Pipfile | 2 +- conftest.py | 44 ++- fixtures/{pod.tar.gz => marc.tar.gz} | Bin fixtures/{pod.xml => marc.xml} | 0 ...amespaces.xml => marc_with_namespaces.xml} | 0 fixtures/pod_response.html | 271 ------------------ ppod.py | 22 +- test_ppod.py | 63 ++-- 8 files changed, 76 insertions(+), 326 deletions(-) rename fixtures/{pod.tar.gz => marc.tar.gz} (100%) rename fixtures/{pod.xml => marc.xml} (100%) rename fixtures/{pod_with_namespaces.xml => marc_with_namespaces.xml} (100%) delete mode 100644 fixtures/pod_response.html diff --git a/Pipfile b/Pipfile index 9c49b2f..ff76f08 100644 --- a/Pipfile +++ b/Pipfile @@ -8,7 +8,6 @@ boto3 = "*" sentry-sdk = "*" smart-open = "*" requests = "*" -types-requests = "*" [dev-packages] bandit = "*" @@ -20,6 +19,7 @@ moto = "*" mypy = "*" pytest = "*" requests-mock = "*" +types-requests = "*" [requires] python_version = "3.9" diff --git a/conftest.py b/conftest.py index 98fb5d4..809684f 100644 --- a/conftest.py +++ b/conftest.py @@ -3,7 +3,7 @@ import boto3 import pytest import requests_mock -from moto import mock_s3 +from moto import mock_s3, mock_ssm @pytest.fixture(scope="session") @@ -14,29 +14,36 @@ def aws_credentials(): 
@pytest.fixture() -def request_data_matching_file(): - request_data = {"filename-prefix": "upload/"} - yield request_data +def marcxml(): + with open("fixtures/marc.xml", "rb") as marcxml: + yield marcxml + + +@pytest.fixture() +def marcxml_with_namespaces(): + with open("fixtures/marc_with_namespaces.xml", "rb") as marcxml_with_namespaces: + yield marcxml_with_namespaces @pytest.fixture() def mocked_pod(): - with requests_mock.Mocker() as m, open( - "fixtures/pod_response.html", "r" - ) as pod_response: + with requests_mock.Mocker() as m: request_headers = {"Authorization": "Bearer 1234abcd"} m.post( "http://example.example/organizations/ORG/uploads?stream=default", - text=pod_response.read(), request_headers=request_headers, ) + m.post( + "http://example.example/organizations/ORG/uploads?stream=not-a-stream", + status_code=404, + ) yield m @pytest.fixture(scope="session") def mocked_s3(aws_credentials): with mock_s3(): - with open("fixtures/pod.tar.gz", "rb") as pod_tar, open( + with open("fixtures/marc.tar.gz", "rb") as pod_tar, open( "fixtures/empty.tar.gz", "rb" ) as empty_tar: s3 = boto3.client("s3", region_name="us-east-1") @@ -44,7 +51,7 @@ def mocked_s3(aws_credentials): s3.put_object( Body=pod_tar, Bucket="ppod", - Key="upload/pod.tar.gz", + Key="upload/marc.tar.gz", ) s3.create_bucket(Bucket="empty_tar") s3.put_object( @@ -63,13 +70,28 @@ def mocked_s3(aws_credentials): yield s3 +@pytest.fixture() +def mocked_ssm(): + with mock_ssm(): + ssm = boto3.client("ssm", region_name="us-east-1") + ssm.put_parameter( + Name="/apps/ppod/stream-name", + Value="default", + ) + yield ssm + + +@pytest.fixture() +def request_data_matching_file(): + yield {"filename-prefix": "upload/"} + + @pytest.fixture(autouse=True) def test_env(): os.environ = { "ACCESS_TOKEN": "1234abcd", "BUCKET": "ppod", "POD_URL": "http://example.example/organizations/ORG/uploads?stream=", - "STREAM": "default", "WORKSPACE": "test", } yield diff --git a/fixtures/pod.tar.gz b/fixtures/marc.tar.gz 
similarity index 100% rename from fixtures/pod.tar.gz rename to fixtures/marc.tar.gz diff --git a/fixtures/pod.xml b/fixtures/marc.xml similarity index 100% rename from fixtures/pod.xml rename to fixtures/marc.xml diff --git a/fixtures/pod_with_namespaces.xml b/fixtures/marc_with_namespaces.xml similarity index 100% rename from fixtures/pod_with_namespaces.xml rename to fixtures/marc_with_namespaces.xml diff --git a/fixtures/pod_response.html b/fixtures/pod_response.html deleted file mode 100644 index 460ae74..0000000 --- a/fixtures/pod_response.html +++ /dev/null @@ -1,271 +0,0 @@ - - - - POD Aggregator - - - - - - - - - - - - - - - -
- - -
-
-
- -

- MIT -

-
- MARC code: MCM -
- -
- -
-
- -
-
-
- -
-
- - -
- - -
- - - -
-

Upload: 2022-05-31T15:46:31Z

- - - -
- -

- Part of stream: Testing 04-2022 -

- -

Files

- -
    -
  • - - Valid MARC (adds, updates, or deletes) -
  • -
  • - - Deletes (text file) -
  • -
  • - - Invalid MARC -
  • -
  • - - Neither MARC nor Deletes -
  • -
  • - - Unknown -
  • -
- - - - - - - - - - - - - - - - - - - - - - - - - -
StatusFileDate createdSizeContent typeRecordsActions
- - success - - bytesio_test.xml - 5.35 KBapplication/marcxml+xml1 - - - - - Profile - -
- - -
- -
- - - - - - - \ No newline at end of file diff --git a/ppod.py b/ppod.py index 19ad84c..aa9b81d 100644 --- a/ppod.py +++ b/ppod.py @@ -21,8 +21,12 @@ def lambda_handler(event: dict, context: object) -> dict: ) bucket = os.environ["BUCKET"] - url = os.environ["POD_URL"] + os.environ["STREAM"] - headers = {"Authorization": f'Bearer {os.environ["ACCESS_TOKEN"]}'} + ssm_client = client("ssm", region_name="us-east-1") + stream = ssm_client.get_parameter( + Name="/apps/ppod/stream-name", WithDecryption=True + )["Parameter"]["Value"] + pod_url = os.environ["POD_URL"] + stream + pod_headers = {"Authorization": f'Bearer {os.environ["ACCESS_TOKEN"]}'} file_count = 0 s3_files = filter_files_in_bucket( @@ -37,10 +41,14 @@ def lambda_handler(event: dict, context: object) -> dict: if xml_file: modified_xml = add_namespaces_to_alma_marcxml(xml_file) pod_file_name = os.path.basename(s3_file).replace("tar.gz", "xml") - response = post_files_to_pod( - url, headers, pod_file_name, modified_xml + response = post_file_to_pod( + pod_url, pod_headers, pod_file_name, modified_xml + ) + logger.info( + "Submited file %s and received response: %s", + pod_file_name, + response, ) - logger.info("%s: %s", pod_file_name, response) file_count += 1 else: raise ValueError(f"No files extracted from {s3_file}") @@ -95,7 +103,7 @@ def filter_files_in_bucket(bucket: str, prefix: str) -> Generator[str, None, Non raise KeyError(f"No files retrieved from {bucket} with prefix {prefix}") -def post_files_to_pod( +def post_file_to_pod( url: str, headers: dict, pod_file_name: str, file_content: BytesIO ) -> requests.Response: """Post file content to POD with the specified file name.""" @@ -103,7 +111,7 @@ def post_files_to_pod( "upload[files][]": ( pod_file_name, file_content, - "application/xml", + "application/marcxml+xml", ), } response = requests.post( diff --git a/test_ppod.py b/test_ppod.py index a9fa788..3f0f6cc 100644 --- a/test_ppod.py +++ b/test_ppod.py @@ -8,12 +8,12 @@ extract_files_from_tar, 
filter_files_in_bucket, lambda_handler, - post_files_to_pod, + post_file_to_pod, ) def test_ppod_configures_sentry_if_dsn_present( - caplog, monkeypatch, mocked_pod, mocked_s3, request_data_matching_file + caplog, monkeypatch, mocked_pod, mocked_s3, mocked_ssm, request_data_matching_file ): monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456") caplog.set_level(logging.INFO) @@ -25,7 +25,7 @@ def test_ppod_configures_sentry_if_dsn_present( def test_ppod_doesnt_configure_sentry_if_dsn_not_present( - caplog, monkeypatch, mocked_pod, mocked_s3, request_data_matching_file + caplog, monkeypatch, mocked_pod, mocked_s3, mocked_ssm, request_data_matching_file ): monkeypatch.delenv("SENTRY_DSN", raising=False) caplog.set_level(logging.INFO) @@ -33,13 +33,15 @@ def test_ppod_doesnt_configure_sentry_if_dsn_not_present( assert "Sentry DSN found" not in caplog.text -def test_ppod_matching_files(mocked_pod, mocked_s3, request_data_matching_file): +def test_ppod_matching_files( + mocked_pod, mocked_s3, mocked_ssm, request_data_matching_file +): output = lambda_handler(request_data_matching_file, {}) assert output == {"files_processed": 1} def test_ppod_no_files_raises_exception( - monkeypatch, mocked_s3, request_data_matching_file + monkeypatch, mocked_s3, mocked_ssm, request_data_matching_file ): monkeypatch.setenv("BUCKET", "no_files") with pytest.raises(KeyError): @@ -60,12 +62,9 @@ def test_ppod_no_matching_files_raises_exception(mocked_s3): lambda_handler(request_data, {}) -def test_add_namespaces_to_alma_marcxml(): - with open("fixtures/pod.xml", "rb") as pod_xml, open( - "fixtures/pod_with_namespaces.xml", "rb" - ) as pod_xml_namespaces: - modified_xml = add_namespaces_to_alma_marcxml(pod_xml) - assert modified_xml.read() == pod_xml_namespaces.read() +def test_add_namespaces_to_alma_marcxml(marcxml, marcxml_with_namespaces): + modified_xml = add_namespaces_to_alma_marcxml(marcxml) + assert modified_xml.read() == marcxml_with_namespaces.read() 
def test_add_namespaces_to_alma_marcxml_invalid_xml_raises_exception(): @@ -73,12 +72,10 @@ def test_add_namespaces_to_alma_marcxml_invalid_xml_raises_exception(): add_namespaces_to_alma_marcxml(invalid_xml) -def test_extract_files_from_tar(): - with open("fixtures/pod.tar.gz", "rb") as pod_tar, open( - "fixtures/pod.xml", "rb" - ) as pod_xml: +def test_extract_files_from_tar(marcxml): + with open("fixtures/marc.tar.gz", "rb") as pod_tar: files = extract_files_from_tar(pod_tar) - assert next(files).read() == pod_xml.read() + assert next(files).read() == marcxml.read() def test_filter_files_in_bucket_with_1001_matching_file(mocked_s3): @@ -88,7 +85,7 @@ def test_filter_files_in_bucket_with_1001_matching_file(mocked_s3): def test_filter_files_in_bucket_with_matching_file(mocked_s3): files = filter_files_in_bucket("ppod", "upload/") - assert next(files) == "upload/pod.tar.gz" + assert next(files) == "upload/marc.tar.gz" def test_filter_files_in_bucket_with_no_file(mocked_s3): @@ -103,27 +100,21 @@ def test_filter_files_in_bucket_without_matching_file(mocked_s3): next(files) -def test_post_files_to_pod_success(mocked_pod): - with open("fixtures/pod_with_namespaces.xml", "rb") as pod_with_namespaces, open( - "fixtures/pod_response.html", "r" - ) as pod_response: - response = post_files_to_pod( - "http://example.example/organizations/ORG/uploads?stream=default", - {"Authorization": "Bearer 1234abcd"}, - "pod_file", - pod_with_namespaces, - ) - assert response.status_code == 200 - assert response.text == pod_response.read() +def test_post_files_to_pod_success(marcxml_with_namespaces, mocked_pod): + response = post_file_to_pod( + "http://example.example/organizations/ORG/uploads?stream=default", + {"Authorization": "Bearer 1234abcd"}, + "pod_file", + marcxml_with_namespaces, + ) + assert response.status_code == 200 -def test_post_files_to_pod_bad_url_raises_error(): - with pytest.raises(requests.exceptions.ConnectionError), open( - "fixtures/pod_with_namespaces.xml", "rb" 
- ) as pod_with_namespaces: - post_files_to_pod( - "http://example.example/organizations/ORG/uploads?stream=default", +def test_post_files_to_pod_bad_url_raises_error(marcxml_with_namespaces, mocked_pod): + with pytest.raises(requests.exceptions.HTTPError): + post_file_to_pod( + "http://example.example/organizations/ORG/uploads?stream=not-a-stream", {"Authorization": "Bearer 1234abcd"}, "pod_file", - pod_with_namespaces, + marcxml_with_namespaces, ) From 448ad30f21687164fb3e53f86c6d082f6228e8fd Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Wed, 1 Jun 2022 16:17:27 -0400 Subject: [PATCH 3/4] Updates based on further discussion in PR #4 * Change fixture parameters for greater efficiency * Renamed ENV for clarity * Update SSM call to remove unnecessary decryption * Update requirements.txt * Update README.md with dev1 testing instructions * Remove unnecessary fixture calls --- Pipfile.lock | 138 +++++++++++++++++++++++------------------------ README.md | 44 ++++++++------- conftest.py | 8 +-- ppod.py | 8 +-- requirements.txt | 11 ++-- test_ppod.py | 30 +++++------ 6 files changed, 122 insertions(+), 117 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 2f71b4f..4dedcd9 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "a4f834b2601ab590f4df2fd22c19324699e92ef5bfc92bad98a2d25e48c3de08" + "sha256": "40fbdef0564f55449824263b012a35381e2483054a0f7efa2ea289a27017185f" }, "pipfile-spec": 6, "requires": { @@ -18,19 +18,19 @@ "default": { "boto3": { "hashes": [ - "sha256:3fb956d097105a0fb98c29a622ff233fa8de68519aabd7088d7ffd36dfc33214", - "sha256:b59a210fa6a87f0c755b40403ffc66b9b285680bbc5ad5245cf167e2def33620" + "sha256:8df0215521969e229a6a004eedc6a484a3656611ddee698419d3658ae9c53c50", + "sha256:a42900a0ea75600a76b371b03ac645461e4f3c97bb13ae5136bb4d3c87c6c110" ], "index": "pypi", - "version": "==1.23.7" + "version": "==1.24.0" }, "botocore": { "hashes": [ - 
"sha256:0f4a467188644382856e96e85bff0b453442d5cf0c0f554154571a6e2468a005", - "sha256:9f8d5e8d65b24d97fcb7804b84831e5627fceb52707167d2f496477675c98ded" + "sha256:40823d9c3e2e707e74112aa0b1073e9eeb6c7f6a7d123518b5f768fc11b250f7", + "sha256:505ba80201dd577cb4c704fea5b16142c85473e4e2ef3eb55ebd991037b70142" ], - "markers": "python_version >= '3.6'", - "version": "==1.26.7" + "markers": "python_version >= '3.7'", + "version": "==1.27.0" }, "certifi": { "hashes": [ @@ -82,11 +82,11 @@ }, "s3transfer": { "hashes": [ - "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971", - "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed" + "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd", + "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947" ], - "markers": "python_version >= '3.6'", - "version": "==0.5.2" + "markers": "python_version >= '3.7'", + "version": "==0.6.0" }, "sentry-sdk": { "hashes": [ @@ -112,21 +112,6 @@ "index": "pypi", "version": "==6.0.0" }, - "types-requests": { - "hashes": [ - "sha256:6d8463ffe1f6edcf2e5361740a6140e7a16d427c267d83c7c1d3d1298f4e67c5", - "sha256:d618d9809fa32f514cf17cea8460814da671c56366fb1c908accca8bf183112b" - ], - "index": "pypi", - "version": "==2.27.27" - }, - "types-urllib3": { - "hashes": [ - "sha256:2a2578e4b36341ccd240b00fccda9826988ff0589a44ba4a664bbd69ef348d27", - "sha256:5d2388aa76395b1e3999ff789ea5b3283677dad8e9bcf3d9117ba19271fd35d9" - ], - "version": "==1.26.14" - }, "urllib3": { "hashes": [ "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14", @@ -184,19 +169,19 @@ }, "boto3": { "hashes": [ - "sha256:3fb956d097105a0fb98c29a622ff233fa8de68519aabd7088d7ffd36dfc33214", - "sha256:b59a210fa6a87f0c755b40403ffc66b9b285680bbc5ad5245cf167e2def33620" + "sha256:8df0215521969e229a6a004eedc6a484a3656611ddee698419d3658ae9c53c50", + "sha256:a42900a0ea75600a76b371b03ac645461e4f3c97bb13ae5136bb4d3c87c6c110" ], "index": "pypi", - "version": 
"==1.23.7" + "version": "==1.24.0" }, "botocore": { "hashes": [ - "sha256:0f4a467188644382856e96e85bff0b453442d5cf0c0f554154571a6e2468a005", - "sha256:9f8d5e8d65b24d97fcb7804b84831e5627fceb52707167d2f496477675c98ded" + "sha256:40823d9c3e2e707e74112aa0b1073e9eeb6c7f6a7d123518b5f768fc11b250f7", + "sha256:505ba80201dd577cb4c704fea5b16142c85473e4e2ef3eb55ebd991037b70142" ], - "markers": "python_version >= '3.6'", - "version": "==1.26.7" + "markers": "python_version >= '3.7'", + "version": "==1.27.0" }, "certifi": { "hashes": [ @@ -470,40 +455,40 @@ }, "moto": { "hashes": [ - "sha256:d69bc53850e3a5fb14ec92beb2b11e94ed967900d3961297a901b0b91b227232", - "sha256:f3b5f77780ed7a852670b4079931d8cd397983f631de7f9b09b81747a4bd56cd" + "sha256:1b0c472f4b7401775a92e63fbcfa26112a00e087b9f6edda12202a39c62c6181", + "sha256:67dbd45c6e09e4b3968a8b08f9a5763b15551383ecd3effdbc0c1744e38248dd" ], "index": "pypi", - "version": "==3.1.10" + "version": "==3.1.11" }, "mypy": { "hashes": [ - "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d", - "sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8", - "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de", - "sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038", - "sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed", - "sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334", - "sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff", - "sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2", - "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22", - "sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2", - "sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2", - "sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605", - "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb", - 
"sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519", - "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0", - "sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc", - "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b", - "sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f", - "sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075", - "sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef", - "sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb", - "sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a", - "sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b" + "sha256:0ebfb3f414204b98c06791af37a3a96772203da60636e2897408517fcfeee7a8", + "sha256:239d6b2242d6c7f5822163ee082ef7a28ee02e7ac86c35593ef923796826a385", + "sha256:29dc94d9215c3eb80ac3c2ad29d0c22628accfb060348fd23d73abe3ace6c10d", + "sha256:2c7f8bb9619290836a4e167e2ef1f2cf14d70e0bc36c04441e41487456561409", + "sha256:33d53a232bb79057f33332dbbb6393e68acbcb776d2f571ba4b1d50a2c8ba873", + "sha256:3a3e525cd76c2c4f90f1449fd034ba21fcca68050ff7c8397bb7dd25dd8b8248", + "sha256:3eabcbd2525f295da322dff8175258f3fc4c3eb53f6d1929644ef4d99b92e72d", + "sha256:481f98c6b24383188c928f33dd2f0776690807e12e9989dd0419edd5c74aa53b", + "sha256:7a76dc4f91e92db119b1be293892df8379b08fd31795bb44e0ff84256d34c251", + "sha256:7d390248ec07fa344b9f365e6ed9d205bd0205e485c555bed37c4235c868e9d5", + "sha256:826a2917c275e2ee05b7c7b736c1e6549a35b7ea5a198ca457f8c2ebea2cbecf", + "sha256:85cf2b14d32b61db24ade8ac9ae7691bdfc572a403e3cb8537da936e74713275", + "sha256:8d645e9e7f7a5da3ec3bbcc314ebb9bb22c7ce39e70367830eb3c08d0140b9ce", + "sha256:925aa84369a07846b7f3b8556ccade1f371aa554f2bd4fb31cb97a24b73b036e", + "sha256:a85a20b43fa69efc0b955eba1db435e2ffecb1ca695fe359768e0503b91ea89f", + "sha256:bfd4f6536bd384c27c392a8b8f790fd0ed5c0cf2f63fc2fed7bce56751d53026", 
+ "sha256:cb7752b24528c118a7403ee955b6a578bfcf5879d5ee91790667c8ea511d2085", + "sha256:cc537885891382e08129d9862553b3d00d4be3eb15b8cae9e2466452f52b0117", + "sha256:d4fccf04c1acf750babd74252e0f2db6bd2ac3aa8fe960797d9f3ef41cf2bfd4", + "sha256:f1ba54d440d4feee49d8768ea952137316d454b15301c44403db3f2cb51af024", + "sha256:f47322796c412271f5aea48381a528a613f33e0a115452d03ae35d673e6064f8", + "sha256:fbfb873cf2b8d8c3c513367febde932e061a5f73f762896826ba06391d932b2a", + "sha256:ffdad80a92c100d1b0fe3d3cf1a4724136029a29afe8566404c0146747114382" ], "index": "pypi", - "version": "==0.950" + "version": "==0.960" }, "mypy-extensions": { "hashes": [ @@ -670,19 +655,19 @@ }, "responses": { "hashes": [ - "sha256:18831bc2d72443b67664d98038374a6fa1f27eaaff4dd9a7d7613723416fea3c", - "sha256:644905bc4fb8a18fa37e3882b2ac05e610fe8c2f967d327eed669e314d94a541" + "sha256:2dcc863ba63963c0c3d9ee3fa9507cbe36b7d7b0fccb4f0bdfd9e96c539b1487", + "sha256:b82502eb5f09a0289d8e209e7bad71ef3978334f56d09b444253d5ad67bf5253" ], "markers": "python_version >= '3.7'", - "version": "==0.20.0" + "version": "==0.21.0" }, "s3transfer": { "hashes": [ - "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971", - "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed" + "sha256:06176b74f3a15f61f1b4f25a1fc29a4429040b7647133a463da8fa5bd28d5ecd", + "sha256:2ed07d3866f523cc561bf4a00fc5535827981b117dd7876f036b0c1aca42c947" ], - "markers": "python_version >= '3.6'", - "version": "==0.5.2" + "markers": "python_version >= '3.7'", + "version": "==0.6.0" }, "six": { "hashes": [ @@ -713,9 +698,24 @@ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" ], - "markers": "python_version < '3.11'", + "markers": "python_version >= '3.7'", "version": "==2.0.1" }, + "types-requests": { + "hashes": [ + "sha256:014f4f82db7b96c41feea9adaea30e68cd64c230eeab34b70c29bebb26ec74ac", + 
"sha256:fb453b3a76a48eca66381cea8004feaaea12835e838196f5c7ac87c75c5c19ef" + ], + "index": "pypi", + "version": "==2.27.29" + }, + "types-urllib3": { + "hashes": [ + "sha256:6011befa13f901fc934f59bb1fd6973be6f3acf4ebfce427593a27e7f492918f", + "sha256:c89283541ef92e344b7f59f83ea9b5a295b16366ceee3f25ecfc5593c79f794e" + ], + "version": "==1.26.15" + }, "typing-extensions": { "hashes": [ "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708", diff --git a/README.md b/README.md index 5b55bf4..67cbf9e 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ make lint ``` ## Required ENV -`ACCESS_TOKEN` : The POD access token used to authenticate uploads. The access tokens can be found on the `Manage Organization` page. +`POD_ACCESS_TOKEN` : The POD access token used to authenticate uploads. The access tokens can be found on the `Manage Organization` page. `BUCKET` = The bucket containing the compressed MARCXML files to be submitted to POD. @@ -25,24 +25,32 @@ make lint `SENTRY_DSN` = If set to a valid Sentry DSN, enables Sentry exception monitoring. This is not needed for local development. -`STREAM` = The POD stream to use when posting MARCXML records. - `WORKSPACE` = Set to `dev` for local development, this will be set to `stage` and `prod` in those environments by Terraform. -### To run locally -NOTE: These instructions for running locally don't currently work and functionality has to be verified in our dev AWS account. -- Build the container: - ```bash - docker build -t ppod . - ``` -- Run the container: - ```bash - docker run -p 9000:8080 -e WORKSPACE=dev ppod:latest - ``` -- Post data to the container: + +### Verify local changes in Dev1 +- Ensure your AWS CLI is configured with credentials for the Dev1 account. 
+- Publish the lambda function: ```bash - curl -XPOST "http://localhost:9000/2015-03-31/functions/function/invocations" -d "{}" + make publish-dev + make update-lambda-dev ``` -- Observe output: - ``` - lambda \ No newline at end of file + +#### Submit files to POD test stream +Use the `Test` tab on the lambda to submit `Event JSON` that will match files in the dev1 S3 bucket: + +```bash +{ + "filename-prefix": "exlibris/pod/POD_ALMA_EXPORT_20220523" +} +``` + + +Observe that the output reflects the correct number of files: + +```bash +{ + "files_processed": 2 +} +``` + diff --git a/conftest.py b/conftest.py index 809684f..338f0e2 100644 --- a/conftest.py +++ b/conftest.py @@ -25,7 +25,7 @@ def marcxml_with_namespaces(): yield marcxml_with_namespaces -@pytest.fixture() +@pytest.fixture(autouse=True, scope="session") def mocked_pod(): with requests_mock.Mocker() as m: request_headers = {"Authorization": "Bearer 1234abcd"} @@ -40,7 +40,7 @@ def mocked_pod(): yield m -@pytest.fixture(scope="session") +@pytest.fixture(autouse=True, scope="session") def mocked_s3(aws_credentials): with mock_s3(): with open("fixtures/marc.tar.gz", "rb") as pod_tar, open( @@ -70,7 +70,7 @@ def mocked_s3(aws_credentials): yield s3 -@pytest.fixture() +@pytest.fixture(autouse=True, scope="session") def mocked_ssm(): with mock_ssm(): ssm = boto3.client("ssm", region_name="us-east-1") @@ -89,7 +89,7 @@ def request_data_matching_file(): @pytest.fixture(autouse=True) def test_env(): os.environ = { - "ACCESS_TOKEN": "1234abcd", + "POD_ACCESS_TOKEN": "1234abcd", "BUCKET": "ppod", "POD_URL": "http://example.example/organizations/ORG/uploads?stream=", "WORKSPACE": "test", diff --git a/ppod.py b/ppod.py index aa9b81d..3b4c93a 100644 --- a/ppod.py +++ b/ppod.py @@ -22,11 +22,11 @@ def lambda_handler(event: dict, context: object) -> dict: bucket = os.environ["BUCKET"] ssm_client = client("ssm", region_name="us-east-1") - stream = ssm_client.get_parameter( - Name="/apps/ppod/stream-name", WithDecryption=True -
)["Parameter"]["Value"] + stream = ssm_client.get_parameter(Name="/apps/ppod/stream-name")["Parameter"][ + "Value" + ] pod_url = os.environ["POD_URL"] + stream - pod_headers = {"Authorization": f'Bearer {os.environ["ACCESS_TOKEN"]}'} + pod_headers = {"Authorization": f'Bearer {os.environ["POD_ACCESS_TOKEN"]}'} file_count = 0 s3_files = filter_files_in_bucket( diff --git a/requirements.txt b/requirements.txt index 3fc8f3d..dd170c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,12 +6,15 @@ # -i https://pypi.org/simple -boto3==1.23.2 -botocore==1.26.2; python_version >= '3.6' -certifi==2021.10.8 +boto3==1.24.0 +botocore==1.27.0; python_version >= '3.7' +certifi==2022.5.18.1; python_version >= '3.6' +charset-normalizer==2.0.12; python_version >= '3' +idna==3.3; python_version >= '3' jmespath==1.0.0; python_version >= '3.7' python-dateutil==2.8.2; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' -s3transfer==0.5.2; python_version >= '3.6' +requests==2.27.1 +s3transfer==0.6.0; python_version >= '3.7' sentry-sdk==1.5.12 six==1.16.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' smart-open==6.0.0 diff --git a/test_ppod.py b/test_ppod.py index 3f0f6cc..3f473d7 100644 --- a/test_ppod.py +++ b/test_ppod.py @@ -13,7 +13,7 @@ def test_ppod_configures_sentry_if_dsn_present( - caplog, monkeypatch, mocked_pod, mocked_s3, mocked_ssm, request_data_matching_file + caplog, monkeypatch, request_data_matching_file ): monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456") caplog.set_level(logging.INFO) @@ -25,7 +25,7 @@ def test_ppod_configures_sentry_if_dsn_present( def test_ppod_doesnt_configure_sentry_if_dsn_not_present( - caplog, monkeypatch, mocked_pod, mocked_s3, mocked_ssm, request_data_matching_file + caplog, monkeypatch, request_data_matching_file ): monkeypatch.delenv("SENTRY_DSN", raising=False) caplog.set_level(logging.INFO) @@ -33,30 +33,24 @@ def 
test_ppod_doesnt_configure_sentry_if_dsn_not_present( assert "Sentry DSN found" not in caplog.text -def test_ppod_matching_files( - mocked_pod, mocked_s3, mocked_ssm, request_data_matching_file -): +def test_ppod_matching_files(request_data_matching_file): output = lambda_handler(request_data_matching_file, {}) assert output == {"files_processed": 1} -def test_ppod_no_files_raises_exception( - monkeypatch, mocked_s3, mocked_ssm, request_data_matching_file -): +def test_ppod_no_files_raises_exception(monkeypatch, request_data_matching_file): monkeypatch.setenv("BUCKET", "no_files") with pytest.raises(KeyError): lambda_handler(request_data_matching_file, {}) -def test_ppod_empty_tar_raises_exception( - monkeypatch, mocked_s3, request_data_matching_file -): +def test_ppod_empty_tar_raises_exception(monkeypatch, request_data_matching_file): monkeypatch.setenv("BUCKET", "empty_tar") with pytest.raises(ValueError): lambda_handler(request_data_matching_file, {}) -def test_ppod_no_matching_files_raises_exception(mocked_s3): +def test_ppod_no_matching_files_raises_exception(): request_data = {"filename-prefix": "download/"} with pytest.raises(KeyError): lambda_handler(request_data, {}) @@ -78,29 +72,29 @@ def test_extract_files_from_tar(marcxml): assert next(files).read() == marcxml.read() -def test_filter_files_in_bucket_with_1001_matching_file(mocked_s3): +def test_filter_files_in_bucket_with_1001_matching_file(): files = filter_files_in_bucket("a_lot_of_files", "upload/") assert len(list(files)) == 1001 -def test_filter_files_in_bucket_with_matching_file(mocked_s3): +def test_filter_files_in_bucket_with_matching_file(): files = filter_files_in_bucket("ppod", "upload/") assert next(files) == "upload/marc.tar.gz" -def test_filter_files_in_bucket_with_no_file(mocked_s3): +def test_filter_files_in_bucket_with_no_file(): with pytest.raises(KeyError): files = filter_files_in_bucket("no_files", "upload/") next(files) -def 
test_filter_files_in_bucket_without_matching_file(mocked_s3): +def test_filter_files_in_bucket_without_matching_file(): with pytest.raises(KeyError): files = filter_files_in_bucket("ppod", "download/") next(files) -def test_post_files_to_pod_success(marcxml_with_namespaces, mocked_pod): +def test_post_files_to_pod_success(marcxml_with_namespaces): response = post_file_to_pod( "http://example.example/organizations/ORG/uploads?stream=default", {"Authorization": "Bearer 1234abcd"}, @@ -110,7 +104,7 @@ def test_post_files_to_pod_success(marcxml_with_namespaces, mocked_pod): assert response.status_code == 200 -def test_post_files_to_pod_bad_url_raises_error(marcxml_with_namespaces, mocked_pod): +def test_post_files_to_pod_bad_url_raises_error(marcxml_with_namespaces): with pytest.raises(requests.exceptions.HTTPError): post_file_to_pod( "http://example.example/organizations/ORG/uploads?stream=not-a-stream", From a86438630001252c1c69869d405de41d99eb52fa Mon Sep 17 00:00:00 2001 From: Eric Hanson Date: Wed, 1 Jun 2022 16:42:30 -0400 Subject: [PATCH 4/4] Update README.md * Add export bucket note --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 67cbf9e..29d48e7 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,8 @@ Use the `Test` tab on the lambda to `Event JSON` that will match files in the de } ``` +Note: If it's been a while since the last POD export from Alma sandbox, there may be no files in the Dev1 S3 export bucket and you may need to run the publishing job from the sandbox. + Observe that the output reflects the correct number of files: