diff --git a/.gitignore b/.gitignore index 066882e..16afb09 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .conda/ __pycache__/ dist/ -.env \ No newline at end of file +.env +.vscode/ \ No newline at end of file diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..de0ae6e --- /dev/null +++ b/poetry.lock @@ -0,0 +1,197 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. + +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "idna" +version = "3.6" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "soupsieve" +version = "2.5" +description = "A modern CSS selector implementation for Beautiful Soup." +optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"}, + {file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"}, +] + +[[package]] +name = "urllib3" +version = "2.2.1" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.8" +content-hash = "a642df7d4f6efa28b03c1c289a83a8c7513aa1e0cb58385a0cf22f5a17bb1037" diff --git a/pyproject.toml b/pyproject.toml index 96737a3..f52bffa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,23 +1,17 @@ -[build-system] -requires = ["hatchling", - "requests"] -build-backend = "hatchling.build" - -[project] +[tool.poetry] name = "substack-api" -version = "0.0.2" -authors = [ - { name="Nick Hagar", email="nicholasrhagar@gmail.com" }, -] -description = "The unofficial Substack API wrapper for Python." +version = "0.1.0" +description = "unofficial python wrapper for collecting substack data" +authors = ["NHagar "] +license = "MIT" readme = "README.md" -license = { file="LICENSE" } -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", -] -[project.urls] -"Homepage" = "https://github.com/NHagar/substack_api" \ No newline at end of file +[tool.poetry.dependencies] +python = "^3.8" +requests = "^2.31.0" +beautifulsoup4 = "^4.12.3" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/src/substack_api/__init__.py b/substack_api/__init__.py similarity index 100% rename from src/substack_api/__init__.py rename to substack_api/__init__.py diff --git a/src/substack_api/substack_api.py b/substack_api/newsletter.py similarity index 74% rename from src/substack_api/substack_api.py rename to substack_api/newsletter.py index 35b188b..5f121e7 100644 --- a/src/substack_api/substack_api.py +++ b/substack_api/newsletter.py @@ -2,19 +2,22 @@ from time import sleep from typing import Dict, List, Tuple, Union +from bs4 import BeautifulSoup import requests + HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36" } + def list_all_categories() -> List[Tuple[str, int]]: """ Get name / id representations of all newsletter categories """ endpoint_cat = "https://substack.com/api/v1/categories" r = requests.get(endpoint_cat, headers=HEADERS) - categories = [(i['name'], i['id']) for i in r.json()] + categories = [(i["name"], i["id"]) for i in r.json()] return categories @@ -50,7 +53,12 @@ def category_name_to_id(name: str) -> int: raise ValueError(f"{name} is not in Substack's list of categories") -def get_newsletters_in_category(category_id: int, subdomains_only: bool = False, start_page: int = None, end_page: int = None) -> List: +def get_newsletters_in_category( + category_id: int, + subdomains_only: bool = False, + start_page: int = None, + end_page: int = None, +) -> List: """ Collects newsletter objects listed under specified category @@ -84,7 +92,12 @@ def get_newsletters_in_category(category_id: int, subdomains_only: bool = False, return all_pubs -def get_newsletter_post_metadata(newsletter_subdomain: str, slugs_only: bool = False, start_offset: int = None, end_offset: int = None) -> List: +def get_newsletter_post_metadata( + newsletter_subdomain: str, + slugs_only: bool = False, + start_offset: int = None, + end_offset: int = None, +) -> List: """ Get available post metadata for newsletter @@ -104,6 +117,9 @@ def get_newsletter_post_metadata(newsletter_subdomain: str, slugs_only: bool = F full_url = f"https://{newsletter_subdomain}.substack.com/api/v1/archive?sort=new&search=&offset={offset_start}&limit=10" posts = requests.get(full_url, headers=HEADERS).json() + if len(posts) == 0: + break + last_id = posts[-1]["id"] if last_id == last_id_ref: break @@ -121,7 +137,9 @@ def get_newsletter_post_metadata(newsletter_subdomain: str, slugs_only: bool = F return all_posts -def get_post_contents(newsletter_subdomain: str, slug: str, html_only: bool = False) -> Union[Dict, str]: +def get_post_contents( + newsletter_subdomain: str, slug: str, html_only: bool = False +) -> Union[Dict, str]: """ Gets individual post metadata and contents @@ -137,3 +155,24 @@ def get_post_contents(newsletter_subdomain: str, slug: str, html_only: bool = Fa return post_info["body_html"] else: return post_info + + +def get_newsletter_recommendations(newsletter_subdomain: str) -> List[Dict[str, str]]: + """ + Gets recommended newsletters for a given newsletter + + Parameters + ---------- + newsletter_subdomain : Substack subdomain of newsletter (can be retrieved from `get_newsletters_in_category`) + """ + endpoint = f"https://{newsletter_subdomain}.substack.com/recommendations" + r = requests.get(endpoint, headers=HEADERS) + recs = r.text + soup = BeautifulSoup(recs, "html.parser") + div_elements = soup.find_all("div", class_="publication-content") + a_elements = [div.find("a") for div in div_elements] + titles = [i.text for i in soup.find_all("div", {"class": "publication-title"})] + links = [i["href"].split("?")[0] for i in a_elements] + results = [{"title": t, "url": u} for t, u in zip(titles, links)] + + return results diff --git a/substack_api/user.py b/substack_api/user.py new file mode 100644 index 0000000..bb71cd2 --- /dev/null +++ b/substack_api/user.py @@ -0,0 +1,77 @@ +from typing import Dict, List + +import requests + +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36" +} + + +def get_user_id(username: str) -> int: + """ + Get the user ID of a Substack user. + + Parameters + ---------- + username : str + The username of the Substack user. + """ + endpoint = f"https://substack.com/api/v1/user/{username}/public_profile" + r = requests.get(endpoint, headers=HEADERS) + user_id = r.json()["id"] + return user_id + + +def get_user_reads(username: str) -> List[Dict[str, str]]: + """ + Get newsletters from the "Reads" section of a user's profile. + + Parameters + ---------- + username : str + The username of the Substack user. + """ + endpoint = f"https://substack.com/api/v1/user/{username}/public_profile" + r = requests.get(endpoint, headers=HEADERS) + user_data = r.json() + reads = [ + { + "publication_id": i["publication"]["id"], + "publication_name": i["publication"]["name"], + "subscription_status": i["membership_state"], + } + for i in user_data["subscriptions"] + ] + return reads + + +def get_user_likes(user_id: int): + """ + Get liked posts from a user's profile. + + Parameters + ---------- + user_id : int + The user ID of the Substack user. + """ + endpoint = ( + f"https://substack.com/api/v1/reader/feed/profile/{user_id}?types%5B%5D=like" + ) + r = requests.get(endpoint, headers=HEADERS) + likes = r.json()["items"] + return likes + + +def get_user_notes(user_id: int): + """ + Get notes and comments posted by a user. + + Parameters + ---------- + user_id : int + The user ID of the Substack user. + """ + endpoint = f"https://substack.com/api/v1/reader/feed/profile/{user_id}" + r = requests.get(endpoint, headers=HEADERS) + notes = r.json()["items"] + return notes diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_newsletter.py b/tests/test_newsletter.py new file mode 100644 index 0000000..41b721c --- /dev/null +++ b/tests/test_newsletter.py @@ -0,0 +1,147 @@ +import unittest +from unittest.mock import patch, Mock, MagicMock +from bs4 import BeautifulSoup +from substack_api.newsletter import ( + get_newsletter_post_metadata, + get_newsletter_recommendations, + get_post_contents, + HEADERS, +) + + +class TestGetNewsletterPostMetadata(unittest.TestCase): + @patch("requests.get") + def test_get_newsletter_post_metadata_slugs_only(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.json.return_value = [ + {"id": 1, "slug": "post-1"}, + {"id": 2, "slug": "post-2"}, + ] + + result = get_newsletter_post_metadata("test_subdomain", slugs_only=True) + self.assertEqual(result, ["post-1", "post-2"]) + + @patch("requests.get") + def test_get_newsletter_post_metadata_all_metadata(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.json.return_value = [ + {"id": 1, "slug": "post-1", "title": "Post 1"}, + {"id": 2, "slug": "post-2", "title": "Post 2"}, + ] + + result = get_newsletter_post_metadata("test_subdomain", slugs_only=False) + self.assertEqual( + result, + [ + {"id": 1, "slug": "post-1", "title": "Post 1"}, + {"id": 2, "slug": "post-2", "title": "Post 2"}, + ], + ) + + @patch("requests.get") + def test_get_newsletter_post_metadata_pagination(self, mock_get): + mock_get.side_effect = [ + Mock( + ok=True, + json=Mock( + return_value=[ + {"id": 1, "slug": "post-1"}, + {"id": 2, "slug": "post-2"}, + ] + ), + ), + Mock( + ok=True, + json=Mock( + return_value=[ + {"id": 3, "slug": "post-3"}, + {"id": 4, "slug": "post-4"}, + ] + ), + ), + ] + + result = get_newsletter_post_metadata( + "test_subdomain", slugs_only=True, start_offset=0, end_offset=20 + ) + self.assertEqual(result, ["post-1", "post-2", "post-3", "post-4"]) + + @patch("requests.get") + def test_get_newsletter_post_metadata_no_posts(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.json.return_value = [] + + result = get_newsletter_post_metadata("test_subdomain") + self.assertEqual(result, []) + + +class TestGetNewsletterRecommendations(unittest.TestCase): + @patch("requests.get") + @patch.object(BeautifulSoup, "find_all") + @patch.object(BeautifulSoup, "__init__", return_value=None) + def test_get_newsletter_recommendations( + self, mock_bs_init, mock_find_all, mock_get + ): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.text = "mocked_html" + + mock_div = MagicMock() + mock_div.find.return_value = {"href": "https://mocked_url.com?param=value"} + + mock_find_all.side_effect = [ + [mock_div, mock_div], # div_elements + [Mock(text="title1"), Mock(text="title2")], # titles + ] + + result = get_newsletter_recommendations("test_subdomain") + + self.assertEqual( + result, + [ + {"title": "title1", "url": "https://mocked_url.com"}, + {"title": "title2", "url": "https://mocked_url.com"}, + ], + ) + + mock_get.assert_called_once_with( + "https://test_subdomain.substack.com/recommendations", headers=HEADERS + ) + mock_bs_init.assert_called_once_with("mocked_html", "html.parser") + self.assertEqual(mock_find_all.call_count, 2) + + +class TestGetPostContents(unittest.TestCase): + @patch("requests.get") + def test_get_post_contents_html_only(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.json.return_value = { + "body_html": "Test post" + } + + result = get_post_contents("test_subdomain", "test_slug", html_only=True) + self.assertEqual(result, "Test post") + + @patch("requests.get") + def test_get_post_contents_all_metadata(self, mock_get): + mock_get.return_value = Mock(ok=True) + mock_get.return_value.json.return_value = { + "body_html": "Test post", + "title": "Test post", + "author": "Test author", + "date": "2022-01-01", + } + + result = get_post_contents("test_subdomain", "test_slug", html_only=False) + self.assertEqual( + result, + { + "body_html": "Test post", + "title": "Test post", + "author": "Test author", + "date": "2022-01-01", + }, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_user.py b/tests/test_user.py new file mode 100644 index 0000000..b2d97d6 --- /dev/null +++ b/tests/test_user.py @@ -0,0 +1,52 @@ +import unittest +from unittest.mock import patch +from substack_api.user import ( + get_user_id, + get_user_reads, + get_user_likes, + get_user_notes, +) + + +class TestUser(unittest.TestCase): + @patch("requests.get") + def test_get_user_id(self, mock_get): + mock_get.return_value.json.return_value = {"id": 123} + result = get_user_id("testuser") + self.assertEqual(result, 123) + + @patch("requests.get") + def test_get_user_reads(self, mock_get): + mock_get.return_value.json.return_value = { + "subscriptions": [ + { + "publication": {"id": "123", "name": "Test Publication"}, + "membership_state": "subscribed", + } + ] + } + expected_result = [ + { + "publication_id": "123", + "publication_name": "Test Publication", + "subscription_status": "subscribed", + } + ] + result = get_user_reads("testuser") + self.assertEqual(result, expected_result) + + @patch("requests.get") + def test_get_user_likes(self, mock_get): + mock_get.return_value.json.return_value = {"items": ["post1", "post2"]} + result = get_user_likes(123) + self.assertEqual(result, ["post1", "post2"]) + + @patch("requests.get") + def test_get_user_notes(self, mock_get): + mock_get.return_value.json.return_value = {"items": ["note1", "note2"]} + result = get_user_notes(123) + self.assertEqual(result, ["note1", "note2"]) + + +if __name__ == "__main__": + unittest.main()