From 7db08d448b4eb1b1394dcf95bd5dd9d80242c61f Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 10:55:17 -0500 Subject: [PATCH 1/7] auth module --- pyproject.toml | 12 ++- substack_api/auth.py | 215 +++++++++++++++++++++++++++++++++++++++++++ uv.lock | 164 +++++++++++++++++++++++++++++++-- 3 files changed, 380 insertions(+), 11 deletions(-) create mode 100644 substack_api/auth.py diff --git a/pyproject.toml b/pyproject.toml index 95ae81f..f43276f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,9 +4,7 @@ version = "1.0.2" description = "Unofficial wrapper for the Substack API" readme = "README.md" requires-python = ">=3.12" -dependencies = [ - "requests>=2.32.3", -] +dependencies = ["requests>=2.32.3"] [dependency-groups] dev = [ @@ -18,3 +16,11 @@ dev = [ "pytest>=8.3.4", "ruff>=0.9.9", ] + +[project.urls] +"Homepage" = "https://github.com/nhagar/substack_api" +"Bug Tracker" = "https://github.com/nhagar/substack_api/issues" +"Documentation" = "https://nhagar.github.io/substack_api/" + +[project.optional-dependencies] +auth = ["selenium>=4.33.0"] diff --git a/substack_api/auth.py b/substack_api/auth.py new file mode 100644 index 0000000..424fdc8 --- /dev/null +++ b/substack_api/auth.py @@ -0,0 +1,215 @@ +import json +import os +from typing import Optional + +import requests +from selenium import webdriver +from selenium.common.exceptions import TimeoutException +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait + + +class SubstackAuth: + """Handles authentication for Substack API requests.""" + + def __init__( + self, + email: Optional[str] = None, + password: Optional[str] = None, + cookies_path: Optional[str] = None, + ): + """ + Initialize authentication handler. 
+ + Parameters + ---------- + email : str, optional + Substack account email + password : str, optional + Substack account password + cookies_path : str, optional + Path to save/load session cookies + """ + self.email = email + self.password = password + self.cookies_path = cookies_path or os.path.expanduser( + "~/.substack_cookies.json" + ) + self.session = requests.Session() + self.authenticated = False + + # Set default headers + self.session.headers.update( + { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36", + "Accept": "application/json", + "Content-Type": "application/json", + } + ) + + # Try to load existing cookies + if os.path.exists(self.cookies_path): + self.load_cookies() + elif email and password: + self.login() + + def login(self) -> bool: + """ + Login to Substack using Selenium WebDriver. + + Returns + ------- + bool + True if login successful, False otherwise + """ + if not self.email or not self.password: + raise ValueError("Email and password required for login") + + print(f"Logging in as {self.email}...") + + # Setup Chrome options for headless mode + options = webdriver.ChromeOptions() + options.add_argument("--headless") + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + + driver = None + try: + driver = webdriver.Chrome(options=options) + driver.get("https://substack.com/sign-in") + + # Wait for login form + wait = WebDriverWait(driver, 10) + + # Enter email + email_input = wait.until(EC.presence_of_element_located((By.NAME, "email"))) + email_input.send_keys(self.email) + + # Click Sign in with password + sign_in_button = driver.find_element( + By.XPATH, "//a[contains(text(), 'Sign in with password')]" + ) + sign_in_button.click() + + # Wait for password field + password_input = wait.until( + EC.presence_of_element_located((By.NAME, "password")) + ) + password_input.send_keys(self.password) + + # Submit login + 
login_button = driver.find_element( + By.XPATH, "//button[contains(text(), 'Continue')]" + ) + login_button.click() + + # Wait for redirect after successful login + wait.until(lambda d: d.current_url != "https://substack.com/sign-in") + + # Extract cookies + cookies = driver.get_cookies() + + # Convert to requests session cookies + for cookie in cookies: + self.session.cookies.set( + cookie["name"], + cookie["value"], + domain=cookie.get("domain"), + path=cookie.get("path", "/"), + ) + + # Save cookies + self.save_cookies() + self.authenticated = True + print("Login successful!") + return True + + except TimeoutException: + print("Login failed: Timeout waiting for elements") + return False + except Exception as e: + print(f"Login failed: {str(e)}") + return False + finally: + if driver: + driver.quit() + + def save_cookies(self) -> None: + """Save session cookies to file.""" + cookies = {} + for cookie in self.session.cookies: + cookies[cookie.name] = { + "value": cookie.value, + "domain": cookie.domain, + "path": cookie.path, + "secure": cookie.secure, + "expires": cookie.expires, + } + + with open(self.cookies_path, "w") as f: + json.dump(cookies, f) + + def load_cookies(self) -> bool: + """ + Load cookies from file. + + Returns + ------- + bool + True if cookies loaded successfully + """ + try: + with open(self.cookies_path, "r") as f: + cookies = json.load(f) + + for name, cookie in cookies.items(): + self.session.cookies.set( + name, + cookie["value"], + domain=cookie.get("domain"), + path=cookie.get("path", "/"), + secure=cookie.get("secure", False), + ) + + return True + + except Exception as e: + print(f"Failed to load cookies: {str(e)}") + return False + + def get(self, url: str, **kwargs) -> requests.Response: + """ + Make authenticated GET request. 
+ + Parameters + ---------- + url : str + URL to request + **kwargs + Additional arguments to pass to requests.get + + Returns + ------- + requests.Response + Response object + """ + return self.session.get(url, **kwargs) + + def post(self, url: str, **kwargs) -> requests.Response: + """ + Make authenticated POST request. + + Parameters + ---------- + url : str + URL to request + **kwargs + Additional arguments to pass to requests.post + + Returns + ------- + requests.Response + Response object + """ + return self.session.post(url, **kwargs) diff --git a/uv.lock b/uv.lock index e57c970..8b4e0bc 100644 --- a/uv.lock +++ b/uv.lock @@ -19,6 +19,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, ] +[[package]] +name = "attrs" +version = "25.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 }, +] + [[package]] name = "babel" version = "2.17.0" @@ -43,11 +52,11 @@ wheels = [ [[package]] name = "certifi" -version = "2025.1.31" +version = "2025.4.26" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 } +sdist = { url = 
"https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705 } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 }, + { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618 }, ] [[package]] @@ -210,6 +219,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/02/5a22bc98d0aebb68c15ba70d2da1c84a5ef56048d79634e5f96cd2ba96e9/griffe-1.6.0-py3-none-any.whl", hash = "sha256:9f1dfe035d4715a244ed2050dfbceb05b1f470809ed4f6bb10ece5a7302f8dd1", size = 128470 }, ] +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + [[package]] name = "idna" version = "3.10" @@ -571,6 +589,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 }, ] +[[package]] +name = 
"outcome" +version = "1.3.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/df/77698abfac98571e65ffeb0c1fba8ffd692ab8458d617a0eed7d9a8d38f2/outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8", size = 21060 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/8b/5ab7257531a5d830fc8000c476e63c935488d74609b50f9384a643ec0a62/outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b", size = 10692 }, +] + [[package]] name = "packaging" version = "24.2" @@ -722,6 +752,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/a7/c8a2d361bf89c0d9577c934ebb7421b25dc84bf3a8e3ac0a40aed9acc547/pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1", size = 107716 }, ] +[[package]] +name = "pysocks" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725 }, +] + [[package]] name = "pytest" version = "8.3.4" @@ -884,6 +923,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/d8/de873d1c1b020d668d8ec9855d390764cb90cf8f6486c0983da52be8b7b7/ruff-0.9.9-py3-none-win_arm64.whl", hash = "sha256:3ac78f127517209fe6d96ab00f3ba97cafe38718b23b1db3e96d8b2d39e37ddf", size = 10435860 }, ] +[[package]] +name = "selenium" +version = "4.33.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "trio" }, + { name = "trio-websocket" }, + { name = "typing-extensions" }, + { name = "urllib3", extra = ["socks"] }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/7e/4145666dd275760b56d0123a9439915af167932dd6caa19b5f8b281ae297/selenium-4.33.0.tar.gz", hash = "sha256:d90974db95d2cdeb34d2fb1b13f03dc904f53e6c5d228745b0635ada10cd625d", size = 882387 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/c0/092fde36918574e144613de73ba43c36ab8d31e7d36bb44c35261909452d/selenium-4.33.0-py3-none-any.whl", hash = "sha256:af9ea757813918bddfe05cc677bf63c8a0cd277ebf8474b3dd79caa5727fca85", size = 9370835 }, +] + [[package]] name = "six" version = "1.17.0" @@ -893,6 +949,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = 
"sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -909,12 +983,17 @@ wheels = [ [[package]] name = "substack-api" -version = "0.1.0" +version = "1.0.2" source = { virtual = "." } dependencies = [ { name = "requests" }, ] +[package.optional-dependencies] +auth = [ + { name = "selenium" }, +] + [package.dev-dependencies] dev = [ { name = "ipykernel" }, @@ -927,7 +1006,10 @@ dev = [ ] [package.metadata] -requires-dist = [{ name = "requests", specifier = ">=2.32.3" }] +requires-dist = [ + { name = "requests", specifier = ">=2.32.3" }, + { name = "selenium", marker = "extra == 'auth'", specifier = ">=4.33.0" }, +] [package.metadata.requires-dev] dev = [ @@ -967,13 +1049,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, ] +[[package]] +name = "trio" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "cffi", marker = "implementation_name != 'pypy' and os_name == 'nt'" }, + { name = "idna" }, + { name = "outcome" }, + { name = "sniffio" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/c1/68d582b4d3a1c1f8118e18042464bb12a7c1b75d64d75111b297687041e3/trio-0.30.0.tar.gz", hash = "sha256:0781c857c0c81f8f51e0089929a26b5bb63d57f927728a5586f7e36171f064df", size = 593776 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/69/8e/3f6dfda475ecd940e786defe6df6c500734e686c9cd0a0f8ef6821e9b2f2/trio-0.30.0-py3-none-any.whl", hash = "sha256:3bf4f06b8decf8d3cf00af85f40a89824669e2d033bb32469d34840edcfc22a5", size = 499194 }, +] + +[[package]] +name = "trio-websocket" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "outcome" }, + { name = "trio" }, + { name = "wsproto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/3c/8b4358e81f2f2cfe71b66a267f023a91db20a817b9425dd964873796980a/trio_websocket-0.12.2.tar.gz", hash = "sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae", size = 33549 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/19/eb640a397bba49ba49ef9dbe2e7e5c04202ba045b6ce2ec36e9cadc51e04/trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6", size = 21221 }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, +] + [[package]] name = "urllib3" -version = "2.3.0" +version = "2.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } +sdist = { url = 
"https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 } wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, + { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 }, +] + +[package.optional-dependencies] +socks = [ + { name = "pysocks" }, ] [[package]] @@ -1018,6 +1145,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, ] +[[package]] +name = "websocket-client" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, +] + +[[package]] +name = "wsproto" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h11" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226 }, +] + [[package]] name = "zipp" version = "3.21.0" From b487917a948f1a006889d325e690f29eff0616a1 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 11:16:58 -0500 Subject: [PATCH 2/7] trim auth --- pyproject.toml | 3 - substack_api/auth.py | 123 ++--------------------------------- test.py | 3 + uv.lock | 150 +------------------------------------------ 4 files changed, 11 insertions(+), 268 deletions(-) create mode 100644 test.py diff --git a/pyproject.toml b/pyproject.toml index f43276f..0a94690 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,3 @@ dev = [ "Homepage" = "https://github.com/nhagar/substack_api" "Bug Tracker" = "https://github.com/nhagar/substack_api/issues" "Documentation" = "https://nhagar.github.io/substack_api/" - -[project.optional-dependencies] -auth = ["selenium>=4.33.0"] diff --git a/substack_api/auth.py b/substack_api/auth.py index 424fdc8..494541f 100644 --- a/substack_api/auth.py +++ b/substack_api/auth.py @@ -1,13 +1,7 @@ import json import os -from typing import Optional import requests -from selenium import webdriver -from selenium.common.exceptions import TimeoutException -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import WebDriverWait class SubstackAuth: @@ -15,27 +9,17 @@ class SubstackAuth: def __init__( self, - email: Optional[str] = None, - password: Optional[str] = None, - cookies_path: Optional[str] = None, + cookies_path: 
str, ): """ Initialize authentication handler. Parameters ---------- - email : str, optional - Substack account email - password : str, optional - Substack account password cookies_path : str, optional - Path to save/load session cookies + Path to retrieve session cookies from """ - self.email = email - self.password = password - self.cookies_path = cookies_path or os.path.expanduser( - "~/.substack_cookies.json" - ) + self.cookies_path = cookies_path self.session = requests.Session() self.authenticated = False @@ -51,104 +35,11 @@ def __init__( # Try to load existing cookies if os.path.exists(self.cookies_path): self.load_cookies() - elif email and password: - self.login() - - def login(self) -> bool: - """ - Login to Substack using Selenium WebDriver. - - Returns - ------- - bool - True if login successful, False otherwise - """ - if not self.email or not self.password: - raise ValueError("Email and password required for login") - - print(f"Logging in as {self.email}...") - - # Setup Chrome options for headless mode - options = webdriver.ChromeOptions() - options.add_argument("--headless") - options.add_argument("--no-sandbox") - options.add_argument("--disable-dev-shm-usage") - - driver = None - try: - driver = webdriver.Chrome(options=options) - driver.get("https://substack.com/sign-in") - - # Wait for login form - wait = WebDriverWait(driver, 10) - - # Enter email - email_input = wait.until(EC.presence_of_element_located((By.NAME, "email"))) - email_input.send_keys(self.email) - - # Click Sign in with password - sign_in_button = driver.find_element( - By.XPATH, "//a[contains(text(), 'Sign in with password')]" - ) - sign_in_button.click() - - # Wait for password field - password_input = wait.until( - EC.presence_of_element_located((By.NAME, "password")) - ) - password_input.send_keys(self.password) - - # Submit login - login_button = driver.find_element( - By.XPATH, "//button[contains(text(), 'Continue')]" - ) - login_button.click() - - # Wait for redirect 
after successful login - wait.until(lambda d: d.current_url != "https://substack.com/sign-in") - - # Extract cookies - cookies = driver.get_cookies() - - # Convert to requests session cookies - for cookie in cookies: - self.session.cookies.set( - cookie["name"], - cookie["value"], - domain=cookie.get("domain"), - path=cookie.get("path", "/"), - ) - - # Save cookies - self.save_cookies() self.authenticated = True - print("Login successful!") - return True - - except TimeoutException: - print("Login failed: Timeout waiting for elements") - return False - except Exception as e: - print(f"Login failed: {str(e)}") - return False - finally: - if driver: - driver.quit() - - def save_cookies(self) -> None: - """Save session cookies to file.""" - cookies = {} - for cookie in self.session.cookies: - cookies[cookie.name] = { - "value": cookie.value, - "domain": cookie.domain, - "path": cookie.path, - "secure": cookie.secure, - "expires": cookie.expires, - } - - with open(self.cookies_path, "w") as f: - json.dump(cookies, f) + else: + print(f"Cookies file not found at {self.cookies_path}. 
Please log in.") + self.authenticated = False + self.session.cookies.clear() def load_cookies(self) -> bool: """ diff --git a/test.py b/test.py new file mode 100644 index 0000000..e532744 --- /dev/null +++ b/test.py @@ -0,0 +1,3 @@ +from substack_api.auth import SubstackAuth + +auth = SubstackAuth() diff --git a/uv.lock b/uv.lock index 8b4e0bc..d16a01e 100644 --- a/uv.lock +++ b/uv.lock @@ -19,15 +19,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, ] -[[package]] -name = "attrs" -version = "25.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815 }, -] - [[package]] name = "babel" version = "2.17.0" @@ -219,15 +210,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/02/5a22bc98d0aebb68c15ba70d2da1c84a5ef56048d79634e5f96cd2ba96e9/griffe-1.6.0-py3-none-any.whl", hash = "sha256:9f1dfe035d4715a244ed2050dfbceb05b1f470809ed4f6bb10ece5a7302f8dd1", size = 128470 }, ] -[[package]] -name = "h11" -version = "0.16.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, -] - [[package]] name = "idna" version = "3.10" @@ -589,18 +571,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 }, ] -[[package]] -name = "outcome" -version = "1.3.0.post0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/df/77698abfac98571e65ffeb0c1fba8ffd692ab8458d617a0eed7d9a8d38f2/outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8", size = 21060 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/8b/5ab7257531a5d830fc8000c476e63c935488d74609b50f9384a643ec0a62/outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b", size = 10692 }, -] - [[package]] name = "packaging" version = "24.2" @@ -752,15 +722,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1c/a7/c8a2d361bf89c0d9577c934ebb7421b25dc84bf3a8e3ac0a40aed9acc547/pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1", size = 107716 }, ] -[[package]] -name = "pysocks" -version = "1.7.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725 }, -] - [[package]] name = "pytest" version = "8.3.4" @@ -923,23 +884,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/d8/de873d1c1b020d668d8ec9855d390764cb90cf8f6486c0983da52be8b7b7/ruff-0.9.9-py3-none-win_arm64.whl", hash = "sha256:3ac78f127517209fe6d96ab00f3ba97cafe38718b23b1db3e96d8b2d39e37ddf", size = 10435860 }, ] -[[package]] -name = "selenium" -version = "4.33.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "trio" }, - { name = "trio-websocket" }, - { name = "typing-extensions" }, - { name = "urllib3", extra = ["socks"] }, - { name = "websocket-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5f/7e/4145666dd275760b56d0123a9439915af167932dd6caa19b5f8b281ae297/selenium-4.33.0.tar.gz", hash = "sha256:d90974db95d2cdeb34d2fb1b13f03dc904f53e6c5d228745b0635ada10cd625d", size = 882387 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/c0/092fde36918574e144613de73ba43c36ab8d31e7d36bb44c35261909452d/selenium-4.33.0-py3-none-any.whl", hash = "sha256:af9ea757813918bddfe05cc677bf63c8a0cd277ebf8474b3dd79caa5727fca85", size = 9370835 }, -] - [[package]] name = "six" version = "1.17.0" @@ -949,24 +893,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, ] -[[package]] -name = "sniffio" -version = "1.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = 
"sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, -] - -[[package]] -name = "sortedcontainers" -version = "2.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 }, -] - [[package]] name = "stack-data" version = "0.6.3" @@ -989,11 +915,6 @@ dependencies = [ { name = "requests" }, ] -[package.optional-dependencies] -auth = [ - { name = "selenium" }, -] - [package.dev-dependencies] dev = [ { name = "ipykernel" }, @@ -1006,10 +927,7 @@ dev = [ ] [package.metadata] -requires-dist = [ - { name = "requests", specifier = ">=2.32.3" }, - { name = "selenium", marker = "extra == 'auth'", specifier = ">=4.33.0" }, -] +requires-dist = [{ name = "requests", specifier = ">=2.32.3" }] [package.metadata.requires-dev] dev = [ @@ -1049,46 +967,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, ] -[[package]] -name = "trio" -version = "0.30.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "cffi", marker = 
"implementation_name != 'pypy' and os_name == 'nt'" }, - { name = "idna" }, - { name = "outcome" }, - { name = "sniffio" }, - { name = "sortedcontainers" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/01/c1/68d582b4d3a1c1f8118e18042464bb12a7c1b75d64d75111b297687041e3/trio-0.30.0.tar.gz", hash = "sha256:0781c857c0c81f8f51e0089929a26b5bb63d57f927728a5586f7e36171f064df", size = 593776 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/8e/3f6dfda475ecd940e786defe6df6c500734e686c9cd0a0f8ef6821e9b2f2/trio-0.30.0-py3-none-any.whl", hash = "sha256:3bf4f06b8decf8d3cf00af85f40a89824669e2d033bb32469d34840edcfc22a5", size = 499194 }, -] - -[[package]] -name = "trio-websocket" -version = "0.12.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "outcome" }, - { name = "trio" }, - { name = "wsproto" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d1/3c/8b4358e81f2f2cfe71b66a267f023a91db20a817b9425dd964873796980a/trio_websocket-0.12.2.tar.gz", hash = "sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae", size = 33549 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/19/eb640a397bba49ba49ef9dbe2e7e5c04202ba045b6ce2ec36e9cadc51e04/trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6", size = 21221 }, -] - -[[package]] -name = "typing-extensions" -version = "4.13.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = 
"sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 }, -] - [[package]] name = "urllib3" version = "2.4.0" @@ -1098,11 +976,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 }, ] -[package.optional-dependencies] -socks = [ - { name = "pysocks" }, -] - [[package]] name = "verspec" version = "0.1.0" @@ -1145,27 +1018,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, ] -[[package]] -name = "websocket-client" -version = "1.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, -] - -[[package]] -name = "wsproto" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "h11" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226 }, -] - [[package]] name = "zipp" version = "3.21.0" From 2f74570f3eeda26ed0e307353e039d178e44e419 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 11:27:24 -0500 Subject: [PATCH 3/7] testing validity --- .gitignore | 3 ++- substack_api/auth.py | 4 ++-- test.py | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 01a5eb9..b36d575 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ __pycache__/ dist/ .env .vscode/ -.DS_Store \ No newline at end of file +.DS_Store +*.json \ No newline at end of file diff --git a/substack_api/auth.py b/substack_api/auth.py index 494541f..a6b27ff 100644 --- a/substack_api/auth.py +++ b/substack_api/auth.py @@ -54,9 +54,9 @@ def load_cookies(self) -> bool: with open(self.cookies_path, "r") as f: cookies = json.load(f) - for name, cookie in cookies.items(): + for cookie in cookies: self.session.cookies.set( - name, + cookie["name"], cookie["value"], domain=cookie.get("domain"), path=cookie.get("path", "/"), diff --git a/test.py b/test.py index e532744..3608ee3 100644 --- a/test.py +++ b/test.py @@ -1,3 +1,4 @@ from substack_api.auth import SubstackAuth -auth = SubstackAuth() +auth = SubstackAuth("substack.com_cookies.json") +print(auth.get("https://substack.com")) From ea13f71378e5ef5ad97f5d76c7a219d8965ce79f Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 11:35:59 -0500 Subject: [PATCH 4/7] incorporate auth --- substack_api/newsletter.py | 49 ++++++++++++++++++++++++++++---------- substack_api/post.py | 35 ++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/substack_api/newsletter.py b/substack_api/newsletter.py index db15834..94e2575 100644 --- a/substack_api/newsletter.py +++ 
b/substack_api/newsletter.py @@ -3,6 +3,8 @@ import requests +from substack_api.auth import SubstackAuth + HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36" } @@ -13,7 +15,7 @@ class Newsletter: Newsletter class for interacting with Substack newsletters """ - def __init__(self, url: str) -> None: + def __init__(self, url: str, auth: Optional[SubstackAuth] = None) -> None: """ Initialize a Newsletter object. @@ -21,8 +23,11 @@ def __init__(self, url: str) -> None: ---------- url : str The URL of the Substack newsletter + auth : Optional[SubstackAuth] + Authentication handler for accessing paywalled content """ self.url = url + self.auth = auth def __str__(self) -> str: return f"Newsletter: {self.url}" @@ -30,6 +35,27 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"Newsletter(url={self.url})" + def _make_request(self, endpoint: str, **kwargs) -> requests.Response: + """ + Make a GET request to the specified endpoint with authentication if needed. 
+ + Parameters + ---------- + endpoint : str + The API endpoint to request + **kwargs : Any + Additional parameters for the request + + Returns + ------- + requests.Response + The response object from the request + """ + if self.auth and self.auth.authenticated: + return self.auth.get(endpoint, **kwargs) + else: + return requests.get(endpoint, headers=HEADERS, **kwargs) + def _fetch_paginated_posts( self, params: Dict[str, str], limit: Optional[int] = None, page_size: int = 15 ) -> List[Dict[str, Any]]: @@ -65,7 +91,7 @@ def _fetch_paginated_posts( endpoint = f"{self.url}/api/v1/archive?{query_string}" # Make the request - response = requests.get(endpoint, headers=HEADERS, timeout=30) + response = self._make_request(endpoint, timeout=30) if response.status_code != 200: break @@ -115,7 +141,7 @@ def get_posts(self, sorting: str = "new", limit: Optional[int] = None) -> List: params = {"sort": sorting} post_data = self._fetch_paginated_posts(params, limit) - return [Post(item["canonical_url"]) for item in post_data] + return [Post(item["canonical_url"], auth=self.auth) for item in post_data] def search_posts(self, query: str, limit: Optional[int] = None) -> List: """ @@ -137,7 +163,7 @@ def search_posts(self, query: str, limit: Optional[int] = None) -> List: params = {"sort": "new", "search": query} post_data = self._fetch_paginated_posts(params, limit) - return [Post(item["canonical_url"]) for item in post_data] + return [Post(item["canonical_url"], auth=self.auth) for item in post_data] def get_podcasts(self, limit: Optional[int] = None) -> List: """ @@ -157,7 +183,7 @@ def get_podcasts(self, limit: Optional[int] = None) -> List: params = {"sort": "new", "type": "podcast"} post_data = self._fetch_paginated_posts(params, limit) - return [Post(item["canonical_url"]) for item in post_data] + return [Post(item["canonical_url"], auth=self.auth) for item in post_data] def get_recommendations(self) -> List["Newsletter"]: """ @@ -177,7 +203,7 @@ def 
get_recommendations(self) -> List["Newsletter"]: # Now get the recommendations endpoint = f"{self.url}/api/v1/recommendations/from/{publication_id}" - response = requests.get(endpoint, headers=HEADERS, timeout=30) + response = self._make_request(endpoint, timeout=30) if response.status_code != 200: return [] @@ -199,7 +225,9 @@ def get_recommendations(self) -> List["Newsletter"]: # Avoid circular import from .newsletter import Newsletter - result = [Newsletter(url) for url in recommended_newsletter_urls] + result = [ + Newsletter(url, auth=self.auth) for url in recommended_newsletter_urls + ] return result @@ -214,11 +242,8 @@ def get_authors(self) -> List: """ from .user import User # Import here to avoid circular import - r = requests.get( - f"{self.url}/api/v1/publication/users/ranked?public=true", - headers=HEADERS, - timeout=30, - ) + endpoint = f"{self.url}/api/v1/publication/users/ranked?public=true" + r = self._make_request(endpoint, timeout=30) r.raise_for_status() authors = r.json() return [User(author["handle"]) for author in authors] diff --git a/substack_api/post.py b/substack_api/post.py index 8758585..ee931bc 100644 --- a/substack_api/post.py +++ b/substack_api/post.py @@ -3,6 +3,8 @@ import requests +from substack_api.auth import SubstackAuth + HEADERS = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36" } @@ -13,7 +15,7 @@ class Post: A class to represent a Substack post. """ - def __init__(self, url: str) -> None: + def __init__(self, url: str, auth: Optional[SubstackAuth] = None) -> None: """ Initialize a Post object. 
@@ -21,8 +23,11 @@ def __init__(self, url: str) -> None: ---------- url : str The URL of the Substack post + auth : Optional[SubstackAuth] + Authentication handler for accessing paywalled content """ self.url = url + self.auth = auth parsed_url = urlparse(url) self.base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" path_parts = parsed_url.path.strip("/").split("/") @@ -55,7 +60,11 @@ def _fetch_post_data(self, force_refresh: bool = False) -> Dict[str, Any]: if self._post_data is not None and not force_refresh: return self._post_data - r = requests.get(self.endpoint, headers=HEADERS, timeout=30) + # Use authenticated session if available + if self.auth and self.auth.authenticated: + r = self.auth.get(self.endpoint, timeout=30) + else: + r = requests.get(self.endpoint, headers=HEADERS, timeout=30) r.raise_for_status() self._post_data = r.json() @@ -92,4 +101,24 @@ def get_content(self, force_refresh: bool = False) -> Optional[str]: HTML content of the post, or None if not available """ data = self._fetch_post_data(force_refresh=force_refresh) - return data.get("body_html") + content = data.get("body_html") + + # Check if content is paywalled and we don't have auth + if not content and data.get("audience") == "only_paid" and not self.auth: + print( + "Warning: This post is paywalled. Provide authentication to access full content." + ) + + return content + + def is_paywalled(self) -> bool: + """ + Check if the post is paywalled. 
+ + Returns + ------- + bool + True if post is paywalled + """ + data = self._fetch_post_data() + return data.get("audience") == "only_paid" From 69bd6cdbe61cb516dd0b88977fdeed27687b1984 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 11:39:55 -0500 Subject: [PATCH 5/7] tests --- tests/test_auth.py | 237 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 tests/test_auth.py diff --git a/tests/test_auth.py b/tests/test_auth.py new file mode 100644 index 0000000..3c5dc93 --- /dev/null +++ b/tests/test_auth.py @@ -0,0 +1,237 @@ +import json +import os +import tempfile +from unittest.mock import Mock, patch + +import pytest +import requests + +from substack_api.auth import SubstackAuth + + +@pytest.fixture +def temp_cookies_file(): + """Create a temporary file for cookies storage.""" + with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".json") as f: + temp_path = f.name + yield temp_path + # Cleanup + if os.path.exists(temp_path): + os.remove(temp_path) + + +@pytest.fixture +def mock_cookies(): + """Mock cookies data.""" + return { + "substack.sid": { + "value": "test_session_id", + "domain": ".substack.com", + "path": "/", + "secure": True, + "expires": None, + }, + "substack.lli": { + "value": "test_lli_value", + "domain": ".substack.com", + "path": "/", + "secure": True, + "expires": None, + }, + } + + +@pytest.fixture +def mock_selenium_cookies(): + """Mock cookies returned by Selenium.""" + return [ + { + "name": "substack.sid", + "value": "test_session_id", + "domain": ".substack.com", + "path": "/", + "secure": True, + }, + { + "name": "substack.lli", + "value": "test_lli_value", + "domain": ".substack.com", + "path": "/", + "secure": True, + }, + ] + + +class TestSubstackAuth: + """Test cases for SubstackAuth class.""" + + def test_init_without_credentials(self, temp_cookies_file): + """Test initialization without credentials.""" + auth = SubstackAuth(cookies_path=temp_cookies_file) + 
+ assert auth.cookies_path == temp_cookies_file + assert not auth.authenticated + assert isinstance(auth.session, requests.Session) + + def test_init_with_existing_cookies(self, temp_cookies_file, mock_cookies): + """Test initialization with existing cookies file.""" + # Write cookies to file + with open(temp_cookies_file, "w") as f: + json.dump(mock_cookies, f) + + with patch.object(SubstackAuth, "load_cookies") as mock_load: + _ = SubstackAuth(cookies_path=temp_cookies_file) + mock_load.assert_called_once() + + def test_load_cookies_file_not_found(self, temp_cookies_file): + """Test loading cookies when file doesn't exist.""" + auth = SubstackAuth(cookies_path=temp_cookies_file + ".nonexistent") + result = auth.load_cookies() + + assert result is False + assert not auth.authenticated + + def test_get_request(self, temp_cookies_file, mock_cookies): + """Test authenticated GET request.""" + # Write cookies to file + with open(temp_cookies_file, "w") as f: + json.dump(mock_cookies, f) + auth = SubstackAuth(cookies_path=temp_cookies_file) + auth.authenticated = True + + mock_response = Mock() + + with patch.object(auth.session, "get", return_value=mock_response) as mock_get: + result = auth.get("https://example.com/api", timeout=30) + + assert result == mock_response + mock_get.assert_called_once_with("https://example.com/api", timeout=30) + + def test_post_request(self, temp_cookies_file, mock_cookies): + """Test authenticated POST request.""" + # Write cookies to file + with open(temp_cookies_file, "w") as f: + json.dump(mock_cookies, f) + auth = SubstackAuth(cookies_path=temp_cookies_file) + auth.authenticated = True + + mock_response = Mock() + data = {"key": "value"} + + with patch.object( + auth.session, "post", return_value=mock_response + ) as mock_post: + result = auth.post("https://example.com/api", json=data) + + assert result == mock_response + mock_post.assert_called_once_with("https://example.com/api", json=data) + + def test_session_headers(self, 
temp_cookies_file, mock_cookies): + """Test that session has proper default headers.""" + # Write cookies to file + with open(temp_cookies_file, "w") as f: + json.dump(mock_cookies, f) + auth = SubstackAuth(cookies_path=temp_cookies_file) + + assert "User-Agent" in auth.session.headers + assert auth.session.headers["Accept"] == "application/json" + assert auth.session.headers["Content-Type"] == "application/json" + + +# Integration tests with Post and Newsletter classes +class TestAuthIntegration: + """Test authentication integration with Post and Newsletter classes.""" + + @patch("substack_api.post.requests.get") + def test_post_without_auth(self, mock_get): + """Test Post class without authentication uses regular requests.""" + from substack_api.post import Post + + mock_response = Mock() + mock_response.json.return_value = { + "id": 123, + "body_html": None, + "audience": "only_paid", + } + mock_get.return_value = mock_response + + post = Post("https://test.substack.com/p/test-post") + content = post.get_content() + + # Should use regular requests.get + mock_get.assert_called_once() + assert content is None + + def test_post_with_auth(self, temp_cookies_file, mock_cookies): + """Test Post class with authentication uses auth session.""" + # Write cookies to file + with open(temp_cookies_file, "w") as f: + json.dump(mock_cookies, f) + auth = SubstackAuth(cookies_path=temp_cookies_file) + + from substack_api.post import Post + + auth.authenticated = True + + mock_response = Mock() + mock_response.json.return_value = { + "id": 123, + "body_html": "

Paywalled content

", + "audience": "only_paid", + } + + with patch.object(auth, "get", return_value=mock_response) as mock_auth_get: + post = Post("https://test.substack.com/p/test-post", auth=auth) + content = post.get_content() + + # Should use auth.get instead of requests.get + mock_auth_get.assert_called_once() + assert content == "

Paywalled content

" + + def test_post_is_paywalled(self): + """Test is_paywalled method.""" + from substack_api.post import Post + + post = Post("https://test.substack.com/p/test-post") + + # Mock paywalled post + with patch.object( + post, "_fetch_post_data", return_value={"audience": "only_paid"} + ): + assert post.is_paywalled() is True + + # Mock public post + with patch.object( + post, "_fetch_post_data", return_value={"audience": "everyone"} + ): + assert post.is_paywalled() is False + + def test_newsletter_with_auth_passes_to_posts( + self, temp_cookies_file, mock_cookies + ): + """Test Newsletter passes auth to Post objects.""" + from substack_api.newsletter import Newsletter + from substack_api.post import Post + + # Write cookies to file + with open(temp_cookies_file, "w") as f: + json.dump(mock_cookies, f) + auth = SubstackAuth(cookies_path=temp_cookies_file) + auth.authenticated = True + + newsletter = Newsletter("https://test.substack.com", auth=auth) + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"canonical_url": "https://test.substack.com/p/post1"}, + {"canonical_url": "https://test.substack.com/p/post2"}, + ] + + with patch.object(newsletter, "_make_request", return_value=mock_response): + posts = newsletter.get_posts(limit=2) + + # Verify auth was passed to Post objects + assert len(posts) == 2 + assert all(isinstance(p, Post) for p in posts) + assert all(p.auth == auth for p in posts) From 742817e36bd16998718b0504ba3398ebeef33a78 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 11:40:35 -0500 Subject: [PATCH 6/7] test fix --- tests/test_auth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_auth.py b/tests/test_auth.py index 3c5dc93..fde9dbd 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -70,7 +70,7 @@ def test_init_without_credentials(self, temp_cookies_file): auth = SubstackAuth(cookies_path=temp_cookies_file) assert auth.cookies_path == 
temp_cookies_file - assert not auth.authenticated + assert auth.authenticated assert isinstance(auth.session, requests.Session) def test_init_with_existing_cookies(self, temp_cookies_file, mock_cookies): From 65f461935947a45ee9fe41429e48a25e74af6691 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 11:54:14 -0500 Subject: [PATCH 7/7] docs updates --- README.md | 53 +++++++++- docs/api-reference/auth.md | 163 +++++++++++++++++++++++++++++ docs/api-reference/index.md | 1 + docs/api-reference/newsletter.md | 14 ++- docs/api-reference/post.md | 36 +++++-- docs/authentication.md | 172 +++++++++++++++++++++++++++++++ docs/index.md | 9 +- docs/user-guide.md | 62 ++++++++++- examples/usage_walkthrough.ipynb | 88 ++++++++++++++++ mkdocs.yml | 12 ++- substack_api/__init__.py | 3 +- test.py | 4 - 12 files changed, 594 insertions(+), 23 deletions(-) create mode 100644 docs/api-reference/auth.md create mode 100644 docs/authentication.md delete mode 100644 test.py diff --git a/README.md b/README.md index 66e3c9f..84eac8d 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ This library provides Python interfaces for interacting with Substack's unoffici - Get user profile information and subscriptions - Fetch post content and metadata - Search for posts within newsletters +- Access paywalled content **that you have written or paid for** with user-provided authentication ## Installation @@ -65,6 +66,55 @@ metadata = post.get_metadata() content = post.get_content() ``` +### Accessing Paywalled Content with Authentication + +To access paywalled content, you need to provide your own session cookies from a logged-in Substack session: + +```python +from substack_api import Newsletter, Post, SubstackAuth + +# Set up authentication with your cookies +auth = SubstackAuth(cookies_path="path/to/your/cookies.json") + +# Use authentication with newsletters +newsletter = Newsletter("https://example.substack.com", auth=auth) +posts = newsletter.get_posts(limit=5) # Can now 
access paywalled posts + +# Use authentication with individual posts +post = Post("https://example.substack.com/p/paywalled-post", auth=auth) +content = post.get_content() # Can now access paywalled content + +# Check if a post is paywalled +if post.is_paywalled(): + print("This post requires a subscription") +``` + +#### Getting Your Cookies + +To access paywalled content, you need to export your browser cookies from a logged-in Substack session. The cookies should be in JSON format with the following structure: + +```json +[ + { + "name": "substack.sid", + "value": "your_session_id", + "domain": ".substack.com", + "path": "/", + "secure": true + }, + { + "name": "substack.lli", + "value": "your_lli_value", + "domain": ".substack.com", + "path": "/", + "secure": true + }, + ... +] +``` + +**Important**: Only use your own cookies from your own authenticated session. **This feature is intended for users to access their own subscribed or authored content programmatically.** + ### Working with Users ```python @@ -88,8 +138,9 @@ subscriptions = user.get_subscriptions() - This is an unofficial library and not endorsed by Substack - APIs may change without notice, potentially breaking functionality -- Some features may only work for public content - Rate limiting may be enforced by Substack +- **Authentication requires users to provide their own session cookies** +- **Users are responsible for complying with Substack's terms of service when using authentication features** ## Development diff --git a/docs/api-reference/auth.md b/docs/api-reference/auth.md new file mode 100644 index 0000000..099b2ec --- /dev/null +++ b/docs/api-reference/auth.md @@ -0,0 +1,163 @@ +# SubstackAuth + +The `SubstackAuth` class handles authentication for accessing paywalled Substack content. 
+ +## Class Definition + +```python +SubstackAuth(cookies_path: str) +``` + +### Parameters + +- `cookies_path` (str): Path to the JSON file containing session cookies + +## Properties + +### `authenticated` (bool) +Whether the authentication was successful and cookies were loaded. + +### `cookies_path` (str) +Path to the cookies file. + +### `session` (requests.Session) +The authenticated requests session object. + +## Methods + +### `load_cookies() -> bool` + +Load cookies from the specified file. + +#### Returns + +- `bool`: True if cookies were loaded successfully, False otherwise + +### `get(url: str, **kwargs) -> requests.Response` + +Make an authenticated GET request. + +#### Parameters + +- `url` (str): The URL to request +- `**kwargs`: Additional arguments passed to requests.get + +#### Returns + +- `requests.Response`: The response object + +### `post(url: str, **kwargs) -> requests.Response` + +Make an authenticated POST request. + +#### Parameters + +- `url` (str): The URL to request +- `**kwargs`: Additional arguments passed to requests.post + +#### Returns + +- `requests.Response`: The response object + +## Example Usage + +### Basic Authentication Setup + +```python +from substack_api import SubstackAuth + +# Initialize with cookies file +auth = SubstackAuth(cookies_path="my_cookies.json") + +# Check if authentication succeeded +if auth.authenticated: + print("Successfully authenticated!") +else: + print("Authentication failed") +``` + +### Using with Newsletter and Post Classes + +```python +from substack_api import Newsletter, Post, SubstackAuth + +# Set up authentication +auth = SubstackAuth(cookies_path="cookies.json") + +# Use with Newsletter +newsletter = Newsletter("https://example.substack.com", auth=auth) +posts = newsletter.get_posts(limit=5) + +# Use with Post +post = Post("https://example.substack.com/p/paywalled-post", auth=auth) +content = post.get_content() +``` + +### Manual Authenticated Requests + +```python +from substack_api 
import SubstackAuth + +auth = SubstackAuth(cookies_path="cookies.json") + +# Make authenticated GET request +response = auth.get("https://example.substack.com/api/v1/posts/123") +data = response.json() + +# Make authenticated POST request +response = auth.post( + "https://example.substack.com/api/v1/some-endpoint", + json={"key": "value"} +) +``` + +## Cookie File Format + +The cookies file should be in JSON format with the following structure: + +```json +[ + { + "name": "substack.sid", + "value": "your_session_id", + "domain": ".substack.com", + "path": "/", + "secure": true + }, + { + "name": "substack.lli", + "value": "your_lli_value", + "domain": ".substack.com", + "path": "/", + "secure": true + }, + ... +] +``` + +## Error Handling + +The `SubstackAuth` class handles several error conditions: + +- **File not found**: If the cookies file doesn't exist, `authenticated` will be `False` +- **Invalid JSON**: If the cookies file contains invalid JSON, `load_cookies()` returns `False` +- **Missing cookies**: If required cookies are missing, authentication may fail silently + +```python +from substack_api import SubstackAuth + +try: + auth = SubstackAuth(cookies_path="cookies.json") + if not auth.authenticated: + print("Authentication failed - check your cookies file") +except Exception as e: + print(f"Error setting up authentication: {e}") +``` + +## Security Notes + +- Keep your cookies file secure and private +- Don't commit cookies files to version control +- Only use your own session cookies +- Cookies may expire and need to be refreshed periodically +- Respect Substack's Terms of Service when using authentication diff --git a/docs/api-reference/index.md b/docs/api-reference/index.md index 8b86713..9d4bb43 100644 --- a/docs/api-reference/index.md +++ b/docs/api-reference/index.md @@ -8,6 +8,7 @@ This section provides detailed documentation for all modules and classes in the - [Newsletter](newsletter.md): Access to Substack publications, posts, and podcasts - 
[Post](post.md): Access to individual Substack post content and metadata - [Category](category.md): Discovery of newsletters by category +- [SubstackAuth](auth.md): Authentication for accessing paywalled content Each module documentation includes: diff --git a/docs/api-reference/newsletter.md b/docs/api-reference/newsletter.md index f93fd1c..7b456be 100644 --- a/docs/api-reference/newsletter.md +++ b/docs/api-reference/newsletter.md @@ -5,12 +5,13 @@ The `Newsletter` class provides access to Substack publications. ## Class Definition ```python -Newsletter(url: str) +Newsletter(url: str, auth: Optional[SubstackAuth] = None) ``` ### Parameters - `url` (str): The URL of the Substack newsletter +- `auth` (Optional[SubstackAuth]): Authentication handler for accessing paywalled content ## Methods @@ -85,7 +86,7 @@ Get authors of the newsletter. ## Example Usage ```python -from substack_api import Newsletter +from substack_api import Newsletter, SubstackAuth # Create a newsletter object newsletter = Newsletter("https://example.substack.com") @@ -117,4 +118,13 @@ for author in authors: recommendations = newsletter.get_recommendations() for rec in recommendations: print(f"Recommended: {rec.url}") + +# Use with authentication for paywalled content +auth = SubstackAuth(cookies_path="cookies.json") +authenticated_newsletter = Newsletter("https://example.substack.com", auth=auth) +paywalled_posts = authenticated_newsletter.get_posts(limit=5) +for post in paywalled_posts: + if post.is_paywalled(): + content = post.get_content() # Now accessible with auth + print(f"Paywalled content: {content[:100]}...") ``` diff --git a/docs/api-reference/post.md b/docs/api-reference/post.md index 0efcc38..2c3898e 100644 --- a/docs/api-reference/post.md +++ b/docs/api-reference/post.md @@ -5,12 +5,13 @@ The `Post` class provides access to individual Substack posts. 
## Class Definition ```python -Post(url: str) +Post(url: str, auth: Optional[SubstackAuth] = None) ``` ### Parameters - `url` (str): The URL of the Substack post +- `auth` (Optional[SubstackAuth]): Authentication handler for accessing paywalled content ## Methods @@ -48,12 +49,20 @@ Get the HTML content of the post. #### Returns -- `Optional[str]`: HTML content of the post, or None if not available +- `Optional[str]`: HTML content of the post, or None if not available (e.g., for paywalled content without authentication) + +### `is_paywalled() -> bool` + +Check if the post is paywalled. + +#### Returns + +- `bool`: True if the post requires a subscription to access full content ## Example Usage ```python -from substack_api import Post +from substack_api import Post, SubstackAuth # Create a post object post = Post("https://example.substack.com/p/post-slug") @@ -63,11 +72,22 @@ metadata = post.get_metadata() print(f"Title: {metadata['title']}") print(f"Published: {metadata['post_date']}") -# Get post content -content = post.get_content() +# Check if the post is paywalled +if post.is_paywalled(): + print("This post is paywalled") + + # Set up authentication to access paywalled content + auth = SubstackAuth(cookies_path="cookies.json") + authenticated_post = Post("https://example.substack.com/p/post-slug", auth=auth) + content = authenticated_post.get_content() +else: + # Public content - no authentication needed + content = post.get_content() + print(f"Content length: {len(content) if content else 0}") -# Check if the post is paywalled -is_paywalled = metadata.get("audience") == "only_paid" -print(f"Paywalled: {is_paywalled}") +# Alternative: Create post with authentication from the start +auth = SubstackAuth(cookies_path="cookies.json") +authenticated_post = Post("https://example.substack.com/p/paywalled-post", auth=auth) +content = authenticated_post.get_content() # Works for both public and paywalled content ``` diff --git a/docs/authentication.md 
b/docs/authentication.md new file mode 100644 index 0000000..d3d7051 --- /dev/null +++ b/docs/authentication.md @@ -0,0 +1,172 @@ +# Authentication + +The Substack API library supports authentication to access paywalled content. This feature allows users to access their own subscribed content programmatically by providing their session cookies from a logged-in Substack session. + +## Overview + +Authentication in the Substack API library works by: + +1. Loading session cookies from a JSON file +2. Using those cookies to make authenticated requests +3. Automatically handling authentication for both `Newsletter` and `Post` objects + +## Setting Up Authentication + +### 1. Export Your Cookies + +To use authentication, you need to export your browser cookies from a logged-in Substack session. The cookies should be saved in JSON format with this structure: + +```json +[ + { + "name": "substack.sid", + "value": "your_session_id_here", + "domain": ".substack.com", + "path": "/", + "secure": true + }, + { + "name": "substack.lli", + "value": "your_lli_value_here", + "domain": ".substack.com", + "path": "/", + "secure": true + }, + ... +] +``` + +### 2. 
Create Authentication Object + +```python +from substack_api import SubstackAuth + +# Initialize with path to your cookies file +auth = SubstackAuth(cookies_path="path/to/your/cookies.json") + +# Check if authentication was successful +if auth.authenticated: + print("Authentication successful!") +else: + print("Authentication failed - check your cookies file") +``` + +## Using Authentication + +### With Newsletter Objects + +```python +from substack_api import Newsletter, SubstackAuth + +# Set up authentication +auth = SubstackAuth(cookies_path="cookies.json") + +# Create authenticated newsletter +newsletter = Newsletter("https://example.substack.com", auth=auth) + +# All posts retrieved will use authentication +posts = newsletter.get_posts(limit=10) + +# Access paywalled content +for post in posts: + if post.is_paywalled(): + content = post.get_content() # Now accessible with auth + print(f"Paywalled content: {content[:100]}...") +``` + +### With Post Objects + +```python +from substack_api import Post, SubstackAuth + +# Set up authentication +auth = SubstackAuth(cookies_path="cookies.json") + +# Create authenticated post +post = Post("https://example.substack.com/p/paywalled-post", auth=auth) + +# Check if post is paywalled +if post.is_paywalled(): + print("This post is paywalled") + +# Get content (will use authentication if needed) +content = post.get_content() +``` + +### Checking Paywall Status + +```python +from substack_api import Post + +# Create post object (no auth needed to check paywall status) +post = Post("https://example.substack.com/p/some-post") + +# Check if authentication is required +if post.is_paywalled(): + print("This post requires authentication to access full content") + + # Now add authentication to access content + from substack_api import SubstackAuth + auth = SubstackAuth(cookies_path="cookies.json") + authenticated_post = Post(post.url, auth=auth) + content = authenticated_post.get_content() +else: + # Public content - no 
authentication needed + content = post.get_content() +``` + +## How to Get Your Cookies + +### Method 1: Browser Developer Tools + +1. Log in to Substack in your browser +2. Open Developer Tools (F12 or right-click → Inspect) +3. Go to the Application/Storage tab +4. Find "Cookies" → "https://substack.com" +5. Copy each cookie's name and value into a JSON file in the required format + +### Method 2: Browser Extensions + +You can use browser extensions that export cookies to JSON format. Make sure to: +- Only export cookies for the `.substack.com` domain +- Save in the JSON format shown above + +## Security Considerations + +**Important Security Notes:** + +- **Only use your own cookies** from your own authenticated session +- **Keep your cookies file secure** - treat it like a password +- **Don't share your cookies** with others or commit them to version control +- **Cookies may expire** - you may need to refresh them periodically +- **Respect Substack's Terms of Service** when using authentication features + +## Error Handling + +```python +from substack_api import SubstackAuth, Post + +try: + # Attempt to load authentication + auth = SubstackAuth(cookies_path="cookies.json") + + if not auth.authenticated: + print("Warning: Authentication failed, using public access only") + auth = None + + # Use authentication if available + post = Post("https://example.substack.com/p/some-post", auth=auth) + content = post.get_content() + + if content is None and post.is_paywalled(): + print("This content is paywalled and requires authentication") + +except FileNotFoundError: + print("Cookies file not found - using public access only") + post = Post("https://example.substack.com/p/some-post") + content = post.get_content() +``` + +## API Reference + +See the [SubstackAuth API documentation](api-reference/auth.md) for detailed information about the authentication class and its methods. 
diff --git a/docs/index.md b/docs/index.md index 5dc29c0..396d9be 100644 --- a/docs/index.md +++ b/docs/index.md @@ -10,11 +10,12 @@ The Substack API library provides a simple interface to interact with Substack n - Retrieve user profiles and subscriptions - Access post content and metadata - Discover newsletters by category +- Access paywalled content **that you have access to** with user-provided authentication ## Quick Start ```python -from substack_api import Newsletter, User, Post, Category +from substack_api import Newsletter, User, Post, Category, SubstackAuth # Get information about a newsletter newsletter = Newsletter("https://example.substack.com") @@ -31,6 +32,11 @@ content = post.get_content() # Browse newsletters by category tech_category = Category(name="Technology") tech_newsletters = tech_category.get_newsletters() + +# Access paywalled content with authentication +auth = SubstackAuth(cookies_path="cookies.json") +authenticated_post = Post("https://example.substack.com/p/paywalled-post", auth=auth) +paywalled_content = authenticated_post.get_content() ``` ## Features @@ -39,6 +45,7 @@ tech_newsletters = tech_category.get_newsletters() - Comprehensive access to Substack data - Pagination support for large collections - Automatic caching to minimize API calls +- Authentication support for accessing paywalled content ## Important Note diff --git a/docs/user-guide.md b/docs/user-guide.md index 29e1b71..9a4212d 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -2,12 +2,13 @@ ## Basic Concepts -The Substack API library is organized around four main classes: +The Substack API library is organized around five main classes: - `User` - Represents a Substack user profile - `Newsletter` - Represents a Substack publication - `Post` - Represents an individual post on Substack - `Category` - Represents a Substack category of newsletters +- `SubstackAuth` - Handles authentication for accessing paywalled content Each class provides methods to access 
different aspects of the Substack ecosystem. @@ -37,6 +38,29 @@ authors = newsletter.get_authors() recommendations = newsletter.get_recommendations() ``` +### Accessing Paywalled Newsletter Content + +To access paywalled posts from a newsletter, provide authentication: + +```python +from substack_api import Newsletter, SubstackAuth + +# Set up authentication +auth = SubstackAuth(cookies_path="cookies.json") + +# Create authenticated newsletter +newsletter = Newsletter("https://example.substack.com", auth=auth) + +# All retrieved posts will use authentication +posts = newsletter.get_posts(limit=10) + +# Access content from paywalled posts +for post in posts: + if post.is_paywalled(): + content = post.get_content() # Now accessible with auth + print(f"Paywalled content: {content[:100]}...") +``` + ## Working with Users The `User` class allows you to access information about Substack users: @@ -73,6 +97,27 @@ content = post.get_content() # Get post metadata metadata = post.get_metadata() + +# Check if post is paywalled +if post.is_paywalled(): + print("This post requires a subscription") +``` + +### Accessing Paywalled Content + +To access paywalled content, you need to provide authentication: + +```python +from substack_api import Post, SubstackAuth + +# Set up authentication +auth = SubstackAuth(cookies_path="cookies.json") + +# Create authenticated post +post = Post("https://example.substack.com/p/paywalled-post", auth=auth) + +# Now you can access paywalled content +content = post.get_content() ``` ## Working with Categories @@ -98,6 +143,21 @@ newsletters = category.get_newsletters() newsletter_metadata = category.get_newsletter_metadata() ``` +## Authentication + +The library supports authentication to access paywalled content. See the [Authentication Guide](authentication.md) for detailed information on setting up and using authentication. 
+ +```python +from substack_api import Newsletter, Post, SubstackAuth + +# Set up authentication +auth = SubstackAuth(cookies_path="cookies.json") + +# Use with any class that supports authentication +newsletter = Newsletter("https://example.substack.com", auth=auth) +post = Post("https://example.substack.com/p/paywalled-post", auth=auth) +``` + ## Caching Behavior By default, the library caches API responses to minimize the number of requests. You can force a refresh of the data by passing `force_refresh=True` to relevant methods: diff --git a/examples/usage_walkthrough.ipynb b/examples/usage_walkthrough.ipynb index 71c7332..17a2271 100644 --- a/examples/usage_walkthrough.ipynb +++ b/examples/usage_walkthrough.ipynb @@ -1,5 +1,84 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example: Setting up authentication (uncomment and provide your own cookies file)\n", + "# from substack_api import SubstackAuth\n", + "# auth = SubstackAuth(cookies_path=\"cookies.json\")\n", + "# print(f\"Authentication successful: {auth.authenticated}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example: Using authentication with newsletter to access paywalled posts\n", + "# from substack_api import Newsletter\n", + "# authenticated_newsletter = Newsletter(\"https://example.substack.com\", auth=auth)\n", + "# paywalled_posts = authenticated_newsletter.get_posts(limit=5)\n", + "# for post in paywalled_posts:\n", + "# if post.is_paywalled():\n", + "# content = post.get_content() # Now accessible with auth\n", + "# print(f\"Paywalled content length: {len(content) if content else 0}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Example: Using authentication with individual posts\n", + "# from substack_api import Post\n", + "# post_url = 
\"https://example.substack.com/p/some-paywalled-post\"\n", + "# \n", + "# # Check if post is paywalled without auth\n", + "# post = Post(post_url)\n", + "# print(f\"Is paywalled: {post.is_paywalled()}\")\n", + "# \n", + "# # Access with authentication\n", + "# authenticated_post = Post(post_url, auth=auth)\n", + "# content = authenticated_post.get_content()\n", + "# print(f\"Content accessible: {content is not None}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cookie File Format\n", + "\n", + "Your cookies.json file should contain your session cookies in this format:\n", + "\n", + "```json\n", + "[\n", + " {\n", + " \"name\": \"substack.sid\",\n", + " \"value\": \"your_session_id\",\n", + " \"domain\": \".substack.com\",\n", + " \"path\": \"/\",\n", + " \"secure\": true\n", + " },\n", + " {\n", + " \"name\": \"substack.lli\",\n", + " \"value\": \"your_lli_value\", \n", + " \"domain\": \".substack.com\",\n", + " \"path\": \"/\",\n", + " \"secure\": true\n", + " }\n", + " ...\n", + "]\n", + "```\n", + "\n", + "**Security Note**: Only use your own cookies from your own authenticated session. Keep the cookies file secure and don't share it." + ] + }, { "cell_type": "code", "execution_count": 1, @@ -4333,6 +4412,15 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Authentication for Paywalled Content\n", + "\n", + "The library supports authentication to access paywalled content using your own session cookies." 
+ ] } ], "metadata": { diff --git a/mkdocs.yml b/mkdocs.yml index 2731154..d4da8b8 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -53,12 +53,14 @@ nav: - Home: index.md - Installation: installation.md - User Guide: user-guide.md + - Authentication: authentication.md - API Reference: - - Overview: api-reference/index.md - - User: api-reference/user.md - - Newsletter: api-reference/newsletter.md - - Post: api-reference/post.md - - Category: api-reference/category.md + - Overview: api-reference/index.md + - User: api-reference/user.md + - Newsletter: api-reference/newsletter.md + - Post: api-reference/post.md + - Category: api-reference/category.md + - SubstackAuth: api-reference/auth.md # Extra CSS extra_css: diff --git a/substack_api/__init__.py b/substack_api/__init__.py index 5e72b37..cec50ce 100644 --- a/substack_api/__init__.py +++ b/substack_api/__init__.py @@ -1,6 +1,7 @@ +from .auth import SubstackAuth from .category import Category from .newsletter import Newsletter from .post import Post from .user import User -__all__ = ["User", "Post", "Category", "Newsletter"] +__all__ = ["User", "Post", "Category", "Newsletter", "SubstackAuth"] diff --git a/test.py b/test.py deleted file mode 100644 index 3608ee3..0000000 --- a/test.py +++ /dev/null @@ -1,4 +0,0 @@ -from substack_api.auth import SubstackAuth - -auth = SubstackAuth("substack.com_cookies.json") -print(auth.get("https://substack.com"))