From c77b8f57b2c2c1abf767c1ddbf9694aedbb80c11 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 12:15:25 -0500 Subject: [PATCH 1/5] user module with support for redirects --- substack_api/user.py | 149 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 140 insertions(+), 9 deletions(-) diff --git a/substack_api/user.py b/substack_api/user.py index da08c82..6b363d3 100644 --- a/substack_api/user.py +++ b/substack_api/user.py @@ -1,4 +1,6 @@ -from typing import Any, Dict, List +import logging +from typing import Any, Dict, List, Optional +from urllib.parse import urlparse import requests @@ -6,13 +8,67 @@ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36" } +# Setup logger +logger = logging.getLogger(__name__) + + +def resolve_handle_redirect(old_handle: str, timeout: int = 30) -> Optional[str]: + """ + Resolve a potentially renamed Substack handle by following redirects. + + Parameters + ---------- + old_handle : str + The original handle that may have been renamed + timeout : int + Request timeout in seconds + + Returns + ------- + Optional[str] + The new handle if renamed, None if no redirect or on error + """ + try: + # Make request to the public profile page with redirects enabled + response = requests.get( + f"https://substack.com/@{old_handle}", + headers=HEADERS, + timeout=timeout, + allow_redirects=True, + ) + + # If we got a successful response, check if we were redirected + if response.status_code == 200: + # Parse the final URL to extract the handle + parsed_url = urlparse(response.url) + path_parts = parsed_url.path.strip("/").split("/") + + # Check if this is a profile URL (starts with @) + if path_parts and path_parts[0].startswith("@"): + new_handle = path_parts[0][1:] # Remove the @ prefix + + # Only return if it's actually different + if new_handle and new_handle != old_handle: + logger.info( + f"Handle redirect detected: {old_handle} -> {new_handle}" + ) + return new_handle + + return None + + except requests.RequestException as e: + logger.debug(f"Error resolving handle redirect for {old_handle}: {e}") + return None + class User: """ - User class for interacting with Substack user profiles + User class for interacting with Substack user profiles. + + Now handles renamed accounts by following redirects when a handle has changed. """ - def __init__(self, username: str) -> None: + def __init__(self, username: str, follow_redirects: bool = True): """ Initialize a User object. @@ -20,10 +76,15 @@ def __init__(self, username: str) -> None: ---------- username : str The Substack username + follow_redirects : bool + Whether to follow redirects when a handle has been renamed (default: True) """ self.username = username + self.original_username = username # Keep track of the original + self.follow_redirects = follow_redirects self.endpoint = f"https://substack.com/api/v1/user/{username}/public_profile" self._user_data = None # Cache for user data + self._redirect_attempted = False # Prevent infinite redirect loops def __str__(self) -> str: return f"User: {self.username}" @@ -31,9 +92,24 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"User(username={self.username})" + def _update_handle(self, new_handle: str) -> None: + """ + Update the user's handle and endpoint. + + Parameters + ---------- + new_handle : str + The new handle after redirect + """ + logger.info(f"Updating handle from {self.username} to {new_handle}") + self.username = new_handle + self.endpoint = f"https://substack.com/api/v1/user/{new_handle}/public_profile" + def _fetch_user_data(self, force_refresh: bool = False) -> Dict[str, Any]: """ - Fetch the raw user data from the API and cache it + Fetch the raw user data from the API and cache it. + + Handles renamed accounts by following redirects when follow_redirects is True. Parameters ---------- @@ -44,15 +120,58 @@ def _fetch_user_data(self, force_refresh: bool = False) -> Dict[str, Any]: ------- Dict[str, Any] Full user profile data + + Raises + ------ + requests.HTTPError + If the user cannot be found even after redirect attempts """ if self._user_data is not None and not force_refresh: return self._user_data - r = requests.get(self.endpoint, headers=HEADERS, timeout=30) - r.raise_for_status() + try: + r = requests.get(self.endpoint, headers=HEADERS, timeout=30) + r.raise_for_status() + self._user_data = r.json() + return self._user_data + + except requests.HTTPError as e: + # Handle 404 errors if we should follow redirects + if ( + e.response.status_code == 404 + and self.follow_redirects + and not self._redirect_attempted + ): + # Mark that we've attempted a redirect to prevent loops + self._redirect_attempted = True + + # Try to resolve the redirect + new_handle = resolve_handle_redirect(self.username) + + if new_handle: + # Update our state with the new handle + self._update_handle(new_handle) - self._user_data = r.json() - return self._user_data + # Try the request again with the new handle + try: + r = requests.get(self.endpoint, headers=HEADERS, timeout=30) + r.raise_for_status() + self._user_data = r.json() + return self._user_data + except requests.HTTPError: + # If it still fails, log and re-raise + logger.error( + f"Failed to fetch user data even after redirect to {new_handle}" + ) + raise + else: + # No redirect found, this is a real 404 + logger.debug( + f"No redirect found for {self.username}, user may be deleted" + ) + + # Re-raise the original error + raise def get_raw_data(self, force_refresh: bool = False) -> Dict[str, Any]: """ @@ -109,6 +228,18 @@ def profile_set_up_at(self) -> str: data = self._fetch_user_data() return data["profile_set_up_at"] + @property + def was_redirected(self) -> bool: + """ + Check if this user's handle was redirected from the original. + + Returns + ------- + bool + True if the handle was changed via redirect + """ + return self.username != self.original_username + def get_subscriptions(self) -> List[Dict[str, Any]]: """ Get newsletters the user has subscribed to @@ -121,7 +252,7 @@ def get_subscriptions(self) -> List[Dict[str, Any]]: data = self._fetch_user_data() subscriptions = [] - for sub in data["subscriptions"]: + for sub in data.get("subscriptions", []): pub = sub["publication"] domain = pub.get("custom_domain") or f"{pub['subdomain']}.substack.com" subscriptions.append( From 2835f621e8ece91f365f82b01f0161eb6c433192 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 12:20:31 -0500 Subject: [PATCH 2/5] tests --- test.py | 5 + tests/test_user_redirects.py | 292 +++++++++++++++++++++++++++++++++++ 2 files changed, 297 insertions(+) create mode 100644 test.py create mode 100644 tests/test_user_redirects.py diff --git a/test.py b/test.py new file mode 100644 index 0000000..14ed845 --- /dev/null +++ b/test.py @@ -0,0 +1,5 @@ +from substack_api.user import User + +test = User("5thingsyoushouldbuy") + +print(test.get_raw_data()["handle"]) diff --git a/tests/test_user_redirects.py b/tests/test_user_redirects.py new file mode 100644 index 0000000..05265aa --- /dev/null +++ b/tests/test_user_redirects.py @@ -0,0 +1,292 @@ +# tests/test_user_redirects.py + +import unittest +from unittest.mock import ANY, Mock, patch + +import pytest +import requests + +from substack_api.user import User, resolve_handle_redirect + + +class TestHandleRedirects(unittest.TestCase): + """Test cases for handle redirect functionality.""" + + @patch("requests.get") + def test_resolve_handle_redirect_success(self, mock_get): + """Test successful handle redirect resolution.""" + # Mock a successful redirect + mock_response = Mock() + mock_response.status_code = 200 + mock_response.url = "https://substack.com/@newhandle" + mock_get.return_value = mock_response + + result = resolve_handle_redirect("oldhandle") + + assert result == "newhandle" + mock_get.assert_called_once_with( + "https://substack.com/@oldhandle", + headers=ANY, + timeout=30, + allow_redirects=True, + ) + + @patch("requests.get") + def test_resolve_handle_redirect_no_redirect(self, mock_get): + """Test when no redirect occurs (same handle).""" + # Mock no redirect + mock_response = Mock() + mock_response.status_code = 200 + mock_response.url = "https://substack.com/@samehandle" + mock_get.return_value = mock_response + + result = resolve_handle_redirect("samehandle") + + assert result is None + + @patch("requests.get") + def test_resolve_handle_redirect_error(self, mock_get): + """Test error handling in redirect resolution.""" + # Mock network error + mock_get.side_effect = requests.RequestException("Network error") + + result = resolve_handle_redirect("errorhandle") + + assert result is None + + @patch("requests.get") + def test_resolve_handle_redirect_404(self, mock_get): + """Test when profile page itself returns 404.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_get.return_value = mock_response + + result = resolve_handle_redirect("deletedhandle") + + assert result is None + + +class TestUserWithRedirects(unittest.TestCase): + """Test User class with redirect handling.""" + + def test_user_init_with_redirects(self): + """Test User initialization with redirect support.""" + user = User("testuser", follow_redirects=True) + + assert user.username == "testuser" + assert user.original_username == "testuser" + assert user.follow_redirects is True + assert user._redirect_attempted is False + + def test_user_init_without_redirects(self): + """Test User initialization without redirect support.""" + user = User("testuser", follow_redirects=False) + + assert user.follow_redirects is False + + @patch("requests.get") + def test_fetch_user_data_no_redirect_needed(self, mock_get): + """Test normal case where no redirect is needed.""" + # Mock successful API response + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"id": 123, "name": "Test User"} + mock_get.return_value = mock_response + + user = User("testuser") + data = user._fetch_user_data() + + assert data == {"id": 123, "name": "Test User"} + assert user.username == "testuser" # Username unchanged + assert not user.was_redirected + mock_get.assert_called_once() + + @patch("requests.get") + @patch("substack_api.user.resolve_handle_redirect") + def test_fetch_user_data_with_redirect(self, mock_resolve, mock_get): + """Test handling of renamed user with redirect.""" + # First call returns 404 + mock_404 = Mock() + mock_404.status_code = 404 + mock_404.raise_for_status.side_effect = requests.HTTPError(response=mock_404) + + # Second call (after redirect) succeeds + mock_success = Mock() + mock_success.status_code = 200 + mock_success.json.return_value = { + "id": 123, + "name": "Test User", + "handle": "newhandle", + } + + mock_get.side_effect = [mock_404, mock_success] + + # Mock redirect resolution + mock_resolve.return_value = "newhandle" + + user = User("oldhandle", follow_redirects=True) + data = user._fetch_user_data() + + assert data == {"id": 123, "name": "Test User", "handle": "newhandle"} + assert user.username == "newhandle" # Username updated + assert user.original_username == "oldhandle" + assert user.was_redirected + assert user._redirect_attempted is True + + # Verify API calls + assert mock_get.call_count == 2 + mock_resolve.assert_called_once_with("oldhandle") + + @patch("requests.get") + @patch("substack_api.user.resolve_handle_redirect") + def test_fetch_user_data_redirect_disabled(self, mock_resolve, mock_get): + """Test that redirects are not followed when disabled.""" + # Mock 404 response + mock_404 = Mock() + mock_404.status_code = 404 + mock_404.raise_for_status.side_effect = requests.HTTPError(response=mock_404) + mock_get.return_value = mock_404 + + user = User("oldhandle", follow_redirects=False) + + with pytest.raises(requests.HTTPError): + user._fetch_user_data() + + # Should not attempt redirect + mock_resolve.assert_not_called() + assert user.username == "oldhandle" # Username unchanged + + @patch("requests.get") + @patch("substack_api.user.resolve_handle_redirect") + def test_fetch_user_data_no_redirect_found(self, mock_resolve, mock_get): + """Test when no redirect is found (user truly deleted).""" + # Mock 404 response + mock_404 = Mock() + mock_404.status_code = 404 + mock_404.raise_for_status.side_effect = requests.HTTPError(response=mock_404) + mock_get.return_value = mock_404 + + # No redirect found + mock_resolve.return_value = None + + user = User("deleteduser", follow_redirects=True) + + with pytest.raises(requests.HTTPError): + user._fetch_user_data() + + mock_resolve.assert_called_once_with("deleteduser") + assert user.username == "deleteduser" # Username unchanged + + @patch("requests.get") + @patch("substack_api.user.resolve_handle_redirect") + def test_fetch_user_data_redirect_still_404(self, mock_resolve, mock_get): + """Test when redirect is found but new handle also returns 404.""" + # Both calls return 404 + mock_404 = Mock() + mock_404.status_code = 404 + mock_404.raise_for_status.side_effect = requests.HTTPError(response=mock_404) + mock_get.return_value = mock_404 + + # Redirect found + mock_resolve.return_value = "newhandle" + + user = User("oldhandle", follow_redirects=True) + + with pytest.raises(requests.HTTPError): + user._fetch_user_data() + + assert user.username == "newhandle" # Username was updated + assert user.was_redirected + assert mock_get.call_count == 2 + + @patch("requests.get") + def test_prevent_infinite_redirect_loop(self, mock_get): + """Test that redirect is only attempted once.""" + # All calls return 404 + mock_404 = Mock() + mock_404.status_code = 404 + mock_404.raise_for_status.side_effect = requests.HTTPError(response=mock_404) + mock_get.return_value = mock_404 + + user = User("testuser", follow_redirects=True) + + # First attempt + with pytest.raises(requests.HTTPError): + user._fetch_user_data() + + # Second attempt should not try redirect again + with pytest.raises(requests.HTTPError): + user._fetch_user_data() + + # Should only have made 3 API calls total (2 original + 1 after redirect) + # not 4+ calls + assert mock_get.call_count <= 3 + + def test_update_handle(self): + """Test the _update_handle method.""" + user = User("oldhandle") + + user._update_handle("newhandle") + + assert user.username == "newhandle" + assert ( + user.endpoint == "https://substack.com/api/v1/user/newhandle/public_profile" + ) + assert user.original_username == "oldhandle" # Original preserved + + def test_was_redirected_property(self): + """Test the was_redirected property.""" + user = User("testuser") + assert not user.was_redirected + + user._update_handle("newhandle") + assert user.was_redirected + + +class TestUserRedirectExamples(unittest.TestCase): + """Test the specific redirect examples provided.""" + + @patch("requests.get") + @patch("substack_api.user.resolve_handle_redirect") + def test_real_world_redirects(self, mock_resolve, mock_get): + """Test with the real examples provided.""" + test_cases = [ + ("150wordreviews", "johndevore"), + ("15thcfeminist", "15thcenturyfeminist"), + ("300tangpoems", "hyunwookimwriter"), + ("5thingsyoushouldbuy", "beckymalinsky"), + ] + + for old_handle, new_handle in test_cases: + with self.subTest(old=old_handle, new=new_handle): + # Reset mocks + mock_get.reset_mock() + mock_resolve.reset_mock() + + # Setup mocks + mock_404 = Mock() + mock_404.status_code = 404 + mock_404.raise_for_status.side_effect = requests.HTTPError( + response=mock_404 + ) + + mock_success = Mock() + mock_success.status_code = 200 + mock_success.json.return_value = { + "id": 123, + "handle": new_handle, + "name": "Test User", + } + + mock_get.side_effect = [mock_404, mock_success] + mock_resolve.return_value = new_handle + + # Test + user = User(old_handle) + data = user.get_raw_data() + + # Verify + assert user.original_username == old_handle + assert user.username == new_handle + assert user.was_redirected + assert data["handle"] == new_handle From 71ee53610d6d0b92d3fbb1aa4956752de302f0d0 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 12:23:05 -0500 Subject: [PATCH 3/5] init and readme --- README.md | 35 +++++++++++++++++++++++++++++++++++ substack_api/__init__.py | 11 +++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 84eac8d..9a07a6e 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,41 @@ name = user.name subscriptions = user.get_subscriptions() ``` +#### Handling Renamed Accounts +Substack allows users to change their handle (username) at any time. When this happens, the old API endpoints return 404 errors. This library automatically handles these redirects by default. +##### Automatic Redirect Handling + +```python +from substack_api import User + +# This will automatically follow redirects if the handle has changed +user = User("oldhandle") # Will find the user even if they renamed to "newhandle" + +# Check if a redirect occurred +if user.was_redirected: + print(f"User was renamed from {user.original_username} to {user.username}") +``` + +##### Disable Redirect Following + +If you prefer to handle 404s yourself: + +```python +# Disable automatic redirect following +user = User("oldhandle", follow_redirects=False) +``` + +##### Manual Handle Resolution + +You can also manually resolve handle redirects: + +```python +from substack_api import resolve_handle_redirect + +new_handle = resolve_handle_redirect("oldhandle") +if new_handle: + print(f"Handle was renamed to: {new_handle}") +``` ## Limitations - This is an unofficial library and not endorsed by Substack diff --git a/substack_api/__init__.py b/substack_api/__init__.py index cec50ce..c21012d 100644 --- a/substack_api/__init__.py +++ b/substack_api/__init__.py @@ -2,6 +2,13 @@ from .category import Category from .newsletter import Newsletter from .post import Post -from .user import User +from .user import User, resolve_handle_redirect -__all__ = ["User", "Post", "Category", "Newsletter", "SubstackAuth"] +__all__ = [ + "User", + "Post", + "Category", + "Newsletter", + "SubstackAuth", + "resolve_handle_redirect", +] From 51bed0a2b575b447e231b9c87e49bbf4776797e4 Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 12:27:08 -0500 Subject: [PATCH 4/5] docs --- docs/api-reference/user.md | 58 +++++++++++++++++++++++++++++++++++--- docs/user-guide.md | 20 +++++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/docs/api-reference/user.md b/docs/api-reference/user.md index fba9f5d..95441dd 100644 --- a/docs/api-reference/user.md +++ b/docs/api-reference/user.md @@ -1,22 +1,23 @@ # User -The `User` class provides access to Substack user profiles. +The `User` class provides access to Substack user profiles. It also handles renamed Substack handles by following redirects. ## Class Definition ```python -User(username: str) +User(username: str, follow_redirects: bool = True) ``` ### Parameters - `username` (str): The Substack username +- `follow_redirects` (bool): Whether to follow redirects when a handle has been renamed (default: True) ## Methods ### `_fetch_user_data(force_refresh: bool = False) -> Dict[str, Any]` -Fetch the raw user data from the API and cache it. +Fetch the raw user data from the API and cache it. Handles renamed accounts by following redirects when `follow_redirects` is True. #### Parameters @@ -26,6 +27,10 @@ Fetch the raw user data from the API and cache it. - `Dict[str, Any]`: Full user profile data +#### Raises + +- `requests.HTTPError`: If the user cannot be found even after redirect attempts + ### `get_raw_data(force_refresh: bool = False) -> Dict[str, Any]` Get the complete raw user data. @@ -46,6 +51,14 @@ Get newsletters the user has subscribed to. - `List[Dict[str, Any]]`: List of publications the user subscribes to with domain info +### `_update_handle(new_handle: str) -> None` + +Update the user's handle and endpoint. + +#### Parameters + +- `new_handle` (str): The new handle after redirect + ## Properties ### `id` -> int @@ -60,22 +73,59 @@ Get the user's name. Get the date when the user's profile was set up. +### `was_redirected` -> bool + +Check if this user's handle was redirected from the original. + +#### Returns + +- `bool`: True if the handle was changed via redirect + +## Helper Functions + +### `resolve_handle_redirect(old_handle: str, timeout: int = 30) -> Optional[str]` + +Resolve a potentially renamed Substack handle by following redirects. + +#### Parameters + +- `old_handle` (str): The original handle that may have been renamed +- `timeout` (int): Request timeout in seconds + +#### Returns + +- `Optional[str]`: The new handle if renamed, None if no redirect or on error + ## Example Usage ```python from substack_api import User -# Create a user object +# Create a user object (automatically handles redirects) user = User("username") +# Create a user object without redirect handling +user_no_redirect = User("username", follow_redirects=False) + # Get basic user information print(f"User ID: {user.id}") print(f"Name: {user.name}") print(f"Profile created: {user.profile_set_up_at}") +# Check if the user was redirected (handle was renamed) +if user.was_redirected: + print(f"Original handle '{user.original_username}' was redirected to '{user.username}'") + # Get the user's subscriptions subscriptions = user.get_subscriptions() # Get raw user data user_data = user.get_raw_data() + +# Using the standalone redirect resolver +from substack_api.user import resolve_handle_redirect + +new_handle = resolve_handle_redirect("old_username") +if new_handle: + print(f"The handle has been renamed to: {new_handle}") ``` diff --git a/docs/user-guide.md b/docs/user-guide.md index 9a4212d..746347d 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -71,6 +71,9 @@ from substack_api import User # Create a user object user = User("username") +# Create a user object without redirect handling (if a handle has been renamed) +user = User("username", follow_redirects=False) + # Get basic user information user_id = user.id name = user.name @@ -80,6 +83,23 @@ subscriptions = user.get_subscriptions() # Get raw user data user_data = user.get_raw_data() + +# Check if the user was redirected (handle was renamed) +if user.was_redirected: + print(f"Original handle '{user.original_username}' was redirected to '{user.username}'") +``` + +### Handle Redirects + +Substack users sometimes change handles. The `User` class automatically handles these redirects by default: + +```python +# This will work even if "old_username" has been renamed to "new_username" +user = User("old_username") # follow_redirects=True by default + +# Check if a redirect happened +if user.was_redirected: + print(f"User was redirected from {user.original_username} to {user.username}") ``` ## Working with Posts From 5d6c8e3c4c05aa677df97f8cd94ca69c0caf0c0e Mon Sep 17 00:00:00 2001 From: Nick Hagar Date: Sun, 25 May 2025 12:28:12 -0500 Subject: [PATCH 5/5] cleanup --- test.py | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index 14ed845..0000000 --- a/test.py +++ /dev/null @@ -1,5 +0,0 @@ -from substack_api.user import User - -test = User("5thingsyoushouldbuy") - -print(test.get_raw_data()["handle"])