Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ dist/
.env
.vscode/
.DS_Store
*.json
*.json
substack_api.egg-info/
.coverage
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dev = [
"mkdocs-material>=9.6.6",
"mkdocstrings-python>=1.16.2",
"pytest>=8.3.4",
"pytest-cov>=7.0.0",
"ruff>=0.9.9",
]

Expand Down
121 changes: 92 additions & 29 deletions substack_api/newsletter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re
import urllib.parse
from time import sleep
from typing import Any, Dict, List, Optional

Expand All @@ -10,6 +12,43 @@
}


# Public discovery-search endpoint used to resolve a publication's numeric ID.
SEARCH_URL = "https://substack.com/api/v1/publication/search"

# Browser-like headers for the discovery endpoint: reuses the module-wide
# User-Agent and presents the request as originating from the discover page.
DISCOVERY_HEADERS = {
    "User-Agent": HEADERS["User-Agent"],
    "Accept": "application/json",
    "Origin": "https://substack.com",
    "Referer": "https://substack.com/discover",
}


def _host_from_url(url: str) -> str:
host = urllib.parse.urlparse(
url if "://" in url else f"https://{url}"
).netloc.lower()
return host


def _match_publication(search_results: dict, host: str) -> Optional[dict]:
# Try exact custom domain, then subdomain match
for item in search_results.get("publications", []):
if (
item.get("custom_domain") and _host_from_url(item["custom_domain"]) == host
) or (
item.get("subdomain")
and f"{item['subdomain'].lower()}.substack.com" == host
):
return item
# Fallback: loose match on subdomain token
m = re.match(r"^([a-z0-9-]+)\.substack\.com$", host)
if m:
sub = m.group(1)
for item in search_results.get("publications", []):
if item.get("subdomain", "").lower() == sub:
return item
return None


class Newsletter:
"""
Newsletter class for interacting with Substack newsletters
Expand Down Expand Up @@ -183,49 +222,73 @@ def get_podcasts(self, limit: Optional[int] = None) -> List:
post_data = self._fetch_paginated_posts(params, limit)
return [Post(item["canonical_url"], auth=self.auth) for item in post_data]

def _resolve_publication_id(self) -> Optional[int]:
    """
    Resolve publication_id via Substack discovery search — no posts needed.

    Queries the public discovery search endpoint with this newsletter's
    host name and matches the returned publications against that host.

    Returns
    -------
    Optional[int]
        The publication ID if found, otherwise None.

    Raises
    ------
    requests.HTTPError
        If the HTTP request to Substack fails.
    """
    host = _host_from_url(self.url)
    q = host.split(":")[0]  # strip port if present
    params = {
        "query": q,
        "page": 0,
        "limit": 25,
        "skipExplanation": "true",
        "sort": "relevance",
    }
    r = requests.get(
        SEARCH_URL, headers=DISCOVERY_HEADERS, params=params, timeout=30
    )
    r.raise_for_status()
    match = _match_publication(r.json(), host)
    return match.get("id") if match else None

def get_recommendations(self) -> List["Newsletter"]:
    """
    Get recommended publications without relying on the latest post.

    The publication ID is resolved via the discovery search first; if
    that fails, the older post-derived lookup is used as a best-effort
    fallback before giving up.

    Returns
    -------
    List[Newsletter]
        Newsletter objects for each recommended publication; empty when
        the publication ID cannot be resolved or nothing is recommended.

    Raises
    ------
    requests.HTTPError
        If the recommendations request itself fails.
    """
    publication_id = self._resolve_publication_id()
    if not publication_id:
        # Graceful fallback to the existing (post-derived) path; any
        # failure here simply leaves publication_id unset.
        try:
            posts = self.get_posts(limit=1)
            publication_id = (
                posts[0].get_metadata()["publication_id"] if posts else None
            )
        except Exception:
            publication_id = None
    if not publication_id:
        return []

    endpoint = f"{self.url}/api/v1/recommendations/from/{publication_id}"
    response = self._make_request(endpoint, timeout=30)
    response.raise_for_status()
    recommendations = response.json() or []

    urls = []
    for rec in recommendations:
        pub = rec.get("recommendedPublication", {})
        # Prefer the custom domain; otherwise fall back to the
        # <subdomain>.substack.com host. Skip entries with neither.
        if pub.get("custom_domain"):
            urls.append(pub["custom_domain"])
        elif pub.get("subdomain"):
            urls.append(f"{pub['subdomain']}.substack.com")

    # Fix: the previous local `from .newsletter import Newsletter` was a
    # redundant self-import — Newsletter is defined in this very module
    # and already resolvable by name at call time.
    return [Newsletter(u, auth=self.auth) for u in urls]

def get_authors(self) -> List:
"""
Expand Down
Loading