Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .pre-commit-config.yaml
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really appreciate the focus on linting and formatting! Just the same, the number of changes applied in this PR is massive as a result of applying what look like linting and formatting updates. This makes it difficult to discern what was changed due to linting or formatting and what was changed to help with uv environment development. Consider decoupling these ideas into distinct PRs.

You could keep this PR in place, dropping the linting/formatting changes, and focus on just the uv implementation. Separately, another PR could be used for linting changes to help isolate those changes from environment management. This would improve our ability as developers to understand, when something breaks or changes behavior, which PR it was associated with and, as a result, where to look for a root cause and resolution.

From my perspective, PRs should be small and isolated to one specific focus to help keep the development moving towards common goals and decrease the complexity for both the developers and reviewers (i.e., larger changes are harder to contextualize).

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.11.9
hooks:
# Run the linter.
- id: ruff
types_or: [python, pyi]

args:
[
--fix,
--exclude,
"contrib/*,Older Experiments/,backend/data/,scripts/",
]
# Run the formatter.
- id: ruff-format
types_or: [python, pyi]
args: [--exclude, "contrib/*,Older Experiments/,backend/data/,scripts/"]
Comment on lines +7 to +19
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned during the MOSS call earlier, consider merging this work with the .pre-commit-config.yml found under the Older Experiments directory so as not to lose the configuration we've used up until recently.

27 changes: 14 additions & 13 deletions Older Experiments/docs/gen_ref_pages.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Generate the code reference pages and navigation."""

# from: https://mkdocstrings.github.io/recipes/#bind-pages-to-sections-themselves
import os
from pathlib import Path
Expand All @@ -7,29 +8,29 @@

nav = mkdocs_gen_files.Nav()

src = os.path.join("src", "moss", "lib")
src = os.path.join('src', 'moss', 'lib')

for path in sorted(Path(src).rglob("*.py")):
module_path = src / path.relative_to(src).with_suffix("")
doc_path = src / path.relative_to(src).with_suffix(".md")
full_doc_path = Path("reference", doc_path)
for path in sorted(Path(src).rglob('*.py')):
module_path = src / path.relative_to(src).with_suffix('')
doc_path = src / path.relative_to(src).with_suffix('.md')
full_doc_path = Path('reference', doc_path)

parts = tuple(module_path.parts)

if parts[-1] == "__init__":
if parts[-1] == '__init__':
parts = parts[:-1]
doc_path = doc_path.with_name("index.md")
full_doc_path = full_doc_path.with_name("index.md")
elif parts[-1] == "__main__":
doc_path = doc_path.with_name('index.md')
full_doc_path = full_doc_path.with_name('index.md')
elif parts[-1] == '__main__':
continue

nav[parts] = doc_path.as_posix()

with mkdocs_gen_files.open(full_doc_path, "w") as fd:
ident = ".".join(parts)
fd.write(f"::: {ident}")
with mkdocs_gen_files.open(full_doc_path, 'w') as fd:
ident = '.'.join(parts)
fd.write(f'::: {ident}')

mkdocs_gen_files.set_edit_path(full_doc_path, path.relative_to(root))

with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file:
with mkdocs_gen_files.open('reference/SUMMARY.md', 'w') as nav_file:
nav_file.writelines(nav.build_literate_nav())
127 changes: 72 additions & 55 deletions Older Experiments/scrappy-proof-of-concept/clients/github_client.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# clients/github_client.py
import requests
import time
import json
import logging
import time

import requests
from config import GITHUB_API_BASE_URL # Use centralized config

logger = logging.getLogger(__name__)


class GitHubClient:
BASE_URL = GITHUB_API_BASE_URL

Expand All @@ -19,7 +21,7 @@ def __init__(self, token=None, default_timeout=30):
self.default_timeout = default_timeout
self.headers = {}
if token:
self.headers["Authorization"] = f"token {token}"
self.headers['Authorization'] = f'token {token}'

def get(self, url, params=None):
"""
Expand All @@ -29,72 +31,83 @@ def get(self, url, params=None):
attempt = 0
while attempt < max_retries:
attempt += 1
logger.debug(f"[GET Attempt {attempt}/{max_retries}] URL={url} Params={params}")
logger.debug(
f'[GET Attempt {attempt}/{max_retries}] URL={url} Params={params}'
)
try:
response = requests.get(
url,
headers=self.headers,
params=params,
timeout=self.default_timeout
timeout=self.default_timeout,
)
if response.status_code == 200:
logger.debug(f"[GET {url}] -> 200 OK")
logger.debug(f'[GET {url}] -> 200 OK')
try:
return response.json()
except json.JSONDecodeError as e:
logger.error(f"[GET {url}] JSON parse error: {e}")
logger.error(f'[GET {url}] JSON parse error: {e}')
return None
elif response.status_code == 403:
try:
error_json = response.json()
except json.JSONDecodeError:
error_json = {}
message = error_json.get("message", "").lower()
if "rate limit exceeded" in message:
reset_timestamp = response.headers.get("X-RateLimit-Reset")
remaining = response.headers.get("X-RateLimit-Remaining")
logger.warning("GitHub rate limit exceeded!")
logger.warning(f"X-RateLimit-Remaining: {remaining}")
logger.warning(f"X-RateLimit-Reset: {reset_timestamp}")
message = error_json.get('message', '').lower()
if 'rate limit exceeded' in message:
reset_timestamp = response.headers.get('X-RateLimit-Reset')
remaining = response.headers.get('X-RateLimit-Remaining')
logger.warning('GitHub rate limit exceeded!')
logger.warning(f'X-RateLimit-Remaining: {remaining}')
logger.warning(f'X-RateLimit-Reset: {reset_timestamp}')
if reset_timestamp:
reset_ts = int(reset_timestamp)
current_ts = int(time.time())
sleep_time = reset_ts - current_ts + 1
if sleep_time < 1:
sleep_time = 1
logger.warning(f"Sleeping for {sleep_time} seconds (rate limit).")
logger.warning(
f'Sleeping for {sleep_time} seconds (rate limit).'
)
time.sleep(sleep_time)
continue
else:
logger.warning("No X-RateLimit-Reset header found. Sleeping 60s.")
logger.warning(
'No X-RateLimit-Reset header found. Sleeping 60s.'
)
time.sleep(60)
continue
else:
logger.error(f"[GET {url}] 403 Forbidden: {response.text}")
logger.error(f'[GET {url}] 403 Forbidden: {response.text}')
return None
else:
logger.error(f"[GET {url}] -> {response.status_code} {response.reason}")
logger.error(f"Response Text: {response.text}")
logger.error(
f'[GET {url}] -> {response.status_code} {response.reason}'
)
logger.error(f'Response Text: {response.text}')
return None
except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as e:
logger.warning(f"[GET {url}] Timeout on attempt {attempt}. Error: {e}")
except (
requests.exceptions.ConnectTimeout,
requests.exceptions.ReadTimeout,
) as e:
logger.warning(f'[GET {url}] Timeout on attempt {attempt}. Error: {e}')
if attempt < max_retries:
backoff = 5 * attempt
logger.warning(f"Retrying in {backoff} seconds...")
logger.warning(f'Retrying in {backoff} seconds...')
time.sleep(backoff)
else:
logger.error("Max retries reached. Giving up.")
logger.error('Max retries reached. Giving up.')
return None
except requests.exceptions.RequestException as e:
logger.error(f"[GET {url}] RequestException on attempt {attempt}: {e}")
logger.error(f'[GET {url}] RequestException on attempt {attempt}: {e}')
if attempt < max_retries:
backoff = 5 * attempt
logger.warning(f"Retrying in {backoff} seconds...")
logger.warning(f'Retrying in {backoff} seconds...')
time.sleep(backoff)
else:
logger.error("Max retries reached. Giving up.")
logger.error('Max retries reached. Giving up.')
return None
logger.error(f"[GET {url}] All retries exhausted. Returning None.")
logger.error(f'[GET {url}] All retries exhausted. Returning None.')
return None

def get_all_pages(self, url, params=None):
Expand All @@ -105,107 +118,111 @@ def get_all_pages(self, url, params=None):
page = 1
while True:
local_params = params.copy() if params else {}
local_params.update({"page": page, "per_page": 100})
logger.info(f"Fetching page {page} of {url}")
local_params.update({'page': page, 'per_page': 100})
logger.info(f'Fetching page {page} of {url}')
items = self.get(url, params=local_params)
if not items:
logger.info(f"No more data for {url} on page {page}.")
logger.info(f'No more data for {url} on page {page}.')
break
if isinstance(items, list):
all_items.extend(items)
logger.info(f"Fetched {len(items)} items from page {page}.")
logger.info(f'Fetched {len(items)} items from page {page}.')
if len(items) < 100:
break
else:
logger.info(f"Non-list response encountered. Ending pagination for {url}.")
logger.info(
f'Non-list response encountered. Ending pagination for {url}.'
)
break
page += 1
time.sleep(1)
logger.info(f"Finished pagination for {url}, total items fetched: {len(all_items)}")
logger.info(
f'Finished pagination for {url}, total items fetched: {len(all_items)}'
)
return all_items

def get_repository(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}"
url = f'{self.BASE_URL}/repos/{owner}/{repo}'
return self.get(url)

def get_user(self, username):
url = f"{self.BASE_URL}/users/{username}"
url = f'{self.BASE_URL}/users/{username}'
return self.get(url)

def get_organization(self, org_login):
url = f"{self.BASE_URL}/orgs/{org_login}"
url = f'{self.BASE_URL}/orgs/{org_login}'
return self.get(url)

def get_branches(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/branches"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/branches'
return self.get_all_pages(url)

def get_tags(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/tags"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/tags'
return self.get_all_pages(url)

def get_commits(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/commits"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/commits'
return self.get_all_pages(url)

def get_labels(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/labels"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/labels'
return self.get_all_pages(url)

def get_milestones(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/milestones"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/milestones'
return self.get_all_pages(url)

def get_releases(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/releases"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/releases'
return self.get_all_pages(url)

def get_webhooks(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/hooks"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/hooks'
return self.get_all_pages(url)

def get_events(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/events"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/events'
return self.get_all_pages(url)

def get_collaborators(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/collaborators"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/collaborators'
return self.get_all_pages(url)

def get_workflows(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/actions/workflows"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/actions/workflows'
data = self.get(url)
if data and isinstance(data, dict):
return data.get("workflows", [])
return data.get('workflows', [])
return []

def get_workflow_runs(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/actions/runs"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/actions/runs'
data = self.get(url)
if data and isinstance(data, dict):
return data.get("workflow_runs", [])
return data.get('workflow_runs', [])
return []

def get_readme(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/readme"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/readme'
return self.get(url)

def get_discussions(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/discussions"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/discussions'
return self.get_all_pages(url)

def get_citation_cff(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/contents/CITATION.cff"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/contents/CITATION.cff'
return self.get(url)

def get_traffic_views(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/traffic/views"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/traffic/views'
return self.get(url)

def get_traffic_clones(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/traffic/clones"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/traffic/clones'
return self.get(url)

def get_traffic_popular_paths(self, owner, repo):
url = f"{self.BASE_URL}/repos/{owner}/{repo}/traffic/popular/paths"
url = f'{self.BASE_URL}/repos/{owner}/{repo}/traffic/popular/paths'
return self.get(url)
Loading