Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 50 additions & 2 deletions update_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,68 @@
"""
import json
import os
import re
import sys
import time
from distutils.version import LooseVersion
from xmlrpc.client import Fault
from xmlrpc.client import ServerProxy

INDEX_FILE_NAME = os.path.join(os.path.dirname(__file__), "index.json")

BLACKLIST = {"pytest-nbsmoke"}


class RateLimitedServerProxy:
    """Thin wrapper around ``xmlrpc.client.ServerProxy`` that retries calls
    when PyPI rate-limits the client.

    PyPI's XML-RPC endpoint signals throttling via an ``xmlrpc.client.Fault``
    whose message either names a reset delay ("... Limit may reset in N
    seconds.") or merely states that there were too many requests.  Every
    wrapped call sleeps and retries on those faults; any other ``Fault``
    propagates unchanged to the caller.
    """

    # "... too many requests by the client. Limit may reset in 1 seconds."
    # -> retry after the stated delay.  Compiled once; matched with search()
    # so a prefix before the sentence is allowed but not required (the old
    # pattern's "^.+" wrongly rejected messages with no prefix).
    _LIMIT_RESET_RE = re.compile(r"Limit may reset in (\d+) seconds\.$")

    # "... too many requests by the client." with no reset hint
    # -> retry after a fixed 60-second back-off.  Raw string with the final
    # period escaped (the old pattern's bare "." matched any character).
    _TOO_MANY_REQUESTS_RE = re.compile(
        r"The action could not be performed because there were too many "
        r"requests by the client\.$"
    )

    def __init__(self, uri):
        # All calls are delegated to this proxy through _rate_limit_request.
        self._server_proxy = ServerProxy(uri)

    def browse(self, classifiers):
        return self._rate_limit_request(self._server_proxy.browse, classifiers)

    def list_packages(self):
        return self._rate_limit_request(self._server_proxy.list_packages)

    def package_releases(self, package_name):
        return self._rate_limit_request(
            self._server_proxy.package_releases, package_name
        )

    def release_data(self, name, version):
        return self._rate_limit_request(
            self._server_proxy.release_data, name, version
        )

    def _rate_limit_request(self, request_method, *args):
        """Call *request_method* with *args*, retrying on rate-limit faults.

        Retries indefinitely while PyPI keeps reporting "too many requests";
        any other ``Fault`` is re-raised unchanged.
        """
        while True:
            try:
                return request_method(*args)
            except Fault as fault:
                # Fault message of the form:
                #   ... too many requests by the client. Limit may reset in 1 seconds.
                reset_match = self._LIMIT_RESET_RE.search(fault.faultString)
                if reset_match is not None:
                    time.sleep(int(reset_match.group(1)))
                    continue

                # Fault message of the form:
                #   ... too many requests by the client.
                # No reset hint given, so fall back to a fixed delay.
                if self._TOO_MANY_REQUESTS_RE.search(fault.faultString) is not None:
                    time.sleep(60)
                    continue

                # Not a rate-limit fault: surface it to the caller.
                raise


def iter_plugins(client, blacklist, *, consider_classifier=True):
"""
Returns an iterator of (name, latest version, summary) from PyPI.

:param client: xmlrpclib.ServerProxy
:param client: RateLimitedServerProxy
:param search: package names to search for
"""
# previously we used the more efficient "search" XMLRPC method, but
Expand Down Expand Up @@ -96,7 +144,7 @@ def write_plugins_index(file_name, plugins):


def main():
client = ServerProxy("https://pypi.org/pypi")
client = RateLimitedServerProxy("https://pypi.org/pypi")
plugins = sorted(iter_plugins(client, BLACKLIST, consider_classifier=False))

if write_plugins_index(INDEX_FILE_NAME, plugins):
Expand Down