From fd6fd16a90f6e33a5ff6bec297d17a75d6befdee Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Tue, 28 Apr 2026 11:47:40 -0400 Subject: [PATCH 1/7] Add PyPI source type and migrate pip-installed Python plugins Fixes #2440 Add a pypi source type to the fetcher so plugins distributed as Python packages can track releases from PyPI rather than GitHub tags. The fetcher queries https://pypi.org/pypi/{name}/json, skips yanked and pre-release versions, and respects the existing ignore_versions and max_version filters. Migrate four plugins whose Dockerfiles install via pip to the new source type: - connectrpc/python (protoc-gen-connect-python) - community/nipunn1313-mypy (mypy-protobuf) - community/nipunn1313-mypy-grpc (mypy-protobuf) - community/danielgtaylor-betterproto (betterproto) Leave protocolbuffers/python, protocolbuffers/pyi, and grpc/python on github, as all three compile C++ from source using Bazel and version against the upstream monorepo tag rather than any PyPI package. Add table-driven tests for fetchPyPI covering yanked releases, empty file lists, Python-style pre-release strings (e.g. 2.0.0b7), and the ignore_versions/max_version filters. Mock data is shaped after the real mypy-protobuf PyPI API response. --- internal/fetchclient/fetchclient.go | 78 +++++++++- internal/fetchclient/fetchclient_test.go | 135 ++++++++++++++++++ internal/source/config.go | 16 +++ internal/source/source_test.go | 10 +- .../success/mypy-protobuf/source.yaml | 3 + .../danielgtaylor-betterproto/source.yaml | 5 +- .../nipunn1313-mypy-grpc/source.yaml | 5 +- plugins/community/nipunn1313-mypy/source.yaml | 5 +- plugins/connectrpc/python/source.yaml | 5 +- 9 files changed, 243 insertions(+), 19 deletions(-) create mode 100644 internal/fetchclient/fetchclient_test.go create mode 100644 internal/source/testdata/success/mypy-protobuf/source.yaml diff --git a/internal/fetchclient/fetchclient.go b/internal/fetchclient/fetchclient.go index 0c76a848a..94467de1c 100644 --- a/internal/fetchclient/fetchclient.go +++ b/internal/fetchclient/fetchclient.go @@ -29,6 +29,11 @@ const ( mavenURL = "https://repo1.maven.org/maven2" ) +const ( + // docs: https://warehouse.pypa.io/api-reference/json.html + defaultPyPIURL = "https://pypi.org/pypi" +) + var ( // ErrSemverPrerelease is returned when a version is a pre-release. ErrSemverPrerelease = errors.New("pre-release versions are not supported") @@ -36,8 +41,9 @@ var ( // Client is a client used to fetch latest package version. type Client struct { - httpClient *http.Client - ghClient *github.Client + httpClient *http.Client + ghClient *github.Client + pypiBaseURL string } // New returns a new client. @@ -54,8 +60,9 @@ func New(ctx context.Context) *Client { client = retryableClient.StandardClient() } return &Client{ - httpClient: client, - ghClient: github.NewClient(client), + httpClient: client, + ghClient: github.NewClient(client), + pypiBaseURL: defaultPyPIURL, } } @@ -103,6 +110,8 @@ func (c *Client) fetch(ctx context.Context, config *source.Config) (string, erro return c.fetchMaven(ctx, config.Source.Maven.Group, config.Source.Maven.Name, ignoreVersions, maxVersion) case config.Source.Crates != nil: return c.fetchCrate(ctx, config.Source.Crates.CrateName, ignoreVersions, maxVersion) + case config.Source.PyPI != nil: + return c.fetchPyPI(ctx, config.Source.PyPI.Name, ignoreVersions, maxVersion) } return "", errors.New("failed to match a source") } @@ -417,6 +426,67 @@ func (c *Client) fetchGithub( return versions[len(versions)-1], nil } +func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[string]struct{}, maxVersion string) (string, error) { + request, err := http.NewRequestWithContext( + ctx, + http.MethodGet, + fmt.Sprintf("%s/%s/json", c.pypiBaseURL, strings.TrimPrefix(name, "/")), + nil, + ) + if err != nil { + return "", err + } + response, err := c.httpClient.Do(request) + if err != nil { + return "", err + } + defer response.Body.Close() + if response.StatusCode != http.StatusOK { + return "", fmt.Errorf("received status code %d retrieving %q", response.StatusCode, request.URL.String()) + } + + var data struct { + Releases map[string][]struct { + Yanked bool `json:"yanked"` + } `json:"releases"` + } + if err := json.NewDecoder(response.Body).Decode(&data); err != nil { + return "", err + } + var versions []string + for version, files := range data.Releases { + if len(files) == 0 { + continue + } + yanked := true + for _, file := range files { + if !file.Yanked { + yanked = false + break + } + } + if yanked { + continue + } + v, ok := ensureSemverPrefix(version) + if !ok { + continue + } + if _, ok := ignoreVersions[v]; ok { + continue + } + if maxVersion != "" && semver.Compare(v, maxVersion) >= 0 { + continue + } + versions = append(versions, v) + } + if len(versions) == 0 { + return "", errors.New("no versions found") + } + semver.Sort(versions) + return versions[len(versions)-1], nil +} + // ensureSemverPrefix checks if the given version is valid semver, optionally // prefixing with "v". The output version is not guaranteed to be the same // as input. This function returns false if the version is not valid semver or diff --git a/internal/fetchclient/fetchclient_test.go b/internal/fetchclient/fetchclient_test.go new file mode 100644 index 000000000..8456c46ad --- /dev/null +++ b/internal/fetchclient/fetchclient_test.go @@ -0,0 +1,135 @@ +package fetchclient + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// pypiRelease mirrors the file-level shape returned by the PyPI JSON API. +// Shape verified against https://pypi.org/pypi/mypy-protobuf/json. +type pypiRelease struct { + Yanked bool `json:"yanked"` +} + +func TestFetchPyPI(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + releases map[string][]pypiRelease + ignoreVersions map[string]struct{} + maxVersion string + wantVersion string + wantErr string + }{ + { + name: "returns latest semver version", + releases: map[string][]pypiRelease{ + "3.5.0": {{Yanked: false}}, + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: false}}, + // Go semver accepts these as v1.0.0 and v2.10.0, but 5.0.0 is still highest. + "1.0": {{Yanked: false}}, + "2.10": {{Yanked: false}}, + // Python-style pre-release: invalid Go semver, filtered out. + "2.0.0b7": {{Yanked: false}}, + }, + wantVersion: "v5.0.0", + }, + { + name: "skips fully yanked releases", + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: true}, {Yanked: true}}, + }, + wantVersion: "v3.6.0", + }, + { + name: "version with at least one non-yanked file is available", + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: true}, {Yanked: false}}, + }, + wantVersion: "v5.0.0", + }, + { + name: "skips releases with empty file list", + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {}, + }, + wantVersion: "v3.6.0", + }, + { + name: "skips pre-release versions", + releases: map[string][]pypiRelease{ + "1.2.5": {{Yanked: false}}, + "2.0.0b7": {{Yanked: false}}, + }, + wantVersion: "v1.2.5", + }, + { + name: "respects ignore_versions", + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: false}}, + }, + ignoreVersions: map[string]struct{}{"v5.0.0": {}}, + wantVersion: "v3.6.0", + }, + { + name: "respects max_version exclusive upper bound", + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: false}}, + }, + maxVersion: "v5.0.0", + wantVersion: "v3.6.0", + }, + { + name: "error when no valid versions remain", + releases: map[string][]pypiRelease{ + // Python-style pre-releases: invalid Go semver, all filtered out. + "2.0.0b7": {{Yanked: false}}, + "2.0.0rc1": {{Yanked: false}}, + }, + wantErr: "no versions found", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(struct { + Releases map[string][]pypiRelease `json:"releases"` + }{Releases: tt.releases}); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } + })) + t.Cleanup(srv.Close) + + c := &Client{ + httpClient: srv.Client(), + pypiBaseURL: srv.URL, + } + ignoreVersions := tt.ignoreVersions + if ignoreVersions == nil { + ignoreVersions = map[string]struct{}{} + } + got, err := c.fetchPyPI(t.Context(), "mypy-protobuf", ignoreVersions, tt.maxVersion) + if tt.wantErr != "" { + require.ErrorContains(t, err, tt.wantErr) + return + } + require.NoError(t, err) + assert.Equal(t, tt.wantVersion, got) + }) + } +} diff --git a/internal/source/config.go b/internal/source/config.go index ca90505ef..c08201681 100644 --- a/internal/source/config.go +++ b/internal/source/config.go @@ -45,6 +45,7 @@ type Source struct { NPMRegistry *NPMRegistryConfig `yaml:"npm_registry"` Maven *MavenConfig `yaml:"maven"` Crates *CratesConfig `yaml:"crates"` + PyPI *PyPIConfig `yaml:"pypi"` // IgnoreVersions is a list of versions to ignore when fetching. IgnoreVersions []string `yaml:"ignore_versions"` // MaxVersion is an exclusive upper bound for versions. Versions >= this value will be ignored. @@ -72,6 +73,8 @@ func (s *Source) Name() string { return "maven" case s.Crates != nil: return "crates" + case s.PyPI != nil: + return "pypi" } return "unknown" } @@ -91,6 +94,8 @@ func (s *Source) CacheKey() string { return name + "-" + s.Maven.CacheKey() case s.Crates != nil: return name + "-" + s.Crates.CacheKey() + case s.PyPI != nil: + return name + "-" + s.PyPI.CacheKey() } return name } @@ -162,3 +167,14 @@ var _ Cacheable = (*MavenConfig)(nil) func (m MavenConfig) CacheKey() string { return m.Group + "-" + m.Name } + +// PyPIConfig is the PyPI configuration. +type PyPIConfig struct { + Name string `yaml:"name"` +} + +var _ Cacheable = (*PyPIConfig)(nil) + +func (p PyPIConfig) CacheKey() string { + return p.Name +} diff --git a/internal/source/source_test.go b/internal/source/source_test.go index b97318b45..7f16e0034 100644 --- a/internal/source/source_test.go +++ b/internal/source/source_test.go @@ -15,13 +15,13 @@ func TestGatherSourceFilenames(t *testing.T) { // Walk entire directory with a depth of 1 filenames, err := gatherSourceFilenames("testdata/success") require.NoError(t, err) - assert.Len(t, filenames, 2) + assert.Len(t, filenames, 3) filenames, err = gatherSourceFilenames("testdata/success/connect-go") require.NoError(t, err) assert.Len(t, filenames, 1) filenames, err = gatherSourceFilenames("testdata/success") require.NoError(t, err) - assert.Len(t, filenames, 2) + assert.Len(t, filenames, 3) filenames, err = gatherSourceFilenames("testdata/fail") require.NoError(t, err) @@ -46,7 +46,7 @@ func TestGatherConfigs(t *testing.T) { t.Parallel() configs, err := GatherConfigs("testdata/success") require.NoError(t, err) - assert.Len(t, configs, 2) + assert.Len(t, configs, 3) for _, config := range configs { name := filepath.Base(filepath.Dir(config.Filename)) @@ -63,6 +63,10 @@ func TestGatherConfigs(t *testing.T) { assert.Equal(t, "@bufbuild/protoc-gen-connect-web", source.Name) assert.True(t, config.Source.Disabled) assert.Nil(t, config.Source.DartFlutter) + case "mypy-protobuf": + source := config.Source.PyPI + require.NotNil(t, source) + assert.Equal(t, "mypy-protobuf", source.Name) default: assert.FailNow(t, "unknown plugin name", name) } diff --git a/internal/source/testdata/success/mypy-protobuf/source.yaml b/internal/source/testdata/success/mypy-protobuf/source.yaml new file mode 100644 index 000000000..310def05b --- /dev/null +++ b/internal/source/testdata/success/mypy-protobuf/source.yaml @@ -0,0 +1,3 @@ +source: + pypi: + name: mypy-protobuf diff --git a/plugins/community/danielgtaylor-betterproto/source.yaml b/plugins/community/danielgtaylor-betterproto/source.yaml index f7191eeb3..6a1d9a4a9 100644 --- a/plugins/community/danielgtaylor-betterproto/source.yaml +++ b/plugins/community/danielgtaylor-betterproto/source.yaml @@ -1,4 +1,3 @@ source: - github: - owner: danielgtaylor - repository: python-betterproto + pypi: + name: betterproto diff --git a/plugins/community/nipunn1313-mypy-grpc/source.yaml b/plugins/community/nipunn1313-mypy-grpc/source.yaml index 4b678c641..310def05b 100644 --- a/plugins/community/nipunn1313-mypy-grpc/source.yaml +++ b/plugins/community/nipunn1313-mypy-grpc/source.yaml @@ -1,4 +1,3 @@ source: - github: - owner: nipunn1313 - repository: mypy-protobuf + pypi: + name: mypy-protobuf diff --git a/plugins/community/nipunn1313-mypy/source.yaml b/plugins/community/nipunn1313-mypy/source.yaml index 4b678c641..310def05b 100644 --- a/plugins/community/nipunn1313-mypy/source.yaml +++ b/plugins/community/nipunn1313-mypy/source.yaml @@ -1,4 +1,3 @@ source: - github: - owner: nipunn1313 - repository: mypy-protobuf + pypi: + name: mypy-protobuf diff --git a/plugins/connectrpc/python/source.yaml b/plugins/connectrpc/python/source.yaml index a72834232..0199a43fa 100644 --- a/plugins/connectrpc/python/source.yaml +++ b/plugins/connectrpc/python/source.yaml @@ -1,4 +1,3 @@ source: - github: - owner: connectrpc - repository: connect-python + pypi: + name: protoc-gen-connect-python From 436f998762bf04f4435834ab64d24150789e5bb0 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Tue, 28 Apr 2026 11:57:33 -0400 Subject: [PATCH 2/7] Consolidate pypiURL into main const block --- internal/fetchclient/fetchclient.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/internal/fetchclient/fetchclient.go b/internal/fetchclient/fetchclient.go index 94467de1c..bf03ceb3f 100644 --- a/internal/fetchclient/fetchclient.go +++ b/internal/fetchclient/fetchclient.go @@ -27,11 +27,8 @@ const ( goProxyURL = "https://proxy.golang.org" npmRegistryURL = "https://registry.npmjs.org" mavenURL = "https://repo1.maven.org/maven2" -) - -const ( // docs: https://warehouse.pypa.io/api-reference/json.html - defaultPyPIURL = "https://pypi.org/pypi" + pypiURL = "https://pypi.org/pypi" ) var ( @@ -62,7 +59,7 @@ func New(ctx context.Context) *Client { return &Client{ httpClient: client, ghClient: github.NewClient(client), - pypiBaseURL: defaultPyPIURL, + pypiBaseURL: pypiURL, } } From fb4290b5931efd58b6a47b4eae09850d946090a5 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Tue, 28 Apr 2026 12:49:55 -0400 Subject: [PATCH 3/7] Switch PyPI fetcher to Simple API (PEP 691) Address review feedback from #2441: - Update docs URL to https://docs.pypi.org/api/simple/ - Use the Simple API (/simple/{name}/ with Accept: application/vnd.pypi.simple.v1+json) instead of the legacy JSON API, whose releases key is deprecated - Handle yanked as json.RawMessage: per PEP 691 the field is absent/false when not yanked, or a non-empty string reason when yanked - Add pypiVersionFromFilename to extract version from wheel/sdist filenames (handles both hyphenated and normalized underscore forms) - Add pypiFileYanked helper for the string-or-false yanked check - Update tests to use Simple API response shape and add TestPyPIVersionFromFilename covering both wheel and sdist formats --- internal/fetchclient/fetchclient.go | 74 +++++++++---- internal/fetchclient/fetchclient_test.go | 127 +++++++++++++++-------- 2 files changed, 141 insertions(+), 60 deletions(-) diff --git a/internal/fetchclient/fetchclient.go b/internal/fetchclient/fetchclient.go index bf03ceb3f..dc66e6a2b 100644 --- a/internal/fetchclient/fetchclient.go +++ b/internal/fetchclient/fetchclient.go @@ -27,8 +27,8 @@ const ( goProxyURL = "https://proxy.golang.org" npmRegistryURL = "https://registry.npmjs.org" mavenURL = "https://repo1.maven.org/maven2" - // docs: https://warehouse.pypa.io/api-reference/json.html - pypiURL = "https://pypi.org/pypi" + // docs: https://docs.pypi.org/api/simple/ + pypiURL = "https://pypi.org/simple" ) var ( @@ -427,12 +427,13 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ request, err := http.NewRequestWithContext( ctx, http.MethodGet, - fmt.Sprintf("%s/%s/json", c.pypiBaseURL, strings.TrimPrefix(name, "/")), + fmt.Sprintf("%s/%s/", c.pypiBaseURL, strings.TrimPrefix(name, "/")), nil, ) if err != nil { return "", err } + request.Header.Set("Accept", "application/vnd.pypi.simple.v1+json") response, err := c.httpClient.Do(request) if err != nil { return "", err @@ -442,29 +443,30 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return "", fmt.Errorf("received status code %d retrieving %q", response.StatusCode, request.URL.String()) } + // The Simple API JSON response (PEP 691) has a files array. Each file's + // yanked field is absent/false when available, or a non-empty string reason + // when yanked (PEP 592). var data struct { - Releases map[string][]struct { - Yanked bool `json:"yanked"` - } `json:"releases"` + Files []struct { + Filename string `json:"filename"` + Yanked json.RawMessage `json:"yanked"` + } `json:"files"` } if err := json.NewDecoder(response.Body).Decode(&data); err != nil { return "", err } - var versions []string - for version, files := range data.Releases { - if len(files) == 0 { + // Collect versions that have at least one non-yanked file. + available := make(map[string]struct{}) + for _, file := range data.Files { + if pypiFileYanked(file.Yanked) { continue } - yanked := true - for _, file := range files { - if !file.Yanked { - yanked = false - break - } - } - if yanked { - continue + if v := pypiVersionFromFilename(file.Filename, name); v != "" { + available[v] = struct{}{} } + } + var versions []string + for version := range available { v, ok := ensureSemverPrefix(version) if !ok { continue @@ -484,6 +486,42 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return versions[len(versions)-1], nil } +// pypiFileYanked reports whether a file's yanked field marks it as yanked. +// Per PEP 691, yanked is absent or false when not yanked, or a non-empty +// string (the reason) when yanked. +func pypiFileYanked(yanked json.RawMessage) bool { + s := string(yanked) + return len(s) > 0 && s != "false" && s != "null" +} + +// pypiVersionFromFilename extracts the version string from a PyPI filename. +// Wheel filenames: {dist}-{version}-{python}-{abi}-{platform}.whl +// Sdist filenames: {dist}-{version}.tar.gz / .tar.bz2 / .zip +// The distribution name is normalized ([-._] → _) in filenames. +func pypiVersionFromFilename(filename, pkg string) string { + normalizer := strings.NewReplacer("-", "_", ".", "_") + // Normalize both sides so hyphens and underscores match. Substitution is + // 1:1 so len(prefix) equals the number of original characters to skip. + normPkg := strings.ToLower(normalizer.Replace(pkg)) + normFile := strings.ToLower(normalizer.Replace(filename)) + prefix := normPkg + "_" + if !strings.HasPrefix(normFile, prefix) { + return "" + } + rest := filename[len(prefix):] + // Wheel: version ends at first '-' before the python/abi/platform tags. + if version, _, ok := strings.Cut(rest, "-"); ok { + return version + } + // Sdist: version is everything before the file extension. + for _, ext := range []string{".tar.gz", ".tar.bz2", ".zip"} { + if strings.HasSuffix(strings.ToLower(rest), ext) { + return rest[:len(rest)-len(ext)] + } + } + return "" +} + // ensureSemverPrefix checks if the given version is valid semver, optionally // prefixing with "v". The output version is not guaranteed to be the same // as input. This function returns false if the version is not valid semver or diff --git a/internal/fetchclient/fetchclient_test.go b/internal/fetchclient/fetchclient_test.go index 8456c46ad..3ca18569e 100644 --- a/internal/fetchclient/fetchclient_test.go +++ b/internal/fetchclient/fetchclient_test.go @@ -10,10 +10,26 @@ import ( "github.com/stretchr/testify/require" ) -// pypiRelease mirrors the file-level shape returned by the PyPI JSON API. -// Shape verified against https://pypi.org/pypi/mypy-protobuf/json. -type pypiRelease struct { - Yanked bool `json:"yanked"` +// pypiTestFile mirrors the file entry in the PyPI Simple API JSON response. +// Shape verified against https://pypi.org/simple/mypy-protobuf/ with +// Accept: application/vnd.pypi.simple.v1+json. +type pypiTestFile struct { + Filename string `json:"filename"` + Yanked json.RawMessage `json:"yanked"` +} + +// notYanked returns a file entry that is not yanked. +func notYanked(filename string) pypiTestFile { + return pypiTestFile{Filename: filename, Yanked: json.RawMessage("false")} +} + +// yankedFile returns a file entry yanked with the given reason string. +// The reason is JSON-encoded as a string literal. +func yankedFile(filename, reason string) pypiTestFile { + return pypiTestFile{ + Filename: filename, + Yanked: json.RawMessage(`"` + reason + `"`), + } } func TestFetchPyPI(t *testing.T) { @@ -21,7 +37,7 @@ func TestFetchPyPI(t *testing.T) { tests := []struct { name string - releases map[string][]pypiRelease + files []pypiTestFile ignoreVersions map[string]struct{} maxVersion string wantVersion string @@ -29,74 +45,70 @@ func TestFetchPyPI(t *testing.T) { }{ { name: "returns latest semver version", - releases: map[string][]pypiRelease{ - "3.5.0": {{Yanked: false}}, - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: false}}, - // Go semver accepts these as v1.0.0 and v2.10.0, but 5.0.0 is still highest. - "1.0": {{Yanked: false}}, - "2.10": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.5.0.tar.gz"), + notYanked("mypy-protobuf-3.6.0.tar.gz"), + notYanked("mypy_protobuf-5.0.0-py3-none-any.whl"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), + // Go semver accepts "1.0" as v1.0.0 but 5.0.0 is still highest. + notYanked("mypy-protobuf-1.0.tar.gz"), // Python-style pre-release: invalid Go semver, filtered out. - "2.0.0b7": {{Yanked: false}}, + notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), }, wantVersion: "v5.0.0", }, { name: "skips fully yanked releases", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: true}, {Yanked: true}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + // Both files for 5.0.0 are yanked. + yankedFile("mypy_protobuf-5.0.0-py3-none-any.whl", "bad release"), + yankedFile("mypy_protobuf-5.0.0.tar.gz", "bad release"), }, wantVersion: "v3.6.0", }, { name: "version with at least one non-yanked file is available", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: true}, {Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + // wheel yanked but sdist not: version is still available + yankedFile("mypy_protobuf-5.0.0-py3-none-any.whl", "bad wheel"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), }, wantVersion: "v5.0.0", }, - { - name: "skips releases with empty file list", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {}, - }, - wantVersion: "v3.6.0", - }, { name: "skips pre-release versions", - releases: map[string][]pypiRelease{ - "1.2.5": {{Yanked: false}}, - "2.0.0b7": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-1.2.5.tar.gz"), + notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), }, wantVersion: "v1.2.5", }, { name: "respects ignore_versions", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), }, ignoreVersions: map[string]struct{}{"v5.0.0": {}}, wantVersion: "v3.6.0", }, { name: "respects max_version exclusive upper bound", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), }, maxVersion: "v5.0.0", wantVersion: "v3.6.0", }, { name: "error when no valid versions remain", - releases: map[string][]pypiRelease{ + files: []pypiTestFile{ // Python-style pre-releases: invalid Go semver, all filtered out. - "2.0.0b7": {{Yanked: false}}, - "2.0.0rc1": {{Yanked: false}}, + notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), + notYanked("mypy_protobuf-2.0.0rc1-py3-none-any.whl"), }, wantErr: "no versions found", }, @@ -106,10 +118,10 @@ func TestFetchPyPI(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Type", "application/vnd.pypi.simple.v1+json") if err := json.NewEncoder(w).Encode(struct { - Releases map[string][]pypiRelease `json:"releases"` - }{Releases: tt.releases}); err != nil { + Files []pypiTestFile `json:"files"` + }{Files: tt.files}); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) } })) @@ -133,3 +145,34 @@ func TestFetchPyPI(t *testing.T) { }) } } + +func TestPyPIVersionFromFilename(t *testing.T) { + t.Parallel() + + tests := []struct { + filename string + pkg string + want string + }{ + // sdist, hyphenated package name (older style) + {"mypy-protobuf-3.6.0.tar.gz", "mypy-protobuf", "3.6.0"}, + // sdist, underscored package name (normalized) + {"mypy_protobuf-5.0.0.tar.gz", "mypy-protobuf", "5.0.0"}, + // wheel + {"mypy_protobuf-5.0.0-py3-none-any.whl", "mypy-protobuf", "5.0.0"}, + // pre-release (extraction still works; semver filter rejects it later) + {"mypy_protobuf-2.0.0b7-py3-none-any.whl", "mypy-protobuf", "2.0.0b7"}, + // package with dots in name + {"betterproto-1.2.5.tar.gz", "betterproto", "1.2.5"}, + // wrong package: no match + {"other_pkg-1.0.0.tar.gz", "mypy-protobuf", ""}, + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + t.Parallel() + got := pypiVersionFromFilename(tt.filename, tt.pkg) + assert.Equal(t, tt.want, got) + }) + } +} From c80468e980dda931a45b1363ceeb28f72d65f2c9 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Tue, 28 Apr 2026 12:53:33 -0400 Subject: [PATCH 4/7] Revert "Switch PyPI fetcher to Simple API (PEP 691)" This reverts commit fb4290b5931efd58b6a47b4eae09850d946090a5. --- internal/fetchclient/fetchclient.go | 74 ++++--------- internal/fetchclient/fetchclient_test.go | 127 ++++++++--------------- 2 files changed, 60 insertions(+), 141 deletions(-) diff --git a/internal/fetchclient/fetchclient.go b/internal/fetchclient/fetchclient.go index dc66e6a2b..bf03ceb3f 100644 --- a/internal/fetchclient/fetchclient.go +++ b/internal/fetchclient/fetchclient.go @@ -27,8 +27,8 @@ const ( goProxyURL = "https://proxy.golang.org" npmRegistryURL = "https://registry.npmjs.org" mavenURL = "https://repo1.maven.org/maven2" - // docs: https://docs.pypi.org/api/simple/ - pypiURL = "https://pypi.org/simple" + // docs: https://warehouse.pypa.io/api-reference/json.html + pypiURL = "https://pypi.org/pypi" ) var ( @@ -427,13 +427,12 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ request, err := http.NewRequestWithContext( ctx, http.MethodGet, - fmt.Sprintf("%s/%s/", c.pypiBaseURL, strings.TrimPrefix(name, "/")), + fmt.Sprintf("%s/%s/json", c.pypiBaseURL, strings.TrimPrefix(name, "/")), nil, ) if err != nil { return "", err } - request.Header.Set("Accept", "application/vnd.pypi.simple.v1+json") response, err := c.httpClient.Do(request) if err != nil { return "", err @@ -443,30 +442,29 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return "", fmt.Errorf("received status code %d retrieving %q", response.StatusCode, request.URL.String()) } - // The Simple API JSON response (PEP 691) has a files array. Each file's - // yanked field is absent/false when available, or a non-empty string reason - // when yanked (PEP 592). var data struct { - Files []struct { - Filename string `json:"filename"` - Yanked json.RawMessage `json:"yanked"` - } `json:"files"` + Releases map[string][]struct { + Yanked bool `json:"yanked"` + } `json:"releases"` } if err := json.NewDecoder(response.Body).Decode(&data); err != nil { return "", err } - // Collect versions that have at least one non-yanked file. - available := make(map[string]struct{}) - for _, file := range data.Files { - if pypiFileYanked(file.Yanked) { + var versions []string + for version, files := range data.Releases { + if len(files) == 0 { continue } - if v := pypiVersionFromFilename(file.Filename, name); v != "" { - available[v] = struct{}{} + yanked := true + for _, file := range files { + if !file.Yanked { + yanked = false + break + } + } + if yanked { + continue } - } - var versions []string - for version := range available { v, ok := ensureSemverPrefix(version) if !ok { continue @@ -486,42 +484,6 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return versions[len(versions)-1], nil } -// pypiFileYanked reports whether a file's yanked field marks it as yanked. -// Per PEP 691, yanked is absent or false when not yanked, or a non-empty -// string (the reason) when yanked. -func pypiFileYanked(yanked json.RawMessage) bool { - s := string(yanked) - return len(s) > 0 && s != "false" && s != "null" -} - -// pypiVersionFromFilename extracts the version string from a PyPI filename. -// Wheel filenames: {dist}-{version}-{python}-{abi}-{platform}.whl -// Sdist filenames: {dist}-{version}.tar.gz / .tar.bz2 / .zip -// The distribution name is normalized ([-._] → _) in filenames. -func pypiVersionFromFilename(filename, pkg string) string { - normalizer := strings.NewReplacer("-", "_", ".", "_") - // Normalize both sides so hyphens and underscores match. Substitution is - // 1:1 so len(prefix) equals the number of original characters to skip. - normPkg := strings.ToLower(normalizer.Replace(pkg)) - normFile := strings.ToLower(normalizer.Replace(filename)) - prefix := normPkg + "_" - if !strings.HasPrefix(normFile, prefix) { - return "" - } - rest := filename[len(prefix):] - // Wheel: version ends at first '-' before the python/abi/platform tags. - if version, _, ok := strings.Cut(rest, "-"); ok { - return version - } - // Sdist: version is everything before the file extension. - for _, ext := range []string{".tar.gz", ".tar.bz2", ".zip"} { - if strings.HasSuffix(strings.ToLower(rest), ext) { - return rest[:len(rest)-len(ext)] - } - } - return "" -} - // ensureSemverPrefix checks if the given version is valid semver, optionally // prefixing with "v". The output version is not guaranteed to be the same // as input. This function returns false if the version is not valid semver or diff --git a/internal/fetchclient/fetchclient_test.go b/internal/fetchclient/fetchclient_test.go index 3ca18569e..8456c46ad 100644 --- a/internal/fetchclient/fetchclient_test.go +++ b/internal/fetchclient/fetchclient_test.go @@ -10,26 +10,10 @@ import ( "github.com/stretchr/testify/require" ) -// pypiTestFile mirrors the file entry in the PyPI Simple API JSON response. -// Shape verified against https://pypi.org/simple/mypy-protobuf/ with -// Accept: application/vnd.pypi.simple.v1+json. -type pypiTestFile struct { - Filename string `json:"filename"` - Yanked json.RawMessage `json:"yanked"` -} - -// notYanked returns a file entry that is not yanked. -func notYanked(filename string) pypiTestFile { - return pypiTestFile{Filename: filename, Yanked: json.RawMessage("false")} -} - -// yankedFile returns a file entry yanked with the given reason string. -// The reason is JSON-encoded as a string literal. -func yankedFile(filename, reason string) pypiTestFile { - return pypiTestFile{ - Filename: filename, - Yanked: json.RawMessage(`"` + reason + `"`), - } +// pypiRelease mirrors the file-level shape returned by the PyPI JSON API. +// Shape verified against https://pypi.org/pypi/mypy-protobuf/json. +type pypiRelease struct { + Yanked bool `json:"yanked"` } func TestFetchPyPI(t *testing.T) { @@ -37,7 +21,7 @@ func TestFetchPyPI(t *testing.T) { tests := []struct { name string - files []pypiTestFile + releases map[string][]pypiRelease ignoreVersions map[string]struct{} maxVersion string wantVersion string @@ -45,70 +29,74 @@ func TestFetchPyPI(t *testing.T) { }{ { name: "returns latest semver version", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.5.0.tar.gz"), - notYanked("mypy-protobuf-3.6.0.tar.gz"), - notYanked("mypy_protobuf-5.0.0-py3-none-any.whl"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), - // Go semver accepts "1.0" as v1.0.0 but 5.0.0 is still highest. - notYanked("mypy-protobuf-1.0.tar.gz"), + releases: map[string][]pypiRelease{ + "3.5.0": {{Yanked: false}}, + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: false}}, + // Go semver accepts these as v1.0.0 and v2.10.0, but 5.0.0 is still highest. + "1.0": {{Yanked: false}}, + "2.10": {{Yanked: false}}, // Python-style pre-release: invalid Go semver, filtered out. - notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), + "2.0.0b7": {{Yanked: false}}, }, wantVersion: "v5.0.0", }, { name: "skips fully yanked releases", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - // Both files for 5.0.0 are yanked. - yankedFile("mypy_protobuf-5.0.0-py3-none-any.whl", "bad release"), - yankedFile("mypy_protobuf-5.0.0.tar.gz", "bad release"), + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: true}, {Yanked: true}}, }, wantVersion: "v3.6.0", }, { name: "version with at least one non-yanked file is available", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - // wheel yanked but sdist not: version is still available - yankedFile("mypy_protobuf-5.0.0-py3-none-any.whl", "bad wheel"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: true}, {Yanked: false}}, }, wantVersion: "v5.0.0", }, + { + name: "skips releases with empty file list", + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {}, + }, + wantVersion: "v3.6.0", + }, { name: "skips pre-release versions", - files: []pypiTestFile{ - notYanked("mypy-protobuf-1.2.5.tar.gz"), - notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), + releases: map[string][]pypiRelease{ + "1.2.5": {{Yanked: false}}, + "2.0.0b7": {{Yanked: false}}, }, wantVersion: "v1.2.5", }, { name: "respects ignore_versions", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: false}}, }, ignoreVersions: map[string]struct{}{"v5.0.0": {}}, wantVersion: "v3.6.0", }, { name: "respects max_version exclusive upper bound", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), + releases: map[string][]pypiRelease{ + "3.6.0": {{Yanked: false}}, + "5.0.0": {{Yanked: false}}, }, maxVersion: "v5.0.0", wantVersion: "v3.6.0", }, { name: "error when no valid versions remain", - files: []pypiTestFile{ + releases: map[string][]pypiRelease{ // Python-style pre-releases: invalid Go semver, all filtered out. - notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), - notYanked("mypy_protobuf-2.0.0rc1-py3-none-any.whl"), + "2.0.0b7": {{Yanked: false}}, + "2.0.0rc1": {{Yanked: false}}, }, wantErr: "no versions found", }, @@ -118,10 +106,10 @@ func TestFetchPyPI(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.Header().Set("Content-Type", "application/vnd.pypi.simple.v1+json") + w.Header().Set("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(struct { - Files []pypiTestFile `json:"files"` - }{Files: tt.files}); err != nil { + Releases map[string][]pypiRelease `json:"releases"` + }{Releases: tt.releases}); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) } })) @@ -145,34 +133,3 @@ func TestFetchPyPI(t *testing.T) { }) } } - -func TestPyPIVersionFromFilename(t *testing.T) { - t.Parallel() - - tests := []struct { - filename string - pkg string - want string - }{ - // sdist, hyphenated package name (older style) - {"mypy-protobuf-3.6.0.tar.gz", "mypy-protobuf", "3.6.0"}, - // sdist, underscored package name (normalized) - {"mypy_protobuf-5.0.0.tar.gz", "mypy-protobuf", "5.0.0"}, - // wheel - {"mypy_protobuf-5.0.0-py3-none-any.whl", "mypy-protobuf", "5.0.0"}, - // pre-release (extraction still works; semver filter rejects it later) - {"mypy_protobuf-2.0.0b7-py3-none-any.whl", "mypy-protobuf", "2.0.0b7"}, - // package with dots in name - {"betterproto-1.2.5.tar.gz", "betterproto", "1.2.5"}, - // wrong package: no match - {"other_pkg-1.0.0.tar.gz", "mypy-protobuf", ""}, - } - - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - t.Parallel() - got := pypiVersionFromFilename(tt.filename, tt.pkg) - assert.Equal(t, tt.want, got) - }) - } -} From 2ac3be83435721754fc1b5380050f9f1f39056a9 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Tue, 28 Apr 2026 12:53:55 -0400 Subject: [PATCH 5/7] Fix PyPI docs URL --- internal/fetchclient/fetchclient.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/fetchclient/fetchclient.go b/internal/fetchclient/fetchclient.go index bf03ceb3f..a73254e9b 100644 --- a/internal/fetchclient/fetchclient.go +++ b/internal/fetchclient/fetchclient.go @@ -27,7 +27,7 @@ const ( goProxyURL = "https://proxy.golang.org" npmRegistryURL = "https://registry.npmjs.org" mavenURL = "https://repo1.maven.org/maven2" - // docs: https://warehouse.pypa.io/api-reference/json.html + // docs: https://docs.pypi.org/api/json/ pypiURL = "https://pypi.org/pypi" ) From b93886ae153760599ee9989ecf9e818c4f2b69fd Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Tue, 28 Apr 2026 12:57:33 -0400 Subject: [PATCH 6/7] Switch PyPI fetcher to Simple Repository API Address review feedback from #2441: - Use the Simple Repository API (/simple/{name}/ with Accept: application/vnd.pypi.simple.v1+json) instead of the legacy JSON API, whose releases key is marked deprecated - Update docs URL to the Simple Repository API spec - Handle yanked per the spec: absent/false = available; true or any non-empty string reason = yanked (PEP 592) - Add pypiVersionFromFilename to extract version from wheel/sdist filenames, handling both hyphenated and normalized underscore forms - Add pypiFileYanked helper for the bool-or-string yanked check - Update tests to use Simple API response shape, add TestPyPIVersionFromFilename covering both wheel and sdist formats --- internal/fetchclient/fetchclient.go | 74 ++++++++++---- internal/fetchclient/fetchclient_test.go | 123 +++++++++++++++-------- 2 files changed, 137 insertions(+), 60 deletions(-) diff --git a/internal/fetchclient/fetchclient.go b/internal/fetchclient/fetchclient.go index a73254e9b..746ddd29b 100644 --- a/internal/fetchclient/fetchclient.go +++ b/internal/fetchclient/fetchclient.go @@ -27,8 +27,8 @@ const ( goProxyURL = "https://proxy.golang.org" npmRegistryURL = "https://registry.npmjs.org" mavenURL = "https://repo1.maven.org/maven2" - // docs: https://docs.pypi.org/api/json/ - pypiURL = "https://pypi.org/pypi" + // docs: https://packaging.python.org/en/latest/specifications/simple-repository-api/ + pypiURL = "https://pypi.org/simple" ) var ( @@ -427,12 +427,13 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ request, err := http.NewRequestWithContext( ctx, http.MethodGet, - fmt.Sprintf("%s/%s/json", c.pypiBaseURL, strings.TrimPrefix(name, "/")), + fmt.Sprintf("%s/%s/", c.pypiBaseURL, strings.TrimPrefix(name, "/")), nil, ) if err != nil { return "", err } + request.Header.Set("Accept", "application/vnd.pypi.simple.v1+json") response, err := c.httpClient.Do(request) if err != nil { return "", err @@ -442,29 +443,30 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return "", fmt.Errorf("received status code %d retrieving %q", response.StatusCode, request.URL.String()) } + // Simple API JSON response per the simple repository API spec. + // Each file's yanked field is absent or false when available, true or a + // non-empty string reason when yanked (per PEP 592). var data struct { - Releases map[string][]struct { - Yanked bool `json:"yanked"` - } `json:"releases"` + Files []struct { + Filename string `json:"filename"` + Yanked json.RawMessage `json:"yanked"` + } `json:"files"` } if err := json.NewDecoder(response.Body).Decode(&data); err != nil { return "", err } - var versions []string - for version, files := range data.Releases { - if len(files) == 0 { + // Collect versions that have at least one non-yanked file. + available := make(map[string]struct{}) + for _, file := range data.Files { + if pypiFileYanked(file.Yanked) { continue } - yanked := true - for _, file := range files { - if !file.Yanked { - yanked = false - break - } - } - if yanked { - continue + if v := pypiVersionFromFilename(file.Filename, name); v != "" { + available[v] = struct{}{} } + } + var versions []string + for version := range available { v, ok := ensureSemverPrefix(version) if !ok { continue @@ -484,6 +486,42 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return versions[len(versions)-1], nil } +// pypiFileYanked reports whether a file's yanked field marks it as yanked. +// Per the simple repository API spec, yanked is absent or false when not +// yanked, and true or a non-empty string reason when yanked (PEP 592). +func pypiFileYanked(yanked json.RawMessage) bool { + s := string(yanked) + return len(s) > 0 && s != "false" && s != "null" +} + +// pypiVersionFromFilename extracts the version string from a PyPI filename. +// Wheel filenames: {dist}-{version}-{python}-{abi}-{platform}.whl +// Sdist filenames: {dist}-{version}.tar.gz / .tar.bz2 / .zip +// The distribution name is normalized ([-._] → _) in filenames. +func pypiVersionFromFilename(filename, pkg string) string { + normalizer := strings.NewReplacer("-", "_", ".", "_") + // Normalize both sides so hyphens and underscores match. Substitution is + // 1:1 so len(prefix) equals the number of original characters to skip. + normPkg := strings.ToLower(normalizer.Replace(pkg)) + normFile := strings.ToLower(normalizer.Replace(filename)) + prefix := normPkg + "_" + if !strings.HasPrefix(normFile, prefix) { + return "" + } + rest := filename[len(prefix):] + // Wheel: version ends at first '-' before the python/abi/platform tags. + if version, _, ok := strings.Cut(rest, "-"); ok { + return version + } + // Sdist: version is everything before the file extension. + for _, ext := range []string{".tar.gz", ".tar.bz2", ".zip"} { + if strings.HasSuffix(strings.ToLower(rest), ext) { + return rest[:len(rest)-len(ext)] + } + } + return "" +} + // ensureSemverPrefix checks if the given version is valid semver, optionally // prefixing with "v". The output version is not guaranteed to be the same // as input. This function returns false if the version is not valid semver or diff --git a/internal/fetchclient/fetchclient_test.go b/internal/fetchclient/fetchclient_test.go index 8456c46ad..4d68c0c89 100644 --- a/internal/fetchclient/fetchclient_test.go +++ b/internal/fetchclient/fetchclient_test.go @@ -10,10 +10,25 @@ import ( "github.com/stretchr/testify/require" ) -// pypiRelease mirrors the file-level shape returned by the PyPI JSON API. -// Shape verified against https://pypi.org/pypi/mypy-protobuf/json. -type pypiRelease struct { - Yanked bool `json:"yanked"` +// pypiTestFile mirrors a file entry from the PyPI Simple Repository API JSON +// response. Shape verified against https://pypi.org/simple/mypy-protobuf/ +// with Accept: application/vnd.pypi.simple.v1+json. +type pypiTestFile struct { + Filename string `json:"filename"` + Yanked json.RawMessage `json:"yanked"` +} + +func notYanked(filename string) pypiTestFile { + return pypiTestFile{Filename: filename, Yanked: json.RawMessage("false")} +} + +func yankedWithReason(filename, reason string) pypiTestFile { + return pypiTestFile{ + Filename: filename, + // Encode reason as a JSON string literal without json.Marshal to avoid + // the errchkjson lint rule; reason values in tests contain no special chars. + Yanked: json.RawMessage(`"` + reason + `"`), + } } func TestFetchPyPI(t *testing.T) { @@ -21,7 +36,7 @@ func TestFetchPyPI(t *testing.T) { tests := []struct { name string - releases map[string][]pypiRelease + files []pypiTestFile ignoreVersions map[string]struct{} maxVersion string wantVersion string @@ -29,74 +44,70 @@ func TestFetchPyPI(t *testing.T) { }{ { name: "returns latest semver version", - releases: map[string][]pypiRelease{ - "3.5.0": {{Yanked: false}}, - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: false}}, - // Go semver accepts these as v1.0.0 and v2.10.0, but 5.0.0 is still highest. - "1.0": {{Yanked: false}}, - "2.10": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.5.0.tar.gz"), + notYanked("mypy-protobuf-3.6.0.tar.gz"), + notYanked("mypy_protobuf-5.0.0-py3-none-any.whl"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), + // Go semver accepts "1.0" as v1.0.0, but 5.0.0 is still highest. + notYanked("mypy-protobuf-1.0.tar.gz"), // Python-style pre-release: invalid Go semver, filtered out. - "2.0.0b7": {{Yanked: false}}, + notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), }, wantVersion: "v5.0.0", }, { name: "skips fully yanked releases", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: true}, {Yanked: true}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + // Both files for 5.0.0 are yanked. + yankedWithReason("mypy_protobuf-5.0.0-py3-none-any.whl", "bad release"), + yankedWithReason("mypy_protobuf-5.0.0.tar.gz", "bad release"), }, wantVersion: "v3.6.0", }, { name: "version with at least one non-yanked file is available", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: true}, {Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + // Wheel yanked but sdist not: version is still available. + yankedWithReason("mypy_protobuf-5.0.0-py3-none-any.whl", "bad wheel"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), }, wantVersion: "v5.0.0", }, - { - name: "skips releases with empty file list", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {}, - }, - wantVersion: "v3.6.0", - }, { name: "skips pre-release versions", - releases: map[string][]pypiRelease{ - "1.2.5": {{Yanked: false}}, - "2.0.0b7": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-1.2.5.tar.gz"), + notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), }, wantVersion: "v1.2.5", }, { name: "respects ignore_versions", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), }, ignoreVersions: map[string]struct{}{"v5.0.0": {}}, wantVersion: "v3.6.0", }, { name: "respects max_version exclusive upper bound", - releases: map[string][]pypiRelease{ - "3.6.0": {{Yanked: false}}, - "5.0.0": {{Yanked: false}}, + files: []pypiTestFile{ + notYanked("mypy-protobuf-3.6.0.tar.gz"), + notYanked("mypy_protobuf-5.0.0.tar.gz"), }, maxVersion: "v5.0.0", wantVersion: "v3.6.0", }, { name: "error when no valid versions remain", - releases: map[string][]pypiRelease{ + files: []pypiTestFile{ // Python-style pre-releases: invalid Go semver, all filtered out. - "2.0.0b7": {{Yanked: false}}, - "2.0.0rc1": {{Yanked: false}}, + notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), + notYanked("mypy_protobuf-2.0.0rc1-py3-none-any.whl"), }, wantErr: "no versions found", }, @@ -106,10 +117,10 @@ func TestFetchPyPI(t *testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - w.Header().Set("Content-Type", "application/json") + w.Header().Set("Content-Type", "application/vnd.pypi.simple.v1+json") if err := json.NewEncoder(w).Encode(struct { - Releases map[string][]pypiRelease `json:"releases"` - }{Releases: tt.releases}); err != nil { + Files []pypiTestFile `json:"files"` + }{Files: tt.files}); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) } })) @@ -133,3 +144,31 @@ func TestFetchPyPI(t *testing.T) { }) } } + +func TestPyPIVersionFromFilename(t *testing.T) { + t.Parallel() + + tests := []struct { + filename string + pkg string + want string + }{ + // sdist, hyphenated package name (older style) + {"mypy-protobuf-3.6.0.tar.gz", "mypy-protobuf", "3.6.0"}, + // sdist, underscored package name (normalized) + {"mypy_protobuf-5.0.0.tar.gz", "mypy-protobuf", "5.0.0"}, + // wheel + {"mypy_protobuf-5.0.0-py3-none-any.whl", "mypy-protobuf", "5.0.0"}, + // pre-release (extraction still works; semver filter rejects it later) + {"mypy_protobuf-2.0.0b7-py3-none-any.whl", "mypy-protobuf", "2.0.0b7"}, + // different package: no match + {"other_pkg-1.0.0.tar.gz", "mypy-protobuf", ""}, + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, pypiVersionFromFilename(tt.filename, tt.pkg)) + }) + } +} From 9b390a6ca61d1f3700a7bf551bdcceeff3388518 Mon Sep 17 00:00:00 2001 From: Stefan VanBuren Date: Tue, 28 Apr 2026 15:49:06 -0400 Subject: [PATCH 7/7] Drop yanked handling This requires us to parse filenames for versions (it's not supported in the API); yanking happens so rarely that it doesn't seem like something we need to support. (Our previous approach of following GitHub releases would have this same issue.) --- internal/fetchclient/fetchclient.go | 56 +---------- internal/fetchclient/fetchclient_test.go | 119 +++-------------------- 2 files changed, 16 insertions(+), 159 deletions(-) diff --git a/internal/fetchclient/fetchclient.go b/internal/fetchclient/fetchclient.go index 746ddd29b..3b1a70095 100644 --- a/internal/fetchclient/fetchclient.go +++ b/internal/fetchclient/fetchclient.go @@ -443,30 +443,14 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return "", fmt.Errorf("received status code %d retrieving %q", response.StatusCode, request.URL.String()) } - // Simple API JSON response per the simple repository API spec. - // Each file's yanked field is absent or false when available, true or a - // non-empty string reason when yanked (per PEP 592). var data struct { - Files []struct { - Filename string `json:"filename"` - Yanked json.RawMessage `json:"yanked"` - } `json:"files"` + Versions []string `json:"versions"` } if err := json.NewDecoder(response.Body).Decode(&data); err != nil { return "", err } - // Collect versions that have at least one non-yanked file. - available := make(map[string]struct{}) - for _, file := range data.Files { - if pypiFileYanked(file.Yanked) { - continue - } - if v := pypiVersionFromFilename(file.Filename, name); v != "" { - available[v] = struct{}{} - } - } var versions []string - for version := range available { + for _, version := range data.Versions { v, ok := ensureSemverPrefix(version) if !ok { continue @@ -486,42 +470,6 @@ func (c *Client) fetchPyPI(ctx context.Context, name string, ignoreVersions map[ return versions[len(versions)-1], nil } -// pypiFileYanked reports whether a file's yanked field marks it as yanked. -// Per the simple repository API spec, yanked is absent or false when not -// yanked, and true or a non-empty string reason when yanked (PEP 592). -func pypiFileYanked(yanked json.RawMessage) bool { - s := string(yanked) - return len(s) > 0 && s != "false" && s != "null" -} - -// pypiVersionFromFilename extracts the version string from a PyPI filename. -// Wheel filenames: {dist}-{version}-{python}-{abi}-{platform}.whl -// Sdist filenames: {dist}-{version}.tar.gz / .tar.bz2 / .zip -// The distribution name is normalized ([-._] → _) in filenames. -func pypiVersionFromFilename(filename, pkg string) string { - normalizer := strings.NewReplacer("-", "_", ".", "_") - // Normalize both sides so hyphens and underscores match. Substitution is - // 1:1 so len(prefix) equals the number of original characters to skip. - normPkg := strings.ToLower(normalizer.Replace(pkg)) - normFile := strings.ToLower(normalizer.Replace(filename)) - prefix := normPkg + "_" - if !strings.HasPrefix(normFile, prefix) { - return "" - } - rest := filename[len(prefix):] - // Wheel: version ends at first '-' before the python/abi/platform tags. - if version, _, ok := strings.Cut(rest, "-"); ok { - return version - } - // Sdist: version is everything before the file extension. - for _, ext := range []string{".tar.gz", ".tar.bz2", ".zip"} { - if strings.HasSuffix(strings.ToLower(rest), ext) { - return rest[:len(rest)-len(ext)] - } - } - return "" -} - // ensureSemverPrefix checks if the given version is valid semver, optionally // prefixing with "v". The output version is not guaranteed to be the same // as input. This function returns false if the version is not valid semver or diff --git a/internal/fetchclient/fetchclient_test.go b/internal/fetchclient/fetchclient_test.go index 4d68c0c89..8e91d1f62 100644 --- a/internal/fetchclient/fetchclient_test.go +++ b/internal/fetchclient/fetchclient_test.go @@ -10,106 +10,43 @@ import ( "github.com/stretchr/testify/require" ) -// pypiTestFile mirrors a file entry from the PyPI Simple Repository API JSON -// response. Shape verified against https://pypi.org/simple/mypy-protobuf/ -// with Accept: application/vnd.pypi.simple.v1+json. -type pypiTestFile struct { - Filename string `json:"filename"` - Yanked json.RawMessage `json:"yanked"` -} - -func notYanked(filename string) pypiTestFile { - return pypiTestFile{Filename: filename, Yanked: json.RawMessage("false")} -} - -func yankedWithReason(filename, reason string) pypiTestFile { - return pypiTestFile{ - Filename: filename, - // Encode reason as a JSON string literal without json.Marshal to avoid - // the errchkjson lint rule; reason values in tests contain no special chars. - Yanked: json.RawMessage(`"` + reason + `"`), - } -} - func TestFetchPyPI(t *testing.T) { t.Parallel() tests := []struct { name string - files []pypiTestFile + versions []string ignoreVersions map[string]struct{} maxVersion string wantVersion string wantErr string }{ { - name: "returns latest semver version", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.5.0.tar.gz"), - notYanked("mypy-protobuf-3.6.0.tar.gz"), - notYanked("mypy_protobuf-5.0.0-py3-none-any.whl"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), - // Go semver accepts "1.0" as v1.0.0, but 5.0.0 is still highest. - notYanked("mypy-protobuf-1.0.tar.gz"), - // Python-style pre-release: invalid Go semver, filtered out. - notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), - }, - wantVersion: "v5.0.0", - }, - { - name: "skips fully yanked releases", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - // Both files for 5.0.0 are yanked. - yankedWithReason("mypy_protobuf-5.0.0-py3-none-any.whl", "bad release"), - yankedWithReason("mypy_protobuf-5.0.0.tar.gz", "bad release"), - }, - wantVersion: "v3.6.0", - }, - { - name: "version with at least one non-yanked file is available", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - // Wheel yanked but sdist not: version is still available. - yankedWithReason("mypy_protobuf-5.0.0-py3-none-any.whl", "bad wheel"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), - }, + name: "returns latest semver version", + versions: []string{"3.5.0", "3.6.0", "5.0.0", "1.0"}, wantVersion: "v5.0.0", }, { - name: "skips pre-release versions", - files: []pypiTestFile{ - notYanked("mypy-protobuf-1.2.5.tar.gz"), - notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), - }, + name: "skips pre-release versions", + versions: []string{"1.2.5", "2.0.0b7"}, wantVersion: "v1.2.5", }, { - name: "respects ignore_versions", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), - }, + name: "respects ignore_versions", + versions: []string{"3.6.0", "5.0.0"}, ignoreVersions: map[string]struct{}{"v5.0.0": {}}, wantVersion: "v3.6.0", }, { - name: "respects max_version exclusive upper bound", - files: []pypiTestFile{ - notYanked("mypy-protobuf-3.6.0.tar.gz"), - notYanked("mypy_protobuf-5.0.0.tar.gz"), - }, + name: "respects max_version exclusive upper bound", + versions: []string{"3.6.0", "5.0.0"}, maxVersion: "v5.0.0", wantVersion: "v3.6.0", }, { - name: "error when no valid versions remain", - files: []pypiTestFile{ - // Python-style pre-releases: invalid Go semver, all filtered out. - notYanked("mypy_protobuf-2.0.0b7-py3-none-any.whl"), - notYanked("mypy_protobuf-2.0.0rc1-py3-none-any.whl"), - }, - wantErr: "no versions found", + name: "error when no valid versions remain", + versions: []string{"2.0.0b7", "2.0.0rc1"}, + wantErr: "no versions found", }, } @@ -119,8 +56,8 @@ func TestFetchPyPI(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("Content-Type", "application/vnd.pypi.simple.v1+json") if err := json.NewEncoder(w).Encode(struct { - Files []pypiTestFile `json:"files"` - }{Files: tt.files}); err != nil { + Versions []string `json:"versions"` + }{Versions: tt.versions}); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) } })) @@ -144,31 +81,3 @@ func TestFetchPyPI(t *testing.T) { }) } } - -func TestPyPIVersionFromFilename(t *testing.T) { - t.Parallel() - - tests := []struct { - filename string - pkg string - want string - }{ - // sdist, hyphenated package name (older style) - {"mypy-protobuf-3.6.0.tar.gz", "mypy-protobuf", "3.6.0"}, - // sdist, underscored package name (normalized) - {"mypy_protobuf-5.0.0.tar.gz", "mypy-protobuf", "5.0.0"}, - // wheel - {"mypy_protobuf-5.0.0-py3-none-any.whl", "mypy-protobuf", "5.0.0"}, - // pre-release (extraction still works; semver filter rejects it later) - {"mypy_protobuf-2.0.0b7-py3-none-any.whl", "mypy-protobuf", "2.0.0b7"}, - // different package: no match - {"other_pkg-1.0.0.tar.gz", "mypy-protobuf", ""}, - } - - for _, tt := range tests { - t.Run(tt.filename, func(t *testing.T) { - t.Parallel() - assert.Equal(t, tt.want, pypiVersionFromFilename(tt.filename, tt.pkg)) - }) - } -}