From ee3efadf407562d058e7c8714d1f4ecc4ad846ba Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Fri, 14 Nov 2025 17:00:50 +0200 Subject: [PATCH] refactor(servegit): vendor in lib/gitservice This is the only use of gitservice so let's just vendor it in. Test Plan: go test --- internal/servegit/gitservice.go | 173 +++++++++++++++++++++++++++ internal/servegit/gitservice_test.go | 106 ++++++++++++++++ internal/servegit/serve.go | 7 +- 3 files changed, 282 insertions(+), 4 deletions(-) create mode 100644 internal/servegit/gitservice.go create mode 100644 internal/servegit/gitservice_test.go diff --git a/internal/servegit/gitservice.go b/internal/servegit/gitservice.go new file mode 100644 index 0000000000..9c815ba9ca --- /dev/null +++ b/internal/servegit/gitservice.go @@ -0,0 +1,173 @@ +// Package servegit provides a smart Git HTTP transfer protocol handler. +package servegit + +import ( + "bytes" + "compress/gzip" + "context" + "net/http" + "os" + "os/exec" + "strconv" + "strings" + + "slices" + + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +var uploadPackArgs = []string{ + // Partial clones/fetches + "-c", "uploadpack.allowFilter=true", + + // Can fetch any object. Used in case of race between a resolve ref and a + // fetch of a commit. Safe to do, since this is only used internally. + "-c", "uploadpack.allowAnySHA1InWant=true", + + // The maximum size of memory that is consumed by each thread in git-pack-objects[1] + // for pack window memory when no limit is given on the command line. + // + // Important for large monorepos to not run into memory issues when cloned. + "-c", "pack.windowMemory=100m", + + "upload-pack", + + "--stateless-rpc", "--strict", +} + +// Handler serves the smart Git HTTP transfer protocol as documented at +// https://www.git-scm.com/docs/http-protocol. +// +// This allows users to clone any git repo. We only support the smart +// protocol. We aim to support modern git features such as protocol v2 to +// minimize traffic. 
+type Handler struct { + // Dir is a function which takes a repository name and returns an absolute + // path to the GIT_DIR for it. + Dir func(context.Context, string) (string, error) + + // ErrorHook is called if we fail to run the git command. The main use of + // this is to inject logging. For example in src-cli we don't use + // sourcegraph/log so this allows us to use stdlib log. + // + // Note: This is required to be set + ErrorHook func(err error, stderr string) + + // CommandHook if non-nil will run with the git upload command before we + // start the command. + // + // This allows the command to be modified before running. In practice + // sourcegraph.com will add a flowrated writer for Stdout to treat our + // internal networks more kindly. + CommandHook func(*exec.Cmd) + + // Trace if non-nil is called at the start of serving a request. It will + // call the returned function when done executing. If the execution + // failed, it will pass in a non-nil error. + Trace func(ctx context.Context, svc, repo, protocol string) func(error) +} + +func (s *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + // Only support clones and fetches (git upload-pack). /info/refs sets the + // service field. 
+ if svcQ := r.URL.Query().Get("service"); svcQ != "" && svcQ != "git-upload-pack" { + http.Error(w, "only support service git-upload-pack", http.StatusBadRequest) + return + } + + var repo, svc string + for _, suffix := range []string{"/info/refs", "/git-upload-pack"} { + if strings.HasSuffix(r.URL.Path, suffix) { + svc = suffix + repo = strings.TrimSuffix(r.URL.Path, suffix) + repo = strings.TrimPrefix(repo, "/") + break + } + } + + dir, err := s.Dir(ctx, repo) + if err != nil { + http.Error(w, "failed to determine repository path: "+err.Error(), http.StatusInternalServerError) + return + } + + if _, err = os.Stat(dir); os.IsNotExist(err) { + http.Error(w, "repository not found", http.StatusNotFound) + return + } else if err != nil { + http.Error(w, "failed to stat repo: "+err.Error(), http.StatusInternalServerError) + return + } + + body := r.Body + defer body.Close() + + if r.Header.Get("Content-Encoding") == "gzip" { + gzipReader, err := gzip.NewReader(body) + if err != nil { + http.Error(w, "malformed payload: "+err.Error(), http.StatusBadRequest) + return + } + defer gzipReader.Close() + + body = gzipReader + } + + // err is set if we fail to run command or have an unexpected svc. It is + // captured for tracing. 
+ if s.Trace != nil { + done := s.Trace(ctx, svc, repo, r.Header.Get("Git-Protocol")) + defer func() { + done(err) + }() + } + + args := slices.Clone(uploadPackArgs) + switch svc { + case "/info/refs": + w.Header().Set("Content-Type", "application/x-git-upload-pack-advertisement") + _, _ = w.Write(packetWrite("# service=git-upload-pack\n")) + _, _ = w.Write([]byte("0000")) + args = append(args, "--advertise-refs") + case "/git-upload-pack": + w.Header().Set("Content-Type", "application/x-git-upload-pack-result") + default: + err = errors.Errorf("unexpected subpath (want /info/refs or /git-upload-pack): %q", svc) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + args = append(args, dir) + + env := os.Environ() + if protocol := r.Header.Get("Git-Protocol"); protocol != "" { + env = append(env, "GIT_PROTOCOL="+protocol) + } + + var stderr bytes.Buffer + cmd := exec.CommandContext(ctx, "git", args...) + cmd.Env = env + cmd.Stdout = w + cmd.Stderr = &stderr + cmd.Stdin = body + + if s.CommandHook != nil { + s.CommandHook(cmd) + } + + err = cmd.Run() + if err != nil { + err = errors.Errorf("error running git service command args=%q: %w", args, err) + s.ErrorHook(err, stderr.String()) + _, _ = w.Write([]byte("\n" + err.Error() + "\n")) + } +} + +func packetWrite(str string) []byte { + s := strconv.FormatInt(int64(len(str)+4), 16) + if len(s)%4 != 0 { + s = strings.Repeat("0", 4-len(s)%4) + s + } + return []byte(s + str) +} diff --git a/internal/servegit/gitservice_test.go b/internal/servegit/gitservice_test.go new file mode 100644 index 0000000000..e1b7b87083 --- /dev/null +++ b/internal/servegit/gitservice_test.go @@ -0,0 +1,106 @@ +package servegit + +import ( + "bytes" + "context" + "fmt" + "net/http/httptest" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +// numTestCommits determines the number of files/commits/tags to create for +// the local test repo. 
The value of 25 causes clonev1 and clonev2 to use gzip +// compression but shallow to be uncompressed. The value of 10 does not trigger +// this same behavior. +const numTestCommits = 25 + +func TestHandler(t *testing.T) { + root := t.TempDir() + repo := filepath.Join(root, "testrepo") + + // Set up a repo with some commits and tags to clone + runCmd(t, root, "git", "init", repo) + + for i := range numTestCommits { + runCmd(t, repo, "sh", "-c", fmt.Sprintf("echo hello world > hello-%d.txt", i+1)) + runCmd(t, repo, "git", "add", fmt.Sprintf("hello-%d.txt", i+1)) + runCmd(t, repo, "git", "commit", "-m", fmt.Sprintf("c%d", i+1)) + runCmd(t, repo, "git", "tag", fmt.Sprintf("v%d", i+1)) + } + + ts := httptest.NewServer(&Handler{ + Dir: func(_ context.Context, s string) (string, error) { + return filepath.Join(root, s, ".git"), nil + }, + }) + defer ts.Close() + + t.Run("404", func(t *testing.T) { + c := exec.Command("git", "clone", ts.URL+"/doesnotexist") + c.Dir = t.TempDir() + b, err := c.CombinedOutput() + if !bytes.Contains(b, []byte("repository not found")) { + t.Fatal("expected clone to fail with repository not found", string(b), err) + } + }) + + cloneURL := ts.URL + "/testrepo" + + t.Run("clonev1", func(t *testing.T) { + runCmd(t, t.TempDir(), "git", "-c", "protocol.version=1", "clone", cloneURL) + }) + + cloneV2 := []struct { + Name string + Args []string + }{{ + "clonev2", + []string{}, + }, { + "shallow", + []string{"--depth=1"}, + }} + + for _, tc := range cloneV2 { + t.Run(tc.Name, func(t *testing.T) { + args := []string{"-c", "protocol.version=2", "clone"} + args = append(args, tc.Args...) + args = append(args, cloneURL) + + c := exec.Command("git", args...) + c.Dir = t.TempDir() + c.Env = []string{ + "GIT_TRACE_PACKET=1", + } + b, err := c.CombinedOutput() + if err != nil { + t.Fatalf("command failed: %s\nOutput: %s", err, b) + } + + // This is the same test done by git's tests for checking if the + // server is using protocol v2. 
+ if !bytes.Contains(b, []byte("git< version 2")) { + t.Fatalf("protocol v2 not used by server. Output:\n%s", b) + } + }) + } +} + +func runCmd(t *testing.T, dir string, cmd string, arg ...string) { + t.Helper() + c := exec.Command(cmd, arg...) + c.Dir = dir + c.Env = []string{ + "GIT_COMMITTER_NAME=a", + "GIT_COMMITTER_EMAIL=a@a.com", + "GIT_AUTHOR_NAME=a", + "GIT_AUTHOR_EMAIL=a@a.com", + } + b, err := c.CombinedOutput() + if err != nil { + t.Fatalf("%s %s failed: %s\nOutput: %s", cmd, strings.Join(arg, " "), err, b) + } +} diff --git a/internal/servegit/serve.go b/internal/servegit/serve.go index 306bc67aa4..22286531e5 100644 --- a/internal/servegit/serve.go +++ b/internal/servegit/serve.go @@ -16,7 +16,6 @@ import ( "time" "github.com/sourcegraph/sourcegraph/lib/errors" - "github.com/sourcegraph/sourcegraph/lib/gitservice" ) type Serve struct { @@ -102,9 +101,9 @@ func (s *Serve) handler() http.Handler { }) fs := http.FileServer(http.Dir(s.Root)) - svc := &gitservice.Handler{ - Dir: func(name string) string { - return filepath.Join(s.Root, filepath.FromSlash(name)) + svc := &Handler{ + Dir: func(_ context.Context, name string) (string, error) { + return filepath.Join(s.Root, filepath.FromSlash(name)), nil }, ErrorHook: func(err error, stderr string) { s.Info.Printf("git-service error: %s\nstderr:\n%s", err.Error(), stderr)