Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions internal/servegit/gitservice.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
// Package servegit provides a smart Git HTTP transfer protocol handler.
package servegit

import (
"bytes"
"compress/gzip"
"context"
"net/http"
"os"
"os/exec"
"strconv"
"strings"

"slices"

"github.com/sourcegraph/sourcegraph/lib/errors"
)

// uploadPackArgs are the git arguments shared by every request. ServeHTTP
// clones this slice and appends the per-request arguments (--advertise-refs
// for /info/refs, followed by the repository directory) to the copy.
var uploadPackArgs = []string{
// Allow partial clones/fetches (object filters).
"-c", "uploadpack.allowFilter=true",

// Can fetch any object. Used in case of race between a resolve ref and a
// fetch of a commit. Safe to do, since this is only used internally.
"-c", "uploadpack.allowAnySHA1InWant=true",

// The maximum size of memory that is consumed by each thread in
// git-pack-objects for pack window memory when no limit is given on the
// command line.
//
// Important for large monorepos to not run into memory issues when cloned.
"-c", "pack.windowMemory=100m",

// The git subcommand that serves clones and fetches.
"upload-pack",

// Flags passed to upload-pack on every invocation.
"--stateless-rpc", "--strict",
}

// Handler is a smart Git HTTP transfer protocol handler as documented at
// https://www.git-scm.com/docs/http-protocol.
//
// This allows users to clone any git repo. We only support the smart
// protocol. We aim to support modern git features such as protocol v2 to
// minimize traffic.
type Handler struct {
// Dir is a function which takes a repository name and returns an absolute
// path to the GIT_DIR for it. The name is the request path with the
// /info/refs or /git-upload-pack suffix and the leading slash removed. If
// Dir returns an error the request is answered with a 500.
Dir func(context.Context, string) (string, error)

// ErrorHook is called if we fail to run the git command. It receives the
// wrapped command error and whatever git wrote to stderr. The main use of
// this is to inject logging. For example in src-cli we don't use
// sourcegraph/log so this allows us to use stdlib log.
//
// Note: This is required to be set
ErrorHook func(err error, stderr string)

// CommandHook if non-nil will run with the git upload command before we
// start the command.
//
// This allows the command to be modified before running. In practice
// sourcegraph.com will add a flowrated writer for Stdout to treat our
// internal networks more kindly.
CommandHook func(*exec.Cmd)

// Trace if non-nil is called at the start of serving a request. It will
// call the returned function when done executing. If the execution
// failed, it will pass in a non-nil error.
//
// svc is the matched path suffix ("/info/refs" or "/git-upload-pack", or
// empty if neither matched), repo is the repository name handed to Dir and
// protocol is the value of the Git-Protocol request header.
Trace func(ctx context.Context, svc, repo, protocol string) func(error)
}

// ServeHTTP implements http.Handler. It serves the two smart-protocol
// endpoints needed for clones and fetches, "<repo>/info/refs" and
// "<repo>/git-upload-pack", by running "git upload-pack" against the
// directory returned by s.Dir and streaming the command's stdout to the
// client.
//
// Error responses sent before git is started:
//   - 400 if the "service" query parameter names anything other than
//     git-upload-pack, or if a gzip-encoded request body is malformed.
//   - 404 if the directory returned by s.Dir does not exist.
//   - 500 if s.Dir fails, the directory cannot be stat'd, or the request path
//     ends in neither supported suffix.
//
// A failure of the git command itself is reported to s.ErrorHook (when set)
// and appended to the response body; no error status is set for it.
func (s *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	// Only support clones and fetches (git upload-pack). /info/refs sets the
	// service field.
	if svcQ := r.URL.Query().Get("service"); svcQ != "" && svcQ != "git-upload-pack" {
		http.Error(w, "only support service git-upload-pack", http.StatusBadRequest)
		return
	}

	// Split the path into the repository name and the endpoint suffix. If
	// neither suffix matches, svc stays empty and is rejected in the switch
	// below so that the failure is still reported to Trace.
	var repo, svc string
	for _, suffix := range []string{"/info/refs", "/git-upload-pack"} {
		if strings.HasSuffix(r.URL.Path, suffix) {
			svc = suffix
			repo = strings.TrimSuffix(r.URL.Path, suffix)
			repo = strings.TrimPrefix(repo, "/")
			break
		}
	}

	dir, err := s.Dir(ctx, repo)
	if err != nil {
		http.Error(w, "failed to determine repository path: "+err.Error(), http.StatusInternalServerError)
		return
	}

	if _, err = os.Stat(dir); os.IsNotExist(err) {
		http.Error(w, "repository not found", http.StatusNotFound)
		return
	} else if err != nil {
		http.Error(w, "failed to stat repo: "+err.Error(), http.StatusInternalServerError)
		return
	}

	body := r.Body
	defer body.Close()

	// Transparently decompress the request body when the client gzipped it.
	if r.Header.Get("Content-Encoding") == "gzip" {
		// Use a distinct name so the function-level err captured by the
		// Trace closure below is not shadowed.
		gzipReader, gzErr := gzip.NewReader(body)
		if gzErr != nil {
			http.Error(w, "malformed payload: "+gzErr.Error(), http.StatusBadRequest)
			return
		}
		defer gzipReader.Close()

		body = gzipReader
	}

	// err is set if we fail to run command or have an unexpected svc. It is
	// captured for tracing.
	if s.Trace != nil {
		done := s.Trace(ctx, svc, repo, r.Header.Get("Git-Protocol"))
		defer func() {
			done(err)
		}()
	}

	args := slices.Clone(uploadPackArgs)
	switch svc {
	case "/info/refs":
		w.Header().Set("Content-Type", "application/x-git-upload-pack-advertisement")
		// The HTTP protocol spec asks smart servers to disable caching of
		// the ref advertisement.
		w.Header().Set("Cache-Control", "no-cache")
		// Service announcement pkt-line followed by a flush packet. Write
		// errors are ignored: if the client is gone, git's own writes fail
		// and that failure is reported below.
		_, _ = w.Write(packetWrite("# service=git-upload-pack\n"))
		_, _ = w.Write([]byte("0000"))
		args = append(args, "--advertise-refs")
	case "/git-upload-pack":
		w.Header().Set("Content-Type", "application/x-git-upload-pack-result")
		// The HTTP protocol spec requires servers to prevent caching of
		// upload-pack results.
		w.Header().Set("Cache-Control", "no-cache")
	default:
		err = errors.Errorf("unexpected subpath (want /info/refs or /git-upload-pack): %q", svc)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	args = append(args, dir)

	// Forward the client's protocol negotiation header (e.g. "version=2") to
	// git via its environment.
	env := os.Environ()
	if protocol := r.Header.Get("Git-Protocol"); protocol != "" {
		env = append(env, "GIT_PROTOCOL="+protocol)
	}

	var stderr bytes.Buffer
	cmd := exec.CommandContext(ctx, "git", args...)
	cmd.Env = env
	cmd.Stdout = w
	cmd.Stderr = &stderr
	cmd.Stdin = body

	if s.CommandHook != nil {
		s.CommandHook(cmd)
	}

	err = cmd.Run()
	if err != nil {
		err = errors.Errorf("error running git service command args=%q: %w", args, err)
		// ErrorHook is documented as required, but guard against nil so a
		// misconfigured Handler does not panic in the middle of a response.
		if s.ErrorHook != nil {
			s.ErrorHook(err, stderr.String())
		}
		// Best effort: part of the response may already have been streamed
		// and the client may be gone, so a failed write here is ignored.
		_, _ = w.Write([]byte("\n" + err.Error() + "\n"))
	}
}

// packetWrite frames str as a single pkt-line: the payload is prefixed with
// its total length (payload plus the four length bytes) written in lowercase
// hexadecimal and left-padded with zeros to a multiple of four digits.
func packetWrite(str string) []byte {
	digits := strconv.AppendInt(nil, int64(len(str)+4), 16)

	// Number of leading zeros needed to reach the next multiple of four
	// digits; zero when the length is already aligned.
	pad := (4 - len(digits)%4) % 4

	pkt := make([]byte, 0, pad+len(digits)+len(str))
	for i := 0; i < pad; i++ {
		pkt = append(pkt, '0')
	}
	pkt = append(pkt, digits...)
	pkt = append(pkt, str...)
	return pkt
}
106 changes: 106 additions & 0 deletions internal/servegit/gitservice_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package servegit

import (
"bytes"
"context"
"fmt"
"net/http/httptest"
"os/exec"
"path/filepath"
"strings"
"testing"
)

// numTestCommits determines the number of files/commits/tags to create for
// the local test repo. The value of 25 causes clonev1 and clonev2 to use gzip
// compression but shallow to be uncompressed. The value of 10 does not trigger
// this same behavior.
//
// NOTE(review): presumably this matters so that both the gzip and the plain
// request-body paths of the handler are exercised — confirm before changing.
const numTestCommits = 25

// TestHandler serves a freshly created local repository through Handler on
// an httptest server and drives it with the real git client. It checks that
// an unknown repository is reported as not found, that a protocol v1 clone
// works, and that full and shallow clones negotiate protocol v2.
func TestHandler(t *testing.T) {
	root := t.TempDir()
	repo := filepath.Join(root, "testrepo")

	// Set up a repo with numTestCommits commits, each adding one file and
	// one tag, so the clones below have real history to transfer.
	runCmd(t, root, "git", "init", repo)

	for i := range numTestCommits {
		runCmd(t, repo, "sh", "-c", fmt.Sprintf("echo hello world > hello-%d.txt", i+1))
		runCmd(t, repo, "git", "add", fmt.Sprintf("hello-%d.txt", i+1))
		runCmd(t, repo, "git", "commit", "-m", fmt.Sprintf("c%d", i+1))
		runCmd(t, repo, "git", "tag", fmt.Sprintf("v%d", i+1))
	}

	ts := httptest.NewServer(&Handler{
		Dir: func(_ context.Context, s string) (string, error) {
			return filepath.Join(root, s, ".git"), nil
		},
		// Handler documents ErrorHook as required. Log through the test so a
		// failing git command shows up in the test output.
		ErrorHook: func(err error, stderr string) {
			t.Logf("git service error: %s\nstderr:\n%s", err, stderr)
		},
	})
	// Close blocks until outstanding requests have completed, so the
	// ErrorHook above cannot log after the test has finished.
	defer ts.Close()

	t.Run("404", func(t *testing.T) {
		c := exec.Command("git", "clone", ts.URL+"/doesnotexist")
		c.Dir = t.TempDir()
		b, err := c.CombinedOutput()
		if err == nil {
			t.Fatalf("expected clone of a missing repository to fail. Output:\n%s", b)
		}
		if !bytes.Contains(b, []byte("repository not found")) {
			t.Fatal("expected clone to fail with repository not found", string(b), err)
		}
	})

	cloneURL := ts.URL + "/testrepo"

	t.Run("clonev1", func(t *testing.T) {
		runCmd(t, t.TempDir(), "git", "-c", "protocol.version=1", "clone", cloneURL)
	})

	cloneV2 := []struct {
		Name string
		Args []string
	}{{
		"clonev2",
		[]string{},
	}, {
		"shallow",
		[]string{"--depth=1"},
	}}

	for _, tc := range cloneV2 {
		t.Run(tc.Name, func(t *testing.T) {
			args := []string{"-c", "protocol.version=2", "clone"}
			args = append(args, tc.Args...)
			args = append(args, cloneURL)

			c := exec.Command("git", args...)
			c.Dir = t.TempDir()
			// Packet tracing makes git print the negotiated protocol
			// version, which is asserted on below.
			c.Env = []string{
				"GIT_TRACE_PACKET=1",
			}
			b, err := c.CombinedOutput()
			if err != nil {
				t.Fatalf("command failed: %s\nOutput: %s", err, b)
			}

			// This is the same test done by git's tests for checking if the
			// server is using protocol v2.
			if !bytes.Contains(b, []byte("git< version 2")) {
				t.Fatalf("protocol v2 not used by server. Output:\n%s", b)
			}
		})
	}
}

// runCmd executes cmd with the given arguments inside dir and fails the test
// immediately, including the combined stdout/stderr in the failure message,
// if the command does not succeed.
//
// The child's environment is replaced by a fixed git author/committer
// identity, so nothing from the caller's environment is passed on.
func runCmd(t *testing.T, dir string, cmd string, arg ...string) {
	t.Helper()

	identity := []string{
		"GIT_COMMITTER_NAME=a",
		"GIT_COMMITTER_EMAIL=a@a.com",
		"GIT_AUTHOR_NAME=a",
		"GIT_AUTHOR_EMAIL=a@a.com",
	}

	proc := exec.Command(cmd, arg...)
	proc.Dir, proc.Env = dir, identity

	if out, err := proc.CombinedOutput(); err != nil {
		t.Fatalf("%s %s failed: %s\nOutput: %s", cmd, strings.Join(arg, " "), err, out)
	}
}
7 changes: 3 additions & 4 deletions internal/servegit/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
"time"

"github.com/sourcegraph/sourcegraph/lib/errors"
"github.com/sourcegraph/sourcegraph/lib/gitservice"
)

type Serve struct {
Expand Down Expand Up @@ -102,9 +101,9 @@ func (s *Serve) handler() http.Handler {
})

fs := http.FileServer(http.Dir(s.Root))
svc := &gitservice.Handler{
Dir: func(name string) string {
return filepath.Join(s.Root, filepath.FromSlash(name))
svc := &Handler{
Dir: func(_ context.Context, name string) (string, error) {
return filepath.Join(s.Root, filepath.FromSlash(name)), nil
},
ErrorHook: func(err error, stderr string) {
s.Info.Printf("git-service error: %s\nstderr:\n%s", err.Error(), stderr)
Expand Down
Loading