From ac50e8e5cc867a259a52f7bac9999d97fc2a45df Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Feb 2025 13:16:08 +0200 Subject: [PATCH 1/5] patch template on the fly to add robots meta --- go.mod | 5 +++- go.sum | 2 ++ template.go | 19 +++++++++++++++ template_test.go | 61 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 template_test.go diff --git a/go.mod b/go.mod index 9fb5ddd..385554c 100644 --- a/go.mod +++ b/go.mod @@ -16,9 +16,12 @@ require ( require ( github.com/dlclark/regexp2 v1.4.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect github.com/mozillazg/go-unidecode v0.1.1 // indirect gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) -go 1.18 +go 1.21 + +toolchain go1.23.2 diff --git a/go.sum b/go.sum index f329cf7..232f18f 100644 --- a/go.sum +++ b/go.sum @@ -5,6 +5,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E= github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= diff --git a/template.go b/template.go index ecaed56..677019c 100644 --- a/template.go +++ b/template.go @@ -1,6 +1,7 @@ package docsite import ( + "bytes" "context" "fmt" "html/template" @@ -19,8 +20,21 @@ const ( rootTemplateName = "root" documentTemplateName = "document" searchTemplateName = "search" + 
metaRobots = `` + metaProperty = `` ) +func patchTemplateForSEO(data []byte) []byte { + if bytes.Contains(data, []byte(metaRobots)) { + return data + } + + content := string(data) + content = strings.Replace(content, metaProperty, fmt.Sprintf("%s\n %s", metaProperty, metaRobots), 1) + + return []byte(content) +} + func (s *Site) getTemplate(templatesFS http.FileSystem, name string, extraFuncs template.FuncMap) (*template.Template, error) { readFile := func(fs http.FileSystem, path string) ([]byte, error) { f, err := fs.Open(path) @@ -99,6 +113,11 @@ func (s *Site) getTemplate(templatesFS http.FileSystem, name string, extraFuncs if err != nil { return nil, errors.WithMessage(err, fmt.Sprintf("read template %s", path)) } + if name == documentTemplateName { + // We need to patch the template, since if we're loading an old version the template won't have the + // nofollow, noindex seo tag + data = patchTemplateForSEO(data) + } if _, err := tmpl.Parse(string(data)); err != nil { return nil, errors.WithMessage(err, fmt.Sprintf("parse template %s", path)) } diff --git a/template_test.go b/template_test.go new file mode 100644 index 0000000..7240163 --- /dev/null +++ b/template_test.go @@ -0,0 +1,61 @@ +package docsite + +import ( + "github.com/google/go-cmp/cmp" + "testing" +) + +func TestPatchTempalteForSEO(t *testing.T) { + tt := []struct { + name string + content string + want string + }{ + { + name: "content without robots meta is patched", + content: ` +{{define "seo"}} + + + + {{ if .Content }} +`, + want: ` +{{define "seo"}} + + + + + {{ if .Content }} +`, + }, + { + name: "content with robots meta is not patched", + content: ` +{{define "seo"}} + + + + + {{ if .Content }} +`, + want: ` +{{define "seo"}} + + + + + {{ if .Content }} +`, + }, + } + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + got := string(patchTemplateForSEO([]byte(tc.content))) + + if diff := cmp.Diff(tc.want, got); diff != "" { + t.Errorf("want and got mismatch (-want, +got): 
%s", diff) + } + }) + } +} From 090c4a8c5ba17a683b1f080255818327d5f67806 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Feb 2025 13:57:52 +0200 Subject: [PATCH 2/5] add config option to force serve content from config --- README.md | 13 +++++++++++++ cmd/docsite/site.go | 26 ++++++++++++++------------ config.json | 1 + 3 files changed, 28 insertions(+), 12 deletions(-) create mode 100644 config.json diff --git a/README.md b/README.md index 34fd796..ce2cf43 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ The site data describes the location of its templates, assets, and content. It i - `redirects`: an object mapping URL paths (such as `/my/old/page`) to redirect destination URLs (such as `/my/new/page`). - `check` (optional): an object containing a single property `ignoreURLPattern`, which is a [RE2 regexp](https://golang.org/pkg/regexp/syntax/) of URLs to ignore when checking for broken URLs with `docsite check`. - `search` (optional): an object containing a single proprety `skipIndexURLPattern`, which is a [RE2 regexp](https://golang.org/pkg/regexp/syntax/) pattern that if matching any content file URL will remove that file from the search index. +- `forceServeDownloadedContent` (optional) (dev): While developing locally, you might want to see how docsite performs when it downloads the doc content remotely. With this set to true, docsite will download the content instead of serving from the filesystem. The possible values for VFS URLs are: @@ -115,6 +116,18 @@ The `docsite` tool requires site data to be available in any of the following wa ## Development +## Running locally + +To run docsite locally and serve on port `:5080`, run: + +```shell +go run ./cmd/docsite/... -config docsite.json serve +``` + +### Force serving downloaded content + +For certain use cases you want to have docsite download the docs content as it does with production configuration. 
To force this behaviour locally you can set `"forceServeDownloadedContent": true` in your `docsite.json` configuration. + ### Release a new version 1. Build the Docker image for `linux/amd64`: diff --git a/cmd/docsite/site.go b/cmd/docsite/site.go index afeee47..8994b1e 100644 --- a/cmd/docsite/site.go +++ b/cmd/docsite/site.go @@ -52,16 +52,17 @@ func siteFromFlags() (*docsite.Site, *docsiteConfig, error) { // See ["Site data" in README.md](../../README.md#site-data) for documentation on this type's // fields. type docsiteConfig struct { - Content string - ContentExcludePattern string - DefaultContentBranch string - BaseURLPath string - RootURL string - Templates string - Assets string - AssetsBaseURLPath string - Redirects map[string]string - Check struct { + Content string + ContentExcludePattern string + DefaultContentBranch string + BaseURLPath string + RootURL string + Templates string + Assets string + AssetsBaseURLPath string + ForceServeDownloadedContent bool + Redirects map[string]string + Check struct { IgnoreURLPattern string } Search struct { @@ -171,7 +172,6 @@ func addRedirectsFromAssets(site *docsite.Site) error { } const ( - DEBUG = false CODEHOST_URL = "https://codeload.github.com/sourcegraph/sourcegraph-public-snapshot/zip/refs/heads/$VERSION#*/doc/" ) @@ -195,8 +195,10 @@ func openDocsiteFromConfig(configData []byte, baseDir string) (*docsite.Site, *d return http.Dir(filepath.Join(baseDir, dir)) } - if DEBUG { + log.Printf("config %v", config) + if config.ForceServeDownloadedContent { content := newVersionedFileSystemURL(CODEHOST_URL, "master") + log.Printf("Force serving content from %s", CODEHOST_URL) if _, err := content.OpenVersion(context.Background(), ""); err != nil { return nil, nil, errors.WithMessage(err, "downloading content default version") } diff --git a/config.json b/config.json new file mode 100644 index 0000000..db2e121 --- /dev/null +++ b/config.json @@ -0,0 +1 @@ +{"forceServeDownloadedContent": true, 
"templates":"https://codeload.github.com/sourcegraph/sourcegraph-public-snapshot/zip/legacydocs#*/doc/_resources/templates/","assets":"https://codeload.github.com/sourcegraph/sourcegraph-public-snapshot/zip/legacydocs#*/doc/_resources/assets/","content":"https://codeload.github.com/sourcegraph/sourcegraph-public-snapshot/zip/refs/heads/$VERSION#*/doc/","defaultContentBranch":"legacydocs","baseURLPath":"/","assetsBaseURLPath":"/assets/"} From 3ec5b26ffec8d8400a8ab62517ed9083111f69aa Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Feb 2025 14:05:59 +0200 Subject: [PATCH 3/5] update go ci actions --- .github/workflows/go-release.yml | 2 +- .github/workflows/go.yml | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/workflows/go-release.yml b/.github/workflows/go-release.yml index af1d981..d3d0ebe 100644 --- a/.github/workflows/go-release.yml +++ b/.github/workflows/go-release.yml @@ -47,7 +47,7 @@ jobs: runs-on: ubuntu-latest steps: - name: checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Login to Docker Hub uses: docker/login-action@v2 diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index ce04999..508b1ab 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -22,9 +22,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Run golangci-lint - uses: golangci/golangci-lint-action@v2 + uses: golangci/golangci-lint-action@v6 with: version: latest args: --timeout=30m @@ -42,15 +42,13 @@ jobs: name: Test strategy: matrix: - go-version: [ 1.18.x ] platform: [ ubuntu-latest ] runs-on: ${{ matrix.platform }} steps: - - name: Install Go - uses: actions/setup-go@v2 - with: - go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 + - name: Install Go + uses: actions/setup-go@v5 + with: { go-version-file: 'go.mod' } - name: Run tests with 
coverage run: go test -v -race ./... From 0344a9a640063241deeecb34ecc657a5594e4d3b Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Feb 2025 15:20:02 +0200 Subject: [PATCH 4/5] remove deprecated ioutil --- cmd/docsite/serve.go | 6 +++--- cmd/docsite/site.go | 10 +++++----- template.go | 4 ++-- util.go | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cmd/docsite/serve.go b/cmd/docsite/serve.go index 5ebc3d4..4c80c9b 100644 --- a/cmd/docsite/serve.go +++ b/cmd/docsite/serve.go @@ -3,10 +3,10 @@ package main import ( "crypto/tls" "flag" - "io/ioutil" "log" "net" "net/http" + "os" "sync" ) @@ -49,11 +49,11 @@ func init() { } if *tlsCertPath != "" || *tlsKeyPath != "" { log.Printf("# TLS listener enabled") - tlsCert, err := ioutil.ReadFile(*tlsCertPath) + tlsCert, err := os.ReadFile(*tlsCertPath) if err != nil { return err } - tlsKey, err := ioutil.ReadFile(*tlsKeyPath) + tlsKey, err := os.ReadFile(*tlsKeyPath) if err != nil { return err } diff --git a/cmd/docsite/site.go b/cmd/docsite/site.go index 8994b1e..1e15b90 100644 --- a/cmd/docsite/site.go +++ b/cmd/docsite/site.go @@ -6,7 +6,7 @@ import ( "context" "encoding/json" "fmt" - "io/ioutil" + "io" "log" "net/http" "net/url" @@ -36,7 +36,7 @@ func siteFromFlags() (*docsite.Site, *docsiteConfig, error) { paths := filepath.SplitList(*configPath) for _, path := range paths { - data, err := ioutil.ReadFile(path) + data, err := os.ReadFile(path) if os.IsNotExist(err) { continue } else if err != nil { @@ -324,7 +324,7 @@ func (fs *versionedFileSystemURL) fetchAndCacheVersion(version string) (http.Fil if strings.Contains(urlStr, "$VERSION") && strings.Contains(urlStr, "github") && !strings.Contains(urlStr, "refs/heads/$VERSION") { return nil, fmt.Errorf("refusing to use insecure docsite configuration for multi-version-aware GitHub URLs: the URL pattern %q must include \"refs/heads/$VERSION\", not just \"$VERSION\" (see docsite README.md for more information)", urlStr) } - urlStr = 
strings.Replace(fs.url, "$VERSION", version, -1) + urlStr = strings.ReplaceAll(fs.url, "$VERSION", version) // HACK: Workaround for https://github.com/sourcegraph/sourcegraph-public-snapshot/issues/3030. This assumes // that tags all begin with "vN" where N is some number. @@ -363,7 +363,7 @@ func zipFileSystemAtURL(url, dir string) (http.FileSystem, error) { } else if resp.StatusCode != http.StatusOK { return nil, &os.PathError{Op: "Get", Path: url, Err: fmt.Errorf("HTTP response status code %d", resp.StatusCode)} } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) if err != nil { return nil, err } @@ -397,7 +397,7 @@ func mapFromZipArchive(z *zip.Reader, dir string) (map[string]string, error) { if err != nil { return nil, errors.WithMessagef(err, "open %q", zf.Name) } - data, err := ioutil.ReadAll(f) + data, err := io.ReadAll(f) f.Close() if err != nil { return nil, errors.WithMessagef(err, "read %q", zf.Name) diff --git a/template.go b/template.go index 677019c..971637d 100644 --- a/template.go +++ b/template.go @@ -5,7 +5,7 @@ import ( "context" "fmt" "html/template" - "io/ioutil" + "io" "net/http" "net/url" "os" @@ -42,7 +42,7 @@ func (s *Site) getTemplate(templatesFS http.FileSystem, name string, extraFuncs return nil, err } defer f.Close() - data, err := ioutil.ReadAll(f) + data, err := io.ReadAll(f) if err != nil { return nil, err } diff --git a/util.go b/util.go index 08528f1..b0f22a1 100644 --- a/util.go +++ b/util.go @@ -1,7 +1,7 @@ package docsite import ( - "io/ioutil" + "io" "net/http" ) @@ -11,5 +11,5 @@ func ReadFile(fs http.FileSystem, path string) ([]byte, error) { return nil, err } defer f.Close() - return ioutil.ReadAll(f) + return io.ReadAll(f) } From 6f4d7f79f7fbffd72517f81d16c4f9328ce15b35 Mon Sep 17 00:00:00 2001 From: William Bezuidenhout Date: Fri, 28 Feb 2025 15:22:57 +0200 Subject: [PATCH 5/5] go mod tidy --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 
385554c..55bca28 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,7 @@ module github.com/sourcegraph/docsite require ( github.com/alecthomas/chroma v0.10.0 + github.com/google/go-cmp v0.7.0 github.com/mozillazg/go-slugify v0.2.0 github.com/pkg/errors v0.9.1 github.com/shurcooL/sanitized_anchor_name v1.0.0 @@ -16,7 +17,6 @@ require ( require ( github.com/dlclark/regexp2 v1.4.0 // indirect - github.com/google/go-cmp v0.7.0 // indirect github.com/mozillazg/go-unidecode v0.1.1 // indirect gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect