From 7ed02e6f92b311dff574f2eb57addc78f0b47eec Mon Sep 17 00:00:00 2001 From: Paul Gottschling Date: Fri, 8 Mar 2024 14:58:44 -0500 Subject: [PATCH] feat: Allow users to override comment delimiters Allow users to specify the comment delimiters they are using in their documentation. vale replaces these with HTML comment tags before linting, making it possible to control style rules for specific passages of prose in file formats that use non-HTML comment syntax. This is critical for controlling style rules within a page in MDX, and potentially other formats as well. This example configures the `CommentDelimiters` field for `*.md` files, indicating that `{/*` and `*/}` are the custom comment delimiters: ```ini [*.md] CommentDelimiters = "{/*,*/}" ``` Internally, custom delimiters are represented as a `[2]string`, and it is only possible to configure one set of custom comment delimiters for a given format block. More specific changes: - Refactor `applyPatterns`. Remove the method receiver and take only the necessary fields of `*core.Config` as parameters. This makes it easier to test `applyPatterns` without mocking an entire `*core.Config`. Also extract functions for `applyInlinePatterns` and `applyBlockPatterns` so we can use fewer arguments in a single function. - Add `applyCommentPatterns`, which works similarly to `applyInlinePatterns` and `applyBlockPatterns`, but for substituting comments. - For tests, import `https://github.com/stretchr/testify/assert`, a popular testing library, to get richer test output. 
Closes #762 --- go.mod | 6 +- go.sum | 2 - internal/core/config.go | 44 +++++----- internal/core/ini.go | 14 ++++ internal/core/ini_test.go | 96 +++++++++++++++++++++ internal/lint/html.go | 106 +++++++++++++++++++---- internal/lint/html_test.go | 167 +++++++++++++++++++++++++++++++++++++ internal/lint/lint.go | 17 ++-- 8 files changed, 400 insertions(+), 52 deletions(-) create mode 100644 internal/core/ini_test.go create mode 100644 internal/lint/html_test.go diff --git a/go.mod b/go.mod index a6d4d964d..b8152d605 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( github.com/errata-ai/ini v1.63.0 github.com/errata-ai/regexp2 v1.7.0 github.com/gobwas/glob v0.2.3 + github.com/jdkato/go-tree-sitter-julia v0.1.0 github.com/jdkato/twine v0.10.1 github.com/karrick/godirwalk v1.16.1 github.com/mholt/archiver/v3 v3.5.1 @@ -23,6 +24,7 @@ require ( github.com/remeh/sizedwaitgroup v1.0.0 github.com/smacker/go-tree-sitter v0.0.0-20240514083259-c5d1f3f5f99e github.com/spf13/pflag v1.0.5 + github.com/stretchr/testify v1.8.4 github.com/yuin/goldmark v1.5.6 golang.org/x/exp v0.0.0-20231006140011-7918f672742d golang.org/x/net v0.23.0 @@ -38,13 +40,13 @@ require ( github.com/Masterminds/semver/v3 v3.2.0 // indirect github.com/andybalholm/brotli v1.0.1 // indirect github.com/containerd/console v1.0.3 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/uuid v1.1.1 // indirect github.com/gookit/color v1.5.4 // indirect github.com/huandu/xstrings v1.3.3 // indirect github.com/imdario/mergo v0.3.11 // indirect - github.com/jdkato/go-tree-sitter-julia v0.1.0 // indirect github.com/klauspost/compress v1.11.4 // indirect github.com/klauspost/pgzip v1.2.5 // indirect github.com/kr/pretty v0.3.0 // indirect @@ -55,6 +57,7 @@ require ( github.com/montanaflynn/stats v0.7.1 // indirect github.com/nwaples/rardecode v1.1.0 // indirect 
github.com/pierrec/lz4/v4 v4.1.2 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.4.4 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/spf13/cast v1.3.1 // indirect @@ -65,4 +68,5 @@ require ( golang.org/x/term v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect gopkg.in/neurosnap/sentences.v1 v1.0.7 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index f284f1bb2..328d74ad0 100644 --- a/go.sum +++ b/go.sum @@ -63,8 +63,6 @@ github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4 github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/imdario/mergo v0.3.11 h1:3tnifQM4i+fbajXKBHXWEH+KvNHqojZ778UH75j3bGA= github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA= -github.com/jdkato/go-tree-sitter-julia v0.0.0-20240531060609-b738d045ba2d h1:nc/Dgjp4Zr3drV44bz2+fUaCb1ZZvFtNnudyuaWL7uQ= -github.com/jdkato/go-tree-sitter-julia v0.0.0-20240531060609-b738d045ba2d/go.mod h1:lXNEZorcvU63DcANEklLMbDRjwam4VQ44MIV1Cck0w8= github.com/jdkato/go-tree-sitter-julia v0.1.0 h1:z+6zTbd6PHMKAge7GJx9QIwPQX2NOKb4Pj5jteJvaYY= github.com/jdkato/go-tree-sitter-julia v0.1.0/go.mod h1:lXNEZorcvU63DcANEklLMbDRjwam4VQ44MIV1Cck0w8= github.com/jdkato/twine v0.10.1 h1:Jexy1dua9nRyr45AQ3Bml1nCVYq3VIi9g09MOkg2Wwk= diff --git a/internal/core/config.go b/internal/core/config.go index 6faecdb63..63ef4f14d 100644 --- a/internal/core/config.go +++ b/internal/core/config.go @@ -175,27 +175,28 @@ type CLIFlags struct { // Config holds the configuration values from both the CLI and `.vale.ini`. 
type Config struct { // General configuration - BlockIgnores map[string][]string // A list of blocks to ignore - Checks []string // All checks to load - Formats map[string]string // A map of unknown -> known formats - Asciidoctor map[string]string // A map of asciidoctor attributes - FormatToLang map[string]string // A map of format to lang ID - GBaseStyles []string // Global base style - GChecks map[string]bool // Global checks - IgnoredClasses []string // A list of HTML classes to ignore - IgnoredScopes []string // A list of HTML tags to ignore - MinAlertLevel int // Lowest alert level to display - Vocab []string // The active project - RuleToLevel map[string]string // Single-rule level changes - SBaseStyles map[string][]string // Syntax-specific base styles - SChecks map[string]map[string]bool // Syntax-specific checks - SkippedScopes []string // A list of HTML blocks to ignore - Stylesheets map[string]string // XSLT stylesheet - TokenIgnores map[string][]string // A list of tokens to ignore - WordTemplate string // The template used in YAML -> regexp list conversions - RootINI string // the path to the project's .vale.ini file - Paths []string // A list of paths to search for styles - ConfigFiles []string // A list of configuration files to load + BlockIgnores map[string][]string // A list of blocks to ignore + Checks []string // All checks to load + Formats map[string]string // A map of unknown -> known formats + Asciidoctor map[string]string // A map of asciidoctor attributes + FormatToLang map[string]string // A map of format to lang ID + GBaseStyles []string // Global base style + GChecks map[string]bool // Global checks + IgnoredClasses []string // A list of HTML classes to ignore + IgnoredScopes []string // A list of HTML tags to ignore + MinAlertLevel int // Lowest alert level to display + Vocab []string // The active project + RuleToLevel map[string]string // Single-rule level changes + SBaseStyles map[string][]string // Syntax-specific base styles + 
SChecks map[string]map[string]bool // Syntax-specific checks + SkippedScopes []string // A list of HTML blocks to ignore + Stylesheets map[string]string // XSLT stylesheet + TokenIgnores map[string][]string // A list of tokens to ignore + CommentDelimiters map[string][2]string // Strings to treat as comment delimiters. Indicates the start and end delimiters. + WordTemplate string // The template used in YAML -> regexp list conversions + RootINI string // the path to the project's .vale.ini file + Paths []string // A list of paths to search for styles + ConfigFiles []string // A list of configuration files to load AcceptedTokens []string `json:"-"` // Project-specific vocabulary (okay) RejectedTokens []string `json:"-"` // Project-specific vocabulary (avoid) @@ -229,6 +230,7 @@ func NewConfig(flags *CLIFlags) (*Config, error) { cfg.SecToPat = make(map[string]glob.Glob) cfg.Stylesheets = make(map[string]string) cfg.TokenIgnores = make(map[string][]string) + cfg.CommentDelimiters = make(map[string][2]string) cfg.FormatToLang = make(map[string]string) cfg.Paths = []string{} cfg.ConfigFiles = []string{} diff --git a/internal/core/ini.go b/internal/core/ini.go index ffdb6597e..b2a4d9ebb 100644 --- a/internal/core/ini.go +++ b/internal/core/ini.go @@ -110,6 +110,20 @@ var syntaxOpts = map[string]func(string, *ini.Section, *Config) error{ cfg.BlockIgnores[label] = mergeValues(sec.Key("BlockIgnores").StringsWithShadows(",")) return nil }, + "CommentDelimiters": func(label string, sec *ini.Section, cfg *Config) error { //nolint:unparam + d := mergeValues(sec.Key("CommentDelimiters").StringsWithShadows(",")) + if len(d) != 2 { + return NewE201FromTarget( + fmt.Sprintf("CommentDelimiters must be a comma-separated list of two delimiters, but got %v items", len(d)), + label, + cfg.Flags.Path) + } + var c [2]string + c[0], c[1] = d[0], d[1] + cfg.CommentDelimiters[label] = c + return nil + + }, "TokenIgnores": func(label string, sec *ini.Section, cfg *Config) error { 
//nolint:unparam cfg.TokenIgnores[label] = mergeValues(sec.Key("TokenIgnores").StringsWithShadows(",")) return nil diff --git a/internal/core/ini_test.go b/internal/core/ini_test.go new file mode 100644 index 000000000..088aa094e --- /dev/null +++ b/internal/core/ini_test.go @@ -0,0 +1,96 @@ +package core + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_processConfig_commentDelimiters(t *testing.T) { + cases := []struct { + description string + body string + expected map[string][2]string + }{ + { + description: "custom comment delimiters for markdown", + body: `[*.md] +CommentDelimiters = "{/*,*/}" +`, + expected: map[string][2]string{ + "*.md": [2]string{"{/*", "*/}"}, + }, + }, + { + description: "not set", + body: `[*.md] +TokenIgnores = (\$+[^\n$]+\$+) +`, + expected: map[string][2]string{}, + }, + } + + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + uCfg, err := shadowLoad([]byte(c.body)) + assert.NoError(t, err) + conf, err := NewConfig(&CLIFlags{}) + assert.NoError(t, err) + _, err = processConfig(uCfg, conf, false) + assert.NoError(t, err) + actual := conf.CommentDelimiters + assert.Equal(t, c.expected, actual) + }) + } +} + +func Test_processConfig_commentDelimiters_error(t *testing.T) { + cases := []struct { + description string + body string + expectedErr string + }{ + { + description: "global custom comment delimiters", + body: `[*] +CommentDelimiters = "{/*,*/}" +`, + expectedErr: "syntax-specific option", + }, + { + description: "more than two delimiters", + body: `[*.md] +CommentDelimiters = "{/*,*/},<<,>>" +`, + expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 4 items", + }, + { + description: "more than two delimiters (shadow)", + body: `[*.md] +CommentDelimiters = "{/*,*/}" + +[*.md] +CommentDelimiters = "<<,>>" +`, + expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 4 items", + }, + { + description: "one 
delimiter is empty", + body: `[*.md] +CommentDelimiters = "{/*" +`, + expectedErr: "CommentDelimiters must be a comma-separated list of two delimiters, but got 1 items", + }, + } + + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + uCfg, err := shadowLoad([]byte(c.body)) + assert.NoError(t, err) + conf, err := NewConfig(&CLIFlags{}) + assert.NoError(t, err) + _, err = processConfig(uCfg, conf, false) + assert.ErrorContains(t, err, c.expectedErr) + }) + } +} diff --git a/internal/lint/html.go b/internal/lint/html.go index 6dc73d79f..6c9bcc068 100755 --- a/internal/lint/html.go +++ b/internal/lint/html.go @@ -24,25 +24,40 @@ func (l *Linter) lintHTML(f *core.File) error { return l.lintHTMLTokens(f, []byte(f.Content), 0) } -func (l *Linter) applyPatterns(f *core.File, block, inline string) (string, error) { +type extensionConfig struct { + Normed, Real string +} + +var blockDelimiters map[string]string = map[string]string{ + ".adoc": "\n----\n$1\n----\n", + ".md": "\n```\n$1\n```\n", + ".rst": "\n::\n\n%s\n", + ".org": orgExample, +} + +func applyBlockPatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + block, ok := blockDelimiters[exts.Normed] + if !ok { + return content, fmt.Errorf("ignore patterns are not supported in '%s' files", exts.Normed) + } + // TODO: Should we assume this? 
- s := reFrontMatter.ReplaceAllString(f.Content, block) + s := reFrontMatter.ReplaceAllString(content, block) - exts := []string{f.NormedExt, f.RealExt} - for syntax, regexes := range l.Manager.Config.BlockIgnores { + for syntax, regexes := range c.BlockIgnores { sec, err := glob.Compile(syntax) if err != nil { return s, err - } else if sec.MatchAny(exts) { + } else if sec.Match(exts.Normed) || sec.Match(exts.Real) { for _, r := range regexes { pat, errc := regexp2.CompileStd(r) if errc != nil { //nolint:gocritic return s, core.NewE201FromTarget( errc.Error(), r, - l.Manager.Config.Flags.Path, + c.Flags.Path, ) - } else if strings.HasSuffix(f.NormedExt, ".rst") { + } else if strings.HasSuffix(exts.Normed, ".rst") { // HACK: We need to add padding for the literal block. for _, c := range pat.FindAllStringSubmatch(s, -1) { sec := fmt.Sprintf(block, core.Indent(c[0], " ")) @@ -54,39 +69,98 @@ func (l *Linter) applyPatterns(f *core.File, block, inline string) (string, erro return s, core.NewE201FromTarget( err.Error(), r, - l.Manager.Config.Flags.Path, + c.Flags.Path, ) } } } } } + return s, nil +} + +var inlineDelimiters map[string]string = map[string]string{ + ".adoc": "`$1`", + ".md": "`$1`", + ".rst": "``$1``", + ".org": "=$1=", +} + +func applyInlinePatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + inline, ok := inlineDelimiters[exts.Normed] + if !ok { + return content, fmt.Errorf("ignore patterns are not supported in '%s' files", exts.Normed) + } - for syntax, regexes := range l.Manager.Config.TokenIgnores { + for syntax, regexes := range c.TokenIgnores { sec, err := glob.Compile(syntax) if err != nil { - return s, err - } else if sec.MatchAny(exts) { + return content, err + } else if sec.Match(exts.Normed) || sec.Match(exts.Real) { for _, r := range regexes { pat, errc := regexp2.CompileStd(r) if errc != nil { - return s, core.NewE201FromTarget( + return content, core.NewE201FromTarget( errc.Error(), r, - 
l.Manager.Config.Flags.Path, + c.Flags.Path, ) } - s, err = pat.Replace(s, inline, 0, -1) + content, err = pat.Replace(content, inline, 0, -1) if err != nil { - return s, core.NewE201FromTarget( + return content, core.NewE201FromTarget( err.Error(), r, - l.Manager.Config.Flags.Path, + c.Flags.Path, ) } } } } + return content, nil +} + +// applyCommentPatterns replaces any custom comment delimiters with HTML comment +// tags based on the user configuration. This makes it possible to apply +// comment-based controls using custom comment delimiters. +func applyCommentPatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + for syntax, delims := range c.CommentDelimiters { + sec, err := glob.Compile(syntax) + if err != nil { + return content, err + } else if sec.Match(exts.Normed) || sec.Match(exts.Real) { + // This field was not assigned, so do nothing. + if delims[0] == "" && delims[1] == "" { + return content, nil + } + // Return an error if only one delimiter is configured + if (delims[0] == "" && delims[1] != "") || (delims[0] != "" && delims[1] == "") { + return content, fmt.Errorf("CommentDelimiters must be empty or have two values") + } + + content = strings.ReplaceAll(content, delims[0], "<!--") + content = strings.ReplaceAll(content, delims[1], "-->") + + } + } + return content, nil +} + +func applyPatterns(c *core.Config, exts extensionConfig, content string) (string, error) { + s, err := applyBlockPatterns(c, exts, content) + if err != nil { + return s, err + } + + s, err = applyInlinePatterns(c, exts, s) + if err != nil { + return s, err + } + + s, err = applyCommentPatterns(c, exts, s) + if err != nil { + return s, err + } return s, nil } diff --git a/internal/lint/html_test.go b/internal/lint/html_test.go new file mode 100644 index 000000000..49b318f7f --- /dev/null +++ b/internal/lint/html_test.go @@ -0,0 +1,167 @@ +package lint + +import ( + "strings" + "testing" + + "github.com/errata-ai/vale/v3/internal/core" + "github.com/stretchr/testify/assert" +) + +func Test_applyPatterns(t 
*testing.T) { + cases := []struct { + description string + conf core.Config + exts extensionConfig + content string + expected string + }{ + { + description: "MDX comment in markdown, custom comment delimiter", + conf: core.Config{ + CommentDelimiters: map[string][2]string{ + ".md": [2]string{"{/*", "*/}"}, + }, + }, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* This is a comment */} +`, + expected: strings.ReplaceAll(` +@@@ +title: Example page +description: Example page +@@@ + + +This is the intro pagragraph. + +<!-- This is a comment --> +`, "@", "`"), + }, + { + description: "MDX comment in markdown, no custom comment delimiter", + conf: core.Config{}, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* This is a comment */} +`, + expected: strings.ReplaceAll(` +@@@ +title: Example page +description: Example page +@@@ + + +This is the intro pagragraph. + +{/* This is a comment */} +`, "@", "`"), + }, + { + description: "multiline MDX comment in markdown, custom comment delimiter", + conf: core.Config{ + CommentDelimiters: map[string][2]string{ + ".md": [2]string{"{/*", "*/}"}, + }, + }, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* +This is a comment +*/} +`, + expected: strings.ReplaceAll(` +@@@ +title: Example page +description: Example page +@@@ + + +This is the intro pagragraph. 
+ +<!-- +This is a comment +--> +`, "@", "`"), + }, + { + description: "token ignore in cc file", + content: "Call \\c func to start the process.", + conf: core.Config{ + TokenIgnores: map[string][]string{ + "*.cc": []string{`(\\c \w+)`}, + }, + Formats: map[string]string{ + "cc": "md", + }, + }, + exts: extensionConfig{".md", ".cc"}, + expected: "Call `\\c func` to start the process.", + }, + } + + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + s, err := applyPatterns(&c.conf, c.exts, c.content) + assert.NoError(t, err) + assert.Equal(t, c.expected, s) + }) + } +} + +func Test_applyPatterns_errors(t *testing.T) { + cases := []struct { + description string + conf core.Config + exts extensionConfig + content string + expectedErr string + }{ + { + description: "only one delimiter", + conf: core.Config{ + CommentDelimiters: map[string][2]string{ + ".md": [2]string{"{/*", ""}, + }, + }, + exts: extensionConfig{".md", ".md"}, + content: `--- +title: Example page +description: Example page +--- + +This is the intro pagragraph. + +{/* This is a comment */} +`, + expectedErr: "", + }, + } + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + _, err := applyPatterns(&c.conf, c.exts, c.content) + assert.ErrorContains(t, err, c.expectedErr) + }) + } +} + +// TODO: Test for expected errors resulting from applyPatterns diff --git a/internal/lint/lint.go b/internal/lint/lint.go index e2c85847d..66c9a5b6c 100755 --- a/internal/lint/lint.go +++ b/internal/lint/lint.go @@ -2,7 +2,6 @@ package lint import ( "errors" - "fmt" "net/http" "os" "path/filepath" @@ -56,18 +55,12 @@ func NewLinter(cfg *core.Config) (*Linter, error) { // Transformations include block and token ignores, as well as some built-in // replacements. 
func (l *Linter) Transform(f *core.File) (string, error) { - switch f.NormedExt { - case ".adoc": - return l.applyPatterns(f, "\n----\n$1\n----\n", "`$1`") - case ".md": - return l.applyPatterns(f, "\n```\n$1\n```\n", "`$1`") - case ".rst": - return l.applyPatterns(f, "\n::\n\n%s\n", "``$1``") - case ".org": - return l.applyPatterns(f, orgExample, "=$1=") - default: - return f.Content, fmt.Errorf("ignore patterns are not supported in '%s' files", f.NormedExt) + exts := extensionConfig{ + Normed: f.NormedExt, + Real: f.RealExt, } + + return applyPatterns(l.Manager.Config, exts, f.Content) } // LintString src according to its format.