From 5fa294981980c8718c9388470d4b55e5dea4e1ba Mon Sep 17 00:00:00 2001 From: Ryan Ronnander Date: Mon, 23 Feb 2026 15:00:03 -0500 Subject: [PATCH] fix(core): use byte offsets for position reporting in raw-scoped script rules Script rules with `scope: raw` return begin/end byte offsets in their match arrays, but AddAlert ignores these and performs a text search via FindLoc/initialPosition to determine the alert position. When the matched text appears multiple times in the document, this always reports the position of the first occurrence rather than the intended one. Add locFromByteOffset() to compute line:column directly from the byte offsets the script provides, bypassing the text-search path. The new path activates when the alert carries valid byte offsets within a raw-scope block, falling back to the existing FindLoc path otherwise. Relates to #869, #272. Co-Authored-By: Claude Opus 4.6 --- internal/check/script.go | 13 +++---- internal/core/alert.go | 7 ++-- internal/core/file.go | 60 ++++++++++++++++++++++++++------ testdata/features/checks.feature | 2 +- 4 files changed, 61 insertions(+), 21 deletions(-) diff --git a/internal/check/script.go b/internal/check/script.go index c0a821332..e0096954a 100644 --- a/internal/check/script.go +++ b/internal/check/script.go @@ -78,12 +78,13 @@ func (s Script) Run(blk nlp.Block, _ *core.File, _ *core.Config) ([]core.Alert, // don't use our custom regexp2 library, which means the offsets // (`re2loc`) will be off. a := core.Alert{ - Check: s.Name, - Severity: s.Level, - Span: matchLoc, - Link: s.Link, - Match: matchText, - Action: s.Action} + Check: s.Name, + Severity: s.Level, + Span: matchLoc, + Link: s.Link, + Match: matchText, + Action: s.Action, + HasByteOffsets: true} if matchMsg, ok := match["message"].(string); ok { a.Message, a.Description = formatMessages(matchMsg, s.Description, matchText) diff --git a/internal/core/alert.go b/internal/core/alert.go index 4ff35fac1..1757276f7 100644 --- a/internal/core/alert.go +++ b/internal/core/alert.go @@ -27,9 +27,10 @@ type Alert struct { Message string // the output message Severity string // 'suggestion', 'warning', or 'error' Match string // the actual matched text - Line int // the source line - Limit int `json:"-"` // the max times to report - Hide bool `json:"-"` // should we hide this alert? + Line int // the source line + Limit int `json:"-"` // the max times to report + Hide bool `json:"-"` // should we hide this alert? + HasByteOffsets bool `json:"-"` // Span holds byte offsets into the raw document } // FormatAlert ensures that all required fields have data. diff --git a/internal/core/file.go b/internal/core/file.go index 58d70d0f9..a0b1d2084 100755 --- a/internal/core/file.go +++ b/internal/core/file.go @@ -252,6 +252,32 @@ func (f *File) assignLoc(ctx string, blk nlp.Block, pad int, a Alert) (int, []in return blk.Line + 1, a.Span } +// locFromByteOffset computes a 1-based line number and a [col, col+len] span +// from absolute byte offsets into the raw document text. This avoids the +// text-search approach used by FindLoc/initialPosition, which can report the +// wrong location when the matched text appears more than once. +func locFromByteOffset(ctx string, begin, end, pad int) (int, []int) { + line := 1 + lineStart := 0 + + for i := 0; i < begin && i < len(ctx); i++ { + if ctx[i] == '\n' { + line++ + lineStart = i + 1 + } + } + + col := nlp.StrLen(ctx[lineStart:begin]) + 1 + pad + matchLen := nlp.StrLen(ctx[begin:end]) + + span := []int{col, col + matchLen - 1} + if span[1] <= 0 { + span[1] = 1 + } + + return line, span +} + // SetText updates the file's content, lines, and history. func (f *File) SetText(s string) { f.Content = s @@ -271,19 +297,31 @@ func (f *File) AddAlert(a Alert, blk nlp.Block, lines, pad int, lookup bool) { ctx = old } - // NOTE: If the `ctx` document is large (as could be the case with - // `scope: raw`) this is *slow*. Thus, the cap at 1k. + // When the alert carries byte offsets from a script rule and falls within + // the document, compute line:column directly from those offsets instead of + // performing a text search. This fixes incorrect position reporting for + // script rules with `scope: raw` when the matched text appears more than + // once. // - // TODO: Actually fix this. - if len(a.Offset) == 0 && strings.Count(ctx, a.Match) > 1 && len(ctx) < 1000 { - a.Offset = append(a.Offset, strings.Fields(ctx[0:a.Span[0]])...) - } + // We use blk.Context (the original document) rather than ctx, which may + // have been modified by ChkToCtx substitutions from earlier alerts. + if a.HasByteOffsets && a.Span[0] >= 0 && a.Span[1] <= len(blk.Context) { + a.Line, a.Span = locFromByteOffset(blk.Context, a.Span[0], a.Span[1], pad) + } else { + // NOTE: If the `ctx` document is large (as could be the case with + // `scope: raw`) this is *slow*. Thus, the cap at 1k. + // + // TODO: Actually fix this. + if len(a.Offset) == 0 && strings.Count(ctx, a.Match) > 1 && len(ctx) < 1000 { + a.Offset = append(a.Offset, strings.Fields(ctx[0:a.Span[0]])...) + } - if !lookup { - a.Line, a.Span = f.assignLoc(ctx, blk, pad, a) - } - if (!lookup && a.Span[0] < 0) || lookup { - a.Line, a.Span = f.FindLoc(ctx, blk.Text, pad, lines, a) + if !lookup { + a.Line, a.Span = f.assignLoc(ctx, blk, pad, a) + } + if (!lookup && a.Span[0] < 0) || lookup { + a.Line, a.Span = f.FindLoc(ctx, blk.Text, pad, lines, a) + } } if a.Span[0] > 0 { diff --git a/testdata/features/checks.feature b/testdata/features/checks.feature index 22c15ecfb..e170ca462 100755 --- a/testdata/features/checks.feature +++ b/testdata/features/checks.feature @@ -3,7 +3,7 @@ Feature: Checks When I test "checks/Script" Then the output should contain exactly: """ - test.md:4:19:Scripts.CustomMsg:Some message + test.md:1:2:Scripts.CustomMsg:Some message test.md:29:1:Checks.ScriptRE:Consider inserting a new section heading at this point. test.md:39:1:Checks.ScriptRE:Consider inserting a new section heading at this point. """