From 89c3540dfddba903596f878be63bcda734158e7b Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Wed, 18 May 2022 02:08:56 -0700 Subject: [PATCH 1/2] Fix columnation and forward port improvements Avoid boxed ints when creating line index table. Reject more bad inputs, but keep the convention that the current line at EOF is the last line (whether or not the last line is empty). --- .../dotty/tools/dotc/util/SourceFile.scala | 45 ++++--- .../tools/dotc/util/SourceFileTests.scala | 112 ++++++++++++++++++ compiler/test/dotty/tools/utils.scala | 2 + 3 files changed, 140 insertions(+), 19 deletions(-) create mode 100644 compiler/test/dotty/tools/dotc/util/SourceFileTests.scala diff --git a/compiler/src/dotty/tools/dotc/util/SourceFile.scala b/compiler/src/dotty/tools/dotc/util/SourceFile.scala index 42d07869f74e..f036b9124618 100644 --- a/compiler/src/dotty/tools/dotc/util/SourceFile.scala +++ b/compiler/src/dotty/tools/dotc/util/SourceFile.scala @@ -11,8 +11,7 @@ import core.Contexts._ import scala.io.Codec import Chars._ import scala.annotation.internal.sharable -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.ArrayBuilder import scala.util.chaining.given import java.io.File.separator @@ -91,9 +90,9 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends def apply(idx: Int): Char = content().apply(idx) - /** length of the original source file - * Note that when the source is from Tasty, content() could be empty even though length > 0. - * Use content().length to determine the length of content(). */ + /** length of the original source file. + * Note that when the source is from Tasty, content() could be empty even though length > 0. + * Use content().length to determine the length of content(). */ def length: Int = if lineIndicesCache ne null then lineIndicesCache.last else content().length @@ -119,10 +118,11 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends def positionInUltimateSource(position: SourcePosition): SourcePosition = SourcePosition(underlying, position.span shift start) - private def calculateLineIndicesFromContents() = { + private def calculateLineIndicesFromContents(): Array[Int] = val cs = content() - val buf = new ArrayBuffer[Int] - buf += 0 + val buf = new ArrayBuilder.ofInt + buf.sizeHint(cs.length / 30) // guesstimate line count + buf.addOne(0) var i = 0 while i < cs.length do val isLineBreak = @@ -130,12 +130,13 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends // don't identify the CR in CR LF as a line break, since LF will do. if ch == CR then i + 1 == cs.length || cs(i + 1) != LF else isLineBreakChar(ch) - if isLineBreak then buf += i + 1 + if isLineBreak then buf.addOne(i + 1) i += 1 - buf += cs.length // sentinel, so that findLine below works smoother - buf.toArray - } + buf.addOne(cs.length) // sentinel, so that findLine below works smoother + buf.result() + // offsets of lines, plus a sentinel which is the content length. + // if the last line is empty (end of content is NL) then the penultimate index == sentinel. private var lineIndicesCache: Array[Int] = _ private def lineIndices: Array[Int] = if lineIndicesCache eq null then @@ -156,7 +157,9 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends lineIndicesCache = indices /** Map line to offset of first character in line */ - def lineToOffset(index: Int): Int = lineIndices(index) + def lineToOffset(index: Int): Int = + if index < 0 || index >= lineIndices.length - 1 then throw new IndexOutOfBoundsException(index.toString) + lineIndices(index) /** Like `lineToOffset`, but doesn't crash if the index is out of bounds. */ def lineToOffsetOpt(index: Int): Option[Int] = @@ -168,14 +171,16 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends /** A cache to speed up offsetToLine searches to similar lines */ private var lastLine = 0 - /** Convert offset to line in this source file - * Lines are numbered from 0 + /** Convert offset to line in this source file. + * Lines are numbered from 0. Conventionally, a final empty line begins at EOF. + * For simplicity, the line at EOF is the last line (possibly non-empty). */ - def offsetToLine(offset: Int): Int = { + def offsetToLine(offset: Int): Int = + //if lineIndices.isEmpty || offset < lineIndices.head || offset > lineIndices.last then + // throw new IndexOutOfBoundsException(offset.toString) lastLine = Util.bestFit(lineIndices, lineIndices.length, offset, lastLine) if (offset >= length) lastLine -= 1 // compensate for the sentinel lastLine - } /** The index of the first character of the line containing position `offset` */ def startOfLine(offset: Int): Int = { @@ -183,9 +188,11 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends lineToOffset(offsetToLine(offset)) } - /** The start index of the line following the one containing position `offset` */ + /** The start index of the line following the one containing position `offset` or `length` at last line */ def nextLine(offset: Int): Int = - lineToOffset(offsetToLine(offset) + 1 min lineIndices.length - 1) + val next = offsetToLine(offset) + 1 + if next >= lineIndices.length - 1 then lineIndices.last + else lineToOffset(next) /** The content of the line containing position `offset` */ def lineContent(offset: Int): String = diff --git a/compiler/test/dotty/tools/dotc/util/SourceFileTests.scala b/compiler/test/dotty/tools/dotc/util/SourceFileTests.scala new file mode 100644 index 000000000000..fe83eee12173 --- /dev/null +++ b/compiler/test/dotty/tools/dotc/util/SourceFileTests.scala @@ -0,0 +1,112 @@ + +package dotty.tools +package dotc.util + +import Spans.* + +import org.junit.Assert.{assertThrows as _, *} +import org.junit.Test + +class SourceFileTests: + @Test def `i15209 source column handles tabs as line index`: Unit = + val text = "\ta\n \tb\n \t c\n" + val f = SourceFile.virtual("batch", text) + assertEquals(1, f.column(text.indexOf('a'))) + assertEquals(2, f.column(text.indexOf('b'))) + assertEquals(3, f.column(text.indexOf('c'))) + + def lineContentOf(code: String, offset: Int) = SourceFile.virtual("batch", code).lineContent(offset) + + @Test def si8205_lineToString: Unit = + assertEquals("", lineContentOf("", 0)) + assertEquals("abc", lineContentOf("abc", 0)) + assertEquals("abc", lineContentOf("abc", 3)) + assertEquals("code no newline", lineContentOf("code no newline", 1)) + assertEquals("\n", lineContentOf("\n", 0)) + assertEquals("abc\n", lineContentOf("abc\ndef", 0)) + assertEquals("abc\n", lineContentOf("abc\ndef", 3)) + assertEquals("def", lineContentOf("abc\ndef", 4)) + assertEquals("def", lineContentOf("abc\ndef", 6)) + assertEquals("def\n", lineContentOf("abc\ndef\n", 7)) + + @Test def CRisEOL: Unit = + assertEquals("\r", lineContentOf("\r", 0)) + assertEquals("abc\r", lineContentOf("abc\rdef", 0)) + assertEquals("abc\r", lineContentOf("abc\rdef", 3)) + assertEquals("def", lineContentOf("abc\rdef", 4)) + assertEquals("def", lineContentOf("abc\rdef", 6)) + assertEquals("def\r", lineContentOf("abc\rdef\r", 7)) + + @Test def CRNLisEOL(): Unit = + assertEquals("\r\n", lineContentOf("\r\n", 0)) + assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 0)) + assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 3)) + assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 4)) + assertEquals("def", lineContentOf("abc\r\ndef", 5)) + assertEquals("def", lineContentOf("abc\r\ndef", 7)) + assertEquals("def", lineContentOf("abc\r\ndef", 8)) + assertEquals("def\r\n", lineContentOf("abc\r\ndef\r\n", 9)) + + @Test def `t9885 lineToOffset throws on bad line`: Unit = + val text = "a\nb\nc\n" + val f = SourceFile.virtual("batch", text) + // was: EOL is line terminator, not line separator, so there is not an empty 4th line + val splitsville = text.split("\n") + assertEquals(List(0, 2, 4, 6), (0 to splitsville.nn.length).toList.map(f.lineToOffset)) + assertThrows[IndexOutOfBoundsException] { + f.lineToOffset(4) // was: 3 in Scala 2 (no empty line), 5 in Scala 3 (sentinel induces off-by-one) + } + + // Position and SourceFile used to count differently + val p = SourcePosition(f, Span(text.length - 1)) + val q = SourcePosition(f, Span(f.lineToOffset(p.line - 1))) + assertEquals(2, p.line) + assertEquals(p.line - 1, q.line) + assertEquals(p.column, q.column + 1) + assertEquals(f.startOfLine(p.span.start), SourcePosition(f, Span(f.lineToOffset(p.line))).span.start) + + @Test def `t9885 lineToOffset ignores lack of final EOL`: Unit = + val text = "a\nb\nc" + val f = SourceFile.virtual("batch", text) + assertThrows[IndexOutOfBoundsException] { + f.lineToOffset(3) + } + assertEquals(4, f.lineToOffset(2)) + assertEquals(2, f.offsetToLine(text.length)) + + @Test def `t11572 offsetToLine throws on bad offset`: Unit = + val text = "a\nb\nc\n" + val f = SourceFile.virtual("batch", text) + /* current code requires offsets untethered from source + assertThrows[IndexOutOfBoundsException] { + f.offsetToLine(-1) // was: -1 + } + assertThrows[IndexOutOfBoundsException] { + f.offsetToLine(7) // was: 3 + } + */ + assertEquals(0, f.offsetToLine(0)) + assertEquals(0, f.offsetToLine(1)) + assertEquals(1, f.offsetToLine(2)) + assertEquals(2, f.offsetToLine(4)) + assertEquals(2, f.offsetToLine(5)) + assertEquals(3, f.offsetToLine(6)) + + @Test def `t11572b offsetToLine throws on bad offset`: Unit = + val text = "a\nb\nc\nd" + val f = SourceFile.virtual("batch", text) + /* + assertThrows[IndexOutOfBoundsException] { + f.offsetToLine(-1) + } + assertThrows[IndexOutOfBoundsException] { + f.offsetToLine(8) + } + */ + assertEquals(0, f.offsetToLine(0)) + assertEquals(0, f.offsetToLine(1)) + assertEquals(1, f.offsetToLine(2)) + assertEquals(2, f.offsetToLine(4)) + assertEquals(2, f.offsetToLine(5)) + assertEquals(3, f.offsetToLine(6)) + assertEquals(3, f.offsetToLine(7)) diff --git a/compiler/test/dotty/tools/utils.scala b/compiler/test/dotty/tools/utils.scala index bfedc338f25a..e36ba6d4912d 100644 --- a/compiler/test/dotty/tools/utils.scala +++ b/compiler/test/dotty/tools/utils.scala @@ -39,6 +39,8 @@ def readFile(f: File): String = withFile(f)(_.mkString) private object Unthrown extends ControlThrowable +def assertThrows[T <: Throwable: ClassTag](body: => Any): Unit = assertThrows[T](_ => true)(body) + def assertThrows[T <: Throwable: ClassTag](p: T => Boolean)(body: => Any): Unit = try body From e93f41b6a458f1d5c23cca9b2276c366df9de026 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Mon, 23 May 2022 16:35:55 -0700 Subject: [PATCH 2/2] Line helpers --- .../dotc/reporting/MessageRendering.scala | 50 +++++++++++-------- .../dotty/tools/dotc/util/SourceFile.scala | 8 ++- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/reporting/MessageRendering.scala b/compiler/src/dotty/tools/dotc/reporting/MessageRendering.scala index f53359fb8b19..c3f88107c36d 100644 --- a/compiler/src/dotty/tools/dotc/reporting/MessageRendering.scala +++ b/compiler/src/dotty/tools/dotc/reporting/MessageRendering.scala @@ -13,13 +13,15 @@ import printing.SyntaxHighlighting import Diagnostic._ import util.{ SourcePosition, NoSourcePosition } import util.Chars.{ LF, CR, FF, SU } -import scala.annotation.switch -import scala.collection.mutable +import scala.annotation.switch +import scala.collection.mutable, mutable.StringBuilder +import scala.util.chaining.given trait MessageRendering { import Highlight.* import Offsets.* + import MessageRendering.* /** Remove ANSI coloring from `str`, useful for getting real length of * strings @@ -204,12 +206,12 @@ trait MessageRendering { |${Blue("Explanation").show} |${Blue("===========").show}""".stripMargin ) - sb.append(EOL).append(m.explanation) + sb.newLine(m.explanation) if (!m.explanation.endsWith(EOL)) sb.append(EOL) sb.toString } - private def appendFilterHelp(dia: Diagnostic, sb: mutable.StringBuilder): Unit = + private def appendFilterHelp(dia: Diagnostic, sb: StringBuilder): Unit = import dia._ val hasId = msg.errorId.errorNumber >= 0 val category = dia match { @@ -221,14 +223,14 @@ trait MessageRendering { if (hasId || category.nonEmpty) sb.append(EOL).append("Matching filters for @nowarn or -Wconf:") if (hasId) - sb.append(EOL).append(" - id=E").append(msg.errorId.errorNumber) - sb.append(EOL).append(" - name=").append(msg.errorId.productPrefix.stripSuffix("ID")) + sb.newLine(" - id=E").append(msg.errorId.errorNumber) + sb.newLine(" - name=").append(msg.errorId.productPrefix.stripSuffix("ID")) if (category.nonEmpty) - sb.append(EOL).append(" - cat=").append(category) + sb.newLine(" - cat=").append(category) /** The whole message rendered from `msg` */ def messageAndPos(dia: Diagnostic)(using Context): String = { - import dia._ + import dia.{pos, msg, level} val pos1 = pos.nonInlined val inlineStack = inlinePosStack(pos).filter(_ != pos1) val maxLineNumber = @@ -236,7 +238,7 @@ trait MessageRendering { else 0 given Level = Level(level) given Offset = Offset(maxLineNumber.toString.length + 2) - val sb = mutable.StringBuilder() + val sb = StringBuilder() val posString = posStr(pos, msg, diagnosticLevel(dia)) if (posString.nonEmpty) sb.append(posString).append(EOL) if (pos.exists) { @@ -248,16 +250,16 @@ trait MessageRendering { sb.append((srcBefore ::: marker :: err :: srcAfter).mkString(EOL)) if inlineStack.nonEmpty then - sb.append(EOL).append(newBox()) - sb.append(EOL).append(offsetBox).append(i"Inline stack trace") + sb.newLine(newBox()) + sb.newLine(offsetBox).append(i"Inline stack trace") for inlinedPos <- inlineStack if inlinedPos != pos1 do - sb.append(EOL).append(newBox(soft = true)) - sb.append(EOL).append(offsetBox).append(i"This location contains code that was inlined from $pos") + sb.newLine(newBox(soft = true)) + sb.newLine(offsetBox).append(i"This location contains code that was inlined from $pos") if inlinedPos.source.file.exists then val (srcBefore, srcAfter, _) = sourceLines(inlinedPos) val marker = positionMarker(inlinedPos) - sb.append(EOL).append((srcBefore ::: marker :: srcAfter).mkString(EOL)) - sb.append(EOL).append(endBox) + sb.newLine((srcBefore ::: marker :: srcAfter).mkString(EOL)) + sb.newLine(endBox) } else sb.append(msg.message) } @@ -266,16 +268,16 @@ trait MessageRendering { appendFilterHelp(dia, sb) if Diagnostic.shouldExplain(dia) then - sb.append(EOL).append(newBox()) - sb.append(EOL).append(offsetBox).append(" Explanation (enabled by `-explain`)") - sb.append(EOL).append(newBox(soft = true)) + sb.newLine(newBox()) + sb.newLine(offsetBox).append(" Explanation (enabled by `-explain`)") + sb.newLine(newBox(soft = true)) dia.msg.explanation.split(raw"\R").foreach { line => - sb.append(EOL).append(offsetBox).append(if line.isEmpty then "" else " ").append(line) + sb.newLine(offsetBox).append(if line.isEmpty then "" else " ").append(line) } - sb.append(EOL).append(endBox) + sb.newLine(endBox) else if dia.msg.canExplain then - sb.append(EOL).append(offsetBox) - sb.append(EOL).append(offsetBox).append(" longer explanation available when compiling with `-explain`") + sb.newLine(offsetBox) + sb.newLine(offsetBox).append(" longer explanation available when compiling with `-explain`") sb.toString } @@ -299,6 +301,10 @@ trait MessageRendering { } } +private object MessageRendering: + extension (sb: StringBuilder) + def newLine(s: String): sb.type = sb.tap(_.append(EOL).append(s)) + def mkLines: String = sb.mkString(EOL) private object Highlight { opaque type Level = Int diff --git a/compiler/src/dotty/tools/dotc/util/SourceFile.scala b/compiler/src/dotty/tools/dotc/util/SourceFile.scala index f036b9124618..c13c3a01dab7 100644 --- a/compiler/src/dotty/tools/dotc/util/SourceFile.scala +++ b/compiler/src/dotty/tools/dotc/util/SourceFile.scala @@ -198,11 +198,9 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends def lineContent(offset: Int): String = content.slice(startOfLine(offset), nextLine(offset)).mkString - /** The column corresponding to `offset`, starting at 0 */ - def column(offset: Int): Int = { - var idx = startOfLine(offset) - offset - idx - } + /** The column (index into the line) corresponding to `offset`, starting at 0 */ + def column(offset: Int): Int = + offset - startOfLine(offset) /** The padding of the column corresponding to `offset`, includes tabs */ def startColumnPadding(offset: Int): String = {