Skip to content

Commit c425bc8

Browse files
committed
Fix columnation and forward port improvements
Avoid boxed ints when creating line index table. Reject more bad inputs, but keep the convention that the current line at EOF is the last line (whether or not the last line is empty).
1 parent e5abec0 commit c425bc8

File tree

3 files changed

+143
-22
lines changed

3 files changed

+143
-22
lines changed

compiler/src/dotty/tools/dotc/util/SourceFile.scala

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ import core.Contexts._
1111
import scala.io.Codec
1212
import Chars._
1313
import scala.annotation.internal.sharable
14-
import scala.collection.mutable
15-
import scala.collection.mutable.ArrayBuffer
14+
import scala.collection.mutable.ArrayBuilder
1615
import scala.util.chaining.given
1716

1817
import java.io.File.separator
@@ -95,8 +94,8 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
9594
def apply(idx: Int): Char = content().apply(idx)
9695

9796
/** length of the original source file
98-
* Note that when the source is from Tasty, content() could be empty even though length > 0.
99-
* Use content().length to determine the length of content(). */
97+
* Note that when the source is from Tasty, content() could be empty even though length > 0.
98+
* Use content().length to determine the length of content(). */
10099
def length: Int =
101100
if lineIndicesCache ne null then lineIndicesCache.last
102101
else content().length
@@ -122,23 +121,25 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
122121
def positionInUltimateSource(position: SourcePosition): SourcePosition =
123122
SourcePosition(underlying, position.span shift start)
124123

125-
private def calculateLineIndicesFromContents() = {
124+
private def calculateLineIndicesFromContents(): Array[Int] =
126125
val cs = content()
127-
val buf = new ArrayBuffer[Int]
128-
buf += 0
126+
val buf = new ArrayBuilder.ofInt
127+
buf.sizeHint(cs.length / 30) // guesstimate line count
128+
buf.addOne(0)
129129
var i = 0
130130
while i < cs.length do
131131
val isLineBreak =
132132
val ch = cs(i)
133133
// don't identify the CR in CR LF as a line break, since LF will do.
134134
if ch == CR then i + 1 == cs.length || cs(i + 1) != LF
135135
else isLineBreakChar(ch)
136-
if isLineBreak then buf += i + 1
136+
if isLineBreak then buf.addOne(i + 1)
137137
i += 1
138-
buf += cs.length // sentinel, so that findLine below works smoother
139-
buf.toArray
140-
}
138+
buf.addOne(cs.length) // sentinel, so that findLine below works smoother
139+
buf.result()
141140

141+
// offsets of lines, plus a sentinel which is the content length.
142+
// if the last line is empty (end of context is NL) then the penultimate index == sentinel.
142143
private var lineIndicesCache: Array[Int] = _
143144
private def lineIndices: Array[Int] =
144145
if lineIndicesCache eq null then
@@ -159,7 +160,9 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
159160
lineIndicesCache = indices
160161

161162
/** Map line to offset of first character in line */
162-
def lineToOffset(index: Int): Int = lineIndices(index)
163+
def lineToOffset(index: Int): Int =
164+
if index < 0 || index >= lineIndices.length - 1 then throw new IndexOutOfBoundsException(index.toString)
165+
lineIndices(index)
163166

164167
/** Like `lineToOffset`, but doesn't crash if the index is out of bounds. */
165168
def lineToOffsetOpt(index: Int): Option[Int] =
@@ -171,24 +174,28 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
171174
/** A cache to speed up offsetToLine searches to similar lines */
172175
private var lastLine = 0
173176

174-
/** Convert offset to line in this source file
175-
* Lines are numbered from 0
177+
/** Convert offset to line in this source file.
178+
* Lines are numbered from 0. Conventionally, a final empty line begins at EOF.
179+
* For simplicity, the line at EOF is the last line (possibly non-empty).
176180
*/
177-
def offsetToLine(offset: Int): Int = {
181+
def offsetToLine(offset: Int): Int =
182+
//if lineIndices.isEmpty || offset < lineIndices.head || offset > lineIndices.last then
183+
// throw new IndexOutOfBoundsException(offset.toString)
178184
lastLine = Util.bestFit(lineIndices, lineIndices.length, offset, lastLine)
179185
if (offset >= length) lastLine -= 1 // compensate for the sentinel
180186
lastLine
181-
}
182187

183188
/** The index of the first character of the line containing position `offset` */
184189
def startOfLine(offset: Int): Int = {
185190
require(offset >= 0)
186191
lineToOffset(offsetToLine(offset))
187192
}
188193

189-
/** The start index of the line following the one containing position `offset` */
194+
/** The start index of the line following the one containing position `offset` or `length` at last line */
190195
def nextLine(offset: Int): Int =
191-
lineToOffset(offsetToLine(offset) + 1 min lineIndices.length - 1)
196+
val next = offsetToLine(offset) + 1
197+
if next >= lineIndices.length - 1 then lineIndices.last
198+
else lineToOffset(next)
192199

193200
/** The content of the line containing position `offset` */
194201
def lineContent(offset: Int): String =
@@ -198,10 +205,10 @@ class SourceFile(val file: AbstractFile, computeContent: => Array[Char]) extends
198205
def column(offset: Int): Int = {
199206
var idx = startOfLine(offset)
200207
var col = 0
201-
while (idx != offset) {
202-
col += (if (idx < content().length && content()(idx) == '\t') (tabInc - col) % tabInc else 1)
208+
while idx != offset do
209+
if idx < content().length && content()(idx) == '\t' then col += tabInc - col % tabInc
210+
else col += 1
203211
idx += 1
204-
}
205212
col
206213
}
207214

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
2+
package dotty.tools
3+
package dotc.util
4+
5+
import Spans.*
6+
7+
import org.junit.Assert.{assertThrows as _, *}
8+
import org.junit.Test
9+
10+
class SourceFileTests:
11+
@Test def `i15209 source column handles tabs`: Unit =
12+
val text = "\ta\n \tb\n \t c\n"
13+
val f = SourceFile.virtual("batch", text)
14+
assertEquals(8, f.column(text.indexOf('a')))
15+
assertEquals(8, f.column(text.indexOf('b')))
16+
assertEquals(9, f.column(text.indexOf('c')))
17+
18+
def lineContentOf(code: String, offset: Int) = SourceFile.virtual("batch", code).lineContent(offset)
19+
20+
@Test def si8205_lineToString: Unit =
21+
assertEquals("", lineContentOf("", 0))
22+
assertEquals("abc", lineContentOf("abc", 0))
23+
assertEquals("abc", lineContentOf("abc", 3))
24+
assertEquals("code no newline", lineContentOf("code no newline", 1))
25+
assertEquals("\n", lineContentOf("\n", 0))
26+
assertEquals("abc\n", lineContentOf("abc\ndef", 0))
27+
assertEquals("abc\n", lineContentOf("abc\ndef", 3))
28+
assertEquals("def", lineContentOf("abc\ndef", 4))
29+
assertEquals("def", lineContentOf("abc\ndef", 6))
30+
assertEquals("def\n", lineContentOf("abc\ndef\n", 7))
31+
32+
@Test def CRisEOL: Unit =
33+
assertEquals("\r", lineContentOf("\r", 0))
34+
assertEquals("abc\r", lineContentOf("abc\rdef", 0))
35+
assertEquals("abc\r", lineContentOf("abc\rdef", 3))
36+
assertEquals("def", lineContentOf("abc\rdef", 4))
37+
assertEquals("def", lineContentOf("abc\rdef", 6))
38+
assertEquals("def\r", lineContentOf("abc\rdef\r", 7))
39+
40+
@Test def CRNLisEOL(): Unit =
41+
assertEquals("\r\n", lineContentOf("\r\n", 0))
42+
assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 0))
43+
assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 3))
44+
assertEquals("abc\r\n", lineContentOf("abc\r\ndef", 4))
45+
assertEquals("def", lineContentOf("abc\r\ndef", 5))
46+
assertEquals("def", lineContentOf("abc\r\ndef", 7))
47+
assertEquals("def", lineContentOf("abc\r\ndef", 8))
48+
assertEquals("def\r\n", lineContentOf("abc\r\ndef\r\n", 9))
49+
50+
@Test def `t9885 lineToOffset throws on bad line`: Unit =
51+
val text = "a\nb\nc\n"
52+
val f = SourceFile.virtual("batch", text)
53+
// was: EOL is line terminator, not line separator, so there is not an empty 4th line
54+
val splitsville = text.split("\n")
55+
assertEquals(List(0, 2, 4, 6), (0 to splitsville.nn.length).toList.map(f.lineToOffset))
56+
assertThrows[IndexOutOfBoundsException] {
57+
f.lineToOffset(4) // was: 3 in Scala 2 (no empty line), 5 in Scala 3 (sentinel induces off-by-one)
58+
}
59+
60+
// Position and SourceFile used to count differently
61+
val p = SourcePosition(f, Span(text.length - 1))
62+
val q = SourcePosition(f, Span(f.lineToOffset(p.line - 1)))
63+
assertEquals(2, p.line)
64+
assertEquals(p.line - 1, q.line)
65+
assertEquals(p.column, q.column + 1)
66+
assertEquals(f.startOfLine(p.span.start), SourcePosition(f, Span(f.lineToOffset(p.line))).span.start)
67+
68+
@Test def `t9885 lineToOffset ignores lack of final EOL`: Unit =
69+
val text = "a\nb\nc"
70+
val f = SourceFile.virtual("batch", text)
71+
assertThrows[IndexOutOfBoundsException] {
72+
f.lineToOffset(3)
73+
}
74+
assertEquals(4, f.lineToOffset(2))
75+
assertEquals(2, f.offsetToLine(text.length))
76+
77+
@Test def `t11572 offsetToLine throws on bad offset`: Unit =
78+
val text = "a\nb\nc\n"
79+
val f = SourceFile.virtual("batch", text)
80+
/* current code requires offsets untethered from source
81+
assertThrows[IndexOutOfBoundsException] {
82+
f.offsetToLine(-1) // was: -1
83+
}
84+
assertThrows[IndexOutOfBoundsException] {
85+
f.offsetToLine(7) // was: 3
86+
}
87+
*/
88+
assertEquals(0, f.offsetToLine(0))
89+
assertEquals(0, f.offsetToLine(1))
90+
assertEquals(1, f.offsetToLine(2))
91+
assertEquals(2, f.offsetToLine(4))
92+
assertEquals(2, f.offsetToLine(5))
93+
assertEquals(3, f.offsetToLine(6))
94+
95+
@Test def `t11572b offsetToLine throws on bad offset`: Unit =
96+
val text = "a\nb\nc\nd"
97+
val f = SourceFile.virtual("batch", text)
98+
/*
99+
assertThrows[IndexOutOfBoundsException] {
100+
f.offsetToLine(-1)
101+
}
102+
assertThrows[IndexOutOfBoundsException] {
103+
f.offsetToLine(8)
104+
}
105+
*/
106+
assertEquals(0, f.offsetToLine(0))
107+
assertEquals(0, f.offsetToLine(1))
108+
assertEquals(1, f.offsetToLine(2))
109+
assertEquals(2, f.offsetToLine(4))
110+
assertEquals(2, f.offsetToLine(5))
111+
assertEquals(3, f.offsetToLine(6))
112+
assertEquals(3, f.offsetToLine(7))

compiler/test/dotty/tools/utils.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ def readFile(f: File): String = withFile(f)(_.mkString)
3737

3838
private object Unthrown extends ControlThrowable
3939

40+
def assertThrows[T <: Throwable: ClassTag](body: => Any): Unit = assertThrows[T](_ => true)(body)
41+
4042
def assertThrows[T <: Throwable: ClassTag](p: T => Boolean)(body: => Any): Unit =
4143
try
4244
body
@@ -68,4 +70,4 @@ def toolArgsParse(lines: List[String]): List[String] = {
6870
// but avoid picking up comments like "% scalac ./a.scala" and "$ scalac a.scala"
6971
}.map(stripped).headOption
7072
args.map(dotc.config.CommandLineParser.tokenize).getOrElse(Nil)
71-
}
73+
}

0 commit comments

Comments
 (0)