Skip to content

Commit cd642d0

Browse files
committed
Forward port CommandLineParser rewrite
Includes a test and adds a test dependency on scala-testkit. The improvements in Scala 2 were motivated by exponential parsing costs: scala/scala#6622, scala/scala#5882
1 parent 04d5068 commit cd642d0

File tree

3 files changed

+126
-49
lines changed

3 files changed

+126
-49
lines changed

compiler/src/dotty/tools/dotc/config/CommandLineParser.scala

+82-49
Original file line numberDiff line numberDiff line change
@@ -2,67 +2,100 @@ package dotty.tools.dotc
22
package config
33

44
import scala.annotation.tailrec
5-
import scala.annotation.internal.sharable
65

7-
/** A simple (overly so) command line parser.
8-
* !!! This needs a thorough test suite to make sure quoting is
9-
* done correctly and portably.
6+
/** A simple enough command line parser.
107
*/
118
object CommandLineParser {
12-
// splits a string into a quoted prefix and the rest of the string,
13-
// taking escaping into account (using \)
14-
// `"abc"def` will match as `DoubleQuoted(abc, def)`
15-
private class QuotedExtractor(quote: Char) {
16-
def unapply(in: String): Option[(String, String)] = {
17-
val del = quote.toString
18-
if (in startsWith del) {
19-
var escaped = false
20-
val (quoted, next) = (in substring 1) span {
21-
case `quote` if !escaped => false
22-
case '\\' if !escaped => escaped = true; true
23-
case _ => escaped = false; true
9+
private final val DQ = '"'
10+
private final val SQ = '\''
11+
12+
/** Split the line into tokens separated by whitespace or quotes.
13+
*
14+
* @return either an error message or the list of tokens in reverse order
15+
*/
16+
private def tokens(in: String) = {
17+
import Character.isWhitespace
18+
import java.lang.{StringBuilder => Builder}
19+
import collection.mutable.ArrayBuffer
20+
21+
var accum: List[String] = Nil
22+
var pos = 0
23+
var start = 0
24+
val qpos = new ArrayBuffer[Int](16) // positions of paired quotes
25+
26+
def cur: Int = if (done) -1 else in.charAt(pos)
27+
def bump() = pos += 1
28+
def done = pos >= in.length
29+
30+
def skipToQuote(q: Int) = {
31+
var escaped = false
32+
def terminal = in.charAt(pos) match {
33+
case _ if escaped => escaped = false ; false
34+
case '\\' => escaped = true ; false
35+
case `q` => true
36+
case _ => false
37+
}
38+
while (!done && !terminal) pos += 1
39+
!done
40+
}
41+
@tailrec
42+
def skipToDelim(): Boolean =
43+
cur match {
44+
case q @ (DQ | SQ) => { qpos += pos; bump(); skipToQuote(q) } && { qpos += pos; bump(); skipToDelim() }
45+
case -1 => true
46+
case c if isWhitespace(c) => true
47+
case _ => bump(); skipToDelim()
48+
}
49+
def skipWhitespace() = while (isWhitespace(cur)) pos += 1
50+
def copyText() = {
51+
val buf = new Builder
52+
var p = start
53+
var i = 0
54+
while (p < pos) {
55+
if (i >= qpos.size) {
56+
buf.append(in, p, pos)
57+
p = pos
58+
} else if (p == qpos(i)) {
59+
buf.append(in, qpos(i)+1, qpos(i+1))
60+
p = qpos(i+1)+1
61+
i += 2
62+
} else {
63+
buf.append(in, p, qpos(i))
64+
p = qpos(i)
2465
}
25-
// the only way to get out of the above loop is with an empty next or !escaped
26-
// require(next.isEmpty || !escaped)
27-
if (next startsWith del) Some((quoted, next substring 1))
28-
else None
2966
}
30-
else None
67+
buf.toString
3168
}
32-
}
33-
private object DoubleQuoted extends QuotedExtractor('"')
34-
private object SingleQuoted extends QuotedExtractor('\'')
35-
@sharable private val Word = """(\S+)(.*)""".r
36-
37-
// parse `in` for an argument, return it and the remainder of the input (or an error message)
38-
// (argument may be in single/double quotes, taking escaping into account, quotes are stripped)
39-
private def argument(in: String): Either[String, (String, String)] = in match {
40-
case DoubleQuoted(arg, rest) => Right((arg, rest))
41-
case SingleQuoted(arg, rest) => Right((arg, rest))
42-
case Word(arg, rest) => Right((arg, rest))
43-
case _ => Left(s"Illegal argument: $in")
44-
}
69+
/** Extract the token that spans `start` (inclusive) to `pos` (exclusive) of `in`,
 *  dropping the paired quote characters whose positions were recorded in `qpos`.
 *
 *  Clears `qpos` afterwards so that the next token starts with no recorded quotes.
 *  Only called after `skipToDelim()` has succeeded, so `qpos` holds complete
 *  (opening, closing) pairs.
 *
 *  @return the text of the current token with its delimiting quotes removed
 */
def text() = {
  // Fast path for a token that is exactly one quoted string, e.g. `'x'`.
  // The closing quote is recorded in `qpos` *before* `bump()` advances past it,
  // so when the token ends right after that quote it sits at `pos - 1`.
  // (Comparing against `pos` could never match, leaving this branch dead and
  // sending every quoted token through the slower `copyText()`.)
  // Positions in `qpos` are increasing, so a closing quote at `pos - 1` that is
  // the second entry implies there are no further quote pairs in this token.
  val res =
    if (qpos.isEmpty) in.substring(start, pos)
    else if (qpos(0) == start && qpos(1) == pos - 1) in.substring(start + 1, pos - 1)
    else copyText()
  qpos.clear()
  res
}
77+
def badquote = Left("Unmatched quote")
4578

46-
// parse a list of whitespace-separated arguments (ignoring whitespace in quoted arguments)
47-
@tailrec private def commandLine(in: String, accum: List[String] = Nil): Either[String, (List[String], String)] = {
48-
val trimmed = in.trim
49-
if (trimmed.isEmpty) Right((accum.reverse, ""))
50-
else argument(trimmed) match {
51-
case Right((arg, next)) =>
52-
(next span Character.isWhitespace) match {
53-
case("", rest) if rest.nonEmpty => Left("Arguments should be separated by whitespace.") // TODO: can this happen?
54-
case(ws, rest) => commandLine(rest, arg :: accum)
55-
}
56-
case Left(msg) => Left(msg)
79+
@tailrec def loop(): Either[String, List[String]] = {
80+
skipWhitespace()
81+
start = pos
82+
if (done) Right(accum)
83+
else if (!skipToDelim()) badquote
84+
else {
85+
accum = text() :: accum
86+
loop()
87+
}
5788
}
89+
loop()
5890
}
5991

6092
class ParseException(msg: String) extends RuntimeException(msg)
6193

62-
def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x))
6394
def tokenize(line: String, errorFn: String => Unit): List[String] =
64-
commandLine(line) match {
65-
case Right((args, _)) => args
66-
case Left(msg) => errorFn(msg) ; Nil
95+
tokens(line) match {
96+
case Right(args) => args.reverse
97+
case Left(msg) => errorFn(msg) ; Nil
6798
}
99+
100+
def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x))
68101
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
2+
package dotty.tools.dotc.config
3+
4+
import org.junit.Assert._
5+
import org.junit.Test
6+
import scala.tools.testkit.AssertUtil.assertThrows
7+
8+
class CommandLineParserTest {
9+
import CommandLineParser.{tokenize, ParseException}
10+
11+
@Test
12+
def parserTokenizes(): Unit = {
13+
assertEquals(Nil, tokenize(""))
14+
assertEquals(List("x"), tokenize("x"))
15+
assertEquals(List("x"), tokenize(" x "))
16+
assertEquals(List("x","y"), tokenize("x y"))
17+
assertEquals(List("x","y","z"), tokenize("x y z"))
18+
}
19+
@Test
20+
def parserTrims(): Unit = {
21+
assertEquals(Nil, tokenize(" "))
22+
assertEquals(List("x"), tokenize(" x "))
23+
assertEquals(List("x"), tokenize("\nx\n"))
24+
assertEquals(List("x","y","z"), tokenize(" x y z "))
25+
}
26+
@Test
27+
def parserQuotes(): Unit = {
28+
assertEquals(List("x"), tokenize("'x'"))
29+
assertEquals(List("x"), tokenize(""""x""""))
30+
assertEquals(List("x","y","z"), tokenize("x 'y' z"))
31+
assertEquals(List("x"," y ","z"), tokenize("x ' y ' z"))
32+
assertEquals(List("x","y","z"), tokenize("""x "y" z"""))
33+
assertEquals(List("x"," y ","z"), tokenize("""x " y " z"""))
34+
// interior quotes
35+
assertEquals(List("x y z"), tokenize("x' y 'z")) // was assertEquals(List("x'","y","'z"), tokenize("x' y 'z"))
36+
assertEquals(List("x\ny\nz"), tokenize("x'\ny\n'z"))
37+
assertEquals(List("x'y'z"), tokenize("""x"'y'"z"""))
38+
assertEquals(List("abcxyz"), tokenize(""""abc"xyz"""))
39+
// missing quotes
40+
assertThrows[ParseException](tokenize(""""x""")) // was assertEquals(List("\"x"), tokenize(""""x"""))
41+
assertThrows[ParseException](tokenize("""x'"""))
42+
}
43+
}

project/Build.scala

+1
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ object Build {
238238
scalaModuleInfo ~= (_.map(_.withOverrideScalaVersion(false))),
239239

240240
libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % Test,
241+
libraryDependencies += "org.scala-lang" % "scala-testkit" % stdlibVersion(Bootstrapped) % Test,
241242

242243
// If someone puts a source file at the root (e.g., for manual testing),
243244
// don't pick it up as part of any project.

0 commit comments

Comments
 (0)