Skip to content

Commit 38cb5e3

Browse files
authored
Merge pull request #7676 from som-snytt/issue/update-commandlineparser
Forward port CommandLineParser rewrite
2 parents ed35255 + bac75e5 commit 38cb5e3

File tree

2 files changed

+133
-61
lines changed

2 files changed

+133
-61
lines changed
Original file line number | Diff line number | Diff line change
@@ -1,68 +1,96 @@
1-
package dotty.tools.dotc
2-
package config
1+
package dotty.tools.dotc.config
32

43
import scala.annotation.tailrec
5-
import scala.annotation.internal.sharable
4+
import scala.collection.mutable.ArrayBuffer
5+
import java.lang.Character.isWhitespace
66

7-
/** A simple (overly so) command line parser.
8-
* !!! This needs a thorough test suite to make sure quoting is
9-
* done correctly and portably.
7+
/** A simple enough command line parser.
108
*/
11-
object CommandLineParser {
12-
// splits a string into a quoted prefix and the rest of the string,
13-
// taking escaping into account (using \)
14-
// `"abc"def` will match as `DoubleQuoted(abc, def)`
15-
private class QuotedExtractor(quote: Char) {
16-
def unapply(in: String): Option[(String, String)] = {
17-
val del = quote.toString
18-
if (in startsWith del) {
19-
var escaped = false
20-
val (quoted, next) = (in substring 1) span {
21-
case `quote` if !escaped => false
22-
case '\\' if !escaped => escaped = true; true
23-
case _ => escaped = false; true
24-
}
25-
// the only way to get out of the above loop is with an empty next or !escaped
26-
// require(next.isEmpty || !escaped)
27-
if (next startsWith del) Some((quoted, next substring 1))
28-
else None
29-
}
30-
else None
31-
}
32-
}
33-
private object DoubleQuoted extends QuotedExtractor('"')
34-
private object SingleQuoted extends QuotedExtractor('\'')
35-
@sharable private val Word = """(\S+)(.*)""".r
36-
37-
// parse `in` for an argument, return it and the remainder of the input (or an error message)
38-
// (argument may be in single/double quotes, taking escaping into account, quotes are stripped)
39-
private def argument(in: String): Either[String, (String, String)] = in match {
40-
case DoubleQuoted(arg, rest) => Right((arg, rest))
41-
case SingleQuoted(arg, rest) => Right((arg, rest))
42-
case Word(arg, rest) => Right((arg, rest))
43-
case _ => Left(s"Illegal argument: $in")
44-
}
45-
46-
// parse a list of whitespace-separated arguments (ignoring whitespace in quoted arguments)
47-
@tailrec private def commandLine(in: String, accum: List[String] = Nil): Either[String, (List[String], String)] = {
48-
val trimmed = in.trim
49-
if (trimmed.isEmpty) Right((accum.reverse, ""))
50-
else argument(trimmed) match {
51-
case Right((arg, next)) =>
52-
(next span Character.isWhitespace) match {
53-
case("", rest) if rest.nonEmpty => Left("Arguments should be separated by whitespace.") // TODO: can this happen?
54-
case(ws, rest) => commandLine(rest, arg :: accum)
55-
}
56-
case Left(msg) => Left(msg)
57-
}
58-
}
9+
object CommandLineParser:
  inline private val DQ = '"'
  inline private val SQ = '\''
  inline private val EOF = -1   // sentinel returned by `cur` once the input is exhausted

  /** Split the line into tokens separated by whitespace.
   *
   *  Text enclosed in single or double quotes is kept together (whitespace included) and the
   *  enclosing quotes are dropped. A quoted span that abuts unquoted text is joined with it
   *  into a single token, so `x' y 'z` yields the one token `x y z`. Inside a quoted span a
   *  backslash prevents the following character from closing the span; the backslash itself
   *  is copied to the token unchanged.
   *
   *  @param line    the command line to split
   *  @param errorFn invoked with a message of the form `Unmatched quote [pos](char)` when a
   *                 quote is never closed
   *  @return the tokens in order of appearance, or `Nil` if `errorFn` returns normally after
   *          an unmatched quote
   */
  def tokenize(line: String, errorFn: String => Unit): List[String] =

    var accum: List[String] = Nil

    var pos = 0     // current scan position
    var start = 0   // position where the token being scanned begins
    val qpos = new ArrayBuffer[Int](16) // positions of paired quotes

    inline def cur = if done then EOF else line.charAt(pos): Int
    inline def bump() = pos += 1
    inline def done = pos >= line.length

    // Advance to the closing quote `q`, honoring backslash escapes.
    // Returns false if the input ran out before the quote was found.
    def skipToQuote(q: Int): Boolean =
      var escaped = false
      def terminal = cur match
        case _ if escaped => escaped = false ; false
        case '\\'         => escaped = true ; false
        case `q` | EOF    => true
        case _            => false
      while !terminal do bump()
      !done

    // Advance to the end of the current token (whitespace or end of input), recording the
    // position of every opening and closing quote. Returns false on an unmatched quote.
    @tailrec def skipToDelim(): Boolean =
      inline def quote() = { qpos += pos ; bump() }
      cur match
        case q @ (DQ | SQ)        => { quote() ; skipToQuote(q) } && { quote() ; skipToDelim() }
        case EOF                  => true
        case c if isWhitespace(c) => true
        case _                    => bump(); skipToDelim()

    // Copy line[start, pos) while leaving out the quote characters recorded in `qpos`.
    def copyText(): String =
      val buf = new java.lang.StringBuilder
      var p = start
      var i = 0   // index into qpos of the next unprocessed opening quote
      while p < pos do
        if i >= qpos.size then
          // no quotes left: copy the remainder of the token
          buf.append(line, p, pos)
          p = pos
        else if p == qpos(i) then
          // at an opening quote: copy the quoted text and step past the closing quote
          buf.append(line, qpos(i)+1, qpos(i+1))
          p = qpos(i+1)+1
          i += 2
        else
          // unquoted text preceding the next opening quote
          buf.append(line, p, qpos(i))
          p = qpos(i)
      buf.toString

    // Text of the token just scanned, quotes removed; resets `qpos` for the next token.
    def text(): String =
      val res =
        if qpos.isEmpty then line.substring(start, pos)
        // Fast path for a token that is exactly one quoted span. `pos` has already been
        // bumped past the closing quote, so that quote sits at `pos - 1`.
        else if qpos(0) == start && qpos(1) == pos - 1 then line.substring(start+1, pos-1)
        else copyText()
      qpos.clear()
      res

    inline def badquote() = errorFn(s"Unmatched quote [${qpos.last}](${line.charAt(qpos.last)})")

    inline def skipWhitespace() = while isWhitespace(cur) do bump()

    @tailrec def loop(): List[String] =
      skipWhitespace()
      start = pos
      if done then
        accum.reverse
      else if !skipToDelim() then
        badquote()
        Nil
      else
        accum = text() :: accum
        loop()
    end loop

    loop()

  end tokenize

  /** Split the line into tokens, throwing a [[ParseException]] on an unmatched quote. */
  def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x))

  /** Thrown by the single-argument `tokenize` when the line contains an unmatched quote. */
  class ParseException(msg: String) extends RuntimeException(msg)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,44 @@
1+
2+
package dotty.tools.dotc.config
3+
4+
import org.junit.Assert.{assertEquals, assertTrue}
5+
import org.junit.Test
6+
7+
/** Tests for `CommandLineParser.tokenize`: splitting, trimming, quoting and bad quotes. */
class CommandLineParserTest:
  import CommandLineParser.{tokenize, ParseException}

  /** Asserts that tokenizing `input` yields exactly `tokens`. */
  private def check(tokens: String*)(input: String): Unit =
    val actual = tokenize(input)
    assertEquals(tokens, actual)

  /** Asserts that tokenizing `input` reports the error message `output`. */
  private def checkFails(input: String, output: String): Unit =
    // stays null unless the tokenizer calls the error handler
    var reported: String = null
    val result = tokenize(input, message => { reported = message })
    val wasReported = reported ne null
    assertTrue(s"Expected bad tokenization for [$input] but result was [$result]", wasReported)
    assertEquals(output, reported)

  @Test def parserTokenizes() =
    check()("")
    check("x")("x")
    check("x", "y")("x y")
    check("x", "y", "z")("x y z")

  @Test def parserTrims() =
    check()(" ")
    check("x")(" x ")
    check("x")("\nx\n")
    check("x", "y", "z")(" x y z ")

  @Test def parserQuotes() =
    check("x")("'x'")
    check("x")(""""x"""")
    check("x", "y", "z")("x 'y' z")
    check("x", " y ", "z")("x ' y ' z")
    check("x", "y", "z")("""x "y" z""")
    check("x", " y ", "z")("""x " y " z""")
    // quotes in the interior of a token join with the surrounding text
    check("x y z")("x' y 'z") // was assertEquals(List("x'","y","'z"), tokenize("x' y 'z"))
    check("x\ny\nz")("x'\ny\n'z")
    check("x'y'z")("""x"'y'"z""")
    check("abcxyz")(""""abc"xyz""")
    // a quote with no partner is reported through the error handler
    checkFails(""""x""", "Unmatched quote [0](\")") // was assertEquals(List("\"x"), tokenize(""""x"""))
    checkFails("""x'""", "Unmatched quote [1](')")

0 commit comments

Comments
 (0)