Skip to content

Forward port CommandLineParser rewrite #7676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 89 additions & 61 deletions compiler/src/dotty/tools/dotc/config/CommandLineParser.scala
Original file line number Diff line number Diff line change
@@ -1,68 +1,96 @@
package dotty.tools.dotc
package config
package dotty.tools.dotc.config

import scala.annotation.tailrec
import scala.annotation.internal.sharable
import scala.collection.mutable.ArrayBuffer
import java.lang.Character.isWhitespace

/** A simple (overly so) command line parser.
* !!! This needs a thorough test suite to make sure quoting is
* done correctly and portably.
/** A simple enough command line parser.
*/
object CommandLineParser {
// splits a string into a quoted prefix and the rest of the string,
// taking escaping into account (using \)
// `"abc"def` will match as `DoubleQuoted(abc, def)`
private class QuotedExtractor(quote: Char) {
def unapply(in: String): Option[(String, String)] = {
val del = quote.toString
if (in startsWith del) {
var escaped = false
val (quoted, next) = (in substring 1) span {
case `quote` if !escaped => false
case '\\' if !escaped => escaped = true; true
case _ => escaped = false; true
}
// the only way to get out of the above loop is with an empty next or !escaped
// require(next.isEmpty || !escaped)
if (next startsWith del) Some((quoted, next substring 1))
else None
}
else None
}
}
private object DoubleQuoted extends QuotedExtractor('"')
private object SingleQuoted extends QuotedExtractor('\'')
@sharable private val Word = """(\S+)(.*)""".r

// parse `in` for an argument, return it and the remainder of the input (or an error message)
// (argument may be in single/double quotes, taking escaping into account, quotes are stripped)
private def argument(in: String): Either[String, (String, String)] = in match {
case DoubleQuoted(arg, rest) => Right((arg, rest))
case SingleQuoted(arg, rest) => Right((arg, rest))
case Word(arg, rest) => Right((arg, rest))
case _ => Left(s"Illegal argument: $in")
}

// parse a list of whitespace-separated arguments (ignoring whitespace in quoted arguments)
@tailrec private def commandLine(in: String, accum: List[String] = Nil): Either[String, (List[String], String)] = {
val trimmed = in.trim
if (trimmed.isEmpty) Right((accum.reverse, ""))
else argument(trimmed) match {
case Right((arg, next)) =>
(next span Character.isWhitespace) match {
case("", rest) if rest.nonEmpty => Left("Arguments should be separated by whitespace.") // TODO: can this happen?
case(ws, rest) => commandLine(rest, arg :: accum)
}
case Left(msg) => Left(msg)
}
}
object CommandLineParser:
inline private val DQ = '"'
inline private val SQ = '\''
inline private val EOF = -1

class ParseException(msg: String) extends RuntimeException(msg)
/** Split the line into tokens separated by whitespace or quotes.
*
* Invoke `errorFn` with message on bad quote.
*/
def tokenize(line: String, errorFn: String => Unit): List[String] =

var accum: List[String] = Nil

var pos = 0
var start = 0
val qpos = new ArrayBuffer[Int](16) // positions of paired quotes

inline def cur = if done then EOF else line.charAt(pos): Int
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is that type ascription needed? Could it be moved to be the result type of cur instead?

Suggested change
inline def cur = if done then EOF else line.charAt(pos): Int
inline def cur: Int = if done then EOF else line.charAt(pos)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

charAt is a char. The benefits of this style choice are to point out the problematic expression; it looks more cool; in result type position, it's like having eyeglasses wrapped in tape where they broke. That's why implicits, the nerdiest feature, requires it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I guess that's fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit quirky, and it was so long ago, I had to reconstruct my thinking. On the up side, with all the talk about command line parsing support for main entry point, probably this code will be replaced anyway. It was mostly aimed at Process building.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also thanks, this was to get my feet wet with the repo and process, low benefit and low risk.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks to you for porting this! And sorry for the long delay before the review, it slipped my mind.

inline def bump() = pos += 1
inline def done = pos >= line.length

def skipToQuote(q: Int): Boolean =
var escaped = false
def terminal = cur match
case _ if escaped => escaped = false ; false
case '\\' => escaped = true ; false
case `q` | EOF => true
case _ => false
while !terminal do bump()
!done

@tailrec def skipToDelim(): Boolean =
inline def quote() = { qpos += pos ; bump() }
cur match
case q @ (DQ | SQ) => { quote() ; skipToQuote(q) } && { quote() ; skipToDelim() }
case -1 => true
case c if isWhitespace(c) => true
case _ => bump(); skipToDelim()

def copyText(): String =
val buf = new java.lang.StringBuilder
var p = start
var i = 0
while p < pos do
if i >= qpos.size then
buf.append(line, p, pos)
p = pos
else if p == qpos(i) then
buf.append(line, qpos(i)+1, qpos(i+1))
p = qpos(i+1)+1
i += 2
else
buf.append(line, p, qpos(i))
p = qpos(i)
buf.toString

def text(): String =
val res =
if qpos.isEmpty then line.substring(start, pos)
else if qpos(0) == start && qpos(1) == pos then line.substring(start+1, pos-1)
else copyText()
qpos.clear()
res

inline def badquote() = errorFn(s"Unmatched quote [${qpos.last}](${line.charAt(qpos.last)})")

inline def skipWhitespace() = while isWhitespace(cur) do pos += 1

@tailrec def loop(): List[String] =
skipWhitespace()
start = pos
if done then
accum.reverse
else if !skipToDelim() then
badquote()
Nil
else
accum = text() :: accum
loop()
end loop

loop()

end tokenize

def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x))
def tokenize(line: String, errorFn: String => Unit): List[String] =
commandLine(line) match {
case Right((args, _)) => args
case Left(msg) => errorFn(msg) ; Nil
}
}

class ParseException(msg: String) extends RuntimeException(msg)
44 changes: 44 additions & 0 deletions compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

package dotty.tools.dotc.config

import org.junit.Assert.{assertEquals, assertTrue}
import org.junit.Test

class CommandLineParserTest:
import CommandLineParser.{tokenize, ParseException}

private def check(tokens: String*)(input: String): Unit = assertEquals(tokens, tokenize(input))

private def checkFails(input: String, output: String): Unit =
var txt: String = null
val res = tokenize(input, msg => txt = msg)
assertTrue(s"Expected bad tokenization for [$input] but result was [$res]", txt ne null)
assertEquals(output, txt)

@Test def parserTokenizes() =
check()("")
check("x")("x")
check("x", "y")("x y")
check("x", "y", "z")("x y z")

@Test def parserTrims() =
check()(" ")
check("x")(" x ")
check("x")("\nx\n")
check("x", "y", "z")(" x y z ")

@Test def parserQuotes() =
check("x")("'x'")
check("x")(""""x"""")
check("x", "y", "z")("x 'y' z")
check("x", " y ", "z")("x ' y ' z")
check("x", "y", "z")("""x "y" z""")
check("x", " y ", "z")("""x " y " z""")
// interior quotes
check("x y z")("x' y 'z") // was assertEquals(List("x'","y","'z"), tokenize("x' y 'z"))
check("x\ny\nz")("x'\ny\n'z")
check("x'y'z")("""x"'y'"z""")
check("abcxyz")(""""abc"xyz""")
// missing quotes
checkFails(""""x""", "Unmatched quote [0](\")") // was assertEquals(List("\"x"), tokenize(""""x"""))
checkFails("""x'""", "Unmatched quote [1](')")