-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Forward port CommandLineParser rewrite #7676
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
smarter
merged 2 commits into
scala:master
from
som-snytt:issue/update-commandlineparser
Apr 23, 2020
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
150 changes: 89 additions & 61 deletions
150
compiler/src/dotty/tools/dotc/config/CommandLineParser.scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,68 +1,96 @@ | ||
package dotty.tools.dotc | ||
package config | ||
package dotty.tools.dotc.config | ||
|
||
import scala.annotation.tailrec | ||
import scala.annotation.internal.sharable | ||
import scala.collection.mutable.ArrayBuffer | ||
import java.lang.Character.isWhitespace | ||
|
||
/** A simple (overly so) command line parser. | ||
* !!! This needs a thorough test suite to make sure quoting is | ||
* done correctly and portably. | ||
/** A simple enough command line parser. | ||
*/ | ||
object CommandLineParser { | ||
// splits a string into a quoted prefix and the rest of the string, | ||
// taking escaping into account (using \) | ||
// `"abc"def` will match as `DoubleQuoted(abc, def)` | ||
private class QuotedExtractor(quote: Char) { | ||
def unapply(in: String): Option[(String, String)] = { | ||
val del = quote.toString | ||
if (in startsWith del) { | ||
var escaped = false | ||
val (quoted, next) = (in substring 1) span { | ||
case `quote` if !escaped => false | ||
case '\\' if !escaped => escaped = true; true | ||
case _ => escaped = false; true | ||
} | ||
// the only way to get out of the above loop is with an empty next or !escaped | ||
// require(next.isEmpty || !escaped) | ||
if (next startsWith del) Some((quoted, next substring 1)) | ||
else None | ||
} | ||
else None | ||
} | ||
} | ||
private object DoubleQuoted extends QuotedExtractor('"') | ||
private object SingleQuoted extends QuotedExtractor('\'') | ||
@sharable private val Word = """(\S+)(.*)""".r | ||
|
||
// parse `in` for an argument, return it and the remainder of the input (or an error message) | ||
// (argument may be in single/double quotes, taking escaping into account, quotes are stripped) | ||
private def argument(in: String): Either[String, (String, String)] = in match { | ||
case DoubleQuoted(arg, rest) => Right((arg, rest)) | ||
case SingleQuoted(arg, rest) => Right((arg, rest)) | ||
case Word(arg, rest) => Right((arg, rest)) | ||
case _ => Left(s"Illegal argument: $in") | ||
} | ||
|
||
// parse a list of whitespace-separated arguments (ignoring whitespace in quoted arguments) | ||
@tailrec private def commandLine(in: String, accum: List[String] = Nil): Either[String, (List[String], String)] = { | ||
val trimmed = in.trim | ||
if (trimmed.isEmpty) Right((accum.reverse, "")) | ||
else argument(trimmed) match { | ||
case Right((arg, next)) => | ||
(next span Character.isWhitespace) match { | ||
case("", rest) if rest.nonEmpty => Left("Arguments should be separated by whitespace.") // TODO: can this happen? | ||
case(ws, rest) => commandLine(rest, arg :: accum) | ||
} | ||
case Left(msg) => Left(msg) | ||
} | ||
} | ||
object CommandLineParser: | ||
inline private val DQ = '"' | ||
inline private val SQ = '\'' | ||
inline private val EOF = -1 | ||
|
||
class ParseException(msg: String) extends RuntimeException(msg) | ||
/** Split the line into tokens separated by whitespace or quotes. | ||
* | ||
* Invoke `errorFn` with message on bad quote. | ||
*/ | ||
def tokenize(line: String, errorFn: String => Unit): List[String] = | ||
|
||
var accum: List[String] = Nil | ||
|
||
var pos = 0 | ||
var start = 0 | ||
val qpos = new ArrayBuffer[Int](16) // positions of paired quotes | ||
|
||
inline def cur = if done then EOF else line.charAt(pos): Int | ||
inline def bump() = pos += 1 | ||
inline def done = pos >= line.length | ||
|
||
def skipToQuote(q: Int): Boolean = | ||
var escaped = false | ||
def terminal = cur match | ||
case _ if escaped => escaped = false ; false | ||
case '\\' => escaped = true ; false | ||
case `q` | EOF => true | ||
case _ => false | ||
while !terminal do bump() | ||
!done | ||
|
||
@tailrec def skipToDelim(): Boolean = | ||
inline def quote() = { qpos += pos ; bump() } | ||
cur match | ||
case q @ (DQ | SQ) => { quote() ; skipToQuote(q) } && { quote() ; skipToDelim() } | ||
case -1 => true | ||
case c if isWhitespace(c) => true | ||
case _ => bump(); skipToDelim() | ||
|
||
def copyText(): String = | ||
val buf = new java.lang.StringBuilder | ||
var p = start | ||
var i = 0 | ||
while p < pos do | ||
if i >= qpos.size then | ||
buf.append(line, p, pos) | ||
p = pos | ||
else if p == qpos(i) then | ||
buf.append(line, qpos(i)+1, qpos(i+1)) | ||
p = qpos(i+1)+1 | ||
i += 2 | ||
else | ||
buf.append(line, p, qpos(i)) | ||
p = qpos(i) | ||
buf.toString | ||
|
||
def text(): String = | ||
val res = | ||
if qpos.isEmpty then line.substring(start, pos) | ||
else if qpos(0) == start && qpos(1) == pos then line.substring(start+1, pos-1) | ||
else copyText() | ||
qpos.clear() | ||
res | ||
|
||
inline def badquote() = errorFn(s"Unmatched quote [${qpos.last}](${line.charAt(qpos.last)})") | ||
|
||
inline def skipWhitespace() = while isWhitespace(cur) do pos += 1 | ||
|
||
@tailrec def loop(): List[String] = | ||
skipWhitespace() | ||
start = pos | ||
if done then | ||
accum.reverse | ||
else if !skipToDelim() then | ||
badquote() | ||
Nil | ||
else | ||
accum = text() :: accum | ||
loop() | ||
end loop | ||
|
||
loop() | ||
|
||
end tokenize | ||
|
||
def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x)) | ||
def tokenize(line: String, errorFn: String => Unit): List[String] = | ||
commandLine(line) match { | ||
case Right((args, _)) => args | ||
case Left(msg) => errorFn(msg) ; Nil | ||
} | ||
} | ||
|
||
class ParseException(msg: String) extends RuntimeException(msg) |
44 changes: 44 additions & 0 deletions
44
compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
|
||
package dotty.tools.dotc.config | ||
|
||
import org.junit.Assert.{assertEquals, assertTrue} | ||
import org.junit.Test | ||
|
||
class CommandLineParserTest: | ||
import CommandLineParser.{tokenize, ParseException} | ||
|
||
private def check(tokens: String*)(input: String): Unit = assertEquals(tokens, tokenize(input)) | ||
|
||
private def checkFails(input: String, output: String): Unit = | ||
var txt: String = null | ||
val res = tokenize(input, msg => txt = msg) | ||
assertTrue(s"Expected bad tokenization for [$input] but result was [$res]", txt ne null) | ||
assertEquals(output, txt) | ||
|
||
@Test def parserTokenizes() = | ||
check()("") | ||
check("x")("x") | ||
check("x", "y")("x y") | ||
check("x", "y", "z")("x y z") | ||
|
||
@Test def parserTrims() = | ||
check()(" ") | ||
check("x")(" x ") | ||
check("x")("\nx\n") | ||
check("x", "y", "z")(" x y z ") | ||
|
||
@Test def parserQuotes() = | ||
check("x")("'x'") | ||
check("x")(""""x"""") | ||
check("x", "y", "z")("x 'y' z") | ||
check("x", " y ", "z")("x ' y ' z") | ||
check("x", "y", "z")("""x "y" z""") | ||
check("x", " y ", "z")("""x " y " z""") | ||
// interior quotes | ||
check("x y z")("x' y 'z") // was assertEquals(List("x'","y","'z"), tokenize("x' y 'z")) | ||
check("x\ny\nz")("x'\ny\n'z") | ||
check("x'y'z")("""x"'y'"z""") | ||
check("abcxyz")(""""abc"xyz""") | ||
// missing quotes | ||
checkFails(""""x""", "Unmatched quote [0](\")") // was assertEquals(List("\"x"), tokenize(""""x""")) | ||
checkFails("""x'""", "Unmatched quote [1](')") |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is that type ascription needed? Could it be moved to be the result type of cur instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
charAt
is a char. The benefits of this style choice are to point out the problematic expression; it looks more cool; in result type position, it's like having eyeglasses wrapped in tape where they broke. That's why implicits, the nerdiest feature, requires it.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, I guess that's fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a bit quirky, and it was so long ago, I had to reconstruct my thinking. On the up side, with all the talk about command line parsing support for main entry point, probably this code will be replaced anyway. It was mostly aimed at Process building.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also thanks, this was to get my feet wet with the repo and process, low benefit and low risk.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks to you for porting this! And sorry for the long delay before the review, it slipped my mind.