From 0fd317046206f034793ce5d9eb1cb001b5698e36 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Fri, 15 Nov 2019 19:18:52 -0800 Subject: [PATCH 1/2] Forward port CommandLineParser rewrite Improvements on scala 2 were motivated by exponential parsing costs: scala/scala#6622 scala/scala#5882 --- .../tools/dotc/config/CommandLineParser.scala | 131 +++++++++++------- .../dotc/config/CommandLineParserTest.scala | 43 ++++++ 2 files changed, 125 insertions(+), 49 deletions(-) create mode 100644 compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala diff --git a/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala b/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala index 3f4e84337d35..e69915dc78bd 100644 --- a/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala +++ b/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala @@ -2,67 +2,100 @@ package dotty.tools.dotc package config import scala.annotation.tailrec -import scala.annotation.internal.sharable -/** A simple (overly so) command line parser. - * !!! This needs a thorough test suite to make sure quoting is - * done correctly and portably. +/** A simple enough command line parser. */ object CommandLineParser { - // splits a string into a quoted prefix and the rest of the string, - // taking escaping into account (using \) - // `"abc"def` will match as `DoubleQuoted(abc, def)` - private class QuotedExtractor(quote: Char) { - def unapply(in: String): Option[(String, String)] = { - val del = quote.toString - if (in startsWith del) { - var escaped = false - val (quoted, next) = (in substring 1) span { - case `quote` if !escaped => false - case '\\' if !escaped => escaped = true; true - case _ => escaped = false; true + private final val DQ = '"' + private final val SQ = '\'' + + /** Split the line into tokens separated by whitespace or quotes. + * + * @return either an error message or reverse list of tokens + */ + private def tokens(in: String) = { + import Character.isWhitespace + import java.lang.{StringBuilder => Builder} + import collection.mutable.ArrayBuffer + + var accum: List[String] = Nil + var pos = 0 + var start = 0 + val qpos = new ArrayBuffer[Int](16) // positions of paired quotes + + def cur: Int = if (done) -1 else in.charAt(pos) + def bump() = pos += 1 + def done = pos >= in.length + + def skipToQuote(q: Int) = { + var escaped = false + def terminal = in.charAt(pos) match { + case _ if escaped => escaped = false ; false + case '\\' => escaped = true ; false + case `q` => true + case _ => false + } + while (!done && !terminal) pos += 1 + !done + } + @tailrec + def skipToDelim(): Boolean = + cur match { + case q @ (DQ | SQ) => { qpos += pos; bump(); skipToQuote(q) } && { qpos += pos; bump(); skipToDelim() } + case -1 => true + case c if isWhitespace(c) => true + case _ => bump(); skipToDelim() + } + def skipWhitespace() = while (isWhitespace(cur)) pos += 1 + def copyText() = { + val buf = new Builder + var p = start + var i = 0 + while (p < pos) { + if (i >= qpos.size) { + buf.append(in, p, pos) + p = pos + } else if (p == qpos(i)) { + buf.append(in, qpos(i)+1, qpos(i+1)) + p = qpos(i+1)+1 + i += 2 + } else { + buf.append(in, p, qpos(i)) + p = qpos(i) } - // the only way to get out of the above loop is with an empty next or !escaped - // require(next.isEmpty || !escaped) - if (next startsWith del) Some((quoted, next substring 1)) - else None } - else None + buf.toString } - } - private object DoubleQuoted extends QuotedExtractor('"') - private object SingleQuoted extends QuotedExtractor('\'') - @sharable private val Word = """(\S+)(.*)""".r - - // parse `in` for an argument, return it and the remainder of the input (or an error message) - // (argument may be in single/double quotes, taking escaping into account, quotes are stripped) - private def argument(in: String): Either[String, (String, String)] = in match { - case DoubleQuoted(arg, rest) => Right((arg, rest)) - case SingleQuoted(arg, rest) => Right((arg, rest)) - case Word(arg, rest) => Right((arg, rest)) - case _ => Left(s"Illegal argument: $in") - } + def text() = { + val res = + if (qpos.isEmpty) in.substring(start, pos) + else if (qpos(0) == start && qpos(1) == pos) in.substring(start+1, pos-1) + else copyText() + qpos.clear() + res + } + def badquote = Left("Unmatched quote") - // parse a list of whitespace-separated arguments (ignoring whitespace in quoted arguments) - @tailrec private def commandLine(in: String, accum: List[String] = Nil): Either[String, (List[String], String)] = { - val trimmed = in.trim - if (trimmed.isEmpty) Right((accum.reverse, "")) - else argument(trimmed) match { - case Right((arg, next)) => - (next span Character.isWhitespace) match { - case("", rest) if rest.nonEmpty => Left("Arguments should be separated by whitespace.") // TODO: can this happen? - case(ws, rest) => commandLine(rest, arg :: accum) - } - case Left(msg) => Left(msg) + @tailrec def loop(): Either[String, List[String]] = { + skipWhitespace() + start = pos + if (done) Right(accum) + else if (!skipToDelim()) badquote + else { + accum = text() :: accum + loop() + } } + loop() } class ParseException(msg: String) extends RuntimeException(msg) - def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x)) def tokenize(line: String, errorFn: String => Unit): List[String] = - commandLine(line) match { - case Right((args, _)) => args - case Left(msg) => errorFn(msg) ; Nil + tokens(line) match { + case Right(args) => args.reverse + case Left(msg) => errorFn(msg) ; Nil } + + def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x)) } diff --git a/compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala b/compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala new file mode 100644 index 000000000000..ee2cfbaa958c --- /dev/null +++ b/compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala @@ -0,0 +1,43 @@ + +package dotty.tools.dotc.config + +import org.junit.Assert.{assertEquals, assertTrue} +import org.junit.Test + +class CommandLineParserTest: + import CommandLineParser.{tokenize, ParseException} + + private def check(tokens: String*)(input: String): Unit = assertEquals(tokens, tokenize(input)) + + private def checkFails(input: String): Unit = + var failed = false + val res = tokenize(input, _ => failed = true) + assertTrue(s"Expected bad tokenization for [$input] but result was [$res]", failed) + + @Test def parserTokenizes() = + check()("") + check("x")("x") + check("x", "y")("x y") + check("x", "y", "z")("x y z") + + @Test def parserTrims() = + check()(" ") + check("x")(" x ") + check("x")("\nx\n") + check("x", "y", "z")(" x y z ") + + @Test def parserQuotes() = + check("x")("'x'") + check("x")(""""x"""") + check("x", "y", "z")("x 'y' z") + check("x", " y ", "z")("x ' y ' z") + check("x", "y", "z")("""x "y" z""") + check("x", " y ", "z")("""x " y " z""") + // interior quotes + check("x y z")("x' y 'z") // was assertEquals(List("x'","y","'z"), tokenize("x' y 'z")) + check("x\ny\nz")("x'\ny\n'z") + check("x'y'z")("""x"'y'"z""") + check("abcxyz")(""""abc"xyz""") + // missing quotes + checkFails(""""x""") // was assertEquals(List("\"x"), tokenize(""""x""")) + checkFails("""x'""") From bac75e5a74864628dbe4fb454c8e62fde370ca1c Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Fri, 27 Mar 2020 04:17:53 -0700 Subject: [PATCH 2/2] Upgrade syntax of CommandLineParser and test --- .../tools/dotc/config/CommandLineParser.scala | 113 +++++++++--------- .../dotc/config/CommandLineParserTest.scala | 13 +- 2 files changed, 61 insertions(+), 65 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala b/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala index e69915dc78bd..e3ca896d18d2 100644 --- a/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala +++ b/compiler/src/dotty/tools/dotc/config/CommandLineParser.scala @@ -1,101 +1,96 @@ -package dotty.tools.dotc -package config +package dotty.tools.dotc.config import scala.annotation.tailrec +import scala.collection.mutable.ArrayBuffer +import java.lang.Character.isWhitespace /** A simple enough command line parser. */ -object CommandLineParser { - private final val DQ = '"' - private final val SQ = '\'' +object CommandLineParser: + inline private val DQ = '"' + inline private val SQ = '\'' + inline private val EOF = -1 /** Split the line into tokens separated by whitespace or quotes. * - * @return either an error message or reverse list of tokens + * Invoke `errorFn` with message on bad quote. */ - private def tokens(in: String) = { - import Character.isWhitespace - import java.lang.{StringBuilder => Builder} - import collection.mutable.ArrayBuffer + def tokenize(line: String, errorFn: String => Unit): List[String] = var accum: List[String] = Nil - var pos = 0 + + var pos = 0 var start = 0 - val qpos = new ArrayBuffer[Int](16) // positions of paired quotes + val qpos = new ArrayBuffer[Int](16) // positions of paired quotes - def cur: Int = if (done) -1 else in.charAt(pos) - def bump() = pos += 1 - def done = pos >= in.length + inline def cur = if done then EOF else line.charAt(pos): Int + inline def bump() = pos += 1 + inline def done = pos >= line.length - def skipToQuote(q: Int) = { + def skipToQuote(q: Int): Boolean = var escaped = false - def terminal = in.charAt(pos) match { + def terminal = cur match case _ if escaped => escaped = false ; false case '\\' => escaped = true ; false - case `q` => true + case `q` | EOF => true case _ => false - } - while (!done && !terminal) pos += 1 + while !terminal do bump() !done - } - @tailrec - def skipToDelim(): Boolean = - cur match { - case q @ (DQ | SQ) => { qpos += pos; bump(); skipToQuote(q) } && { qpos += pos; bump(); skipToDelim() } + + @tailrec def skipToDelim(): Boolean = + inline def quote() = { qpos += pos ; bump() } + cur match + case q @ (DQ | SQ) => { quote() ; skipToQuote(q) } && { quote() ; skipToDelim() } case -1 => true case c if isWhitespace(c) => true case _ => bump(); skipToDelim() - } - def skipWhitespace() = while (isWhitespace(cur)) pos += 1 - def copyText() = { - val buf = new Builder + + def copyText(): String = + val buf = new java.lang.StringBuilder var p = start var i = 0 - while (p < pos) { - if (i >= qpos.size) { - buf.append(in, p, pos) + while p < pos do + if i >= qpos.size then + buf.append(line, p, pos) p = pos - } else if (p == qpos(i)) { - buf.append(in, qpos(i)+1, qpos(i+1)) + else if p == qpos(i) then + buf.append(line, qpos(i)+1, qpos(i+1)) p = qpos(i+1)+1 i += 2 - } else { - buf.append(in, p, qpos(i)) + else + buf.append(line, p, qpos(i)) p = qpos(i) - } - } buf.toString - } - def text() = { + + def text(): String = val res = - if (qpos.isEmpty) in.substring(start, pos) - else if (qpos(0) == start && qpos(1) == pos) in.substring(start+1, pos-1) + if qpos.isEmpty then line.substring(start, pos) + else if qpos(0) == start && qpos(1) == pos then line.substring(start+1, pos-1) else copyText() qpos.clear() res - } - def badquote = Left("Unmatched quote") - @tailrec def loop(): Either[String, List[String]] = { + inline def badquote() = errorFn(s"Unmatched quote [${qpos.last}](${line.charAt(qpos.last)})") + + inline def skipWhitespace() = while isWhitespace(cur) do pos += 1 + + @tailrec def loop(): List[String] = skipWhitespace() start = pos - if (done) Right(accum) - else if (!skipToDelim()) badquote - else { + if done then + accum.reverse + else if !skipToDelim() then + badquote() + Nil + else accum = text() :: accum loop() - } - } - loop() - } + end loop - class ParseException(msg: String) extends RuntimeException(msg) + loop() - def tokenize(line: String, errorFn: String => Unit): List[String] = - tokens(line) match { - case Right(args) => args.reverse - case Left(msg) => errorFn(msg) ; Nil - } + end tokenize def tokenize(line: String): List[String] = tokenize(line, x => throw new ParseException(x)) -} + + class ParseException(msg: String) extends RuntimeException(msg) diff --git a/compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala b/compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala index ee2cfbaa958c..a6bd651c6365 100644 --- a/compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala +++ b/compiler/test/dotty/tools/dotc/config/CommandLineParserTest.scala @@ -9,10 +9,11 @@ class CommandLineParserTest: private def check(tokens: String*)(input: String): Unit = assertEquals(tokens, tokenize(input)) - private def checkFails(input: String): Unit = - var failed = false - val res = tokenize(input, _ => failed = true) - assertTrue(s"Expected bad tokenization for [$input] but result was [$res]", failed) + private def checkFails(input: String, output: String): Unit = + var txt: String = null + val res = tokenize(input, msg => txt = msg) + assertTrue(s"Expected bad tokenization for [$input] but result was [$res]", txt ne null) + assertEquals(output, txt) @Test def parserTokenizes() = check()("") @@ -39,5 +40,5 @@ class CommandLineParserTest: check("x'y'z")("""x"'y'"z""") check("abcxyz")(""""abc"xyz""") // missing quotes - checkFails(""""x""") // was assertEquals(List("\"x"), tokenize(""""x""")) - checkFails("""x'""") + checkFails(""""x""", "Unmatched quote [0](\")") // was assertEquals(List("\"x"), tokenize(""""x""")) + checkFails("""x'""", "Unmatched quote [1](')")