@@ -258,33 +258,6 @@ def __init__(
258258 else :
259259 self .shortcuts = shortcuts
260260
261- # this regular expression matches C-style comments and quoted
262- # strings, i.e. stuff between single or double quote marks
263- # it's used with _comment_replacer() to strip out the C-style
264- # comments, while leaving C-style comments that are inside either
265- # double or single quotes.
266- #
267- # this big regular expression can be broken down into 3 regular
268- # expressions that are OR'ed together with a pipe character
269- #
270- # /\*.*\*/ Matches C-style comments (i.e. /* comment */)
271- # does not match unclosed comments.
272- # \'(?:\\.|[^\\\'])*\' Matches a single quoted string, allowing
273- # for embedded backslash escaped single quote
274- # marks.
275- # "(?:\\.|[^\\"])*" Matches a double quoted string, allowing
276- # for embedded backslash escaped double quote
277- # marks.
278- #
279- # by way of reminder the (?:...) regular expression syntax is just
280- # a non-capturing version of regular parenthesis. We need the non-
281- # capturing syntax because _comment_replacer() looks at match
282- # groups
283- self .comment_pattern = re .compile (
284- r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"' ,
285- re .DOTALL | re .MULTILINE
286- )
287-
288261 # commands have to be a word, so make a regular expression
289262 # that matches the first word in the line. This regex has three
290263 # parts:
@@ -337,6 +310,9 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
337310 if not word :
338311 return False , 'cannot be an empty string'
339312
313+ if word .startswith (constants .COMMENT_CHAR ):
314+ return False , 'cannot start with the comment character'
315+
340316 for (shortcut , _ ) in self .shortcuts :
341317 if word .startswith (shortcut ):
342318 # Build an error string with all shortcuts listed
@@ -360,24 +336,23 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
360336 def tokenize (self , line : str ) -> List [str ]:
361337 """Lex a string into a list of tokens.
362338
363- Comments are removed, and shortcuts and aliases are expanded.
339+ Shortcuts and aliases are expanded; a line that is entirely a comment yields no tokens.
364340
365341 Raises ValueError if there are unclosed quotation marks.
366342 """
367343
368- # strip C-style comments
369- # shlex will handle the python/shell style comments for us
370- line = re .sub (self .comment_pattern , self ._comment_replacer , line )
371-
372344 # expand shortcuts and aliases
373345 line = self ._expand (line )
374346
347+ # check if this line is a comment
348+ if line .strip ().startswith (constants .COMMENT_CHAR ):
349+ return []
350+
375351 # split on whitespace
376- lexer = shlex .shlex (line , posix = False )
377- lexer .whitespace_split = True
352+ tokens = shlex .split (line , comments = False , posix = False )
378353
379354 # custom lexing
380- tokens = self ._split_on_punctuation (list ( lexer ) )
355+ tokens = self ._split_on_punctuation (tokens )
381356 return tokens
382357
383358 def parse (self , line : str ) -> Statement :
@@ -632,15 +607,6 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
632607
633608 return command , args
634609
635- @staticmethod
636- def _comment_replacer (match ):
637- matched_string = match .group (0 )
638- if matched_string .startswith ('/' ):
639- # the matched string was a comment, so remove it
640- return ''
641- # the matched string was a quoted string, return the match
642- return matched_string
643-
644610 def _split_on_punctuation (self , tokens : List [str ]) -> List [str ]:
645611 """Further splits tokens from a command line using punctuation characters
646612
0 commit comments