@@ -236,33 +236,6 @@ def __init__(
236236 else :
237237 self .shortcuts = shortcuts
238238
239- # this regular expression matches C-style comments and quoted
240- # strings, i.e. stuff between single or double quote marks
241- # it's used with _comment_replacer() to strip out the C-style
242- # comments, while leaving C-style comments that are inside either
243- # double or single quotes.
244- #
245- # this big regular expression can be broken down into 3 regular
246- # expressions that are OR'ed together with a pipe character
247- #
248- # /\*.*\*/ Matches C-style comments (i.e. /* comment */)
249- # does not match unclosed comments.
250- # \'(?:\\.|[^\\\'])*\' Matches a single quoted string, allowing
251- # for embedded backslash escaped single quote
252- # marks.
253- # "(?:\\.|[^\\"])*" Matches a double quoted string, allowing
254- # for embedded backslash escaped double quote
255- # marks.
256- #
257- # by way of reminder the (?:...) regular expression syntax is just
258- # a non-capturing version of regular parenthesis. We need the non-
259- # capturing syntax because _comment_replacer() looks at match
260- # groups
261- self .comment_pattern = re .compile (
262- r'/\*.*\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"' ,
263- re .DOTALL | re .MULTILINE
264- )
265-
266239 # commands have to be a word, so make a regular expression
267240 # that matches the first word in the line. This regex has three
268241 # parts:
@@ -315,6 +288,9 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
315288 if not word :
316289 return False , 'cannot be an empty string'
317290
291+ if word .startswith (constants .COMMENT_CHAR ):
292+ return False , 'cannot start with the comment character'
293+
318294 for (shortcut , _ ) in self .shortcuts :
319295 if word .startswith (shortcut ):
320296 # Build an error string with all shortcuts listed
@@ -338,24 +314,23 @@ def is_valid_command(self, word: str) -> Tuple[bool, str]:
338314 def tokenize (self , line : str ) -> List [str ]:
339315 """Lex a string into a list of tokens.
340316
341- Comments are removed, and shortcuts and aliases are expanded.
317+ Shortcuts and aliases are expanded, and comments are removed.
342318
343319 Raises ValueError if there are unclosed quotation marks.
344320 """
345321
346- # strip C-style comments
347- # shlex will handle the python/shell style comments for us
348- line = re .sub (self .comment_pattern , self ._comment_replacer , line )
349-
350322 # expand shortcuts and aliases
351323 line = self ._expand (line )
352324
325+ # check if this line is a comment
326+ if line .strip ().startswith (constants .COMMENT_CHAR ):
327+ return []
328+
353329 # split on whitespace
354- lexer = shlex .shlex (line , posix = False )
355- lexer .whitespace_split = True
330+ tokens = shlex .split (line , comments = False , posix = False )
356331
357332 # custom lexing
358- tokens = self ._split_on_punctuation (list ( lexer ) )
333+ tokens = self ._split_on_punctuation (tokens )
359334 return tokens
360335
361336 def parse (self , line : str ) -> Statement :
@@ -610,15 +585,6 @@ def _command_and_args(tokens: List[str]) -> Tuple[str, str]:
610585
611586 return command , args
612587
613- @staticmethod
614- def _comment_replacer (match ):
615- matched_string = match .group (0 )
616- if matched_string .startswith ('/' ):
617- # the matched string was a comment, so remove it
618- return ''
619- # the matched string was a quoted string, return the match
620- return matched_string
621-
622588 def _split_on_punctuation (self , tokens : List [str ]) -> List [str ]:
623589 """Further splits tokens from a command line using punctuation characters
624590
0 commit comments