From 26d088cc94828879bb1fe43f9713b3b52eb13762 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sat, 5 May 2018 21:59:06 -0600 Subject: [PATCH 01/12] Remove check on self.identchars in do_alias() self.identchars is no longer used by cmd2. --- cmd2/cmd2.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index ad2038d4a..401d20463 100755 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -2434,13 +2434,6 @@ def do_alias(self, arglist): name = arglist[0] value = ' '.join(arglist[1:]) - # Check for a valid name - for cur_char in name: - if cur_char not in self.identchars: - self.perror("Alias names can only contain the following characters: {}".format(self.identchars), - traceback_war=False) - return - # Set the alias self.aliases[name] = value self.poutput("Alias {!r} created".format(name)) From 537542b8492f3c4d1c56296804ae82c123d0efce Mon Sep 17 00:00:00 2001 From: kotfu Date: Sat, 5 May 2018 22:04:48 -0600 Subject: [PATCH 02/12] Remove unit test for identchars in aliases --- tests/test_cmd2.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py index b6416005c..33f5d86ed 100644 --- a/tests/test_cmd2.py +++ b/tests/test_cmd2.py @@ -1688,12 +1688,6 @@ def test_alias_lookup_invalid_alias(base_app, capsys): out, err = capsys.readouterr() assert "not found" in err -def test_alias_with_invalid_name(base_app, capsys): - run_cmd(base_app, 'alias @ help') - out, err = capsys.readouterr() - assert "can only contain the following characters" in err - - def test_unalias(base_app): # Create an alias run_cmd(base_app, 'alias fake pyscript') From a0c0db15103a54dba20fb309956a7b3cf90bc645 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 00:20:46 -0600 Subject: [PATCH 03/12] Fix alias expansion when not followed by whitespace --- cmd2/parsing.py | 27 ++++++++++---- tests/test_parsing.py | 85 +++++++++++++++++++++++++++++++++---------- 2 files changed, 86 insertions(+), 26 deletions(-) diff --git a/cmd2/parsing.py b/cmd2/parsing.py index 908e9272f..eff29843a 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -144,13 +144,26 @@ def __init__( # aliases have to be a word, so make a regular expression # that matches the first word in the line. This regex has two # parts, the first parenthesis enclosed group matches one - # or more non-whitespace characters, and the second group - # matches either a whitespace character or the end of the - # string. We use \A and \Z to ensure we always match the - # beginning and end of a string that may have multiple - # lines - self.command_pattern = re.compile(r'\A(\S+)(\s|\Z)') - + # or more non-whitespace characters with a non-greedy match + # (that's what the '+?' part does). The second group must be + # dynamically created because it needs to match either whitespace, + # something in REDIRECTION_CHARS, one of the terminators, + # or the end of the string. We use \A and \Z to ensure we always + # match the beginning and end of a string that may have multiple + # lines (if it's a multiline command) + second_group_items = [] + second_group_items.extend(constants.REDIRECTION_CHARS) + second_group_items.extend(terminators) + # escape each item so it will for sure get treated as a literal + second_group_items = [re.escape(x) for x in second_group_items] + # add the whitespace and end of string, not escaped because they + # are not literals + second_group_items.extend([r'\s', r'\Z']) + # join them up with a pipe + second_group = '|'.join(second_group_items) + # build the regular expression + expr = r'\A(\S+?)({})'.format(second_group) + self.command_pattern = re.compile(expr) def tokenize(self, line: str) -> List[str]: """Lex a string into a list of tokens. diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 7940bbd89..9e48bcf90 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -49,7 +49,7 @@ def test_tokenize(parser, line, tokens): (['command'], 'command', None), (['command', 'arg1', 'arg2'], 'command', 'arg1 arg2') ]) -def test_command_and_args(parser, tokens, command, args): +def test_parse_command_and_args(parser, tokens, command, args): (parsed_command, parsed_args) = parser._command_and_args(tokens) assert command == parsed_command assert args == parsed_args @@ -59,20 +59,20 @@ def test_command_and_args(parser, tokens, command, args): '"one word"', "'one word'", ]) -def test_single_word(parser, line): +def test_parse_single_word(parser, line): statement = parser.parse(line) assert statement.command == line assert not statement.args assert statement.argv == [utils.strip_quotes(line)] -def test_word_plus_terminator(parser): +def test_parse_word_plus_terminator(parser): line = 'termbare;' statement = parser.parse(line) assert statement.command == 'termbare' assert statement.terminator == ';' assert statement.argv == ['termbare'] -def test_suffix_after_terminator(parser): +def test_parse_suffix_after_terminator(parser): line = 'termbare; suffx' statement = parser.parse(line) assert statement.command == 'termbare' @@ -80,14 +80,14 @@ def test_suffix_after_terminator(parser): assert statement.suffix == 'suffx' assert statement.argv == ['termbare'] -def test_command_with_args(parser): +def test_parse_command_with_args(parser): line = 'command with args' statement = parser.parse(line) assert statement.command == 'command' assert statement.args == 'with args' assert statement.argv == ['command', 'with', 'args'] -def test_command_with_quoted_args(parser): +def test_parse_command_with_quoted_args(parser): line = 'command with "quoted args" and "some not"' statement = parser.parse(line) assert statement.command == 'command' @@ -103,20 +103,20 @@ def test_parse_command_with_args_terminator_and_suffix(parser): assert statement.suffix == 'and suffix' assert statement.argv == ['command', 'with', 'args', 'and', 'terminator'] -def test_hashcomment(parser): +def test_parse_hashcomment(parser): statement = parser.parse('hi # this is all a comment') assert statement.command == 'hi' assert not statement.args assert statement.argv == ['hi'] -def test_c_comment(parser): +def test_parse_c_comment(parser): statement = parser.parse('hi /* this is | all a comment */') assert statement.command == 'hi' assert not statement.args assert not statement.pipe_to assert statement.argv == ['hi'] -def test_c_comment_empty(parser): +def test_parse_c_comment_empty(parser): statement = parser.parse('/* this is | all a comment */') assert not statement.command assert not statement.args @@ -130,14 +130,18 @@ def test_parse_what_if_quoted_strings_seem_to_start_comments(parser): assert not statement.pipe_to assert statement.argv == ['what', 'if', 'quoted strings /* seem to ', 'start', 'comments?'] -def test_simple_piped(parser): - statement = parser.parse('simple | piped') +@pytest.mark.parametrize('line',[ + 'simple | piped', + 'simple|piped', +]) +def test_parse_simple_pipe(parser, line): + statement = parser.parse(line) assert statement.command == 'simple' assert not statement.args assert statement.argv == ['simple'] assert statement.pipe_to == 'piped' -def test_double_pipe_is_not_a_pipe(parser): +def test_parse_double_pipe_is_not_a_pipe(parser): line = 'double-pipe || is not a pipe' statement = parser.parse(line) assert statement.command == 'double-pipe' @@ -145,7 +149,7 @@ def test_double_pipe_is_not_a_pipe(parser): assert statement.argv == ['double-pipe', '||', 'is', 'not', 'a', 'pipe'] assert not statement.pipe_to -def test_complex_pipe(parser): +def test_parse_complex_pipe(parser): line = 'command with args, terminator;sufx | piped' statement = parser.parse(line) assert statement.command == 'command' @@ -155,7 +159,20 @@ def test_complex_pipe(parser): assert statement.suffix == 'sufx' assert statement.pipe_to == 'piped' -def test_output_redirect(parser): +@pytest.mark.parametrize('line,output', [ + ('help > out.txt', '>'), + ('help>out.txt', '>'), + ('help >> out.txt', '>>'), + ('help>>out.txt', '>>'), +]) +def test_parse_redirect(parser,line): + statement = parser.parse(line) + assert statement.command == 'help' + assert not statement.args + assert statement.output == '>' + assert statement.output_to == 'out.txt' + +def test_parse_redirect_with_args(parser): line = 'output into > afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -164,7 +181,7 @@ def test_output_redirect(parser): assert statement.output == '>' assert statement.output_to == 'afile.txt' -def test_output_redirect_with_dash_in_path(parser): +def test_parse_redirect_with_dash_in_path(parser): line = 'output into > python-cmd2/afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -173,7 +190,7 @@ def test_output_redirect_with_dash_in_path(parser): assert statement.output == '>' assert statement.output_to == 'python-cmd2/afile.txt' -def test_output_redirect_append(parser): +def test_parse_redirect_append(parser): line = 'output appended to >> /tmp/afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -182,7 +199,7 @@ def test_output_redirect_append(parser): assert statement.output == '>>' assert statement.output_to == '/tmp/afile.txt' -def test_pipe_and_redirect(parser): +def test_parse_pipe_and_redirect(parser): line = 'output into;sufx | pipethrume plz > afile.txt' statement = parser.parse(line) assert statement.command == 'output' @@ -307,12 +324,12 @@ def test_empty_statement_raises_exception(): ('!ls -al /tmp', 'shell', 'ls -al /tmp'), ('l', 'shell', 'ls -al') ]) -def test_alias_and_shortcut_expansion(parser, line, command, args): +def test_parse_alias_and_shortcut_expansion(parser, line, command, args): statement = parser.parse(line) assert statement.command == command assert statement.args == args -def test_alias_on_multiline_command(parser): +def test_parse_alias_on_multiline_command(parser): line = 'anothermultiline has > inside an unfinished command' statement = parser.parse(line) assert statement.multiline_command == 'multiline' @@ -320,6 +337,36 @@ def test_alias_on_multiline_command(parser): assert statement.args == 'has > inside an unfinished command' assert not statement.terminator +@pytest.mark.parametrize('line,output', [ + ('helpalias > out.txt', '>'), + ('helpalias>out.txt', '>'), + ('helpalias >> out.txt', '>>'), + ('helpalias>>out.txt', '>>'), +]) +def test_parse_alias_redirection(parser, line, output): + statement = parser.parse(line) + assert statement.command == 'help' + assert not statement.args + assert statement.output == output + assert statement.output_to == 'out.txt' + +@pytest.mark.parametrize('line', [ + 'helpalias | less', + 'helpalias|less', +]) +def test_parse_alias_pipe(parser, line): + statement = parser.parse(line) + assert statement.command == 'help' + assert not statement.args + assert statement.pipe_to == 'less' + +def test_parse_alias_terminator_no_whitespace(parser): + line = 'helpalias;' + statement = parser.parse(line) + assert statement.command == 'help' + assert not statement.args + assert statement.terminator == ';' + def test_parse_command_only_command_and_args(parser): line = 'help history' statement = parser.parse_command_only(line) From 285b45bfa9ae79a936c35fd9c4b0ea0706082a3d Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 00:22:41 -0600 Subject: [PATCH 04/12] Oops, fixed an oversight that broke the build This is what you get for being too hasty when you push. --- tests/test_parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 9e48bcf90..2f3f338f0 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -165,11 +165,11 @@ def test_parse_complex_pipe(parser): ('help >> out.txt', '>>'), ('help>>out.txt', '>>'), ]) -def test_parse_redirect(parser,line): +def test_parse_redirect(parser,line, output): statement = parser.parse(line) assert statement.command == 'help' assert not statement.args - assert statement.output == '>' + assert statement.output == output assert statement.output_to == 'out.txt' def test_parse_redirect_with_args(parser): From 529b783234f7721935c0e87a785c094784cb4fff Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 01:12:14 -0600 Subject: [PATCH 05/12] =?UTF-8?q?Don=E2=80=99t=20allow=20wierd=20character?= =?UTF-8?q?s=20in=20alias=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd2/cmd2.py | 19 +++++++++++++++++++ tests/test_cmd2.py | 11 +++++++++++ 2 files changed, 30 insertions(+) diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index 401d20463..661dd20e7 100755 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -730,6 +730,19 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, persistent_histor # If this string is non-empty, then this warning message will print if a broken pipe error occurs while printing self.broken_pipe_warning = '' + # regular expression to test for invalid characters in aliases + invalid_items = [] + invalid_items.extend(constants.REDIRECTION_CHARS) + invalid_items.extend(self.terminators) + # escape each item so it will for sure get treated as a literal + invalid_items = [re.escape(x) for x in invalid_items] + # don't allow whitespace + invalid_items.append(r'\s') + # join them up with a pipe + expr = '|'.join(invalid_items) + # and compile it into a pattern + self.invalid_alias_pattern = re.compile(expr) + # If a startup script is provided, then add it in the queue to load if startup_script is not None: startup_script = os.path.expanduser(startup_script) @@ -2434,6 +2447,12 @@ def do_alias(self, arglist): name = arglist[0] value = ' '.join(arglist[1:]) + # Validate the alias to ensure it doesn't include wierd characters + # like terminators, output redirection, or whitespace + if self.invalid_alias_pattern.search(name): + self.perror('Alias names can not contain special characters.', traceback_war=False) + return + # Set the alias self.aliases[name] = value self.poutput("Alias {!r} created".format(name)) diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py index 33f5d86ed..9dcfe6926 100644 --- a/tests/test_cmd2.py +++ b/tests/test_cmd2.py @@ -1705,6 +1705,17 @@ def test_unalias_non_existing(base_app, capsys): out, err = capsys.readouterr() assert "does not exist" in err +@pytest.mark.parametrize('alias_name', [ + '">"', + '"no>pe"' + '"no spaces"', + '"nopipe|"', + '"noterm;"', +]) +def test_create_invalid_alias(base_app, alias_name, capsys): + run_cmd(base_app, 'alias {} help'.format(alias_name)) + out, err = capsys.readouterr() + assert "can not contain" in err def test_ppaged(base_app): msg = 'testing...' From 0efb62cfc2b80dabcf0e94ad3315e3ea32c02d4f Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 10:07:16 -0600 Subject: [PATCH 06/12] Fix bungled merge from master --- cmd2/parsing.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/cmd2/parsing.py b/cmd2/parsing.py index a1e211759..d7feeb48b 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -142,15 +142,21 @@ def __init__( ) # aliases have to be a word, so make a regular expression - # that matches the first word in the line. This regex has two - # parts, the first parenthesis enclosed group matches one - # or more non-whitespace characters with a non-greedy match - # (that's what the '+?' part does). The second group must be - # dynamically created because it needs to match either whitespace, - # something in REDIRECTION_CHARS, one of the terminators, - # or the end of the string. We use \A and \Z to ensure we always - # match the beginning and end of a string that may have multiple - # lines (if it's a multiline command) + # that matches the first word in the line. This regex has three + # parts: + # - the '\A\s*' matches the beginning of the string (even + # if contains multiple lines) and gobbles up any leading + # whitespace + # - the first parenthesis enclosed group matches one + # or more non-whitespace characters with a non-greedy match + # (that's what the '+?' part does). The non-greedy match + # ensures that this first group doesn't include anything + # matched by the second group + # - the second parenthesis group must be dynamically created + # because it needs to match either whitespace, something in + # REDIRECTION_CHARS, one of the terminators, or the end of + # the string (\Z matches the end of the string even if it + # contains multiple lines) second_group_items = [] second_group_items.extend(constants.REDIRECTION_CHARS) second_group_items.extend(terminators) @@ -162,7 +168,7 @@ def __init__( # join them up with a pipe second_group = '|'.join(second_group_items) # build the regular expression - expr = r'\A(\S+?)({})'.format(second_group) + expr = r'\A\s*(\S+?)({})'.format(second_group) self.command_pattern = re.compile(expr) def tokenize(self, line: str) -> List[str]: From e945560fe80087bbd0bf91c41e37b17131e83e69 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 10:07:27 -0600 Subject: [PATCH 07/12] Fix pylint warnings --- tests/test_parsing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 1ebadae82..4b972d51e 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -46,7 +46,7 @@ def test_tokenize(parser, line, tokens): def test_tokenize_unclosed_quotes(parser): with pytest.raises(ValueError): - tokens = parser.tokenize('command with "unclosed quotes') + _ = parser.tokenize('command with "unclosed quotes') @pytest.mark.parametrize('tokens,command,args', [ ([], None, None), @@ -313,7 +313,7 @@ def test_parse_redirect_to_unicode_filename(parser): def test_parse_unclosed_quotes(parser): with pytest.raises(ValueError): - tokens = parser.tokenize("command with 'unclosed quotes") + _ = parser.tokenize("command with 'unclosed quotes") def test_empty_statement_raises_exception(): app = cmd2.Cmd() From 3343aad02757739fd7ddb91b095db277a59574d9 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 10:15:42 -0600 Subject: [PATCH 08/12] Add more unit tests --- tests/test_parsing.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 4b972d51e..f560f9938 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -38,7 +38,10 @@ def test_parse_empty_string(parser): ('command /* with some comment */ arg', ['command', 'arg']), ('command arg1 arg2 # comment at the end', ['command', 'arg1', 'arg2']), ('42 arg1 arg2', ['theanswer', 'arg1', 'arg2']), - ('l', ['shell', 'ls', '-al']) + ('l', ['shell', 'ls', '-al']), + ('termbare ; > /tmp/output', ['termbare', ';', '>', '/tmp/output']), + ('help|less', ['help', '|', 'less']), + ('l|less', ['shell', 'ls', '-al', '|', 'less']), ]) def test_tokenize(parser, line, tokens): tokens_to_test = parser.tokenize(line) @@ -69,15 +72,21 @@ def test_parse_single_word(parser, line): assert not statement.args assert statement.argv == [utils.strip_quotes(line)] -def test_parse_word_plus_terminator(parser): - line = 'termbare;' +@pytest.mark.parametrize('line', [ + 'termbare;', + 'termbare ;', +]) +def test_parse_word_plus_terminator(parser, line): statement = parser.parse(line) assert statement.command == 'termbare' assert statement.terminator == ';' assert statement.argv == ['termbare'] -def test_parse_suffix_after_terminator(parser): - line = 'termbare; suffx' +@pytest.mark.parametrize('line', [ + 'termbare; suffx', + 'termbare ;suffx', +]) +def test_parse_suffix_after_terminator(parser, line): statement = parser.parse(line) assert statement.command == 'termbare' assert statement.terminator == ';' From 339094832f9160e1961f4f95c4dc684413ab84c5 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 10:20:34 -0600 Subject: [PATCH 09/12] Clarify comments for self.invalid_alias_pattern --- cmd2/cmd2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index 5c82e953f..764f3ce73 100755 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -731,6 +731,8 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, persistent_histor self.broken_pipe_warning = '' # regular expression to test for invalid characters in aliases + # we will construct it dynamically, because some of the components + # like terminator characters, can change invalid_items = [] invalid_items.extend(constants.REDIRECTION_CHARS) invalid_items.extend(self.terminators) @@ -738,7 +740,8 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, persistent_histor invalid_items = [re.escape(x) for x in invalid_items] # don't allow whitespace invalid_items.append(r'\s') - # join them up with a pipe + # join them up with a pipe to form a regular expression + # that looks something like r';|>|\||\s' expr = '|'.join(invalid_items) # and compile it into a pattern self.invalid_alias_pattern = re.compile(expr) From 76e7e67e45ca2cac8339a3c3fe56d91d730ad58a Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 10:22:45 -0600 Subject: [PATCH 10/12] Rename unit test --- tests/test_parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_parsing.py b/tests/test_parsing.py index f560f9938..43baa0f46 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -56,7 +56,7 @@ def test_tokenize_unclosed_quotes(parser): (['command'], 'command', None), (['command', 'arg1', 'arg2'], 'command', 'arg1 arg2') ]) -def test_parse_command_and_args(parser, tokens, command, args): +def test_command_and_args(parser, tokens, command, args): (parsed_command, parsed_args) = parser._command_and_args(tokens) assert command == parsed_command assert args == parsed_args From d6d92949784b223ad03f70999bc55518c92d9fc9 Mon Sep 17 00:00:00 2001 From: kotfu Date: Sun, 6 May 2018 10:29:55 -0600 Subject: [PATCH 11/12] Add unit tests to ensure multiple terminator chars works --- tests/test_parsing.py | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 43baa0f46..ad4d31cd3 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -16,7 +16,7 @@ def parser(): parser = StatementParser( allow_redirection=True, - terminators=[';'], + terminators=[';', '&'], multiline_commands=['multiline'], aliases={'helpalias': 'help', '42': 'theanswer', @@ -40,6 +40,9 @@ def test_parse_empty_string(parser): ('42 arg1 arg2', ['theanswer', 'arg1', 'arg2']), ('l', ['shell', 'ls', '-al']), ('termbare ; > /tmp/output', ['termbare', ';', '>', '/tmp/output']), + ('termbare; > /tmp/output', ['termbare', ';', '>', '/tmp/output']), + ('termbare & > /tmp/output', ['termbare', '&', '>', '/tmp/output']), + ('termbare& > /tmp/output', ['termbare', '&', '>', '/tmp/output']), ('help|less', ['help', '|', 'less']), ('l|less', ['shell', 'ls', '-al', '|', 'less']), ]) @@ -72,24 +75,28 @@ def test_parse_single_word(parser, line): assert not statement.args assert statement.argv == [utils.strip_quotes(line)] -@pytest.mark.parametrize('line', [ - 'termbare;', - 'termbare ;', +@pytest.mark.parametrize('line,terminator', [ + ('termbare;', ';'), + ('termbare ;', ';'), + ('termbare&', '&'), + ('termbare &', '&'), ]) -def test_parse_word_plus_terminator(parser, line): +def test_parse_word_plus_terminator(parser, line, terminator): statement = parser.parse(line) assert statement.command == 'termbare' - assert statement.terminator == ';' + assert statement.terminator == terminator assert statement.argv == ['termbare'] -@pytest.mark.parametrize('line', [ - 'termbare; suffx', - 'termbare ;suffx', +@pytest.mark.parametrize('line,terminator', [ + ('termbare; suffx', ';'), + ('termbare ;suffx', ';'), + ('termbare& suffx', '&'), + ('termbare &suffx', '&'), ]) -def test_parse_suffix_after_terminator(parser, line): +def test_parse_suffix_after_terminator(parser, line, terminator): statement = parser.parse(line) assert statement.command == 'termbare' - assert statement.terminator == ';' + assert statement.terminator == terminator assert statement.suffix == 'suffx' assert statement.argv == ['termbare'] @@ -163,12 +170,12 @@ def test_parse_double_pipe_is_not_a_pipe(parser): assert not statement.pipe_to def test_parse_complex_pipe(parser): - line = 'command with args, terminator;sufx | piped' + line = 'command with args, terminator&sufx | piped' statement = parser.parse(line) assert statement.command == 'command' assert statement.args == "with args, terminator" assert statement.argv == ['command', 'with', 'args,', 'terminator'] - assert statement.terminator == ';' + assert statement.terminator == '&' assert statement.suffix == 'sufx' assert statement.pipe_to == 'piped' @@ -251,13 +258,16 @@ def test_parse_unfinished_multiliine_command(parser): assert statement.argv == ['multiline', 'has', '>', 'inside', 'an', 'unfinished', 'command'] assert not statement.terminator -def test_parse_multiline_command_ignores_redirectors_within_it(parser): - line = 'multiline has > inside;' +@pytest.mark.parametrize('line,terminator',[ + ('multiline has > inside;', ';'), + ('multiline has > inside &', '&'), +]) +def test_parse_multiline_command_ignores_redirectors_within_it(parser, line, terminator): statement = parser.parse(line) assert statement.multiline_command == 'multiline' assert statement.args == 'has > inside' assert statement.argv == ['multiline', 'has', '>', 'inside'] - assert statement.terminator == ';' + assert statement.terminator == terminator def test_parse_multiline_with_incomplete_comment(parser): """A terminator within a comment will be ignored and won't terminate a multiline command. From f5f0c90aa44ec658b33da422c4f0dc1cea2e6b98 Mon Sep 17 00:00:00 2001 From: kotfu Date: Mon, 7 May 2018 21:01:56 -0600 Subject: [PATCH 12/12] Make alias checking and command parsing use the same regex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a new is_valid_command() method on StatementParser to determine whether a string of characters could be a valid command. That means it can’t include any redirection, quote chars, whitespace, or terminator characters. This method is used when someone tries to create an alias, to ensure when we try and parse the alias that it will actually parse. This nicely encapsulates and standardizes all the logic for parsing and expansion into the StatementParser class. Also fix a bug in the regex to match valid command names, and add a bunch of new unit tests to ensure the bug stays fixed. --- cmd2/cmd2.py | 32 +++++++--------------- cmd2/parsing.py | 62 +++++++++++++++++++++++++++++++++++-------- tests/test_cmd2.py | 3 ++- tests/test_parsing.py | 24 +++++++++++++++++ 4 files changed, 86 insertions(+), 35 deletions(-) diff --git a/cmd2/cmd2.py b/cmd2/cmd2.py index 764f3ce73..4bec394b1 100755 --- a/cmd2/cmd2.py +++ b/cmd2/cmd2.py @@ -730,22 +730,6 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, persistent_histor # If this string is non-empty, then this warning message will print if a broken pipe error occurs while printing self.broken_pipe_warning = '' - # regular expression to test for invalid characters in aliases - # we will construct it dynamically, because some of the components - # like terminator characters, can change - invalid_items = [] - invalid_items.extend(constants.REDIRECTION_CHARS) - invalid_items.extend(self.terminators) - # escape each item so it will for sure get treated as a literal - invalid_items = [re.escape(x) for x in invalid_items] - # don't allow whitespace - invalid_items.append(r'\s') - # join them up with a pipe to form a regular expression - # that looks something like r';|>|\||\s' - expr = '|'.join(invalid_items) - # and compile it into a pattern - self.invalid_alias_pattern = re.compile(expr) - # If a startup script is provided, then add it in the queue to load if startup_script is not None: startup_script = os.path.expanduser(startup_script) @@ -2396,15 +2380,17 @@ def do_alias(self, arglist): name = arglist[0] value = ' '.join(arglist[1:]) - # Validate the alias to ensure it doesn't include wierd characters + # Validate the alias to ensure it doesn't include weird characters # like terminators, output redirection, or whitespace - if self.invalid_alias_pattern.search(name): - self.perror('Alias names can not contain special characters.', traceback_war=False) - return + valid, invalidchars = self.statement_parser.is_valid_command(name) + if valid: + # Set the alias + self.aliases[name] = value + self.poutput("Alias {!r} created".format(name)) + else: + errmsg = "Aliases can not contain: {}".format(invalidchars) + self.perror(errmsg, traceback_war=False) - # Set the alias - self.aliases[name] = value - self.poutput("Alias {!r} created".format(name)) def complete_alias(self, text, line, begidx, endidx): """ Tab completion for alias """ diff --git a/cmd2/parsing.py b/cmd2/parsing.py index d7feeb48b..3a9b390b2 100644 --- a/cmd2/parsing.py +++ b/cmd2/parsing.py @@ -141,7 +141,7 @@ def __init__( re.DOTALL | re.MULTILINE ) - # aliases have to be a word, so make a regular expression + # commands have to be a word, so make a regular expression # that matches the first word in the line. This regex has three # parts: # - the '\A\s*' matches the beginning of the string (even @@ -157,19 +157,51 @@ def __init__( # REDIRECTION_CHARS, one of the terminators, or the end of # the string (\Z matches the end of the string even if it # contains multiple lines) - second_group_items = [] - second_group_items.extend(constants.REDIRECTION_CHARS) - second_group_items.extend(terminators) + # + invalid_command_chars = [] + invalid_command_chars.extend(constants.QUOTES) + invalid_command_chars.extend(constants.REDIRECTION_CHARS) + invalid_command_chars.extend(terminators) # escape each item so it will for sure get treated as a literal - second_group_items = [re.escape(x) for x in second_group_items] + second_group_items = [re.escape(x) for x in invalid_command_chars] # add the whitespace and end of string, not escaped because they # are not literals second_group_items.extend([r'\s', r'\Z']) # join them up with a pipe second_group = '|'.join(second_group_items) # build the regular expression - expr = r'\A\s*(\S+?)({})'.format(second_group) - self.command_pattern = re.compile(expr) + expr = r'\A\s*(\S*?)({})'.format(second_group) + self._command_pattern = re.compile(expr) + + def is_valid_command(self, word: str) -> Tuple[bool, str]: + """Determine whether a word is a valid alias. + + Aliases can not include redirection characters, whitespace, + or termination characters. + + If word is not a valid command, return False and a comma + separated string of characters that can not appear in a command. + This string is suitable for inclusion in an error message of your + choice: + + valid, invalidchars = statement_parser.is_valid_command('>') + if not valid: + errmsg = "Aliases can not contain: {}".format(invalidchars) + """ + valid = False + + errmsg = 'whitespace, quotes, ' + errchars = [] + errchars.extend(constants.REDIRECTION_CHARS) + errchars.extend(self.terminators) + errmsg += ', '.join([shlex.quote(x) for x in errchars]) + + match = self._command_pattern.search(word) + if match: + if word == match.group(1): + valid = True + errmsg = None + return valid, errmsg def tokenize(self, line: str) -> List[str]: """Lex a string into a list of tokens. @@ -344,16 +376,24 @@ def parse_command_only(self, rawinput: str) -> Statement: command = None args = None - match = self.command_pattern.search(line) + match = self._command_pattern.search(line) if match: # we got a match, extract the command command = match.group(1) - # the command_pattern regex is designed to match the spaces + # the match could be an empty string, if so, turn it into none + if not command: + command = None + # the _command_pattern regex is designed to match the spaces # between command and args with a second match group. Using # the end of the second match group ensures that args has # no leading whitespace. The rstrip() makes sure there is # no trailing whitespace args = line[match.end(2):].rstrip() + # if the command is none that means the input was either empty + # or something wierd like '>'. args should be None if we couldn't + # parse a command + if not command or not args: + args = None # build the statement # string representation of args must be an empty string instead of @@ -375,11 +415,11 @@ def _expand(self, line: str) -> str: for cur_alias in tmp_aliases: keep_expanding = False # apply our regex to line - match = self.command_pattern.search(line) + match = self._command_pattern.search(line) if match: # we got a match, extract the command command = match.group(1) - if command == cur_alias: + if command and command == cur_alias: # rebuild line with the expanded alias line = self.aliases[cur_alias] + match.group(2) + line[match.end(2):] tmp_aliases.remove(cur_alias) diff --git a/tests/test_cmd2.py b/tests/test_cmd2.py index c3c9a29fb..bc76505f0 100644 --- a/tests/test_cmd2.py +++ b/tests/test_cmd2.py @@ -1707,10 +1707,11 @@ def test_unalias_non_existing(base_app, capsys): @pytest.mark.parametrize('alias_name', [ '">"', - '"no>pe"' + '"no>pe"', '"no spaces"', '"nopipe|"', '"noterm;"', + 'noembedded"quotes', ]) def test_create_invalid_alias(base_app, alias_name, capsys): run_cmd(base_app, 'alias {} help'.format(alias_name)) diff --git a/tests/test_parsing.py b/tests/test_parsing.py index ad4d31cd3..bfb55b23a 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -439,3 +439,27 @@ def test_parse_command_only_quoted_args(parser): assert statement.command == 'shell' assert statement.args == 'ls -al "/tmp/directory with spaces/doit.sh"' assert statement.command_and_args == line.replace('l', 'shell ls -al') + +@pytest.mark.parametrize('line', [ + 'helpalias > out.txt', + 'helpalias>out.txt', + 'helpalias >> out.txt', + 'helpalias>>out.txt', + 'help|less', + 'helpalias;', +]) +def test_parse_command_only_specialchars(parser, line): + statement = parser.parse_command_only(line) + assert statement.command == 'help' + +@pytest.mark.parametrize('line', [ + ';', + '>', + "'", + '"', + '|', +]) +def test_parse_command_only_none(parser, line): + statement = parser.parse_command_only(line) + assert statement.command == None + assert statement.args == None