Skip to content

Add spell correction for CLI options #5461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions regression/cbmc/unknown-argument-suggestion/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
CORE
dummy.c
--traec
did you mean --trace
^EXIT=(1|64)$
^SIGNAL=0$
--
--
This checks that we get a useful suggestion when we make a typo on the
command line.

The exit code is 1 on Linux and macOS but 64 on Windows; the reason for the
difference is not known.
1 change: 1 addition & 0 deletions regression/validate-trace-xml-schema/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
# these test for invalid command line handling
'bad_option/test_multiple.desc',
'bad_option/test.desc',
'unknown-argument-suggestion/test.desc',
# this one produces XML intermingled with main XML output when used with --xml-ui
'graphml_witness2/test.desc',
# produces intermingled XML on the command line
Expand Down
1 change: 1 addition & 0 deletions src/util/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ SRC = allocate_objects.cpp \
cout_message.cpp \
dstring.cpp \
endianness_map.cpp \
edit_distance.cpp \
expr.cpp \
expr_initializer.cpp \
expr_util.cpp \
Expand Down
61 changes: 61 additions & 0 deletions src/util/cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Author: Daniel Kroening, [email protected]

#include "cmdline.h"

#include <util/edit_distance.h>
#include <util/exception_utils.h>
#include <util/invariant.h>

Expand Down Expand Up @@ -250,6 +251,66 @@ cmdlinet::option_namest cmdlinet::option_names() const
return option_namest{*this};
}

/// Collects the known options that are spelled most similarly to
/// \p unknown_argument.
/// \param unknown_argument: the argument as typed, including leading dashes
/// \return the spellings ("--name" for long options, "-c" for short ones) of
///   all registered options that are within three edits of
///   \p unknown_argument and share the smallest edit distance found; empty
///   if no option is within three edits
std::vector<std::string>
cmdlinet::get_argument_suggestions(const std::string &unknown_argument)
{
  struct suggestiont
  {
    std::size_t distance;
    std::string suggestion;
  };

  // Up to 3 errors are tolerated while matching. On its own that would make
  // the output rather chatty, so only the closest candidates survive the
  // filtering step at the end.
  const auto argument_matcher = levenshtein_automatont{unknown_argument, 3};

  std::vector<suggestiont> candidates;
  // records `spelling` together with its distance if it is close enough
  const auto consider = [&](const std::string &spelling) {
    if(const auto distance = argument_matcher.get_edit_distance(spelling))
      candidates.push_back({*distance, spelling});
  };

  for(const auto &option : options)
  {
    if(option.islong)
      consider("--" + option.optstring);
    else
      consider(std::string{"-"} + option.optchar);
  }

  if(candidates.empty())
    return {};

  // anything further away than the best match is noise rather than help
  std::size_t smallest_distance = candidates.front().distance;
  for(const auto &candidate : candidates)
  {
    if(candidate.distance < smallest_distance)
      smallest_distance = candidate.distance;
  }

  std::vector<std::string> closest;
  for(const auto &candidate : candidates)
  {
    if(candidate.distance == smallest_distance)
      closest.push_back(candidate.suggestion);
  }
  return closest;
}

cmdlinet::option_namest::option_names_iteratort::option_names_iteratort(
const cmdlinet *command_line,
std::size_t index)
Expand Down
3 changes: 3 additions & 0 deletions src/util/cmdline.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ class cmdlinet
cmdlinet();
virtual ~cmdlinet();

std::vector<std::string>
get_argument_suggestions(const std::string &unknown_argument);

protected:
struct optiont
{
Expand Down
73 changes: 73 additions & 0 deletions src/util/edit_distance.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/// \file
/// \author Diffblue Ltd.
///
/// Provides a way to compute edit distance between two strings

#include "edit_distance.h"

/// Builds the automaton for \p string.
///
/// The states form `allowed_errors + 1` layers of `string.size() + 1` states
/// each: the layer number counts the errors used so far and the position
/// within a layer counts how many characters of \p string have been matched.
/// The last state of every layer is recorded in `final_states`, indexed by
/// layer.
/// \param string: the string to compare candidates against
/// \param allowed_errors: the number of error layers below the error-free one
levenshtein_automatont::levenshtein_automatont(
  const std::string &string,
  std::size_t allowed_errors)
{
  const std::size_t length = string.size();
  const std::size_t states_per_layer = length + 1;
  // label of the state at `position` within error layer `layer`
  const auto state_at =
    [states_per_layer](std::size_t layer, std::size_t position) {
      return layer * states_per_layer + position;
    };

  for(std::size_t layer = 0; layer <= allowed_errors; ++layer)
    final_states.push_back(state_at(layer, length));

  for(std::size_t position = 0; position < length; ++position)
  {
    for(std::size_t layer = 0; layer <= allowed_errors; ++layer)
    {
      const std::size_t here = state_at(layer, position);

      // the character at `position` matches: advance within the same layer
      nfa.add_transition(here, string[position], here + 1);

      // the last layer has no layer below it to move to
      if(layer == allowed_errors)
        continue;

      // insertion, swap or deletion: each one moves to the next error layer
      const std::size_t below = state_at(layer + 1, position);
      nfa.add_arbitrary_transition(here, below);
      nfa.add_epsilon_transition(here, below + 1);
      nfa.add_arbitrary_transition(here, below + 1);
    }
  }

  // arbitrary transitions between the end states of consecutive error layers
  for(std::size_t layer = 0; layer < allowed_errors; ++layer)
  {
    nfa.add_arbitrary_transition(
      state_at(layer, length), state_at(layer + 1, length));
  }
}

/// \param string: the candidate to check
/// \return true if and only if get_edit_distance yields a distance for
///   \p string
bool levenshtein_automatont::matches(const std::string &string) const
{
  const auto distance = get_edit_distance(string);
  return distance.has_value();
}

/// Runs the automaton over \p string and reports which final state, if any,
/// has been reached.
/// \param string: the candidate to feed to the automaton
/// \return the index of the first entry of `final_states` contained in the
///   state reached after consuming all of \p string, or nullopt if none of
///   them is contained
optionalt<std::size_t>
levenshtein_automatont::get_edit_distance(const std::string &string) const
{
  auto reached = nfa.initial_state(0);
  for(const auto character : string)
    reached = nfa.next_state(reached, character);

  // final_states is ordered by error layer, so the first hit is the smallest
  std::size_t distance = 0;
  for(const auto &final_state : final_states)
  {
    if(reached.contains(final_state))
      return distance;
    ++distance;
  }
  return nullopt;
}
46 changes: 46 additions & 0 deletions src/util/edit_distance.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/// \file
/// \author Diffblue Ltd.
///
/// Loosely based on this blog post:
/// http://blog.notdot.net/2010/07/Damn-Cool-Algorithms-Levenshtein-Automata
/// Provides a way to compute edit distance between two strings
///
/// No conversion to DFA or other optimisations are done here because for our
/// use case (i.e. suggestions for errors in command line specifications) this
/// is fast enough without them.

#ifndef CPROVER_UTIL_EDIT_DISTANCE_H
#define CPROVER_UTIL_EDIT_DISTANCE_H

#include "nfa.h"

#include <cstddef>
#include <iosfwd>
#include <string>
#include <vector>

#include <util/optional.h>

/// Simple automaton that can detect whether a string can be transformed into
/// another with a limited number of deletions, insertions or substitutions.
/// Not a very fast implementation, but should be good enough for small strings.
struct levenshtein_automatont
{
private:
  /// The underlying automaton over characters
  nfat<char> nfa;
  using state_labelt = nfat<char>::state_labelt;
  /// Accepting states; get_edit_distance reports the index of the first one
  /// that has been reached as the edit distance
  std::vector<state_labelt> final_states;

public:
  /// \param string: the string that candidates are compared against
  /// \param allowed_errors: the maximum number of errors a candidate may
  ///   contain and still be matched
  levenshtein_automatont(
    const std::string &string,
    std::size_t allowed_errors = 2);

  /// \return true if \p string is matched by the automaton, i.e. if
  ///   get_edit_distance returns a value for it
  bool matches(const std::string &string) const;

  /// \return the edit distance found for \p string, or an empty optional if
  ///   \p string is not matched by the automaton
  optionalt<std::size_t> get_edit_distance(const std::string &string) const;

  /// Forwards to the dump_automaton_dot_to function of the underlying NFA.
  /// \param out: the stream handed to the NFA
  void dump_automaton_dot_to(std::ostream &out)
  {
    nfa.dump_automaton_dot_to(out);
  }
};

#endif // CPROVER_UTIL_EDIT_DISTANCE_H
Loading