From 1ca23c7aa156f9d4b8d2d30181289f1838db7969 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Sun, 12 Mar 2023 16:23:53 +0200 Subject: [PATCH 01/26] add boyer moore algorithm implementation --- strings/boyer_moore.cpp | 166 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 strings/boyer_moore.cpp diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp new file mode 100644 index 00000000000..bd7213eeb20 --- /dev/null +++ b/strings/boyer_moore.cpp @@ -0,0 +1,166 @@ +/** + * @file + * @brief Boyer-moore's algorithm finding all occurrences of pattern in given + * text. + * @author [Stoycho Kyosev](https://github.com/stoychoX) + */ +#include +#include +#include +#include +#include + +#define APLHABET_SIZE CHAR_MAX + +/// @brief A structure representing all the data we need to search the +/// preprocessed pattern in text. +struct pattern { + std::string pat; + + std::vector bad_char; + std::vector good_suffix; +}; + +/// @brief A function that preprocess the good suffix thable +/// @param str The string being preprocessed +/// @param arg The good suffix table +void init_good_suffix(const std::string& str, std::vector& arg) { + arg.resize(str.size() + 1, 0); + + // border_pos[i] - the index of the longest proper suffix of str[i..] which + // is also a proper prefix. 
+ std::vector border_pos(str.size() + 1, 0); + + int current_char = str.length(); + + size_t border_index = str.length() + 1; + + border_pos[current_char] = border_index; + + while (current_char > 0) { + while (border_index <= str.length() && + str[current_char - 1] != str[border_index - 1]) { + if (arg[border_index] == 0) + arg[border_index] = border_index - current_char; + + border_index = border_pos[border_index]; + } + + current_char--; + border_index--; + border_pos[current_char] = border_index; + } + + size_t largest_border_index = border_pos[0]; + + for (size_t i = 0; i < str.size(); i++) { + if (arg[i] == 0) + arg[i] = largest_border_index; + + // If we go pass the largest border we find the next one as we iterate + if (i == largest_border_index) + largest_border_index = border_pos[largest_border_index]; + } +} + +/// @brief A function that preprocess the bad char table +/// @param str The string being preprocessed +/// @param arg The bad char table +void init_bad_char(const std::string& str, std::vector& arg) { + arg.resize(APLHABET_SIZE, str.length()); + + for (size_t i = 0; i < str.length(); i++) + arg[str[i]] = str.length() - i - 1; +} + +/// @brief A function that initializes pattern +/// @param str Text used for initialization +/// @param arg Initialized structure +void init_pattern(const std::string& str, pattern& arg) { + arg.pat = str; + init_bad_char(str, arg.bad_char); + init_good_suffix(str, arg.good_suffix); +} + +/// @brief A function that implements Boyer-Moore's algorithm. +/// @param str Text we are seatching in. 
+/// @param arg pattern structure containing the preprocessed pattern +/// @return (vector of) indexes of the occurrences of pattern in text +std::vector search(const std::string& str, const pattern& arg) { + size_t index_position = arg.pat.size() - 1; + std::vector index_storage; + + while (index_position < str.length()) { + int index_string = index_position; + int index_pattern = arg.pat.size() - 1; + + while (index_pattern >= 0 && + str[index_string] == arg.pat[index_pattern]) { + --index_pattern; + --index_string; + } + + if (index_pattern < 0) { + index_storage.push_back(index_position - arg.pat.length() + 1); + index_position += arg.good_suffix[0]; + } else { + index_position += std::max(arg.bad_char[str[index_string]], + arg.good_suffix[index_pattern + 1]); + } + } + + return index_storage; +} + +/// @brief Check if pat is prefix of str +/// @param str pointer to some part of the input text +/// @param pat the searched pattern +/// @param len length of the searched pattern +/// @return true if pat is prefix of str. false otherwise. +bool is_prefix(const char* str, const char* pat, size_t len) { + if (strlen(str) < len) + return false; + + for (size_t i = 0; i < len; i++) + if (str[i] != pat[i]) + return false; + + return true; +} + +void and_test(const char* text) { + pattern ands; + init_pattern("and", ands); + std::vector indexes = search(text, ands); + + assert(indexes.size() == 2); + assert(is_prefix(text + indexes[0], "and", 3)); + assert(is_prefix(text + indexes[1], "and", 3)); +} + +void pat_test(const char* text) { + pattern pat; + init_pattern("pat", pat); + std::vector indexes = search(text, pat); + + assert(indexes.size() == 6); + + for (const auto& idx : indexes) { + assert(is_prefix(text + idx, "pat", 3)); + } +} + +void tests() { + const char* text = + "When pat Mr. and Mrs. 
pat Dursley woke up on the dull, gray \ + Tuesday our story starts, \ + there was nothing about pat the cloudy sky outside to pat suggest that\ + strange and \ + mysterious things would pat soon be happening all pat over the \ + country."; + + and_test(text); + pat_test(text); +} + +int main() { tests(); } From 6a561c15cbca85504e80f2ac4e2772f5700fc060 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Thu, 16 Mar 2023 10:22:11 +0200 Subject: [PATCH 02/26] add a one-line description of what the library/header is for --- strings/boyer_moore.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index bd7213eeb20..df6d2bb38f0 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -4,11 +4,11 @@ * text. * @author [Stoycho Kyosev](https://github.com/stoychoX) */ -#include -#include -#include -#include -#include +#include /// for assert +#include /// for strlen +#include /// for CHAR_MAX macro +#include /// for std::string +#include /// for std::vector #define APLHABET_SIZE CHAR_MAX From 859385e8d4994988de2b228aa6ac6eadf9aa92d7 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Thu, 16 Mar 2023 10:28:37 +0200 Subject: [PATCH 03/26] fix comments pattern and make tests static --- strings/boyer_moore.cpp | 77 ++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index df6d2bb38f0..64202a48bf0 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -12,8 +12,10 @@ #define APLHABET_SIZE CHAR_MAX -/// @brief A structure representing all the data we need to search the -/// preprocessed pattern in text. +/** + * @brief A structure representing all the data we need to search the + * preprocessed pattern in text. 
+ */ struct pattern { std::string pat; @@ -21,9 +23,13 @@ struct pattern { std::vector good_suffix; }; -/// @brief A function that preprocess the good suffix thable -/// @param str The string being preprocessed -/// @param arg The good suffix table +/** + * @brief A function that preprocess the good suffix thable + * + * @param str The string being preprocessed + * @param arg The good suffix table + * @return void + */ void init_good_suffix(const std::string& str, std::vector& arg) { arg.resize(str.size() + 1, 0); @@ -63,9 +69,13 @@ void init_good_suffix(const std::string& str, std::vector& arg) { } } -/// @brief A function that preprocess the bad char table -/// @param str The string being preprocessed -/// @param arg The bad char table +/** + * @brief A function that preprocess the bad char table + * + * @param str The string being preprocessed + * @param arg The bad char table + * @return void + */ void init_bad_char(const std::string& str, std::vector& arg) { arg.resize(APLHABET_SIZE, str.length()); @@ -73,19 +83,25 @@ void init_bad_char(const std::string& str, std::vector& arg) { arg[str[i]] = str.length() - i - 1; } -/// @brief A function that initializes pattern -/// @param str Text used for initialization -/// @param arg Initialized structure +/** + * @brief A function that initializes pattern + * + * @param str Text used for initialization + * @param arg Initialized structure + * @return void + */ void init_pattern(const std::string& str, pattern& arg) { arg.pat = str; init_bad_char(str, arg.bad_char); init_good_suffix(str, arg.good_suffix); } - -/// @brief A function that implements Boyer-Moore's algorithm. -/// @param str Text we are seatching in. -/// @param arg pattern structure containing the preprocessed pattern -/// @return (vector of) indexes of the occurrences of pattern in text +/** + * @brief A function that implements Boyer-Moore's algorithm. + * + * @param str Text we are seatching in. 
+ * @param arg pattern structure containing the preprocessed pattern + * @return (vector of) indexes of the occurrences of pattern in text + */ std::vector search(const std::string& str, const pattern& arg) { size_t index_position = arg.pat.size() - 1; std::vector index_storage; @@ -112,11 +128,14 @@ std::vector search(const std::string& str, const pattern& arg) { return index_storage; } -/// @brief Check if pat is prefix of str -/// @param str pointer to some part of the input text -/// @param pat the searched pattern -/// @param len length of the searched pattern -/// @return true if pat is prefix of str. false otherwise. +/** + * @brief Check if pat is prefix of str. + * + * @param str pointer to some part of the input text. + * @param pat the searched pattern. + * @param len length of the searched pattern + * @return true if pat is prefix of str. false otherwise. + */ bool is_prefix(const char* str, const char* pat, size_t len) { if (strlen(str) < len) return false; @@ -149,8 +168,11 @@ void pat_test(const char* text) { assert(is_prefix(text + idx, "pat", 3)); } } - -void tests() { +/** + * @brief Self-test implementations + * @return void + */ +static void tests() { const char* text = "When pat Mr. and Mrs. 
pat Dursley woke up on the dull, gray \ Tuesday our story starts, \ @@ -163,4 +185,11 @@ void tests() { pat_test(text); } -int main() { tests(); } +/** + * @brief Main function + * + * @return 0 + */ +int main() { + tests(); +} From 01c804936b6c4ef844a61c786cdb58901f7b8890 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Thu, 30 Mar 2023 17:25:57 +0300 Subject: [PATCH 04/26] add documentation --- strings/boyer_moore.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 64202a48bf0..0d8497522e0 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -10,7 +10,7 @@ #include /// for std::string #include /// for std::vector -#define APLHABET_SIZE CHAR_MAX +#define APLHABET_SIZE CHAR_MAX ///< number of symbols in the alphabet we use /** * @brief A structure representing all the data we need to search the @@ -19,7 +19,10 @@ struct pattern { std::string pat; + /// @brief bad char table used in Bad Character Heuristic [https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/] std::vector bad_char; + + /// @brief good suffix table used for Good Suffix heuristic [https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp] std::vector good_suffix; }; From e5444da4a93aa5578eaadb0dfcd504aa69563c54 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Thu, 30 Mar 2023 17:32:37 +0300 Subject: [PATCH 05/26] add namespaces --- strings/boyer_moore.cpp | 69 +++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 0d8497522e0..330f8c625ec 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -4,14 +4,26 @@ * text. 
* @author [Stoycho Kyosev](https://github.com/stoychoX) */ -#include /// for assert -#include /// for strlen -#include /// for CHAR_MAX macro -#include /// for std::string -#include /// for std::vector +#include /// for assert +#include /// for strlen +#include /// for CHAR_MAX macro +#include /// for std::string +#include /// for std::vector -#define APLHABET_SIZE CHAR_MAX ///< number of symbols in the alphabet we use +#define APLHABET_SIZE CHAR_MAX ///< number of symbols in the alphabet we use +/** + * @namespace + * @brief String algorithms + */ +namespace strings { +/** + * @namespace + * @brief Functions for the [Boyer + * Moore](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm) + * algorithm implementation + */ +namespace boyer_moore { /** * @brief A structure representing all the data we need to search the * preprocessed pattern in text. @@ -19,16 +31,18 @@ struct pattern { std::string pat; - /// @brief bad char table used in Bad Character Heuristic [https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/] + /// @brief bad char table used in Bad Character Heuristic + /// [https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/] std::vector bad_char; - /// @brief good suffix table used for Good Suffix heuristic [https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp] + /// @brief good suffix table used for Good Suffix heuristic + /// [https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp] std::vector good_suffix; }; /** * @brief A function that preprocess the good suffix thable - * + * * @param str The string being preprocessed * @param arg The good suffix table * @return void @@ -74,7 +88,7 @@ void init_good_suffix(const std::string& str, std::vector& arg) { /** * @brief A function that preprocess the bad char table - * + * * @param str The string being preprocessed * @param arg The bad char table * @return void @@ -88,7 +102,7 @@ void 
init_bad_char(const std::string& str, std::vector& arg) { /** * @brief A function that initializes pattern - * + * * @param str Text used for initialization * @param arg Initialized structure * @return void @@ -100,7 +114,7 @@ void init_pattern(const std::string& str, pattern& arg) { } /** * @brief A function that implements Boyer-Moore's algorithm. - * + * * @param str Text we are seatching in. * @param arg pattern structure containing the preprocessed pattern * @return (vector of) indexes of the occurrences of pattern in text @@ -133,7 +147,7 @@ std::vector search(const std::string& str, const pattern& arg) { /** * @brief Check if pat is prefix of str. - * + * * @param str pointer to some part of the input text. * @param pat the searched pattern. * @param len length of the searched pattern @@ -149,26 +163,28 @@ bool is_prefix(const char* str, const char* pat, size_t len) { return true; } +} // namespace boyer_moore +} // namespace strings void and_test(const char* text) { - pattern ands; - init_pattern("and", ands); - std::vector indexes = search(text, ands); + strings::boyer_moore::pattern ands; + strings::boyer_moore::init_pattern("and", ands); + std::vector indexes = strings::boyer_moore::search(text, ands); assert(indexes.size() == 2); - assert(is_prefix(text + indexes[0], "and", 3)); - assert(is_prefix(text + indexes[1], "and", 3)); + assert(strings::boyer_moore::is_prefix(text + indexes[0], "and", 3)); + assert(strings::boyer_moore::is_prefix(text + indexes[1], "and", 3)); } void pat_test(const char* text) { - pattern pat; - init_pattern("pat", pat); - std::vector indexes = search(text, pat); + strings::boyer_moore::pattern pat; + strings::boyer_moore::init_pattern("pat", pat); + std::vector indexes = strings::boyer_moore::search(text, pat); assert(indexes.size() == 6); for (const auto& idx : indexes) { - assert(is_prefix(text + idx, "pat", 3)); + assert(strings::boyer_moore::is_prefix(text + idx, "pat", 3)); } } /** @@ -190,9 +206,10 @@ static void tests() 
{ /** * @brief Main function - * - * @return 0 + * + * @return 0 on exit */ -int main() { - tests(); +int main() { + tests(); // run self-test implementations + return 0; } From 718284f3dd7536a0d2a057c5993dc2b6558cde80 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Sat, 1 Apr 2023 15:14:26 +0300 Subject: [PATCH 06/26] fix all warnings for clang-tydy.exe --- strings/boyer_moore.cpp | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 330f8c625ec..80fbbbeadc4 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -54,7 +54,7 @@ void init_good_suffix(const std::string& str, std::vector& arg) { // is also a proper prefix. std::vector border_pos(str.size() + 1, 0); - int current_char = str.length(); + size_t current_char = str.length(); size_t border_index = str.length() + 1; @@ -63,8 +63,9 @@ void init_good_suffix(const std::string& str, std::vector& arg) { while (current_char > 0) { while (border_index <= str.length() && str[current_char - 1] != str[border_index - 1]) { - if (arg[border_index] == 0) + if (arg[border_index] == 0) { arg[border_index] = border_index - current_char; + } border_index = border_pos[border_index]; } @@ -77,12 +78,14 @@ void init_good_suffix(const std::string& str, std::vector& arg) { size_t largest_border_index = border_pos[0]; for (size_t i = 0; i < str.size(); i++) { - if (arg[i] == 0) + if (arg[i] == 0) { arg[i] = largest_border_index; + } // If we go pass the largest border we find the next one as we iterate - if (i == largest_border_index) + if (i == largest_border_index) { largest_border_index = border_pos[largest_border_index]; + } } } @@ -96,8 +99,9 @@ void init_good_suffix(const std::string& str, std::vector& arg) { void init_bad_char(const std::string& str, std::vector& arg) { arg.resize(APLHABET_SIZE, str.length()); - for (size_t i = 0; i < str.length(); i++) + for (size_t i = 0; i < str.length(); i++) { 
arg[str[i]] = str.length() - i - 1; + } } /** @@ -124,8 +128,8 @@ std::vector search(const std::string& str, const pattern& arg) { std::vector index_storage; while (index_position < str.length()) { - int index_string = index_position; - int index_pattern = arg.pat.size() - 1; + size_t index_string = index_position; + size_t index_pattern = arg.pat.size() - 1; while (index_pattern >= 0 && str[index_string] == arg.pat[index_pattern]) { @@ -154,12 +158,15 @@ std::vector search(const std::string& str, const pattern& arg) { * @return true if pat is prefix of str. false otherwise. */ bool is_prefix(const char* str, const char* pat, size_t len) { - if (strlen(str) < len) + if (strlen(str) < len) { return false; + } - for (size_t i = 0; i < len; i++) - if (str[i] != pat[i]) + for (size_t i = 0; i < len; i++) { + if (str[i] != pat[i]) { return false; + } + } return true; } From dd98f84ce7385034562d1a56a953038e30721279 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Mon, 10 Apr 2023 17:47:51 +0300 Subject: [PATCH 07/26] Change lib from limits to climits (CHAR_MAX macro) Co-authored-by: David Leal --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 80fbbbeadc4..1832d59c6ac 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -6,7 +6,7 @@ */ #include /// for assert #include /// for strlen -#include /// for CHAR_MAX macro +#include /// for CHAR_MAX macro #include /// for std::string #include /// for std::vector From 3f438ff1a6adfe52d1a0c379c690ff46aac7135b Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Wed, 19 Apr 2023 23:08:00 +0300 Subject: [PATCH 08/26] Add breif description of boyer-moore algorithm --- strings/boyer_moore.cpp | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 
1832d59c6ac..5e08f314004 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -1,12 +1,46 @@ /** * @file - * @brief Boyer-moore's algorithm finding all occurrences of pattern in given - * text. + * @brief + * The Boyer–Moore algorithm searches for occurrences of pattern P in text T by + * performing explicit character comparisons at different alignments. Instead of + * a brute-force search of all alignments (of which there are n - m + 1), + * Boyer–Moore uses information gained by preprocessing P to skip as many + * alignments as possible. + * + * The key insight in this algorithm is that if the end of the pattern is + * compared to the text, then jumps along the text can be made rather than + * checking every character of the text. The reason that this works is that in + * lining up the pattern against the text, the last character of the pattern is + * compared to the character in the text. + * + * If the characters do not match, there is no need to continue searching + * backwards along the text. This leaves us with two cases. + * + * Case 1: + * If the character in the text does not match any of the characters in the + * pattern, then the next character in the text to check is located m characters + * farther along the text, where m is the length of the pattern. + * + * Case 2: + * If the character in the text is in the pattern, then a partial shift of the + * pattern along the text is done to line up along the matching character and + * the process is repeated. + * + * There are two shift rules: + * + * The bad character rule + * [https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_bad_character_rule] + * + * The good suffix rule + * [https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_good_suffix_rule] + * + * The shift rules are implemented as constant-time table lookups, using tables + * generated during the preprocessing of P. 
* @author [Stoycho Kyosev](https://github.com/stoychoX) */ #include /// for assert +#include /// for CHAR_MAX macro #include /// for strlen -#include /// for CHAR_MAX macro #include /// for std::string #include /// for std::vector From 845d8a13a4e96febdfe99ca42f3b7b59a70471f7 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Wed, 19 Apr 2023 23:10:16 +0300 Subject: [PATCH 09/26] Fix styling --- strings/boyer_moore.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 5e08f314004..285525cd6b7 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -28,11 +28,11 @@ * * There are two shift rules: * - * The bad character rule - * [https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_bad_character_rule] + * [The bad character rule] + * (https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_bad_character_rule) * - * The good suffix rule - * [https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_good_suffix_rule] + * [The good suffix rule] + * (https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_good_suffix_rule) * * The shift rules are implemented as constant-time table lookups, using tables * generated during the preprocessing of P. From d87a3e9899528c311c66870c64429271c2e5192a Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Mon, 1 May 2023 13:17:48 +0300 Subject: [PATCH 10/26] Add needed documentation --- strings/boyer_moore.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 285525cd6b7..142aa40ce2b 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -7,6 +7,7 @@ * Boyer–Moore uses information gained by preprocessing P to skip as many * alignments as possible. 
* + * @description * The key insight in this algorithm is that if the end of the pattern is * compared to the text, then jumps along the text can be made rather than * checking every character of the text. The reason that this works is that in @@ -206,7 +207,10 @@ bool is_prefix(const char* str, const char* pat, size_t len) { } } // namespace boyer_moore } // namespace strings - +/** + * @brief A test case in which we search for every appearance of the word 'and' + * @param text The text in which we search for appearance of the word 'and' + */ void and_test(const char* text) { strings::boyer_moore::pattern ands; strings::boyer_moore::init_pattern("and", ands); @@ -217,6 +221,10 @@ void and_test(const char* text) { assert(strings::boyer_moore::is_prefix(text + indexes[1], "and", 3)); } +/** + * @brief A test case in which we search for every appearance of the word 'pat' + * @param text The text in which we search for appearance of the word 'pat' + */ void pat_test(const char* text) { strings::boyer_moore::pattern pat; strings::boyer_moore::init_pattern("pat", pat); @@ -247,7 +255,6 @@ static void tests() { /** * @brief Main function - * * @return 0 on exit */ int main() { From 87fffc9de898cc6a91e900f8c2a63b993e9853ab Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Wed, 10 May 2023 15:17:32 +0300 Subject: [PATCH 11/26] my commit --- .vscode/settings.json | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 67fe06477bf..f6d76514ffb 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -59,6 +59,33 @@ "stdexcept": "cpp", "streambuf": "cpp", "typeinfo": "cpp", - "valarray": "cpp" + "valarray": "cpp", + "bit": "cpp", + "charconv": "cpp", + "compare": "cpp", + "concepts": "cpp", + "format": "cpp", + "forward_list": "cpp", + "ios": "cpp", + "locale": "cpp", + "queue": "cpp", + "stack": "cpp", + "xfacet": "cpp", + "xhash": "cpp", + "xiosbase": "cpp", + "xlocale": 
"cpp", + "xlocbuf": "cpp", + "xlocinfo": "cpp", + "xlocmes": "cpp", + "xlocmon": "cpp", + "xlocnum": "cpp", + "xloctime": "cpp", + "xmemory": "cpp", + "xstddef": "cpp", + "xstring": "cpp", + "xtr1common": "cpp", + "xtree": "cpp", + "xutility": "cpp", + "climits": "cpp" } } From 97b38ad04ec1e8455b666ac343ec13b5279f361b Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Sat, 13 May 2023 07:57:16 +0300 Subject: [PATCH 12/26] fix type of index_pattern --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 142aa40ce2b..fed992232ef 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -164,7 +164,7 @@ std::vector search(const std::string& str, const pattern& arg) { while (index_position < str.length()) { size_t index_string = index_position; - size_t index_pattern = arg.pat.size() - 1; + int index_pattern = arg.pat.size() - 1; while (index_pattern >= 0 && str[index_string] == arg.pat[index_pattern]) { From c42f5f6ca7830db6c7dbe4d8e1ee81b7e197eaae Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Sat, 13 May 2023 08:22:44 +0300 Subject: [PATCH 13/26] Fix clang-warnings --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index fed992232ef..62adc54106e 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -164,7 +164,7 @@ std::vector search(const std::string& str, const pattern& arg) { while (index_position < str.length()) { size_t index_string = index_position; - int index_pattern = arg.pat.size() - 1; + int index_pattern = static_cast(arg.pat.size()) - 1; while (index_pattern >= 0 && str[index_string] == arg.pat[index_pattern]) { From bc5e3469c3b60f26d436e57a21b184170d922f93 Mon Sep 17 00:00:00 2001 From: David Leal Date: Fri, 26 May 2023 15:37:35 -0600 Subject: [PATCH 14/26] chore: apply suggestions from code 
review --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 62adc54106e..96ce50d1a40 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -7,7 +7,7 @@ * Boyer–Moore uses information gained by preprocessing P to skip as many * alignments as possible. * - * @description + * @details * The key insight in this algorithm is that if the end of the pattern is * compared to the text, then jumps along the text can be made rather than * checking every character of the text. The reason that this works is that in From 959075abf5b6a8da68594f763733e3e2ab5779d4 Mon Sep 17 00:00:00 2001 From: David Leal Date: Fri, 26 May 2023 21:46:25 +0000 Subject: [PATCH 15/26] chore: add print message after tests --- strings/boyer_moore.cpp | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 96ce50d1a40..169eb9a813a 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -39,11 +39,13 @@ * generated during the preprocessing of P. 
* @author [Stoycho Kyosev](https://github.com/stoychoX) */ -#include /// for assert -#include /// for CHAR_MAX macro -#include /// for strlen -#include /// for std::string -#include /// for std::vector + +#include /// for assert +#include /// for CHAR_MAX macro +#include /// for strlen +#include /// for IO operations +#include /// for std::string +#include /// for std::vector #define APLHABET_SIZE CHAR_MAX ///< number of symbols in the alphabet we use @@ -66,13 +68,13 @@ namespace boyer_moore { struct pattern { std::string pat; - /// @brief bad char table used in Bad Character Heuristic - /// [https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/] - std::vector bad_char; + std::vector + bad_char; ///< bad char table used in [Bad Character + ///< Heuristic](https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/) - /// @brief good suffix table used for Good Suffix heuristic - /// [https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp] - std::vector good_suffix; + std::vector + good_suffix; ///< good suffix table used for [Good Suffix + ///< heuristic](https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp) }; /** @@ -251,6 +253,8 @@ static void tests() { and_test(text); pat_test(text); + + std::cout << "All tests have successfully passed!\n"; } /** From 5cb9c0a99f7409ad0e5e1b046e4ced3d81ccb4da Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:53:05 +0300 Subject: [PATCH 16/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 169eb9a813a..766c487ce23 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -146,7 +146,7 @@ void init_bad_char(const std::string& str, std::vector& 
arg) { * * @param str Text used for initialization * @param arg Initialized structure - * @return void + * @returns void */ void init_pattern(const std::string& str, pattern& arg) { arg.pat = str; From 259f53c32d278d79ef09c62b574b4aaabb1eff0c Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:53:23 +0300 Subject: [PATCH 17/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 766c487ce23..ebbf8536f0f 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -131,7 +131,7 @@ void init_good_suffix(const std::string& str, std::vector& arg) { * * @param str The string being preprocessed * @param arg The bad char table - * @return void + * @returns void */ void init_bad_char(const std::string& str, std::vector& arg) { arg.resize(APLHABET_SIZE, str.length()); From 7115d6309461ba360875212f976c24aeb3baa180 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:53:41 +0300 Subject: [PATCH 18/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index ebbf8536f0f..58d5b00aff6 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -226,6 +226,7 @@ void and_test(const char* text) { /** * @brief A test case in which we search for every appearance of the word 'pat' * @param text The text in which we search for appearance of the word 'pat' + * @returns void */ void pat_test(const char* text) { strings::boyer_moore::pattern pat; From a0bb48e0fd2a086d6fb8b7b1d7e115a21c63a7a2 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev 
<68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:53:51 +0300 Subject: [PATCH 19/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 58d5b00aff6..b37171148f9 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -192,7 +192,8 @@ std::vector<size_t> search(const std::string& str, const pattern& arg) { * @param str pointer to some part of the input text. * @param pat the searched pattern. * @param len length of the searched pattern - * @return true if pat is prefix of str. false otherwise. + * @returns `true` if pat IS prefix of str. + * @returns `false` if pat is NOT a prefix of str. */ bool is_prefix(const char* str, const char* pat, size_t len) { if (strlen(str) < len) { From 44b4ffa1910204ea3f41ca4d074fce7b2262daf0 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:54:01 +0300 Subject: [PATCH 20/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index b37171148f9..7cd136cc689 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -261,7 +261,7 @@ static void tests() { /** * @brief Main function - * @return 0 on exit + * @returns 0 on exit */ int main() { tests(); // run self-test implementations From 45effd1b17d4dc86004f0f8682a233e9b5ddcb26 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:54:11 +0300 Subject: [PATCH 21/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- 
strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 7cd136cc689..d18a4368efe 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -242,7 +242,7 @@ void pat_test(const char* text) { } /** * @brief Self-test implementations - * @return void + * @returns void */ static void tests() { const char* text = From 4039f016b40c60817c8706848bb7b885fb554f61 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:54:21 +0300 Subject: [PATCH 22/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index d18a4368efe..2f2438ec53c 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -213,6 +213,7 @@ bool is_prefix(const char* str, const char* pat, size_t len) { /** * @brief A test case in which we search for every appearance of the word 'and' * @param text The text in which we search for appearance of the word 'and' + * @returns void */ void and_test(const char* text) { strings::boyer_moore::pattern ands; From e6677b8f05352e341574705b5de5af0ce1bc6f13 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Tue, 30 May 2023 18:54:29 +0300 Subject: [PATCH 23/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 2f2438ec53c..1bd1bd9fb61 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -82,7 +82,7 @@ struct pattern { * * @param str The string being preprocessed * @param arg The good suffix table - * @return void + * @returns void */ void 
init_good_suffix(const std::string& str, std::vector<size_t>& arg) { arg.resize(str.size() + 1, 0); From 961a35bc181cd0eb18db64281325fac5e2b6c981 Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Wed, 31 May 2023 11:09:00 +0200 Subject: [PATCH 24/26] Update strings/boyer_moore.cpp Co-authored-by: realstealthninja <68815218+realstealthninja@users.noreply.github.com> --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 1bd1bd9fb61..cc80d68f3c5 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -158,7 +158,7 @@ void init_pattern(const std::string& str, pattern& arg) { * * @param str Text we are seatching in. * @param arg pattern structure containing the preprocessed pattern - * @return (vector of) indexes of the occurrences of pattern in text + * @return Vector of indexes of the occurrences of pattern in text */ std::vector<size_t> search(const std::string& str, const pattern& arg) { size_t index_position = arg.pat.size() - 1; From 611cfb474a39cc3d449494b60a7843023a696c6e Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev Date: Wed, 31 May 2023 14:08:28 +0300 Subject: [PATCH 25/26] fix: variable name --- strings/boyer_moore.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index cc80d68f3c5..09fb0f3190d 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -237,8 +237,8 @@ void pat_test(const char* text) { assert(indexes.size() == 6); - for (const auto& idx : indexes) { - assert(strings::boyer_moore::is_prefix(text + idx, "pat", 3)); + for (const auto& currentIndex : indexes) { + assert(strings::boyer_moore::is_prefix(text + currentIndex, "pat", 3)); } } /** From 5800dd82b643a1f3d49a8c373f85414ddd6bfb0d Mon Sep 17 00:00:00 2001 From: Stoycho Kyosev <68247567+stoychoX@users.noreply.github.com> Date: Fri, 9 Jun 2023 16:10:52 +0300 Subject: 
[PATCH 26/26] Update strings/boyer_moore.cpp Co-authored-by: David Leal --- strings/boyer_moore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp index 09fb0f3190d..a8c4cbf8d41 100644 --- a/strings/boyer_moore.cpp +++ b/strings/boyer_moore.cpp @@ -1,7 +1,7 @@ /** * @file * @brief - * The Boyer–Moore algorithm searches for occurrences of pattern P in text T by + * The [Boyer–Moore](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm) algorithm searches for occurrences of pattern P in text T by * performing explicit character comparisons at different alignments. Instead of * a brute-force search of all alignments (of which there are n - m + 1), * Boyer–Moore uses information gained by preprocessing P to skip as many