From 304b8d03374c5130226134488be83b4958c1dc87 Mon Sep 17 00:00:00 2001 From: landerrosette <57791410+landerrosette@users.noreply.github.com> Date: Thu, 28 Nov 2024 21:24:18 +0800 Subject: [PATCH] 5.9 Regular expression pattern matching --- BoyerMoore.cpp | 9 +++-- BoyerMoore.h | 5 ++- CMakeLists.txt | 19 +++++---- KMP.cpp | 13 +++--- KMP.h | 5 ++- LSD.h | 1 + MSD.h | 1 + NFA.cpp | 51 +++++++++++++++++++++++ NFA.h | 22 ++++++++++ Quick3string.h | 1 + README.md | 56 +++++++++++++++++++++----- RabinKarp.cpp | 6 +-- RabinKarp.h | 7 ++-- StringST.h | 5 ++- StringSTTest.h | 13 ------ StringSorting.h | 4 +- SubstrSearcher.h | 4 +- TST.h | 32 +++++++-------- OrderedSTTest.cpp => TestOrderedST.cpp | 8 ++-- OrderedSTTest.h => TestOrderedST.h | 10 ++--- STTest.cpp => TestST.cpp | 8 ++-- STTest.h => TestST.h | 12 +++--- StringSTTest.cpp => TestStringST.cpp | 7 ++-- TestStringST.h | 13 ++++++ TrieST.h | 33 +++++++-------- main_GREP.cpp | 30 ++++++++++++++ main_ST.cpp.in => main_TestST.cpp.in | 18 ++++----- 27 files changed, 273 insertions(+), 120 deletions(-) create mode 100644 NFA.cpp create mode 100644 NFA.h delete mode 100644 StringSTTest.h rename OrderedSTTest.cpp => TestOrderedST.cpp (94%) rename OrderedSTTest.h => TestOrderedST.h (61%) rename STTest.cpp => TestST.cpp (88%) rename STTest.h => TestST.h (62%) rename StringSTTest.cpp => TestStringST.cpp (87%) create mode 100644 TestStringST.h create mode 100644 main_GREP.cpp rename main_ST.cpp.in => main_TestST.cpp.in (57%) diff --git a/BoyerMoore.cpp b/BoyerMoore.cpp index 8eda4d0..25e6ba3 100644 --- a/BoyerMoore.cpp +++ b/BoyerMoore.cpp @@ -1,13 +1,14 @@ #include "BoyerMoore.h" +#include -BoyerMoore::BoyerMoore(const std::string &pat) : pat(pat) { +BoyerMoore::BoyerMoore(std::string pat) : pat(std::move(pat)) { // 计算跳跃表 - int M = pat.length(), R = 256; + int M = this->pat.length(), R = 256; right = std::vector(R, -1); - for (int j = 0; j < M; ++j) right[pat[j]] = j; + for (int j = 0; j < M; ++j) right[this->pat[j]] = j; } -int BoyerMoore::search(const std::string &txt) const { +int BoyerMoore::search(std::string_view txt) const { int N = txt.length(), M = pat.length(); for (int skip, i = 0; i <= N - M; i += skip) { // 模式字符串和文本在位置i匹配吗? diff --git a/BoyerMoore.h b/BoyerMoore.h index 94c7c85..01ca0b2 100644 --- a/BoyerMoore.h +++ b/BoyerMoore.h @@ -4,6 +4,7 @@ #include "SubstrSearcher.h" #include +#include class BoyerMoore : public SubstrSearcher { private: @@ -11,9 +12,9 @@ class BoyerMoore : public SubstrSearcher { std::string pat; public: - explicit BoyerMoore(const std::string &pat); + explicit BoyerMoore(std::string pat); - int search(const std::string &txt) const override; + int search(std::string_view txt) const override; }; diff --git a/CMakeLists.txt b/CMakeLists.txt index a09948a..f7be9bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.28) +cmake_minimum_required(VERSION 3.20) project(algs4) set(CMAKE_CXX_STANDARD 17) @@ -31,20 +31,20 @@ foreach (ST "SequentialSearchST" "BinarySearchST" "BST" "RedBlackBST" "SeparateC set(ST_INIT_ARGS "20") endif () - configure_file(main_ST.cpp.in main_${ST}.cpp @ONLY) + configure_file(main_TestST.cpp.in main_Test${ST}.cpp @ONLY) - add_executable(${ST} main_${ST}.cpp STTest.cpp) + add_executable(Test${ST} main_Test${ST}.cpp TestST.cpp) - target_include_directories(${ST} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + target_include_directories(Test${ST} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) if (ST STREQUAL "BinarySearchST" OR ST STREQUAL "BST" OR ST STREQUAL "RedBlackBST") - target_sources(${ST} PRIVATE OrderedSTTest.cpp) + target_sources(Test${ST} PRIVATE TestOrderedST.cpp) - target_compile_definitions(${ST} PRIVATE ORDERED) + target_compile_definitions(Test${ST} PRIVATE ORDERED) elseif (ST STREQUAL "TrieST" OR ST STREQUAL "TST") - target_sources(${ST} PRIVATE StringSTTest.cpp) + target_sources(Test${ST} PRIVATE TestStringST.cpp) - target_compile_definitions(${ST} PRIVATE STRING) + target_compile_definitions(Test${ST} PRIVATE STRING) endif () unset(ST_INIT_ARGS) @@ -112,3 +112,6 @@ foreach (SUBSTR_SEARCHER "KMP" "BoyerMoore" "RabinKarp") target_include_directories(${SUBSTR_SEARCHER} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) endforeach () + +# 5.9 +add_executable(GREP main_GREP.cpp NFA.cpp Digraph.cpp DirectedDFS.cpp) diff --git a/KMP.cpp b/KMP.cpp index e39b7da..f50b4d8 100644 --- a/KMP.cpp +++ b/KMP.cpp @@ -1,19 +1,20 @@ #include "KMP.h" +#include -KMP::KMP(const std::string &pat) : pat(pat) { +KMP::KMP(std::string pat) : pat(std::move(pat)) { // 构造DFA - int M = pat.length(), R = 256; + int M = this->pat.length(), R = 256; dfa = std::vector(R, std::vector(M)); - dfa[pat[0]][0] = 1; + dfa[this->pat[0]][0] = 1; for (int X = 0, j = 1; j < M; ++j) { // 计算dfa[][j] for (int c = 0; c < R; ++c) dfa[c][j] = dfa[c][X]; // 复制匹配失败情况下的值 - dfa[pat[j]][j] = j + 1; // 设置匹配成功情况下的值 - X = dfa[pat[j]][X]; // 更新重启状态 + dfa[this->pat[j]][j] = j + 1; // 设置匹配成功情况下的值 + X = dfa[this->pat[j]][X]; // 更新重启状态 } } -int KMP::search(const std::string &txt) const { +int KMP::search(std::string_view txt) const { int i, j, N = txt.length(), M = pat.length(); for (i = 0, j = 0; i < N && j < M; ++i) j = dfa[txt[i]][j]; if (j == M) return i - M; // 找到匹配(到达模式字符串的末尾) diff --git a/KMP.h b/KMP.h index 3ad911a..ae5ac6c 100644 --- a/KMP.h +++ b/KMP.h @@ -4,6 +4,7 @@ #include "SubstrSearcher.h" #include +#include class KMP : public SubstrSearcher { private: @@ -11,9 +12,9 @@ class KMP : public SubstrSearcher { std::vector > dfa; public: - explicit KMP(const std::string &pat); + explicit KMP(std::string pat); - int search(const std::string &txt) const override; + int search(std::string_view txt) const override; }; diff --git a/LSD.h b/LSD.h index dfd063f..7268ada 100644 --- a/LSD.h +++ b/LSD.h @@ -3,6 +3,7 @@ #include +#include #include "StringSorting.h" class LSD : public StringSorting { diff --git a/MSD.h b/MSD.h index b02890b..ec2921d 100644 --- a/MSD.h +++ b/MSD.h @@ -4,6 +4,7 @@ #include #include +#include #include "Sorting.h" #include "StringSorting.h" diff --git a/NFA.cpp b/NFA.cpp new file mode 100644 index 0000000..bc4b8ef --- /dev/null +++ b/NFA.cpp @@ -0,0 +1,51 @@ +#include "NFA.h" +#include +#include "DirectedDFS.h" + +NFA::NFA(std::string_view regexp) : re(regexp.begin(), regexp.end()), M(re.size()), G(M + 1) { + std::list ops; + for (int i = 0; i < M; ++i) { + int lp = i; // left position + if (re[i] == '(' || re[i] == '|') ops.push_front(i); + else if (re[i] == ')') { + int orPos = ops.front(); + ops.pop_front(); + if (re[orPos] == '|') { + lp = ops.front(); + ops.pop_front(); + G.addEdge(lp, orPos + 1); + G.addEdge(orPos, i); + } else lp = orPos; + } + if (i < M - 1 && re[i + 1] == '*') { + // 查看下一个字符 + G.addEdge(lp, i + 1); + G.addEdge(i + 1, lp); + } + if (re[i] == '(' || re[i] == '*' || re[i] == ')') G.addEdge(i, i + 1); + } +} + +bool NFA::recognizes(std::string_view txt) const { + std::list pc; + DirectedDFS dfs(G, 0); + for (int v = 0; v < G.V(); ++v) + if (dfs.marked(v)) pc.push_front(v); + + for (int i = 0; i < txt.length(); ++i) { + // 计算txt[i+1]可能到达的所有状态 + std::list match; + for (int v: pc) { + if (v < M) { + if (re[v] == txt[i] || re[v] == '.') match.push_front(v + 1); + } + } + pc = std::list(); + dfs = DirectedDFS(G, match); + for (int v = 0; v < G.V(); ++v) + if (dfs.marked(v)) pc.push_front(v); + } + for (int v: pc) + if (v == M) return true; + return false; +} diff --git a/NFA.h b/NFA.h new file mode 100644 index 0000000..ea861e9 --- /dev/null +++ b/NFA.h @@ -0,0 +1,22 @@ +#ifndef NFA_H +#define NFA_H + + +#include "Digraph.h" +#include +#include + +class NFA { +private: + std::vector re; // 匹配转换 + int M; // 状态数量 + Digraph G; // epsilon转换 + +public: + explicit NFA(std::string_view regexp); + + bool recognizes(std::string_view txt) const; +}; + + +#endif //NFA_H diff --git a/Quick3string.h b/Quick3string.h index 6df12de..62962b1 100644 --- a/Quick3string.h +++ b/Quick3string.h @@ -4,6 +4,7 @@ #include "StringSorting.h" #include +#include class Quick3string : public StringSorting { private: diff --git a/README.md b/README.md index 174645f..e11ddc5 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ## Overview Algorithms 4/e textbook -This repository contains C++ implementations of the algorithms in the textbook +This repository contains C++ implementations of the algorithms and (a few) clients in the textbook Algorithms, 4th Edition by Robert Sedgewick and Kevin Wayne. ## Algorithms @@ -56,12 +56,50 @@ This repository contains C++ implementations of the algorithms in the textbook - **5.6** Substring search (Knuth-Morris-Pratt): [KMP.h](KMP.h) | [KMP.cpp](KMP.cpp) - **5.7** Substring search (Boyer-Moore): [BoyerMoore.h](BoyerMoore.h) | [BoyerMoore.cpp](BoyerMoore.cpp) - **5.8** Substring search (Rabin-Karp): [RabinKarp.h](RabinKarp.h) | [RabinKarp.cpp](RabinKarp.cpp) +- **5.9** Regular expression pattern matching: [NFA.h](NFA.h) | [NFA.cpp](NFA.cpp) +- ... + +## Clients + +### Fundamentals + +- UF: [main_UF.cpp](main_UF.cpp) + +### Sorting + +- Selection | Insertion | Shell | Merge | MergeBU | Quick | Quick3way | Heap: [main_Sorting.cpp.in](main_Sorting.cpp.in) +- MaxPQ: [main_MaxPQ.cpp](main_MaxPQ.cpp) + +### Symbol Tables + +- TestSequentialSearchST | TestBinarySearchST | TestBST | TestRedBlackBST | TestSeparateChainingHashST | + TestLinearProbingHashST: [main_TestST.cpp.in](main_TestST.cpp.in) + +### Graphs + +- DepthFirstPaths | BreadthFirstPaths: [main_Paths.cpp.in](main_Paths.cpp.in) +- CC | KosarajuSCC: [main_CC.cpp.in](main_CC.cpp.in) +- DirectedDFS: [main_DirectedDFS.cpp](main_DirectedDFS.cpp) +- Topological: [main_Topological.cpp](main_Topological.cpp) +- PrimMST | KruskalMST: [main_MST.cpp.in](main_MST.cpp.in) +- DijkstraSP | AcyclicSP | BellmanFordSP: [main_SP.cpp.in](main_SP.cpp.in) + +### Strings + +- LSD | MSD | Quick3string: [main_Sorting.cpp.in](main_Sorting.cpp.in) +- TestTrieST | TestTST: [main_TestST.cpp.in](main_TestST.cpp.in) +- KMP | BoyerMoore | RabinKarp: [main_SubstrSearch.cpp.in](main_SubstrSearch.cpp.in) +- GREP: [main_GREP.cpp](main_GREP.cpp) - ... ## Build and Run -A simple client is provided for each algorithm in `main_*.cpp`s. To build them, ensure you have CMake 3.28 or higher and -a C++17 compatible compiler. Follow these steps: +### Prerequisites + +- CMake 3.20 or later +- C++ compiler with C++17 support + +### Steps 1. Create and navigate to a build directory: @@ -70,30 +108,30 @@ a C++17 compatible compiler. Follow these steps: cd build ``` -2. Configure and build all targets: +2. Configure and build all targets. This will produce all clients: ```shell cmake .. cmake --build . ``` - Alternatively, you can build a specific target that corresponds to a specific algorithm. For example: + Alternatively, build a specific target that produces a specific client: ```shell cmake --build . --target UF ``` -3. (Optional) Get sample input files from the book's website: https://algs4.cs.princeton.edu/code/. -4. Run the executable. You may redirect the input from a file to save typing: +3. (Optional) Download sample input files from the booksite: https://algs4.cs.princeton.edu/code/. +4. Run the client. You may redirect the input from a file (possibly one obtained in step 3): ```shell ./UF < tinyUF.txt ``` - Some algorithms require additional command-line arguments. For example: + Some clients may expect command-line arguments. For example: ```shell ./DepthFirstPaths tinyCG.txt 0 ``` - This runs the depth-first search algorithm on the `tinyCG.txt` graph, starting from vertex `0`. + This will run `DepthFirstPaths` on the graph in `tinyCG.txt` starting from vertex 0. diff --git a/RabinKarp.cpp b/RabinKarp.cpp index 98c2260..b4b4867 100644 --- a/RabinKarp.cpp +++ b/RabinKarp.cpp @@ -1,7 +1,7 @@ #include "RabinKarp.h" #include -long long RabinKarp::hash(const std::string &key, int M) const { +long long RabinKarp::hash(std::string_view key, int M) const { long long h = 0; for (int j = 0; j < M; ++j) h = (R * h + key[j]) % Q; return h; @@ -25,12 +25,12 @@ long long RabinKarp::longRandomPrime() { } } -RabinKarp::RabinKarp(const std::string &pat) : M(pat.length()) { +RabinKarp::RabinKarp(std::string_view pat) : M(pat.length()) { for (int i = 1; i <= M - 1; ++i) RM = (R * RM) % Q; // 计算 R^(M-1) % Q patHash = hash(pat, M); } -int RabinKarp::search(const std::string &txt) const { +int RabinKarp::search(std::string_view txt) const { int N = txt.length(); long long txtHash = hash(txt, M); if (patHash == txtHash && check(0)) return 0; // 一开始就匹配成功 diff --git a/RabinKarp.h b/RabinKarp.h index c74dcd8..4c6e8ad 100644 --- a/RabinKarp.h +++ b/RabinKarp.h @@ -3,6 +3,7 @@ #include "SubstrSearcher.h" +#include class RabinKarp : public SubstrSearcher { private: @@ -12,16 +13,16 @@ class RabinKarp : public SubstrSearcher { int R = 256; // 字母表的大小 long long RM = 1; // R^(M-1) % Q - long long hash(const std::string &key, int M) const; + long long hash(std::string_view key, int M) const; static long long longRandomPrime(); bool check(int i) const { return true; } // 蒙特卡洛算法(只要散列值相同就认为找到了) public: - explicit RabinKarp(const std::string &pat); + explicit RabinKarp(std::string_view pat); - int search(const std::string &txt) const override; + int search(std::string_view txt) const override; }; diff --git a/StringST.h b/StringST.h index 9aa211f..99f733b 100644 --- a/StringST.h +++ b/StringST.h @@ -5,15 +5,16 @@ #include "ST.h" #include #include +#include template class StringST : public ST { public: - virtual std::string longestPrefixOf(const std::string &s) const = 0; + virtual std::string longestPrefixOf(std::string_view s) const = 0; virtual std::list keysWithPrefix(const std::string &pre) const = 0; - virtual std::list keysThatMatch(const std::string &pat) const = 0; + virtual std::list keysThatMatch(std::string_view pat) const = 0; }; diff --git a/StringSTTest.h b/StringSTTest.h deleted file mode 100644 index 6913adc..0000000 --- a/StringSTTest.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef STRINGSTTEST_H -#define STRINGSTTEST_H - - -#include -#include "StringST.h" - -namespace STTest { - void testStringST(StringST &st, std::ostream &os); -} - - -#endif //STRINGSTTEST_H diff --git a/StringSorting.h b/StringSorting.h index 98f356b..73f3fdd 100644 --- a/StringSorting.h +++ b/StringSorting.h @@ -3,11 +3,11 @@ #include "Sorting.h" -#include +#include class StringSorting : public Sorting { protected: - static char charAt(const std::string &s, int d) { return d < s.length() ? s[d] : -1; } + static char charAt(std::string_view s, int d) { return d < s.length() ? s[d] : -1; } }; diff --git a/SubstrSearcher.h b/SubstrSearcher.h index dd234c7..fcb3e43 100644 --- a/SubstrSearcher.h +++ b/SubstrSearcher.h @@ -2,13 +2,13 @@ #define SUBSTRSEARCHER_H -#include +#include class SubstrSearcher { public: virtual ~SubstrSearcher() = default; - virtual int search(const std::string &txt) const = 0; + virtual int search(std::string_view txt) const = 0; }; diff --git a/TST.h b/TST.h index 49611e9..599a5b0 100644 --- a/TST.h +++ b/TST.h @@ -5,6 +5,7 @@ #include "StringST.h" #include #include +#include template class TST : public StringST { @@ -21,18 +22,18 @@ class TST : public StringST { std::shared_ptr root; - std::shared_ptr get(std::shared_ptr x, const std::string &key, int d) const; + std::shared_ptr get(std::shared_ptr x, std::string_view key, int d) const; - std::shared_ptr put(std::shared_ptr x, const std::string &key, const Value &val, int d); + std::shared_ptr put(std::shared_ptr x, std::string_view key, const Value &val, int d); - std::shared_ptr remove(std::shared_ptr x, const std::string &key, int d); + std::shared_ptr remove(std::shared_ptr x, std::string_view key, int d); void collect(std::shared_ptr x, const std::string &pre, std::list &q) const; - void collect(std::shared_ptr x, const std::string &pre, const std::string &pat, + void collect(std::shared_ptr x, const std::string &pre, std::string_view pat, std::list &q) const; - int search(std::shared_ptr x, const std::string &s, int d, int length) const; + int search(std::shared_ptr x, std::string_view s, int d, int length) const; std::shared_ptr min(std::shared_ptr x) const; @@ -49,16 +50,15 @@ class TST : public StringST { std::list keys() const override { return keysWithPrefix(""); } - std::string longestPrefixOf(const std::string &s) const override; + std::string longestPrefixOf(std::string_view s) const override; std::list keysWithPrefix(const std::string &pre) const override; - std::list keysThatMatch(const std::string &pat) const override; + std::list keysThatMatch(std::string_view pat) const override; }; template -std::shared_ptr::Node> TST< - Value>::get(std::shared_ptr x, const std::string &key, int d) const { +std::shared_ptr::Node> TST::get(std::shared_ptr x, std::string_view key, int d) const { if (!x) return nullptr; if (key.empty()) { auto preRoot = std::make_shared('\0'); @@ -73,7 +73,7 @@ std::shared_ptr::Node> TST< } template -std::shared_ptr::Node> TST::put(std::shared_ptr x, const std::string &key, +std::shared_ptr::Node> TST::put(std::shared_ptr x, std::string_view key, const Value &val, int d) { char c = key[d]; if (!x) x = std::make_shared(c); @@ -88,7 +88,7 @@ std::shared_ptr::Node> TST::put(std::shared_ptr } template -std::shared_ptr::Node> TST::remove(std::shared_ptr x, const std::string &key, int d) { +std::shared_ptr::Node> TST::remove(std::shared_ptr x, std::string_view key, int d) { if (!x) return nullptr; char c = key[d]; if (c < x->c) x->left = remove(x->left, key, d); @@ -120,7 +120,7 @@ void TST::collect(std::shared_ptr x, const std::string &pre, std::l } template -void TST::collect(std::shared_ptr x, const std::string &pre, const std::string &pat, +void TST::collect(std::shared_ptr x, const std::string &pre, std::string_view pat, std::list &q) const { if (!x) return; int d = pre.length(); @@ -134,7 +134,7 @@ void TST::collect(std::shared_ptr x, const std::string &pre, const } template -int TST::search(std::shared_ptr x, const std::string &s, int d, int length) const { +int TST::search(std::shared_ptr x, std::string_view s, int d, int length) const { if (!x) return length; char c = s[d]; if (c < x->c) return search(x->left, s, d, length); @@ -169,9 +169,9 @@ std::optional TST::get(const std::string &key) const { } template -std::string TST::longestPrefixOf(const std::string &s) const { +std::string TST::longestPrefixOf(std::string_view s) const { int length = search(root, s, 0, 0); - return s.substr(0, length); + return std::string(s.substr(0, length)); } template @@ -186,7 +186,7 @@ std::list TST::keysWithPrefix(const std::string &pre) const } template -std::list TST::keysThatMatch(const std::string &pat) const { +std::list TST::keysThatMatch(std::string_view pat) const { std::list q; collect(root, "", pat, q); return q; diff --git a/OrderedSTTest.cpp b/TestOrderedST.cpp similarity index 94% rename from OrderedSTTest.cpp rename to TestOrderedST.cpp index 57b0657..9909867 100644 --- a/OrderedSTTest.cpp +++ b/TestOrderedST.cpp @@ -1,8 +1,8 @@ -#include "OrderedSTTest.h" -#include "STTest.h" +#include "TestOrderedST.h" +#include "TestST.h" #include -namespace STTest { +namespace TestST { void init(OrderedST &st, std::istream &is, std::ostream &os) { init(static_cast &>(st), is, os); os << "min = " << st.min().value_or(INVALID_KEY) << std::endl; @@ -29,7 +29,7 @@ namespace STTest { listAll(st, os); } - void testOrderedST(OrderedST &st, std::ostream &os) { + void testOrderedST(const OrderedST &st, std::ostream &os) { // print keys in order using select os << "Testing select:" << std::endl; os << "--------------------------------" << std::endl; diff --git a/OrderedSTTest.h b/TestOrderedST.h similarity index 61% rename from OrderedSTTest.h rename to TestOrderedST.h index 16e547f..f41e448 100644 --- a/OrderedSTTest.h +++ b/TestOrderedST.h @@ -1,19 +1,19 @@ -#ifndef ORDEREDSTTEST_H -#define ORDEREDSTTEST_H +#ifndef TESTORDEREDST_H +#define TESTORDEREDST_H #include #include "OrderedST.h" -namespace STTest { +namespace TestST { void init(OrderedST &st, std::istream &is, std::ostream &os); void removeSome(OrderedST &st, std::ostream &os); void removeAll(OrderedST &st, std::ostream &os); - void testOrderedST(OrderedST &st, std::ostream &os); + void testOrderedST(const OrderedST &st, std::ostream &os); } -#endif //ORDEREDSTTEST_H +#endif //TESTORDEREDST_H diff --git a/STTest.cpp b/TestST.cpp similarity index 88% rename from STTest.cpp rename to TestST.cpp index bc8b460..2aa5dd2 100644 --- a/STTest.cpp +++ b/TestST.cpp @@ -1,14 +1,14 @@ -#include "STTest.h" +#include "TestST.h" #include -namespace STTest { +namespace TestST { void init(ST &st, std::istream &is, std::ostream &os) { std::string word; for (int i = 0; is >> word; ++i) st.put(word, i); os << "size = " << st.size() << std::endl; } - void listAll(ST &st, std::ostream &os) { + void listAll(const ST &st, std::ostream &os) { // print keys using keys() for (const auto &s: st.keys()) { os << s << " " << st.get(s).value_or(INVALID_VALUE) << std::endl; @@ -36,7 +36,7 @@ namespace STTest { listAll(st, os); } - void testKeys(ST &st, std::ostream &os) { + void testKeys(const ST &st, std::ostream &os) { os << "Testing keys():" << std::endl; os << "--------------------------------" << std::endl; listAll(st, os); diff --git a/STTest.h b/TestST.h similarity index 62% rename from STTest.h rename to TestST.h index 75805f2..19b4533 100644 --- a/STTest.h +++ b/TestST.h @@ -1,24 +1,24 @@ -#ifndef STTEST_H -#define STTEST_H +#ifndef TESTST_H +#define TESTST_H #include #include "ST.h" -namespace STTest { +namespace TestST { inline constexpr char INVALID_KEY[] = ""; inline constexpr int INVALID_VALUE = -1; void init(ST &st, std::istream &is, std::ostream &os); - void listAll(ST &st, std::ostream &os); + void listAll(const ST &st, std::ostream &os); void removeSome(ST &st, std::ostream &os); void removeAll(ST &st, std::ostream &os); - void testKeys(ST &st, std::ostream &os); + void testKeys(const ST &st, std::ostream &os); } -#endif //STTEST_H +#endif //TESTST_H diff --git a/StringSTTest.cpp b/TestStringST.cpp similarity index 87% rename from StringSTTest.cpp rename to TestStringST.cpp index b6f1078..1cb7597 100644 --- a/StringSTTest.cpp +++ b/TestStringST.cpp @@ -1,7 +1,7 @@ -#include "StringSTTest.h" +#include "TestStringST.h" -namespace STTest { - void testStringST(StringST &st, std::ostream &os) { +namespace TestST { + void testStringST(const StringST &st, std::ostream &os) { os << "longestPrefixOf(\"shellsort\"):" << std::endl; os << "--------------------------------" << std::endl; os << st.longestPrefixOf("shellsort") << std::endl; @@ -24,6 +24,5 @@ namespace STTest { for (const auto &s: st.keysThatMatch(".he.l.")) { os << s << std::endl; } - os << std::endl; } } diff --git a/TestStringST.h b/TestStringST.h new file mode 100644 index 0000000..08ff410 --- /dev/null +++ b/TestStringST.h @@ -0,0 +1,13 @@ +#ifndef TESTSTRINGST_H +#define TESTSTRINGST_H + + +#include +#include "StringST.h" + +namespace TestST { + void testStringST(const StringST &st, std::ostream &os); +} + + +#endif //TESTSTRINGST_H diff --git a/TrieST.h b/TrieST.h index 6d39168..13a1134 100644 --- a/TrieST.h +++ b/TrieST.h @@ -6,6 +6,7 @@ #include #include #include +#include template class TrieST : public StringST { @@ -20,18 +21,18 @@ class TrieST : public StringST { std::shared_ptr root; - std::shared_ptr get(std::shared_ptr x, const std::string &key, int d) const; + std::shared_ptr get(std::shared_ptr x, std::string_view key, int d) const; - std::shared_ptr put(std::shared_ptr x, const std::string &key, const Value &val, int d); + std::shared_ptr put(std::shared_ptr x, std::string_view key, const Value &val, int d); - std::shared_ptr remove(std::shared_ptr x, const std::string &key, int d); + std::shared_ptr remove(std::shared_ptr x, std::string_view key, int d); void collect(std::shared_ptr x, const std::string &pre, std::list &q) const; - void collect(std::shared_ptr x, const std::string &pre, const std::string &pat, + void collect(std::shared_ptr x, const std::string &pre, std::string_view pat, std::list &q) const; - int search(std::shared_ptr x, const std::string &s, int d, int length) const; + int search(std::shared_ptr x, std::string_view s, int d, int length) const; public: std::optional get(const std::string &key) const override; @@ -44,15 +45,15 @@ class TrieST : public StringST { std::list keys() const override { return keysWithPrefix(""); } - std::string longestPrefixOf(const std::string &s) const override; + std::string longestPrefixOf(std::string_view s) const override; std::list keysWithPrefix(const std::string &pre) const override; - std::list keysThatMatch(const std::string &pat) const override; + std::list keysThatMatch(std::string_view pat) const override; }; template -std::shared_ptr::Node> TrieST::get(std::shared_ptr x, const std::string &key, +std::shared_ptr::Node> TrieST::get(std::shared_ptr x, std::string_view key, int d) const { if (!x) return nullptr; if (d == key.length()) return x; @@ -61,7 +62,7 @@ std::shared_ptr::Node> TrieST::get(std::shared_ptr } template -std::shared_ptr::Node> TrieST::put(std::shared_ptr x, const std::string &key, +std::shared_ptr::Node> TrieST::put(std::shared_ptr x, std::string_view key, const Value &val, int d) { if (!x) x = std::make_shared(); if (d == key.length()) { @@ -75,7 +76,7 @@ std::shared_ptr::Node> TrieST::put(std::shared_ptr } template -std::shared_ptr::Node> TrieST::remove(std::shared_ptr x, const std::string &key, +std::shared_ptr::Node> TrieST::remove(std::shared_ptr x, std::string_view key, int d) { if (!x) return nullptr; if (d == key.length()) { @@ -102,7 +103,7 @@ void TrieST::collect(std::shared_ptr x, const std::string &pre, std } template -void TrieST::collect(std::shared_ptr x, const std::string &pre, const std::string &pat, +void TrieST::collect(std::shared_ptr x, const std::string &pre, std::string_view pat, std::list &q) const { if (!x) return; int d = pre.length(); @@ -116,7 +117,7 @@ void TrieST::collect(std::shared_ptr x, const std::string &pre, con } template -int TrieST::search(std::shared_ptr x, const std::string &s, int d, int length) const { +int TrieST::search(std::shared_ptr x, std::string_view s, int d, int length) const { if (!x) return length; if (x->val) length = d; if (d == s.length()) return length; @@ -132,20 +133,20 @@ std::optional TrieST::get(const std::string &key) const { } template -std::string TrieST::longestPrefixOf(const std::string &s) const { +std::string TrieST::longestPrefixOf(std::string_view s) const { int length = search(root, s, 0, 0); - return s.substr(0, length); + return std::string(s.substr(0, length)); } template std::list TrieST::keysWithPrefix(const std::string &pre) const { std::list q; - collect(get(root, pre, 0), pre, q); + collect(get(root, pre, 0), std::string(pre), q); return q; } template -std::list TrieST::keysThatMatch(const std::string &pat) const { +std::list TrieST::keysThatMatch(std::string_view pat) const { std::list q; collect(root, "", pat, q); return q; diff --git a/main_GREP.cpp b/main_GREP.cpp new file mode 100644 index 0000000..85f2347 --- /dev/null +++ b/main_GREP.cpp @@ -0,0 +1,30 @@ +/****************************************************************************** + * % more tinyL.txt + * AC + * AD + * AAA + * ABD + * ADD + * BCD + * ABCCBD + * BABAAA + * BABBAAA + * + * % ./GREP "(A*B|AC)D" < tinyL.txt + * ABD + * ABCCBD + * + ******************************************************************************/ + + +#include +#include "NFA.h" +#include + +int main(int argc, char *argv[]) { + std::string regexp = "(.*" + std::string(argv[1]) + ".*)"; + NFA nfa(regexp); + for (std::string txt; std::getline(std::cin, txt);) + if (nfa.recognizes(txt)) std::cout << txt << std::endl; + return 0; +} diff --git a/main_ST.cpp.in b/main_TestST.cpp.in similarity index 57% rename from main_ST.cpp.in rename to main_TestST.cpp.in index 051ea90..21f5116 100644 --- a/main_ST.cpp.in +++ b/main_TestST.cpp.in @@ -1,10 +1,10 @@ #include -#include "STTest.h" +#include "TestST.h" #ifdef ORDERED -#include "OrderedSTTest.h" +#include "TestOrderedST.h" #endif #ifdef STRING -#include "StringSTTest.h" +#include "TestStringST.h" #endif #include "@ST@.h" @@ -14,20 +14,20 @@ int main(int argc, char *argv[]) { #else @ST@ st{@ST_INIT_ARGS@}; #endif - STTest::init(st, std::cin, std::cout); + TestST::init(st, std::cin, std::cout); std::cout << std::endl; - STTest::testKeys(st, std::cout); + TestST::testKeys(st, std::cout); std::cout << std::endl; #ifdef ORDERED - STTest::testOrderedST(st, std::cout); + TestST::testOrderedST(st, std::cout); std::cout << std::endl; #endif #ifdef STRING - STTest::testStringST(st, std::cout); + TestST::testStringST(st, std::cout); std::cout << std::endl; #endif - STTest::removeSome(st, std::cout); + TestST::removeSome(st, std::cout); std::cout << std::endl; - STTest::removeAll(st, std::cout); + TestST::removeAll(st, std::cout); return 0; }