Skip to content

Commit

Permalink
5.9 Regular expression pattern matching
Browse files Browse the repository at this point in the history
  • Loading branch information
landerrosette committed Nov 28, 2024
1 parent 4317564 commit 304b8d0
Show file tree
Hide file tree
Showing 27 changed files with 273 additions and 120 deletions.
9 changes: 5 additions & 4 deletions BoyerMoore.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#include "BoyerMoore.h"
#include <utility>

// Stores the pattern and precomputes the `right` table:
// right[c] is the index of the rightmost occurrence of character c in the
// pattern, or -1 if c does not occur in it (alphabet size R = 256).
BoyerMoore::BoyerMoore(const std::string &pat) : pat(pat) {
BoyerMoore::BoyerMoore(std::string pat) : pat(std::move(pat)) {
// Compute the skip table
int M = pat.length(), R = 256;
// The by-value parameter `pat` has been moved into the member, so the member
// is read explicitly through this->.
int M = this->pat.length(), R = 256;
right = std::vector(R, -1);
for (int j = 0; j < M; ++j) right[pat[j]] = j;
for (int j = 0; j < M; ++j) right[this->pat[j]] = j;
}

int BoyerMoore::search(const std::string &txt) const {
int BoyerMoore::search(std::string_view txt) const {
int N = txt.length(), M = pat.length();
for (int skip, i = 0; i <= N - M; i += skip) {
// 模式字符串和文本在位置i匹配吗?
Expand Down
5 changes: 3 additions & 2 deletions BoyerMoore.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@

#include "SubstrSearcher.h"
#include <vector>
#include <string>

// Substring searcher (README entry 5.7, Boyer-Moore) implementing the
// SubstrSearcher interface.
class BoyerMoore : public SubstrSearcher {
private:
// Per-character table filled by the constructor: rightmost index of each
// character in `pat`, or -1 when the character is absent.
std::vector<int> right;
// The pattern to search for (owned copy).
std::string pat;

public:
// Stores the pattern and builds the `right` table.
explicit BoyerMoore(const std::string &pat);
explicit BoyerMoore(std::string pat);

// Searches `txt` for the stored pattern.
// NOTE(review): the return contract is defined by SubstrSearcher::search, which is not visible here - confirm.
int search(const std::string &txt) const override;
int search(std::string_view txt) const override;
};


Expand Down
19 changes: 11 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.28)
cmake_minimum_required(VERSION 3.20)
project(algs4)

set(CMAKE_CXX_STANDARD 17)
Expand Down Expand Up @@ -31,20 +31,20 @@ foreach (ST "SequentialSearchST" "BinarySearchST" "BST" "RedBlackBST" "SeparateC
set(ST_INIT_ARGS "20")
endif ()

configure_file(main_ST.cpp.in main_${ST}.cpp @ONLY)
configure_file(main_TestST.cpp.in main_Test${ST}.cpp @ONLY)

add_executable(${ST} main_${ST}.cpp STTest.cpp)
add_executable(Test${ST} main_Test${ST}.cpp TestST.cpp)

target_include_directories(${ST} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(Test${ST} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

if (ST STREQUAL "BinarySearchST" OR ST STREQUAL "BST" OR ST STREQUAL "RedBlackBST")
target_sources(${ST} PRIVATE OrderedSTTest.cpp)
target_sources(Test${ST} PRIVATE TestOrderedST.cpp)

target_compile_definitions(${ST} PRIVATE ORDERED)
target_compile_definitions(Test${ST} PRIVATE ORDERED)
elseif (ST STREQUAL "TrieST" OR ST STREQUAL "TST")
target_sources(${ST} PRIVATE StringSTTest.cpp)
target_sources(Test${ST} PRIVATE TestStringST.cpp)

target_compile_definitions(${ST} PRIVATE STRING)
target_compile_definitions(Test${ST} PRIVATE STRING)
endif ()

unset(ST_INIT_ARGS)
Expand Down Expand Up @@ -112,3 +112,6 @@ foreach (SUBSTR_SEARCHER "KMP" "BoyerMoore" "RabinKarp")

target_include_directories(${SUBSTR_SEARCHER} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
endforeach ()

# 5.9
add_executable(GREP main_GREP.cpp NFA.cpp Digraph.cpp DirectedDFS.cpp)
13 changes: 7 additions & 6 deletions KMP.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
#include "KMP.h"
#include <utility>

// Stores the pattern and builds the DFA used by search():
// dfa[c][j] is the state entered when character c is read in state j
// (R = 256 rows, one column per pattern position).
// NOTE(review): dfa[...][0] is written unconditionally, so this presumably
// requires a non-empty pattern - confirm with callers.
KMP::KMP(const std::string &pat) : pat(pat) {
KMP::KMP(std::string pat) : pat(std::move(pat)) {
// Build the DFA
int M = pat.length(), R = 256;
// The by-value parameter `pat` has been moved into the member, so the member
// is read explicitly through this->.
int M = this->pat.length(), R = 256;
dfa = std::vector(R, std::vector<int>(M));
dfa[pat[0]][0] = 1;
dfa[this->pat[0]][0] = 1;
for (int X = 0, j = 1; j < M; ++j) {
// Compute dfa[][j]
for (int c = 0; c < R; ++c) dfa[c][j] = dfa[c][X]; // Copy the mismatch transitions from the restart state
dfa[pat[j]][j] = j + 1; // Set the match transition
X = dfa[pat[j]][X]; // Update the restart state
dfa[this->pat[j]][j] = j + 1; // Set the match transition
X = dfa[this->pat[j]][X]; // Update the restart state
}
}

int KMP::search(const std::string &txt) const {
int KMP::search(std::string_view txt) const {
int i, j, N = txt.length(), M = pat.length();
for (i = 0, j = 0; i < N && j < M; ++i) j = dfa[txt[i]][j];
if (j == M) return i - M; // 找到匹配(到达模式字符串的末尾)
Expand Down
5 changes: 3 additions & 2 deletions KMP.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@

#include "SubstrSearcher.h"
#include <vector>
#include <string>

// Substring searcher (README entry 5.6, Knuth-Morris-Pratt) implementing the
// SubstrSearcher interface.
class KMP : public SubstrSearcher {
private:
// The pattern to search for (owned copy).
std::string pat;
// Transition table built by the constructor, indexed as dfa[character][state].
std::vector<std::vector<int> > dfa;

public:
// Stores the pattern and builds the DFA.
explicit KMP(const std::string &pat);
explicit KMP(std::string pat);

// Runs the DFA over `txt` looking for the stored pattern.
// NOTE(review): the full return contract is defined by SubstrSearcher::search, which is not visible here - confirm.
int search(const std::string &txt) const override;
int search(std::string_view txt) const override;
};


Expand Down
1 change: 1 addition & 0 deletions LSD.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@


#include <vector>
#include <string>
#include "StringSorting.h"

class LSD : public StringSorting {
Expand Down
1 change: 1 addition & 0 deletions MSD.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <vector>
#include <string_view>
#include <string>
#include "Sorting.h"
#include "StringSorting.h"

Expand Down
51 changes: 51 additions & 0 deletions NFA.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#include "NFA.h"
#include <list>
#include <stdexcept>
#include "DirectedDFS.h"

// Builds the NFA for `regexp`.
//
// State i corresponds to character re[i]; state M is the accept state. Match
// transitions are implicit (re[i] -> i + 1); this constructor adds only the
// epsilon transitions to the digraph G.
//
// Supported metacharacters: '(' ')' for grouping, '|' for alternation inside a
// parenthesized group (any number of alternatives), '*' for closure, and '.'
// as a wildcard (handled in recognizes()).
//
// Throws std::invalid_argument if the parentheses are unbalanced or a '|'
// appears outside of a parenthesized group.
NFA::NFA(std::string_view regexp) : re(regexp.begin(), regexp.end()), M(re.size()), G(M + 1) {
    std::list<int> ops; // stack (front = top) of indices of pending '(' and '|'
    for (int i = 0; i < M; ++i) {
        int lp = i; // left position: start of the sub-expression ending at i
        if (re[i] == '(' || re[i] == '|') {
            ops.push_front(i);
        } else if (re[i] == ')') {
            // Pop every '|' belonging to this group, then its opening '('.
            std::list<int> orPositions;
            while (!ops.empty() && re[ops.front()] == '|') {
                orPositions.push_back(ops.front());
                ops.pop_front();
            }
            if (ops.empty()) throw std::invalid_argument("Invalid regular expression: unmatched ')'");
            lp = ops.front(); // only '(' and '|' are ever pushed, so this is the '('
            ops.pop_front();
            for (int orPos: orPositions) {
                G.addEdge(lp, orPos + 1); // from '(' into the alternative after this '|'
                G.addEdge(orPos, i); // from the '|' to the closing ')'
            }
        }
        if (i < M - 1 && re[i + 1] == '*') {
            // One-character lookahead: closure applies to the sub-expression starting at lp
            G.addEdge(lp, i + 1);
            G.addEdge(i + 1, lp);
        }
        if (re[i] == '(' || re[i] == '*' || re[i] == ')') G.addEdge(i, i + 1);
    }
    // Anything left on the stack is an unclosed '(' or a '|' outside any group.
    if (!ops.empty()) throw std::invalid_argument("Invalid regular expression: unmatched '(' or '|'");
}

// Returns true if the NFA accepts `txt` in its entirety, false otherwise.
//
// Simulates the NFA by tracking the set of states reachable after each input
// character: first the epsilon-closure of the start state, then, per
// character, the states reached by a match transition followed by their
// epsilon-closure.
bool NFA::recognizes(std::string_view txt) const {
    // States reachable from state 0 through epsilon transitions alone.
    std::list<int> pc;
    {
        DirectedDFS dfs(G, 0);
        for (int v = 0; v < G.V(); ++v)
            if (dfs.marked(v)) pc.push_front(v);
    }

    for (const char c: txt) {
        // States reachable by consuming c from any current state
        // (the accept state M has no character to match).
        std::list<int> match;
        for (int v: pc)
            if (v < M && (re[v] == c || re[v] == '.')) match.push_front(v + 1);

        // No state can consume c: the reachable set would stay empty for the
        // rest of the input, so the text cannot be accepted.
        if (match.empty()) return false;

        // Epsilon-closure of the states reached after the match transitions.
        pc.clear();
        DirectedDFS dfs(G, match);
        for (int v = 0; v < G.V(); ++v)
            if (dfs.marked(v)) pc.push_front(v);
    }

    // Accepted iff the accept state is among the reachable states.
    for (int v: pc)
        if (v == M) return true;
    return false;
}
22 changes: 22 additions & 0 deletions NFA.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef NFA_H
#define NFA_H


#include "Digraph.h"
#include <vector>
#include <string_view>

// Nondeterministic finite automaton for regular expression pattern matching
// (README entry 5.9).
// The members are declared in the order re, M, G; the constructor's
// initializer list computes M from re.size() and G from M + 1.
class NFA {
private:
std::vector<char> re; // match transitions (the characters of the regular expression)
int M; // number of states
Digraph G; // epsilon transitions

public:
// Builds the NFA for the given regular expression.
explicit NFA(std::string_view regexp);

// Reports whether the NFA accepts `txt`.
bool recognizes(std::string_view txt) const;
};


#endif //NFA_H
1 change: 1 addition & 0 deletions Quick3string.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include "StringSorting.h"
#include <vector>
#include <string>

class Quick3string : public StringSorting {
private:
Expand Down
56 changes: 47 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## Overview

<IMG SRC="http://algs4.cs.princeton.edu/cover.png" align=right hspace=25 width=100 alt = "Algorithms 4/e textbook">
This repository contains C++ implementations of the algorithms in the textbook
This repository contains C++ implementations of the algorithms and (a few) clients in the textbook
<a href = "http://amzn.to/13VNJi7">Algorithms, 4th Edition</a> by Robert Sedgewick and Kevin Wayne.

## Algorithms
Expand Down Expand Up @@ -56,12 +56,50 @@ This repository contains C++ implementations of the algorithms in the textbook
- **5.6** Substring search (Knuth-Morris-Pratt): [KMP.h](KMP.h) | [KMP.cpp](KMP.cpp)
- **5.7** Substring search (Boyer-Moore): [BoyerMoore.h](BoyerMoore.h) | [BoyerMoore.cpp](BoyerMoore.cpp)
- **5.8** Substring search (Rabin-Karp): [RabinKarp.h](RabinKarp.h) | [RabinKarp.cpp](RabinKarp.cpp)
- **5.9** Regular expression pattern matching: [NFA.h](NFA.h) | [NFA.cpp](NFA.cpp)
- ...

## Clients

### Fundamentals

- UF: [main_UF.cpp](main_UF.cpp)

### Sorting

- Selection | Insertion | Shell | Merge | MergeBU | Quick | Quick3way | Heap: [main_Sorting.cpp.in](main_Sorting.cpp.in)
- MaxPQ: [main_MaxPQ.cpp](main_MaxPQ.cpp)

### Symbol Tables

- TestSequentialSearchST | TestBinarySearchST | TestBST | TestRedBlackBST | TestSeparateChainingHashST |
TestLinearProbingHashST: [main_TestST.cpp.in](main_TestST.cpp.in)

### Graphs

- DepthFirstPaths | BreadthFirstPaths: [main_Paths.cpp.in](main_Paths.cpp.in)
- CC | KosarajuSCC: [main_CC.cpp.in](main_CC.cpp.in)
- DirectedDFS: [main_DirectedDFS.cpp](main_DirectedDFS.cpp)
- Topological: [main_Topological.cpp](main_Topological.cpp)
- PrimMST | KruskalMST: [main_MST.cpp.in](main_MST.cpp.in)
- DijkstraSP | AcyclicSP | BellmanFordSP: [main_SP.cpp.in](main_SP.cpp.in)

### Strings

- LSD | MSD | Quick3string: [main_Sorting.cpp.in](main_Sorting.cpp.in)
- TestTrieST | TestTST: [main_TestST.cpp.in](main_TestST.cpp.in)
- KMP | BoyerMoore | RabinKarp: [main_SubstrSearch.cpp.in](main_SubstrSearch.cpp.in)
- GREP: [main_GREP.cpp](main_GREP.cpp)
- ...

## Build and Run

A simple client is provided for each algorithm in `main_*.cpp`s. To build them, ensure you have CMake 3.28 or higher and
a C++17 compatible compiler. Follow these steps:
### Prerequisites

- CMake 3.20 or later
- C++ compiler with C++17 support

### Steps

1. Create and navigate to a build directory:

Expand All @@ -70,30 +108,30 @@ a C++17 compatible compiler. Follow these steps:
cd build
```

2. Configure and build all targets:
2. Configure and build all targets. This will produce all clients:

```shell
cmake ..
cmake --build .
```

Alternatively, you can build a specific target that corresponds to a specific algorithm. For example:
Alternatively, build a specific target that produces a specific client:

```shell
cmake --build . --target UF
```

3. (Optional) Get sample input files from the book's website: https://algs4.cs.princeton.edu/code/.
4. Run the executable. You may redirect the input from a file to save typing:
3. (Optional) Download sample input files from the booksite: https://algs4.cs.princeton.edu/code/.
4. Run the client. You may redirect the input from a file (possibly one obtained in step 3):

```shell
./UF < tinyUF.txt
```

Some algorithms require additional command-line arguments. For example:
Some clients may expect command-line arguments. For example:

```shell
./DepthFirstPaths tinyCG.txt 0
```

This runs the depth-first search algorithm on the `tinyCG.txt` graph, starting from vertex `0`.
This will run `DepthFirstPaths` on the graph in `tinyCG.txt` starting from vertex 0.
6 changes: 3 additions & 3 deletions RabinKarp.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "RabinKarp.h"
#include <random>

long long RabinKarp::hash(const std::string &key, int M) const {
long long RabinKarp::hash(std::string_view key, int M) const {
long long h = 0;
for (int j = 0; j < M; ++j) h = (R * h + key[j]) % Q;
return h;
Expand All @@ -25,12 +25,12 @@ long long RabinKarp::longRandomPrime() {
}
}

// Records the pattern length M, precomputes RM = R^(M-1) % Q, and stores the
// hash of the pattern in patHash. The pattern text itself is not kept.
RabinKarp::RabinKarp(const std::string &pat) : M(pat.length()) {
RabinKarp::RabinKarp(std::string_view pat) : M(pat.length()) {
for (int i = 1; i <= M - 1; ++i) RM = (R * RM) % Q; // Compute R^(M-1) % Q
patHash = hash(pat, M);
}

int RabinKarp::search(const std::string &txt) const {
int RabinKarp::search(std::string_view txt) const {
int N = txt.length();
long long txtHash = hash(txt, M);
if (patHash == txtHash && check(0)) return 0; // 一开始就匹配成功
Expand Down
7 changes: 4 additions & 3 deletions RabinKarp.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@


#include "SubstrSearcher.h"
#include <string_view>

class RabinKarp : public SubstrSearcher {
private:
Expand All @@ -12,16 +13,16 @@ class RabinKarp : public SubstrSearcher {
int R = 256; // 字母表的大小
long long RM = 1; // R^(M-1) % Q

long long hash(const std::string &key, int M) const;
long long hash(std::string_view key, int M) const;

static long long longRandomPrime();

bool check(int i) const { return true; } // 蒙特卡洛算法(只要散列值相同就认为找到了)

public:
explicit RabinKarp(const std::string &pat);
explicit RabinKarp(std::string_view pat);

int search(const std::string &txt) const override;
int search(std::string_view txt) const override;
};


Expand Down
5 changes: 3 additions & 2 deletions StringST.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
#include "ST.h"
#include <string>
#include <list>
#include <string_view>

// Abstract interface for symbol tables keyed by std::string. Extends
// ST<std::string, Value> with three pure-virtual string queries that concrete
// tables must implement.
template<typename Value>
class StringST : public ST<std::string, Value> {
public:
// NOTE(review): presumably returns the longest key that is a prefix of `s` - confirm against implementations.
virtual std::string longestPrefixOf(const std::string &s) const = 0;
virtual std::string longestPrefixOf(std::string_view s) const = 0;

// NOTE(review): presumably returns all keys that start with `pre` - confirm against implementations.
virtual std::list<std::string> keysWithPrefix(const std::string &pre) const = 0;

// NOTE(review): presumably returns all keys matching `pat`; any wildcard syntax is defined by the implementations - confirm.
virtual std::list<std::string> keysThatMatch(const std::string &pat) const = 0;
virtual std::list<std::string> keysThatMatch(std::string_view pat) const = 0;
};


Expand Down
Loading

0 comments on commit 304b8d0

Please sign in to comment.