Skip to content

Commit d869a3f

Browse files
committed
added false positives to config + updated regex
1 parent 8f51d09 commit d869a3f

File tree

4 files changed

+206
-41
lines changed

4 files changed

+206
-41
lines changed

config/config.php

+33
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,39 @@
8383
'/z/' => ['z', 'Ζ', 'ž', 'Ž', 'ź', 'Ź', 'ż', 'Ż'],
8484
],
8585

86+
/*
87+
|--------------------------------------------------------------------------
88+
| False Positives
89+
|--------------------------------------------------------------------------
90+
|
91+
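| Legitimate words that happen to contain a profanity (place names, surnames, everyday words). A match whose full surrounding word appears in this list, compared case-insensitively, is not treated as a profanity.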
92+
|
93+
|
94+
*/
95+
'false_positives' => [
96+
'scunthorpe',
97+
'cockburn',
98+
'penistone',
99+
'lightwater',
100+
'assume',
101+
'bass',
102+
'class',
103+
'compass',
104+
'pass',
105+
'dickinson',
106+
'middlesex',
107+
'cockerel',
108+
'butterscotch',
109+
'blackcock',
110+
'countryside',
111+
'arsenal',
112+
'flick',
113+
'flicker',
114+
'analyst',
115+
'cocktail',
116+
],
117+
118+
86119
/*
87120
|--------------------------------------------------------------------------
88121
| Profanities

src/BlaspExpressionService.php

+8-9
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ private function loadConfiguration()
9090
private function generateSeparatorExpression(): string
{
    // Build the separator fragment from the configured separators and
    // $this->escapedSeparatorCharacters. This is the only return: a second
    // statement that followed it could never execute and read an undefined
    // $separatorExpression variable, so it has been dropped.
    return $this->generateEscapedExpression($this->separators, $this->escapedSeparatorCharacters);
}
9495

9596
/**
@@ -101,7 +102,7 @@ private function generateSubstitutionExpression(): array
101102

102103
foreach ($this->substitutions as $character => $substitutions) {
103104

104-
$characterExpressions[$character] = $this->generateEscapedExpression($substitutions, [], '+?') . self::SEPARATOR_PLACEHOLDER;
105+
$characterExpressions[$character] = $this->generateEscapedExpression($substitutions, [], '+') . self::SEPARATOR_PLACEHOLDER;
105106
}
106107

107108
return $characterExpressions;
@@ -118,7 +119,6 @@ private function generateEscapedExpression(array $characters = [], array $escape
118119
$regex = $escapedCharacters;
119120

120121
foreach ($characters as $character) {
121-
122122
$regex[] = preg_quote($character, '/');
123123
}
124124

@@ -138,11 +138,6 @@ private function generateProfanityExpressionArray()
138138

139139
$this->profanityExpressions[$this->profanities[$i]] = $this->generateProfanityExpression($this->profanities[$i]);
140140
}
141-
142-
uksort($this->profanityExpressions, function($a, $b) {
143-
144-
return strlen($b) - strlen($a);
145-
});
146141
}
147142

148143
/**
@@ -153,8 +148,12 @@ private function generateProfanityExpressionArray()
153148
*/
154149
private function generateProfanityExpression($profanity): string
155150
{
156-
$expression = '/' . preg_replace(array_keys($this->characterExpressions), array_values($this->characterExpressions), $profanity) . '(?:s?)?\b/i';
151+
$expression = preg_replace(array_keys($this->characterExpressions), array_values($this->characterExpressions), $profanity);
152+
153+
$expression = str_replace(self::SEPARATOR_PLACEHOLDER, $this->separatorExpression, $expression);
154+
155+
$expression = '/' . $expression . '(?:s?)\b/i';
157156

158-
return str_replace(self::SEPARATOR_PLACEHOLDER, $this->separatorExpression, $expression);
157+
return $expression;
159158
}
160159
}

src/BlaspService.php

+74-25
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ class BlaspService extends BlaspExpressionService
1111
*
1212
* @var string
1313
*/
14-
public string $sourceString;
14+
public string $sourceString = '';
1515

1616
/**
1717
* The sanitised string with profanities masked.
1818
*
1919
* @var string
2020
*/
21-
public string $cleanString;
21+
public string $cleanString = '';
2222

2323
/**
2424
* A boolean value indicating if the incoming string
@@ -81,56 +81,105 @@ public function check(string $string): self
8181
*/
8282
private function handle(): self
8383
{
84-
foreach ($this->profanityExpressions as $profanity => $expression) {
84+
// Convert false positives to lowercase for case-insensitive comparison
85+
$falsePositives = array_map('strtolower', config('blasp.false_positives'));
86+
$continue = true;
8587

86-
while ($this->stringHasProfanity($expression)) {
88+
// Sort profanities by length (longer first) to match longer profanities first
89+
uksort($this->profanityExpressions, function($a, $b) {
90+
return strlen($b) - strlen($a); // Sort by length, descending
91+
});
8792

88-
$this->hasProfanity = true;
93+
// Loop through until no more profanities are detected
94+
while ($continue) {
95+
$continue = false;
8996

90-
if (!in_array($profanity, $this->uniqueProfanitiesFound)) {
91-
$this->uniqueProfanitiesFound[] = $profanity;
92-
}
97+
foreach ($this->profanityExpressions as $profanity => $expression) {
98+
preg_match_all($expression, $this->cleanString, $matches, PREG_OFFSET_CAPTURE);
9399

94-
$this->generateProfanityReplacement($expression);
95-
}
100+
if (!empty($matches[0])) {
101+
foreach ($matches[0] as $match) {
102+
// Get the start and length of the match
103+
$start = $match[1];
104+
$length = strlen($match[0]);
105+
106+
// Use boundaries to extract the full word around the match
107+
$fullWord = $this->getFullWordContext($this->cleanString, $start, $length);
108+
109+
// Check if the full word (in lowercase) is in the false positives list
110+
if (in_array(strtolower($fullWord), $falsePositives, true)) {
111+
continue; // Skip checking this word if it's a false positive
112+
}
113+
114+
$continue = true; // Continue if we find any profanities
96115

116+
$this->hasProfanity = true;
117+
118+
// Replace the found profanity
119+
$this->generateProfanityReplacement($match);
120+
121+
// Avoid adding duplicates to the unique list
122+
if (!in_array($profanity, $this->uniqueProfanitiesFound)) {
123+
$this->uniqueProfanitiesFound[] = $profanity;
124+
}
125+
}
126+
}
127+
}
97128
}
98129

99130
return $this;
100131
}
101132

102133
/**
103-
* Check if the incoming string contains any profanities.
134+
* Mask the profanities found in the incoming string.
104135
*
105136
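* @param array $match One preg_match_all() PREG_OFFSET_CAPTURE entry: [0] is the matched text, [1] is its byte offset in the clean string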
106-
* @return bool
137+
* @return void
107138
*/
108-
private function stringHasProfanity(string $profanity): bool
139+
private function generateProfanityReplacement(array $match): void
109140
{
110-
return preg_match($profanity, $this->cleanString) === 1;
141+
$start = $match[1]; // Starting position of the profanity
142+
$length = mb_strlen($match[0], 'UTF-8'); // Length of the profanity
143+
$replacement = str_repeat("*", $length); // Mask with asterisks
144+
145+
// Replace only the profanity in the cleanString, preserving the original case and spaces
146+
$this->cleanString = mb_substr($this->cleanString, 0, $start) .
147+
$replacement .
148+
mb_substr($this->cleanString, $start + $length);
149+
150+
// Increment profanity count
151+
$this->profanitiesCount++;
111152
}
112153

113154
/**
114-
* Mask the profanities found in the incoming string.
155+
* Get the full word context surrounding the matched profanity.
115156
*
116-
* @param string $profanity
157+
* @param string $string
158+
* @param int $start
159+
* @param int $length
117160
* @return string
118161
*/
119-
private function generateProfanityReplacement(string $profanity): string
162+
private function getFullWordContext(string $string, int $start, int $length): string
120163
{
121-
preg_match_all($profanity, $this->cleanString, $matches, PREG_OFFSET_CAPTURE);
164+
// Start from the bounds of the match itself; the loops below widen them to the enclosing run of \w characters
165+
$left = $start;
166+
$right = $start + $length;
122167

123-
foreach ($matches[0] as $match) {
124-
$start = $match[1];
125-
$length = mb_strlen($match[0], 'UTF-8');
126-
$replacement = str_repeat("*", $length);
168+
// Move the left pointer backwards to find the start of the full word
169+
while ($left > 0 && preg_match('/\w/', $string[$left - 1])) {
170+
$left--;
171+
}
127172

128-
$this->cleanString = substr_replace($this->cleanString, $replacement, $start, $length);
129-
130-
$this->profanitiesCount++;
173+
// Move the right pointer forwards to find the end of the full word
174+
while ($right < strlen($string) && preg_match('/\w/', $string[$right])) {
175+
$right++;
131176
}
177+
178+
// Return the full word surrounding the matched profanity
179+
return substr($string, $left, $right - $left);
132180
}
133181

182+
134183
/**
135184
* Get the incoming string.
136185
*

tests/BlaspCheckTest.php

+91-7
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public function setUp(): void
1414
{
1515
parent::setUp();
1616

17-
Config::set('blasp.profanities', ['fucking', 'shit', 'cunt', 'fuck']);
17+
Config::set('blasp.profanities', ['fucking', 'shit', 'cunt', 'fuck', 'penis', 'cock', 'twat', 'ass', 'dick', 'sex', 'butt', 'arse', 'lick', 'anal']);
1818
Config::set('blasp.separators', [' ', '-', '_']);
1919
Config::set('blasp.substitutions', [
2020
'/a/' => ['a', '4', '@', 'Á', 'á', 'À', 'Â', 'à', 'Â', 'â', 'Ä', 'ä', 'Ã', 'ã', 'Å', 'å', 'æ', 'Æ', 'α', 'Δ', 'Λ', 'λ'],
@@ -120,14 +120,12 @@ public function test_multiple_profanities_no_spaces()
120120
{
121121
$blaspService = new BlaspService();
122122

123-
$result = $blaspService->check('cuntfuck');
124-
125-
dd($result);
123+
$result = $blaspService->check('cuntfuck shit');
126124

127125
$this->assertTrue($result->hasProfanity);
128-
$this->assertSame(2, $result->profanitiesCount);
129-
$this->assertCount(2, $result->uniqueProfanitiesFound);
130-
$this->assertSame('********', $result->cleanString);
126+
$this->assertSame(3, $result->profanitiesCount);
127+
$this->assertCount(3, $result->uniqueProfanitiesFound);
128+
$this->assertSame('******** ****', $result->cleanString);
131129
}
132130

133131
public function test_multiple_profanities()
@@ -154,4 +152,90 @@ public function test_scunthorpe_problem()
154152
$this->assertSame('I live in a town called Scunthorpe', $result->cleanString);
155153
}
156154

155+
/**
 * A place name that merely embeds a profanity ("Penistone") must be left
 * untouched: nothing flagged, nothing counted, nothing masked.
 */
public function test_penistone_problem()
{
    $blaspService = new BlaspService();

    $result = $blaspService->check('I live in a town called Penistone');

    // assertFalse states the expectation directly instead of negating inside assertTrue.
    $this->assertFalse($result->hasProfanity);
    $this->assertSame(0, $result->profanitiesCount);
    $this->assertCount(0, $result->uniqueProfanitiesFound);
    $this->assertSame('I live in a town called Penistone', $result->cleanString);
}
166+
167+
/**
 * Legitimate words that resemble or embed a profanity (e.g. "Scunthorpe")
 * must pass through the checker unflagged and unmodified.
 */
public function test_false_positives()
{
    $words = [
        'Scunthorpe',
        'Cockburn',
        'Penistone',
        'Lightwater',
        'Assume',
        'bass',
        'class',
        'Compass',
        'Pass',
        'Dickinson',
        'Middlesex',
        'Cockerel',
        'Butterscotch',
        'Blackcock',
        'Countryside',
        'Arsenal',
        'Flick',
        'Flicker',
        'Analyst',
        'blackCocktail',
    ];

    foreach ($words as $word) {
        // A fresh service per word so counters from one word cannot leak into the next.
        $blaspService = new BlaspService();

        $result = $blaspService->check($word);

        // Each assertion names the word so a failure inside the loop is identifiable.
        $this->assertFalse($result->hasProfanity, "'{$word}' was flagged as containing a profanity");
        $this->assertSame(0, $result->profanitiesCount, "'{$word}' produced a non-zero profanity count");
        $this->assertCount(0, $result->uniqueProfanitiesFound, "'{$word}' produced unique profanities");
        $this->assertSame($word, $result->cleanString, "'{$word}' was modified by the checker");
    }
}
204+
205+
/**
 * Two compound words built from the same two profanities in opposite order:
 * four hits in total, two distinct profanities, both words fully masked.
 */
public function test_cuntfuck_fuckcunt()
{
    $outcome = (new BlaspService())->check('cuntfuck fuckcunt');

    self::assertTrue($outcome->hasProfanity);
    self::assertSame(4, $outcome->profanitiesCount);
    self::assertCount(2, $outcome->uniqueProfanitiesFound);
    self::assertSame('******** ********', $outcome->cleanString);
}
216+
217+
/**
 * Four different profanities run together without separators: each is
 * counted once and the whole string is masked.
 */
public function test_fucking_shit_cunt_fuck()
{
    $outcome = (new BlaspService())->check('fuckingshitcuntfuck');

    self::assertTrue($outcome->hasProfanity);
    self::assertSame(4, $outcome->profanitiesCount);
    self::assertCount(4, $outcome->uniqueProfanitiesFound);
    self::assertSame('*******************', $outcome->cleanString);
}
228+
229+
/**
 * A single profanity followed by punctuation: only the word is masked and
 * the surrounding punctuation is preserved.
 */
public function test_billy_butcher()
{
    $outcome = (new BlaspService())->check('oi! cunt!');

    self::assertTrue($outcome->hasProfanity);
    self::assertSame(1, $outcome->profanitiesCount);
    self::assertCount(1, $outcome->uniqueProfanitiesFound);
    self::assertSame('oi! ****!', $outcome->cleanString);
}
240+
157241
}

0 commit comments

Comments
 (0)