Skip to content

Commit 301bf49

Browse files
authored
Merge pull request #1 from Blaspsoft/bugs/minor-fixes
Bugs/minor fixes
2 parents 8e3bf02 + c76844d commit 301bf49

File tree

4 files changed

+238
-38
lines changed

4 files changed

+238
-38
lines changed

config/config.php

+33
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,39 @@
8383
'/z/' => ['z', 'Ζ', 'ž', 'Ž', 'ź', 'Ź', 'ż', 'Ż'],
8484
],
8585

86+
/*
87+
|--------------------------------------------------------------------------
88+
| False Positives
89+
|--------------------------------------------------------------------------
90+
|
91+
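| Words that contain a profanity as a substring but are not profane themselves (e.g. "Scunthorpe"); matches found inside these words are ignored.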
92+
|
93+
|
94+
*/
95+
'false_positives' => [
96+
'scunthorpe',
97+
'cockburn',
98+
'penistone',
99+
'lightwater',
100+
'assume',
101+
'bass',
102+
'class',
103+
'compass',
104+
'pass',
105+
'dickinson',
106+
'middlesex',
107+
'cockerel',
108+
'butterscotch',
109+
'blackcock',
110+
'countryside',
111+
'arsenal',
112+
'flick',
113+
'flicker',
114+
'analyst',
115+
'cocktail',
116+
],
117+
118+
86119
/*
87120
|--------------------------------------------------------------------------
88121
| Profanities

src/BlaspExpressionService.php

+8-9
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ private function loadConfiguration()
9090
private function generateSeparatorExpression(): string
{
    // Delegates to generateEscapedExpression() with $this->separators as the
    // characters and $this->escapedSeparatorCharacters as the pre-escaped ones.
    return $this->generateEscapedExpression($this->separators, $this->escapedSeparatorCharacters);
}
9495

9596
/**
@@ -101,7 +102,7 @@ private function generateSubstitutionExpression(): array
101102

102103
foreach ($this->substitutions as $character => $substitutions) {
103104

104-
$characterExpressions[$character] = $this->generateEscapedExpression($substitutions, [], '+?') . self::SEPARATOR_PLACEHOLDER;
105+
$characterExpressions[$character] = $this->generateEscapedExpression($substitutions, [], '+') . self::SEPARATOR_PLACEHOLDER;
105106
}
106107

107108
return $characterExpressions;
@@ -118,7 +119,6 @@ private function generateEscapedExpression(array $characters = [], array $escape
118119
$regex = $escapedCharacters;
119120

120121
foreach ($characters as $character) {
121-
122122
$regex[] = preg_quote($character, '/');
123123
}
124124

@@ -138,11 +138,6 @@ private function generateProfanityExpressionArray()
138138

139139
$this->profanityExpressions[$this->profanities[$i]] = $this->generateProfanityExpression($this->profanities[$i]);
140140
}
141-
142-
uksort($this->profanityExpressions, function($a, $b) {
143-
144-
return strlen($b) - strlen($a);
145-
});
146141
}
147142

148143
/**
@@ -153,8 +148,12 @@ private function generateProfanityExpressionArray()
153148
*/
154149
private function generateProfanityExpression($profanity): string
{
    // The keys of $characterExpressions are used as the search patterns and
    // the values as their replacements, so one preg_replace() call expands
    // every configured character of the profanity.
    $searchPatterns = array_keys($this->characterExpressions);
    $replacements = array_values($this->characterExpressions);
    $body = preg_replace($searchPatterns, $replacements, $profanity);

    // Swap every separator placeholder for the real separator expression.
    $body = str_replace(self::SEPARATOR_PLACEHOLDER, $this->separatorExpression, $body);

    // Add delimiters, an optional trailing "s", a word boundary and the
    // case-insensitive flag.
    return '/' . $body . '(?:s?)\b/i';
}
160159
}

src/BlaspService.php

+71-28
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ class BlaspService extends BlaspExpressionService
1111
*
1212
* @var string
1313
*/
14-
public string $sourceString;
14+
public string $sourceString = '';
1515

1616
/**
1717
* The sanitised string with profanities masked.
1818
*
1919
* @var string
2020
*/
21-
public string $cleanString;
21+
public string $cleanString = '';
2222

2323
/**
2424
* A boolean value indicating if the incoming string
@@ -81,62 +81,105 @@ public function check(string $string): self
8181
*/
8282
private function handle(): self
{
    // Lower-case the configured false positives so whole words can be compared
    // case-insensitively. A missing config key is treated as an empty list
    // instead of handing null to array_map().
    $falsePositives = array_map('strtolower', config('blasp.false_positives') ?? []);

    // Longest profanities first, so a longer word is masked before any
    // shorter profanity it contains.
    uksort($this->profanityExpressions, function ($a, $b) {
        return strlen($b) - strlen($a);
    });

    // Every expression ends in \b, so masking one word can expose a word
    // boundary for a neighbouring profanity (e.g. "cuntfuck"). Rescan until a
    // complete pass masks nothing.
    do {
        $maskedSomething = false;

        foreach ($this->profanityExpressions as $profanity => $expression) {
            preg_match_all($expression, $this->cleanString, $matches, PREG_OFFSET_CAPTURE);

            if (empty($matches[0])) {
                continue;
            }

            // Offsets are byte positions in the string as it was when matched.
            // The mask holds one "*" per character, so masking a multibyte
            // match shortens the string; track that drift so the remaining
            // offsets of this pass stay valid.
            $drift = 0;

            foreach ($matches[0] as $match) {
                $match[1] += $drift;
                $length = strlen($match[0]);

                // Expand the match to the whole word around it and skip it
                // when that word is a known false positive.
                $fullWord = $this->getFullWordContext($this->cleanString, $match[1], $length);

                if (in_array(strtolower($fullWord), $falsePositives, true)) {
                    continue;
                }

                $maskedSomething = true;
                $this->hasProfanity = true;

                // Mask the match in the clean string (this also bumps the count).
                $this->generateProfanityReplacement($match);
                $drift += mb_strlen($match[0], 'UTF-8') - $length;

                // Record each profanity only once.
                if (!in_array($profanity, $this->uniqueProfanitiesFound, true)) {
                    $this->uniqueProfanitiesFound[] = $profanity;
                }
            }
        }
    } while ($maskedSomething);

    return $this;
}
108132

109133
/**
 * Mask one matched profanity in the clean string and count it.
 *
 * @param array $match A PREG_OFFSET_CAPTURE entry: [matched text, byte offset].
 * @return void
 */
private function generateProfanityReplacement(array $match): void
{
    [$matchedText, $byteOffset] = $match;

    // One asterisk per character, so the mask looks as long as the word did.
    $mask = str_repeat('*', mb_strlen($matchedText, 'UTF-8'));

    // PREG_OFFSET_CAPTURE reports byte offsets, so the splice has to be byte
    // based too; mixing them with mb_substr() misplaces the mask whenever
    // multibyte characters precede the match.
    $this->cleanString = substr_replace($this->cleanString, $mask, $byteOffset, strlen($matchedText));

    // Increment profanity count
    $this->profanitiesCount++;
}
119153

120154
/**
 * Expand a match to the whole run of word characters that contains it.
 *
 * Offsets are byte based: the string is walked one byte at a time and each
 * byte is tested against \w.
 *
 * @param string $string The text the match was found in.
 * @param int $start Byte offset at which the match begins.
 * @param int $length Byte length of the match.
 * @return string The match together with any adjoining \w bytes on either side.
 */
private function getFullWordContext(string $string, int $start, int $length): string
{
    $isWordByte = static function (string $byte): bool {
        return preg_match('/\w/', $byte) === 1;
    };

    // Walk left while the preceding byte still belongs to the word.
    $wordStart = $start;
    while ($wordStart > 0 && $isWordByte($string[$wordStart - 1])) {
        $wordStart--;
    }

    // Walk right while the next byte still belongs to the word.
    $wordEnd = $start + $length;
    $totalBytes = strlen($string);
    while ($wordEnd < $totalBytes && $isWordByte($string[$wordEnd])) {
        $wordEnd++;
    }

    return substr($string, $wordStart, $wordEnd - $wordStart);
}
139181

182+
140183
/**
141184
* Get the incoming string.
142185
*

tests/BlaspCheckTest.php

+126-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public function setUp(): void
1414
{
1515
parent::setUp();
1616

17-
Config::set('blasp.profanities', ['fucking', 'shit', 'cunt', 'fuck']);
17+
Config::set('blasp.profanities', ['fucking', 'shit', 'cunt', 'fuck', 'penis', 'cock', 'twat', 'ass', 'dick', 'sex', 'butt', 'arse', 'lick', 'anal', 'clusterfuck', 'bullshit', 'fucked', 'damn', 'crap', 'hell']);
1818
Config::set('blasp.separators', [' ', '-', '_']);
1919
Config::set('blasp.substitutions', [
2020
'/a/' => ['a', '4', '@', 'Á', 'á', 'À', 'Â', 'à', 'Â', 'â', 'Ä', 'ä', 'Ã', 'ã', 'Å', 'å', 'æ', 'Æ', 'α', 'Δ', 'Λ', 'λ'],
@@ -116,6 +116,18 @@ public function test_combination_match()
116116
$this->assertSame('This is a ********* sentence', $result->cleanString);
117117
}
118118

119+
public function test_multiple_profanities_no_spaces()
{
    // Two profanities run together plus a third after a space.
    $input = 'cuntfuck shit';

    $outcome = (new BlaspService())->check($input);

    $this->assertTrue($outcome->hasProfanity);
    $this->assertSame(3, $outcome->profanitiesCount);
    $this->assertCount(3, $outcome->uniqueProfanitiesFound);
    $this->assertSame('******** ****', $outcome->cleanString);
}
130+
119131
public function test_multiple_profanities()
120132
{
121133
$blaspService = new BlaspService();
@@ -128,4 +140,117 @@ public function test_multiple_profanities()
128140
$this->assertSame('This is a ********* sentence you ******* ****!', $result->cleanString);
129141
}
130142

143+
public function test_scunthorpe_problem()
{
    // "Scunthorpe" contains a profanity as a substring but must not be flagged.
    $blaspService = new BlaspService();

    $result = $blaspService->check('I live in a town called Scunthorpe');

    $this->assertFalse($result->hasProfanity);
    $this->assertSame(0, $result->profanitiesCount);
    $this->assertCount(0, $result->uniqueProfanitiesFound);
    $this->assertSame('I live in a town called Scunthorpe', $result->cleanString);
}
154+
155+
public function test_penistone_problem()
{
    // "Penistone" contains a profanity as a substring but must not be flagged.
    $blaspService = new BlaspService();

    $result = $blaspService->check('I live in a town called Penistone');

    $this->assertFalse($result->hasProfanity);
    $this->assertSame(0, $result->profanitiesCount);
    $this->assertCount(0, $result->uniqueProfanitiesFound);
    $this->assertSame('I live in a town called Penistone', $result->cleanString);
}
166+
167+
public function test_false_positives()
{
    // Innocent words that merely contain a profanity as a substring.
    $words = [
        'Scunthorpe',
        'Cockburn',
        'Penistone',
        'Lightwater',
        'Assume',
        'bass',
        'class',
        'Compass',
        'Pass',
        'Dickinson',
        'Middlesex',
        'Cockerel',
        'Butterscotch',
        'Blackcock',
        'Countryside',
        'Arsenal',
        'Flick',
        'Flicker',
        'Analyst',
        'blackCocktail',
    ];

    foreach ($words as $word) {
        $blaspService = new BlaspService();

        $result = $blaspService->check($word);

        // Name the word in each message so a failure inside the loop
        // identifies which entry was wrongly flagged.
        $this->assertFalse($result->hasProfanity, "'{$word}' was flagged as profanity");
        $this->assertSame(0, $result->profanitiesCount, "'{$word}' produced a non-zero profanity count");
        $this->assertCount(0, $result->uniqueProfanitiesFound, "'{$word}' produced unique profanities");
        $this->assertSame($word, $result->cleanString, "'{$word}' was altered");
    }
}
204+
205+
public function test_cuntfuck_fuckcunt()
{
    // The same two profanities joined in both orders: four hits, two unique.
    $input = 'cuntfuck fuckcunt';

    $outcome = (new BlaspService())->check($input);

    $this->assertTrue($outcome->hasProfanity);
    $this->assertSame(4, $outcome->profanitiesCount);
    $this->assertCount(2, $outcome->uniqueProfanitiesFound);
    $this->assertSame('******** ********', $outcome->cleanString);
}
216+
217+
public function test_fucking_shit_cunt_fuck()
{
    // Four different profanities concatenated without any separator.
    $input = 'fuckingshitcuntfuck';

    $outcome = (new BlaspService())->check($input);

    $this->assertTrue($outcome->hasProfanity);
    $this->assertSame(4, $outcome->profanitiesCount);
    $this->assertCount(4, $outcome->uniqueProfanitiesFound);
    $this->assertSame('*******************', $outcome->cleanString);
}
228+
229+
public function test_billy_butcher()
{
    // Punctuation directly after the profanity must survive the masking.
    $input = 'oi! cunt!';

    $outcome = (new BlaspService())->check($input);

    $this->assertTrue($outcome->hasProfanity);
    $this->assertSame(1, $outcome->profanitiesCount);
    $this->assertCount(1, $outcome->uniqueProfanitiesFound);
    $this->assertSame('oi! ****!', $outcome->cleanString);
}
240+
241+
public function test_paragraph()
{
    // A longer passage with nine different profanities spread through it.
    $text = "This damn project is such a pain in the ass. I can't believe I have to deal with this bullshit every single day. It's like everything is completely fucked up, and nobody gives a shit. Sometimes I just want to scream, 'What the hell is going on?' Honestly, it's a total clusterfuck, and I'm so fucking done with this crap.";

    $masked = "This **** project is such a pain in the ***. I can't believe I have to deal with this ******** every single day. It's like everything is completely ****** up, and nobody gives a ****. Sometimes I just want to scream, 'What the **** is going on?' Honestly, it's a total ***********, and I'm so ******* done with this ****.";

    $outcome = (new BlaspService())->check($text);

    $this->assertTrue($outcome->hasProfanity);
    $this->assertSame(9, $outcome->profanitiesCount);
    $this->assertCount(9, $outcome->uniqueProfanitiesFound);
    $this->assertSame($masked, $outcome->cleanString);
}
131256
}

0 commit comments

Comments
 (0)