Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

features/variantManagement add variant #10

Merged
merged 8 commits into from
Jan 31, 2025
64 changes: 36 additions & 28 deletions src/profanityChecker.ts
Original file line number Diff line number Diff line change
@@ -1,61 +1,69 @@
import { ProfanityConfig } from './profanityConfig.js';

/**
* Profanity checker that inherits configuration settings from ProfanityConfig.
* Provides methods to check bad words in a sentence and censor them.
*/
export class ProfanityChecker extends ProfanityConfig {
/**
* Normalizes a sentence by removing non-alphabetical characters and splitting it into words.
* Normalize the input sentence by removing unwanted characters and splitting it into an array of words.
*
* Notes on the multi-line public implementation below:
* - Every character outside ASCII letters, digits, the listed lowercase French
*   accented letters, whitespace and . , ! ? is replaced by a single space, so
*   the marks . , ! ? stay attached to the word they follow.
* - The text is trimmed and lowercased before it is split on runs of whitespace.
* - An empty input yields an array holding one empty string, because splitting
*   an empty string on a regular expression returns a single empty element.
*
* NOTE(review): uppercase accented letters (for example É) are not listed in the
* character class and the regex has no i flag, so they are replaced by a space
* before toLowerCase runs. Confirm whether that is intended.
*
* @param sentence - The sentence to normalize.
* @returns An array of words from the sentence.
* @returns {string[]} - An array of normalized words.
*/
private static normalizeSentence(sentence: string): string[] {
return sentence.replace(/[^a-zA-Zàâäéèêëîïôöùûüç\s]/g, '').split(/\s+/);
public static normalizeSentence(sentence: string): string[] {
return sentence
.replace(/[^a-zA-Z0-9àâäéèêëîïôöùûüç\s.,!?]/gu, ' ') // u flag enables Unicode mode; g flag replaces every match.
.trim() // Remove leading and trailing whitespace.
.toLowerCase() // Convert to lowercase.
.split(/\s+/); // Split into words on runs of whitespace.
}

/**
* Checks whether the sentence contains any bad words from the badWordsSet, excluding whitelisted words.
* @param sentence - The sentence to check for bad words.
* @returns `true` if bad words are found, `false` otherwise.
* Check if the sentence contains any bad words.
*
* The sentence is first split into tokens by normalizeSentence, and each token
* is looked up on its own. A token is reported only when it is in badWordsSet
* and not in whiteListWordsSet.
*
* NOTE(review): tokens never contain whitespace, so an entry that itself
* contains a space (a multi-word phrase) presumably can never match here.
* Confirm against how ProfanityConfig stores its entries.
*
* @param sentence - The sentence to check.
* @returns {boolean} - True if bad words are found, otherwise false.
*/
public static hasBadWords(sentence: string): boolean {
const words = this.normalizeSentence(sentence);

return words.some((word) => {
// Strip trailing . , ! ? before the lookup; punctuation at the start of a token is left in place.
const cleanedWord = word.replace(/[.,!?]+$/u, '').toLowerCase();
return (
this.badWordsSet.has(word.toLowerCase()) &&
!this.whiteListWordsSet.has(word.toLowerCase())
this.badWordsSet.has(cleanedWord) &&
!this.whiteListWordsSet.has(cleanedWord)
);
});
}

/**
* Censors bad words in the sentence by replacing them with the censored character.
* Censor bad words in the sentence by replacing them with asterisks.
*
* The result is rebuilt from the tokens returned by normalizeSentence and joined
* with single spaces, so the original casing, repeated whitespace and any
* character removed during normalization are not preserved in the output.
*
* @param sentence - The sentence to censor.
* @returns The sentence with censored words replaced by the censor character.
* @returns {string} - The censored sentence.
*/
public static censoredSentence(sentence: string): string {
const words = this.normalizeSentence(sentence);
const censoredWords = words.map((word) =>
this.badWordsSet.has(word.toLowerCase()) &&
!this.whiteListWordsSet.has(word.toLowerCase())
? this.censoreSet.repeat(word.length)
: word,
);
return censoredWords.join(' ');
const censoredWords = words.map((word) => {
// Strip trailing . , ! ? so the lookup uses the bare word; the stripped suffix is re-appended below.
const cleanedWord = word.replace(/[.,!?]+$/u, '').toLowerCase();
JKS9 marked this conversation as resolved.
Fixed
Show resolved Hide resolved

if (
this.badWordsSet.has(cleanedWord) &&
!this.whiteListWordsSet.has(cleanedWord)
) {
// NOTE(review): the replaced implementation masked with this.censoreSet; the literal asterisk here ignores that setting. Confirm this is intended.
return '*'.repeat(cleanedWord.length) + word.slice(cleanedWord.length); // Replace with asterisks, keeping punctuation.
}
return word; // Keep the original word if not a bad word.
});

return censoredWords.join(' ').trim(); // Join words into a sentence.
}

/**
* Returns a list of all bad words currently in the badWordsSet.
* @returns An array of bad words.
* List all bad words currently in the set.
*
* The array is a new copy built with Array.from on each call.
* This diff renames the method from listbadWords to listBadWords.
*
* @returns {string[]} - An array of bad words.
*/
public static listbadWords(): string[] {
return Array.from(this.badWordsSet);
public static listBadWords(): string[] {
return Array.from(this.badWordsSet); // Convert the set to an array.
}

/**
* Returns a list of all whitelisted words currently in the whiteListWordsSet.
* @returns An array of whitelisted words.
* List all whitelisted words currently in the set.
* @returns {string[]} - An array of whitelisted words.
*/
public static listWhiteListWords(): string[] {
return Array.from(this.whiteListWordsSet);
Expand Down
197 changes: 173 additions & 24 deletions src/test/profanityCheckerFr.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,123 @@ import { ProfanityChecker } from '../profanityChecker.js';
import { ProfanityConfig } from '../profanityConfig.js';
import { test, expect, beforeAll } from 'vitest';

// Initial configuration, run once before the tests in this file.
beforeAll(() => {
ProfanityConfig.changeCensoredWords('*');
ProfanityConfig.addBadWords(['salope', 'putain de merde', 'con', 'delete']);
ProfanityConfig.deleteBadWords(['delete']); // Presumably removes the delete entry added on the previous line; confirm in ProfanityConfig.
ProfanityConfig.addWhiteList(['merde', 'white']); // merde is whitelisted, so the checker does not report or censor it.
ProfanityConfig.deleteWhiteList(['white']); // Presumably removes white from the whitelist again; confirm in ProfanityConfig.
ProfanityConfig.addBadWords([
'salope',
'putain de merde',
'con',
'sale pute',
]);
// NOTE(review): the multi-line addBadWords call above does not add delete, yet it is still deleted here.
ProfanityConfig.deleteBadWords(['delete']);
ProfanityConfig.addWhiteList(['merde', 'white']);
ProfanityConfig.deleteWhiteList(['white']);
});

// Despite its title, this test asserts that a sentence with no listed word is reported as clean.
test('hasBadWords detects bad words', () => {
const sentence = 'ceci est une phrase avec un mot interdit';
expect(ProfanityChecker.hasBadWords(sentence)).toBe(false);
expect(
ProfanityChecker.hasBadWords('ceci est une phrase avec un mot interdit'),
).toBe(false);
});

// The quotes around the word are outside the allowed character set in normalizeSentence and are removed before the lookup.
test('hasBadWords detects bad words', () => {
const sentence = "ceci est une phrase avec un mot interdit 'salope'";
expect(ProfanityChecker.hasBadWords(sentence)).toBe(true);
test('hasBadWords detects single bad word', () => {
expect(
ProfanityChecker.hasBadWords(
"ceci est une phrase avec un mot interdit 'salope'",
),
).toBe(true);
});

// NOTE(review): hasBadWords compares single whitespace-separated tokens, so the
// phrase entry added in beforeAll cannot match as a whole. Presumably this passes
// because putain alone is in a default list; confirm in ProfanityConfig.
test('hasBadWords detects multiple bad words', () => {
const sentence = "ceci est une phrase avec un mot interdit 'putain de merde'";
expect(ProfanityChecker.hasBadWords(sentence)).toBe(true);
expect(
ProfanityChecker.hasBadWords(
"ceci est une phrase avec un mot interdit 'putain de merde'",
),
).toBe(true);
});

// Negative case: a sentence made only of ordinary words.
test('hasBadWords detects no bad words', () => {
const sentence = 'ceci est une phrase propre';
expect(ProfanityChecker.hasBadWords(sentence)).toBe(false);
expect(ProfanityChecker.hasBadWords('ceci est une phrase propre')).toBe(
false,
);
});

// The > character is outside the allowed set in normalizeSentence and becomes a space.
// NOTE(review): beforeAll adds only the phrase sale pute, not pute alone;
// presumably pute comes from a default list. Confirm in ProfanityConfig.
test('hasBadWords detects bad phrase', () => {
expect(ProfanityChecker.hasBadWords('ceci est une phrase sale> pute')).toBe(
true,
);
});

// The ! stays attached to sale after normalization and is stripped as trailing punctuation before the lookup.
test('hasBadWords detects bad word with punctuation', () => {
expect(ProfanityChecker.hasBadWords('ceci est une phrase sale! pute')).toBe(
true,
);
});

// Because > is replaced by a space rather than removed, sale>pute splits into two tokens.
test('hasBadWords detects bad word with special character before', () => {
expect(ProfanityChecker.hasBadWords('ceci est une phrase sale>pute')).toBe(
true,
);
});

// Trailing ! on the last token is stripped before the lookup.
test('hasBadWords detects bad word with special character after', () => {
expect(ProfanityChecker.hasBadWords('ceci est une phrase sale pute!')).toBe(
true,
);
});

// Both the comma after sale and the final ! are trailing punctuation on their tokens.
test('hasBadWords detects bad word at the end with punctuation', () => {
expect(ProfanityChecker.hasBadWords('ici est une phrase sale, pute!')).toBe(
true,
);
});

// Same trailing-punctuation handling with the word in first position.
test('hasBadWords detects bad word at the start with punctuation', () => {
expect(ProfanityChecker.hasBadWords('pute! est ici')).toBe(true);
});

// The ; is not in the allowed set and becomes a space; the comma and the full stop are stripped as trailing punctuation.
test('hasBadWords detects bad words with mixed punctuation', () => {
expect(ProfanityChecker.hasBadWords('salope, pute; et con.')).toBe(true);
});

// merde is put on the whitelist in beforeAll, so it must not be reported.
test('hasBadWords does not detect whitelisted words', () => {
expect(ProfanityChecker.hasBadWords('ceci est une phrase avec merde')).toBe(
false,
);
});

// Digits are kept by normalizeSentence and form their own token here.
test('hasBadWords detects bad words with numbers', () => {
expect(ProfanityChecker.hasBadWords('ceci est une phrase 1234 pute')).toBe(
true,
);
});

// Two flagged words in a row, each followed by punctuation.
test('hasBadWords detects consecutive bad words with punctuation', () => {
expect(ProfanityChecker.hasBadWords('salope, pute!')).toBe(true);
});

// An empty string normalizes to a single empty token; the expectation is that this token is not flagged.
test('hasBadWords handles empty string', () => {
expect(ProfanityChecker.hasBadWords('')).toBe(false);
});

// In the added assertion the > is replaced by a space and whitespace runs are collapsed,
// so the expected output has a single space between sale and the mask.
test('censoredSentence censors bad words', () => {
const sentence = 'ceci est une phrase avec un mot con';
const expected = 'ceci est une phrase avec un mot ***';
expect(ProfanityChecker.censoredSentence(sentence)).toBe(expected);
expect(
ProfanityChecker.censoredSentence('ceci est une phrase sale> pute'),
).toBe('ceci est une phrase sale ****');
});

// The mask has one asterisk per character of the flagged word.
test('censoredSentence censors single bad word', () => {
expect(
ProfanityChecker.censoredSentence('ceci est une phrase avec un mot con'),
).toBe('ceci est une phrase avec un mot ***');
});

// Only putain is masked; merde is whitelisted in beforeAll and stays readable.
// The surrounding quotes are removed by normalization, so they are absent from the expected string.
test('censoredSentence censors multiple bad words', () => {
const sentence = "ceci est une phrase avec un mot 'putain de merde'";
const expected = 'ceci est une phrase avec un mot ****** de merde';
expect(ProfanityChecker.censoredSentence(sentence)).toBe(expected);
expect(
ProfanityChecker.censoredSentence(
"ceci est une phrase avec un mot 'putain de merde'",
),
).toBe('ceci est une phrase avec un mot ****** de merde');
});

test('censoredSentence leaves clean sentence unchanged', () => {
Expand All @@ -50,13 +128,84 @@ test('censoredSentence leaves clean sentence unchanged', () => {

// The input is already lowercase with single spaces, so the output is expected to equal the input exactly.
test('whitelisted words are not censored', () => {
const sentence = 'ceci est une phrase avec merde mais pas de censure';
const expected = 'ceci est une phrase avec merde mais pas de censure';
expect(ProfanityChecker.censoredSentence(sentence)).toBe(expected);
expect(ProfanityChecker.censoredSentence(sentence)).toBe(sentence);
});

// Each flagged token is masked independently; the word between them is untouched.
test('censoredSentence censors bad phrase with multiple bad words', () => {
expect(ProfanityChecker.censoredSentence('salope et pute')).toBe(
'****** et ****',
);
});

// The trailing ! is re-appended after the mask, and the comma after phrase is kept.
// NOTE(review): conne is not added in beforeAll; presumably it comes from a default list. Confirm in ProfanityConfig.
test('censoredSentence handles punctuation', () => {
expect(ProfanityChecker.censoredSentence('ceci est une phrase, conne!')).toBe(
'ceci est une phrase, *****!',
);
});

// censoredSentence returns the normalized tokens, which are lowercased, hence the lowercase expected string.
test('censoredSentence handles mixed case', () => {
expect(ProfanityChecker.censoredSentence('Ceci est une Salope')).toBe(
'ceci est une ******',
);
});

// Flagged word in first position.
test('censoredSentence handles bad words at the start', () => {
expect(ProfanityChecker.censoredSentence('salope est ici')).toBe(
'****** est ici',
);
});

// Flagged word in last position.
test('censoredSentence handles bad words at the end', () => {
expect(ProfanityChecker.censoredSentence('ici est une pute')).toBe(
'ici est une ****',
);
});

// Two flagged words with nothing between them.
test('censoredSentence handles consecutive bad words', () => {
expect(ProfanityChecker.censoredSentence('salope pute')).toBe('****** ****');
});

// An empty input normalizes to one empty token, which joins back to an empty string.
test('censoredSentence handles empty string', () => {
expect(ProfanityChecker.censoredSentence('')).toBe('');
});

// Every token in the sentence is flagged.
test('censoredSentence handles only bad words', () => {
expect(ProfanityChecker.censoredSentence('salope pute con')).toBe(
'****** **** ***',
);
});

// Words that are not on the list pass through unchanged, whatever their language.
test('censoredSentence handles mixed languages', () => {
expect(ProfanityChecker.censoredSentence('this is a salope')).toBe(
'this is a ******',
);
});

// The > becomes a space; the standalone ! is its own token and is kept as is.
test('censoredSentence handles special characters', () => {
expect(
ProfanityChecker.censoredSentence('ceci est une phrase sale> pute !'),
).toBe('ceci est une phrase sale **** !');
});

// Digits survive normalization and appear unchanged in the output.
test('censoredSentence handles numbers', () => {
expect(
ProfanityChecker.censoredSentence('ceci est une phrase 1234 con'),
).toBe('ceci est une phrase 1234 ***');
});

// NOTE(review): the title mentions multiple spaces, but the input shown here has single spaces.
// The runs of spaces were possibly collapsed when this page was captured; confirm against the repository.
test('censoredSentence handles multiple spaces', () => {
expect(
ProfanityChecker.censoredSentence(
'ceci est une phrase connards',
),
).toBe('ceci est une phrase connards');
});

// The replaced assertion compared the list with itself and therefore could never fail.
// The arrayContaining form checks that the entries added in beforeAll are present, in any order.
test('listBadWords returns the correct list of bad words', () => {
const badWordsList = ProfanityChecker.listbadWords();
expect(badWordsList).toEqual(badWordsList);
const badWordsList = ProfanityChecker.listBadWords();
expect(badWordsList).toEqual(
expect.arrayContaining(['salope', 'putain de merde', 'con', 'sale pute']),
);
});

test('listWhiteListWords returns the correct list of whitelisted words', () => {
Expand Down
Loading
Loading