add variant words

JKS9 · Jan 31, 2025 · b132efe · b132efe
1 parent e43e6f0
commit b132efe
Show file tree

Hide file tree

Showing 6 changed files with 117 additions and 29 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 node_modules
-dist
+dist
+discussion.md
diff --git a/package-lock.json b/package-lock.json
diff --git a/src/profanityChecker.ts b/src/profanityChecker.ts
@@ -1,29 +1,17 @@
 import { ProfanityConfig } from './profanityConfig.js';
+import { parseString } from './utils/normalize.js';
 
 export class ProfanityChecker extends ProfanityConfig {
-  /**
-   * Normalize the input sentence by removing unwanted characters and splitting it into an array of words.
-   * @param sentence - The sentence to normalize.
-   * @returns {string[]} - An array of normalized words.
-   */
-  public static normalizeSentence(sentence: string): string[] {
-    return sentence
-      .replace(/[^a-zA-Z0-9àâäéèêëîïôöùûüç\s.,!?]/gu, ' ') // Ajout du modificateur 'u' pour Unicode et 'g' pour global
-      .trim() // Supprime les espaces en début/fin.
-      .toLowerCase() // Convertir en minuscule.
-      .split(/\s+/); // Divise en mots.
-  }
-
   /**
    * Check if the sentence contains any bad words.
    * @param sentence - The sentence to check.
    * @returns {boolean} - True if bad words are found, otherwise false.
    */
   public static hasBadWords(sentence: string): boolean {
-    const words = this.normalizeSentence(sentence);
+    const words = parseString(sentence);
 
     return words.some((word) => {
-      const cleanedWord = word.replace(/[.,!?]$/gu, '').toLowerCase();
+      const cleanedWord = word.replace(/[.,!?]/gu, '').toLowerCase();
       return (
         this.badWordsSet.has(cleanedWord) &&
         !this.whiteListWordsSet.has(cleanedWord)
@@ -37,15 +25,11 @@ export class ProfanityChecker extends ProfanityConfig {
    * @returns {string} - The censored sentence.
    */
   public static censoredSentence(sentence: string): string {
-    const words = this.normalizeSentence(sentence);
-    const censoredWords = words.map((word) => {
-      const cleanedWord = word.replace(/[.,!?]$/gu, '').toLowerCase();
+    const words = parseString(sentence);
 
-      if (
-        this.badWordsSet.has(cleanedWord) &&
-        !this.whiteListWordsSet.has(cleanedWord)
-      ) {
-        return this.censoreSet.repeat(cleanedWord.length) + word.slice(cleanedWord.length); // Replace with asterisks, keeping punctuation.
+    const censoredWords = words.map((word) => {
+      if (this.badWordsSet.has(word) && !this.whiteListWordsSet.has(word)) {
+        return this.censoreSet.repeat(word.length); // Replace with asterisks, keeping punctuation.
       }
       return word; // Keep the original word if not a bad word.
     });

diff --git a/src/test/profanityCheckerFr.test.ts b/src/test/profanityCheckerFr.test.ts
@@ -5,7 +5,7 @@ import { test, expect, beforeAll } from 'vitest';
 beforeAll(() => {
   ProfanityConfig.changeCensoredWords('*');
   ProfanityConfig.addBadWords([
-    'salope',
+    'Salope',
     'putain de merde',
     'con',
     'sale pute',
@@ -139,13 +139,13 @@ test('censoredSentence censors bad phrase with multiple bad words', () => {
 
 test('censoredSentence handles punctuation', () => {
   expect(ProfanityChecker.censoredSentence('ceci est une phrase, conne!')).toBe(
-    'ceci est une phrase, *****!',
+    'ceci est une phrase , ***** !',
   );
 });
 
 test('censoredSentence handles mixed case', () => {
   expect(ProfanityChecker.censoredSentence('Ceci est une Salope')).toBe(
-    'ceci est une ******',
+    'Ceci est une ******',
   );
 });
 

diff --git a/src/utils/normalize.ts b/src/utils/normalize.ts
@@ -0,0 +1,11 @@
+export const parseString = (input: string): string[] => { // Tokenizes `input` into an ordered list of word tokens and punctuation tokens.
+  if (!input || input.length === 0) { // Empty (or otherwise falsy) input produces no tokens.
+    return [];
+  }
+
+  input = input.replace(/'/g, " ' "); // Surround every apostrophe with spaces; the pattern below matches neither, so an apostrophe only ever separates tokens (l'ami -> "l", "ami").
+
+  const words = input.match(/[\wÀ-ÿ]+|[.,!?;:]/g) || []; // A token is a run of \w / U+00C0-U+00FF characters, or one single . , ! ? ; : character; everything else is dropped. `|| []` covers match() returning null. NOTE(review): letters outside that range (e.g. U+0153) are not matched and therefore split a word.
+
+  return words.map((word) => word.trim()).filter((word) => word.length > 0); // Safety net: trim each token and discard empty ones (the pattern above cannot match whitespace).
+};
diff --git a/src/words/words.ts b/src/words/words.ts
@@ -20,6 +20,7 @@ export const badWords: string[] = [
   'mûrde',
   'mürde',
   'myrde',
+  'putain',
   'merda',
   'merdä',
   'merdà',
@@ -117,6 +118,96 @@ export const badWords: string[] = [
   'me.rde',
   'me<rde',
   'me>rde',
+  'Merde',
+  'Con',
+  'Salope',
+  'Putain',
+  'Enculé',
+  'Bordel',
+  'Nique',
+  'Conne',
+  'Chié',
+  'Fils de pute',
+  'Va te faire foutre',
+  'Batard',
+  'Clochard',
+  'Connard',
+  'Salopard',
+  'Branler',
+  'Pédé',
+  'Gros con',
+  'Tapette',
+  'Grosse vache',
+  'Sale pute',
+  'Pute',
+  'Sucer',
+  'Fion',
+  'Cogne',
+  'Trou du cul',
+  'Zoophile',
+  'Raciste',
+  'Homophobe',
+  'Grosse merde',
+  'Connasse',
+  'Andouille',
+  'Pauvre con',
+  'Gouine',
+  'Bouffon',
+  'Nique ta mère',
+  'Saloperie',
+  'Abruti',
+  'Sac à merde',
+  'Tarlouze',
+  'Fils de chien',
+  'Merdeux',
+  'Enculard',
+  'Sauropode',
+  'Boudin',
+  'Conasse',
+  'Grosse tache',
+  'Taré',
+  'Putain de merde',
+  'Foufoune',
+  'Bâtard',
+  'Pouffiasse',
+  'Blessé',
+  'Culé',
+  'Peste',
+  'Gogole',
+  'Pétasse',
+  'Crétin',
+  'Merdeuse',
+  'Gros tas',
+  'Bitch',
+  'Gros nul',
+  'Paon',
+  'Gros porc',
+  'Bête',
+  'Sale bâtard',
+  'Soupe au lait',
+  'Bordélique',
+  'Barjot',
+  'Stronzo',
+  'Suceur',
+  'Imbécile',
+  'Clodo',
+  'Culs',
+  'Foutre',
+  'Pisse',
+  'Pénis',
+  'Cochon',
+  'Fille de pute',
+  'Baiseur',
+  'Ordure',
+  'Traînée',
+  'Gros suceur',
+  'Gourde',
+  'Chiffon',
+  'Glandeur',
+  'Macho',
+  'Sale con',
+  'Carnage',
+  'Ducon',
   'me/rde',
   'me?rde',
   'mer!de',
@@ -276,6 +367,7 @@ export const badWords: string[] = [
   'co/n',
   'co?n',
   'ocn',
+  'Salope',
   'salope',
   'sälope',
   'sàlope',