From 58f271556aa878e619457054f8a2f423e8b574ca Mon Sep 17 00:00:00 2001
From: Eugene Toder
Date: Sat, 28 Dec 2024 22:29:49 -0500
Subject: [PATCH] fix(censor): don't generate the same character twice in a row (#85)

In randomCharFromSetCensorStrategy(). This produces more interesting
strings and avoids generating "@$$" as a side-effect.

Fixes #82
---
 src/censor/BuiltinStrategies.ts       | 24 ++++++++++++++++++------
 test/censor/BuiltinStrategies.test.ts | 16 ++++++++++++++--
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/src/censor/BuiltinStrategies.ts b/src/censor/BuiltinStrategies.ts
index 780d7c1..23ff8a1 100644
--- a/src/censor/BuiltinStrategies.ts
+++ b/src/censor/BuiltinStrategies.ts
@@ -139,25 +139,37 @@ export function fixedCharCensorStrategy(char: string): TextCensorStrategy {
 
 /**
  * A text censoring strategy that generates replacement strings made up of
- * random characters from the set of characters provided.
+ * random characters from the set of characters provided. The strings never
+ * contain two of the same character in a row.
  *
  * @example
  * ```typescript
  * const strategy = randomCharFromSetCensorStrategy('$#!');
  * const censor = new TextCensor().setStrategy(strategy);
  * // Before: 'fuck you!'
- * // After: '!##$ you!'
+ * // After: '!#$# you!'
  * ```
  * @param charset - Set of characters from which the replacement string should
- * be constructed. Must not be empty.
+ * be constructed. Must have at least two characters.
  * @returns A [[TextCensorStrategy]] for use with the [[TextCensor]].
  */
 export function randomCharFromSetCensorStrategy(charset: string): TextCensorStrategy {
 	const chars = [...charset];
-	if (chars.length === 0) throw new Error('The character set passed must not be empty.');
+	if (chars.length < 2) throw new Error('The character set passed must have at least 2 characters.');
 	return (ctx: CensorContext) => {
-		let censored = '';
-		for (let i = 0; i < ctx.matchLength; i++) censored += chars[Math.floor(Math.random() * chars.length)];
+		if (ctx.matchLength === 0) return '';
+
+		let lastIdx = Math.floor(Math.random() * chars.length);
+		let censored = chars[lastIdx];
+		for (let i = 1; i < ctx.matchLength; i++) {
+			let idx = Math.floor(Math.random() * (chars.length - 1));
+			// Transform the distribution for idx from [0, len-1) to
+			// [0, lastIdx) ∪ (lastIdx, len) to exclude lastIdx while
+			// ensuring a uniform distribution of generated characters.
+			if (idx >= lastIdx) idx++;
+			lastIdx = idx;
+			censored += chars[idx];
+		}
 		return censored;
 	};
 }
diff --git a/test/censor/BuiltinStrategies.test.ts b/test/censor/BuiltinStrategies.test.ts
index a95bf64..59e40d1 100644
--- a/test/censor/BuiltinStrategies.test.ts
+++ b/test/censor/BuiltinStrategies.test.ts
@@ -130,8 +130,13 @@ describe('fixedCharCensorStrategy()', () => {
 });
 
 describe('randomCharFromSetCensorStrategy()', () => {
-	it('should throw if the charset is empty', () => {
-		expect(() => randomCharFromSetCensorStrategy('')).toThrow(new Error('The character set passed must not be empty.'));
+	it('should throw if the charset has less than 2 characters', () => {
+		expect(() => randomCharFromSetCensorStrategy('')).toThrow(
+			new Error('The character set passed must have at least 2 characters.'),
+		);
+		expect(() => randomCharFromSetCensorStrategy('a')).toThrow(
+			new Error('The character set passed must have at least 2 characters.'),
+		);
 	});
 
 	it('should work for matchLength 0', () => {
@@ -144,4 +149,11 @@ describe('randomCharFromSetCensorStrategy()', () => {
 		const strategy = randomCharFromSetCensorStrategy(charset);
 		expect([...strategy({ ...partialCtx, matchLength: 5 })].every((c) => charset.includes(c))).toBeTruthy();
 	});
+
+	it('should not repeat the same character twice in a row', () => {
+		const strategy = randomCharFromSetCensorStrategy('ab');
+		for (let i = 0; i < 100; i++) {
+			expect(['aba', 'bab']).toContain(strategy({ ...partialCtx, matchLength: 3 }));
+		}
+	});
 });