Skip to content

Commit

Permalink
fix: soft yer edge cases
Browse files Browse the repository at this point in the history
  • Loading branch information
noomorph committed Dec 31, 2024
1 parent 6b26064 commit 7ff40dc
Show file tree
Hide file tree
Showing 9 changed files with 1,188 additions and 1,057 deletions.
2 changes: 2 additions & 0 deletions src/common/fleetingVowels.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ describe('fleetingVowels', () => {
expect(inferFleetingVowel('pėsȯk, kotȯk i orėl')).toBe(
'pės(o)k, kot(o)k i or(e)l',
);
expect(inferFleetingVowel('pės-afrikanec')).toBe('p(e)s-afrikań(e)c');
expect(inferFleetingVowel('afrikanec-pės')).toBe('afrikań(e)c-p(e)s');
});

it('does not infer incorrect fleeting vowels in the word', () => {
Expand Down
84 changes: 66 additions & 18 deletions src/common/fleetingVowels.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
import { ALL_LETTERS, ALL_CONSONANTS } from '../substitutions';
import {
ALL_CONSONANTS,
ALL_LETTERS,
HARD_YER_LOOSE,
SOFT_YER_LOOSE,
VOCALIZED,
YERS,
} from '../substitutions';
import { soften } from './soften';

export function markFleetingVowel(word: string, add: string): string {
let i = 0;
Expand All @@ -8,49 +16,89 @@ export function markFleetingVowel(word: string, add: string): string {
i++;
}

if (word[i] !== add[i] && word[i + 1] === add[i]) {
return replaceFleetingVowel(word, i);
if (word[i] !== add[i]) {
if (word[i + 1] === add[i]) {
return replaceFleetingVowel(word, i);
}

if (word[i + 1] === add[i + 1] && isLJNJ(add, i - 1)) {
return replaceFleetingVowel(word, i);
}
}

return word;
}

export function inferFleetingVowel(word: string): string {
let i = word.length - 1;
let end = word.length;
let replaced = false;
let result = word;

while (i > 0) {
for (let i = end - 1; i >= 0; i--) {
const char = word[i];
if (!ALL_LETTERS.has(char)) {
end = i;
replaced = false;
continue;
}

if (!replaced && isFleetingVowel(char)) {
if (isLastSyllable(word, i, end)) {
if (YERS.has(char) || isEC(word, i, end)) {
if (isFleetingSyllable(word, i, end) && canOmitYer(word, i)) {
result = replaceFleetingVowel(result, i);
}
}

i--;
}

return result;
}

function isFleetingVowel(char: string): boolean {
return char === 'è' || char === 'ė' || char === 'ȯ' || char === 'ò';
/**
 * Rewrites the vowel at index `j` of `word` in bracket notation
 * (see `toBracketNotation`), softening the consonant right before it
 * when `shouldSoftenPreceedingConsonant` says so.
 *
 * @param word - The word containing the vowel to rewrite.
 * @param j - Index of the vowel inside `word`.
 * @returns A copy of `word` with the vowel (and possibly the preceding
 *   consonant) replaced.
 */
function replaceFleetingVowel(word: string, j: number): string {
  const bracketed = toBracketNotation(word[j]);
  const after = word.slice(j + 1);

  // A vowel at the very start has no preceding consonant. Without this guard,
  // `word[j - 1]` is `undefined` (rendered as the text "undefined") and
  // `word.slice(0, -1)` wrongly yields everything but the last character.
  if (j === 0) {
    return `${bracketed}${after}`;
  }

  const consonant = shouldSoftenPreceedingConsonant(word, j)
    ? soften(word[j - 1])
    : word[j - 1];

  // Everything up to (but excluding) the preceding consonant stays untouched.
  const before = word.slice(0, j - 1);
  return `${before}${consonant}${bracketed}${after}`;
}

function replaceFleetingVowel(word: string, j: number): string {
const fleetingVowel = word[j].normalize('NFD')[0];
return `${word.slice(0, j)}(${fleetingVowel})${word.slice(j + 1)}`;
/**
 * Tells whether the consonant right before index `i` must be softened:
 * true only when that consonant is an l/n (per `isLN`) and the character at
 * `i` is rendered as `(e)` by `toBracketNotation`.
 */
function shouldSoftenPreceedingConsonant(word: string, i: number): boolean {
  if (!isLN(word, i - 1)) {
    return false;
  }

  const notation = toBracketNotation(word[i]);
  return notation === '(e)';
}

/**
 * Converts a character to its bracketed fleeting-vowel form:
 * `(e)` for members of SOFT_YER_LOOSE, `(o)` for members of HARD_YER_LOOSE.
 * Any other character is returned unchanged.
 *
 * The soft set is consulted first, so it wins if a character is in both sets.
 */
function toBracketNotation(maybeYer: string): string {
  return SOFT_YER_LOOSE.has(maybeYer)
    ? '(e)'
    : HARD_YER_LOOSE.has(maybeYer)
      ? '(o)'
      : maybeYer;
}

function isLastSyllable(word: string, i: number, end: number): boolean {
function isFleetingSyllable(word: string, i: number, end: number): boolean {
if (i === end - 2) return ALL_CONSONANTS.has(word[i + 1]);
if (i === end - 3) return word[i + 1] === 'n' && word[i + 2] === 'j';
if (i === end - 3) return isLJNJ(word, i + 1);
return false;
}

/**
 * Decides whether the yer at index `i` may be dropped, based on its
 * neighbours.
 *
 * The consonant unit before the yer is either a single character or an
 * lj/nj digraph (detected with `isLJNJ`). Let `c1` be the first character of
 * that unit and `c2` the character before it. The yer can be omitted when
 * `c1` differs from the character following the yer, and `c2` is either not
 * in ALL_LETTERS (including "no character at all") or is in VOCALIZED.
 */
function canOmitYer(word: string, i: number): boolean {
  // With a digraph the consonant unit starts one position earlier.
  const unitStart = isLJNJ(word, i - 2) ? i - 2 : i - 1;
  const c1 = word[unitStart];
  const c2 = word[unitStart - 1];

  if (c1 === word[i + 1]) {
    return false;
  }

  return !ALL_LETTERS.has(c2) || VOCALIZED.has(c2);
}

/**
 * Checks whether an lj/nj digraph starts at index `i`: an l/n (either case,
 * per `isLN`) immediately followed by a lowercase `j`.
 * Negative indices never match.
 */
function isLJNJ(word: string, i: number): boolean {
  if (i < 0) {
    return false;
  }

  if (word[i + 1] !== 'j') {
    return false;
  }

  return isLN(word, i);
}

/**
 * Checks whether the character at index `i` of `word` is `l` or `n`,
 * in either case. Out-of-range indices yield `false`.
 */
function isLN(word: string, i: number): boolean {
  switch (word[i]) {
    case 'l':
    case 'n':
    case 'L':
    case 'N':
      return true;
    default:
      return false;
  }
}

/**
 * Checks whether the segment ending at `end` (exclusive) finishes with a
 * plain `ec` whose `e` sits at index `i`. The `e` must not be the first
 * character of `word`.
 */
function isEC(word: string, i: number, end: number): boolean {
  const endsSegment = i + 2 === end;
  if (!(i > 0) || !endsSegment) {
    return false;
  }

  return word[i] === 'e' && word[i + 1] === 'c';
}
53 changes: 53 additions & 0 deletions src/common/soften.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { soften } from './soften';

// Table-driven checks for soften(): each row lists the call arguments
// followed by the expected result.
describe('soften function', () => {
  test('softens the last consonant by default', () => {
    const cases: Array<[string, string]> = [
      ['dnes', 'dneś'],
      ['gaz', 'gaź'],
      ['lad', 'laď'],
    ];

    for (const [input, expected] of cases) {
      expect(soften(input)).toBe(expected);
    }
  });

  test('softens the specified consonant at given index', () => {
    const cases: Array<[string, number, string]> = [
      ['lad', 0, 'ľad'],
      ['selsky', 2, 'seľsky'],
      ['měd', 2, 'měď'],
    ];

    for (const [input, index, expected] of cases) {
      expect(soften(input, index)).toBe(expected);
    }
  });

  test('handles negative indices', () => {
    // Negative indices count from the end of the string.
    const cases: Array<[string, number, string]> = [
      ['dnes', -1, 'dneś'],
      ['test', -2, 'teśt'],
    ];

    for (const [input, index, expected] of cases) {
      expect(soften(input, index)).toBe(expected);
    }
  });

  test('handles all softenable consonants', () => {
    const cases: Array<[string, string]> = [
      ['D', 'Ď'],
      ['L', 'Ľ'],
      ['N', 'Ń'],
      ['R', 'Ŕ'],
      ['S', 'Ś'],
      ['T', 'Ť'],
      ['Z', 'Ź'],
      ['d', 'ď'],
      ['l', 'ľ'],
      ['n', 'ń'],
      ['r', 'ŕ'],
      ['s', 'ś'],
      ['t', 'ť'],
      ['z', 'ź'],
    ];

    for (const [input, expected] of cases) {
      expect(soften(input)).toBe(expected);
    }
  });

  test('does not change non-softenable consonants and vowels', () => {
    // The last character of each word is a vowel, so nothing changes.
    for (const word of ['baba', 'mama', 'papa']) {
      expect(soften(word)).toBe(word);
    }
  });

  test('handles empty strings', () => {
    expect(soften('')).toBe('');
    expect(soften('', 0)).toBe('');
  });

  test('handles out of range index', () => {
    const cases: Array<[string, number, string]> = [
      ['test', 4, 'test'],
      ['test', -5, 'test'],
    ];

    for (const [input, index, expected] of cases) {
      expect(soften(input, index)).toBe(expected);
    }
  });
});
27 changes: 27 additions & 0 deletions src/common/soften.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
 * Every consonant that has a softened counterpart, mapped to that
 * counterpart. Upper- and lowercase forms are listed separately.
 */
const FULL_SOFTENABLE_CONSONANT_MAP: Record<string, string> = {
  D: 'Ď',
  L: 'Ľ',
  N: 'Ń',
  R: 'Ŕ',
  S: 'Ś',
  T: 'Ť',
  Z: 'Ź',
  d: 'ď',
  l: 'ľ',
  n: 'ń',
  r: 'ŕ',
  s: 'ś',
  t: 'ť',
  z: 'ź',
};

/**
 * Replaces the character at `index` with its softened form, if it has one.
 *
 * @param str - The string to transform.
 * @param index - Position of the character to soften. Negative values count
 *   from the end of the string. Defaults to the last character.
 * @returns A copy of `str` with the targeted character softened, or `str`
 *   unchanged when the character has no softened form or when `index` does
 *   not address a character (out of range, fractional, or NaN).
 */
export function soften(str: string, index = str.length - 1): string {
  const pos = index < 0 ? str.length + index : index;

  // Only an integer position inside the string addresses a character.
  // A fractional position previously deleted a character, because `slice`
  // truncates its arguments while the indexed lookup yields `undefined`.
  if (!Number.isInteger(pos) || pos < 0 || pos >= str.length) {
    return str;
  }

  const original = str.charAt(pos);
  const softened = FULL_SOFTENABLE_CONSONANT_MAP[original] ?? original;

  return str.slice(0, pos) + softened + str.slice(pos + 1);
}
Loading

0 comments on commit 7ff40dc

Please sign in to comment.