Skip to content

Commit

Permalink
add rule "duplicated diacritics on the same letter"
Browse files Browse the repository at this point in the history
  • Loading branch information
aborazmeh committed Jul 9, 2024
1 parent dd2f86a commit 0aebfac
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 1 deletion.
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ Only diacritics will be removed and there is likely to be an extra space

أهلا وسهلا بكم

### No Duplicated Diacritics on the Same Letter

يونََُُِِس: قال أبو عبيدة، «يقال:يونس بضم النون وكسرها». والمشهور في القراءة يونُس برفع النون من غير همز.

Duplicated diacritics will be removed

يونَُِس: قال أبو عبيدة، «يقال:يونس بضم النون وكسرها». والمشهور في القراءة يونُس برفع النون من غير همز.

### No Shadda With Madda

Shadda can't be combined with Madda
Expand All @@ -51,6 +59,7 @@ These are default options, you can change them in your .textlintrc file
"remove_loose_diacritics": true,
"no_shadda_with_madda": true,
"no_shadda_with_sukun": true,
"no_duplicated_diacritics": true,
}
}
}
Expand Down Expand Up @@ -86,7 +95,6 @@ Test textlint rule by [textlint-tester](https://github.com/textlint/textlint-tes
- No Tanween *and* Haraka on the same letter
- No Haraka *and* Sukun on the same letter
- No Sukun on the first letter of the word
- Duplicated diacritics on the same letter
- Option for no combining diacritics
- No Shadda with Sukun
- Normalize diacritics: like `U+FE7C` and `U+FE7D` to `U+0651`, and `U+FCF2` to `U+0651` and `U+064E`
Expand Down
34 changes: 34 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export interface Options {
remove_loose_diacritics?: boolean;
no_shadda_with_madda?: boolean;
no_shadda_with_sukun?: boolean;
no_duplicated_diacritics?: boolean;
}

const regex = {
Expand Down Expand Up @@ -73,13 +74,42 @@ function noShaddaWithSukun(node: TxtStrNode, text: string, context: Readonly<Tex
}
}

/**
 * Reports every Arabic diacritic that is repeated inside one unbroken run of
 * diacritics and attaches a fix that deletes the repeated mark.
 *
 * Each reported range starts at an occurrence of a mark and ends just after
 * its nearest repetition; the fix removes only that repetition (the last
 * character of the range).
 *
 * @param node - String node the errors are reported against.
 * @param text - Source text of `node`; the offsets handed to `locator.range`
 *   and `fixer.removeRange` are indexes into this string.
 * @param context - Rule context supplying `report`, `fixer`, `locator` and `RuleError`.
 */
function noDuplicatedDiacritics(node: TxtStrNode, text: string, context: Readonly<TextlintRuleContext>) {
    const { report, fixer, locator, RuleError } = context;

    // `regex.diacritics` is defined elsewhere in this file; this code assumes it is a
    // pattern matching exactly one UTF-16 code unit (the fix below deletes one unit).
    //
    // The quantifier is lazy (`*?`) so each mark is paired with its NEAREST repetition.
    // A greedy `*` pairs it with the LAST one, so for a mark written three or more
    // times several matches end on the same character: the same deletion is proposed
    // more than once while the middle duplicates are never removed.
    const doubleRegex = new RegExp(`(${regex.diacritics})${regex.diacritics}*?\\1`, "g");

    let match: RegExpExecArray | null;
    // A global `exec` returns the leftmost match at or after `lastIndex`, so once it
    // returns null there is nothing further to find and the scan can stop.
    while ((match = doubleRegex.exec(text)) !== null) {
        const start = match.index;
        const end = start + match[0].length;
        const matchRange = [start, end] as const;

        const ruleError = new RuleError("Found duplicated Arabic diacritic on the same letter.", {
            padding: locator.range(matchRange),
            // Keep the first occurrence, drop the repetition that closes the match.
            fix: fixer.removeRange([end - 1, end])
        });
        report(node, ruleError);

        // Resume one character after the match START (not its end) so that
        // overlapping pairs in the same run are each reported.
        doubleRegex.lastIndex = start + 1;
    }
}

const report: TextlintRuleModule<Options> = (context, options = {}) => {
const { getSource, Syntax } = context;
return {
[Syntax.Str](node) {
const removeLooseDiacritics = options.remove_loose_diacritics ?? true;
const shaddaWithMaddaOpt = options.no_shadda_with_madda ?? true;
const shaddaWithSukunOpt = options.no_shadda_with_sukun ?? true;
const duplicatedDiacriticsOpt = options.no_duplicated_diacritics ?? true;

const text = getSource(node); // Get text
noLooseDiacritics(node, text, context, removeLooseDiacritics);
Expand All @@ -91,6 +121,10 @@ const report: TextlintRuleModule<Options> = (context, options = {}) => {
if (shaddaWithSukunOpt) {
noShaddaWithSukun(node, text, context);
}

if (duplicatedDiacriticsOpt) {
noDuplicatedDiacritics(node, text, context);
}
}
};
};
Expand Down
23 changes: 23 additions & 0 deletions test/index-test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ tester.run("rule", rule, {
"أهلاً وسهلاً",
"الآن",
"ضيّق",
"يونَُِس: قال أبو عبيدة، «يقال:يونس بضم النون وكسرها». والمشهور في القراءة يونُس برفع النون من غير همز.",
{
text: "ضيّْق",
options: {
Expand Down Expand Up @@ -81,6 +82,28 @@ tester.run("rule", rule, {
range: [2, 4]
}
]
},
{
text: "يونََُُِِس: قال أبو عبيدة، «يقال:يونس بضم النون وكسرها». والمشهور في القراءة يونُس برفع النون من غير همز.",
output: "يونَُِس: قال أبو عبيدة، «يقال:يونس بضم النون وكسرها». والمشهور في القراءة يونُس برفع النون من غير همز.",
errors: [
{
message: "Found duplicated Arabic diacritic on the same letter.",
range: [3, 7]
},
{ message: "Found duplicated Arabic diacritic on the same letter.", range: [4, 8] },
{ message: "Found duplicated Arabic diacritic on the same letter.", range: [5, 9] }
]
},
{
text: "تجربة لشدَّّتين على نفس الحرف",
output: "تجربة لشدَّتين على نفس الحرف",
errors: [
{
message: "Found duplicated Arabic diacritic on the same letter.",
range: [9, 12]
}
]
}
]
});

0 comments on commit 0aebfac

Please sign in to comment.