Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ar] Fix some bugs and add new rules #6785

Closed
wants to merge 11 commits into from
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

/**
* Support for Arabic.
*
* @since 4.9
*/
public class Arabic extends Language implements AutoCloseable {
Expand Down Expand Up @@ -125,7 +126,11 @@ public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfi
new ArabicWordCoherencyRule(messages),
new ArabicWordinessRule(messages),
new ArabicWrongWordInContextRule(messages),
new ArabicTransVerbRule(messages)
new ArabicTransVerbRule(messages),
new ArabicTransVerbDirectToIndirectRule(messages),
new ArabicTransVerbIndirectToDirectRule(messages),
new ArabicTransVerbIndirectToIndirectRule(messages),
new ArabicInflectedOneWordReplaceRule(messages)
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
/* LanguageTool, a natural language style checker
* Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.ar;

import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.SimpleReplaceDataLoader;
import org.languagetool.rules.patterns.RuleFilter;
import org.languagetool.tagging.ar.ArabicTagger;
import org.languagetool.tools.ArabicWordMaps;

import java.util.*;

/**
* Filter that builds exclamation-form suggestions from a matched adjective.
*
* @since 5.8
*/
public class AdjectiveToExclamationFilter extends RuleFilter {

/** Resource path of the adjective-to-exclamation word list. */
private static final String FILE_NAME = "/ar/arabic_adjective_exclamation.txt";

/**
 * Small hard-coded adjective-to-comparative table.
 * Built once in a static initializer instead of double-brace initialization,
 * which would create an anonymous HashMap subclass per filter instance that
 * keeps a reference to the enclosing filter.
 */
private static final Map<String, String> adj2comp;

static {
  Map<String, String> table = new HashMap<>();
  // adjectives derived from three-letter verbs:
  table.put("رشيد", "أرشد");
  table.put("طويل", "أطول");
  table.put("بديع", "أبدع");
  // TODO: add more Masdar verb
  adj2comp = Collections.unmodifiableMap(table);
}

/** Tagger used to extract the adjective lemmas of the matched token. */
private final ArabicTagger tagger = new ArabicTagger();

/** Adjective lemma to list of exclamation forms, loaded from {@link #FILE_NAME}. */
private final Map<String, List<String>> adj2compList;

/**
 * Creates the filter and loads the adjective/exclamation list from {@link #FILE_NAME}.
 */
public AdjectiveToExclamationFilter() {
  this.adj2compList = loadFromPath(FILE_NAME);
}


@Nullable
@Override
public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments, int patternTokenPos, AnalyzedTokenReadings[] patternTokens) {

// This rule return only the comparative according to given adjective
String adj = arguments.get("adj"); // extract adjective
String noun = arguments.get("noun"); // the second argument
int adjTokenIndex;
try {
adjTokenIndex = Integer.valueOf(arguments.get("adj_pos")) - 1;
} catch (NumberFormatException e) {
throw new RuntimeException("Error parsing adj_pos from : " + arguments.get("adj_pos"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use e as the second argument here, so no information is lost.

}

// filter tokens which have a lemma of adjective

// some cases can have multiple lemmas, but only adjective lemma are used
List<String> adjLemmas = tagger.getLemmas(patternTokens[adjTokenIndex], "adj");

// get comparative from Adj/comp list
List<String> compList = new ArrayList<>();

for (String adjlemma : adjLemmas) {
// get comparative suitable to adjective
List<String> comparativeList = adj2compList.get(adjlemma);
if (comparativeList != null) {
compList.addAll(comparativeList);
}
}

// remove duplicates
compList = new ArrayList<>(new HashSet<>(compList));
RuleMatch newMatch = new RuleMatch(match.getRule(), match.getSentence(), match.getFromPos(), match.getToPos(), match.getMessage(), match.getShortMessage());
// generate suggestion
List<String> suggestionList = prepareSuggestions(compList, noun);
for (String sug : suggestionList) {
newMatch.addSuggestedReplacement(sug);
}
return newMatch;
}

/* prepare suggestions for a list of comparatives */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo "suggesiyton"

/**
 * Builds the suggestions for every comparative in the given list.
 *
 * @param compList comparative forms to turn into suggestions
 * @param noun the word to combine with each comparative
 * @return the suggestions of all comparatives, concatenated in list order
 */
protected static List<String> prepareSuggestions(List<String> compList, String noun) {
  List<String> collected = new ArrayList<>();
  compList.forEach(comparative -> collected.addAll(prepareSuggestions(comparative, noun)));
  return collected;
}

/**
 * Builds the suggestion for one comparative form and the word it refers to.
 *
 * <p>Cases, as described by the original authors:
 * <ul>
 *   <li>The noun is not a pronoun:
 *       "كم الولد جميل" becomes "ما أجمل الولد" or "أجمل بالولد".</li>
 *   <li>The noun is a pronoun:
 *       "كم هو جميل" becomes "ما أجمله" or "أجمل به".</li>
 *   <li>Non-triliteral adjective with a noun:
 *       "كم الطالب شديد الاستيعاب" becomes "ما أشد استيعاب الطالب" or "أشدد باستيعابه".</li>
 *   <li>Non-triliteral adjective with a pronoun:
 *       "كم هو شديد الاستيعاب" becomes "ما أشد استيعابه" or "أشد باستيعابه".</li>
 * </ul>
 *
 * <p>The comparative is appended as given; no leading "ما" particle is added here.
 *
 * @param comp the comparative form to start the suggestion with
 * @param noun the word the adjective refers to; may be {@code null} or empty,
 *             in which case the suggestion is the comparative alone
 * @return a list holding the single suggestion that was built
 */
protected static List<String> prepareSuggestions(String comp, String noun) {
  StringBuilder suggestion = new StringBuilder();
  suggestion.append(comp);

  if (noun != null && !noun.isEmpty()) {
    if (isPronoun(noun)) {
      // the attached pronoun is glued to the comparative without a space
      suggestion.append(ArabicWordMaps.getAttachedPronoun(noun));
    } else {
      // a comparative of the second form ends with " ب", which is
      // followed directly by the noun, so no space is inserted
      if (!comp.endsWith(" ب")) {
        suggestion.append(" ");
      }
      suggestion.append(noun);
    }
  }

  List<String> sugList = new ArrayList<>();
  sugList.add(suggestion.toString());
  return sugList;
}

/**
 * Tests whether the word is one of the isolated pronouns handled by this filter.
 *
 * @param word the word to test; may be {@code null}
 * @return {@code true} for "هو", "هي", "هم", "هما" and "أنا", otherwise {@code false}
 */
private static boolean isPronoun(String word) {
  if (word == null) {
    return false;
  }
  switch (word) {
    case "هو":
    case "هي":
    case "هم":
    case "هما":
    case "أنا":
      return true;
    default:
      return false;
  }
}

/**
 * Returns the attached pronoun that corresponds to an isolated pronoun.
 *
 * @param word the isolated pronoun; may be {@code null}
 * @return the attached form, or an empty string for {@code null} or an unlisted word
 */
private static String getAttachedPronoun(String word) {
  if (word == null) {
    return "";
  }
  switch (word) {
    case "هو":
      return "ه";
    case "هي":
      return "ها";
    case "هم":
      return "هم";
    case "هن":
      return "هن";
    case "نحن":
      return "نا";
    default:
      return "";
  }
}

/**
 * Loads a word map from the given path using {@link SimpleReplaceDataLoader#loadWords}.
 *
 * @param path path of the word list to load
 * @return the map produced by the loader
 */
protected static Map<String, List<String>> loadFromPath(String path) {
  SimpleReplaceDataLoader loader = new SimpleReplaceDataLoader();
  return loader.loadWords(path);
}

/**
 * Returns the path of the data file this filter loads its word list from.
 *
 * @return the value of the {@code FILE_NAME} constant
 */
public static String getDataFilePath() {
  return AdjectiveToExclamationFilter.FILE_NAME;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/* LanguageTool, a natural language style checker
* Copyright (C) 2022 Sohaib Afifi, Taha Zerrouki
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/

package org.languagetool.rules.ar;


import org.languagetool.language.Arabic;
import org.languagetool.rules.AbstractAdvancedSynthesizerFilter;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.synthesis.ar.ArabicSynthesizer;

/**
 * Arabic binding of {@link AbstractAdvancedSynthesizerFilter}.
 *
 * <p>Synthesizes suggestions using the lemma from one token (lemma_from)
 * and the POS tag from another one (postag_from).
 *
 * <p>The lemma_select and postag_select attributes are required
 * to choose one among several possible readings.
 */
public class AdvancedSynthesizerFilter extends AbstractAdvancedSynthesizerFilter {

  /** Synthesizer handed to the base class through {@link #getSynthesizer()}. */
  private final ArabicSynthesizer synth;

  /** Creates the filter with an {@link ArabicSynthesizer} built on a new {@link Arabic} instance. */
  public AdvancedSynthesizerFilter() {
    this.synth = new ArabicSynthesizer(new Arabic());
  }

  @Override
  protected Synthesizer getSynthesizer() {
    return this.synth;
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import java.util.ResourceBundle;

/**
* A rule that matches words which should not be used and suggests correct ones instead.
* A rule that matches words which should not be used and suggests correct ones instead.
*
* @author Sohaib AFIFI
* @since 5.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,12 @@ public String getDescription() {

@Override
public String getShort() {
return "كلمات متشابهة لفظا يرجى التحقق منها";
return "كلمات صحيحة متشابهة لفظا يرجى التحقق منها";
}

@Override
public String getMessage() {
return "قل $suggestions";
return "؟ربما تقصد $suggestions";
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public boolean isMisspelled(String word) {
String striped = ArabicStringTools.removeTashkeel(word);
return super.isMisspelled(striped);
}

@Override
protected boolean isLatinScript() {
return false;
Expand Down
Loading