Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rule): 異なる種類の助詞の重複を許可する #7

Merged
merged 3 commits into from
Feb 22, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions src/no-doubled-joshi.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,10 @@ import {RuleHelper} from "textlint-rule-helper";
import {getTokenizer} from "kuromojin";
import splitSentences, {Syntax as SentenceSyntax} from "sentence-splitter";
import StringSource from "textlint-util-to-string";
// 助詞どうか
const is助詞Token = token => {
return token.pos === "助詞";
};
const is読点Token = token => {
return token.surface_form === "、" && token.pos === "名詞";
};
import {
is助詞Token, is読点Token,
createKeyFromKey, restoreToSurfaceFromKey
} from "./token-utils";
/**
* Create token map object
* {
Expand All @@ -23,11 +20,12 @@ const is読点Token = token => {
function createSurfaceKeyMap(tokens) {
// 助詞のみを対象とする
return tokens.filter(is助詞Token).reduce((keyMap, token) => {
// "は" : [token]
if (!keyMap[token.surface_form]) {
keyMap[token.surface_form] = [];
// "は:助詞.係助詞" : [token]
const tokenKey = createKeyFromKey(token);
if (!keyMap[tokenKey]) {
keyMap[tokenKey] = [];
}
keyMap[token.surface_form].push(token);
keyMap[tokenKey].push(token);
return keyMap;
}, {});
}
Expand Down Expand Up @@ -100,12 +98,13 @@ export default function (context, options = {}) {

joshiTokens = [tokenA, tokenB, tokenC, tokenD, tokenE, tokenF]
joshiTokenSurfaceKeyMap = {
"は": [tokenA, tokenC, tokenE],
"で": [tokenB, tokenD, tokenF]
"は:助詞.係助詞": [tokenA, tokenC, tokenE],
"で:助詞.係助詞": [tokenB, tokenD, tokenF]
}
*/
Object.keys(joshiTokenSurfaceKeyMap).forEach(key => {
let tokens = joshiTokenSurfaceKeyMap[key];
const tokens = joshiTokenSurfaceKeyMap[key];
const joshiName = restoreToSurfaceFromKey(key);
// strict mode ではない時例外を除去する
if (!isStrict) {
if (matchExceptionRule(tokens)) {
Expand All @@ -117,27 +116,28 @@ export default function (context, options = {}) {
}
// report an error when differenceIndex is less than or equal to minInterval
// tokens are sorted in ascending order
tokens.reduce((prev, current) => {
let startPosition = countableTokens.indexOf(prev);
let otherPosition = countableTokens.indexOf(current);
// if difference
let differenceIndex = otherPosition - startPosition;
var reduder = (prev, current) => {
const startPosition = countableTokens.indexOf(prev);
const otherPosition = countableTokens.indexOf(current);
// 助詞token同士の距離が設定値以下ならエラーを報告する
const differenceIndex = otherPosition - startPosition;
if (differenceIndex <= minInterval) {
let originalPosition = source.originalPositionFor({
const originalPosition = source.originalPositionFor({
line: sentence.loc.start.line,
column: sentence.loc.start.column + (current.word_position - 1)
});
// padding position
var padding = {
// padding positionを計算する
const padding = {
line: originalPosition.line - 1,
// matchLastToken.word_position start with 1
// this is padding column start with 0 (== -1)
column: originalPosition.column
};
report(node, new RuleError(`一文に二回以上利用されている助詞 "${key}" がみつかりました。`, padding));
report(node, new RuleError(`一文に二回以上利用されている助詞 "${joshiName}" がみつかりました。`, padding));
}
return current;
});
};
tokens.reduce(reduder);
});
};
sentences.forEach(checkSentence);
Expand Down
22 changes: 22 additions & 0 deletions src/token-utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// LICENSE : MIT
"use strict";
/**
 * Tell whether a token is a particle (助詞).
 *
 * @param {Object} token - tokenizer token; only its `pos` field is read
 * @returns {boolean} true when `token.pos` is exactly "助詞"
 */
export const is助詞Token = (token) => token.pos === "助詞";

/**
 * Tell whether a token is the Japanese comma "、" (読点).
 *
 * Both conditions must hold: the surface form is "、" and the part of
 * speech is "名詞".
 * NOTE(review): presumably the tokenizer tags "、" as a noun in the text
 * this rule receives - confirm against the tokenizer output.
 *
 * @param {Object} token - tokenizer token; reads `surface_form` and `pos`
 * @returns {boolean} true only for a "、" token whose `pos` is "名詞"
 */
export const is読点Token = (token) => {
    if (token.surface_form !== "、") {
        return false;
    }
    return token.pos === "名詞";
};

/**
 * Build a grouping key from a particle token, using its surface form, its
 * part of speech and its part-of-speech subcategory 1.
 *
 * Part-of-speech references:
 * http://www.unixuser.org/~euske/doc/postag/index.html#chasen
 * http://chasen.naist.jp/snapshot/ipadic/ipadic/doc/ipadic-ja.pdf
 *
 * @param {Object} token - tokenizer token; reads `surface_form`, `pos`
 *     and `pos_detail_1`
 * @returns {string} key shaped like "surface:pos.pos_detail_1",
 *     e.g. "は:助詞.係助詞"
 */
export const createKeyFromKey = (token) => {
    const {surface_form: surface, pos, pos_detail_1: posDetail} = token;
    return `${surface}:${pos}.${posDetail}`;
};
/**
 * Extract the surface form from a key shaped like "surface:pos.pos_detail_1"
 * (e.g. "は:助詞.係助詞" gives "は").
 *
 * The key is cut at the LAST ":" rather than the first, so a surface form
 * that itself contains ":" is returned whole instead of being truncated.
 * This assumes the trailing "pos.pos_detail_1" part never contains ":".
 *
 * @param {string} key - key in the "surface:pos.pos_detail_1" format
 * @returns {string} the surface part of the key; the key itself when it
 *     contains no ":" separator
 */
export const restoreToSurfaceFromKey = (key) => {
    const separatorIndex = key.lastIndexOf(":");
    // No separator: there is nothing to strip, so hand the key back unchanged.
    if (separatorIndex === -1) {
        return key;
    }
    return key.slice(0, separatorIndex);
};
20 changes: 19 additions & 1 deletion test/no-doubled-joshi-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@ import assert from "power-assert";
import rule from "../src/no-doubled-joshi";
import TextLintTester from "textlint-tester";
var tester = new TextLintTester();
/*
`**`のような装飾は取り除かれてから評価されているので、
テストでの強調という意味合いのみで利用する。
*/
tester.run("no-double-joshi", rule, {

valid: [
"私は彼が好きだ",
"既存のコードの利用", // "の" の例外
Expand All @@ -11,9 +16,13 @@ tester.run("no-double-joshi", rule, {
// 、 tokenを距離 + 1 として考える
"右がiPhone、左がAndroidです。",
"ナイフで切断した後、ハンマーで破砕した。",
"まずは試していただいて"
// 接続助詞のてが重複は許容
"まずは試していただいて",
// 1個目の「と」は格助詞、2個めの「と」は接続助詞
"ターミナルで「test」**と**入力する**と**、画面に表示されます。"
],
invalid: [
// エラー位置は最後の助詞の位置を表示する
{
text: "私は彼は好きだ",
errors: [
Expand Down Expand Up @@ -122,6 +131,15 @@ tester.run("no-double-joshi", rule, {
column: 16
}
]
}, {
text: "これとあれとそれを持ってきて。",
errors: [
{
message: `一文に二回以上利用されている助詞 "と" がみつかりました。`,
line: 1,
column: 6
}
]
}
]
});