Skip to content

Commit

Permalink
feat: support sentence split
Browse files Browse the repository at this point in the history
  • Loading branch information
azu committed May 13, 2021
1 parent eb13cc6 commit 4c1346e
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 22 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
},
"dependencies": {
"kuromojin": "^3.0.0",
"sentence-splitter": "^3.2.1",
"textlint-util-to-string": "^3.1.1"
}
}
45 changes: 26 additions & 19 deletions src/textlint-rule-no-filler.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import type { TxtParentNode, TxtTextNode } from "@textlint/ast-node-types";
import type { TxtTextNode } from "@textlint/ast-node-types";
import type { TextlintRuleReporter } from "@textlint/types";
import { StringSource } from "textlint-util-to-string";
import { tokenize } from "kuromojin";
import { splitAST, Syntax as SentenceSyntax, SentenceNode } from "sentence-splitter";

export type Options = {};
/**
Expand All @@ -17,7 +18,7 @@ const maskCodeNode = (codeNode: TxtTextNode) => {
value: codeNode.value.replace(/./g, "X")
};
};
const sourceWithoutStyle = (node: TxtParentNode) => {
const sourceWithoutStyle = (node: SentenceNode) => {
const nodeMaskedCode = {
...node,
children: node.children.map((childNode) => {
Expand All @@ -34,25 +35,31 @@ const report: TextlintRuleReporter<Options> = (context) => {
const { Syntax, RuleError, report } = context;
return {
async [Syntax.Paragraph](node) {
const source = sourceWithoutStyle(node);
const tokens = await tokenize(source.toString());
tokens.forEach((token) => {
if (token.pos === "フィラー") {
const index = token.word_position - 1;
const originalIndex = source.originalIndexFromIndex(index);
report(
node,
new RuleError(
`フィラー(つなぎ表現)である「${token.surface_form}」を検知しました。
const splitNode = splitAST(node);
const sentences = splitNode.children.filter(
(node) => node.type === SentenceSyntax.Sentence
) as SentenceNode[];
for (const sentence of sentences) {
const source = sourceWithoutStyle(sentence);
const tokens = await tokenize(source.toString());
tokens.forEach((token) => {
if (token.pos === "フィラー") {
const index = token.word_position - 1;
const originalIndex = source.originalIndexFromIndex(index);
report(
sentence,
new RuleError(
`フィラー(つなぎ表現)である「${token.surface_form}」を検知しました。
「えーと」「あの」「まあ」などのつなぎ表現は話し言葉(口語)であるため、文章を読みにくくします。`,
{
index: originalIndex
}
)
);
}
});
{
index: originalIndex
}
)
);
}
});
}
}
};
};
Expand Down
21 changes: 20 additions & 1 deletion test/textlint-rule-no-filler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const errorMessage = (word: string) => {
「えーと」「あの」「まあ」などのつなぎ表現は話し言葉(口語)であるため、文章を読みにくくします。`;
};
tester.run("textlint-rule-no-filler", rule, {
valid: ["これは問題ない文章です。"],
valid: ["これは問題ない文章です。", "`code`と`code`"],
invalid: [
{
text: "えーと、フィラーについてですね。",
Expand Down Expand Up @@ -54,6 +54,25 @@ tester.run("textlint-rule-no-filler", rule, {
index: 0
}
]
},
// multiline
{
text: `なんか、これは問題あるかも。
サーバとサーバーの表記揺れがある。
この雇入と雇入れの違いを見つける。
なんか、これは問題あるかも。
`,
errors: [
{
message: errorMessage("なんか"),
index: 0
},
{
message: errorMessage("なんか"),
line: 4,
column: 1
}
]
}
]
});
50 changes: 48 additions & 2 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@
readdirp "^2.2.1"
upath "^1.1.1"

"@textlint/ast-node-types@^4.2.4", "@textlint/ast-node-types@^4.4.3":
"@textlint/ast-node-types@^4.2.4", "@textlint/ast-node-types@^4.4.2", "@textlint/ast-node-types@^4.4.3":
version "4.4.3"
resolved "https://registry.yarnpkg.com/@textlint/ast-node-types/-/ast-node-types-4.4.3.tgz#fdba16e8126cddc50f45433ce7f6c55e7829566c"
integrity sha512-qi2jjgO6Tn3KNPGnm6B7p6QTEPvY95NFsIAaJuwbulur8iJUEenp1OnoUfiDaC/g2WPPEFkcfXpmnu8XEMFo2A==
Expand Down Expand Up @@ -1664,6 +1664,16 @@ concat-stream@^1.6.2:
readable-stream "^2.2.2"
typedarray "^0.0.6"

concat-stream@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/concat-stream/-/concat-stream-2.0.0.tgz#414cf5af790a48c60ab9be4527d56d5e41133cb1"
integrity sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==
dependencies:
buffer-from "^1.0.0"
inherits "^2.0.3"
readable-stream "^3.0.2"
typedarray "^0.0.6"

confirmer@^1.1.2:
version "1.1.2"
resolved "https://registry.yarnpkg.com/confirmer/-/confirmer-1.1.2.tgz#df36b3eb5ca5992750de8eea9db24781bb4cc254"
Expand Down Expand Up @@ -3238,6 +3248,11 @@ object.pick@^1.3.0:
dependencies:
isobject "^3.0.1"

object_values@^0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/object_values/-/object_values-0.1.2.tgz#f8fbc31d2e537170a4cbcfb28dd61501b3207334"
integrity sha512-tZgUiKLraVH+4OAedBYrr4/K6KmAQw2RPNd1AuNdhLsuz5WP3VB7WuiKBWbOcjeqqAjus2ChIIWC8dSfmg7ReA==

once@^1.3.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1"
Expand Down Expand Up @@ -3575,6 +3590,15 @@ readable-stream@^2.0.2, readable-stream@^2.2.2:
string_decoder "~1.1.1"
util-deprecate "~1.0.1"

readable-stream@^3.0.2:
version "3.6.0"
resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198"
integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==
dependencies:
inherits "^2.0.3"
string_decoder "^1.1.1"
util-deprecate "^1.0.1"

readdirp@^2.2.1:
version "2.2.1"
resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-2.2.1.tgz#0e87622a3325aa33e892285caf8b4e846529a525"
Expand Down Expand Up @@ -3798,6 +3822,11 @@ safe-buffer@~5.1.0, safe-buffer@~5.1.1:
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==

safe-buffer@~5.2.0:
version "5.2.1"
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==

safe-regex@^1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/safe-regex/-/safe-regex-1.1.0.tgz#40a3669f3b077d1e943d44629e157dd48023bf2e"
Expand Down Expand Up @@ -3825,6 +3854,16 @@ semver@^6.1.1, semver@^6.1.2, semver@^6.3.0:
resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d"
integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw==

sentence-splitter@^3.2.1:
version "3.2.1"
resolved "https://registry.yarnpkg.com/sentence-splitter/-/sentence-splitter-3.2.1.tgz#d6f5b66f4ab130f511d50e41e6f42070e2936ffb"
integrity sha512-aG+Tf8M1wVUd2uPSUtdMXdJlKZLcdh+oVE8iEn8KwfxYZ87qDpe7+o0nGZdr+96g2H76Qz/8TrG9dIxyp7c70w==
dependencies:
"@textlint/ast-node-types" "^4.4.2"
concat-stream "^2.0.0"
object_values "^0.1.2"
structured-source "^3.0.2"

set-blocking@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
Expand Down Expand Up @@ -4068,6 +4107,13 @@ string.prototype.trimstart@^1.0.4:
call-bind "^1.0.2"
define-properties "^1.1.3"

string_decoder@^1.1.1:
version "1.3.0"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.3.0.tgz#42f114594a46cf1a8e30b0a84f56c78c3edac21e"
integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
dependencies:
safe-buffer "~5.2.0"

string_decoder@~1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8"
Expand Down Expand Up @@ -4514,7 +4560,7 @@ use@^3.1.0:
resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"
integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==

util-deprecate@~1.0.1:
util-deprecate@^1.0.1, util-deprecate@~1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
integrity sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=
Expand Down

0 comments on commit 4c1346e

Please sign in to comment.