Skip to content

Commit

Permalink
feat: add js object & generate type files.
Browse files Browse the repository at this point in the history
  • Loading branch information
jaywcjlove committed Jun 4, 2022
1 parent 0db9eb2 commit 5357862
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ build
node_modules
npm-debug.log*
package-lock.json
dist
cjs
esm

Expand Down
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

最权威的《通用规范汉字表》(**T**able **o**f **G**eneral **S**tandard **C**hinese **C**haracters),数据来源 2013-06-01 中华人民共和国教育部发布的[《通用规范汉字表》](http://www.moe.gov.cn/jyb_sjzl/ziliao/A19/201306/t20130601_186002.html)

⚠️ 注意:拼音注音没有找到官方数据来源,数据来源[《维基词典》](https://zh.wiktionary.org/wiki/Appendix:汉语拼音索引/通用规范汉字表)汉语拼音索引。
⚠️ 注意:拼音注音没有找到官方数据来源,数据来源[《维基词典》](https://zh.wiktionary.org/wiki/Appendix:汉语拼音索引/通用规范汉字表)汉语拼音索引。PDF 数据存放在 [@v1.0.0](https://github.com/jaywcjlove/table-of-general-standard-chinese-characters/releases/tag/v1.0.0)

## 安装

Expand All @@ -25,6 +25,17 @@ import data from 'togscc/data/characters.json';
]
```

```js
import characters from 'togscc/dist/characters';
import pinyin from 'togscc/dist/pinyin';
import raw from 'togscc/dist/pinyin.raw';
import togscc from 'togscc';

console.log(togscc['正']) // => ["zhēng","zhèng"]
console.log("你好世界!".split('').map((han) => togscc[han] ? togscc[han] : han))
// => ["nǐ", ["hǎo","hào"], "shì", "jiè", "!"]
```

## 数据说明

| 文件名 | 说明 | 数据说明 |
Expand Down
7 changes: 5 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"name": "togscc",
"version": "1.0.0",
"version": "1.0.1",
"description": "通用规范汉字表 (Table of General Standard Chinese Characters)",
"main": "dist/togscc.js",
"scripts": {
"start": "node scripts/start.mjs",
"watch": "jest --watch",
Expand All @@ -14,6 +15,7 @@
"keywords": [
"chinese-characters",
"Chinese",
"togscc",
"characters",
"汉字",
"中文",
Expand All @@ -22,7 +24,8 @@
"author": "jaywcjlove",
"license": "MIT",
"files": [
"data"
"data",
"dist"
],
"devDependencies": {
"jest": "^28.1.0"
Expand Down
83 changes: 72 additions & 11 deletions scripts/start.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ import path from 'path';
// Directory the script is launched from; reported paths are shown relative to it.
const root = process.cwd();

/**
 * Builds the ANSI-colored console label printed after a file is written.
 *
 * @param {string} filename - Path of the file to describe.
 * @returns {string} The upper-cased extension (without its leading dot) in
 *   bold blue, followed by the path relative to `root` in bold green.
 */
const pathLog = (filename) => {
  const extLabel = path.extname(filename).replace(/^\./, '').toLocaleUpperCase();
  const relativePath = path.relative(root, filename);
  return ` 👉 \x1b[34;1m${extLabel}\x1b[0m\x1b[32;1m ${relativePath} \x1b[0m`;
};

/**
 * Generates the source text of a UMD-style module that exposes one value.
 *
 * The emitted wrapper assigns the value to `module.exports` when CommonJS
 * globals are present, registers it through `define` when an AMD loader is
 * present, and otherwise attaches it to the global object under `funName`.
 *
 * @param {string} funName - Identifier used for the local variable and for the
 *   global property in the browser fallback.
 * @param {string} content - JavaScript expression source (e.g. the output of
 *   `JSON.stringify`) that becomes the module's value.
 * @returns {string} The complete module source code.
 */
const createScript = (funName, content) => {
  const umdLines = [
    '(function (global, factory) {',
    "typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :",
    "typeof define === 'function' && define.amd ? define(factory) :",
    `(global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.${funName} = factory());`,
    "})(this, (function () { 'use strict';",
    `var ${funName} = ${content};`,
    `return ${funName};`,
    '}));',
  ];
  return umdLines.join('\n');
};

;(async () => {
const pyRawPath = path.resolve(root, 'data/pinyin.raw.txt');
const pyRawJSONPath = path.resolve(root, 'data/pinyin.raw.json');
Expand All @@ -30,13 +39,13 @@ const pathLog = (filename) => ` 👉 \x1b[34;1m${path.extname(filename).replace(
});
});

fs.promises.writeFile(pyRawJSONPath, JSON.stringify(pyRawObj, null, 2));
await fs.promises.writeFile(pyRawJSONPath, JSON.stringify(pyRawObj, null, 2));
console.log(`${pathLog(pyRawJSONPath)}`);

fs.promises.writeFile(pyRawJSONMinPath, JSON.stringify(pyRawObj, null, 0));
await fs.promises.writeFile(pyRawJSONMinPath, JSON.stringify(pyRawObj, null, 0));
console.log(`${pathLog(pyRawJSONMinPath)}`);

fs.promises.writeFile(pyRawJSONEffectPath, JSON.stringify(pyRawEffect, null, 2));
await fs.promises.writeFile(pyRawJSONEffectPath, JSON.stringify(pyRawEffect, null, 2));
console.log(`${pathLog(pyRawJSONEffectPath)}`);

const charactersPath = path.resolve(root, 'data/characters.txt');
Expand All @@ -55,6 +64,7 @@ const pathLog = (filename) => ` 👉 \x1b[34;1m${path.extname(filename).replace(
const pinyinArr = [];
// 多音字
const pinyinDuoyinObj = {};
const hanToPinyinObj = {};

charactersStr.split('\n').forEach((str) => {
const han = str.trim();
Expand All @@ -66,29 +76,80 @@ const pathLog = (filename) => ` 👉 \x1b[34;1m${path.extname(filename).replace(
if (py.length > 1) {
pinyinDuoyinObj[han] = py;
}
hanToPinyinObj[han] = py.length === 1 ? py.join() : py;
});
fs.promises.writeFile(charactersJsonPath, JSON.stringify(charactersObj, null, 2));

await fs.promises.writeFile(charactersJsonPath, JSON.stringify(charactersObj, null, 2));
console.log(`${pathLog(charactersJsonPath)} 汉字: ${charactersObj.length}`);

fs.promises.writeFile(charactersJsonMinPath, JSON.stringify(charactersObj, null, 0));
await fs.promises.writeFile(charactersJsonMinPath, JSON.stringify(charactersObj, null, 0));
console.log(`${pathLog(charactersJsonMinPath)} 汉字: ${charactersObj.length}`);

fs.promises.writeFile(pinyinPath, pinyinStr.replace(/\n$/g, ''));
await fs.promises.writeFile(pinyinPath, pinyinStr.replace(/\n$/g, ''));
console.log(`${pathLog(pinyinPath)} 拼音: ${pinyinArr.length}`);

fs.promises.writeFile(pinyinJsonPath, JSON.stringify(pinyinArr, null, 2));
await fs.promises.writeFile(pinyinJsonPath, JSON.stringify(pinyinArr, null, 2));
console.log(`${pathLog(pinyinJsonPath)} 拼音: ${pinyinArr.length}`);

fs.promises.writeFile(pinyinJsonMinPath, JSON.stringify(pinyinArr, null, 0));
await fs.promises.writeFile(pinyinJsonMinPath, JSON.stringify(pinyinArr, null, 0));
console.log(`${pathLog(pinyinJsonMinPath)} 拼音: ${pinyinArr.length}`);

fs.promises.writeFile(pinyinJsonDuoPath, JSON.stringify(pinyinDuoyinObj, null, 2));
await fs.promises.writeFile(pinyinJsonDuoPath, JSON.stringify(pinyinDuoyinObj, null, 2));
console.log(`${pathLog(pinyinJsonDuoPath)} 多音字: ${Object.keys(pinyinDuoyinObj).length}`);

fs.promises.writeFile(pinyinJsonDuoMinPath, JSON.stringify(pinyinDuoyinObj, null, 0));
await fs.promises.writeFile(pinyinJsonDuoMinPath, JSON.stringify(pinyinDuoyinObj, null, 0));
console.log(`${pathLog(pinyinJsonDuoMinPath)} 多音字: ${Object.keys(pinyinDuoyinObj).length}`);

fs.promises.writeFile(charactersPinyinCSVPath, pinyinCSV);
await fs.promises.writeFile(charactersPinyinCSVPath, pinyinCSV);
console.log(`${pathLog(charactersPinyinCSVPath)}`);

const distDir = path.resolve(root, 'dist');
if (!fs.existsSync(distDir)) {
await fs.promises.mkdir('dist');
}

const charactersScript = path.resolve(root, 'dist/characters.js');
await fs.promises.writeFile(charactersScript, createScript('characters', JSON.stringify(charactersObj, null, 0)));
console.log(`${pathLog(charactersScript)}`);

const charactersType = path.resolve(root, 'dist/characters.d.ts');
await fs.promises.writeFile(charactersType, `declare const characters: string[];\nexport default characters;`);
console.log(`${pathLog(charactersType)}`);


const pinyinScript = path.resolve(root, 'dist/pinyin.js');
await fs.promises.writeFile(pinyinScript, createScript('pinyin', JSON.stringify(pinyinArr, null, 0)));
console.log(`${pathLog(pinyinScript)}`);

const pinyinType = path.resolve(root, 'dist/pinyin.d.ts');
await fs.promises.writeFile(pinyinType, `declare const obj: string[];\nexport default obj;`);
console.log(`${pathLog(pinyinType)}`);


const pinyinDuoScript = path.resolve(root, 'dist/pinyin.duo.js');
await fs.promises.writeFile(pinyinDuoScript, createScript('duoyin', JSON.stringify(pinyinDuoyinObj, null, 0)));
console.log(`${pathLog(pinyinDuoScript)}`);

const pinyinDuoType = path.resolve(root, 'dist/pinyin.duo.d.ts');
await fs.promises.writeFile(pinyinDuoType, `declare const obj: Record<string, string[]>;\nexport default obj;`);
console.log(`${pathLog(pinyinDuoType)}`);


const pinyinRawScript = path.resolve(root, 'dist/pinyin.raw.js');
await fs.promises.writeFile(pinyinRawScript, createScript('pinyinRaw', JSON.stringify(pyRawObj, null, 0)));
console.log(`${pathLog(pinyinRawScript)}`);

const pinyinRawType = path.resolve(root, 'dist/pinyin.raw.d.ts');
await fs.promises.writeFile(pinyinRawType, `declare const obj: Record<string, string[]>;\nexport default obj;`);
console.log(`${pathLog(pinyinRawType)}`);


const togsccScript = path.resolve(root, 'dist/togscc.js');
await fs.promises.writeFile(togsccScript, createScript('togscc', JSON.stringify(hanToPinyinObj, null, 0)));
console.log(`${pathLog(togsccScript)}`);

const togsccType = path.resolve(root, 'dist/togscc.d.ts');
await fs.promises.writeFile(togsccType, `declare const obj: Record<string, string | string[]>;\nexport default obj;`);
console.log(`${pathLog(togsccType)}`);

})();
42 changes: 42 additions & 0 deletions test/data.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,31 @@ const pinyin = require('../data/pinyin.json');
const pinyinRaw = require('../data/pinyin.raw.json');
const pinyinRawEffect = require('../data/pinyin.raw.effect.json');

const charactersJS = require('../dist/characters');
const pinyinDuoJS = require('../dist/pinyin.duo');
const pinyinJS = require('../dist/pinyin');
const pinyinRawJS = require('../dist/pinyin.raw');
const togscc = require('../');


// Checks the character -> pinyin map loaded from the package root.
test('汉字转拼音 test case', () => {
  // One entry is expected per character in the table.
  expect(Object.keys(togscc).length).toBe(8105);

  // Characters with more than one reading map to an array of readings.
  const multiReadingSamples = [
    ['了', [ "le", "liǎo" ]],
    ['正', [ "zhēng", "zhèng" ]],
    ['啜', [ "chuài", "chuò" ]],
  ];
  multiReadingSamples.forEach(([han, readings]) => {
    expect(togscc[han]).toEqual(readings);
  });

  // Converting a sentence: characters missing from the map (the "!") pass through unchanged.
  const converted = "你好世界!".split('').map((han) => togscc[han] ? togscc[han] : han);
  expect(converted).toEqual(["nǐ", ["hǎo","hào"], "shì", "jiè", "!"]);
});

test('汉字字序 test case', () => {
expect(charactersJS.length).toBe(8105);
expect(charactersJS.indexOf('一') + 1).toBe(1);
expect(charactersJS.indexOf('幻') + 1).toBe(189);
expect(charactersJS.indexOf('柰') + 1).toBe(4139);
expect(charactersJS.indexOf('𫘝') + 1).toBe(6643);
expect(charactersJS.indexOf('蠼') + 1).toBe(8105);

expect(characters.length).toBe(8105);
expect(characters.indexOf('一') + 1).toBe(1);
expect(characters.indexOf('幻') + 1).toBe(189);
Expand All @@ -14,13 +38,25 @@ test('汉字字序 test case', () => {
});

// Checks the multi-reading character table in both of its forms:
// the generated dist module first, then the JSON data file.
test('多音汉字 test case', () => {
  const expectedReadings = [
    ['了', [ "le", "liǎo" ]],
    ['正', [ "zhēng", "zhèng" ]],
    ['啜', [ "chuài", "chuò" ]],
  ];

  // Both sources must expose the same 597 entries and the same sample readings.
  [pinyinDuoJS, pinyinDuo].forEach((duoyinTable) => {
    expect(Object.keys(duoyinTable).length).toBe(597);
    expectedReadings.forEach(([han, readings]) => {
      expect(duoyinTable[han]).toEqual(readings);
    });
  });
});

test('拼音 test case', () => {
expect(pinyinJS.length).toBe(8105);
expect(pinyinJS[characters.indexOf('一')]).toBe('yī');
expect(pinyinJS[characters.indexOf('幻')]).toBe('huàn');
expect(pinyinJS[characters.indexOf('了')]).toEqual([ "le", "liǎo" ]);
expect(pinyinJS[characters.indexOf('正')]).toEqual([ "zhēng", "zhèng" ]);
expect(pinyinJS[characters.indexOf('啜')]).toEqual([ "chuài", "chuò" ]);

expect(pinyin.length).toBe(8105);
expect(pinyin[characters.indexOf('一')]).toBe('yī');
expect(pinyin[characters.indexOf('幻')]).toBe('huàn');
Expand All @@ -30,6 +66,12 @@ test('拼音 test case', () => {
});

test('拼音对应汉字 test case', () => {
expect(Object.keys(pinyinRawJS).length).toBe(1296);
expect(pinyinRawJS['ái']).toEqual([ "挨", "皑", "癌" ]);
expect(pinyinRawJS['báo']).toEqual([ "雹", "薄" ]);
expect(pinyinRawJS['běng']).toEqual([ "绷", "琫" ]);
expect(pinyinRawJS['wáng']).toEqual([ "亡", "王" ]);

expect(Object.keys(pinyinRaw).length).toBe(1296);
expect(pinyinRaw['ái']).toEqual([ "挨", "皑", "癌" ]);
expect(pinyinRaw['báo']).toEqual([ "雹", "薄" ]);
Expand Down

0 comments on commit 5357862

Please sign in to comment.