Skip to content

Commit f19c2e3

Browse files
committed
Bug fixes on regexes containing : and default acts array; Add cli for ease of use
1 parent be20509 commit f19c2e3

File tree

4 files changed

+48
-10
lines changed

4 files changed

+48
-10
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Purify URL: Remove redundant tracking parameters, skip redirecting pages, and ex
2424

2525
### 🚀 Quick Start
2626

27-
Visit our [demo page](https://pro-2684.github.io/?page=purlfy), or try it out with our [Tampermonkey script](https://greasyfork.org/scripts/492480)!
27+
Visit our [demo page](https://pro-2684.github.io/?page=purlfy), try out our [Tampermonkey script](https://greasyfork.org/scripts/492480), or simply run `node cli.js <url[]> [<options>]` to purify a list of URLs (for more information, please refer to the comments in the script).
2828

2929
```js
3030
// Somewhat import `Purlfy` class from https://cdn.jsdelivr.net/gh/PRO-2684/pURLfy@latest/purlfy.min.js
@@ -336,7 +336,7 @@ If URL `https://example.com/?key=123` matches this rule, the `key` parameter wil
336336

337337
### 🖇️ Processors
338338

339-
Some processors support parameters, simply append them to the function name separated by a colon (`:`): `func:arg1:arg2...:argn`. The following processors are currently supported:
339+
Some processors support a parameter; simply append it to the function name, separated by a colon (`:`): `func:arg`. The following processors are currently supported:
340340

341341
- `url`: `string->string`, URL decoding (`decodeURIComponent`)
342342
- `base64`: `string->string`, Base64 decoding (`decodeURIComponent(escape(atob(s.replaceAll('_', '/').replaceAll('-', '+'))))`)

README_zh.md

+2-3
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
### 🚀 快速开始
2626

27-
访问我们的 [示例页面](https://pro-2684.github.io/?page=purlfy)或者通过我们的 [Tampermonkey 脚本](https://greasyfork.org/scripts/492480) 来体验!
27+
访问我们的 [示例页面](https://pro-2684.github.io/?page=purlfy),体验我们的 [Tampermonkey 脚本](https://greasyfork.org/scripts/492480),或者直接运行 `node cli.js <url[]> [<options>]` 来净化一系列 URL (更多信息请参考脚本注释)。
2828

2929
```js
3030
// 通过某种方式从 https://cdn.jsdelivr.net/gh/PRO-2684/pURLfy@latest/purlfy.min.js 导入 `Purlfy` 类
@@ -101,7 +101,6 @@ new Purlfy({
101101

102102
- `Purlfy.version: string`: pURLfy 的版本号
103103

104-
105104
## 📖 规则
106105

107106
社区贡献的规则文件托管在 GitHub 上,您可以在 [pURLfy-rules](https://github.com/PRO-2684/pURLfy-rules) 中找到。规则文件的格式如下:
@@ -337,7 +336,7 @@ new Purlfy({
337336

338337
### 🖇️ 处理器
339338

340-
部分处理器支持传入参数,只需用 `:` 分隔即可:`func:arg1:arg2...:argn`。目前支持的处理器如下:
339+
部分处理器支持传入参数,只需用 `:` 分隔即可:`func:arg`。目前支持的处理器如下:
341340

342341
- `url`: `string->string`,URL 解码 (`decodeURIComponent`)
343342
- `base64`: `string->string`,Base64 解码 (`decodeURIComponent(escape(atob(s.replaceAll('_', '/').replaceAll('-', '+'))))`)

cli.js

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// `node cli.js <url[]> [<options>]`
// `url` is the URL to purify. You can pass multiple URLs to purify them all.
// `options` can contain:
// - `--rules <enabled-rules>`, where `enabled-rules` is a comma-separated list of rules to enable. Default is all rules. Short-hand `-r`.
//
// Each result is printed as `<original url> => <purified url>` as soon as it is ready,
// so the output order may differ from the argument order.
// The process exits with a non-zero code if a rule file cannot be loaded or a URL fails to purify.

const Purlfy = require("./purlfy");
const { parseArgs } = require("node:util");

const options = {
    rules: {
        type: "string",
        short: "r",
        default: "",
    },
};
const { values, positionals: urls } = parseArgs({
    args: process.argv.slice(2),
    options,
    allowPositionals: true,
});

// Normalize the comma-separated list; blank entries (e.g. from "a,,b" or a trailing comma) are dropped.
const requestedRules = values.rules
    .split(",")
    .map((rule) => rule.trim())
    .filter(Boolean);
// Fall back to every known rule when nothing usable was requested, so `--rules ","` behaves
// like the documented default instead of silently running with no rules at all.
const enabledRules = requestedRules.length ? requestedRules : require("./rules/list.json");
console.log("Enabled rules:", enabledRules);
console.log("---");

if (urls.length === 0) {
    console.error("No URLs given. Usage: node cli.js <url[]> [--rules <enabled-rules>]");
}

const purifier = new Purlfy({
    fetchEnabled: true,
    lambdaEnabled: true,
});

// Load rule files one by one so a mistyped rule name yields a readable message
// rather than an uncaught `require` stack trace.
const rules = [];
for (const rule of enabledRules) {
    try {
        rules.push(require(`./rules/${rule}.json`));
    } catch (e) {
        console.error(`Failed to load rule "${rule}":`, e.message);
        process.exit(1);
    }
}
purifier.importRules(...rules);

// URLs are purified concurrently; each promise gets its own rejection handler so that
// one failing URL neither hides the others' results nor triggers an unhandled rejection.
for (const url of urls) {
    purifier.purify(url).then((purified) => {
        console.log(url, "=>", purified.url);
    }).catch((e) => {
        console.error(url, "=> failed to purify:", e);
        process.exitCode = 1;
    });
}

purlfy.js

+8-5
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ class Purlfy extends EventTarget {
3131
static #acts = {
3232
"url": decodeURIComponent,
3333
"base64": s => decodeURIComponent(escape(atob(s.replaceAll('_', '/').replaceAll('-', '+')))),
34-
"slice": (s, start, end) => s.slice(parseInt(start), end ? parseInt(end) : undefined),
34+
"slice": (s, startEnd) => {
35+
const [start, end] = startEnd.split(":");
36+
return s.slice(parseInt(start), end ? parseInt(end) : undefined)
37+
},
3538
"regex": (s, regex) => {
3639
const r = new RegExp(regex);
3740
const m = s.match(r);
@@ -162,16 +165,16 @@ class Purlfy extends EventTarget {
162165
static #applyActs(input, acts, logFunc) {
163166
let dest = input;
164167
for (const cmd of (acts)) {
165-
const args = cmd.split(":");
166-
const name = args[0];
168+
const name = cmd.split(":")[0];
169+
const arg = cmd.slice(name.length + 1);
167170
const act = Purlfy.#acts[name];
168171
if (!act) {
169172
logFunc("Invalid act:", cmd);
170173
dest = null;
171174
break;
172175
}
173176
try {
174-
dest = act(dest, ...args.slice(1));
177+
dest = act(dest, arg);
175178
} catch (e) {
176179
logFunc(`Error processing input with act "${name}":`, e);
177180
dest = null;
@@ -459,7 +462,7 @@ class Purlfy extends EventTarget {
459462
logFunc("Visit mode, but got redirected to:", r.url);
460463
urlObj = new URL(r.headers.get("location"), urlObj.href);
461464
} else {
462-
const dest = Purlfy.#applyActs(html, rule.acts?.length ? rule.acts : ["regex:https?:\/\/.(?:www\.)?[-a-zA-Z0-9@%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?!&\/\/=]*)"], logFunc);
465+
const dest = Purlfy.#applyActs(html, rule.acts?.length ? rule.acts : [String.raw`regex:https?:\/\/.(?:www\.)?[-a-zA-Z0-9@%._\+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_\+.~#?!&\/\/=]*)`], logFunc);
463466
if (dest && URL.canParse(dest, urlObj.href)) { // Valid URL
464467
urlObj = new URL(dest, urlObj.href);
465468
} else { // Invalid URL

0 commit comments

Comments
 (0)