-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.js
44 lines (37 loc) · 1.36 KB
/
scrape.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import { fetchOrLoad } from "https://js.sabae.cc/fetchOrLoad.js";
import { HTMLParser } from "https://js.sabae.cc/HTMLParser.js";
import { CSV } from "https://code4fukui.github.io/CSV/CSV.js";
import { sleep } from "https://js.sabae.cc/sleep.js";
import { table2json } from "./table2json.js";
// Top page of the site; the pages scraped later are the ones linked from it.
const url = "https://kosenconf.jp/";
const html = await fetchOrLoad(url);
const dom = HTMLParser.parse(html);

// Keep only the ".rtable li" entries whose text mentions "年開催", then take
// the href of the first anchor inside each one. The resulting order is the
// reverse of the order in which the entries appear on the page.
const yearEntries = dom
  .querySelectorAll(".rtable li")
  .filter((entry) => entry.text.includes("年開催"));
const links = yearEntries
  .map((entry) => entry.querySelector("a").getAttribute("href"))
  .reverse();

// Rows saved by an earlier run are the starting point, so only new rows get
// appended. NOTE(review): the second argument is presumably the fallback used
// when the file does not exist yet — confirm against the CSV library.
// To rebuild from scratch, start from an empty array instead.
const fn = "kosenconf.csv";
const list = await CSV.fetchJSON(fn, []);
// Tags already present in the list. A Set gives a constant-time duplicate
// check instead of rescanning the whole list for every scraped row.
const knownTags = new Set(list.map((row) => row.タグ));

// Visit every collected link in turn and append the rows that are not yet
// recorded, then write the merged list back to the CSV file.
for (const link of links) {
  console.log(link);
  const html = await fetchOrLoad(link);
  const dom = HTMLParser.parse(html);
  const tbl = dom.querySelector(".style_table");
  if (tbl) {
    // table2json is a project-local helper. NOTE(review): "タイトル" is
    // presumably the header cell that identifies the header row — confirm
    // in table2json.js.
    const json = table2json(tbl, "タイトル");
    for (const item of json) {
      // Rows without a tag cannot be de-duplicated, so they are ignored.
      if (!item.タグ) continue;
      if (knownTags.has(item.タグ)) continue;
      // Fall back to a URL built from the tag when the row has none.
      if (!item.URL) item.URL = `https://kosenconf.jp/?${item.タグ}`;
      // 1 when the title ends with "中止", otherwise 0.
      item.中止フラグ = item.タイトル.endsWith("中止") ? 1 : 0;
      list.push(item);
      // Also guards against the same tag appearing twice within this run.
      knownTags.add(item.タグ);
    }
  } else {
    // A page without the expected table is skipped rather than aborting the
    // whole run before anything is written.
    console.warn(`no .style_table found, skipped: ${link}`);
  }
  // Short pause between successive page requests.
  await sleep(100);
}
await Deno.writeTextFile(fn, CSV.stringify(list));