Skip to content

Commit

Permalink
CLDR-17934 site: drive from sitemap.tsv (unicode-org#4033)
Browse files Browse the repository at this point in the history
  • Loading branch information
srl295 authored and haytenf committed Sep 17, 2024
1 parent 5590f33 commit 545b281
Show file tree
Hide file tree
Showing 5 changed files with 456 additions and 174 deletions.
2 changes: 2 additions & 0 deletions docs/site/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
/assets/vendor
/sitemap.xml
/sitemap.md


24 changes: 12 additions & 12 deletions docs/site/assets/css/page.css
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ header .navparent > div {

/* Header title: rendered as a table cell, white text at 1.5em. */
header .title {
  display: table-cell;
  color: white;
  font-size: 1.5em;
}

Expand All @@ -41,22 +41,22 @@ header .nav a.uplink {
}

/* Sub-pages container in the header nav: absolutely positioned white box
   with black text, a drop shadow, 0.5em padding and z-index 1. */
header .nav div.subpages {
  box-shadow: 0px 8px 16px 0px rgba(0, 0, 0, 0.2);
  z-index: 1;
  background-color: white;
  position: absolute;
  color: black;
  padding: 0.5em;
}

/* Hamburger control inside the sub-pages container: 1em offsets, dark slate blue. */
div.subpages .hamburger {
  left: 1em;
  top: 1em;
  color: darkslateblue;
}

/* Hamburger control turns gray while hovered. */
.subpages .hamburger:hover {
  color: gray;
}

header .nav ul b {
Expand All @@ -80,7 +80,7 @@ header .nav ul li {
}

/* Links under .subpages .li are always black; !important overrides other link colors. */
.subpages .li a {
  color: black !important;
}

header .message {
Expand Down
197 changes: 173 additions & 24 deletions docs/site/assets/js/build.mjs
Original file line number Diff line number Diff line change
@@ -1,14 +1,33 @@
// extract site frontmatter, save to json
// extract site frontmatter and read from /sitemap.tsv, save to json

import * as fs from "node:fs/promises";
import * as path from "node:path";
import { default as process } from "node:process";
import { default as matter } from "gray-matter";
import { SitemapStream, streamToPromise } from "sitemap";
import { Readable } from "node:stream";
import { Dirent } from "node:fs";

// utilities and constants

// Directory entries whose name matches this pattern are skipped by the crawler.
const SKIP_THESE = /(node_modules|\.jekyll-cache|^sitemap.*)/;

// Final URL of the site: used as the sitemap hostname and as the prefix of page URLs.
const SITE = "https://cldr.unicode.org";

// Input file describing the page hierarchy; resolved against the current working directory.
const SITEMAPFILE = "sitemap.tsv";

// Utility collator for the "und" (undetermined) locale.
const coll = new Intl.Collator(["und"]);

/**
* Directory Crawler: process one file
* @param {string} d parent directory
* @param {string} fullPath path to this file
* @param {object} out output object
*/
async function processFile(d, fullPath, out) {
const f = await fs.readFile(fullPath, "utf-8");
const m = matter(f);
Expand All @@ -20,7 +39,13 @@ async function processFile(d, fullPath, out) {
}
}

/** process one dirent */
/**
* Directory Crawler: process one dirent
* @param {string} d parent directory
* @param {object} out output object
* @param {Dirent} e directory entry
* @returns
*/
async function processEntry(d, out, e) {
const fullpath = path.join(d, e.name);
if (SKIP_THESE.test(e.name)) return;
Expand All @@ -33,6 +58,7 @@ async function processEntry(d, out, e) {
}

/**
* Directory Crawler: kick off the crawl (or subcrawl) of a directory
* @param {string} d path to directory
* @param {object} out output struct
*/
Expand All @@ -42,12 +68,46 @@ async function traverse(d, out) {
return Promise.all(promises);
}

/**
 * Strip the last path segment: a/b/c.md becomes a/b.
 * @param {string} p slash-separated path
 * @returns {string} everything before the final "/", or "" when there is none
 */
function path2dir(p) {
  const lastSlash = p.lastIndexOf("/");
  if (lastSlash === -1) {
    return "";
  }
  return p.slice(0, lastSlash);
}

/**
 * Swap a trailing ".md" for ".html": a/b/c.md becomes a/b/c.html.
 * A path that does not end in ".md" is returned unchanged.
 * @param {string} p path to convert
 * @returns {string} converted path
 */
function md2html(p) {
  const suffix = ".md";
  if (!p.endsWith(suffix)) {
    return p;
  }
  const stem = p.slice(0, -suffix.length);
  return `${stem}.html`;
}

/**
 * Swap a trailing ".html" for ".md": a/b/c.html becomes a/b/c.md.
 * A path that does not end in ".html" is returned unchanged.
 * @param {string} p path to convert
 * @returns {string} converted path
 */
function html2md(p) {
  const suffix = ".html";
  if (!p.endsWith(suffix)) {
    return p;
  }
  const stem = p.slice(0, -suffix.length);
  return `${stem}.md`;
}

/**
 * Remove a trailing ".md": a/b/c.md becomes a/b/c.
 * A path that does not end in ".md" is returned unchanged.
 * @param {string} p path to trim
 * @returns {string} path without the ".md" suffix
 */
function dropmd(p) {
  const suffix = ".md";
  return p.endsWith(suffix) ? p.slice(0, -suffix.length) : p;
}

async function writeSiteMaps(out) {
/**
 * Build a string consisting only of tab characters.
 * @param {number} n how many tabs to produce
 * @returns {string} n tab characters ("" when n is zero or negative)
 */
function tabs(n) {
  let indent = "";
  for (let count = 0; count < n; count += 1) {
    indent += "\t";
  }
  return indent;
}

/**
 * Turn a markdown path into the page's final URL: the ".md" suffix becomes
 * ".html" and the result is appended to SITE.
 * @param {string} p markdown path relative to the site root
 * @returns {string} absolute URL of the rendered page
 */
function mkurl(p) {
  const htmlPath = md2html(p);
  return `${SITE}/${htmlPath}`;
}

async function writeXmlSiteMap(out) {
// simple list of links
const links = await Promise.all(
out.all.map(async ({ fullPath, title }) => {
Expand All @@ -58,43 +118,132 @@ async function writeSiteMaps(out) {
};
})
);
const stream = new SitemapStream({ hostname: "https://cldr.unicode.org" });
const stream = new SitemapStream({ hostname: SITE });
const data = (
await streamToPromise(Readable.from(links).pipe(stream))
).toString();
await fs.writeFile("./sitemap.xml", data, "utf-8");
console.log("Wrote sitemap.xml");
console.log(`Wrote sitemap.xml with ${links.length} entries`);
}

/*
const coll = new Intl.Collator(["und"]);
const allSorted = [...out.all].sort((a, b) =>
coll.compare(a.fullPath, b.fullPath)
);
await fs.writeFile(
"./sitemap.md",
`---\ntitle: Site Map\n---\n\n` +
allSorted
.map(
({ fullPath, title }) =>
`- [/${fullPath}](/${dropmd(fullPath)}) - ${title}`
)
.join("\n"),
"utf-8"
);
console.log("Wrote sitemap.md");
/**
 * Read SITEMAPFILE and build the user-specified page hierarchy.
 *
 * Each line that is neither blank nor a "#" comment names one page (its path
 * without the ".md" suffix); the number of leading tabs is its depth below
 * the single root page. The result is stored in out.usermap, keyed by page
 * path, for example:
 *
 *   index:       { title, parent: null,    children: ["cldr-spec", "downloads"] }
 *   "cldr-spec": { title, parent: "index", children: ["cldr-spec/collation-guidelines"] }
 *   "cldr-spec/collation-guidelines": { title, parent: "cldr-spec" }
 *
 * Problems are collected and printed with console.error before throwing.
 *
 * @param {object} out crawl output; out.all is read, out.usermap is set
 * @throws {Error} when any line is invalid or a crawled page is not listed
 */
async function readTsvSiteMap(out) {
  console.log(`Reading ${SITEMAPFILE}`);
  // Comment and blank lines stay in the array so indexes map to line numbers.
  const lines = (await fs.readFile(SITEMAPFILE, "utf-8")).split("\n");
  const errors = [];

  // Index the crawled pages once instead of scanning out.all for every line.
  // The first entry wins for a repeated fullPath, as Array.prototype.find would.
  const pagesByPath = new Map();
  for (const item of out.all) {
    if (!pagesByPath.has(item.fullPath)) {
      pagesByPath.set(item.fullPath, item);
    }
  }

  // user's specified map
  const usermap = {};
  // Own-property check, so a page named like an Object.prototype member
  // (e.g. "constructor") is not mistaken for an existing entry.
  const isListed = (key) => Object.prototype.hasOwnProperty.call(usermap, key);

  // stack of parents, in order (root first)
  let parents = [];
  for (const [index, line] of lines.entries()) {
    const location = `${SITEMAPFILE}:${index + 1}: `; // for errors
    // skip comment or blank lines
    if (/^[ \t]*#/.test(line) || !line.trim()) continue;

    // number of leading tabs = nesting depth
    const depth = /^\t*/.exec(line)[0].length;
    // rest of line: the actual path
    const pagePath = line.slice(depth).trim();
    if (isListed(pagePath)) {
      errors.push(`${location} duplicate path: ${pagePath}`);
      continue;
    }
    const foundItem = pagesByPath.get(`${pagePath}.md`);
    if (!foundItem) {
      errors.push(`${location} could not find file: ${pagePath}.md`);
      continue;
    }
    if (!foundItem.title) {
      errors.push(`${location} missing title in ${pagePath}.md`);
      // keep going so that further problems on this line are reported too
    }
    const entry = {
      title: foundItem.title ?? pagePath,
    };
    usermap[pagePath] = entry;

    const parentCount = parents.length;
    if (depth < parentCount) {
      // outdent
      if (depth === 0) {
        errors.push(`${location} can't have more than one root page!`);
        break;
      }
      // drop back to the ancestors of this depth
      parents = parents.slice(0, depth);
    } else if (depth > parentCount) {
      // Error - wrong indent
      errors.push(
        `${location} indent too deep (expected ${parentCount} tabs at most)`
      );
      continue;
    }

    // innermost open ancestor, or null for the root page
    const parent = parents[parents.length - 1] ?? null;
    entry.parent = parent;
    if (parent !== null) {
      // not for the root page
      usermap[parent].children = usermap[parent].children ?? [];
      usermap[parent].children.push(pagePath);
    }
    parents.push(pagePath); // for next time
  }
  out.usermap = usermap;

  // every crawled page must appear in the sitemap
  for (const { fullPath } of out.all) {
    const key = dropmd(fullPath);
    if (!isListed(key)) {
      errors.push(`${SITEMAPFILE}: missing: ${key}`);
    }
  }

  if (errors.length) {
    errors.forEach((l) => console.error(l));
    throw new Error(`${errors.length} errors reading tsv`);
  }
  console.log(`${SITEMAPFILE} Valid.`);
}

/**
 * Top-level entry point: crawl the current directory, write sitemap.xml,
 * validate the crawl against the TSV sitemap, then write the JSON tree asset.
 */
async function main() {
  const out = {
    all: [],
    dirs: {},
  };
  await fs.mkdir("assets/json/", { recursive: true });
  await traverse(".", out);
  await writeXmlSiteMap(out);
  await readTsvSiteMap(out);
  // write final json asset
  // out.all is only needed by the steps above; drop it so it is not deployed in tree.json
  delete out.all;
  await fs.writeFile("assets/json/tree.json", JSON.stringify(out, null, " "));
  console.log("Wrote assets/json/tree.json");
}

main().then(
Expand Down
Loading

0 comments on commit 545b281

Please sign in to comment.