Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat use markdoc docs #15

Merged
merged 9 commits into from
Jan 9, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: use script over submodule
loks0n committed Dec 1, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit e735ac7073fa737fcae72bd67273625c34badf46
3 changes: 1 addition & 2 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
_APP_ASSISTANT_OPENAI_API_KEY=YOUR_OPENAI_API_KEY
DEBUG=false
_APP_ASSISTANT_OPENAI_API_KEY=YOUR_OPENAI_API_KEY
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -2,4 +2,5 @@ node_modules
.idea
.vscode
.env
venv
venv
sources
3 changes: 0 additions & 3 deletions .gitmodules

This file was deleted.

4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -11,9 +11,9 @@ RUN pnpm --prod install

COPY . .

RUN git submodule update --init --recursive
RUN pnpm run fetch-sources

ENV _APP_ASSISTANT_OPENAI_API_KEY=''

EXPOSE 3003
CMD [ "node", "main.js" ]
CMD [ "pnpm", "start" ]
119 changes: 119 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 9 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
@@ -4,23 +4,25 @@
"description": "",
"main": "main.js",
"scripts": {
"dev": "nodemon main.js",
"start": "node src/main.js",
"dev": "nodemon src/main.js",
"fetch-sources": "node scripts/git-sources.js && node scripts/web-sources.js",
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"ai": "^2.1.15",
"body-parser": "^1.20.2",
"cors": "^2.8.5",
"dotenv": "^16.3.1",
"execa": "^8.0.1",
"express": "^4.18.2",
"faiss-node": "^0.3.0",
"glob": "^10.3.1",
"langchain": "^0.0.131",
"nodemon": "^2.0.22",
"openai-edge": "^1.2.0"
"faiss-node": "^0.5.1",
"glob": "^10.3.10",
"langchain": "^0.0.198",
"node-html-markdown": "^1.3.0",
"nodemon": "^3.0.1"
},
"type": "module"
}
729 changes: 343 additions & 386 deletions pnpm-lock.yaml

Large diffs are not rendered by default.

64 changes: 0 additions & 64 deletions scrape.js

This file was deleted.

39 changes: 39 additions & 0 deletions scripts/git-sources.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { mkdir, writeFile } from "fs/promises";
import { execa } from "execa";

// Sparse-checkout patterns: only the documentation sources of the website
// repository are materialized on disk.
const WEBSITE_SPARSE_PATTERNS = [
  "src/routes/docs/**/*.markdoc",
  "src/routes/docs/**/*.md",
  "src/partials/**/*.md",
];

// Make sure the shared output directory exists before anything is cloned into it.
await mkdir("./sources", { recursive: true });

await gitClone(
  "https://github.com/appwrite/website.git",
  "./sources/website",
  WEBSITE_SPARSE_PATTERNS
);

/**
 * Clone a Git repository into `localPath`, optionally checking out only the
 * paths matched by the given sparse-checkout patterns.
 *
 * Whatever already exists at `localPath` is removed first. Failures of the
 * clone/checkout steps are logged and flagged through `process.exitCode`
 * instead of being thrown, so the calling script finishes but still exits
 * with a non-zero status.
 *
 * @param {string} repositoryUrl - URL handed to `git clone`.
 * @param {string} localPath - Destination directory; deleted before cloning.
 * @param {string | string[]} [files="*"] - Sparse-checkout pattern(s). The
 *   default `"*"` skips sparse-checkout and checks out the whole tree.
 * @returns {Promise<void>} Settles after the checkout finished or the failure
 *   was reported. Rejects only if removing `localPath` fails.
 */
async function gitClone(repositoryUrl, localPath, files = "*") {
  await execa("rm", ["-rf", localPath]);

  console.log(`Cloning ${repositoryUrl} to ${localPath}`);
  try {
    // --no-checkout: fetch the objects only, so the sparse-checkout rules
    // can be written before any file is placed in the working tree.
    await execa("git", ["clone", "--no-checkout", repositoryUrl, localPath]);

    if (files !== "*") {
      // Accept a single pattern as well as a list, without reassigning the parameter.
      const patterns = Array.isArray(files) ? files : [files];

      await mkdir(`${localPath}/.git/info`, { recursive: true });
      await writeFile(
        `${localPath}/.git/info/sparse-checkout`,
        patterns.join("\n")
      );

      await execa("git", ["config", "core.sparseCheckout", "true"], {
        cwd: localPath,
      });
    }

    await execa("git", ["checkout"], { cwd: localPath });
  } catch (error) {
    console.error("An error occurred:", error);
    // Logging alone would leave the exit status at 0, so a build step such as
    // `pnpm run fetch-sources` would succeed without any sources on disk.
    process.exitCode = 1;
  }
}
57 changes: 57 additions & 0 deletions scripts/web-sources.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { mkdir, writeFile } from "fs/promises";
import { execa } from "execa";
import { NodeHtmlMarkdown } from "node-html-markdown";

// Downloads the rendered API reference page of every SDK/service pair from
// appwrite.io, converts the page's <main> element to Markdown and stores it as
// ./sources/references/<sdk>/<service>.md.
console.log("Downloading reference pages");

// Start from an empty output directory so stale pages never survive a re-run.
await execa("rm", ["-rf", "./sources/references"]);
await mkdir("./sources/references", { recursive: true });

// SDK identifiers as they appear in the reference URLs.
const SDKS = [
  "client-web",
  "client-flutter",
  "client-apple",
  "client-android-kotlin",
  "client-android-java",
  "client-graphql",
  "client-rest",
  "server-dart",
  "server-deno",
  "server-dotnet",
  "server-nodejs",
  "server-php",
  "server-python",
  "server-ruby",
  "server-swift",
  "server-kotlin",
  "server-java",
  "server-graphql",
  "server-rest",
];

// Service identifiers as they appear in the reference URLs.
const SERVICES = [
  "account",
  "avatars",
  "databases",
  "functions",
  "locale",
  "storage",
  "teams",
  "users",
];

// Matches the page's <main> element. [\s\S] (rather than `.|\n`) also covers
// carriage returns and avoids a per-character alternation.
const MAIN_ARTICLE_PATTERN = /<main class="u-contents">[\s\S]*<\/main>/;

for (const sdk of SDKS) {
  await mkdir(`./sources/references/${sdk}/`, { recursive: true });

  for (const service of SERVICES) {
    const url = `https://appwrite.io/docs/references/cloud/${sdk}/${service}`;

    const response = await fetch(url);
    if (!response.ok) {
      // Do not store an error page as if it were reference documentation.
      console.warn(`Skipping ${url}: HTTP ${response.status}`);
      continue;
    }

    const html = await response.text();

    // String.prototype.match returns null when nothing matches; indexing that
    // result directly would abort the whole download with a TypeError.
    const article = html.match(MAIN_ARTICLE_PATTERN)?.[0];
    if (!article) {
      console.warn(`Skipping ${url}: no <main class="u-contents"> element found`);
      continue;
    }

    const markdown = NodeHtmlMarkdown.translate(article);

    await writeFile(`./sources/references/${sdk}/${service}.md`, markdown);
  }
}
14 changes: 7 additions & 7 deletions embeddings.js → src/embeddings.js
Original file line number Diff line number Diff line change
@@ -4,18 +4,18 @@ import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { OpenAIChat } from "langchain/llms/openai";
import { Document } from "langchain/document";
import { MarkdownTextSplitter } from "langchain/text_splitter";
import { documentation } from "./scrape.js";
import { documentation, references } from "./sources.js";

async function chunk_sources(sources) {
const source_chunks = [];
async function chunkSources(sources) {
const sourceChunks = [];
const splitter = new MarkdownTextSplitter({
chunk_size: 1024,
chunk_overlap: 64,
});

for (const source of sources) {
for (const chunk of await splitter.splitText(source.pageContent)) {
source_chunks.push(
sourceChunks.push(
new Document({
pageContent: chunk,
metadata: source.metadata,
@@ -24,19 +24,19 @@ async function chunk_sources(sources) {
}
}

return source_chunks;
return sourceChunks;
}

export const initializeSearchIndex = async () => {
const sources = documentation.map((page) => {
const sources = [...documentation, ...references].map((page) => {
return new Document({
pageContent: page.contents,
metadata: page.metadata,
});
});

return FaissStore.fromDocuments(
await chunk_sources(sources),
await chunkSources(sources),
new OpenAIEmbeddings({
openAIApiKey: process.env._APP_ASSISTANT_OPENAI_API_KEY,
})
16 changes: 5 additions & 11 deletions main.js → src/main.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import "dotenv/config";
import bodyParser from "body-parser";
import cors from "cors";
import express from "express";
import { getChain, initializeSearchIndex } from "./embeddings.js";
import "dotenv/config";

const app = express();
app.use(
@@ -32,16 +32,10 @@ app.post("/", async (req, res) => {
const { prompt } = JSON.parse(text);
const templated = template(prompt);

const inputDocuments = await searchIndex.similaritySearch(prompt, 4);
if (process.env.DEBUG) {
res.write("Prompt: " + templated + "\n\n");

for (const doc of inputDocuments) {
res.write("Using source: " + doc.metadata.url + "\n");
res.write(doc.pageContent + "\n\n");
}

res.write("Completion: ");
const inputDocuments = await searchIndex.similaritySearch(prompt, 5);
for (const inputDocument of inputDocuments) {
res.write(`Using source: `);
res.write(`- ${inputDocument.metadata.filename}\n`);
}

const chain = await getChain((token) => {
91 changes: 91 additions & 0 deletions src/sources.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { glob } from "glob";
import { readFileSync } from "fs";

/**
 * Documentation pages and partials read from the cloned website sources.
 *
 * Each entry has the shape `{ metadata, contents }`:
 * - `metadata.filename` is the path exactly as returned by glob,
 * - `metadata.url` is the public page URL for `+page.markdoc` route files and
 *   `null` for every other file,
 * - the parsed frontmatter keys are spread into `metadata` last (so they
 *   override `filename`/`url` on a key clash),
 * - `contents` is the file text after `cleanMarkdoc`.
 */
export const documentation = (
  await glob([
    "./sources/website/src/routes/docs/**/*.markdoc",
    "./sources/website/src/routes/docs/**/*.md",
    "./sources/website/src/partials/**/*.md",
  ])
).map((filename) => {
  const contents = readFileSync(filename, { encoding: "utf8" });

  const routesPrefix = "sources/website/src/routes/";
  const pageSuffix = "+page.markdoc";

  // The returned path may or may not carry a leading "./" (and may use
  // backslashes on Windows) depending on glob's version and options, so it is
  // normalized before comparing. NOTE(review): glob v10 is expected to return
  // bare relative paths unless `dotRelative` is set - confirm.
  const relativePath = filename.replace(/\\/g, "/").replace(/^\.\//, "");
  const isRoutePage =
    relativePath.startsWith(routesPrefix) && relativePath.endsWith(pageSuffix);

  const metadata = {
    filename,
    // The route directory maps 1:1 onto the public URL path. The previous
    // template literal carried a stray "\n )}" that ended up inside every URL.
    url: isRoutePage
      ? `https://appwrite.io/${relativePath.slice(
          routesPrefix.length,
          -pageSuffix.length
        )}`
      : null,
    ...parseMarkdownFrontmatter(contents),
  };

  return {
    metadata,
    contents: cleanMarkdoc(contents, metadata.url),
  };
});

/**
 * API reference pages read from `./sources/references/<sdk>/<service>.md`.
 *
 * Each entry has the shape `{ metadata: { filename, url }, contents }`, where
 * `url` points at the matching page under
 * `https://appwrite.io/docs/references/cloud/` and `contents` is the file text
 * after `cleanMarkdoc`.
 */
export const references = (await glob(["./sources/references/**/*.md"])).map(
  (filename) => {
    const contents = readFileSync(filename, { encoding: "utf8" });

    // Take the last two path segments instead of fixed indices, so the result
    // does not depend on a leading "./" or on the platform's path separator.
    const [sdk, file] = filename.split(/[\\/]/).slice(-2);
    const service = file.split(".")[0];

    const metadata = {
      filename,
      url: `https://appwrite.io/docs/references/cloud/${sdk}/${service}`,
    };

    return {
      metadata,
      contents: cleanMarkdoc(contents, metadata.url),
    };
  }
);

/**
 * Parse the leading `---` frontmatter block of a Markdown/Markdoc document
 * into a flat object.
 *
 * Only single-line `key: value` pairs are understood; this is not a YAML
 * parser. Lines without a key (blank lines, list items, text without a colon)
 * are ignored. Values are kept as trimmed strings and may themselves contain
 * colons.
 *
 * @param {string} contents - Full text of the document.
 * @returns {Record<string, string>} Parsed pairs, or `{}` when the document
 *   does not start with a frontmatter block.
 */
function parseMarkdownFrontmatter(contents) {
  // \r?\n so files with Windows line endings are recognised as well.
  const raw = contents.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!raw) {
    return {};
  }

  const frontmatter = {};
  for (const line of raw[1].split(/\r?\n/)) {
    // Split on the first colon only, so a value such as "Learn: the basics"
    // is kept whole.
    const separatorIndex = line.indexOf(":");
    if (separatorIndex === -1) {
      continue;
    }

    const key = line.slice(0, separatorIndex).trim();
    if (key === "") {
      continue;
    }

    frontmatter[key] = line.slice(separatorIndex + 1).trim();
  }
  return frontmatter;
}

/**
 * Strip Markdoc-specific syntax from a document and make its links absolute.
 *
 * Steps, in order:
 * 1. remove the leading frontmatter block,
 * 2. remove Markdoc tags (`{% ... %}`),
 * 3. remove images whose target does not start with "http",
 * 4. rewrite heading links (`#fragment`) to point at `currentUrl`,
 * 5. resolve all remaining links that do not start with "http" against
 *    `currentUrl` (or against https://appwrite.io when no URL is known).
 *
 * @param {string} contents - Raw Markdown/Markdoc text.
 * @param {string | null} [currentUrl=null] - Public URL of the page the text
 *   belongs to. When absent, heading links are reduced to their link text
 *   because there is no page to anchor them to.
 * @returns {string} The cleaned text.
 */
function cleanMarkdoc(contents, currentUrl = null) {
  // Base used to resolve relative link targets when the page URL is unknown.
  const baseUrl = currentUrl ?? "https://appwrite.io";

  return (
    contents
      // Remove the frontmatter
      // e.g.
      // ---
      // title: Getting Started
      // ---
      .replace(/^---\r?\n([\s\S]*?)\r?\n---/, "")
      // Remove markdoc components
      // e.g. {% component foo="bar" %}
      .replace(/{% [\s\S]*? %}/g, "")
      // Remove links to images
      // e.g. ![image](./image.png)
      .replace(/!\[[^\]]*\]\((?!http)([^\)]*)\)/g, "")
      // Replace relative heading links with absolute links
      // e.g. [Getting Started](#getting-started) -> [Getting Started](https://appwrite.io/docs/current-page#getting-started)
      // The "#" is matched outside the capture group so it is not doubled,
      // and only targets that start with "#" are treated as heading links.
      .replace(/\[([^\]]*)\]\(#([^\)]*)\)/g, (_, text, fragment) => {
        if (!currentUrl) return text;
        return `[${text}](${currentUrl}#${fragment})`;
      })
      // Replace relative links with absolute links
      // e.g. [Databases](/docs/products/databases) -> [Databases](https://appwrite.io/docs/products/databases)
      .replace(/\[([^\]]*)\]\((?!http)([^\)]*)\)/g, (match, text, target) => {
        try {
          // URL resolution handles root-relative and path-relative targets
          // alike and never produces a doubled slash.
          return `[${text}](${new URL(target, baseUrl).href})`;
        } catch {
          // Leave a target that cannot be parsed exactly as it was written.
          return match;
        }
      })
  );
}
1 change: 0 additions & 1 deletion website
Submodule website deleted from ec0d6e