From 818355bffcda324ba1cb6fdfc58a623dac982b6d Mon Sep 17 00:00:00 2001 From: Steven Date: Sat, 9 Dec 2023 08:39:50 -0500 Subject: [PATCH] chore: refactor to use postgres (behind a flag) (#1024) This PR refactors the primary database from redis to postgres when using the `TRY_POSTGRES=1` flag. It also ensures that inserts happen in both databases concurrently so we can keep data in both until cutting over. https://vercel.com/changelog/vercel-postgres-is-now-available-for-pro-users --- CONTRIBUTING.md | 5 +- api/initdb.ts | 89 ++++++++++++++++++ package-lock.json | 162 +++++++++++++++++++++++++++++++- package.json | 1 + src/page-props/common.ts | 6 +- src/util/backend/db-postgres.ts | 43 +++++++++ src/util/backend/db-redis.ts | 58 ++++++++++++ src/util/backend/db.ts | 55 +++-------- src/util/npm-api.ts | 2 + 9 files changed, 371 insertions(+), 50 deletions(-) create mode 100644 api/initdb.ts create mode 100644 src/util/backend/db-postgres.ts create mode 100644 src/util/backend/db-redis.ts diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bedcba99..ae4a23cc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -30,11 +30,10 @@ Create a `.env` file in the root directory. ``` # required settings REDIS_URL="redis://127.0.0.1:6379" -GA_ID="" # optional settings -export PORT="3000" -export NPM_REGISTRY_URL="https://registry.npmjs.com" +PORT="3000" +NPM_REGISTRY_URL="https://registry.npmjs.com" ``` ## Running the code diff --git a/api/initdb.ts b/api/initdb.ts new file mode 100644 index 00000000..e1bd1a13 --- /dev/null +++ b/api/initdb.ts @@ -0,0 +1,89 @@ +import { sql } from '@vercel/postgres'; +import { findAll } from '../src/util/backend/db-redis'; +import type { IncomingMessage, ServerResponse } from 'http'; + +export default async function handler(_req: IncomingMessage, res: ServerResponse) { + // This is a temporary function we can use to test + if (process.env.VERCEL_ENV !== 'development') { + res.statusCode = 403; + res.end('403 Forbidden'); + return; + } + + /* + console.log(await sql` + CREATE COLLATION semver ( + LOCALE = 'en-US-u-kn-true', + PROVIDER = 'icu' + ); + `); + + console.log( + await sql` + drop table if exists "packages"; +`, + ); + console.log( + await sql` + CREATE TABLE "packages" ( + "name" VARCHAR(214), + "version" VARCHAR(255) COLLATE semver, + "publishSize" INTEGER, + "installSize" INTEGER, + "publishFiles" INTEGER, + "installFiles" INTEGER, + PRIMARY KEY ("name", "version") + ); +`, + ); +*/ + const result = await findAll('next'); + console.log(`inserting ${Object.keys(result).length} rows`); + + for (let pkg of Object.values(result)) { + console.log(`inserting row ${pkg.name}@${pkg.version}`); + await sql` + INSERT INTO "packages" values (${pkg.name}, ${pkg.version}, ${pkg.publishSize}, ${pkg.installSize}, ${pkg.publishFiles}, ${pkg.installFiles}); + `; + } + /* + console.log(await sql` + SELECT * + FROM "packages" + ORDER BY version desc; + `); + */ + + res.end('success'); +} + +/** + CREATE COLLATION en_natural ( + LOCALE = 'en-US-u-kn-true', + PROVIDER = 'icu' +); + +CREATE TABLE test ( + version varchar(20) collate en_natural +); + +INSERT INTO test values + ('14.1.0'), + ('14.20.0'), + ('14.11.0'), + ('14.10.0'), + ('14.2.0'), + ('14.0.2'), + ('14.0.3-canary.1'), + ('14.0.3-canary.0'), + ('14.0.3-canary.9'), + ('14.0.3-canary.10'), + ('14.0.3-canary.11'), + ('14.0.3-canary.12'), + ('14.0.3'); + +SELECT split_part(version, '-', 1) as one, NULLIF(split_part(version, '-', 2), '') as two +FROM test +ORDER BY split_part(version, '-', 1) desc, NULLIF(split_part(version, '-', 2), '') desc; + + */ diff --git a/package-lock.json b/package-lock.json index 55684fab..521d3407 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7,6 +7,7 @@ "name": "packagephobia", "license": "MIT", "dependencies": { + "@vercel/postgres": "^0.5.1", "badgen": "^3.2.2", "ioredis": "^5.1.0", "lru-cache": "^10.1.0", @@ -32,15 +33,32 @@ "resolved": "https://registry.npmjs.org/@ioredis/commands/-/commands-1.2.0.tgz", "integrity": "sha512-Sx1pU8EM64o2BrqNpEO1CNLtKQwyhuXuqyfH7oGKCk+1a33d2r5saW8zNwm3j6BTExtjrv2BxTgzzkMwts6vGg==" }, + "node_modules/@neondatabase/serverless": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/@neondatabase/serverless/-/serverless-0.6.0.tgz", + "integrity": "sha512-qXxBRYN0m2v8kVQBfMxbzNGn2xFAhTXFibzQlE++NfJ56Shz3m7+MyBBtXDlEH+3Wfa6lToDXf1MElocY4sJ3w==", + "dependencies": { + "@types/pg": "8.6.6" + } + }, "node_modules/@types/node": { "version": "18.18.9", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.18.9.tgz", "integrity": "sha512-0f5klcuImLnG4Qreu9hPj/rEfFq6YRc5n2mAjSsH+ec/mJL+3voBH0+8T7o8RpFjH7ovc+TRsL/c7OYIQsPTfQ==", - "dev": true, "dependencies": { "undici-types": "~5.26.4" } }, + "node_modules/@types/pg": { + "version": "8.6.6", + "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.6.tgz", + "integrity": "sha512-O2xNmXebtwVekJDD+02udOncjVcMZQuTEQEMpKJ0ZRf5E7/9JJX3izhKUcUifBkyKpljyUM6BTgy2trmviKlpw==", + "dependencies": { + "@types/node": "*", + "pg-protocol": "*", + "pg-types": "^2.2.0" + } + }, "node_modules/@types/prop-types": { "version": "15.7.10", "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.10.tgz", @@ -79,11 +97,37 @@ "integrity": "sha512-+d+WYC1BxJ6yVOgUgzK8gWvp5qF8ssV5r4nsDcZWKRWcDQLQ619tvWAxJQYGgBrO1MnLJC7a5GtiYsAoQ47dJg==", "dev": true }, + "node_modules/@vercel/postgres": { + "version": "0.5.1", + "resolved": "https://registry.npmjs.org/@vercel/postgres/-/postgres-0.5.1.tgz", + "integrity": "sha512-JKl8QOBIDnifhkxAhIKtY0A5Tb8oWBf2nzZhm0OH7Ffjsl0hGVnDL2w1/FCfpX8xna3JAWM034NGuhZfTFdmiw==", + "dependencies": { + "@neondatabase/serverless": "0.6.0", + "bufferutil": "4.0.8", + "utf-8-validate": "6.0.3", + "ws": "8.14.2" + }, + "engines": { + "node": ">=14.6" + } + }, "node_modules/badgen": { "version": "3.2.3", "resolved": "https://registry.npmjs.org/badgen/-/badgen-3.2.3.tgz", "integrity": "sha512-svDuwkc63E/z0ky3drpUppB83s/nlgDciH9m+STwwQoWyq7yCgew1qEfJ+9axkKdNq7MskByptWUN9j1PGMwFA==" }, + "node_modules/bufferutil": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/bufferutil/-/bufferutil-4.0.8.tgz", + "integrity": "sha512-4T53u4PdgsXqKaIctwF8ifXlRTTmEPJ8iEPWFdGZvcf7sbwYo6FKFEX9eNNAnzFZ7EzJAQ3CJeOtCRA4rDp7Pw==", + "hasInstallScript": true, + "dependencies": { + "node-gyp-build": "^4.3.0" + }, + "engines": { + "node": ">=6.14.2" + } + }, "node_modules/cluster-key-slot": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.2.tgz", @@ -184,6 +228,16 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/node-gyp-build": { + "version": "4.6.1", + "resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.6.1.tgz", + "integrity": "sha512-24vnklJmyRS8ViBNI8KbtK/r/DmXQMRiOMXTNz2nrTnAYUwjmEEbnnpB/+kt+yWRv73bPsSPRFddrcIbAxSiMQ==", + "bin": { + "node-gyp-build": "bin.js", + "node-gyp-build-optional": "optional.js", + "node-gyp-build-test": "build-test.js" + } + }, "node_modules/npm": { "version": "10.2.3", "resolved": "https://registry.npmjs.org/npm/-/npm-10.2.3.tgz", @@ -3055,6 +3109,69 @@ "inBundle": true, "license": "ISC" }, + "node_modules/pg-int8": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", + "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/pg-protocol": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.6.0.tgz", + "integrity": "sha512-M+PDm637OY5WM307051+bsDia5Xej6d9IR4GwJse1qA1DIhiKlksvrneZOYQq42OM+spubpcNYEo2FcKQrDk+Q==" + }, + "node_modules/pg-types": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", + "dependencies": { + "pg-int8": "1.0.1", + "postgres-array": "~2.0.0", + "postgres-bytea": "~1.0.0", + "postgres-date": "~1.0.4", + "postgres-interval": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/postgres-array": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", + "integrity": "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==", + "engines": { + "node": ">=4" + } + }, + "node_modules/postgres-bytea": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.0.tgz", + "integrity": "sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-date": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.7.tgz", + "integrity": "sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-interval": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.2.0.tgz", + "integrity": "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==", + "dependencies": { + "xtend": "^4.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/prettier": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.3.tgz", @@ -3166,8 +3283,47 @@ "node_modules/undici-types": { "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "dev": true + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + }, + "node_modules/utf-8-validate": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/utf-8-validate/-/utf-8-validate-6.0.3.tgz", + "integrity": "sha512-uIuGf9TWQ/y+0Lp+KGZCMuJWc3N9BHA+l/UmHd/oUHwJJDeysyTRxNQVkbzsIWfGFbRe3OcgML/i0mvVRPOyDA==", + "hasInstallScript": true, + "dependencies": { + "node-gyp-build": "^4.3.0" + }, + "engines": { + "node": ">=6.14.2" + } + }, + "node_modules/ws": { + "version": "8.14.2", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.14.2.tgz", + "integrity": "sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g==", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "engines": { + "node": ">=0.4" + } }, "node_modules/yallist": { "version": "4.0.0", diff --git a/package.json b/package.json index 5561b872..e9180fc5 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "author": "styfle", "license": "MIT", "dependencies": { + "@vercel/postgres": "^0.5.1", "badgen": "^3.2.2", "ioredis": "^5.1.0", "lru-cache": "^10.1.0", diff --git a/src/page-props/common.ts b/src/page-props/common.ts index 8239962b..be16e21b 100644 --- a/src/page-props/common.ts +++ b/src/page-props/common.ts @@ -1,4 +1,6 @@ -import { findOne, insert } from '../util/backend/db'; +import { findOne } from '../util/backend/db'; +import { insert as insertPostgres } from '../util/backend/db-postgres'; +import { insert as insertRedis } from '../util/backend/db-redis'; import { getAllDistTags } from '../util/npm-api'; import { calculatePackageSize } from '../util/backend/npm-stats'; import { versionUnknown } from '../util/constants'; @@ -43,7 +45,7 @@ export async function getPkgDetails( const end = new Date(); const sec = (end.getTime() - start.getTime()) / 1000; console.log(`Calculated size of ${name}@${version} in ${sec}s`); - insert(pkgSize); + await Promise.all([insertRedis(pkgSize), insertPostgres(pkgSize)]); } const result = { diff --git a/src/util/backend/db-postgres.ts b/src/util/backend/db-postgres.ts new file mode 100644 index 00000000..4114f7b7 --- /dev/null +++ b/src/util/backend/db-postgres.ts @@ -0,0 +1,43 @@ +import { sql } from '@vercel/postgres'; +import type { PkgSize } from '../../types'; + +export async function findAll(name: string) { + console.time('findAll (postgres)'); + const { rows } = await sql` + SELECT * + FROM "packages" + WHERE name = ${name}; + `; + console.timeEnd('findAll (postgres)'); + const obj: { [key: string]: PkgSize } = {}; + for (let row of rows) { + const payload = row as PkgSize; + obj[payload.version] = payload; + } + return obj; +} + +export async function findOne(name: string, version: string) { + console.time('findOne (postgres)'); + const { rows } = await sql` + SELECT * + FROM "packages" + WHERE name = ${name} + AND version = ${version}; + `; + console.timeEnd('findOne (postgres)'); + const reply = rows[0]; + + if (!reply) { + return null; + } + + return reply as PkgSize; +} + +export async function insert(pkg: PkgSize) { + const reply = await sql` + INSERT INTO "packages" VALUES (${pkg.name}, ${pkg.version}, ${pkg.publishSize}, ${pkg.installSize}, ${pkg.publishFiles}, ${pkg.installFiles}); + `; + return reply; +} diff --git a/src/util/backend/db-redis.ts b/src/util/backend/db-redis.ts new file mode 100644 index 00000000..8eb468a3 --- /dev/null +++ b/src/util/backend/db-redis.ts @@ -0,0 +1,58 @@ +import Redis from 'ioredis'; +import type { PkgSize } from '../../types'; + +const { REDIS_URL = '' } = process.env; +delete process.env.REDIS_URL; + +if (!REDIS_URL) { + throw new Error('Missing REDIS_URL environment variable'); +} + +try { + new URL(REDIS_URL); +} catch (err) { + throw new Error('Invalid REDIS_URL environment variable'); +} + +const client = new Redis(REDIS_URL); + +client.on('error', err => { + console.error('Redis error: ', err); +}); + +export async function findAll(name: string) { + console.time('findAll (redis)'); + const reply = await client.hgetall(name); + const obj: { [key: string]: PkgSize } = {}; + for (let version in reply) { + const payload: PkgSize = JSON.parse(reply[version] || '{}'); + payload.name = name; + payload.version = version; + obj[version] = payload; + } + console.timeEnd('findAll (redis)'); + return obj; +} + +export async function findOne(name: string, version: string) { + console.time('findOne (redis)'); + const reply = await client.hget(name, version); + + if (!reply) { + return null; + } + + let record: PkgSize = JSON.parse(reply); + record.name = name; + record.version = version; + + console.timeEnd('findOne (redis)'); + return record; +} + +export async function insert(data: PkgSize) { + const { name, version, ...payload } = data; + const value = JSON.stringify(payload); + const reply = await client.hset(name, version, value); + return reply; +} diff --git a/src/util/backend/db.ts b/src/util/backend/db.ts index 0e05a3c8..98832f9b 100644 --- a/src/util/backend/db.ts +++ b/src/util/backend/db.ts @@ -1,53 +1,24 @@ -import Redis from 'ioredis'; +import * as redis from './db-redis'; +import * as postgres from './db-postgres'; import type { PkgSize } from '../../types'; -const { REDIS_URL = '' } = process.env; -delete process.env.REDIS_URL; - -if (!REDIS_URL) { - throw new Error('Missing REDIS_URL environment variable'); -} - -try { - new URL(REDIS_URL); -} catch (err) { - throw new Error('Invalid REDIS_URL environment variable'); -} - -const client = new Redis(REDIS_URL); - -client.on('error', err => { - console.error('Redis error: ', err); -}); - export async function findAll(name: string) { - const reply = await client.hgetall(name); - const obj: { [key: string]: PkgSize } = {}; - for (let version in reply) { - const payload: PkgSize = JSON.parse(reply[version] || '{}'); - payload.name = name; - payload.version = version; - obj[version] = payload; + if (process.env.TRY_POSTGRES === '1') { + return postgres.findAll(name); } - return obj; + return redis.findAll(name); } export async function findOne(name: string, version: string) { - const reply = await client.hget(name, version); - - if (!reply) { - return null; + if (process.env.TRY_POSTGRES === '1') { + return postgres.findOne(name, version); } - - let record: PkgSize = JSON.parse(reply); - record.name = name; - record.version = version; - return record; + return redis.findOne(name, version); } -export async function insert(data: PkgSize) { - const { name, version, ...payload } = data; - const value = JSON.stringify(payload); - const reply = await client.hset(name, version, value); - return reply; +export async function insert(pkg: PkgSize) { + if (process.env.TRY_POSTGRES === '1') { + return postgres.insert(pkg); + } + return redis.insert(pkg); } diff --git a/src/util/npm-api.ts b/src/util/npm-api.ts index 9679d22b..2f8ec6b0 100644 --- a/src/util/npm-api.ts +++ b/src/util/npm-api.ts @@ -20,7 +20,9 @@ export async function fetchManifest(name: string) { } console.log('lrucache miss'); const encodedPackage = escapePackageName(name); + console.time('fetchJSON'); const manifest = await fetchJSON(`${NPM_REGISTRY_URL}/${encodedPackage}`); + console.timeEnd('fetchJSON'); if (!isManifest(manifest)) { throw new NotFoundError({ resource: name }); }