Skip to content

Commit

Permalink
πŸ› Fix CJK handling
Browse files Browse the repository at this point in the history
  • Loading branch information
wei committed Apr 12, 2024
1 parent dbdce31 commit 60bb456
Show file tree
Hide file tree
Showing 4 changed files with 274 additions and 51 deletions.
138 changes: 138 additions & 0 deletions common/font.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Source: https://github.com/vercel/satori/blob/9bc47fd41937be1bc96db2c91420323d034bebef/playground/utils/font.ts

type UnicodeRange = Array<number | number[]>

/**
 * Splits text across a list of font families according to the `unicode-range`
 * values each family declares in the stylesheet served by
 * `fonts.googleapis.com/css2`.
 *
 * Ranges are fetched on demand and kept on the instance, so a family that has
 * already produced ranges is not requested again.
 */
export class FontDetector {
  // Unicode ranges collected so far, keyed by family name in `+`-separated form.
  private rangesByLang: {
    [font: string]: UnicodeRange
  } = {}

  /**
   * Assigns each code point of `text` to the first family in `fonts` whose
   * known ranges contain it.
   *
   * @param text Text to split; iterated code point by code point.
   * @param fonts Family names, in priority order.
   * @returns A map from family name to the concatenated characters assigned
   * to it. Characters matched by no family are left out.
   */
  public async detect(
    text: string,
    fonts: string[]
  ): Promise<{
    [lang: string]: string
  }> {
    await this.load(fonts)

    const textByFont: { [lang: string]: string } = {}

    for (const char of text) {
      const owner = this.detectSegment(char, fonts)
      if (!owner) continue

      textByFont[owner] = (textByFont[owner] || '') + char
    }

    return textByFont
  }

  /**
   * Returns the first family in `fonts` whose ranges cover `segment`, or
   * `null` when none does.
   */
  private detectSegment(segment: string, fonts: string[]): string | null {
    const owner = fonts.find((font) => {
      const ranges = this.rangesByLang[font]
      return ranges && checkSegmentInRange(segment, ranges)
    })

    return owner ?? null
  }

  /**
   * Fetches the stylesheet for every family in `fonts` that has no ranges
   * recorded yet and feeds it to `addDetectors`. Does nothing when all
   * families are already known.
   */
  private async load(fonts: string[]): Promise<void> {
    const known = new Set(Object.keys(this.rangesByLang))
    const missing = fonts.filter((font) => !known.has(font))

    if (missing.length === 0) {
      return
    }

    // Family names are inserted verbatim: they are expected to already be in
    // the `+`-separated form used as keys of `rangesByLang`.
    const params =
      missing.map((font) => `family=${font}&`).join('') + 'display=swap'

    const response = await fetch(
      `https://fonts.googleapis.com/css2?${params}`,
      {
        headers: {
          // Make sure it returns TTF.
          'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
        }
      }
    )
    const stylesheet = await response.text()

    this.addDetectors(stylesheet)
  }

  /**
   * Extracts every `font-family` / `unicode-range` pair from a stylesheet and
   * appends the parsed ranges to that family's entry in `rangesByLang`.
   */
  private addDetectors(input: string) {
    const fontFacePattern =
      /font-family:\s*'(.+?)';.+?unicode-range:\s*(.+?);/gms

    for (const [, rawFamily, rawRanges] of input.matchAll(fontFacePattern)) {
      // The stylesheet spells families with spaces; keys use `+` instead.
      const family = rawFamily.replace(/ /g, '+')
      const ranges =
        this.rangesByLang[family] || (this.rangesByLang[family] = [])

      ranges.push(...convert(rawRanges))
    }
  }
}

/**
 * Parses the value of a CSS `unicode-range` descriptor into numeric ranges.
 *
 * Accepted entry forms (comma separated, surrounding whitespace ignored,
 * `U+` prefix case-insensitive):
 * - single code point, e.g. `U+0131`        -> `0x131`
 * - interval, e.g. `U+0000-00FF`            -> `[0x0, 0xff]`
 * - wildcard, e.g. `U+4??`                  -> `[0x400, 0x4ff]`
 *
 * Entries that are empty or do not start with a hexadecimal number are
 * skipped instead of producing `NaN` values.
 *
 * @param input Raw descriptor value, without the trailing `;`.
 * @returns One element per valid entry: a number for a single code point, or
 * a `[start, end]` pair (inclusive) for an interval.
 */
function convert(input: string): UnicodeRange {
  const result: UnicodeRange = []

  // Split on bare commas and trim each entry so that both `a, b` and `a,b`
  // (or entries separated by newlines) are handled.
  for (const entry of input.split(',')) {
    const token = entry.trim().replace(/U\+/gi, '')
    if (token === '') continue

    if (token.includes('?')) {
      // Wildcard: every `?` stands for any hex digit, i.e. 0 through F.
      const low = parseInt(token.replace(/\?/g, '0'), 16)
      const high = parseInt(token.replace(/\?/g, 'F'), 16)
      if (!Number.isNaN(low) && !Number.isNaN(high)) {
        result.push([low, high])
      }
      continue
    }

    const [start, end] = token.split('-').map((hex) => parseInt(hex, 16))
    if (start === undefined || Number.isNaN(start)) continue

    // A missing or unparsable upper bound means a single code point.
    result.push(end === undefined || Number.isNaN(end) ? start : [start, end])
  }

  return result
}

/**
 * Tests whether the first code point of `segment` is covered by `range`.
 *
 * @param segment Text whose first code point is examined; an empty string is
 * never in range.
 * @param range List of single code points and inclusive `[start, end]` pairs.
 * @returns `true` when any entry equals or encloses the code point.
 */
function checkSegmentInRange(segment: string, range: UnicodeRange): boolean {
  const codePoint = segment.codePointAt(0)

  // `codePointAt` yields `undefined` only for an empty string. Compare
  // explicitly: U+0000 is a valid code point and a truthiness check would
  // wrongly reject it.
  if (codePoint === undefined) return false

  return range.some((val) => {
    if (typeof val === 'number') {
      return codePoint === val
    }

    const [start, end] = val
    return start <= codePoint && codePoint <= end
  })
}

// @TODO: Support font style and weights, and make this option extensible rather
// than built-in.
// @TODO: Cover most languages with Noto Sans.
//
// Lookup table from a language / script key to the Noto family (or list of
// families) registered for it. Family names are written with `+` in place of
// spaces. Every value is a single string except `symbol`, which lists two
// families.
export const languageFontMap = {
  // Chinese, Japanese and Korean locales.
  'ja-JP': 'Noto+Sans+JP',
  'ko-KR': 'Noto+Sans+KR',
  'zh-CN': 'Noto+Sans+SC',
  'zh-TW': 'Noto+Sans+TC',
  'zh-HK': 'Noto+Sans+HK',

  // Other locale-keyed entries.
  'th-TH': 'Noto+Sans+Thai',
  'bn-IN': 'Noto+Sans+Bengali',
  'ar-AR': 'Noto+Sans+Arabic',
  'ta-IN': 'Noto+Sans+Tamil',
  'ml-IN': 'Noto+Sans+Malayalam',
  'he-IL': 'Noto+Sans+Hebrew',
  'te-IN': 'Noto+Sans+Telugu',

  // Entries keyed by script name rather than locale.
  'devanagari': 'Noto+Sans+Devanagari',
  'kannada': 'Noto+Sans+Kannada',

  // Non-language glyph sets.
  'symbol': ['Noto+Sans+Symbols', 'Noto+Sans+Symbols+2'],
  'math': 'Noto+Sans+Math',

  // Entry under the `unknown` key.
  'unknown': 'Noto+Sans'
}
93 changes: 55 additions & 38 deletions common/renderCard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { mergeConfig } from './configHelper'
import { getRepoDetails } from './github/repoQuery'
import { getIconCode, loadEmoji } from './twemoji'
import { HOST_PREFIX } from './helpers'
import { languageFontMap } from './font'

export async function getFont(
font: Font,
Expand Down Expand Up @@ -35,24 +36,6 @@ export function getFonts(font: Font) {
])
}

export const languageFontMap: Record<string, string | string[]> = {
zh: 'Noto+Sans+SC',
ja: 'Noto+Sans+JP',
ko: 'Noto+Sans+KR',
th: 'Noto+Sans+Thai',
he: 'Noto+Sans+Hebrew',
ar: 'Noto+Sans+Arabic',
bn: 'Noto+Sans+Bengali',
ta: 'Noto+Sans+Tamil',
te: 'Noto+Sans+Telugu',
ml: 'Noto+Sans+Malayalam',
devanagari: 'Noto+Sans+Devanagari',
kannada: 'Noto+Sans+Kannada',
symbol: ['Noto+Sans+Symbols', 'Noto+Sans+Symbols+2'],
math: 'Noto+Sans+Math',
unknown: 'Noto+Sans+SC'
}

function withCache(fn: Function) {
const cache = new Map()
return async (...args: string[]) => {
Expand All @@ -67,39 +50,73 @@ function withCache(fn: Function) {
type LanguageCode = keyof typeof languageFontMap | 'emoji'

export const loadDynamicAsset = withCache(
async (code: LanguageCode, text: string) => {
if (code === 'emoji') {
async (_code: LanguageCode, text: string) => {
if (_code === 'emoji') {
// It's an emoji, load the image.
return (
`data:image/svg+xml;base64,` +
btoa(await loadEmoji('twemoji', getIconCode(text)))
)
}

const codes = _code.split('|')

// Try to load from Google Fonts.
let names = languageFontMap[code]
if (!names) code = 'unknown'
const names = codes
.map((code) => languageFontMap[code as keyof typeof languageFontMap])
.filter(Boolean)

if (names.length === 0) return []

const params = new URLSearchParams()
for (const name of names.flat()) {
params.append('fonts', name)
}
params.set('text', text)

try {
if (typeof names === 'string') {
names = [names]
}
const response = await fetch(
`${HOST_PREFIX}/api/font?${params.toString()}`
)

if (response.status === 200) {
const data = await response.arrayBuffer()
const fonts: any[] = []

// Decode the encoded font format.
const decodeFontInfoFromArrayBuffer = (buffer: ArrayBuffer) => {
let offset = 0
const bufferView = new Uint8Array(buffer)

while (offset < bufferView.length) {
// 1 byte for font name length.
const languageCodeLength = bufferView[offset]
offset += 1
let languageCode = ''
for (let i = 0; i < languageCodeLength; i++) {
languageCode += String.fromCharCode(bufferView[offset + i])
}
offset += languageCodeLength

for (const name of names) {
const res = await fetch(
`${HOST_PREFIX}/api/font?font=${encodeURIComponent(
name
)}&text=${encodeURIComponent(text)}`
)
if (res.status === 200) {
const font = await res.arrayBuffer()
return {
name: `satori_${code}_fallback_${text}`,
data: font,
weight: 400,
style: 'normal'
// 4 bytes for font data length.
const fontDataLength = new DataView(buffer).getUint32(offset, false)
offset += 4
const fontData = buffer.slice(offset, offset + fontDataLength)
offset += fontDataLength

fonts.push({
name: `satori_${languageCode}_fallback_${text}`,
data: fontData,
weight: 400,
style: 'normal',
lang: languageCode === 'unknown' ? undefined : languageCode
})
}
}

decodeFontInfoFromArrayBuffer(data)

return fonts
}
} catch (e) {
console.error('Failed to load dynamic font for', text, '. Error:', e)
Expand Down
92 changes: 80 additions & 12 deletions pages/api/font.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,94 @@
// Source: https://github.com/vercel/satori/blob/02ce7f00cdad309bd427ecf4c01b050ee5e93b4a/playground/pages/api/font.ts
// Source: https://github.com/vercel/satori/blob/9bc47fd41937be1bc96db2c91420323d034bebef/playground/pages/api/font.ts

import type { NextRequest } from 'next/server'
import { FontDetector, languageFontMap } from '../../common/font'

export const config = {
runtime: 'edge'
}

const detector = new FontDetector()

// Our own encoding of multiple fonts and their code, so we can fetch them in one request. The structure is:
// [1 byte = X, length of language code][X bytes of language code string][4 bytes = Y, length of font][Y bytes of font data]
// Note that:
// - The language code can't be longer than 255 characters.
// - The language code can't contain non-ASCII characters.
// - The font data can't be longer than 4GB.
// When there are multiple fonts, they are concatenated together.
//
// The limits above are enforced: a violation throws a RangeError instead of
// silently writing a truncated length byte or truncated characters, which
// would make the frame undecodable.
//
// `code` is the language code to embed, `fontData` the raw font bytes. The
// return value is a new ArrayBuffer holding exactly one encoded frame; the
// 4-byte length is written big-endian.
function encodeFontInfoAsArrayBuffer(code: string, fontData: ArrayBuffer) {
  if (code.length > 0xff) {
    throw new RangeError(
      `Language code is ${code.length} characters long; the maximum is 255.`
    )
  }
  if (fontData.byteLength > 0xffffffff) {
    throw new RangeError('Font data is too large to encode (limit is 4GB).')
  }

  // 1 byte per char
  const fontLengthOffset = 1 + code.length
  const fontDataOffset = fontLengthOffset + 4
  const buffer = new ArrayBuffer(fontDataOffset + fontData.byteLength)
  const bufferView = new Uint8Array(buffer)

  // 1 byte for the length of the language code
  bufferView[0] = code.length

  // X bytes for the language code
  for (let i = 0; i < code.length; i++) {
    const charCode = code.charCodeAt(i)
    if (charCode > 0x7f) {
      throw new RangeError(
        `Language code contains a non-ASCII character at index ${i}.`
      )
    }
    bufferView[i + 1] = charCode
  }

  // 4 bytes for the length of the font data
  new DataView(buffer).setUint32(fontLengthOffset, fontData.byteLength, false)

  // Y bytes for the font data
  bufferView.set(new Uint8Array(fontData), fontDataOffset)

  return buffer
}

export default async function loadGoogleFont(req: NextRequest) {
if (req.nextUrl.pathname !== '/api/font') return
const { searchParams, hostname } = new URL(req.url)

const font = searchParams.get('font')
const { searchParams } = new URL(req.url)

const fonts = searchParams.getAll('fonts')
const text = searchParams.get('text')

if (!font || !text) return
if (!fonts || fonts.length === 0 || !text) return

const textByFont = await detector.detect(text, fonts)

const _fonts = Object.keys(textByFont)

const encodedFontBuffers: ArrayBuffer[] = []
let fontBufferByteLength = 0
;(
await Promise.all(_fonts.map((font) => fetchFont(textByFont[font], font)))
).forEach((fontData, i) => {
if (fontData) {
// TODO: We should be able to directly get the language code here :)
const langCode = Object.entries(languageFontMap).find(
([, v]) => v === _fonts[i]
)?.[0]

if (langCode) {
const buffer = encodeFontInfoAsArrayBuffer(langCode, fontData)
encodedFontBuffers.push(buffer)
fontBufferByteLength += buffer.byteLength
}
}
})

const responseBuffer = new ArrayBuffer(fontBufferByteLength)
const responseBufferView = new Uint8Array(responseBuffer)
let offset = 0
encodedFontBuffers.forEach((buffer) => {
responseBufferView.set(new Uint8Array(buffer), offset)
offset += buffer.byteLength
})

return new Response(responseBuffer, {
headers: {
'Content-Type': 'font/woff',
'Cache-Control': 'public, max-age=31536000, immutable'
}
})
}

async function fetchFont(
text: string,
font: string
): Promise<ArrayBuffer | null> {
const API = `https://fonts.googleapis.com/css2?family=${font}&text=${encodeURIComponent(
text
)}`
Expand All @@ -31,15 +105,9 @@ export default async function loadGoogleFont(req: NextRequest) {

const resource = css.match(/src: url\((.+)\) format\('(opentype|truetype)'\)/)

if (!resource) return
if (!resource) return null

const res = await fetch(resource[1])

// Make sure not to mess it around with compression when developing it locally.
if (hostname === 'localhost') {
res.headers.delete('content-encoding')
res.headers.delete('content-length')
}

return res
return res.arrayBuffer()
}
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"compilerOptions": {
"target": "es5",
"target": "es2015",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
Expand Down

0 comments on commit 60bb456

Please sign in to comment.