Skip to content

Commit

Permalink
fix(): unicode chars were removed from .po files when extracting
Browse files Browse the repository at this point in the history
  • Loading branch information
semoal committed Sep 2, 2021
1 parent 3b449b7 commit da1ffb5
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 9 deletions.
1 change: 1 addition & 0 deletions packages/conf/index.d.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { GeneratorOptions } from "@babel/core";

/** Shared pattern that matches literal `\uXXXX` and `\xXX` escape sequences (hex digits) inside a string. */
export const UNICODE_REGEX: RegExp
export declare type CatalogFormat = "lingui" | "minimal" | "po" | "csv" | "po-gettext";
export type CatalogFormatOptions = {
origins?: boolean;
Expand Down
7 changes: 4 additions & 3 deletions packages/conf/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@ import type { GeneratorOptions } from "@babel/core"
import path from "path"
import fs from "fs"
import chalk from "chalk"
import { Loader, cosmiconfigSync } from "cosmiconfig"
import { cosmiconfigSync } from "cosmiconfig"
import { multipleValidOptions, validate } from "jest-validate"
import get from "lodash.get"

/**
 * Detects escaped unicode sequences that are written out literally in a
 * string: a backslash followed by `uXXXX` or `xXX` (hexadecimal digits).
 * When a string contains them, they should be replaced by the characters they
 * stand for, because platforms like React Native don't decode them on their own.
 *
 * Deliberately NOT flagged `g`: this constant is shared and is used with
 * `.test()`. A global regex remembers `lastIndex` between calls, so after one
 * successful `.test()` the next call would start scanning mid-string and could
 * report `false` for a string that does contain an escape sequence.
 */
export const UNICODE_REGEX = /\\u[a-fA-F0-9]{4}|\\x[a-fA-F0-9]{2}/;
export type CatalogFormat = "lingui" | "minimal" | "po" | "csv"

export type CatalogFormatOptions = {
Expand Down
4 changes: 3 additions & 1 deletion packages/core/src/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { CompiledMessage, Locales } from "./i18n"
import { date, number } from "./formats"
import { isString, isFunction } from "./essentials"

/**
 * Detects escaped unicode sequences that are written out literally in a
 * message: a backslash followed by `uXXXX` or `xXX` (hexadecimal digits).
 * A match is the signal to decode the string with `JSON.parse`.
 *
 * Deliberately NOT flagged `g`: the constant is shared between modules and is
 * used with `.test()`. A global regex remembers `lastIndex` between calls, so
 * after one successful `.test()` the next call would start scanning mid-string
 * and could skip the decoding of a message that needs it.
 */
export const UNICODE_REGEX = /\\u[a-fA-F0-9]{4}|\\x[a-fA-F0-9]{2}/;

const defaultFormats = (
locale,
locales,
Expand Down Expand Up @@ -111,7 +113,7 @@ export function interpolate(
}

const result = formatMessage(translation)
if (isString(result) && /\\u[a-fA-F0-9]{4}/g.test(result)) return JSON.parse(`"${result.trim()}"`)
if (isString(result) && UNICODE_REGEX.test(result)) return JSON.parse(`"${result.trim()}"`)
if (isString(result)) return result.trim()
return result
}
Expand Down
4 changes: 2 additions & 2 deletions packages/core/src/i18n.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { interpolate } from "./context"
import { interpolate, UNICODE_REGEX } from "./context"
import { isString, isFunction } from "./essentials"
import { date, number } from "./formats"
import * as icu from "./dev"
Expand Down Expand Up @@ -193,7 +193,7 @@ export class I18n extends EventEmitter<Events> {


// hack for parsing unicode values inside a string to get parsed in react native environments
if (isString(translation) && /\\u[a-fA-F0-9]{4}/g.test(translation)) return JSON.parse(`"${translation}"`) as string;
if (isString(translation) && UNICODE_REGEX.test(translation)) return JSON.parse(`"${translation}"`) as string;
if (isString(translation)) return translation

return interpolate(
Expand Down
15 changes: 14 additions & 1 deletion packages/macro/src/macroJs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ describe("js macro", () => {
])
})

it("message with unicode chars is interpreted by babel", () => {
it("message with unicode \\u chars is interpreted by babel", () => {
const macro = createMacro()
const exp = parseExpression('t`Message \\u0020`')
const tokens = macro.tokenizeTemplateLiteral(exp)
Expand All @@ -92,6 +92,19 @@ describe("js macro", () => {
])
})

it("message with unicode \\x chars is interpreted by babel", () => {
  const macro = createMacro()
  const exp = parseExpression('t`Bienvenue\\xA0!`')
  const tokens = macro.tokenizeTemplateLiteral(exp)
  expect(tokens).toEqual([
    {
      type: "text",
      // `\xA0` is cooked into a no-break space (U+00A0). It is spelled as an
      // escape here because a literal one looks exactly like a regular space
      // and is easily replaced by one when the file is edited or copied.
      value: "Bienvenue\u00A0!",
    },
  ])
})

it("message with double scaped literals it's stripped", () => {
const macro = createMacro()
const exp = parseExpression('t\`Passing \\`${argSet}\\` is not supported.\`')
Expand Down
3 changes: 2 additions & 1 deletion packages/macro/src/macroJs.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as R from "ramda"
import * as babelTypes from "@babel/types"
import { NodePath } from "@babel/traverse"
import { UNICODE_REGEX } from "@lingui/conf"

import ICUMessageFormat from "./icu"
import { zip, makeCounter } from "./utils"
Expand Down Expand Up @@ -265,7 +266,7 @@ export default class MacroJs {
quasis: R.map((text: babelTypes.TemplateElement) => {
// Don't output tokens without text.
// if it's an unicode we keep the cooked value because it's the parsed value by babel (without unicode chars)
const value = /\\u[a-fA-F0-9]{4}/g.test(text.value.raw) ? text.value.cooked : text.value.raw
const value = UNICODE_REGEX.test(text.value.raw) ? text.value.cooked : text.value.raw
if (value === "") return null

return {
Expand Down
3 changes: 2 additions & 1 deletion packages/macro/src/macroJsx.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as R from "ramda"
import * as babelTypes from "@babel/types"
import { NodePath } from "@babel/traverse"
import { UNICODE_REGEX } from "@lingui/conf"

import ICUMessageFormat from "./icu"
import { zip, makeCounter } from "./utils"
Expand Down Expand Up @@ -231,7 +232,7 @@ export default class MacroJSX {
R.evolve({
quasis: R.map((text: babelTypes.TemplateElement) => {
// Don't output tokens without text.
const value = /\\u[a-fA-F0-9]{4}/g.test(text.value.raw) ? text.value.cooked : text.value.raw
const value = UNICODE_REGEX.test(text.value.raw) ? text.value.cooked : text.value.raw
if (value === "") return null

return this.tokenizeText(this.clearBackslashes(value))
Expand Down

0 comments on commit da1ffb5

Please sign in to comment.