From da1ffb5bf3051c906bb2a4748e9b87817faf44c8 Mon Sep 17 00:00:00 2001 From: Sergio Moreno <22656541+semoal@users.noreply.github.com> Date: Thu, 2 Sep 2021 00:41:27 +0200 Subject: [PATCH] fix(): unicode chars were removed from .po files when extracting --- packages/conf/index.d.ts | 1 + packages/conf/src/index.ts | 7 ++++--- packages/core/src/context.ts | 4 +++- packages/core/src/i18n.ts | 4 ++-- packages/macro/src/macroJs.test.ts | 15 ++++++++++++++- packages/macro/src/macroJs.ts | 3 ++- packages/macro/src/macroJsx.ts | 3 ++- 7 files changed, 28 insertions(+), 9 deletions(-) diff --git a/packages/conf/index.d.ts b/packages/conf/index.d.ts index fe47871e6..f1eced4bb 100644 --- a/packages/conf/index.d.ts +++ b/packages/conf/index.d.ts @@ -1,5 +1,6 @@ import type { GeneratorOptions } from "@babel/core"; +export const UNICODE_REGEX: RegExp export declare type CatalogFormat = "lingui" | "minimal" | "po" | "csv" | "po-gettext"; export type CatalogFormatOptions = { origins?: boolean; diff --git a/packages/conf/src/index.ts b/packages/conf/src/index.ts index 6190ffc14..bb4476739 100644 --- a/packages/conf/src/index.ts +++ b/packages/conf/src/index.ts @@ -2,10 +2,11 @@ import type { GeneratorOptions } from "@babel/core" import path from "path" import fs from "fs" import chalk from "chalk" -import { Loader, cosmiconfigSync } from "cosmiconfig" +import { cosmiconfigSync } from "cosmiconfig" import { multipleValidOptions, validate } from "jest-validate" -import get from "lodash.get" - +// This regex detects whether a string contains unicode escape sequences (\uXXXX or \xXX); when it does, we should interpolate them +// why? 
because platforms like React Native don't decode these escapes on their own; running the string through JSON.parse turns them into real characters. NOTE: intentionally no `g` flag, because a shared global regex keeps `lastIndex` between `.test()` calls and would intermittently miss matches +export const UNICODE_REGEX = /\\u[a-fA-F0-9]{4}|\\x[a-fA-F0-9]{2}/; export type CatalogFormat = "lingui" | "minimal" | "po" | "csv" export type CatalogFormatOptions = { diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index 2695d5e51..20ba11160 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -2,6 +2,8 @@ import { CompiledMessage, Locales } from "./i18n" import { date, number } from "./formats" import { isString, isFunction } from "./essentials" +export const UNICODE_REGEX = /\\u[a-fA-F0-9]{4}|\\x[a-fA-F0-9]{2}/; + const defaultFormats = ( locale, locales, @@ -111,7 +113,7 @@ export function interpolate( } const result = formatMessage(translation) - if (isString(result) && /\\u[a-fA-F0-9]{4}/g.test(result)) return JSON.parse(`"${result.trim()}"`) + if (isString(result) && UNICODE_REGEX.test(result)) return JSON.parse(`"${result.trim().replace(/\\x([a-fA-F0-9]{2})/g, "\\u00$1")}"`) if (isString(result)) return result.trim() return result } diff --git a/packages/core/src/i18n.ts b/packages/core/src/i18n.ts index d17a7a790..024fd7760 100644 --- a/packages/core/src/i18n.ts +++ b/packages/core/src/i18n.ts @@ -1,4 +1,4 @@ -import { interpolate } from "./context" +import { interpolate, UNICODE_REGEX } from "./context" import { isString, isFunction } from "./essentials" import { date, number } from "./formats" import * as icu from "./dev" @@ -193,7 +193,7 @@ export class I18n extends EventEmitter { // hack for parsing unicode values inside a string to get parsed in react native environments - if (isString(translation) && /\\u[a-fA-F0-9]{4}/g.test(translation)) return JSON.parse(`"${translation}"`) as string; + if (isString(translation) && UNICODE_REGEX.test(translation)) return JSON.parse(`"${translation.replace(/\\x([a-fA-F0-9]{2})/g, "\\u00$1")}"`) as string; if (isString(translation)) return translation return interpolate( diff --git a/packages/macro/src/macroJs.test.ts 
b/packages/macro/src/macroJs.test.ts index 406baba38..558c8519a 100644 --- a/packages/macro/src/macroJs.test.ts +++ b/packages/macro/src/macroJs.test.ts @@ -80,7 +80,7 @@ describe("js macro", () => { ]) }) - it("message with unicode chars is interpreted by babel", () => { + it("message with unicode \\u chars is interpreted by babel", () => { const macro = createMacro() const exp = parseExpression('t`Message \\u0020`') const tokens = macro.tokenizeTemplateLiteral(exp) @@ -92,6 +92,19 @@ describe("js macro", () => { ]) }) + it("message with unicode \\x chars is interpreted by babel", () => { + const macro = createMacro() + const exp = parseExpression('t`Bienvenue\\xA0!`') + const tokens = macro.tokenizeTemplateLiteral(exp) + expect(tokens).toEqual([ + { + type: "text", + // Looks like an empty space, but it isn't + value: 'Bienvenue !', + }, + ]) + }) + it("message with double scaped literals it's stripped", () => { const macro = createMacro() const exp = parseExpression('t\`Passing \\`${argSet}\\` is not supported.\`') diff --git a/packages/macro/src/macroJs.ts b/packages/macro/src/macroJs.ts index e1dbb2c95..e69786075 100644 --- a/packages/macro/src/macroJs.ts +++ b/packages/macro/src/macroJs.ts @@ -1,6 +1,7 @@ import * as R from "ramda" import * as babelTypes from "@babel/types" import { NodePath } from "@babel/traverse" +import { UNICODE_REGEX } from "@lingui/conf" import ICUMessageFormat from "./icu" import { zip, makeCounter } from "./utils" @@ -265,7 +266,7 @@ export default class MacroJs { quasis: R.map((text: babelTypes.TemplateElement) => { // Don't output tokens without text. // if it's an unicode we keep the cooked value because it's the parsed value by babel (without unicode chars) - const value = /\\u[a-fA-F0-9]{4}/g.test(text.value.raw) ? text.value.cooked : text.value.raw + const value = UNICODE_REGEX.test(text.value.raw) ? 
text.value.cooked : text.value.raw if (value === "") return null return { diff --git a/packages/macro/src/macroJsx.ts b/packages/macro/src/macroJsx.ts index c77935494..f0e3cde51 100644 --- a/packages/macro/src/macroJsx.ts +++ b/packages/macro/src/macroJsx.ts @@ -1,6 +1,7 @@ import * as R from "ramda" import * as babelTypes from "@babel/types" import { NodePath } from "@babel/traverse" +import { UNICODE_REGEX } from "@lingui/conf" import ICUMessageFormat from "./icu" import { zip, makeCounter } from "./utils" @@ -231,7 +232,7 @@ export default class MacroJSX { R.evolve({ quasis: R.map((text: babelTypes.TemplateElement) => { // Don"t output tokens without text. - const value = /\\u[a-fA-F0-9]{4}/g.test(text.value.raw) ? text.value.cooked : text.value.raw + const value = UNICODE_REGEX.test(text.value.raw) ? text.value.cooked : text.value.raw if (value === "") return null return this.tokenizeText(this.clearBackslashes(value))