Skip to content

Commit

Permalink
unicode-analyzer: Display friendly general category name
Browse files (browse the repository at this point in the history)
  • Loading branch information
eliandoran committed Jul 9, 2024
1 parent 94512cb commit a01057e
Showing 1 changed file with 111 additions and 34 deletions.
145 changes: 111 additions & 34 deletions src/routes/unicode-analyzer/analyzer.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,35 +2,105 @@ import { get_unicode_by_decimal } from "unicode-information";
import he from "he";

/**
 * Lookup table keyed by the two-letter Unicode General Category abbreviation.
 *
 * Each entry carries:
 *  - `name`:    the human-readable label shown in place of the abbreviation.
 *               Abbreviations missing from this table are expected to fall
 *               back to the raw code at the point of use.
 *  - `control`: present (and `true`) only for categories whose characters are
 *               displayed as a `U+XXXX` code rather than as the character
 *               itself. It is deliberately omitted (i.e. `undefined`) for
 *               every other category.
 */
const generalCategoryMappings = {
    // Other (control-like) categories
    Cc: { name: "Control", control: true },
    Cf: { name: "Format", control: true },
    Co: { name: "Private Use", control: true },
    Cs: { name: "Surrogate", control: true },

    // Letters
    Ll: { name: "Lowercase Letter" },
    Lm: { name: "Modifier Letter" },
    Lo: { name: "Other Letter" },
    Lt: { name: "Titlecase Letter" },
    Lu: { name: "Uppercase Letter" },

    // Marks
    Mc: { name: "Spacing Mark", control: true },
    Me: { name: "Enclosing Mark", control: true },
    Mn: { name: "Nonspacing Mark", control: true },

    // Numbers
    Nd: { name: "Decimal Number" },
    Nl: { name: "Letter Number", control: true },
    No: { name: "Other Number", control: true },

    // Punctuation
    Pc: { name: "Connector Punctuation" },
    Pd: { name: "Dash Punctuation" },
    Pe: { name: "Close Punctuation" },
    Pf: { name: "Final Punctuation" },
    Pi: { name: "Initial Punctuation" },
    Po: { name: "Other Punctuation" },
    Ps: { name: "Open Punctuation" },

    // Symbols
    Sc: { name: "Currency Symbol" },
    Sk: { name: "Modifier Symbol" },
    Sm: { name: "Math Symbol" },
    So: { name: "Other Symbol" },

    // Separators
    Zl: { name: "Line Separator", control: true },
    Zp: { name: "Paragraph Separator", control: true },
    Zs: { name: "Space Character", control: true },
};

export class UnicodeAnalyzer {
Expand All @@ -47,10 +117,17 @@ export class UnicodeAnalyzer {
}

const info = get_unicode_by_decimal(ch.charCodeAt(0))
info.htmlEntity = he.encode(ch, {
useNamedReferences: true
});
this.#dataCodeLookup[code] = info;
if (info) {
this.#dataCodeLookup[code] = {
cp: info.cp,
name: info.name,
gc: generalCategoryMappings[info.gc]?.name || info.gc,
blk: info.blk,
htmlEntity: he.encode(ch, {
useNamedReferences: true
})
};
}

const builtData = this.#buildNewCharacter(ch, info);
if (typeof builtData === "object") {
Expand Down Expand Up @@ -88,7 +165,7 @@ export class UnicodeAnalyzer {
return { symbol: "␣", noBackground: true }
}

if (info && generalCategoryMappings[info.gc]) {
if (info && generalCategoryMappings[info.gc].control) {
const unicodeHexValue = `U+${ch.charCodeAt(0).toString(16).toUpperCase().padStart(4, "0")}`;
return { symbol: unicodeHexValue }
}
Expand Down

0 comments on commit a01057e

Please sign in to comment.