Skip to content

Commit

Permalink
[GR-42869] Update regex for Unicode 15.
Browse files Browse the repository at this point in the history
PullRequest: graal/13393
  • Loading branch information
jirkamarsik committed Dec 15, 2022
2 parents 47d10f8 + 78ba489 commit 2b8ff8e
Show file tree
Hide file tree
Showing 6 changed files with 693 additions and 637 deletions.
4 changes: 4 additions & 0 deletions regex/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

This changelog summarizes major changes between TRegex versions relevant to language implementors integrating TRegex into their language. This document will focus on API changes relevant to integrators of TRegex.

## Version 23.0.0

* Updated Unicode data (case-folding, character properties) to version 15 of the Unicode standard.

## Version 22.2.0

* Added support for atomic groups and possessive quantifiers in Ruby regular expressions by using backtracking.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

public class UnicodeCharacterAliases {

public static final EconomicMap<String, Integer> CHARACTER_ALIASES = EconomicMap.create(470);
public static final EconomicMap<String, Integer> CHARACTER_ALIASES = EconomicMap.create(473);

static {
CHARACTER_ALIASES.put("ACK", 0x0006);
Expand All @@ -58,6 +58,7 @@ public class UnicodeCharacterAliases {
CHARACTER_ALIASES.put("ALM", 0x061C);
CHARACTER_ALIASES.put("APC", 0x009F);
CHARACTER_ALIASES.put("APPLICATION PROGRAM COMMAND", 0x009F);
CHARACTER_ALIASES.put("ARABIC SMALL HIGH LIGATURE ALEF WITH YEH BARREE", 0x0616);
CHARACTER_ALIASES.put("BACKSPACE", 0x0008);
CHARACTER_ALIASES.put("BEL", 0x0007);
CHARACTER_ALIASES.put("BOM", 0xFEFF);
Expand Down Expand Up @@ -94,6 +95,7 @@ public class UnicodeCharacterAliases {
CHARACTER_ALIASES.put("DEVICE CONTROL THREE", 0x0013);
CHARACTER_ALIASES.put("DEVICE CONTROL TWO", 0x0012);
CHARACTER_ALIASES.put("DLE", 0x0010);
CHARACTER_ALIASES.put("EM", 0x0019);
CHARACTER_ALIASES.put("END OF GUARDED AREA", 0x0097);
CHARACTER_ALIASES.put("END OF LINE", 0x000A);
CHARACTER_ALIASES.put("END OF MEDIUM", 0x0019);
Expand Down Expand Up @@ -248,6 +250,7 @@ public class UnicodeCharacterAliases {
CHARACTER_ALIASES.put("STX", 0x0002);
CHARACTER_ALIASES.put("SUB", 0x001A);
CHARACTER_ALIASES.put("SUBSTITUTE", 0x001A);
CHARACTER_ALIASES.put("SUNDANESE LETTER ARCHAIC I", 0x1BBD);
CHARACTER_ALIASES.put("SYN", 0x0016);
CHARACTER_ALIASES.put("SYNCHRONOUS IDLE", 0x0016);
CHARACTER_ALIASES.put("SYRIAC SUBLINEAR COLON SKEWED LEFT", 0x0709);
Expand Down
Loading

0 comments on commit 2b8ff8e

Please sign in to comment.