Skip to content

Commit

Permalink
Pseudo-Locale-related mods (#195)
Browse files Browse the repository at this point in the history
  • Loading branch information
turquoiseowl committed May 17, 2015
1 parent ae24a49 commit 112ea2a
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 44 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,17 @@ Note the `-x-`, after which you can add four or more alphanumeric characters to
There must be an exact match for all subtags for this translation to be returned. If the module can't find a
translation for the tenant, it will match the remaining subtags according to the algorithm described above.

##### Microsoft Pseudo-Locales and App Testing

As an aid to testing the localization of your app, Microsoft have added some
['pseudo-locales'](https://msdn.microsoft.com/en-us/library/windows/desktop/dd319106(v=vs.85).aspx) to Windows.

Specifically, these are identified by the following special language tags ```qps-ploc```, ```qps-plocm``` and
```qps-ploca```.

i18n supports the use of these special locales. See [Issue #195](https://github.com/turquoiseowl/i18n/issues/195)
for further details.

##### Language Matching Update

The latest refinement to the language matching algorithm:
Expand Down
49 changes: 25 additions & 24 deletions src/i18n.Tests/Tests/LanguageTagTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,30 +66,31 @@ public void ExtractLangTagFromUrl()
ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCD/account/x" , "zh-Hans-123-x-ABCD" , "/account/x");
ExtractLangTagFromUrlHelper("/zh-Hans-123-x-ABCDEFG123/account/x", "zh-Hans-123-x-ABCDEFG123", "/account/x");

ExtractLangTagFromUrlHelper("/azh" , null);
ExtractLangTagFromUrlHelper("/azh-HK" , null);
ExtractLangTagFromUrlHelper("/azh-123" , null);
ExtractLangTagFromUrlHelper("/azh-Hans" , null);
ExtractLangTagFromUrlHelper("/azh-Hans-HK" , null);
ExtractLangTagFromUrlHelper("/azh-Hans-123" , null);
ExtractLangTagFromUrlHelper("/azh-Hans-123-x-ABCD" , null);
ExtractLangTagFromUrlHelper("/azh-Hans-123-x-ABCDEFG123", null);

ExtractLangTagFromUrlHelper("/zh-a" , null);
ExtractLangTagFromUrlHelper("/zh-aHK" , null);
ExtractLangTagFromUrlHelper("/zh-a123" , null);
ExtractLangTagFromUrlHelper("/zh-aHans" , null);
ExtractLangTagFromUrlHelper("/zh-aHans-HK" , null);
ExtractLangTagFromUrlHelper("/zh-aHans-123" , null);
ExtractLangTagFromUrlHelper("/zh-aHans-123-x-ABCD" , null);
ExtractLangTagFromUrlHelper("/zh-aHans-HK-x-ABCDEFG123", null);
ExtractLangTagFromUrlHelper("/zh-Hans-HK-x-ABC" , null);
ExtractLangTagFromUrlHelper("/zh-Hans-HK-x-" , null);
ExtractLangTagFromUrlHelper("/zh-Hans-HK-x" , null);
ExtractLangTagFromUrlHelper("/zh-Hans-HK-ABC" , null);
ExtractLangTagFromUrlHelper("/zh-Hans-HK-" , null);

ExtractLangTagFromUrlHelper("/zh-Hans-K" , null);
ExtractLangTagFromUrlHelper("/azh" , "azh" , "/");
ExtractLangTagFromUrlHelper("/azh-HK" , "azh-HK" , "/");
ExtractLangTagFromUrlHelper("/azh-123" , "azh-123" , "/");
ExtractLangTagFromUrlHelper("/azh-Hans" , "azh-Hans" , "/");
ExtractLangTagFromUrlHelper("/azh-Hans-HK" , "azh-Hans-HK" , "/");
ExtractLangTagFromUrlHelper("/azh-Hans-123" , "azh-Hans-123" , "/");
ExtractLangTagFromUrlHelper("/azh-Hans-123-x-ABCD" , "azh-Hans-123-x-ABCD" , "/");
ExtractLangTagFromUrlHelper("/azh-Hans-123-x-ABCDEFG123", "azh-Hans-123-x-ABCDEFG123", "/");


ExtractLangTagFromUrlHelper("/zh-a" , null); // 1-char Script/Region subtag = bad
ExtractLangTagFromUrlHelper("/zh-aHK" , null); // 3-char Script/Region subtag = bad
ExtractLangTagFromUrlHelper("/zh-a123" , null); // 4-char Script/Region subtag = bad
ExtractLangTagFromUrlHelper("/zh-aaHans" , null); // 6-char Script/Region subtag = bad
ExtractLangTagFromUrlHelper("/zh-aaHans-HK" , null); // 6-char Script subtag = bad
ExtractLangTagFromUrlHelper("/zh-aaHans-123" , null); // 6-char Script subtag = bad
ExtractLangTagFromUrlHelper("/zh-aaHans-123-x-ABCD" , null); // 6-char Script subtag = bad
ExtractLangTagFromUrlHelper("/zh-aaHans-HK-x-ABCDEFG123", null); // 6-char Script subtag = bad
ExtractLangTagFromUrlHelper("/zh-Hans-HK-x-ABC" , null); // < 4-char Private use subtag = bad
ExtractLangTagFromUrlHelper("/zh-Hans-HK-x-" , null); // < 4-char Private use subtag = bad
ExtractLangTagFromUrlHelper("/zh-Hans-HK-x" , null); // < 4-char Private use subtag = bad
ExtractLangTagFromUrlHelper("/zh-Hans-HK-ABC" , null); // Invalid subtag
ExtractLangTagFromUrlHelper("/zh-Hans-HK-" , null); // Invalid subtag

ExtractLangTagFromUrlHelper("/zh-Hans-K" , null); // Invalid Region
ExtractLangTagFromUrlHelper("/zh-Hans-23" , null);
ExtractLangTagFromUrlHelper("/zh-Hans-aHK" , null);
ExtractLangTagFromUrlHelper("/zh-Hans-a123" , null);
Expand Down
4 changes: 2 additions & 2 deletions src/i18n.Tests/Tests/ResponseFilterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ public void ResponseFilter_can_patch_html_urls()
"<a class=\"corporate_logo\" href=\"/fr\" title=\"Home\"></a>");
ResponseFilter_can_patch_html_urls(
"fr",
"<a class=\"corporate_logo\" href=\"/aaa\" title=\"Home\"></a>",
"<a class=\"corporate_logo\" href=\"/fr/aaa\" title=\"Home\"></a>");
"<a class=\"corporate_logo\" href=\"/aaaa\" title=\"Home\"></a>",
"<a class=\"corporate_logo\" href=\"/fr/aaaa\" title=\"Home\"></a>");
ResponseFilter_can_patch_html_urls(
"fr",
"<a class=\"corporate_logo\" title=\"Home\" href=\"/\"></a>",
Expand Down
44 changes: 26 additions & 18 deletions src/i18n/Concrete/LanguageTag.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,22 @@ namespace i18n
/// <remarks>
/// Supports a subset of BCP 47 language tag spec corresponding to the Windows
/// support for language names, namely the following subtags:
/// language (mandatory, 2 alphachars)
/// script (optional, 4 alphachars)
/// region (optional, 2 alphachars | 3 decdigits)
/// private use (optional, -x- followed by 4 or more alphanumericchars )
/// language (mandatory, 2 or 3 alphachars)
/// script (optional, 4 alphachars, or 5 alphachars in the special case of Microsoft Pseudo-Locales)
/// region (optional, 2 alphachars | 3 decdigits)
/// private use (optional, -x- followed by 4 or more alphanumeric chars)
/// Example tags supported:
/// "en" [language]
/// "en-US" [language + region]
/// "zh" [language]
/// "zh-HK" [language + region]
/// "zh-123" [language + region]
/// "zh-Hant" [language + script]
/// "zh-Hant-HK" [language + script + region]
/// "zh-Hant-HK-x-AAAA" [language + script + region + private use]
/// "en" [language]
/// "en-US" [language + region]
/// "zh" [language]
/// "zh-HK" [language + region]
/// "zh-123" [language + region]
/// "zh-Hant" [language + script]
/// "zh-Hant-HK" [language + script + region]
/// "zh-Hant-HK-x-AAAA" [language + script + region + private use]
/// "qps-ploc" [language + script] [Microsoft pseudo-locale]
/// "qps-plocm" [language + script] [Microsoft pseudo-locale]
/// "qps-ploca" [language + script] [Microsoft pseudo-locale]
/// </remarks>
/// <seealso href="http://www.microsoft.com/resources/msdn/goglobal/default.mspx"/>
public class LanguageTag : ILanguageTag, IEquatable<LanguageTag>, IComparable<LanguageTag>
Expand Down Expand Up @@ -69,13 +72,18 @@ public enum MatchGrade
_MaxMatch = LanguageMatch,
}
// Data
static readonly Regex m_regex_parseLangtag = new Regex(@"^([a-zA-Z]{2})(?:-([a-zA-Z]{4}))?(?:-([a-zA-Z]{2}|[0-9]{3}))?(?:\-x-([a-zA-Z0-9]{4,}))?$", RegexOptions.CultureInvariant);
// ([a-zA-Z]{2})
static readonly Regex m_regex_parseLangtag = new Regex(@"^([a-zA-Z]{2,3})(?:-([a-zA-Z]{4,5}))?(?:-([a-zA-Z]{2}|[0-9]{3}))?(?:\-x-([a-zA-Z0-9]{4,}))?$", RegexOptions.CultureInvariant);
// ([a-zA-Z]{2,3})
// Matches language.
// (?:-([a-zA-Z]{4}))?
// (?:-([a-zA-Z]{4,5}))?
// Matches script.
// NB: The inner group is wrapped in an outer non-capturing group that
// prefixes the former with the '-' which is thus not captured.
// NB: according to BCP47, Script subtag is always 4 chars; however, we have
// expanded this to allow 5 chars also so as to allow parsing all the Microsoft
// Pseudo-Locale language tags (qps-ploc, qps-plocm, qps-ploca).
// If this causes a problem, consider explicitly matching (ploc|plocm|ploca).
// Ref Issue https://github.com/turquoiseowl/i18n/issues/195.
// (?:-([a-zA-Z]{2}|[0-9]{3}))?
// Matches region.
// NB: The inner group is wrapped in an outer non-capturing group that
Expand All @@ -84,12 +92,12 @@ public enum MatchGrade
// Matches private use subtag
// eg en-ABCD-GB-x-AAAA
static readonly Regex m_regex_parseUrl = new System.Text.RegularExpressions.Regex(
@"^/([a-zA-Z]{2}(?:-[a-zA-Z]{4})?(?:-(?:[a-zA-Z]{2}|[0-9]{3}))?(?:\-x-([a-zA-Z0-9]{4,}))?)(?:$|/)",
@"^/([a-zA-Z]{2,3}(?:-[a-zA-Z]{4,5})?(?:-(?:[a-zA-Z]{2}|[0-9]{3}))?(?:\-x-([a-zA-Z0-9]{4,}))?)(?:$|/)",
System.Text.RegularExpressions.RegexOptions.CultureInvariant);
// ^/
// ( # begin 1st and only capture group
// [a-zA-Z]{2} # 2-letter country code
// (?:-[a-zA-Z]{4})? # optional script code - not a capture group itself
// [a-zA-Z]{2,3} # 2-letter or 3-letter language code
// (?:-[a-zA-Z]{4,5})? # optional script code - not a capture group itself
// (?:-(?:[a-zA-Z]{2}|[0-9]{3}))? # optional region code (2-letter or 3-digit) - not a capture group itself
// (?:\-x-([a-zA-Z0-9]{4,}))? # optional private use tag (-x- followed by 4+ alphanumeric characters) - not a capture group itself
// ) # end 1st and only capture group
Expand Down

0 comments on commit 112ea2a

Please sign in to comment.