Skip to content

Commit

Permalink
Add support for processing RFC4646 style tags.
Browse files Browse the repository at this point in the history
  • Loading branch information
ptr727 committed Mar 4, 2023
1 parent a7cebb7 commit f694f2d
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 34 deletions.
69 changes: 52 additions & 17 deletions Utilities/ISO-639-3.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@


// Language data sources
// http://www-01.sil.org/iso639-3
// https://www.rfc-editor.org/info/bcp47
// https://www.iana.org/assignments/language-tags/language-tags.xml

// Generated code
// https://docs.microsoft.com/en-us/visualstudio/modeling/code-generation-and-t4-text-templates
// Visual Studio: Right click on .tt file and run custom tool
// https://github.com/mono/t4
// http://www-01.sil.org/iso639-3/download.asp
// wget -O ISO-639-3.tab https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
// dotnet tool install -g dotnet-t4
// t4 --out=ISO-639-3.cs .\ISO-639-3.tt
Expand Down Expand Up @@ -42,22 +47,13 @@ public static Iso6393 FromString(string language, List<Iso6393> iso6393List)

// Match the input string type
Iso6393 lang;
if (language.Length > 3 && language.ElementAt(2) == '-')
{
// Treat the language as a culture form, e.g. en-us
CultureInfo cix = new CultureInfo(language);

// Recursively call using the ISO 639-2 code
return FromString(cix.ThreeLetterISOLanguageName, iso6393List);
}
if (language.Length > 3)
{
// Try long form
lang = iso6393List.FirstOrDefault(item => item.RefName.Equals(language, StringComparison.OrdinalIgnoreCase));
if (lang != null)
return lang;
}
if (language.Length == 3)
// Look for dash
int dash = language.IndexOf('-');

// 639 3 letter form
// E.g. zho, chi, afr
if (language.Length == 3 && dash == -1)
{
// Try 639-3
lang = iso6393List.FirstOrDefault(item => item.Id.Equals(language, StringComparison.OrdinalIgnoreCase));
Expand All @@ -74,14 +70,53 @@ public static Iso6393 FromString(string language, List<Iso6393> iso6393List)
if (lang != null)
return lang;
}
if (language.Length == 2)

// 639 2 letter form
// E.g. zh, af
if (language.Length == 2 && dash == -1)
{
// Try 639-1
lang = iso6393List.FirstOrDefault(item => item.Part1.Equals(language, StringComparison.OrdinalIgnoreCase));
if (lang != null)
return lang;
}

// Long form
// E.g. Zambian Sign Language, Zulu
if (dash == -1)
{
// Try long form
lang = iso6393List.FirstOrDefault(item => item.RefName.Equals(language, StringComparison.OrdinalIgnoreCase));
if (lang != null)
return lang;
}

// Try to lookup from CultureInfo
// E.g. en-us, zh-Hans
if (language.Length > 4 && dash != -1 && language.ElementAt(2) == '-')
{
try
{
// Get culture info from OS
CultureInfo cix = new CultureInfo(language);

// Recursively call using the ISO 639-2 code
return FromString(cix.ThreeLetterISOLanguageName, iso6393List);
}
catch (CultureNotFoundException)
{
// Try something else
}
}

// Try the prefix part
// E.g. cmn-Hans, cmn-Hant, yue-Hant
if (dash != -1)
{
// Recursively call using the prefix part only
return FromString(language.Substring(0, dash), iso6393List);
}

// Not found
return null;
}
Expand Down
69 changes: 52 additions & 17 deletions Utilities/ISO-639-3.tt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@
<#@ assembly name="Microsoft.VisualBasic.dll" #>
<#@ import namespace="Microsoft.VisualBasic.FileIO" #>

// Language data sources
// http://www-01.sil.org/iso639-3
// https://www.rfc-editor.org/info/bcp47
// https://www.iana.org/assignments/language-tags/language-tags.xml

// Generated code
// https://docs.microsoft.com/en-us/visualstudio/modeling/code-generation-and-t4-text-templates
// Visual Studio: Right click on .tt file and run custom tool
// https://github.com/mono/t4
// http://www-01.sil.org/iso639-3/download.asp
// wget -O ISO-639-3.tab https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab
// dotnet tool install -g dotnet-t4
// t4 --out=ISO-639-3.cs .\ISO-639-3.tt
Expand Down Expand Up @@ -46,22 +51,13 @@ namespace InsaneGenius.Utilities

// Match the input string type
Iso6393 lang;
if (language.Length > 3 && language.ElementAt(2) == '-')
{
// Treat the language as a culture form, e.g. en-us
CultureInfo cix = new CultureInfo(language);

// Recursively call using the ISO 639-2 code
return FromString(cix.ThreeLetterISOLanguageName, iso6393List);
}
if (language.Length > 3)
{
// Try long form
lang = iso6393List.FirstOrDefault(item => item.RefName.Equals(language, StringComparison.OrdinalIgnoreCase));
if (lang != null)
return lang;
}
if (language.Length == 3)
// Look for dash
int dash = language.IndexOf('-');

// 639 3 letter form
// E.g. zho, chi, afr
if (language.Length == 3 && dash == -1)
{
// Try 639-3
lang = iso6393List.FirstOrDefault(item => item.Id.Equals(language, StringComparison.OrdinalIgnoreCase));
Expand All @@ -78,14 +74,53 @@ namespace InsaneGenius.Utilities
if (lang != null)
return lang;
}
if (language.Length == 2)

// 639 2 letter form
// E.g. zh, af
if (language.Length == 2 && dash == -1)
{
// Try 639-1
lang = iso6393List.FirstOrDefault(item => item.Part1.Equals(language, StringComparison.OrdinalIgnoreCase));
if (lang != null)
return lang;
}

// Long form
// E.g. Zambian Sign Language, Zulu
if (dash == -1)
{
// Try long form
lang = iso6393List.FirstOrDefault(item => item.RefName.Equals(language, StringComparison.OrdinalIgnoreCase));
if (lang != null)
return lang;
}

// Try to lookup from CultureInfo
// E.g. en-us, zh-Hans
if (language.Length > 4 && dash != -1 && language.ElementAt(2) == '-')
{
try
{
// Get culture info from OS
CultureInfo cix = new CultureInfo(language);

// Recursively call using the ISO 639-2 code
return FromString(cix.ThreeLetterISOLanguageName, iso6393List);
}
catch (CultureNotFoundException)
{
// Try something else
}
}

// Try the prefix part
// E.g. cmn-Hans, cmn-Hant, yue-Hant
if (dash != -1)
{
// Recursively call using the prefix part only
return FromString(language.Substring(0, dash), iso6393List);
}

// Not found
return null;
}
Expand Down
11 changes: 11 additions & 0 deletions UtilitiesTests/Iso6393Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ public void Create()
[Theory]
[InlineData("afr", "Afrikaans")]
[InlineData("af", "Afrikaans")]
[InlineData("zh", "Chinese")]
[InlineData("zh-CHS", "Chinese")]
[InlineData("zh-Hans", "Chinese")]
[InlineData("zh-CHT", "Chinese")]
[InlineData("zh-Hant", "Chinese")]
[InlineData("zho", "Chinese")]
[InlineData("chi", "Chinese")]
[InlineData("cmn-Hans", "Mandarin Chinese")]
[InlineData("cmn-Hant", "Mandarin Chinese")]
[InlineData("yue", "Yue Chinese")]
[InlineData("yue-Hant", "Yue Chinese")]
public void FromString(string input, string output)
{
// Create full list of languages
Expand Down

0 comments on commit f694f2d

Please sign in to comment.