Skip to content

Commit

Permalink
Add ISO 639-2 data
Browse files Browse the repository at this point in the history
  • Loading branch information
ptr727 committed May 6, 2023
1 parent 31b3a71 commit 0e677e7
Show file tree
Hide file tree
Showing 12 changed files with 5,617 additions and 8 deletions.
487 changes: 487 additions & 0 deletions Data/ISO-639-2_utf-8.txt

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ Packages published on [NuGet](https://www.nuget.org/packages/InsaneGenius.Utilit

## External Data

- ISO 639-3 language data is sourced from [ISO 639-3 Registration Authority](https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab).
- RFC 5646 / BCP 47 language data is sourced from [IANA](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry).
- ISO 639-2 language data is sourced from the [ISO 639-2 Registration Authority](https://www.loc.gov/standards/iso639-2/langhome.html) [download](https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt).
- ISO 639-3 language data is sourced from the [ISO 639-3 Registration Authority](https://iso639-3.sil.org/) [download](https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3.tab).
- RFC 5646 / BCP 47 language data is sourced from the [IANA Tags for Identifying Languages RFC 5646](https://www.rfc-editor.org/rfc/rfc5646.html) [download](https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry).
1 change: 1 addition & 0 deletions Utilities.sln
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sandbox", "Sandbox\Sandbox.
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{7E189F23-48EE-44B2-A102-0FDF27BE9C88}"
ProjectSection(SolutionItems) = preProject
Data\ISO-639-2_utf-8.txt = Data\ISO-639-2_utf-8.txt
Data\iso-639-3.tab = Data\iso-639-3.tab
Data\language-subtag-registry = Data\language-subtag-registry
EndProjectSection
Expand Down
126 changes: 126 additions & 0 deletions Utilities/Iso6392.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;

namespace InsaneGenius.Utilities;

// ISO-639-2 reference
// https://www.loc.gov/standards/iso639-2/langhome.html
// https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt

// T4 template
// https://docs.microsoft.com/en-us/visualstudio/modeling/code-generation-and-t4-text-templates
// https://github.com/mono/t4
// wget -O ./Data/ISO-639-2_utf-8.txt https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
// dotnet tool install -g dotnet-t4
// dotnet publish ./Utilities/Utilities.csproj --self-contained=false --output=./bin/T4
// t4 -P="./bin/T4" --out=./Utilities/Iso6392Gen.cs ./Utilities/Iso6392Gen.tt

// ISO 639-2 class
/// <summary>
/// ISO 639-2 language table. Records can be loaded from a pipe-delimited data file
/// and looked up by 639-2/B code, 639-2/T code, 639-1 code, or English name.
/// </summary>
public partial class Iso6392
{
    /// <summary>
    /// A single ISO 639-2 language entry.
    /// </summary>
    public class Record
    {
        /// <summary>Default identifier, uses the 639-2 Bibliographic code.</summary>
        public string Id => Part2B;

        /// <summary>639-2 Bibliographic code.</summary>
        public string Part2B { get; set; } = "";

        /// <summary>639-2 Terminology code.</summary>
        public string Part2T { get; set; } = "";

        /// <summary>639-1 code.</summary>
        public string Part1 { get; set; } = "";

        /// <summary>English name.</summary>
        public string RefName { get; set; } = "";
    }

    /// <summary>
    /// The loaded records, in file order. Replaced content on every <see cref="Load"/> call.
    /// </summary>
    public List<Record> RecordList { get; private set; } = new();

    // Create() method is generated from Iso6392Gen.tt
    // public bool Create() { return true; }

    /// <summary>
    /// Loads records from a pipe-delimited ISO 639-2 data file into <see cref="RecordList"/>.
    /// Any previously loaded records are discarded once the file has been opened.
    /// </summary>
    /// <param name="fileName">Path of the data file to read.</param>
    /// <returns>
    /// <c>true</c> when the file was read to the end; <c>false</c> when an exception
    /// was logged and handled by the logger filter.
    /// </returns>
    public bool Load(string fileName)
    {
        try
        {
            // Open the file as a stream, the reader owns and disposes the stream
            using StreamReader lineReader = new(File.OpenRead(fileName));

            // Init
            RecordList.Clear();

            // Read header
            // NOTE(review): the first line is always skipped as a header; confirm the
            // data file really starts with a header row, otherwise the first language
            // record is silently dropped.
            var line = lineReader.ReadLine();
            Debug.Assert(!string.IsNullOrEmpty(line));

            // Read line by line
            while ((line = lineReader.ReadLine()) is not null)
            {
                // Parse using pipe character
                var records = line.Split('|');
                Debug.Assert(records.Length == 5);

                // Populate record
                // A line with fewer than 4 fields throws here and is reported by the catch filter
                var record = new Record
                {
                    Part2B = records[0].Trim(),
                    Part2T = records[1].Trim(),
                    Part1 = records[2].Trim(),
                    RefName = records[3].Trim(),
                    // Ignore the French name, German name is not in file
                };
                RecordList.Add(record);
            }
        }
        catch (Exception e) when (LogOptions.Logger.LogAndHandle(e, MethodBase.GetCurrentMethod()?.Name))
        {
            return false;
        }
        return true;
    }

    /// <summary>
    /// Finds the first record matching a language tag, comparing case-insensitively.
    /// Three letter tags are matched against 639-2/B then 639-2/T, two letter tags
    /// against 639-1, and optionally any tag against the English name.
    /// </summary>
    /// <param name="languageTag">Language code or English name to look up.</param>
    /// <param name="includeDescription">
    /// When <c>true</c>, also try to match <paramref name="languageTag"/> against the English name.
    /// </param>
    /// <returns>The matching record, or <c>null</c> when nothing matches.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="languageTag"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="languageTag"/> is empty.</exception>
    public Record Find(string languageTag, bool includeDescription)
    {
        // Validate the argument value, not the literal parameter name
        ArgumentException.ThrowIfNullOrEmpty(languageTag);

        // Find the matching language entry
        Record record;

        // 639 3 letter form
        if (languageTag.Length == 3)
        {
            // Try the 639-2/B
            record = RecordList.FirstOrDefault(item => item.Part2B.Equals(languageTag, StringComparison.OrdinalIgnoreCase));
            if (record is not null)
            {
                return record;
            }

            // Try the 639-2/T
            record = RecordList.FirstOrDefault(item => item.Part2T.Equals(languageTag, StringComparison.OrdinalIgnoreCase));
            if (record is not null)
            {
                return record;
            }
        }

        // 639 2 letter form
        if (languageTag.Length == 2)
        {
            // Try 639-1
            record = RecordList.FirstOrDefault(item => item.Part1.Equals(languageTag, StringComparison.OrdinalIgnoreCase));
            if (record is not null)
            {
                return record;
            }
        }

        // Long form
        if (includeDescription)
        {
            // Try long form
            record = RecordList.FirstOrDefault(item => item.RefName.Equals(languageTag, StringComparison.OrdinalIgnoreCase));
            if (record is not null)
            {
                return record;
            }
        }

        // Not found
        return null;
    }
}
Loading

0 comments on commit 0e677e7

Please sign in to comment.