Skip to content

Commit

Permalink
Add MF2 test generator
Browse files Browse the repository at this point in the history
  • Loading branch information
mradbourne committed Feb 27, 2024
1 parent 5279e7e commit 9066446
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

.pylintrc
.idea
.devcontainer

executors/rust/target/
**/__pycache__/
Expand Down
21 changes: 21 additions & 0 deletions testgen/icu75/message_fmt2/syntax/literal-text.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"scenario": "Literal text",
"description": "Text placed directly into the pattern",
"defaultTestProperties": {
"testSubtype": "syntax"
},
"tests": [
{
"description": "Includes basic literals",
"locale": "en-US",
"pattern": "hello world",
"verify": "hello world"
},
{
"description": "Includes unquoted literals",
"locale": "en-US",
"pattern": "hello {world}",
"verify": "hello world"
}
]
}
33 changes: 33 additions & 0 deletions testgen/icu75/message_fmt2/syntax/whitespace.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"scenario": "Whitespace",
"description": "Leading and trailing space characters",
"defaultTestProperties": {
"testSubtype": "syntax"
},
"tests": [
{
"description": "Preserves leading space on basic literals",
"locale": "en-US",
"pattern": " hello world",
"verify": " hello world"
},
{
"description": "Removes leading space on unquoted literals",
"locale": "en-US",
"pattern": "hello { world}",
"verify": "hello world"
},
{
"description": "Preserves trailing space on basic literals",
"locale": "en-US",
"pattern": "hello world ",
"verify": "hello world "
},
{
"description": "Removes trailing space on unquoted literals",
"locale": "en-US",
"pattern": "hello {world }",
"verify": "hello world"
}
]
}
82 changes: 76 additions & 6 deletions testgen/testdata_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
import re
import requests
from enum import Enum
import glob
from pathlib import Path
from jsonschema import Draft202012Validator, ValidationError

# Matches a line that is empty or consists solely of whitespace.
# Raw string: '\s' in a plain literal is an invalid escape sequence.
reblankline = re.compile(r'^\s*$')

Expand All @@ -27,6 +30,10 @@ class TestType(str, Enum):
COLLATION_SHORT = 'collation_short'
LANG_NAMES = 'lang_names'
LIKELY_SUBTAGS = 'likely_subtags'
MESSAGE_FMT2 = 'message_fmt2'

def __str__(self):
return self.value


class generateData():
Expand All @@ -41,10 +48,10 @@ def setVersion(self, selected_version):
self.icu_version = selected_version

def saveJsonFile(self, filename, data, indent=None):
    """Serialize *data* as JSON into the generated test-data tree.

    The file is written to
    ``<directory of this module>/../DDT_DATA/testData/<icu_version>/<filename>``.

    Args:
        filename: Name of the output file inside the version directory.
        data: JSON-serializable object to write.
        indent: Passed through to ``json.dump`` to control pretty-printing.
    """
    output_path = Path(os.path.dirname(__file__), '..', 'DDT_DATA', 'testData', self.icu_version, filename)
    # The version directory may not exist yet on a fresh checkout.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Single write, with the handle closed even if json.dump raises.
    with open(output_path, 'w', encoding='UTF-8') as output_file:
        json.dump(data, output_file, indent=indent)

def getTestDataFromGitHub(self, datafile_name, version):
# Path for fetching test data from ICU repository
Expand Down Expand Up @@ -324,20 +331,80 @@ def processLikelySubtagsData(self):
logging.info('Likely Subtags Test (%s): %d lines processed', self.icu_version, count)
return

def processMessageFmt2TestData(self):
    """Generate the MessageFormat 2 test and verification JSON files.

    Every ``*.json`` file found recursively under
    ``<directory of this module>/<icu_version>/message_fmt2`` is read,
    checked against ``../schema/message_fmt2/testgen_schema.json`` and
    flattened into two parallel lists: test inputs and expected results.
    Per-test ``testSubtype``, ``locale``, ``pattern`` and ``args`` fall
    back to the file's ``defaultTestProperties`` when a test omits them.
    Both lists are passed through ``self.sample_tests`` and written with
    ``self.saveJsonFile``.

    Unreadable files are skipped. Schema violations are logged and the
    file is still processed best-effort. A test missing a required value
    is logged and omitted from both output lists.
    """
    json_test = {'test_type': str(TestType.MESSAGE_FMT2), 'tests': []}
    json_verify = {'test_type': str(TestType.MESSAGE_FMT2), 'verifications': []}

    src_dir = Path(os.path.dirname(__file__), self.icu_version, TestType.MESSAGE_FMT2)
    # Sorted so labels are assigned in a stable order across runs.
    src_file_paths = sorted(glob.glob(os.path.join(src_dir, '**', '*.json'), recursive=True))

    json_schema_path = Path(os.path.dirname(__file__), '..', 'schema', TestType.MESSAGE_FMT2, 'testgen_schema.json')
    # Context manager so the schema file handle is not leaked.
    with open(json_schema_path, encoding='utf-8') as schema_file:
        json_schema_validator = Draft202012Validator(json.load(schema_file))

    test_count = 0
    test_list = []
    verify_list = []

    for test_file_path in src_file_paths:
        src_data = readFile(test_file_path, filetype='json')
        if src_data is None:
            logging.error('Problem reading JSON. Omitting file %s', test_file_path)
            continue

        # An absent defaults section becomes an empty dict, so a missing
        # value surfaces as the KeyError handled below, not a TypeError.
        defaults = src_data.get('defaultTestProperties') or {}

        try:
            json_schema_validator.validate(src_data)
        except ValidationError as err:
            # Logged only: the file is still processed best-effort.
            logging.error('Problem validating JSON: %s', test_file_path)
            logging.error(err)

        # A file without 'tests' has already been reported by validation.
        for src_test in src_data.get('tests') or []:
            test_count += 1
            # Labels are zero-based and consumed even by omitted tests.
            label = f'{test_count - 1:05d}'
            description = f'{src_data["scenario"]}: {src_test["description"]}'
            args = src_test.get('args') or defaults.get('args')

            try:
                # Build both entries before appending either, so a missing
                # 'verify' cannot leave the two lists out of step.
                test_entry = {
                    'label': label,
                    'test_description': description,
                    'test_subtype': src_test.get('testSubtype') or defaults['testSubtype'],
                    'locale': src_test.get('locale') or defaults['locale'],
                    'pattern': src_test.get('pattern') or defaults['pattern'],
                }
                if args:
                    test_entry['args'] = args
                verify_entry = {
                    'label': label,
                    'verify': src_test['verify'],
                }
            except KeyError as err:
                logging.error('Missing value for %s in %s', err, test_file_path)
                logging.error('Omitting test %s (%s)', label, description)
                continue

            test_list.append(test_entry)
            verify_list.append(verify_entry)

    json_test["tests"] = self.sample_tests(test_list)
    json_verify["verifications"] = self.sample_tests(verify_list)

    self.saveJsonFile(f'{TestType.MESSAGE_FMT2}_test.json', json_test, 2)
    self.saveJsonFile(f'{TestType.MESSAGE_FMT2}_verify.json', json_verify, 2)

    logging.info('MessageFormat2 Test (%s): %d tests processed', self.icu_version, test_count)


# Utility functions
def computeMaxDigitsForCount(count):
    """Return ceil(log10(count + 1)).

    For a positive integer this is the number of decimal digits in
    ``count``; for 0 the result is 0.
    """
    magnitude = math.log10(1 + count)
    return math.ceil(magnitude)


def readFile(filename, version='', filetype='txt'):
    """Read a test source file, returning its contents or None on failure.

    Args:
        filename: Path of the file to read.
        version: Optional subdirectory containing the test source; when
            given, the file is opened at ``version/filename``.
        filetype: ``'json'`` parses the file and returns the decoded
            object; any other value returns the raw text.

    Returns:
        The text or decoded JSON, or None if the file could not be read
        or parsed (a warning is logged in that case).
    """
    # If version is provided, it refers to a subdirectory containing the test source
    path = os.path.join(version, filename) if version else filename
    try:
        with open(path, 'r', encoding='utf-8') as testdata:
            if filetype == 'json':
                return json.load(testdata)
            return testdata.read()
    except Exception as err:
        # Exception, not BaseException: KeyboardInterrupt and SystemExit
        # must propagate instead of being reported as a read failure.
        logging.warning('** READ: Error = %s', err)
        return None
Expand Down Expand Up @@ -1065,6 +1132,9 @@ def generate_versioned_data(version_info):
# This is slow
data_generator.processLangNameTestData()

if TestType.MESSAGE_FMT2 in new_args.test_types:
data_generator.processMessageFmt2TestData()

logging.info('++++ Data generation for %s is complete.', icu_version)


Expand Down

0 comments on commit 9066446

Please sign in to comment.