Skip to content

Commit

Permalink
Format feature aspect and profile comparison strings as table.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 652022995
  • Loading branch information
isingoo authored and copybara-github committed Jul 13, 2024
1 parent 5eaf7d0 commit c5d6b27
Show file tree
Hide file tree
Showing 5 changed files with 326 additions and 304 deletions.
114 changes: 51 additions & 63 deletions nisaba/scripts/natural_translit/phonology/descriptive_features_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,79 +74,67 @@ def test_profile_p(self):
'}\n',
)

def test_compare_p_f_verbose(self):
  """Verbose p/f comparison table keeps the zero-distance aspect rows too."""
  # NOTE(review): the separator row implies padded columns, but the cell rows
  # below carry single spaces -- the padding looks collapsed; confirm against
  # the real tabulate output before relying on these literals.
  expected = (
      'phonology_descriptive comparison (max distance = 23.50):\n\n'
      '| aspect | p | f | distance |\n'
      '|----------------|----------------|----------------|------------|\n'
      '| ph_class | consonant | consonant | 0 |\n'
      '| airstream | pulmonic | pulmonic | 0 |\n'
      '| manner | stop | non_sibilant | 1 |\n'
      '| place | labial | dental | 0.5 |\n'
      '| articulator | labial | labial | 0 |\n'
      '| height | not_applicable | not_applicable | 0 |\n'
      '| backness | not_applicable | not_applicable | 0 |\n'
      '| breathiness | any | any | 0 |\n'
      '| voicing | voiceless | voiceless | 0 |\n'
      '| labialization | labialized | labialized | 0 |\n'
      '| lateralization | none | none | 0 |\n'
      '| nasalization | none | none | 0 |\n'
      '| palatalization | none | none | 0 |\n'
      '| rhoticization | none | none | 0 |\n'
      '| duration | any | any | 0 |\n'
      '| syllabicity | none | none | 0 |\n'
      '| Total distance | | | 1.5 |\n'
      '| Similarity | | | 0.936 |\n'
  )
  actual = _P.p.comparison_table(_P.f, verbose=True)
  self.assertEqual(actual, expected)

def test_compare_p_f(self):
  """Non-verbose p/f table lists only the aspects with non-zero distance."""
  # The assertions against the former `comparison()` text format are dropped:
  # they shared the same assertEqual call with the table expectation, so the
  # call received four positional arguments.
  # NOTE(review): the separator row implies padded columns, but the cell rows
  # below carry single spaces -- the padding looks collapsed; confirm against
  # the real tabulate output before relying on these literals.
  self.assertEqual(
      _P.p.comparison_table(_P.f),
      'phonology_descriptive comparison (max distance = 23.50):\n\n'
      '| aspect | p | f | distance |\n'
      '|----------------|--------|--------------|------------|\n'
      '| manner | stop | non_sibilant | 1 |\n'
      '| place | labial | dental | 0.5 |\n'
      '| Total distance | | | 1.5 |\n'
      '| Similarity | | | 0.936 |\n',
  )

def test_compare_f_s(self):
  """Non-verbose f/s table lists only the aspects with non-zero distance."""
  # The assertions against the former `comparison()` text format are dropped:
  # they shared the same assertEqual call with the table expectation, so the
  # call received four positional arguments.
  # NOTE(review): the separator row implies padded columns, but the cell rows
  # below carry single spaces -- the padding looks collapsed; confirm against
  # the real tabulate output before relying on these literals.
  self.assertEqual(
      _P.f.comparison_table(_P.s),
      'phonology_descriptive comparison (max distance = 23.50):\n\n'
      '| aspect | f | s | distance |\n'
      '|----------------|--------------|-----------------|------------|\n'
      '| manner | non_sibilant | sibilant | 0.5 |\n'
      '| articulator | labial | apical, laminal | 0.5 |\n'
      '| labialization | labialized | none | 1 |\n'
      '| Total distance | | | 2 |\n'
      '| Similarity | | | 0.915 |\n',
  )

def test_compare_f_s_low_amplitude(self):
  """Non-verbose f/s_low_amplitude table lists only non-zero distances."""
  # The assertions against the former `comparison()` text format are dropped:
  # they shared the same assertEqual call with the table expectation, so the
  # call received four positional arguments.
  # NOTE(review): the separator row implies padded columns, but the cell rows
  # below carry single spaces -- the padding looks collapsed; confirm against
  # the real tabulate output before relying on these literals.
  self.assertEqual(
      _P.f.comparison_table(_P.s_low_amplitude),
      'phonology_descriptive comparison (max distance = 23.50):\n\n'
      '| aspect | f | s_low_amplitude | distance |\n'
      '|----------------|------------|-------------------|------------|\n'
      '| articulator | labial | apical, laminal | 0.5 |\n'
      '| labialization | labialized | none | 1 |\n'
      '| Total distance | | | 1.5 |\n'
      '| Similarity | | | 0.936 |\n',
  )

if __name__ == '__main__':
Expand Down
4 changes: 4 additions & 0 deletions nisaba/scripts/natural_translit/utils/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# limitations under the License.


# Required for `tabulate` below.
load("@pip_deps//:requirements.bzl", "requirement")

package(
default_applicable_licenses = [
],
Expand Down Expand Up @@ -157,6 +160,7 @@ py_library(
":inventory",
":log_op",
":type_op",
requirement("tabulate"),
],
)

Expand Down
84 changes: 59 additions & 25 deletions nisaba/scripts/natural_translit/utils/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@

import enum
from typing import Iterable, Union

import tabulate

from nisaba.scripts.natural_translit.utils import inventory
from nisaba.scripts.natural_translit.utils import type_op as ty

Expand Down Expand Up @@ -205,12 +208,13 @@ def __init__(self, *features, alias: str = ''):
self.add(*features)

def __str__(self):
  """Returns this set as '<alias>: {<item texts>}'.

  The item texts come from `sorted_item_texts()`, so they appear in sorted
  order and are joined with ', '. The locally built and sorted `items` list
  is no longer needed and is removed.
  """
  return '%s: {%s}' % (self.alias, ', '.join(self.sorted_item_texts()))

def sorted_item_texts(self) -> list[str]:
  """Returns the `text` of every item in `self._items`, in ascending order."""
  texts = [member.text for member in self._items]
  texts.sort()
  return texts

def _item_set(self) -> set['Feature']:
  """Returns the result of type-checking `self._items` against its set form.

  A set built from `self._items` is passed as the second argument of
  `ty.type_check`; presumably that is the value used when the check on
  `self._items` does not pass -- confirm against `type_op.type_check`.
  """
  as_set = set(self._items)
  return ty.type_check(self._items, as_set)

Expand Down Expand Up @@ -500,11 +504,17 @@ def __init__(
self.distance_dict = {}

def __str__(self):
  """Distance matrix of this aspect as table in github format.

  The first line reports the aspect text and its max distance. After a blank
  line comes a table with one row and one column per feature of this aspect;
  each cell is `row_feature.distance(column_feature)`. The former body that
  concatenated `str(feature)` for every feature and returned early is
  removed, since it made the table code unreachable.
  """
  headers = ['distances'] + [feature.text for feature in self]
  table = [
      [row_feature.text]
      + [row_feature.distance(col_feature) for col_feature in self]
      for row_feature in self
  ]
  return (
      'aspect: %s, max_dist: %.2f\n\n' % (self.text, self.max_dist)
      + tabulate.tabulate(table, headers, tablefmt='github')
      + '\n'
  )

def add_feature(self, feature: 'Feature') -> None:
self.add_item(feature)
Expand Down Expand Up @@ -723,50 +733,74 @@ def copy_and_update(
return new

def compare(
    self,
    p: 'Feature.Profile',
    aspects: 'Feature.ASPECTS' = ty.UNSPECIFIED,
    verbose: bool = False,
) -> dict[str, Union[str, float]]:
  """Compares this Profile to another Profile.

  Args:
    p: Profile to be compared.
    aspects: contrastive aspects to be included while calculating the
      distance and similarity. When left unspecified, this profile's
      inventory is iterated instead.
    verbose: If false, aspects with zero distance are not included in the
      output table.

  Returns:
    A dict with two entries: 'text', the comparison table as a string in
    github format, and 'similarity', the similarity score. If the two
    profiles belong to different inventories, 'text' is a "not comparable"
    message and 'similarity' is 0. If the compared aspects have no max
    distance at all, 'similarity' is 1.0.
  """
  if p.inventory != self.inventory:
    return {
        'text':
            '%s and %s profiles are not comparable\n Similarity = 0\n'
            % (self.inventory.alias, p.inventory.alias),
        'similarity': 0,
    }
  if isinstance(aspects, ty.Nothing):
    aspects = self.inventory
  total_dist = 0
  max_dist = 0
  headers = ['aspect', self.text, p.text, 'distance']
  table = []
  for aspect in aspects:
    item1 = self.get(aspect.alias)
    item2 = p.get(aspect.alias)
    dist = item1.distance(item2)
    total_dist += dist
    max_dist += aspect.max_dist
    # Zero-distance rows are only shown on request to keep the table short.
    if verbose or dist:
      table.append([
          aspect.alias,
          ', '.join(item1.sorted_item_texts()),
          ', '.join(item2.sorted_item_texts()),
          '%.2f' % dist,
      ])
  # With no measurable distance range (e.g. no aspects to compare), dividing
  # would raise ZeroDivisionError; nothing distinguishes the profiles then.
  similarity = 1 - total_dist / max_dist if max_dist else 1.0
  table.extend([
      ['Total distance', '', '', '%.2f' % total_dist],
      ['Similarity', '', '', '%.3f' % similarity],
  ])
  text = (
      self.inventory.alias
      + ' comparison (max distance = %.2f):\n\n' % max_dist
      + tabulate.tabulate(table, headers, tablefmt='github')
      + '\n'
  )
  return {'text': text, 'similarity': similarity}

def comparison_table(
    self,
    p: 'Feature.Profile',
    aspects: 'Feature.ASPECTS' = ty.UNSPECIFIED,
    verbose: bool = False,
) -> str:
  """Returns the 'text' entry of `compare(p, aspects, verbose)`.

  Args:
    p: Profile to be compared.
    aspects: aspects to include in the comparison; forwarded to `compare`.
    verbose: forwarded to `compare`.

  Returns:
    The comparison table as a string.
  """
  # `compare` returns a dict, so the former tuple index `[0]` no longer works.
  return self.compare(p, aspects, verbose)['text']

def similarity(
    self, p: 'Feature.Profile', aspects: 'Feature.ASPECTS' = ty.UNSPECIFIED,
) -> float:
  """Returns the 'similarity' entry of `compare(p, aspects)`.

  Args:
    p: Profile to be compared.
    aspects: aspects to include in the comparison; forwarded to `compare`.

  Returns:
    The similarity score computed by `compare`.
  """
  # `compare` returns a dict, so the former tuple index `[1]` no longer works.
  return self.compare(p, aspects)['similarity']

def has_feature(self, value: 'Feature.Aspect.VALUES') -> bool:
"""Checks if the given value or one of its children is in this profile."""
Expand Down
Loading

0 comments on commit c5d6b27

Please sign in to comment.