Skip to content

Commit ab9e2cc

Browse files
CSV importer
1 parent 8f91219 commit ab9e2cc

File tree

5 files changed

+51
-0
lines changed

5 files changed

+51
-0
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ All notable changes to this project will be documented in this file.
66

77
### Added
88

9+
- CSV importer to register pre-existing CSV annotations into the index without performing any conversion
10+
911
### Fixed
1012

1113
## [0.0.1] - 2021-07-14

ChildProject/converters.py

+8
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ def __init_subclass__(cls, **kwargs):
6262
super().__init_subclass__(**kwargs)
6363
converters[cls.FORMAT] = cls
6464

65+
66+
class CsvConverter(AnnotationConverter):
    """Converter for annotations that are already stored as CSV files.

    The file is loaded as-is with pandas; its contents are not transformed.
    """

    FORMAT = 'csv'

    @staticmethod
    def convert(filename: str) -> pd.DataFrame:
        """Load the CSV file at ``filename`` and return it as a dataframe.

        :param filename: path to the CSV annotation file
        :return: the dataframe produced by ``pd.read_csv`` with default options
        """
        annotations = pd.read_csv(filename)
        return annotations
72+
6573
class VtcConverter(AnnotationConverter):
6674
FORMAT = 'vtc_rttm'
6775

tests/data/csv.csv

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
segment_onset,segment_offset,speaker_type
2+
1982193,1982492,NA
3+
1983496,1988992,NA
4+
1984136,1984993,CHI
5+
1984168,1986512,OCH
6+
1985492,1988951,FEM
7+
28278092,28278784,NA
8+
28282768,28284052,MAL
9+
28283492,28289116,NA
10+
28284010,28287945,OCH
11+
28285421,28285575,MAL
12+
28288492,28289007,OCH
13+
28294206,28294692,MAL
14+
28300492,28300769,NA
15+
28310511,28312511,MAL
16+
28310992,28312491,NA

tests/test_annotations.py

+9
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ def project(request):
2727
for raw_annotation in glob.glob("output/annotations/annotations/*.*/converted"):
2828
shutil.rmtree(raw_annotation)
2929

30+
def test_csv():
    """Check that CsvConverter output matches the reference CSV file."""
    # Missing values are replaced by the string 'NA' on both sides before
    # the frames are compared.
    actual = CsvConverter().convert('tests/data/csv.csv').fillna('NA')
    expected = pd.read_csv('tests/truth/csv.csv').fillna('NA')

    # Both frames are standardized against the converted frame's columns.
    columns = actual.columns
    actual_std = standardize_dataframe(actual, columns)
    expected_std = standardize_dataframe(expected, columns)

    pd.testing.assert_frame_equal(actual_std, expected_std)
38+
3039
def test_vtc():
3140
converted = VtcConverter().convert('tests/data/vtc.rttm')
3241
truth = pd.read_csv('tests/truth/vtc.csv').fillna('NA')

tests/truth/csv.csv

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
segment_onset,segment_offset,speaker_type
2+
1982193,1982492,NA
3+
1983496,1988992,NA
4+
1984136,1984993,CHI
5+
1984168,1986512,OCH
6+
1985492,1988951,FEM
7+
28278092,28278784,NA
8+
28282768,28284052,MAL
9+
28283492,28289116,NA
10+
28284010,28287945,OCH
11+
28285421,28285575,MAL
12+
28288492,28289007,OCH
13+
28294206,28294692,MAL
14+
28300492,28300769,NA
15+
28310511,28312511,MAL
16+
28310992,28312491,NA

0 commit comments

Comments
 (0)