Skip to content

Commit

Permalink
add option for empty tokens.
Browse files Browse the repository at this point in the history
To allow tokens that have only punctuation
  • Loading branch information
Tomotz committed Oct 3, 2024
1 parent 25d83ac commit 704b8ec
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/synth/cst_synth.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,16 +220,17 @@ cst_utterance *default_tokenization(cst_utterance *u)
while(!ts_eof(fd))
{
token = ts_get(fd);
if (cst_strlen(token) > 0)
{
/* Allow an empty token for cases where punctuation stands on its own. Example - "I . am" */
// if (cst_strlen(token) > 0)
// {
t = relation_append(r,NULL);
item_set_string(t,"name",token);
item_set_string(t,"whitespace",fd->whitespace);
item_set_string(t,"prepunctuation",fd->prepunctuation);
item_set_string(t,"punc",fd->postpunctuation);
item_set_int(t,"file_pos",fd->file_pos);
item_set_int(t,"line_number",fd->line_number);
}
// }
}

ts_close(fd);
Expand Down

0 comments on commit 704b8ec

Please sign in to comment.