@@ -290,16 +290,16 @@ def get_cond(key, cutoff, max_cutoff):
290290 "stopwords_ratio"
291291 ]
292292 for i in range (len (self .docs ["stopwords_ratio" ])):
293- self .docs ["stopwords_ratio" ].iloc [
294- i
295- ] = Filtering . compute_stopwords_ratio (
296- self .docs [ "text" ]. iloc [ i ] ,
297- self .sentencepiece_model_tok ,
298- self .param ["strip_characters " ],
299- self .param ["cond_words_augmentation " ],
300- self .param ["words_augmentation_group_sizes " ],
301- self . param [ "words_augmentation_join_char" ] ,
302- new_stopwords ,
293+ self .docs ["stopwords_ratio" ].iloc [i ] = (
294+ Filtering . compute_stopwords_ratio (
295+ self . docs [ "text" ]. iloc [ i ],
296+ self .sentencepiece_model_tok ,
297+ self .param [ "strip_characters" ] ,
298+ self .param ["cond_words_augmentation " ],
299+ self .param ["words_augmentation_group_sizes " ],
300+ self .param ["words_augmentation_join_char " ],
301+ new_stopwords ,
302+ )
303303 )
304304 cutoff_def = "If the stop words ratio of a document is lower than this number, the document is removed."
305305 cutoff_stopwords_ratio = st .slider (
@@ -326,16 +326,16 @@ def get_cond(key, cutoff, max_cutoff):
326326 "flagged_words_ratio"
327327 ]
328328 for i in range (len (self .docs ["flagged_words_ratio" ])):
329- self .docs ["flagged_words_ratio" ].iloc [
330- i
331- ] = Filtering . compute_flagged_words_ratio (
332- self .docs [ "text" ]. iloc [ i ] ,
333- self .sentencepiece_model_tok ,
334- self .param ["strip_characters " ],
335- self .param ["cond_words_augmentation " ],
336- self .param ["words_augmentation_group_sizes " ],
337- self . param [ "words_augmentation_join_char" ] ,
338- new_flagged_words ,
329+ self .docs ["flagged_words_ratio" ].iloc [i ] = (
330+ Filtering . compute_flagged_words_ratio (
331+ self . docs [ "text" ]. iloc [ i ],
332+ self .sentencepiece_model_tok ,
333+ self .param [ "strip_characters" ] ,
334+ self .param ["cond_words_augmentation " ],
335+ self .param ["words_augmentation_group_sizes " ],
336+ self .param ["words_augmentation_join_char " ],
337+ new_flagged_words ,
338+ )
339339 )
340340 cutoff_def = "If the flagged words ratio of a document is higher than this number, the document is removed."
341341 max_fwr = np .max (self .docs ["flagged_words_ratio" ])
0 commit comments