File tree 1 file changed +3
-3
lines changed
1 file changed +3
-3
lines changed Original file line number Diff line number Diff line change @@ -26,7 +26,7 @@ def get_wiki(path,lang):
26
26
shutil .rmtree (path / 'text' )
27
27
28
28
29
- def split_wiki (path ,lang ):
29
+ def split_wiki (path ,lang , encoding = 'utf-8' ):
30
30
dest = path / 'docs'
31
31
name = f'{ lang } wiki'
32
32
if dest .exists ():
@@ -35,7 +35,7 @@ def split_wiki(path,lang):
35
35
36
36
dest .mkdir (exist_ok = True , parents = True )
37
37
title_re = re .compile (rf'<doc id="\d+" url="https://{ lang } .wikipedia.org/wiki\?curid=\d+" title="([^"]+)">' )
38
- lines = (path / name ).open ()
38
+ lines = (path / name ).open (encoding = encoding )
39
39
f = None
40
40
41
41
for i ,l in enumerate (lines ):
@@ -44,7 +44,7 @@ def split_wiki(path,lang):
44
44
title = title_re .findall (l )[0 ].replace ('/' ,'_' )
45
45
if len (title )> 150 : continue
46
46
if f : f .close ()
47
- f = (dest / f'{ title } .txt' ).open ('w' )
47
+ f = (dest / f'{ title } .txt' ).open ('w' , encoding = encoding )
48
48
else : f .write (l )
49
49
f .close ()
50
50
return dest
You can’t perform that action at this time.
0 commit comments