-
Notifications
You must be signed in to change notification settings - Fork 124
/
AutoTranslateNotebooks.py
73 lines (60 loc) · 2.59 KB
/
AutoTranslateNotebooks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import json, googletrans, os, re, argparse
import nbformat
from googletrans import Translator
# The Google Translate API limits the text of each request to 5000 characters.
# segment_text splits long markdown text into shorter segments so that each
# request stays under that limit and no exception is raised.
def _chunk_text(text, max_chars):
    """Split *text* into consecutive chunks of at most *max_chars* characters.

    The split is lossless: ``''.join(_chunk_text(text, n)) == text``.
    Chunks end on a sentence boundary (a ``'.'``) whenever possible.  A single
    sentence longer than *max_chars* is broken after the last space or newline
    that fits, or at exactly *max_chars* characters when it has none.

    Raises:
        ValueError: if *max_chars* is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError('max_chars must be at least 1')

    # str.split() drops the separator, so give the period back to every piece
    # that was followed by one.  The last piece is whatever trails the final
    # period (possibly empty) and must not gain a period of its own.
    pieces = text.split('.')
    sentences = [piece + '.' for piece in pieces[:-1]]
    sentences.append(pieces[-1])

    chunks = []
    current = ''
    for sentence in sentences:
        if len(current) + len(sentence) <= max_chars:
            current += sentence
            continue
        if current:
            chunks.append(current)
        # The sentence alone may still be too long: hard-wrap it.
        while len(sentence) > max_chars:
            window = sentence[:max_chars]
            # rfind() returns -1 when nothing is found, which makes cut == 0.
            cut = max(window.rfind(' '), window.rfind('\n')) + 1
            if cut <= 0:
                cut = max_chars
            chunks.append(sentence[:cut])
            sentence = sentence[cut:]
        current = sentence
    if current:
        chunks.append(current)
    return chunks


def segment_text(text_list, max_chars=2000):
    """Split a string into whitespace-stripped segments of at most *max_chars*.

    Args:
        text_list: The text to split.  Despite the name this must be a single
            ``str`` (it is split on ``'.'``), not a list.
        max_chars: Maximum length of each returned segment.

    Returns:
        A list of non-empty, stripped segments.  Periods are kept exactly where
        the input had them; none are added or removed.  Empty or blank input
        yields an empty list.

    Raises:
        ValueError: if *max_chars* is smaller than 1.
    """
    stripped = (chunk.strip() for chunk in _chunk_text(text_list, max_chars))
    return [chunk for chunk in stripped if chunk]


def TranslateNBText(Gtranslator, text, max_characters = 2000, src_lang='en', dest_lang='zh-cn'):
    """Translate *text* piecewise and return the reassembled translation.

    Args:
        Gtranslator: Object with a ``translate(text, src=..., dest=...)``
            method whose return value exposes the result as ``.text``.
        text: Text to translate.  ``None`` and blank text give ``''``; a list
            or tuple of strings is concatenated first.
        max_characters: Maximum number of characters sent per ``translate``
            call.
        src_lang: Source language code passed through as ``src``.
        dest_lang: Destination language code passed through as ``dest``.

    Returns:
        The translated text.  Whitespace surrounding each translated chunk
        (for example blank lines between paragraphs) is copied through
        unchanged, and no punctuation is added.

    Raises:
        ValueError: if *max_characters* is smaller than 1 and there is text
            to translate.
    """
    if text is None:
        return ''
    if isinstance(text, (list, tuple)):
        text = ''.join(text)
    text = str(text)
    # Blank text is never sent to the translator.
    if not text.strip():
        return ''

    translated_parts = []
    for chunk in _chunk_text(text, max_characters):
        core = chunk.strip()
        if not core:
            # Whitespace-only chunk: nothing to translate, keep the layout.
            translated_parts.append(chunk)
            continue
        leading = chunk[:len(chunk) - len(chunk.lstrip())]
        trailing = chunk[len(chunk.rstrip()):]
        result = Gtranslator.translate(core, src=src_lang, dest=dest_lang)
        translated_parts.append(leading + result.text + trailing)
    return ''.join(translated_parts)
def main():
    """Translate the markdown cells of a notebook and write the result.

    Reads the notebook named by the ``input_file`` command-line argument,
    replaces the source of every markdown cell with the output of
    ``TranslateNBText``, and writes the notebook to ``output_file``.  Cells of
    any other type are left unchanged.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', help='the input Jupyter Notebook file')
    parser.add_argument('output_file', help='the output Jupyter Notebook file to write to')
    parser.add_argument('--src-lang', default='en',
                        help='source language code (default: en)')
    parser.add_argument('--dest-lang', default='zh-cn',
                        help='destination language code (default: zh-cn)')
    args = parser.parse_args()

    # Set up translator object
    translator = Translator()

    # Load the notebook, normalised to notebook format version 4
    with open(args.input_file, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    # Translate markdown cells in place
    for cell in nb['cells']:
        if cell['cell_type'] != 'markdown':
            continue
        cell['source'] = TranslateNBText(
            translator,
            cell['source'],
            src_lang=args.src_lang,
            dest_lang=args.dest_lang,
        )

    # Save updated notebook
    with open(args.output_file, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)


# Run only when executed as a script, so importing this module has no side
# effects (no argument parsing, file access or network calls).
if __name__ == '__main__':
    main()