Skip to content

Commit

Permalink
Merge branch 'master' into arm
Browse files (browse the repository at this point in the history)
update
  • Loading branch information
jackeyzzz12138 committed Jan 4, 2024
2 parents 544fb64 + d883c7f commit d93dcd2
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 4 deletions.
3 changes: 3 additions & 0 deletions crazy_functions/crazy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,9 @@ def starts_with_lowercase_word(s):
return True
else:
return False
+ # 对于某些PDF会有第一个段落就以小写字母开头,为了避免索引错误将其更改为大写
+ if starts_with_lowercase_word(meta_txt[0]):
+ meta_txt[0] = meta_txt[0].capitalize()
for _ in range(100):
for index, block_txt in enumerate(meta_txt):
if starts_with_lowercase_word(block_txt):
Expand Down
4 changes: 2 additions & 2 deletions crazy_functions/latex_fns/latex_toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ def find_main_tex_file(file_manifest, mode):
else: # if len(canidates) >= 2 通过一些Latex模板中常见(但通常不会出现在正文)的单词,对不同latex源文件扣分,取评分最高者返回
canidates_score = []
# 给出一些判定模板文档的词作为扣分项
- unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
- expected_words = ['\input', '\ref', '\cite']
+ unexpected_words = ['\\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
+ expected_words = ['\\input', '\\ref', '\\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
Expand Down
4 changes: 2 additions & 2 deletions crazy_functions/pdf_fns/breakdown_txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=F
# 如果没有找到合适的切分点
if break_anyway:
# 是否允许暴力切分
- prev, post = force_breakdown(txt_tocut, limit, get_token_fn)
+ prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)
else:
# 不允许直接报错
- raise RuntimeError(f"存在一行极长的文本!{txt_tocut}")
+ raise RuntimeError(f"存在一行极长的文本!{remain_txt_to_cut}")

# 追加列表
res.append(prev); fin_len+=len(prev)
Expand Down

0 comments on commit d93dcd2

Please sign in to comment.