Skip to content

Commit

Permalink
Use codecs.open() only if there is a decoding error
Browse files Browse the repository at this point in the history
codecs.open() is slower than open()
  • Loading branch information
17451k committed Sep 25, 2019
1 parent a92ad7d commit 8676d3a
Showing 1 changed file with 12 additions and 3 deletions.
15 changes: 12 additions & 3 deletions clade/extensions/cross_ref.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def __get_raw_macro_locations(self, raw_locations):

return raw_locations

def __parse_file(self, file, raw_locations, encoding="utf8"):
def __parse_file(self, file, raw_locations, ignore_errors=False, encoding="utf8"):
storage_file = self.extensions["Storage"].get_storage_path(file)

if not os.path.exists(storage_file):
Expand All @@ -146,7 +146,12 @@ def __parse_file(self, file, raw_locations, encoding="utf8"):
sorted_locs = sorted(raw_locations[file], key=lambda x: int(x[0]))
sorted_pos = 0

with codecs.open(storage_file, "r", encoding="utf8", errors="ignore") as fp:
try:
if ignore_errors:
fp = codecs.open(storage_file, "r", encoding=encoding, errors="ignore")
else:
fp = open(storage_file, "r", encoding=encoding)

for i, s in enumerate(fp):
if sorted_pos >= len(sorted_locs):
break
Expand All @@ -171,7 +176,11 @@ def __parse_file(self, file, raw_locations, encoding="utf8"):

sorted_pos += 1

return locations
return locations
except UnicodeDecodeError:
return self.__parse_file(file, raw_locations, ignore_errors=True)
finally:
fp.close()

def __find_all(self, s, name):
for m in re.finditer(r"\w+", s):
Expand Down

0 comments on commit 8676d3a

Please sign in to comment.