diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py index 2c3e677575..736ac32ef3 100644 --- a/deepdoc/parser/excel_parser.py +++ b/deepdoc/parser/excel_parser.py @@ -69,7 +69,7 @@ def row_number(fnm, binary): if fnm.split(".")[-1].lower() in ["csv", "txt"]: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") return len(txt.split("\n")) diff --git a/rag/app/book.py b/rag/app/book.py index 70aee29c21..c4bc62abf8 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -91,7 +91,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/laws.py b/rag/app/laws.py index 473eca9c7d..6361d62cba 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -113,7 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/naive.py b/rag/app/naive.py index 55fab84c28..c557a62670 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -141,7 +141,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/one.py b/rag/app/one.py index f5c78f5aa3..531fd0a70a 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -85,7 +85,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/qa.py b/rag/app/qa.py index a37ff63fdf..1ecf9b1872 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -107,7 +107,7 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs): txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/table.py b/rag/app/table.py index 96a53aac4c..368d1ce85a 100644 --- a/rag/app/table.py +++ b/rag/app/table.py @@ -149,7 +149,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: