Skip to content

Commit

Permalink
Tolerance for utf-8 decode errors
Browse files Browse the repository at this point in the history
  • Loading branch information
kfsone committed Mar 28, 2015
1 parent 571a7e0 commit 61b0298
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions transfers.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,9 @@ class CSVStream(object):
print("{} = {}".format(cols[0], vals[0]))
"""

def __init__(self, url):
def __init__(self, url, tdenv=None):
self.url = url
self.tdenv = tdenv
if not url.startswith("file:///"):
requests = import_requests()
self.req = requests.get(self.url, stream=True)
Expand All @@ -265,14 +266,24 @@ def __init__(self, url):

def next_line(self):
    """
    Fetch the next line as a text string.

    Lines that fail to decode are reported through ``self.tdenv.WARN``
    and skipped, and the following line is tried instead. When no
    ``tdenv`` was supplied, the UnicodeDecodeError propagates to the
    caller. StopIteration raised by the underlying ``self.lines``
    iterator is passed through unchanged.
    """
    while True:
        line = next(self.lines)
        try:
            return line.decode()
        except UnicodeDecodeError as e:
            if not self.tdenv:
                # Nobody to report to: re-raise with the original traceback.
                raise
            # self.csvin is only assigned once __iter__ has been entered;
            # fall back to a placeholder so the warning itself cannot
            # fail with AttributeError and hide the decode error.
            csvin = getattr(self, "csvin", None)
            line_num = csvin.line_num if csvin is not None else "?"
            self.tdenv.WARN(
                "{}: line:{}: {}\n{}",
                self.url, line_num, line, e
            )

def __iter__(self):
"""
Iterate across data received as csv values.
Yields [column headings], [column values]
"""
csvin = csv.reader(
self.csvin = csvin = csv.reader(
iter(self.next_line, 'END'),
delimiter=',', quotechar="'", doublequote=True
)
Expand Down

0 comments on commit 61b0298

Please sign in to comment.