Skip to content
Merged
21 changes: 11 additions & 10 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2755,23 +2755,24 @@ def _check_for_bom(self, first_row):
if first_elt != _BOM:
return first_row

first_row = first_row[0]
first_row_bom = first_row[0]

if len(first_row) > 1 and first_row[1] == self.quotechar:
if len(first_row_bom) > 1 and first_row_bom[1] == self.quotechar:
start = 2
quote = first_row[1]
end = first_row[2:].index(quote) + 2
quote = first_row_bom[1]
end = first_row_bom[2:].index(quote) + 2

# Extract the data between the quotation marks
new_row = first_row[start:end]
new_row = first_row_bom[start:end]

# Extract any remaining data after the second
# quotation mark.
if len(first_row) > end + 1:
new_row += first_row[end + 1:]
return [new_row]
elif len(first_row) > 1:
return [first_row[1:]]
if len(first_row_bom) > end + 1:
new_row += first_row_bom[end + 1:]
return [new_row] + first_row[1:]

elif len(first_row_bom) > 1:
return [first_row_bom[1:]]
else:
# First row is just the BOM, so we
# return an empty string.
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/io/parser/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1927,3 +1927,17 @@ def test_read_table_deprecated(all_parsers):
check_stacklevel=False):
result = parser.read_table(StringIO(data))
tm.assert_frame_equal(result, expected)


def test_first_row_bom_python(all_parsers):
    """Check that a BOM-prefixed, quoted, tab-separated header gives 3 columns.

    The only non-blank line of the input is the header, so the parsed frame
    is expected to have zero rows and three columns, both when
    ``engine='python'`` is passed explicitly and when no engine argument is
    passed.
    """
    parser = all_parsers
    # Spell the separators as explicit ``\t`` escapes so they always match
    # ``delimiter='\t'``; literal tab characters inside the string are
    # invisible and easily converted to spaces by editors or formatters,
    # which would collapse the header into a single column.
    data = '\n\ufeff"Head1"\t"Head2"\t"Head3"\n'
    expected_shape = (0, 3)

    result = parser.read_csv(StringIO(data), delimiter='\t', engine='python')
    assert result.shape == expected_shape

    result = parser.read_csv(StringIO(data), delimiter='\t')
    assert result.shape == expected_shape