Skip to content

Commit 09161b3

Browse files
author
James McKinney
committed
Add skip_lines like Table.from_csv, closes #7
1 parent b006edd commit 09161b3

File tree

6 files changed

+30
-6
lines changed

6 files changed

+30
-6
lines changed

agateexcel/table_xls.py

+9-4
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
import six
1111
import xlrd
1212

13-
def from_xls(cls, path, sheet=None, **kwargs):
13+
def from_xls(cls, path, sheet=None, skip_lines=0, **kwargs):
1414
"""
1515
Parse an XLS file.
1616
@@ -19,6 +19,8 @@ def from_xls(cls, path, sheet=None, **kwargs):
1919
:param sheet:
2020
The name of a worksheet to load. If not specified then the first
2121
sheet will be used.
22+
:param skip_lines:
23+
The number of rows to skip from the top of the sheet.
2224
"""
2325
if hasattr(path, 'read'):
2426
book = xlrd.open_workbook(file_contents=path.read())
@@ -36,11 +38,14 @@ def from_xls(cls, path, sheet=None, **kwargs):
3638
column_names = []
3739
columns = []
3840

41+
if not isinstance(skip_lines, int):
42+
raise ValueError('skip_lines argument must be an int')
43+
3944
for i in range(sheet.ncols):
4045
data = sheet.col_values(i)
41-
name = six.text_type(data[0]) or None
42-
values = data[1:]
43-
types = sheet.col_types(i)[1:]
46+
name = six.text_type(data[skip_lines]) or None
47+
values = data[skip_lines + 1:]
48+
types = sheet.col_types(i)[skip_lines + 1:]
4449

4550
excel_type = determine_excel_type(types)
4651

agateexcel/table_xlsx.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
NULL_TIME = datetime.time(0, 0, 0)
1414

15-
def from_xlsx(cls, path, sheet=None, **kwargs):
15+
def from_xlsx(cls, path, sheet=None, skip_lines=0, **kwargs):
1616
"""
1717
Parse an XLSX file.
1818
@@ -21,6 +21,8 @@ def from_xlsx(cls, path, sheet=None, **kwargs):
2121
:param sheet:
2222
The name or integer index of a worksheet to load. If not specified
2323
then the "active" sheet will be used.
24+
:param skip_lines:
25+
The number of rows to skip from the top of the sheet.
2426
"""
2527
if hasattr(path, 'read'):
2628
f = path
@@ -39,7 +41,10 @@ def from_xlsx(cls, path, sheet=None, **kwargs):
3941
column_names = []
4042
rows = []
4143

42-
for i, row in enumerate(sheet.rows):
44+
if not isinstance(skip_lines, int):
45+
raise ValueError('skip_lines argument must be an int')
46+
47+
for i, row in enumerate(sheet.iter_rows(row_offset=skip_lines)):
4348
if i == 0:
4449
column_names = [None if c.value is None else six.text_type(c.value) for c in row]
4550
continue

examples/test_skip_lines.xls

58.5 KB
Binary file not shown.

examples/test_skip_lines.xlsx

26.7 KB
Binary file not shown.

tests/test_table_xls.py

+7
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,13 @@ def test_sheet_index(self):
5959
self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
6060
self.assertRows(table, [r.values() for r in self.table.rows])
6161

62+
def test_skip_lines(self):
63+
table = agate.Table.from_xls('examples/test_skip_lines.xls', skip_lines=3)
64+
65+
self.assertColumnNames(table, self.column_names)
66+
self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
67+
self.assertRows(table, [r.values() for r in self.table.rows])
68+
6269
def test_zeros(self):
6370
table = agate.Table.from_xls('examples/test_zeros.xls')
6471

tests/test_table_xlsx.py

+7
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,13 @@ def test_sheet_index(self):
5959
self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
6060
self.assertRows(table, [r.values() for r in self.table.rows])
6161

62+
def test_skip_lines(self):
63+
table = agate.Table.from_xlsx('examples/test_skip_lines.xlsx', skip_lines=3)
64+
65+
self.assertColumnNames(table, self.column_names)
66+
self.assertColumnTypes(table, [agate.Number, agate.Text, agate.Boolean, agate.Date, agate.DateTime])
67+
self.assertRows(table, [r.values() for r in self.table.rows])
68+
6269
def test_ambiguous_date(self):
6370
table = agate.Table.from_xlsx('examples/test_ambiguous_date.xlsx')
6471

0 commit comments

Comments
 (0)