Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ Bug Fixes
~~~~~~~~~
- Bug in :func:`to_datetime` which would raise an (incorrect) ``ValueError`` when called with a date far into the future and the ``format`` argument specified instead of raising ``OutOfBoundsDatetime`` (:issue:`23830`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove these; these have already been moved

- Bug in an error message in :meth:`DataFrame.plot`. Improved the error message if non-numerics are passed to :meth:`DataFrame.plot` (:issue:`25481`)
-
- Bug in :func:`read_csv` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this needs to be moved to the I/O section of bug fixes


Categorical
^^^^^^^^^^^
Expand Down
18 changes: 17 additions & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1894,6 +1894,11 @@ def __init__(self, src, **kwds):
not set(usecols).issubset(self.orig_names)):
_validate_usecols_names(usecols, self.orig_names)

# GH 25623
elif self.usecols_dtype == 'integer':
indices = lrange(self._reader.table_width)
_validate_usecols_names(usecols, indices)

if len(self.names) > len(usecols):
self.names = [n for i, n in enumerate(self.names)
if (i in usecols or n in usecols)]
Expand Down Expand Up @@ -2197,7 +2202,8 @@ def __init__(self, f, **kwds):
self.skipinitialspace = kwds['skipinitialspace']
self.lineterminator = kwds['lineterminator']
self.quoting = kwds['quoting']
self.usecols, _ = _validate_usecols_arg(kwds['usecols'])
self.usecols, self.usecols_dtype = _validate_usecols_arg(
kwds['usecols'])
self.skip_blank_lines = kwds['skip_blank_lines']

self.warn_bad_lines = kwds['warn_bad_lines']
Expand Down Expand Up @@ -2588,6 +2594,12 @@ def _infer_columns(self):
if clear_buffer:
self._clear_buffer()

# GH 25623
if self.usecols_dtype == 'integer':
for col in columns:
indices = lrange(len(col))
_validate_usecols_names(self.usecols, indices)

if names is not None:
if ((self.usecols is not None and
len(names) != len(self.usecols)) or
Expand Down Expand Up @@ -2623,6 +2635,10 @@ def _infer_columns(self):
ncols = len(line)
num_original_columns = ncols

# GH 25623
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here as well

if self.usecols_dtype == 'integer':
_validate_usecols_names(self.usecols, lrange(ncols))

if not names:
if self.prefix:
columns = [['%s%d' % (self.prefix, i)
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/io/parser/test_usecols.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@
"expected but not found: {0}")


@pytest.mark.parametrize("names,usecols", [
(None, [0, 3]),
(["a", "b", "c"], [0, -1, 2]),
(None, [3]),
(["a"], [3])
])
def test_usecols_out_of_bounds(all_parsers, names, usecols):
data = "a,b,c\n1,2,3\n4,5,6"
parser = all_parsers

with pytest.raises(ValueError, match=_msg_validate_usecols_names):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

although this technically doesn't fail the regex, it appears that _msg_validate_usecols_names is intended to be used with .format()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea good catch @simonjayhawkins

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed! @jreback I'll push an update this weekend

parser.read_csv(StringIO(data), usecols=usecols, names=names)


def test_raise_on_mixed_dtype_usecols(all_parsers):
# See gh-12678
data = """a,b,c
Expand Down