- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 19.2k
Deprecated usecols with out of bounds indices in read_csv #41130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
9a82d19
              0287dd9
              97158ed
              f446e4f
              f5d3a05
              21b496b
              92488c0
              41e3310
              e34631b
              5bef676
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -470,12 +470,14 @@ def _infer_columns(self): | |
| if self.usecols is not None: | ||
| # Set _use_cols. We don't store columns because they are | ||
| # overwritten. | ||
| self._handle_usecols(columns, names) | ||
| self._handle_usecols(columns, names, num_original_columns) | ||
| else: | ||
| num_original_columns = len(names) | ||
| columns = [names] | ||
| else: | ||
| columns = self._handle_usecols(columns, columns[0]) | ||
| columns = self._handle_usecols( | ||
| columns, columns[0], num_original_columns | ||
| ) | ||
| else: | ||
| try: | ||
| line = self._buffered_line() | ||
|  | @@ -494,10 +496,12 @@ def _infer_columns(self): | |
| columns = [[f"{self.prefix}{i}" for i in range(ncols)]] | ||
| else: | ||
| columns = [list(range(ncols))] | ||
| columns = self._handle_usecols(columns, columns[0]) | ||
| columns = self._handle_usecols( | ||
| columns, columns[0], num_original_columns | ||
| ) | ||
| else: | ||
| if self.usecols is None or len(names) >= num_original_columns: | ||
| columns = self._handle_usecols([names], names) | ||
| columns = self._handle_usecols([names], names, num_original_columns) | ||
| num_original_columns = len(names) | ||
| else: | ||
| if not callable(self.usecols) and len(names) != len(self.usecols): | ||
|  | @@ -506,13 +510,13 @@ def _infer_columns(self): | |
| "header fields in the file" | ||
| ) | ||
| # Ignore output but set used columns. | ||
| self._handle_usecols([names], names) | ||
| self._handle_usecols([names], names, ncols) | ||
| columns = [names] | ||
| num_original_columns = ncols | ||
|  | ||
| return columns, num_original_columns, unnamed_cols | ||
|  | ||
| def _handle_usecols(self, columns, usecols_key): | ||
| def _handle_usecols(self, columns, usecols_key, num_original_columns): | ||
|          | ||
| """ | ||
| Sets self._col_indices | ||
|  | ||
|  | @@ -537,6 +541,13 @@ def _handle_usecols(self, columns, usecols_key): | |
| else: | ||
| col_indices.append(col) | ||
| else: | ||
| missing_usecols = [ | ||
| col for col in self.usecols if col >= num_original_columns | ||
| ] | ||
| if missing_usecols: | ||
| raise ParserError( | ||
| f"Usecols indices {missing_usecols} are out of bounds!" | ||
| ) | ||
| col_indices = self.usecols | ||
|  | ||
| columns = [ | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -312,3 +312,19 @@ def test_malformed_skipfooter(python_parser_only): | |
| msg = "Expected 3 fields in line 4, saw 5" | ||
| with pytest.raises(ParserError, match=msg): | ||
| parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1) | ||
|  | ||
|  | ||
| @pytest.mark.parametrize("header", [0, None]) | ||
| @pytest.mark.parametrize("names", [None, ["a", "b"], ["a", "b", "c"]]) | ||
| def test_usecols_indices_out_of_bounds(python_parser_only, names, header): | ||
|          | ||
| # GH#25623 | ||
| if header == 0 and names == ["a", "b", "c"]: | ||
| pytest.skip("This case is not valid") | ||
| parser = python_parser_only | ||
| data = """ | ||
| a,b | ||
| 1,2 | ||
| """ | ||
| msg = r"Usecols indices \[2\] are out of bounds!" | ||
| with pytest.raises(ParserError, match=msg): | ||
| parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=header) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure what "uncontrolled" means here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It means the error is not raised on purpose by us; instead, it is raised because we are accessing a nonexistent list index.