Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parse_index function for parsing NDIndex from string #91

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,15 @@ dmypy.json

# Pyre type checker
.pyre/

# jetbrains ide stuff
*.iml
.idea/

# vscode ide stuff
*.code-workspace
.history/
.vscode/

# project scratch dir
/scratch/
4 changes: 2 additions & 2 deletions ndindex/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
__all__ = []

from .ndindex import ndindex
from .ndindex import parse_index, ndindex

__all__ += ['ndindex']
__all__ += ['parse_index', 'ndindex']

from .slice import Slice

Expand Down
86 changes: 86 additions & 0 deletions ndindex/ndindex.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import ast
import inspect
import numbers

Expand Down Expand Up @@ -65,6 +66,91 @@ def __init__(self, f):
def __get__(self, obj, owner):
return self.f(owner)

class _Guard:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this guard stuff is needed. The Tuple constructor should catch this after calling ndindex() at the end (and will give a better error message than what this currently gives).

def __init__(self):
self.val = False

def __call__(self):
if self.val:
return True
else:
self.val = True
return False

def parse_index(node_or_string):
"""
"Safely" (needs validation) evaluate an expression node or a string containing
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "needs validation" here mean?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it depends on what we mean by "safely". parse_index('('*1000 + ')'*1000) raises MemoryError (but so does ast.literal_eval('('*1000 + ')'*1000)). It shouldn't be possible to execute arbitrary code, though, as far as I can tell.

Copy link
Contributor Author

@telamonian telamonian Oct 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't really mean anything, mostly just a reflection of my paranoia about building a) any parser, and b) something meant to take raw user input. My goal has been to make something that's at least as safe as the existing literal_eval. At this point I've examined the code and the surrounding issues enough that I'm reasonably confident this has actually been archived, so I'll edit out the wishy-washy language

a (limited subset) of valid numpy index or slice expressions.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring should probably clarify what is and isn't allowed. I guess we aren't allowing any kind of expressions that create numbers.

Actually, we may at one point want to support just anything that parses, and make the current behavior under a safe=True flag, which I guess should be the default. But we can worry about that later.

Also this docstring needs examples. and should be added to Sphinx.

"""
if isinstance(node_or_string, str):
node_or_string = ast.parse('dummy[{}]'.format(node_or_string.lstrip(" \t")) , mode='eval')
if isinstance(node_or_string, ast.Expression):
node_or_string = node_or_string.body
if isinstance(node_or_string, ast.Subscript):
node_or_string = node_or_string.slice

def _raise_malformed_node(node):
raise ValueError(f'malformed node or string: {node!r}, {ast.dump(node)!r}')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The node part here isn't particularly helpful alongside the dump.

Also I don't understand why the literal_eval error message says "node or string". How can the string be malformed? If there is a syntax error, it raises a different exception. I like this better:

Suggested change
raise ValueError(f'malformed node or string: {node!r}, {ast.dump(node)!r}')
raise ValueError('unsupported node when parsing index: {ast.dump(node)}')

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think your language is clearer in any case. I'll pull this suggestion in

def _raise_nested_tuple_node(node):
raise ValueError(f'tuples inside of tuple indices are not supported: {node!r}, {ast.dump(node)!r}')

# from cpy37, should work until they remove ast.Num (not until cpy310)
def _convert_num(node):
if isinstance(node, ast.Constant):
if isinstance(node.value, (int, float, complex)):
return node.value
elif isinstance(node, ast.Num):
# ast.Num was removed from ast grammar in cpy38
return node.n # pragma: no cover
_raise_malformed_node(node)
def _convert_signed_num(node):
if isinstance(node, ast.UnaryOp) and isinstance(node.op, (ast.UAdd, ast.USub)):
operand = _convert_num(node.operand)
if isinstance(node.op, ast.UAdd):
return + operand
else:
return - operand
return _convert_num(node)

_nested_tuple_guard = _Guard()
def _convert(node):
if isinstance(node, ast.Tuple):
if _nested_tuple_guard():
_raise_nested_tuple_node(node)

return tuple(map(_convert, node.elts))
elif isinstance(node, ast.List):
return list(map(_convert, node.elts))
elif isinstance(node, ast.Slice):
return slice(
_convert(node.lower) if node.lower is not None else None,
_convert(node.upper) if node.upper is not None else None,
_convert(node.step) if node.step is not None else None,
)
elif isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == 'slice' and node.keywords == []:
# support for parsing slices written out as 'slice(...)' objects
return slice(*map(_convert, node.args))
elif isinstance(node, ast.NameConstant) and node.value is None:
# support for literal None in slices, eg 'slice(None, ...)'
return None
elif isinstance(node, ast.Ellipsis):
# support for three dot '...' ellipsis syntax
return ...
elif isinstance(node, ast.Name) and node.id == 'Ellipsis':
# support for 'Ellipsis' ellipsis syntax
return ...
elif isinstance(node, ast.Index):
# ast.Index was removed from ast grammar in cpy39
return _convert(node.value) # pragma: no cover
elif isinstance(node, ast.ExtSlice):
# ast.ExtSlice was removed from ast grammar in cpy39
_nested_tuple_guard() # pragma: no cover
return tuple(map(_convert, node.dims)) # pragma: no cover

return _convert_signed_num(node)
return ndindex(_convert(node_or_string))


class NDIndex:
"""
Represents an index into an nd-array (i.e., a numpy array).
Expand Down
4 changes: 3 additions & 1 deletion ndindex/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@
def prod(seq):
return reduce(mul, seq, 1)

positive_ints = integers(1, 10)
nonnegative_ints = integers(0, 10)
negative_ints = integers(-10, -1)
ints = lambda: one_of(negative_ints, nonnegative_ints)
ints_nonzero = lambda: one_of(negative_ints, positive_ints)

def slices(start=one_of(none(), ints()), stop=one_of(none(), ints()),
step=one_of(none(), ints())):
step=one_of(none(), ints_nonzero())):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This includes zero step because it is used to test the slice constructor. Although it would probably be better to use this for the tests that don't actually test that (see the discussion at #46). It may be better to work on this in a separate PR, though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this since I kept getting ValueErrors due to the related check in the Slice constructor.

So what I don't get is how this wasn't causing errors previously for tests using the @given(ndindices) strategy. Wouldn't that just mean that (due to one of the many weird quirks of hypothesis) @given(ndindices) just happened to never attempt to generate ndindex(slice(x, y, 0))?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ndindices uses filter(_doesnt_raise) which make it only generate valid indices. Actually I suspect this means several tests

return builds(slice, start, stop, step)

ellipses = lambda: just(...)
Expand Down
86 changes: 80 additions & 6 deletions ndindex/tests/test_ndindex.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
import ast
import inspect

import numpy as np

from hypothesis import given, example, settings

from hypothesis import example, given, settings
from hypothesis.strategies import one_of
from pytest import raises, warns

from ..ndindex import ndindex, asshape
from ..ndindex import ndindex, parse_index, asshape
from ..integer import Integer
from ..ellipsis import ellipsis
from ..integerarray import IntegerArray
from ..tuple import Tuple
from .helpers import ndindices, check_same, assert_equal
from .helpers import ndindices, check_same, assert_equal, ellipses, ints, slices, tuples, _doesnt_raise

Tuples = tuples(one_of(
ellipses(),
ints(),
slices(),
)).filter(_doesnt_raise)

ndindexStrs = one_of(
ellipses(),
ints(),
slices(),
Tuples,
).map(lambda x: f'{x}')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put strategies in helpers.py

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can include arrays here too, although you'll have to use the repr form in that case.


class _Dummy:
def __getitem__(self, x):
return x
_dummy = _Dummy()

@given(ndindices)
def test_eq(idx):
Expand Down Expand Up @@ -77,6 +94,63 @@ def test_ndindex_invalid():
def test_ndindex_ellipsis():
raises(IndexError, lambda: ndindex(ellipsis))


@example('3')
@example('-3')
@example('...')
@example('Ellipsis')
@example('+3')
@example('3:4')
@example('3:-4')
@example('3, 5, 14, 1')
@example('3, -5, 14, -1')
@example('3:15, 5, 14:99, 1')
@example('3:15, -5, 14:-99, 1')
@example(':15, -5, 14:-99:3, 1')
@example('3:15, -5, [1,2,3], :')
@example('slice(None)')
@example('slice(None, None)')
@example('slice(None, None, None)')
@example('slice(14)')
@example('slice(12, 14)')
@example('slice(12, 72, 14)')
@example('slice(-12, -72, 14)')
@example('3:15, -5, slice(-12, -72, 14), Ellipsis')
@example('..., 3:15, -5, slice(-12, -72, 14)')
@given(ndindexStrs)
def test_parse_index_hypothesis(ixStr):
assert eval(f'_dummy[{ixStr}]') == parse_index(ixStr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can check here if eval gives an error than so should parse_index.


def test_parse_index_malformed_raise():
# we don't allow the bitwise not unary op
with raises(ValueError):
ixStr = '~3'
parse_index(ixStr)

def test_parse_index_nested_tuple_raise():
# we don't allow tuples within tuple indices
with raises(ValueError):
# this will parse as either ast.Index or ast.Slice (depending on cpy version) containing an ast.Tuple
ixStr = '..., -5, slice(12, -14), (1,2,3)'
parse_index(ixStr)

with raises(ValueError):
# in cpy37, this will parse as ast.ExtSlice containing an ast.Tuple
ixStr = '3:15, -5, :, (1,2,3)'
parse_index(ixStr)

def test_parse_index_ensure_coverage():
# ensure full coverage, regarless of cpy version and accompanying changes to the ast grammar
for node in (
ast.Constant(7),
ast.Num(7),
ast.Index(ast.Constant(7)),
):
assert parse_index(node) == 7

assert parse_index(ast.ExtSlice((ast.Constant(7), ast.Constant(7), ast.Constant(7)))) == (7, 7, 7)


def test_signature():
sig = inspect.signature(Integer)
assert sig.parameters.keys() == {'idx'}
Expand Down
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
"sympy",
],
tests_require=[
'pytest',
'hypothesis',
"pytest",
"pytest-cov",
"pytest-flakes",
"pytest-tornasync",
"hypothesis",
],
classifiers=[
"Programming Language :: Python :: 3",
Expand Down