Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/docs_light.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ on:
pull_request:
paths:
- 'docs/**'
- '.github/workflows/docs.yml'
- '.github/workflows/docs_light.yml'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch!

- 'ci/docker/conda.dockerfile'
- 'ci/docker/conda-cpp.dockerfile'
- 'ci/docker/conda-python.dockerfile'
Expand All @@ -37,12 +37,12 @@ env:
jobs:

light:
name: AMD64 Ubuntu 20.04 Sphinx Documentation
name: AMD64 Conda Python 3.9 Sphinx Documentation
runs-on: ubuntu-latest
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 30
env:
UBUNTU: "20.04"
PYTHON: "3.9"
steps:
- name: Checkout Arrow
uses: actions/checkout@v2
Expand All @@ -52,8 +52,8 @@ jobs:
uses: actions/cache@v2
with:
path: .docker
key: ubuntu-docs-${{ hashFiles('cpp/**') }}
restore-keys: ubuntu-docs-
key: conda-docs-${{ hashFiles('cpp/**') }}
restore-keys: conda-docs-
- name: Setup Python
uses: actions/setup-python@v2
with:
Expand Down
16 changes: 12 additions & 4 deletions dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,15 +303,22 @@ def lint(ctx, src, fix, iwyu_all, **checks):
sys.exit(1)


def _flatten_numpydoc_rules(rules):
flattened = []
for rule in rules:
flattened.extend(filter(None, rule.split(',')))
return flattened


@archery.command(short_help="Lint python docstring with NumpyDoc")
@click.argument('symbols', nargs=-1)
@click.option("--src", metavar="<arrow_src>", default=None,
callback=validate_arrow_sources,
help="Specify Arrow source directory")
@click.option("--allow-rule", "-a", multiple=True,
help="Allow only these rules")
help="Allow only these rules (can be comma-separated)")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

@click.option("--disallow-rule", "-d", multiple=True,
help="Disallow these rules")
help="Disallow these rules (can be comma-separated)")
def numpydoc(src, symbols, allow_rule, disallow_rule):
"""
Pass list of modules or symbols as arguments to restrict the validation.
Expand All @@ -326,8 +333,9 @@ def numpydoc(src, symbols, allow_rule, disallow_rule):
"""
disallow_rule = disallow_rule or {'GL01', 'SA01', 'EX01', 'ES01'}
try:
results = python_numpydoc(symbols, allow_rules=allow_rule,
disallow_rules=disallow_rule)
results = python_numpydoc(
symbols, allow_rules=_flatten_numpydoc_rules(allow_rule),
disallow_rules=_flatten_numpydoc_rules(disallow_rule))
for result in results:
result.ok()
except LintValidationException:
Expand Down
15 changes: 15 additions & 0 deletions dev/archery/archery/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,18 @@ def _import_pandas():
sys.modules['pyarrow'] = None
import pandas as pd
return pd


def _get_module(obj, *, default=None):
"""
Try to find the name of the module *obj* is defined on.
"""
try:
return obj.__module__
except AttributeError:
# Might be a method/property descriptor as generated by Cython,
# look up the enclosing class.
try:
return obj.__objclass__.__module__
except AttributeError:
return default
26 changes: 20 additions & 6 deletions dev/archery/archery/lang/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
# specific language governing permissions and limitations
# under the License.

from contextlib import contextmanager
import inspect
import tokenize
from contextlib import contextmanager

try:
from numpydoc.validate import Docstring, validate
Expand All @@ -26,6 +26,7 @@
else:
have_numpydoc = True

from ..compat import _get_module
from ..utils.logger import logger
from ..utils.command import Command, capture_stdout, default_bin

Expand Down Expand Up @@ -118,8 +119,11 @@ def traverse(self, fn, obj, from_package):

Parameters
----------
fn : callable
A function to apply on all traversed objects.
obj : Any
from_package : string, default 'pyarrow'
The object to start from.
from_package : string
Predicate to only consider objects from this package.
"""
todo = [obj]
Expand All @@ -139,10 +143,20 @@ def traverse(self, fn, obj, from_package):
continue

member = getattr(obj, name)
module = getattr(member, '__module__', None)
if not (module and module.startswith(from_package)):
module = _get_module(member)
if module is None or not module.startswith(from_package):
continue

# Is it a Cython-generated method? If so, try to detect
# whether it only has a implicitly-generated docstring,
# and no user-defined docstring following it.
# The generated docstring would lack description of method
# parameters and therefore fail Numpydoc validation.
if hasattr(member, '__objclass__'):
doc = getattr(member, '__doc__', None)
# The Cython-generated docstring would be a one-liner,
# such as "ReadOptions.equals(self, ReadOptions other)".
if (doc and '\n' not in doc and f'.{name}(' in doc):
continue
todo.append(member)

@contextmanager
Expand Down Expand Up @@ -195,7 +209,7 @@ def callback(obj):
try:
result = validate(obj)
except OSError as e:
symbol = f"{obj.__module__}.{obj.__name__}"
symbol = f"{_get_module(obj, default='')}.{obj.__name__}"
logger.warning(f"Unable to validate `{symbol}` due to `{e}`")
return

Expand Down
3 changes: 2 additions & 1 deletion dev/archery/archery/utils/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import click

from .command import Bash, Command, default_bin
from ..compat import _get_module
from .cmake import CMake
from .git import git
from .logger import logger
Expand Down Expand Up @@ -284,7 +285,7 @@ def python_numpydoc(symbols=None, allow_rules=None, disallow_rules=None):
doc = getattr(obj, '__doc__', '')
name = getattr(obj, '__name__', '')
qualname = getattr(obj, '__qualname__', '')
module = getattr(obj, '__module__', '')
module = _get_module(obj, default='')
instance = getattr(obj, '__self__', '')
if instance:
klass = instance.__class__.__name__
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ services:
["/arrow/ci/scripts/cpp_build.sh /arrow /build &&
/arrow/ci/scripts/python_build.sh /arrow /build &&
pip install -e /arrow/dev/archery[numpydoc] &&
archery numpydoc --allow-rule PR01"]
archery numpydoc --allow-rule PR01,PR10"]

conda-python-dask:
# Possible $DASK parameters:
Expand Down
78 changes: 72 additions & 6 deletions python/pyarrow/_compute.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,17 @@ cdef class Function(_Weakrefable):
MemoryPool memory_pool=None):
"""
Call the function on the given arguments.

Parameters
----------
args : iterable
The arguments to pass to the function. Accepted types depend
on the specific function.
options : FunctionOptions, optional
Options instance for executing this function. This should have
the right concrete options type.
memory_pool : pyarrow.MemoryPool, optional
If not passed, will allocate memory from the default memory pool.
"""
cdef:
const CFunctionOptions* c_options = NULL
Expand Down Expand Up @@ -2005,8 +2016,8 @@ cdef class Expression(_Weakrefable):
``|`` (logical or) and ``~`` (logical not).
Note: python keywords ``and``, ``or`` and ``not`` cannot be used
to combine expressions.
- Check whether the expression is contained in a list of values with
the ``pyarrow.compute.Expression.isin()`` member function.
- Create expression predicates using Expression methods such as
``pyarrow.compute.Expression.isin()``.

Examples
--------
Expand Down Expand Up @@ -2130,21 +2141,76 @@ cdef class Expression(_Weakrefable):
return Expression._call("divide_checked", [self, other])

def is_valid(self):
"""Checks whether the expression is not-null (valid)"""
"""
Check whether the expression is not-null (valid).

This creates a new expression equivalent to calling the
`is_valid` compute function on this expression.

Returns
-------
is_valid : Expression
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not that it needs to be changed for this PR, but just a general future note: I think putting only "Expression" (i.e. the type) on this line is valid as well for numpydoc, and personally I find the repeating of the name "is_valid: " not giving any added value.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or did numpydoc complain about this? (seeing you changes this also in some existing docstrings)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it seems to pass actually. Given that other docstrings used this convention, I just thought it was preferred.

"""
return Expression._call("is_valid", [self])

def is_null(self, bint nan_is_null=False):
"""Checks whether the expression is null"""
"""
Check whether the expression is null.

This creates a new expression equivalent to calling the
`is_null` compute function on this expression.

Parameters
----------
nan_is_null : boolean, default False
Whether floating-point NaNs are considered null.

Returns
-------
is_null : Expression
"""
options = NullOptions(nan_is_null=nan_is_null)
return Expression._call("is_null", [self], options)

def cast(self, type, bint safe=True):
"""Explicitly change the expression's data type"""
"""
Explicitly set or change the expression's data type.

This creates a new expression equivalent to calling the
`cast` compute function on this expression.

Parameters
----------
type : DataType
Type to cast array to.
safe : boolean, default True
Whether to check for conversion errors such as overflow.

Returns
-------
cast : Expression
"""
options = CastOptions.safe(ensure_type(type))
return Expression._call("cast", [self], options)

def isin(self, values):
"""Checks whether the expression is contained in values"""
"""
Check whether the expression is contained in values.

This creates a new expression equivalent to calling the
`is_in` compute function on this expression.

Parameters
----------
values : Array or iterable
The values to check for.

Returns
-------
isin : Expression
A new expression that, when evaluated, checks whether
this expression's value is contained in `values`.
"""
if not isinstance(values, Array):
values = lib.array(values)

Expand Down
Loading