
Commit

ARROW-3325: [Python][FOLLOWUP] In Python 2.7, a class's __doc__ member is not writable (apache#5018)
wesm authored and pprudhvi committed Aug 11, 2019
1 parent 45aea44 commit 4c367e5
Showing 1 changed file with 56 additions and 56 deletions.
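The change is easiest to see in isolation. Here is a minimal sketch of the incompatibility this commit works around; the class and variable names are hypothetical, not taken from the patch:

# On Python 2.7, __doc__ of a (new-style) class is a read-only attribute
# of the type, so assigning to it after class creation raises
# AttributeError; Python 3 permits the assignment. Defining __doc__
# inside the class body works on both versions.

_params_template = """Shared parameter docs are interpolated here: {0}"""


class Old(object):
    pass

try:
    # What parquet.py did before this commit (fails on Python 2.7):
    Old.__doc__ = _params_template.format("assigned after class creation")
except AttributeError as exc:
    print("Python 2.7 raises: %s" % exc)


class New(object):
    # What parquet.py does after this commit (works on 2.7 and 3):
    __doc__ = _params_template.format("assigned in the class body")


print(New.__doc__)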
112 changes: 56 additions & 56 deletions python/pyarrow/parquet.py
@@ -890,7 +890,63 @@ def _open_dataset_file(dataset, path, meta=None):
common_metadata=dataset.common_metadata)


_read_docstring_common = """\
read_dictionary : list, default None
List of names or column paths (for nested types) to read directly
as DictionaryArray. Only supported for BYTE_ARRAY storage. To read
a flat column as dictionary-encoded, pass the column name. For
nested types, you must pass the full column "path", which could be
something like level1.level2.list.item. Refer to the Parquet
file's schema to obtain the paths.
memory_map : boolean, default True
If the source is a file path, use a memory map to read the file, which
can improve performance in some environments.


class ParquetDataset(object):

__doc__ = """
Encapsulates details of reading a complete Parquet dataset, possibly
consisting of multiple files and partitions in subdirectories.

Parameters
----------
path_or_paths : str or List[str]
A directory name, single file name, or list of file names
filesystem : FileSystem, default None
If nothing passed, paths assumed to be found in the local on-disk
filesystem
metadata : pyarrow.parquet.FileMetaData
Use metadata obtained elsewhere to validate file schemas
schema : pyarrow.parquet.Schema
Use schema obtained elsewhere to validate file schemas. Alternative to
metadata parameter
split_row_groups : boolean, default False
Divide files into pieces for each row group in the file
validate_schema : boolean, default True
Check that individual file schemas are all the same / compatible
filters : List[Tuple] or List[List[Tuple]] or None (default)
List of filters to apply, like ``[[('x', '=', 0), ...], ...]``. This
implements partition-level (hive) filtering only, i.e., it prevents
the loading of some files of the dataset.
Predicates are expressed in disjunctive normal form (DNF). This means
that each innermost tuple describes a single column predicate. The
inner predicates are combined with a conjunction (AND) into a larger
predicate, and the outermost list combines all filters with a
disjunction (OR). In this way, any filter expressible in boolean
logic can be written; see the illustration after this hunk.
Filters may also be passed as a flat List[Tuple], in which case the
predicates are evaluated as a conjunction (AND). To express OR between
predicates, one must use the (preferred) List[List[Tuple]] notation.
metadata_nthreads : int, default 1
Number of threads to use in the thread pool that reads the dataset
metadata. Increasing this can speed up reading partitioned datasets.
{0}
""".format(_read_docstring_common)

def __init__(self, path_or_paths, filesystem=None, schema=None,
metadata=None, split_row_groups=False, validate_schema=True,
filters=None, metadata_nthreads=1,
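As an aside, the DNF ``filters`` notation documented in the docstring above is easiest to grasp from an example. A hedged sketch follows; the dataset path and partition column names are hypothetical:

import pyarrow.parquet as pq

# Hypothetical dataset partitioned as events/year=.../month=.../*.parquet.
# The outer list ORs the inner lists; each inner list ANDs its tuples.
# This selects (year == 2019 AND month == 8) OR (year == 2018):
dataset = pq.ParquetDataset(
    'events/',
    filters=[
        [('year', '=', 2019), ('month', '=', 8)],
        [('year', '=', 2018)],
    ],
)

# The flat form is an implicit AND over all predicates, equivalent to
# [[('year', '=', 2019), ('month', '=', 8)]]:
dataset_and = pq.ParquetDataset(
    'events/',
    filters=[('year', '=', 2019), ('month', '=', 8)],
)

table = dataset.read()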
@@ -1105,62 +1161,6 @@ def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1,
return pieces, partitions, common_metadata_path, metadata_path


_read_docstring_common = """\
read_dictionary : list, default None
List of names or column paths (for nested types) to read directly
as DictionaryArray. Only supported for BYTE_ARRAY storage. To read
a flat column as dictionary-encoded, pass the column name. For
nested types, you must pass the full column "path", which could be
something like level1.level2.list.item. Refer to the Parquet
file's schema to obtain the paths.
memory_map : boolean, default True
If the source is a file path, use a memory map to read the file, which
can improve performance in some environments.


ParquetDataset.__doc__ = """
Encapsulates details of reading a complete Parquet dataset, possibly
consisting of multiple files and partitions in subdirectories.

Parameters
----------
path_or_paths : str or List[str]
A directory name, single file name, or list of file names
filesystem : FileSystem, default None
If nothing passed, paths assumed to be found in the local on-disk
filesystem
metadata : pyarrow.parquet.FileMetaData
Use metadata obtained elsewhere to validate file schemas
schema : pyarrow.parquet.Schema
Use schema obtained elsewhere to validate file schemas. Alternative to
metadata parameter
split_row_groups : boolean, default False
Divide files into pieces for each row group in the file
validate_schema : boolean, default True
Check that individual file schemas are all the same / compatible
filters : List[Tuple] or List[List[Tuple]] or None (default)
List of filters to apply, like ``[[('x', '=', 0), ...], ...]``. This
implements partition-level (hive) filtering only, i.e., it prevents
the loading of some files of the dataset.
Predicates are expressed in disjunctive normal form (DNF). This means
that each innermost tuple describes a single column predicate. The
inner predicates are combined with a conjunction (AND) into a larger
predicate, and the outermost list combines all filters with a
disjunction (OR). In this way, any filter expressible in boolean
logic can be written; see the illustration above.
Filters may also be passed as a flat List[Tuple], in which case the
predicates are evaluated as a conjunction (AND). To express OR between
predicates, one must use the (preferred) List[List[Tuple]] notation.
metadata_nthreads : int, default 1
Number of threads to use in the thread pool that reads the dataset
metadata. Increasing this can speed up reading partitioned datasets.
{0}
""".format(_read_docstring_common)


_read_table_docstring = """
{0}
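Finally, a hedged usage sketch of the two parameters documented by _read_docstring_common; the file name and column paths are hypothetical:

import pyarrow.parquet as pq

# Read a flat string column dictionary-encoded by naming it, and a nested
# column by its full path; memory-map the local file while reading.
table = pq.read_table(
    'data.parquet',
    read_dictionary=['tags', 'level1.level2.list.item'],
    memory_map=True,
)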
