
Commit f7fd2d2

tswast and sycai authored
docs: use autosummary to split documentation pages (#2251)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕

---------

Co-authored-by: Shenyang Cai <[email protected]>
1 parent bb66915 commit f7fd2d2


57 files changed: +735 -988 lines changed

bigframes/_config/__init__.py

Lines changed: 14 additions & 165 deletions
@@ -17,175 +17,24 @@
 DataFrames from this package.
 """
 
-from __future__ import annotations
-
-import copy
-from dataclasses import dataclass, field
-import threading
-from typing import Optional
-
-import bigframes_vendored.pandas._config.config as pandas_config
-
-import bigframes._config.bigquery_options as bigquery_options
-import bigframes._config.compute_options as compute_options
-import bigframes._config.display_options as display_options
-import bigframes._config.experiment_options as experiment_options
-import bigframes._config.sampling_options as sampling_options
-
-
-@dataclass
-class ThreadLocalConfig(threading.local):
-    # If unset, global settings will be used
-    bigquery_options: Optional[bigquery_options.BigQueryOptions] = None
-    # Note: use default factory instead of default instance so each thread initializes to default values
-    display_options: display_options.DisplayOptions = field(
-        default_factory=display_options.DisplayOptions
-    )
-    sampling_options: sampling_options.SamplingOptions = field(
-        default_factory=sampling_options.SamplingOptions
-    )
-    compute_options: compute_options.ComputeOptions = field(
-        default_factory=compute_options.ComputeOptions
-    )
-    experiment_options: experiment_options.ExperimentOptions = field(
-        default_factory=experiment_options.ExperimentOptions
-    )
-
-
-class Options:
-    """Global options affecting BigQuery DataFrames behavior."""
-
-    def __init__(self):
-        self.reset()
-
-    def reset(self) -> Options:
-        """Reset the option settings to defaults.
-
-        Returns:
-            bigframes._config.Options: Options object with default values.
-        """
-        self._local = ThreadLocalConfig()
-
-        # BigQuery options are special because they can only be set once per
-        # session, so we need an indicator as to whether we are using the
-        # thread-local session or the global session.
-        self._bigquery_options = bigquery_options.BigQueryOptions()
-        return self
-
-    def _init_bigquery_thread_local(self):
-        """Initialize thread-local options, based on current global options."""
-
-        # Already thread-local, so don't reset any options that have been set
-        # already. No locks needed since this only modifies thread-local
-        # variables.
-        if self._local.bigquery_options is not None:
-            return
-
-        self._local.bigquery_options = copy.deepcopy(self._bigquery_options)
-        self._local.bigquery_options._session_started = False
-
-    @property
-    def bigquery(self) -> bigquery_options.BigQueryOptions:
-        """Options to use with the BigQuery engine.
-
-        Returns:
-            bigframes._config.bigquery_options.BigQueryOptions:
-                Options for BigQuery engine.
-        """
-        if self._local.bigquery_options is not None:
-            # The only way we can get here is if someone called
-            # _init_bigquery_thread_local.
-            return self._local.bigquery_options
-
-        return self._bigquery_options
-
-    @property
-    def display(self) -> display_options.DisplayOptions:
-        """Options controlling object representation.
-
-        Returns:
-            bigframes._config.display_options.DisplayOptions:
-                Options for controlling object representation.
-        """
-        return self._local.display_options
-
-    @property
-    def sampling(self) -> sampling_options.SamplingOptions:
-        """Options controlling downsampling when downloading data
-        to memory.
-
-        The data can be downloaded into memory explicitly
-        (e.g., to_pandas, to_numpy, values) or implicitly (e.g.,
-        matplotlib plotting). This option can be overridden by
-        parameters in specific functions.
-
-        Returns:
-            bigframes._config.sampling_options.SamplingOptions:
-                Options for controlling downsampling.
-        """
-        return self._local.sampling_options
-
-    @property
-    def compute(self) -> compute_options.ComputeOptions:
-        """Thread-local options controlling object computation.
-
-        Returns:
-            bigframes._config.compute_options.ComputeOptions:
-                Thread-local options for controlling object computation
-        """
-        return self._local.compute_options
-
-    @property
-    def experiments(self) -> experiment_options.ExperimentOptions:
-        """Options controlling experiments
-
-        Returns:
-            bigframes._config.experiment_options.ExperimentOptions:
-                Thread-local options for controlling experiments
-        """
-        return self._local.experiment_options
-
-    @property
-    def is_bigquery_thread_local(self) -> bool:
-        """Indicator that we're using a thread-local session.
-
-        A thread-local session can be started by using
-        `with bigframes.option_context("bigquery.some_option", "some-value"):`.
-
-        Returns:
-            bool:
-                A boolean value, where a value is True if a thread-local session
-                is in use; otherwise False.
-        """
-        return self._local.bigquery_options is not None
-
-    @property
-    def _allow_large_results(self) -> bool:
-        """The effective 'allow_large_results' setting.
-
-        This value is `self.compute.allow_large_results` if set (not `None`),
-        otherwise it defaults to `self.bigquery.allow_large_results`.
-
-        Returns:
-            bool:
-                Whether large query results are permitted.
-                - `True`: The BigQuery result size limit (e.g., 10 GB) is removed.
-                - `False`: Results are restricted to this limit (potentially faster).
-                  BigQuery will raise an error if this limit is exceeded.
-        """
-        if self.compute.allow_large_results is None:
-            return self.bigquery.allow_large_results
-        return self.compute.allow_large_results
-
-
-options = Options()
-"""Global options for default session."""
-
-option_context = pandas_config.option_context
+from bigframes._config.bigquery_options import BigQueryOptions
+from bigframes._config.compute_options import ComputeOptions
+from bigframes._config.display_options import DisplayOptions
+from bigframes._config.experiment_options import ExperimentOptions
+from bigframes._config.global_options import option_context, Options
+import bigframes._config.global_options as global_options
+from bigframes._config.sampling_options import SamplingOptions
 
+options = global_options.options
+"""Global options for the default session."""
 
 __all__ = (
     "Options",
     "options",
     "option_context",
+    "BigQueryOptions",
+    "ComputeOptions",
+    "DisplayOptions",
+    "ExperimentOptions",
+    "SamplingOptions",
 )
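
A note on what this reorganization means for imports, with a small, hedged Python sketch: the option container classes are now re-exported directly from bigframes._config, while the Options class, the shared options instance, and option_context live in the new bigframes._config.global_options module. The option paths below follow the repository's existing docstrings and doctests; the particular values are illustrative assumptions, not part of this commit.

    from bigframes._config import ComputeOptions, Options, option_context, options

    # `options` is the shared global Options instance, now created in
    # bigframes._config.global_options and re-exported from bigframes._config.
    options.compute.maximum_bytes_billed = 500  # same knob as the ComputeOptions doctest below

    # `option_context` (vendored from pandas) temporarily overrides an option and
    # restores the previous value on exit; "display.max_rows" is assumed here as a
    # representative option path.
    with option_context("display.max_rows", 10):
        ...  # code here sees the overridden setting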

bigframes/_config/compute_options.py

Lines changed: 94 additions & 50 deletions
@@ -29,7 +29,7 @@ class ComputeOptions:
     >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")
 
     >>> bpd.options.compute.maximum_bytes_billed = 500
-    >>> # df.to_pandas() # this should fail
+    >>> df.to_pandas() # this should fail # doctest: +SKIP
     google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required.
 
     >>> bpd.options.compute.maximum_bytes_billed = None # reset option
@@ -53,68 +53,112 @@ class ComputeOptions:
     >>> del bpd.options.compute.extra_query_labels["test1"]
     >>> bpd.options.compute.extra_query_labels
     {'test2': 'abc', 'test3': False}
-
-    Attributes:
-        ai_ops_confirmation_threshold (int | None):
-            Guards against unexpected processing of large amount of rows by semantic operators.
-            If the number of rows exceeds the threshold, the user will be asked to confirm
-            their operations to resume. The default value is 0. Set the value to None
-            to turn off the guard.
-
-        ai_ops_threshold_autofail (bool):
-            Guards against unexpected processing of large amount of rows by semantic operators.
-            When set to True, the operation automatically fails without asking for user inputs.
-
-        allow_large_results (bool | None):
-            Specifies whether query results can exceed 10 GB. Defaults to False. Setting this
-            to False (the default) restricts results to 10 GB for potentially faster execution;
-            BigQuery will raise an error if this limit is exceeded. Setting to True removes
-            this result size limit.
-
-        enable_multi_query_execution (bool | None):
-            If enabled, large queries may be factored into multiple smaller queries
-            in order to avoid generating queries that are too complex for the query
-            engine to handle. However this comes at the cost of increase cost and latency.
-
-        extra_query_labels (Dict[str, Any] | None):
-            Stores additional custom labels for query configuration.
-
-        maximum_bytes_billed (int | None):
-            Limits the bytes billed for query jobs. Queries that will have
-            bytes billed beyond this limit will fail (without incurring a
-            charge). If unspecified, this will be set to your project default.
-            See `maximum_bytes_billed`: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed.
-
-        maximum_result_rows (int | None):
-            Limits the number of rows in an execution result. When converting
-            a BigQuery DataFrames object to a pandas DataFrame or Series (e.g.,
-            using ``.to_pandas()``, ``.peek()``, ``.__repr__()``, direct
-            iteration), the data is downloaded from BigQuery to the client
-            machine. This option restricts the number of rows that can be
-            downloaded. If the number of rows to be downloaded exceeds this
-            limit, a ``bigframes.exceptions.MaximumResultRowsExceeded``
-            exception is raised.
-
-        semantic_ops_confirmation_threshold (int | None):
-            .. deprecated:: 1.42.0
-                Semantic operators are deprecated. Please use AI operators instead
-
-        semantic_ops_threshold_autofail (bool):
-            .. deprecated:: 1.42.0
-                Semantic operators are deprecated. Please use AI operators instead
     """
 
     ai_ops_confirmation_threshold: Optional[int] = 0
+    """
+    Guards against unexpected processing of large amount of rows by semantic operators.
+
+    If the number of rows exceeds the threshold, the user will be asked to confirm
+    their operations to resume. The default value is 0. Set the value to None
+    to turn off the guard.
+
+    Returns:
+        Optional[int]: Number of rows.
+    """
+
     ai_ops_threshold_autofail: bool = False
+    """
+    Guards against unexpected processing of large amount of rows by semantic operators.
+
+    When set to True, the operation automatically fails without asking for user inputs.
+
+    Returns:
+        bool: True if the guard is enabled.
+    """
+
     allow_large_results: Optional[bool] = None
+    """
+    Specifies whether query results can exceed 10 GB.
+
+    Defaults to False. Setting this to False (the default) restricts results to
+    10 GB for potentially faster execution; BigQuery will raise an error if this
+    limit is exceeded. Setting to True removes this result size limit.
+
+
+    Returns:
+        bool | None: True if results > 10 GB are enabled.
+    """
     enable_multi_query_execution: bool = False
+    """
+    If enabled, large queries may be factored into multiple smaller queries.
+
+    This is in order to avoid generating queries that are too complex for the
+    query engine to handle. However this comes at the cost of increase cost and
+    latency.
+
+
+    Returns:
+        bool | None: True if enabled.
+    """
+
     extra_query_labels: Dict[str, Any] = dataclasses.field(
         default_factory=dict, init=False
     )
+    """
+    Stores additional custom labels for query configuration.
+
+    Returns:
+        Dict[str, Any] | None: Additional labels.
+    """
+
     maximum_bytes_billed: Optional[int] = None
+    """
+    Limits the bytes billed for query jobs.
+
+    Queries that will have bytes billed beyond this limit will fail (without
+    incurring a charge). If unspecified, this will be set to your project
+    default. See `maximum_bytes_billed`:
+    https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed.
+
+    Returns:
+        int | None: Number of bytes, if set.
+    """
+
     maximum_result_rows: Optional[int] = None
+    """
+    Limits the number of rows in an execution result.
+
+    When converting a BigQuery DataFrames object to a pandas DataFrame or Series
+    (e.g., using ``.to_pandas()``, ``.peek()``, ``.__repr__()``, direct
+    iteration), the data is downloaded from BigQuery to the client machine. This
+    option restricts the number of rows that can be downloaded. If the number
+    of rows to be downloaded exceeds this limit, a
+    ``bigframes.exceptions.MaximumResultRowsExceeded`` exception is raised.
+
+    Returns:
+        int | None: Number of rows, if set.
+    """
+
     semantic_ops_confirmation_threshold: Optional[int] = 0
+    """
+    Deprecated.
+
+    .. deprecated:: 1.42.0
+        Semantic operators are deprecated. Please use the functions in
+        :mod:`bigframes.bigquery.ai` instead.
+
+    """
+
     semantic_ops_threshold_autofail = False
+    """
+    Deprecated.
+
+    .. deprecated:: 1.42.0
+        Semantic operators are deprecated. Please use the functions in
+        :mod:`bigframes.bigquery.ai` instead.
+
+    """
 
     def assign_extra_query_labels(self, **kwargs: Any) -> None:
         """
