From 7dc809cee34b321e0bb095b9722190ad4757a586 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 13 Jan 2022 13:32:33 -0500 Subject: [PATCH 1/3] Add kwargs to sql and explain methods --- dask_sql/context.py | 10 +++++++--- tests/unit/test_context.py | 11 ++++------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dask_sql/context.py b/dask_sql/context.py index adce9eaf1..896c85439 100644 --- a/dask_sql/context.py +++ b/dask_sql/context.py @@ -418,6 +418,7 @@ def sql( sql: str, return_futures: bool = True, dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None, + **kwargs, ) -> Union[dd.DataFrame, pd.DataFrame]: """ Query the registered tables with the given SQL. @@ -450,7 +451,7 @@ def sql( """ if dataframes is not None: for df_name, df in dataframes.items(): - self.create_table(df_name, df) + self.create_table(df_name, df, **kwargs) rel, select_names, _ = self._get_ral(sql) @@ -477,7 +478,10 @@ def sql( return df def explain( - self, sql: str, dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None + self, + sql: str, + dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None, + **kwargs, ) -> str: """ Return the stringified relational algebra that this query will produce @@ -499,7 +503,7 @@ def explain( """ if dataframes is not None: for df_name, df in dataframes.items(): - self.create_table(df_name, df) + self.create_table(df_name, df, **kwargs) _, _, rel_string = self._get_ral(sql) return rel_string diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index a9b0d3fe6..b84f9fa11 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -78,11 +78,8 @@ def test_explain(gpu): data_frame = dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}), npartitions=1) - if gpu: - data_frame = dask_cudf.from_dask_dataframe(data_frame) - sql_string = c.explain( - "SELECT * FROM other_df", dataframes={"other_df": data_frame} + "SELECT * FROM other_df", dataframes={"other_df": data_frame}, gpu=gpu ) assert sql_string.startswith( @@ -107,9 +104,9 @@ def test_sql(gpu): assert isinstance(result, pd.DataFrame if not gpu else cudf.DataFrame) dd.assert_eq(result, data_frame) - if gpu: - data_frame = dask_cudf.from_dask_dataframe(data_frame) - result = c.sql("SELECT * FROM other_df", dataframes={"other_df": data_frame}) + result = c.sql( + "SELECT * FROM other_df", dataframes={"other_df": data_frame}, gpu=gpu + ) assert isinstance(result, dd.DataFrame if not gpu else dask_cudf.DataFrame) dd.assert_eq(result, data_frame) From 7d89c06aebbe446543dc639527c07898f5e7059b Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 13 Jan 2022 13:39:20 -0500 Subject: [PATCH 2/3] Use explicit gpu default kwarg over kwargs --- dask_sql/context.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dask_sql/context.py b/dask_sql/context.py index 896c85439..88faa7dad 100644 --- a/dask_sql/context.py +++ b/dask_sql/context.py @@ -418,7 +418,7 @@ def sql( sql: str, return_futures: bool = True, dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None, - **kwargs, + gpu: bool = False, ) -> Union[dd.DataFrame, pd.DataFrame]: """ Query the registered tables with the given SQL. @@ -451,7 +451,7 @@ def sql( """ if dataframes is not None: for df_name, df in dataframes.items(): - self.create_table(df_name, df, **kwargs) + self.create_table(df_name, df, gpu=gpu) rel, select_names, _ = self._get_ral(sql) @@ -481,7 +481,7 @@ def explain( self, sql: str, dataframes: Dict[str, Union[dd.DataFrame, pd.DataFrame]] = None, - **kwargs, + gpu: bool = False, ) -> str: """ Return the stringified relational algebra that this query will produce @@ -503,7 +503,7 @@ def explain( """ if dataframes is not None: for df_name, df in dataframes.items(): - self.create_table(df_name, df, **kwargs) + self.create_table(df_name, df, gpu=gpu) _, _, rel_string = self._get_ral(sql) return rel_string From 70e89e437db5fe34871c040f48656fbea033d8be Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Thu, 13 Jan 2022 13:42:15 -0500 Subject: [PATCH 3/3] Add docstrings --- dask_sql/context.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dask_sql/context.py b/dask_sql/context.py index 88faa7dad..c3a66de6a 100644 --- a/dask_sql/context.py +++ b/dask_sql/context.py @@ -444,6 +444,8 @@ def sql( Defaults to returning the dask dataframe. dataframes (:obj:`Dict[str, dask.dataframe.DataFrame]`): additional Dask or pandas dataframes to register before executing this query + gpu (:obj:`bool`): Whether or not to load the additional Dask or pandas dataframes (if any) on GPU; + requires cuDF / dask-cuDF if enabled. Defaults to False. Returns: :obj:`dask.dataframe.DataFrame`: the created data frame of this query. @@ -496,6 +498,8 @@ def explain( sql (:obj:`str`): The query string to use dataframes (:obj:`Dict[str, dask.dataframe.DataFrame]`): additional Dask or pandas dataframes to register before executing this query + gpu (:obj:`bool`): Whether or not to load the additional Dask or pandas dataframes (if any) on GPU; + requires cuDF / dask-cuDF if enabled. Defaults to False. Returns: :obj:`str`: a description of the created relational algebra.