Merge pull request #3696 from mathesar-foundation/explorations_run

Implement `explorations.run` RPC endpoint
mathesar-foundation · Aug 1, 2024 · 2cc1223 · 2cc1223
2 parents ebb0196 + 6e0c435
commit 2cc1223
Show file tree

Hide file tree

Showing 4 changed files with 240 additions and 4 deletions.
diff --git a/docs/docs/api/rpc.md b/docs/docs/api/rpc.md
@@ -194,7 +194,10 @@ To use an RPC function:
       members:
       - list_
       - delete
+      - run
       - ExplorationInfo
+      - ExplorationDef
+      - ExplorationResult
 
 ## Roles
 

diff --git a/mathesar/rpc/explorations.py b/mathesar/rpc/explorations.py
@@ -3,16 +3,17 @@
 """
 from typing import Optional, TypedDict
 
-from modernrpc.core import rpc_method
+from modernrpc.core import rpc_method, REQUEST_KEY
 from modernrpc.auth.basic import http_basic_auth_login_required
 
 from mathesar.rpc.exceptions.handlers import handle_rpc_exceptions
-from mathesar.utils.explorations import get_explorations, delete_exploration
+from mathesar.rpc.utils import connect
+from mathesar.utils.explorations import get_explorations, delete_exploration, run_exploration
 
 
 class ExplorationInfo(TypedDict):
     """
-    Information about a Exploration.
+    Information about an exploration.
 
     Attributes:
         id: The Django id of an exploration.
@@ -50,6 +51,94 @@ def from_model(cls, model):
         )
 
 
+class ExplorationDef(TypedDict):
+    """
+    Definition of a runnable exploration.
+
+    Attributes:
+        base_table_oid: The OID of the base table of the exploration on the database.
+        initial_columns: A list describing the columns to be included in the exploration.
+        display_names: A map from each column alias in the exploration to the name to be displayed for it.
+        transformations: A list describing the transformations to be made on the included columns.
+        limit: Specifies the number of rows to return (default 100).
+        offset: Specifies the number of rows to skip (default 0).
+        filter: A dict describing filters to be applied to an exploration.
+            e.g. Here is a dict that selects records from an exploration where "col1" is NULL and the lowercased "col2" equals "abc":
+            ```
+            {"and": [
+                {"null": [
+                    {"column_name": ["col1"]},
+                ]},
+                {"equal": [
+                    {"to_lowercase": [
+                        {"column_name": ["col2"]},
+                    ]},
+                    {"literal": ["abc"]},
+                ]},
+            ]}
+            ```
+            Refer to db/functions/base.py for all the possible filters.
+        order_by: A list of dicts, where each dict has a `field` key and a `direction` key.
+                  The value for `field` should be a column name, and `direction` should be either `asc` or `desc`.
+        search: A list of dicts, where each dict has a `column` key and a `literal` key.
+                The value for `column` should be a column name, and `literal` should be the string to search for in that column.
+        duplicate_only: A list of column names for which you want duplicate records.
+    """
+    base_table_oid: int
+    initial_columns: list
+    display_names: dict
+    transformations: Optional[list]
+    limit: Optional[int]
+    offset: Optional[int]
+    filter: Optional[dict]
+    order_by: Optional[list[dict]]
+    search: Optional[list[dict]]
+    duplicate_only: Optional[list]
+
+
+class ExplorationResult(TypedDict):
+    """
+    Result of an exploration run.
+
+    Attributes:
+        query: A dict describing the exploration that ran.
+        records: A dict describing the total count of records along with the contents of those records.
+        output_columns: A tuple describing the names of the columns included in the exploration.
+        column_metadata: A dict describing the metadata applied to included columns.
+        limit: Specifies the max number of rows returned (default 100).
+        offset: Specifies the number of rows skipped (default 0).
+        filter: A dict describing filters applied to an exploration.
+        order_by: The ordering applied to the columns of an exploration.
+        search: A list of dicts, each containing a column name and the expression searched for.
+        duplicate_only: A list of column names for which you want duplicate records.
+    """
+    query: dict
+    records: dict
+    output_columns: tuple
+    column_metadata: dict
+    limit: Optional[int]
+    offset: Optional[int]
+    filter: Optional[dict]
+    order_by: Optional[list[dict]]
+    search: Optional[list[dict]]
+    duplicate_only: Optional[list]
+
+    @classmethod
+    def from_dict(cls, e):
+        return cls(
+            query=e["query"],
+            records=e["records"],
+            output_columns=e["output_columns"],
+            column_metadata=e["column_metadata"],
+            limit=e["limit"],
+            offset=e["offset"],
+            filter=e["filter"],
+            order_by=e["order_by"],
+            search=e["search"],
+            duplicate_only=e["duplicate_only"]
+        )
+
+
 @rpc_method(name="explorations.list")
 @http_basic_auth_login_required
 @handle_rpc_exceptions
@@ -78,3 +167,20 @@ def delete(*, exploration_id: int, **kwargs) -> None:
         exploration_id: The Django id of the exploration to delete.
     """
     delete_exploration(exploration_id)
+
+
+@rpc_method(name="explorations.run")
+@http_basic_auth_login_required
+@handle_rpc_exceptions
+def run(*, exploration_def: ExplorationDef, database_id: int, **kwargs) -> ExplorationResult:
+    """
+    Run an exploration.
+
+    Args:
+        exploration_def: A dict describing an exploration to run.
+        database_id: The Django id of the database containing the base table for the exploration.
+    """
+    user = kwargs.get(REQUEST_KEY).user
+    with connect(database_id, user) as conn:
+        exploration_result = run_exploration(exploration_def, database_id, conn)
+    return ExplorationResult.from_dict(exploration_result)
diff --git a/mathesar/tests/rpc/test_endpoints.py b/mathesar/tests/rpc/test_endpoints.py
@@ -159,6 +159,11 @@
         "explorations.delete",
         [user_is_authenticated]
     ),
+    (
+        explorations.run,
+        "explorations.run",
+        [user_is_authenticated]
+    ),
     (
         roles.list_,
         "roles.list",

diff --git a/mathesar/utils/explorations.py b/mathesar/utils/explorations.py
@@ -1,4 +1,11 @@
-from mathesar.models.base import Explorations
+from db.engine import create_future_engine_with_custom_types
+from db.records.operations.select import get_count
+from db.queries.base import DBQuery, InitialColumn, JoinParameter
+from db.tables.operations.select import get_table
+from mathesar.api.utils import process_annotated_records
+from mathesar.models.base import Explorations, ColumnMetaData
+from mathesar.rpc.columns.metadata import ColumnMetaDataRecord
+from mathesar.state import get_cached_metadata
 
 
 def get_explorations(database_id):
@@ -7,3 +14,118 @@ def get_explorations(database_id):
 
 def delete_exploration(exploration_id):
     Explorations.objects.get(id=exploration_id).delete()
+
+
+def run_exploration(exploration_def, database_id, conn):
+    engine = create_future_engine_with_custom_types(
+        conn.info.user,
+        conn.info.password,
+        conn.info.host,
+        conn.info.dbname,
+        conn.info.port
+    )
+    metadata = get_cached_metadata()
+    base_table_oid = exploration_def["base_table_oid"]
+    initial_columns = exploration_def['initial_columns']
+    processed_initial_columns = []
+    for column in initial_columns:
+        jp_path = column.get("join_path")
+        if jp_path is not None:
+            join_path = [
+                JoinParameter(
+                    left_oid=i[0][0],
+                    left_attnum=i[0][1],
+                    right_oid=i[1][0],
+                    right_attnum=i[1][1]
+                ) for i in jp_path
+            ]
+        processed_initial_columns.append(
+            InitialColumn(
+                reloid=jp_path[-1][-1][0] if jp_path else base_table_oid,
+                attnum=column["attnum"],
+                alias=column["alias"],
+                jp_path=join_path if jp_path else None
+            )
+        )
+    db_query = DBQuery(
+        base_table_oid=base_table_oid,
+        initial_columns=processed_initial_columns,
+        engine=engine,
+        transformations=exploration_def.get("transformations", []),
+        name=None,
+        metadata=metadata
+    )
+    records = db_query.get_records(
+        limit=exploration_def.get('limit', 100),
+        offset=exploration_def.get('offset', 0),
+        filter=exploration_def.get('filter', None),
+        order_by=exploration_def.get('order_by', []),
+        search=exploration_def.get('search', []),
+        duplicate_only=exploration_def.get('duplicate_only', None)
+    )
+    processed_records = process_annotated_records(records)[0]
+    column_metadata = _get_exploration_column_metadata(
+        exploration_def,
+        processed_initial_columns,
+        database_id,
+        db_query,
+        conn,
+        engine,
+        metadata
+    )
+    return {
+        "query": exploration_def,
+        "records": {
+            "count": get_count(
+                table=db_query.transformed_relation,
+                engine=engine,
+                filter=exploration_def.get('filter', None)
+            ),
+            "results": processed_records
+        },
+        "output_columns": tuple(sa_col.name for sa_col in db_query.sa_output_columns),
+        "column_metadata": column_metadata,
+        "limit": exploration_def.get('limit', 100),
+        "offset": exploration_def.get('offset', 0),
+        "filter": exploration_def.get('filter', None),
+        "order_by": exploration_def.get('order_by', []),
+        "search": exploration_def.get('search', []),
+        "duplicate_only": exploration_def.get('duplicate_only', None)
+    }
+
+
+def _get_exploration_column_metadata(
+    exploration_def,
+    processed_initial_columns,
+    database_id,
+    db_query,
+    conn,
+    engine,
+    metadata
+):
+    exploration_column_metadata = {}
+    for alias, sa_col in db_query.all_sa_columns_map.items():
+        initial_column = None
+        for col in processed_initial_columns:
+            if alias == col.alias:
+                initial_column = col
+        column_metadata = ColumnMetaData.objects.filter(
+            database__id=database_id,
+            table_oid=initial_column.reloid,
+            attnum=sa_col.column_attnum
+        ).first() if initial_column else None
+        input_table_name = get_table(initial_column.reloid, conn)["name"] if initial_column else None
+        input_column_name = initial_column.get_name(engine, metadata) if initial_column else None
+        exploration_column_metadata[alias] = {
+            "alias": alias,
+            "display_name": exploration_def["display_names"].get(alias),
+            "type": sa_col.db_type.id,
+            "type_options": sa_col.type_options,
+            "display_options": ColumnMetaDataRecord.from_model(column_metadata) if column_metadata else None,
+            "is_initial_column": True if initial_column else False,
+            "input_column_name": input_column_name,
+            "input_table_name": input_table_name,
+            "input_table_id": initial_column.reloid if initial_column else None,
+            "input_alias": db_query.get_input_alias_for_output_alias(alias)
+        }
+    return exploration_column_metadata