Skip to content

Commit

Permalink
Merge pull request #3771 from mathesar-foundation/type_inference_rpc
Browse files Browse the repository at this point in the history
Add `data_modeling.suggest_types` function.
  • Loading branch information
Anish9901 authored Aug 22, 2024
2 parents 7dddf3a + 8466839 commit 4567c4d
Show file tree
Hide file tree
Showing 7 changed files with 243 additions and 0 deletions.
118 changes: 118 additions & 0 deletions db/sql/00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2747,6 +2747,124 @@ END;
$$ LANGUAGE SQL RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.build_cast_expr(tab_id regclass, col_id smallint, typ_id regtype) RETURNS text AS $$/*
Build an expression for casting a column in Mathesar, returning the text of that expression.
We fall back silently to default casting behavior if the mathesar_types namespace is missing.
However, we do throw an error in cases where the schema exists, but the type casting function
doesn't. This is assumed to be an error the user should know about.
Args:
tab_id: The OID of the table whose column we're casting.
col_id: The attnum of the column in the table.
typ_id: The OID of the type we will cast to.
*/
SELECT CASE
WHEN msar.schema_exists('mathesar_types') THEN
msar.get_cast_function_name(typ_id)
|| '('
|| format('%I', msar.get_column_name(tab_id, col_id))
|| ')'
ELSE
format('%I', msar.get_column_name(tab_id, col_id)) || '::' || typ_id::text
END;
$$ LANGUAGE SQL RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.infer_column_data_type(tab_id regclass, col_id smallint) RETURNS regtype AS $$/*
Infer the best type for a given column.
Note that we currently only try for `text` columns, since we only do this at import. I.e.,
if the column is some other type we just return that original type.
Args:
tab_id: The OID of the table of the column whose type we're inferring.
col_id: The attnum of the column whose type we're inferring.
*/
DECLARE
inferred_type regtype;
infer_sequence_raw text[] := ARRAY[
'boolean',
'date',
'numeric',
'mathesar_types.mathesar_money',
'timestamp without time zone',
'timestamp with time zone',
'time without time zone',
'interval',
'mathesar_types.email',
'mathesar_types.mathesar_json_array',
'mathesar_types.mathesar_json_object',
'mathesar_types.uri'
];
infer_sequence regtype[];
column_nonempty boolean;
test_type regtype;
BEGIN
infer_sequence := array_agg(pg_catalog.to_regtype(t))
FILTER (WHERE pg_catalog.to_regtype(t) IS NOT NULL)
FROM unnest(infer_sequence_raw) AS x(t);
EXECUTE format(
'SELECT EXISTS (SELECT 1 FROM %1$I.%2$I WHERE %3$I IS NOT NULL)',
msar.get_relation_schema_name(tab_id),
msar.get_relation_name(tab_id),
msar.get_column_name(tab_id, col_id)
) INTO column_nonempty;
inferred_type := atttypid FROM pg_catalog.pg_attribute WHERE attrelid=tab_id AND attnum=col_id;
IF inferred_type = 'text'::regtype AND column_nonempty THEN
FOREACH test_type IN ARRAY infer_sequence
LOOP
BEGIN
EXECUTE format(
'EXPLAIN ANALYZE SELECT %1$s FROM %2$I.%3$I',
msar.build_cast_expr(tab_id, col_id, test_type),
msar.get_relation_schema_name(tab_id),
msar.get_relation_name(tab_id)
);
inferred_type := test_type;
EXIT;
EXCEPTION WHEN OTHERS THEN
RAISE NOTICE 'Test failed: %', format(
'EXPLAIN ANALYZE SELECT %1$s FROM %2$I.%3$I',
msar.build_cast_expr(tab_id, col_id, test_type),
msar.get_relation_schema_name(tab_id),
msar.get_relation_name(tab_id)
);
-- do nothing, just try the next type.
END;
END LOOP;
END IF;
RETURN inferred_type;
END;
$$ LANGUAGE plpgsql RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
msar.infer_table_column_data_types(tab_id regclass) RETURNS jsonb AS $$/*
Infer the best type for each column in the table.
Currently we only suggest different types for columns which originate as type `text`.
Args:
tab_id: The OID of the table whose columns we're inferring types for.
The response JSON will have attnum keys, and values will be the result of `format_type`
for the inferred type of each column. Restricted to columns to which the user has access.
*/
SELECT jsonb_object_agg(
attnum, pg_catalog.format_type(msar.infer_column_data_type(attrelid, attnum), null)
)
FROM pg_catalog.pg_attribute
WHERE
attrelid = tab_id
AND attnum > 0
AND NOT attisdropped
AND has_column_privilege(attrelid, attnum, 'SELECT');
$$ LANGUAGE SQL RETURNS NULL ON NULL INPUT;


CREATE OR REPLACE FUNCTION
__msar.build_col_drop_default_expr(tab_id oid, col_id integer, new_type text, new_default jsonb)
RETURNS TEXT AS $$/*
Expand Down
44 changes: 44 additions & 0 deletions db/sql/test_00_msar.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,50 @@ END;
$$ LANGUAGE plpgsql;


-- msar.infer_table_column_data_types --------------------------------------------------------------

CREATE OR REPLACE FUNCTION __setup_type_inference() RETURNS SETOF TEXT AS $$
BEGIN
CREATE TABLE "Types Test" (
-- We can't test custom types in this context.
id integer PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
"Empty" text,
"Boolean" text,
"Date" text,
"Numeric" text,
"Interval" text,
"Text" text
);
INSERT INTO "Types Test"
("Boolean", "Date", "Numeric", "Interval", "Text")
VALUES
('0', '2000-01-01', '0', '3 days', 'cat'),
('1', '6/23/2004', '3.14', '3 hours', 'bat'),
('t', 'May-2007-29', '-234.22', '3 minutes', 'rat'),
('false', '20200909', '1', '3 seconds', 'mat');
END;
$$ LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION test_infer_table_column_data_types() RETURNS SETOF TEXT AS $f$
BEGIN
PERFORM __setup_type_inference();
RETURN NEXT is(
msar.infer_table_column_data_types('"Types Test"'::regclass),
jsonb_build_object(
1, 'integer',
2, 'text',
3, 'boolean',
4, 'date',
5, 'numeric',
6, 'interval',
7, 'text'
)
);
END;
$f$ LANGUAGE plpgsql;


-- msar.add_mathesar_table

CREATE OR REPLACE FUNCTION __setup_create_table() RETURNS SETOF TEXT AS $f$
Expand Down
20 changes: 20 additions & 0 deletions db/tables/operations/infer_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from db import constants
from db.columns.base import MathesarColumn
from db.columns.operations.infer_types import infer_column_type
from db.connection import exec_msar_func
from db.schemas.operations.create import create_schema_if_not_exists_via_sql_alchemy
from db.tables.operations.create import CreateTableAs
from db.tables.operations.select import reflect_table
Expand All @@ -16,6 +17,25 @@
TEMP_TABLE = f"{constants.MATHESAR_PREFIX}temp_table_%s"


def infer_table_column_data_types(conn, table_oid):
"""
Infer the best type for each column in the table.
Currently we only suggest different types for columns which originate
as type `text`.
Args:
tab_id: The OID of the table whose columns we're inferring types for.
The response JSON will have attnum keys, and values will be the
result of `format_type` for the inferred type of each column.
Restricted to columns to which the user has access.
"""
return exec_msar_func(
conn, 'infer_table_column_data_types', table_oid
).fetchone()[0]


def update_table_column_types(schema, table_name, engine, metadata=None, columns_might_have_defaults=True):
metadata = metadata if metadata else get_empty_metadata()
table = reflect_table(table_name, schema, engine, metadata=metadata)
Expand Down
1 change: 1 addition & 0 deletions docs/docs/api/rpc.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ To use an RPC function:
members:
- add_foreign_key_column
- add_mapping_table
- suggest_types
- MappingColumn

## Responses
Expand Down
24 changes: 24 additions & 0 deletions mathesar/rpc/data_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from modernrpc.auth.basic import http_basic_auth_login_required

from db.links.operations import create as links_create
from db.tables.operations import infer_types
from mathesar.rpc.exceptions.handlers import handle_rpc_exceptions
from mathesar.rpc.utils import connect

Expand Down Expand Up @@ -80,3 +81,26 @@ def add_mapping_table(
links_create.add_mapping_table(
conn, schema_oid, table_name, mapping_columns
)


@rpc_method(name="data_modeling.suggest_types")
@http_basic_auth_login_required
@handle_rpc_exceptions
def suggest_types(*, table_oid: int, database_id: int, **kwargs) -> dict:
"""
Infer the best type for each column in the table.
Currently we only suggest different types for columns which originate
as type `text`.
Args:
table_oid: The OID of the table whose columns we're inferring types for.
database_id: The Django id of the database containing the table.
The response JSON will have attnum keys, and values will be the
result of `format_type` for the inferred type of each column, i.e., the
canonical string referring to the type.
"""
user = kwargs.get(REQUEST_KEY).user
with connect(database_id, user) as conn:
return infer_types.infer_table_column_data_types(conn, table_oid)
31 changes: 31 additions & 0 deletions mathesar/tests/rpc/test_data_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,34 @@ def mock_add_mapping_table(
database_id=_database_id,
request=request,
)


def test_data_modeling_suggest_types(rf, monkeypatch):
_username = 'alice'
_password = 'pass1234'
_table_oid = 12345
_database_id = 2
request = rf.post('/api/rpc/v0/', data={})
request.user = User(username=_username, password=_password)

@contextmanager
def mock_connect(database_id, user):
if database_id == _database_id and user.username == _username:
try:
yield True
finally:
pass
else:
raise AssertionError('incorrect parameters passed')

def mock_suggest_types(conn, table_oid):
if table_oid != _table_oid:
raise AssertionError('incorrect parameters passed')

monkeypatch.setattr(data_modeling, 'connect', mock_connect)
monkeypatch.setattr(data_modeling.infer_types, 'infer_table_column_data_types', mock_suggest_types)
data_modeling.suggest_types(
table_oid=_table_oid,
database_id=_database_id,
request=request,
)
5 changes: 5 additions & 0 deletions mathesar/tests/rpc/test_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@
"data_modeling.add_mapping_table",
[user_is_authenticated]
),
(
data_modeling.suggest_types,
"data_modeling.suggest_types",
[user_is_authenticated]
),
(
database_privileges.list_direct,
"database_privileges.list_direct",
Expand Down

0 comments on commit 4567c4d

Please sign in to comment.