Skip to content

Commit

Permalink
feat(ux): implement recursive element conversion for nested types and…
Browse files Browse the repository at this point in the history
… json
  • Loading branch information
cpcloud committed Oct 11, 2023
1 parent aae28e9 commit 8ddfa94
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 19 deletions.
9 changes: 6 additions & 3 deletions ibis/backends/snowflake/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@


class SnowflakePandasData(PandasData):
convert_Struct = staticmethod(PandasData.convert_JSON)
convert_Array = staticmethod(PandasData.convert_JSON)
convert_Map = staticmethod(PandasData.convert_JSON)
@staticmethod
def convert_JSON(s, dtype, pandas_type):
converter = SnowflakePandasData.convert_JSON_element(dtype)
return s.map(converter, na_action="ignore").astype("object")

convert_Struct = convert_Array = convert_Map = convert_JSON
1 change: 0 additions & 1 deletion ibis/backends/tests/test_param.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ def test_scalar_param_date(backend, alltypes, value):
"oracle",
"pyspark",
"mssql",
"trino",
"druid",
]
)
Expand Down
69 changes: 54 additions & 15 deletions ibis/formats/pandas.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
import warnings

import numpy as np
Expand Down Expand Up @@ -205,40 +206,78 @@ def convert_String(s, dtype, pandas_type):

@staticmethod
def convert_UUID(s, dtype, pandas_type):
from uuid import UUID

return s.map(
lambda v: v if isinstance(v, UUID) else UUID(v), na_action="ignore"
)
return s.map(PandasData.get_element_converter(dtype), na_action="ignore")

@staticmethod
def convert_Struct(s, dtype, pandas_type):
return s.map(
lambda values, names=dtype.names: (
values if isinstance(values, dict) else dict(zip(names, values))
),
na_action="ignore",
)
return s.map(PandasData.get_element_converter(dtype), na_action="ignore")

@staticmethod
def convert_Array(s, dtype, pandas_type):
return s.map(list, na_action="ignore")
return s.map(PandasData.get_element_converter(dtype), na_action="ignore")

@staticmethod
def convert_Map(s, dtype, pandas_type):
return s.map(dict, na_action="ignore")
return s.map(PandasData.get_element_converter(dtype), na_action="ignore")

@staticmethod
def convert_JSON(s, dtype, pandas_type):
import json
return s.map(
PandasData.get_element_converter(dtype), na_action="ignore"
).astype("object")

@staticmethod
def get_element_converter(dtype):
funcgen = getattr(
PandasData, f"convert_{type(dtype).__name__}_element", lambda _: lambda x: x
)
return funcgen(dtype)

@staticmethod
def convert_Struct_element(dtype):
converters = tuple(map(PandasData.get_element_converter, dtype.types))

def convert(values, names=dtype.names, converters=converters):
items = values.items() if isinstance(values, dict) else zip(names, values)
return {
k: converter(v) if v is not None else v
for converter, (k, v) in zip(converters, items)
}

return convert

@staticmethod
def convert_JSON_element(_):
def try_json(x):
if x is None:
return x
try:
return json.loads(x)
except (TypeError, json.JSONDecodeError):
return x

return s.map(try_json, na_action="ignore").astype("object")
return try_json

@staticmethod
def convert_Array_element(dtype):
convert_value = PandasData.get_element_converter(dtype.value_type)
return lambda values: [
convert_value(value) if value is not None else value for value in values
]

@staticmethod
def convert_Map_element(dtype):
convert_value = PandasData.get_element_converter(dtype.value_type)
return lambda row: {
key: convert_value(value) if value is not None else value
for key, value in dict(row).items()
}

@staticmethod
def convert_UUID_element(_):
from uuid import UUID

return lambda v: v if isinstance(v, UUID) else UUID(v)


class DaskData(PandasData):
Expand Down

0 comments on commit 8ddfa94

Please sign in to comment.