Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ibis/expr/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ def get_result(self):
return shape_like_args(self.args, promoted_type)

def _get_type(self):
    """Promote the operand types of a binary numeric expression.

    Any decimal operand promotes the whole result to decimal; otherwise
    floating operands win over integers, and all-integer args defer to
    the integer-specific promotion.
    """
    # Decimal takes precedence over floating point and integers.
    if util.any_of(self.args, ir.DecimalValue):
        return _decimal_promoted_type(self.args)
    elif util.any_of(self.args, ir.FloatingValue):
        # double wins over float when both widths appear
        return 'double' if util.any_of(self.args, ir.DoubleValue) else 'float'
    elif util.all_of(self.args, ir.IntegerValue):
        return self._get_int_type()
    else:
        raise NotImplementedError

Expand Down
15 changes: 15 additions & 0 deletions ibis/expr/tests/test_value_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -890,3 +890,18 @@ def test_fillna_null(value, expected):
def test_string_temporal_compare(op, left, right):
result = op(left, right)
assert result.type().equals(dt.boolean)


@pytest.mark.parametrize(
    ('value', 'type_name', 'expected_type_class'),
    [
        (2.21, 'decimal', dt.Decimal),
        (3.14, 'double', dt.Double),
        (4.2, 'int64', dt.Double),
        (4, 'int64', dt.Int64),
    ]
)
def test_decimal_modulo_output_type(value, type_name, expected_type_class):
    # `type_name` (renamed from `type`, which shadowed the builtin) is the
    # declared ibis type of column `a`; modulo with a Python literal must
    # follow the numeric promotion rules (decimal % float -> Decimal, etc.).
    t = ibis.table([('a', type_name)])
    expr = t.a % value
    assert isinstance(expr.type(), expected_type_class)
3 changes: 3 additions & 0 deletions ibis/pandas/api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from __future__ import absolute_import

from ibis.pandas.client import PandasClient
from ibis.pandas.decimal import execute_node # noqa: F401
from ibis.pandas.execution import execute # noqa: F401


Expand Down
28 changes: 21 additions & 7 deletions ibis/pandas/client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import absolute_import

import six

import numpy as np
Expand Down Expand Up @@ -30,7 +32,7 @@
}


def pandas_dtypes_to_ibis_schema(df):
def pandas_dtypes_to_ibis_schema(df, schema):
dtypes = df.dtypes

pairs = []
Expand All @@ -41,10 +43,20 @@ def pandas_dtypes_to_ibis_schema(df):
'Column names must be strings to use the pandas backend'
)

if dtype == np.object_:
ibis_type = _INFERRED_DTYPE_TO_IBIS_TYPE[
infer_dtype(df[column_name].dropna())
]
if column_name in schema:
ibis_type = dt.validate_type(schema[column_name])
elif dtype == np.object_:
inferred_dtype = infer_dtype(df[column_name].dropna())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yikes. I guess we should make a NaN-friendly type inference function someplace (seems like an oversight in infer_dtype originally)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you post an issue in the pandas tracker about this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I PR'd it :) pandas-dev/pandas#17066.


if inferred_dtype == 'mixed':
raise TypeError(
'Unable to infer type of column {0!r}. Try instantiating '
'your table from the client with client.table('
"'my_table', schema={{{0!r}: <explicit type>}})".format(
column_name
)
)
ibis_type = _INFERRED_DTYPE_TO_IBIS_TYPE[inferred_dtype]
elif hasattr(dtype, 'tz'):
ibis_type = dt.Timestamp(str(dtype.tz))
else:
Expand All @@ -60,9 +72,11 @@ class PandasClient(client.Client):
def __init__(self, dictionary):
self.dictionary = dictionary

def table(self, name, schema=None):
    """Return the DataFrame registered under `name` as a table expression.

    `schema` optionally maps column names to explicit ibis types,
    overriding dtype-based inference for those columns.
    """
    df = self.dictionary[name]
    overrides = schema if schema is not None else {}
    schema = pandas_dtypes_to_ibis_schema(df, overrides)
    return ops.DatabaseTable(name, schema, self).to_expr()

def execute(self, query, *args, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions ibis/pandas/core.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import absolute_import

import collections
import numbers
import datetime
Expand Down
121 changes: 121 additions & 0 deletions ibis/pandas/decimal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from __future__ import absolute_import

import decimal
import math
import numbers

import numpy as np
import pandas as pd
import six

import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.pandas.dispatch import execute_node


@execute_node.register(ops.Ln, decimal.Decimal)
def execute_decimal_natural_log(op, data, scope=None):
    """Natural log of a decimal scalar; NaN where ln is undefined (e.g. ln(-1))."""
    try:
        result = data.ln()
    except decimal.InvalidOperation:
        result = decimal.Decimal('NaN')
    return result


@execute_node.register(ops.Log, decimal.Decimal, decimal.Decimal)
def execute_decimal_log_with_decimal_base(op, data, base, scope=None):
    """Arbitrary-base log via the change-of-base identity ln(data)/ln(base)."""
    try:
        # both ln() calls and the division stay inside the try: any of
        # them can signal InvalidOperation (e.g. negative operands, 0/0)
        result = data.ln() / base.ln()
    except decimal.InvalidOperation:
        result = decimal.Decimal('NaN')
    return result


@execute_node.register(ops.Log, decimal.Decimal, type(None))
def execute_decimal_log_with_no_base(op, data, _, scope=None):
    """A null base means natural log; delegate to the Ln implementation."""
    result = execute_decimal_natural_log(op, data, scope=scope)
    return result


@execute_node.register(ops.Log, decimal.Decimal, numbers.Real)
def execute_decimal_log_with_real_base(op, data, base, scope=None):
    """Coerce a real-valued base to Decimal and re-dispatch."""
    decimal_base = decimal.Decimal(base)
    return execute_node(op, data, decimal_base, scope=scope)


@execute_node.register(ops.Log, decimal.Decimal, np.integer)
def execute_decimal_log_with_np_integer_base(op, data, base, scope=None):
    """Unbox a numpy integer base to a plain int and re-dispatch."""
    plain_base = int(base)
    return execute_node(op, data, plain_base, scope=scope)


@execute_node.register(ops.Log2, decimal.Decimal)
def execute_decimal_log2(op, data, scope=None):
    """Base-2 log computed as ln(data) / ln(2); NaN on invalid operations."""
    two = decimal.Decimal(2)
    try:
        result = data.ln() / two.ln()
    except decimal.InvalidOperation:
        result = decimal.Decimal('NaN')
    return result


@execute_node.register(ops.UnaryOp, decimal.Decimal)
def execute_decimal_unary(op, data, scope=None):
    """Generic unary fallback for decimal scalars.

    Prefer a `Decimal` method named after the op class (lowercased,
    e.g. Sqrt -> Decimal.sqrt); otherwise fall back to the same-named
    `math` function and re-wrap the float result in a Decimal.
    """
    name = type(op).__name__.lower()
    math_fallback = getattr(math, name, None)
    # the lambda is only invoked when Decimal lacks the method, so a
    # missing math function surfaces lazily, as in the eager lookup case
    func = getattr(
        decimal.Decimal,
        name,
        lambda value: decimal.Decimal(math_fallback(value)),
    )
    try:
        return func(data)
    except decimal.InvalidOperation:
        return decimal.Decimal('NaN')


@execute_node.register(ops.Sign, decimal.Decimal)
def execute_decimal_sign(op, data, scope=None):
    """Return the input for zero, otherwise +/-1 with the sign of `data`."""
    if not data:
        # zero is falsy; return it unchanged
        return data
    return decimal.Decimal(1).copy_sign(data)


@execute_node.register(ops.Abs, decimal.Decimal)
def execute_decimal_abs(op, data, scope=None):
    """Absolute value via the builtin, which honors the decimal context."""
    magnitude = abs(data)
    return magnitude


@execute_node.register(
    ops.Round, decimal.Decimal, (np.integer,) + six.integer_types
)
def execute_round_decimal(op, data, places, scope=None):
    """Round a Decimal to `places` digits by building a quantize() exemplar.

    Negative `places` rounds to the left of the decimal point, which is
    expressed via an exponent-form exemplar ('0.0...E+n').
    """
    # If we only allowed Python 3, we wouldn't have to implement any of this;
    # we could just call round(data, places) :(
    tuple_value = data.as_tuple()
    # total significant digits, and (via the exponent) how many of them
    # lie to the left of the decimal point
    precision = len(tuple_value.digits)
    integer_part_length = precision + min(tuple_value.exponent, 0)

    if places < 0:
        # exponent-form exemplar so quantize() zeroes out the last
        # abs(places) integer digits
        decimal_format_string = '0.{}E+{:d}'.format(
            '0' * (integer_part_length - 1 + places),
            max(integer_part_length + places, abs(places))
        )
    else:
        # plain exemplar: one '0' per integer digit, `places` fractional
        # zeros (e.g. '000.00' for a 3-digit value rounded to 2 places)
        decimal_format_string = '{}.{}'.format(
            '0' * integer_part_length, '0' * places
        )

    # quantize rounds `data` to the same exponent as the exemplar
    places = decimal.Decimal(decimal_format_string)
    return data.quantize(places)


@execute_node.register(ops.Round, decimal.Decimal, type(None))
def execute_round_decimal_no_places(op, data, _, scope=None):
    """Round to the nearest integer, returned as a numpy int64."""
    nearest = round(data)
    return np.int64(nearest)


@execute_node.register(ops.Cast, pd.Series, dt.Decimal)
def execute_cast_series_to_decimal(op, data, type, scope=None):
    """Cast every element of a series to a Decimal with the target type's
    precision and scale."""
    context = decimal.Context(prec=type.precision)
    # exemplar such as '00.00' for precision=4, scale=2; quantize()
    # rounds each converted value to this exponent
    quantum = context.create_decimal(
        '{}.{}'.format(
            '0' * (type.precision - type.scale), '0' * type.scale
        )
    )

    def element_to_decimal(value, context=context, quantum=quantum):
        return context.create_decimal(value).quantize(quantum)

    return data.apply(element_to_decimal)
2 changes: 2 additions & 0 deletions ibis/pandas/dispatch.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import absolute_import

from multipledispatch import Dispatcher


Expand Down
46 changes: 46 additions & 0 deletions ibis/pandas/execution.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import absolute_import

import numbers
import operator
import datetime
import functools
import decimal

import six

Expand Down Expand Up @@ -135,6 +138,38 @@ def execute_cast_series_date(op, data, type, scope=None):
}


@execute_node.register(ops.UnaryOp, pd.Series)
def execute_series_unary_op(op, data, scope=None):
    """Apply a unary op to a series: the same-named numpy ufunc for native
    dtypes, element-wise re-dispatch for object-dtype columns (e.g. Decimal).
    """
    # looked up unconditionally so a missing numpy function fails the
    # same way regardless of dtype
    ufunc = getattr(np, type(op).__name__.lower())
    if data.dtype == np.dtype(np.object_):
        return data.apply(functools.partial(execute_node, op, scope=scope))
    return ufunc(data)


def vectorize_object(op, arg, *args, **kwargs):
    """Evaluate `op` element-wise over an object-dtype series, preserving
    the index and name of `arg`."""
    elementwise = np.vectorize(functools.partial(execute_node, op, **kwargs))
    values = elementwise(arg, *args)
    return pd.Series(values, index=arg.index, name=arg.name)


@execute_node.register(
    ops.Log, pd.Series, (pd.Series, numbers.Real, decimal.Decimal, type(None))
)
def execute_series_log_with_base(op, data, base, scope=None):
    """Log of a series; a null base means natural log. Object-dtype
    columns fall back to element-wise dispatch."""
    if data.dtype == np.dtype(np.object_):
        return vectorize_object(op, data, base, scope=scope)

    natural = np.log(data)
    if base is None:
        return natural
    # change of base: log_b(x) = ln(x) / ln(b)
    return natural / np.log(base)


@execute_node.register(ops.Ln, pd.Series)
def execute_series_natural_log(op, data, scope=None):
    """Natural log of a series; object dtype re-dispatches per element."""
    if data.dtype != np.dtype(np.object_):
        return np.log(data)
    per_element = functools.partial(execute_node, op, scope=scope)
    return data.apply(per_element)


@execute_node.register(ops.Cast, datetime.datetime, dt.String)
def execute_cast_datetime_or_timestamp_to_string(op, data, type, scope=None):
"""Cast timestamps to strings"""
Expand Down Expand Up @@ -213,6 +248,17 @@ def execute_cast_string_literal(op, data, type, scope=None):
return cast_function(data)


@execute_node.register(
    ops.Round,
    pd.Series,
    (pd.Series, np.integer, type(None)) + six.integer_types
)
def execute_round_series(op, data, places, scope=None):
    """Round a series; object dtype vectorizes element-wise, and a null
    `places` means round to zero decimal places."""
    if data.dtype == np.dtype(np.object_):
        return vectorize_object(op, data, places, scope=scope)
    digits = 0 if places is None else places
    return data.round(digits)


@execute_node.register(ops.TableColumn, (pd.DataFrame, DataFrameGroupBy))
def execute_table_column_dataframe_or_dataframe_groupby(op, data, scope=None):
    """Select the column named by the op from a frame or grouped frame."""
    column_name = op.name
    return data[column_name]
Expand Down
Loading