-
Notifications
You must be signed in to change notification settings - Fork 697
ENH: Enable unary math operations for pandas, sqlite #1071
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| import collections | ||
| import numbers | ||
| import datetime | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| import decimal | ||
| import math | ||
| import numbers | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
| import six | ||
|
|
||
| import ibis.expr.datatypes as dt | ||
| import ibis.expr.operations as ops | ||
| from ibis.pandas.dispatch import execute_node | ||
|
|
||
|
|
||
@execute_node.register(ops.Ln, decimal.Decimal)
def execute_decimal_natural_log(op, data, scope=None):
    """Return the natural logarithm of a ``decimal.Decimal`` scalar.

    If ``data.ln()`` raises ``decimal.InvalidOperation`` the error is
    swallowed and a NaN decimal is returned instead.
    """
    try:
        result = data.ln()
    except decimal.InvalidOperation:
        result = decimal.Decimal('NaN')
    return result
|
|
||
|
|
||
@execute_node.register(ops.Log, decimal.Decimal, decimal.Decimal)
def execute_decimal_log_with_decimal_base(op, data, base, scope=None):
    """Return the logarithm of ``data`` in the given decimal ``base``.

    Computed as ``ln(data) / ln(base)``. A NaN decimal is returned when
    any step raises ``decimal.InvalidOperation``.
    """
    try:
        numerator = data.ln()
        denominator = base.ln()
        return numerator / denominator
    except decimal.InvalidOperation:
        return decimal.Decimal('NaN')
|
|
||
|
|
||
@execute_node.register(ops.Log, decimal.Decimal, type(None))
def execute_decimal_log_with_no_base(op, data, _, scope=None):
    """Handle ``Log`` on a decimal when no base is given.

    A missing base means the natural logarithm, so this simply delegates
    to ``execute_decimal_natural_log``.
    """
    natural_log = execute_decimal_natural_log
    return natural_log(op, data, scope=scope)
|
|
||
|
|
||
@execute_node.register(ops.Log, decimal.Decimal, numbers.Real)
def execute_decimal_log_with_real_base(op, data, base, scope=None):
    """Handle ``Log`` on a decimal with a real-number base.

    The base is converted to ``decimal.Decimal`` and the call is
    re-dispatched through ``execute_node``.
    """
    decimal_base = decimal.Decimal(base)
    return execute_node(op, data, decimal_base, scope=scope)
|
|
||
|
|
||
@execute_node.register(ops.Log, decimal.Decimal, np.integer)
def execute_decimal_log_with_np_integer_base(op, data, base, scope=None):
    """Handle ``Log`` on a decimal with a NumPy integer base.

    The base is converted to a built-in ``int`` and the call is
    re-dispatched through ``execute_node``.
    """
    python_int_base = int(base)
    return execute_node(op, data, python_int_base, scope=scope)
|
|
||
|
|
||
@execute_node.register(ops.Log2, decimal.Decimal)
def execute_decimal_log2(op, data, scope=None):
    """Return the base-2 logarithm of a ``decimal.Decimal`` scalar.

    Computed as ``ln(data) / ln(2)``. A NaN decimal is returned when the
    computation raises ``decimal.InvalidOperation``.
    """
    try:
        numerator = data.ln()
        denominator = decimal.Decimal(2).ln()
        return numerator / denominator
    except decimal.InvalidOperation:
        return decimal.Decimal('NaN')
|
|
||
|
|
||
@execute_node.register(ops.UnaryOp, decimal.Decimal)
def execute_decimal_unary(op, data, scope=None):
    """Apply a generic unary operation to a ``decimal.Decimal`` scalar.

    The lower-cased class name of ``op`` selects the implementation. A
    ``decimal.Decimal`` method of that name is preferred; otherwise the
    ``math`` function of that name is applied and its result converted back
    to ``decimal.Decimal``.

    Returns
    -------
    decimal.Decimal
        The result, or a NaN decimal when the computation raises
        ``decimal.InvalidOperation`` or ``ValueError`` (the ``math`` module
        reports domain errors as ``ValueError``).

    Raises
    ------
    NotImplementedError
        If neither ``decimal.Decimal`` nor ``math`` provides the operation.
    """
    operation_name = type(op).__name__.lower()
    function = getattr(decimal.Decimal, operation_name, None)

    if function is None:
        math_function = getattr(math, operation_name, None)
        if math_function is None:
            # Fail with a clear message instead of the opaque
            # "'NoneType' object is not callable" from calling a missing
            # math function.
            raise NotImplementedError(
                'Operation {!r} is not supported for decimal.Decimal '
                'values'.format(type(op).__name__)
            )

        def function(value):
            return decimal.Decimal(math_function(value))

    try:
        return function(data)
    except (decimal.InvalidOperation, ValueError):
        return decimal.Decimal('NaN')
|
|
||
|
|
||
@execute_node.register(ops.Sign, decimal.Decimal)
def execute_decimal_sign(op, data, scope=None):
    """Return the sign of a ``decimal.Decimal`` scalar.

    Zero and NaN inputs are returned unchanged; any other value yields
    ``Decimal(1)`` carrying the sign of ``data`` (i.e. ``1`` or ``-1``).
    """
    # NaN is truthy, so without the explicit check it would be reported as
    # +/-1 rather than propagated.
    if not data or data.is_nan():
        return data
    return decimal.Decimal(1).copy_sign(data)
|
|
||
|
|
||
@execute_node.register(ops.Abs, decimal.Decimal)
def execute_decimal_abs(op, data, scope=None):
    """Return the absolute value of a ``decimal.Decimal`` scalar."""
    absolute_value = abs(data)
    return absolute_value
|
|
||
|
|
||
@execute_node.register(
    ops.Round, decimal.Decimal, (np.integer,) + six.integer_types
)
def execute_round_decimal(op, data, places, scope=None):
    """Round a ``decimal.Decimal`` scalar to ``places`` decimal places.

    Parameters
    ----------
    data : decimal.Decimal
        Value to round.
    places : int or numpy.integer
        Number of fractional digits to keep. Negative values round to the
        left of the decimal point (``-2`` rounds to hundreds).

    Returns
    -------
    decimal.Decimal
        ``data`` quantized to the exponent ``-places``, rounded according to
        the active decimal context.
    """
    # If we only allowed Python 3 we could just call round(data, places).
    # ``quantize`` only looks at the exponent of its argument, so the quantum
    # 1E-places is built directly. Deriving a zero-padded format string from
    # the digits of ``data`` breaks for values with no integer digits
    # (the string becomes '.'), for some negative ``places``, and for
    # NaN/Infinity, whose exponent is not an integer.
    quantum = decimal.Decimal((0, (1,), -int(places)))
    return data.quantize(quantum)
|
|
||
|
|
||
@execute_node.register(ops.Round, decimal.Decimal, type(None))
def execute_round_decimal_no_places(op, data, _, scope=None):
    """Round a ``decimal.Decimal`` scalar when no ``places`` is given.

    The value is passed to the built-in ``round`` and the result is
    returned as a ``numpy.int64``.
    """
    nearest = round(data)
    return np.int64(nearest)
|
|
||
|
|
||
@execute_node.register(ops.Cast, pd.Series, dt.Decimal)
def execute_cast_series_to_decimal(op, data, type, scope=None):
    """Cast every element of a Series to ``decimal.Decimal``.

    Each value is created in a ``decimal.Context`` whose precision is
    ``type.precision`` and is then quantized so that it carries
    ``type.scale`` fractional digits.
    """
    precision, scale = type.precision, type.scale
    context = decimal.Context(prec=precision)

    # Template such as '000.00': only its exponent matters to ``quantize``.
    integer_zeros = '0' * (precision - scale)
    fraction_zeros = '0' * scale
    places = context.create_decimal(
        '{}.{}'.format(integer_zeros, fraction_zeros),
    )

    def to_decimal(value):
        return context.create_decimal(value).quantize(places)

    return data.apply(to_decimal)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,5 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| from multipledispatch import Dispatcher | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,10 @@ | ||
| from __future__ import absolute_import | ||
|
|
||
| import numbers | ||
| import operator | ||
| import datetime | ||
| import functools | ||
| import decimal | ||
|
|
||
| import six | ||
|
|
||
|
|
@@ -135,6 +138,38 @@ def execute_cast_series_date(op, data, type, scope=None): | |
| } | ||
|
|
||
|
|
||
@execute_node.register(ops.UnaryOp, pd.Series)
def execute_series_unary_op(op, data, scope=None):
    """Apply a generic unary operation to a pandas Series.

    Object-dtype Series are processed element by element through
    ``execute_node``. Any other dtype is handed to the NumPy function whose
    name is the lower-cased class name of ``op``.

    Raises
    ------
    AttributeError
        If ``data`` is not object-dtype and NumPy has no function with the
        required name.
    """
    if data.dtype == np.dtype(np.object_):
        # NOTE(review): the PR carried an inline review thread on this line;
        # the question is truncated in this capture and the author's reply
        # was "Don't think so".
        return data.apply(functools.partial(execute_node, op, scope=scope))

    # Look the NumPy function up only when it is actually needed, so that
    # object-dtype data does not fail for operations NumPy does not provide.
    function = getattr(np, type(op).__name__.lower())
    return function(data)
|
|
||
|
|
||
def vectorize_object(op, arg, *args, **kwargs):
    """Apply ``execute_node(op, ...)`` element-wise over a Series.

    Parameters
    ----------
    op
        Operation forwarded as the first argument of ``execute_node``.
    arg : pandas.Series
        Series whose elements are processed; its index and name are reused
        for the result.
    *args
        Extra positional arguments passed through ``numpy.vectorize``
        together with ``arg``.
    **kwargs
        Keyword arguments bound to every ``execute_node`` call.

    Returns
    -------
    pandas.Series
        Element-wise results, aligned with ``arg``.
    """
    # ``np.vectorize`` cannot infer an output type from zero elements and
    # raises ValueError, so an empty input is answered directly.
    if arg.empty:
        return pd.Series([], index=arg.index, name=arg.name, dtype=arg.dtype)

    func = np.vectorize(functools.partial(execute_node, op, **kwargs))
    return pd.Series(func(arg, *args), index=arg.index, name=arg.name)
|
|
||
|
|
||
@execute_node.register(
    ops.Log, pd.Series, (pd.Series, numbers.Real, decimal.Decimal, type(None))
)
def execute_series_log_with_base(op, data, base, scope=None):
    """Compute the logarithm of a Series in an optional base.

    Object-dtype Series are processed element by element through
    ``vectorize_object``. Otherwise the result is ``np.log(data)`` when
    ``base`` is ``None`` and ``np.log(data) / np.log(base)`` when a base is
    given.
    """
    if data.dtype == np.dtype(np.object_):
        return vectorize_object(op, data, base, scope=scope)

    if base is None:
        return np.log(data)

    # ``np.log`` has no loop for a Decimal scalar, so a Decimal base is
    # converted to float before being used with non-object data.
    if isinstance(base, decimal.Decimal):
        base = float(base)
    return np.log(data) / np.log(base)
|
|
||
|
|
||
@execute_node.register(ops.Ln, pd.Series)
def execute_series_natural_log(op, data, scope=None):
    """Compute the natural logarithm of a Series.

    Object-dtype Series are processed element by element through
    ``execute_node``; any other dtype goes straight to ``np.log``.
    """
    is_object = data.dtype == np.dtype(np.object_)
    if not is_object:
        return np.log(data)

    elementwise = functools.partial(execute_node, op, scope=scope)
    return data.apply(elementwise)
|
|
||
|
|
||
| @execute_node.register(ops.Cast, datetime.datetime, dt.String) | ||
| def execute_cast_datetime_or_timestamp_to_string(op, data, type, scope=None): | ||
| """Cast timestamps to strings""" | ||
|
|
@@ -213,6 +248,17 @@ def execute_cast_string_literal(op, data, type, scope=None): | |
| return cast_function(data) | ||
|
|
||
|
|
||
@execute_node.register(
    ops.Round,
    pd.Series,
    (pd.Series, np.integer, type(None)) + six.integer_types
)
def execute_round_series(op, data, places, scope=None):
    """Round a Series to ``places`` decimal places.

    Object-dtype Series are processed element by element through
    ``vectorize_object``. For any other dtype ``Series.round`` is used,
    with ``places`` defaulting to ``0`` when it is ``None``.
    """
    is_object = data.dtype == np.dtype(np.object_)
    if is_object:
        return vectorize_object(op, data, places, scope=scope)

    decimals = 0 if places is None else places
    return data.round(decimals)
|
|
||
|
|
||
@execute_node.register(ops.TableColumn, (pd.DataFrame, DataFrameGroupBy))
def execute_table_column_dataframe_or_dataframe_groupby(op, data, scope=None):
    """Select the column named ``op.name`` from a DataFrame or a groupby."""
    column_name = op.name
    return data[column_name]
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yikes. I guess we should make a NaN-friendly type inference function someplace (seems like an oversight in infer_dtype originally)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you post an issue in the pandas tracker about this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done: pandas-dev/pandas#17059
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I PR'd it :) pandas-dev/pandas#17066.