Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions modin/data_management/data_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,30 +815,41 @@ def query_builder(df, **kwargs):

def eval(self, expr, **kwargs):
cls = type(self)
columns = self.columns
inplace = kwargs.get("inplace", False)

columns = self.index if self._is_transposed else self.columns
index = self.columns if self._is_transposed else self.index

# Run eval on columns to determine result type
columns_copy = pandas.DataFrame(columns=self.columns)
columns_copy = columns_copy.eval(expr, inplace=False, **kwargs)
expect_series = isinstance(columns_copy, pandas.Series)

# if there is no assignment, then we simply save the results
# in the first column
if expect_series:
if inplace:
raise ValueError("Cannot operate inplace if there is no assignment")
else:
expr = "{0} = {1}".format(columns[0], expr)

def eval_builder(df, **kwargs):
df.columns = columns
result = df.eval(expr, inplace=False, **kwargs)
# If result is a series, expr was not an assignment expression.
if not isinstance(result, pandas.Series):
result.columns = pandas.RangeIndex(0, len(result.columns))
result.columns = pandas.RangeIndex(0, len(result.columns))
return result

func = self._prepare_method(eval_builder, **kwargs)
new_data = self.map_across_full_axis(1, func)

# eval can update the columns, so we must update columns
columns_copy = pandas.DataFrame(columns=columns)
columns_copy = columns_copy.eval(expr, inplace=False, **kwargs)
if isinstance(columns_copy, pandas.Series):
# To create a data manager, we need the
# columns to be in a list-like
columns = list(columns_copy.name)
if expect_series:
result = new_data.to_pandas()[0]
result.name = columns_copy.name
result.index = index
return result
else:
columns = columns_copy.columns

return cls(new_data, self.index, columns)
return cls(new_data, self.index, columns)

def quantile_for_list_of_values(self, **kwargs):
cls = type(self)
Expand Down
11 changes: 7 additions & 4 deletions modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,12 +1344,15 @@ def eval(self, expr, inplace=False, **kwargs):
self._validate_eval_query(expr, **kwargs)
inplace = validate_bool_kwarg(inplace, "inplace")

data_manager = self._data_manager.eval(expr, **kwargs)
result = self._data_manager.eval(expr, **kwargs)

if inplace:
self._update_inplace(new_manager=data_manager)
if isinstance(result, pandas.Series):
return result
else:
return DataFrame(data_manager=data_manager)
if inplace:
self._update_inplace(new_manager=result)
else:
return DataFrame(data_manager=result)

def ewm(self,
com=None,
Expand Down
1 change: 1 addition & 0 deletions modin/pandas/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def to_datetime(arg,

Args:
errors ('raise' or 'ignore'): If 'ignore', errors are silenced.
Pandas blatantly ignores this argument so we will too.
dayfirst (bool): Date format is passed in as day first.
yearfirst (bool): Date format is passed in as year first.
utc (bool): returns a UTC DatetimeIndex if True.
Expand Down
70 changes: 32 additions & 38 deletions modin/pandas/test/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def test_int_dataframe():
test_cumsum(ray_df, pandas_df)
test_pipe(ray_df, pandas_df)

# test_loc(ray_df, pandas_df)
# test_iloc(ray_df, pandas_df)
test_loc(ray_df, pandas_df)
test_iloc(ray_df, pandas_df)

labels = ['a', 'b', 'c', 'd']
test_set_axis(ray_df, pandas_df, labels, 0)
Expand Down Expand Up @@ -1120,30 +1120,28 @@ def test_assign():

def test_astype():
td = TestData()
ray_df = pd.DataFrame(td.frame)
our_df_casted = ray_df.astype(np.int32)
expected_df_casted = pandas.DataFrame(
td.frame.values.astype(np.int32),
ray_df = pd.DataFrame(td.frame.values,
index=td.frame.index,
columns=td.frame.columns)
expected_df = pandas.DataFrame(
td.frame.values,
index=td.frame.index,
columns=td.frame.columns)

assert ray_df_equals_pandas(our_df_casted, expected_df_casted)
ray_df_casted = ray_df.astype(np.int32)
expected_df_casted = expected_df.astype(np.int32)

our_df_casted = ray_df.astype(np.float64)
expected_df_casted = pandas.DataFrame(
td.frame.values.astype(np.float64),
index=td.frame.index,
columns=td.frame.columns)
assert ray_df_equals_pandas(ray_df_casted, expected_df_casted)

assert ray_df_equals_pandas(our_df_casted, expected_df_casted)
ray_df_casted = ray_df.astype(np.float64)
expected_df_casted = expected_df.astype(np.float64)

our_df_casted = ray_df.astype(str)
expected_df_casted = pandas.DataFrame(
td.frame.values.astype(str),
index=td.frame.index,
columns=td.frame.columns)
assert ray_df_equals_pandas(ray_df_casted, expected_df_casted)

assert ray_df_equals_pandas(our_df_casted, expected_df_casted)
ray_df_casted = ray_df.astype(str)
expected_df_casted = expected_df.astype(str)

assert ray_df_equals_pandas(ray_df_casted, expected_df_casted)


def test_at_time():
Expand Down Expand Up @@ -1536,7 +1534,20 @@ def test_eval_df_use_case():
df = pandas.DataFrame(frame_data)
ray_df = pd.DataFrame(frame_data)

# Very hacky test to test eval while inplace is not working
# test eval for series results
tmp_pandas = df.eval(
"arctan2(sin(a), b)",
engine='python',
parser='pandas')
tmp_ray = ray_df.eval(
"arctan2(sin(a), b)",
engine='python',
parser='pandas')

assert isinstance(tmp_ray, pandas.Series)
assert ray_series_equals_pandas(tmp_ray, tmp_pandas)

# Test not inplace assignments
tmp_pandas = df.eval(
"e = arctan2(sin(a), b)",
engine='python',
Expand All @@ -1547,6 +1558,7 @@ def test_eval_df_use_case():
parser='pandas')
assert ray_df_equals_pandas(tmp_ray, tmp_pandas)

# Test inplace assignments
df.eval(
"e = arctan2(sin(a), b)",
engine='python',
Expand All @@ -1573,24 +1585,6 @@ def test_eval_df_arithmetic_subexpression():
assert ray_df_equals_pandas(ray_df, df)


def test_eval_df_series_result():
frame_data = {'a': np.random.randn(10), 'b': np.random.randn(10)}
df = pandas.DataFrame(frame_data)
ray_df = pd.DataFrame(frame_data)

# Very hacky test to test eval while inplace is not working
tmp_pandas = df.eval(
"arctan2(sin(a), b)",
engine='python',
parser='pandas')
tmp_ray = ray_df.eval(
"arctan2(sin(a), b)",
engine='python',
parser='pandas')
assert ray_df_equals_pandas(tmp_ray, tmp_pandas)
assert isinstance(to_pandas(tmp_ray), pandas.Series)


def test_ewm():
ray_df = create_test_dataframe()

Expand Down