Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refine apply key generation #3208

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions mars/dataframe/base/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
DictField,
FunctionField,
)
from ...utils import enter_current_session, quiet_stdio, get_func_token_values
from ...utils import enter_current_session, quiet_stdio, get_func_token, tokenize
from ..arrays import ArrowArray
from ..operands import DataFrameOperandMixin, DataFrameOperand
from ..utils import (
Expand All @@ -56,7 +56,7 @@ def _get_logic_key_token_values(self):
self._elementwise,
]
if self.func:
return token_values + get_func_token_values(self.func)
return token_values + [get_func_token(self.func)]
else: # pragma: no cover
return token_values

Expand Down Expand Up @@ -102,6 +102,13 @@ def __init__(
**kw,
)

def _update_key(self):
values = [v for v in self._values_ if v is not self.func] + [
get_func_token(self.func)
]
self._obj_set("_key", tokenize(type(self).__name__, *values))
return self

@property
def func(self):
return self._func
Expand Down
11 changes: 9 additions & 2 deletions mars/dataframe/groupby/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
FunctionField,
)
from ...core.operand import OperatorLogicKeyGeneratorMixin
from ...utils import enter_current_session, quiet_stdio, get_func_token_values
from ...utils import enter_current_session, quiet_stdio, get_func_token, tokenize
from ..operands import DataFrameOperandMixin, DataFrameOperand
from ..utils import (
auto_merge_chunks,
Expand All @@ -43,7 +43,7 @@ class GroupByApplyLogicKeyGeneratorMixin(OperatorLogicKeyGeneratorMixin):
def _get_logic_key_token_values(self):
token_values = super()._get_logic_key_token_values()
if self.func:
return token_values + get_func_token_values(self.func)
return token_values + [get_func_token(self.func)]
else: # pragma: no cover
return token_values

Expand All @@ -62,6 +62,13 @@ class GroupByApply(
def __init__(self, output_types=None, **kw):
super().__init__(_output_types=output_types, **kw)

def _update_key(self):
values = [v for v in self._values_ if v is not self.func] + [
get_func_token(self.func)
]
self._obj_set("_key", tokenize(type(self).__name__, *values))
return self

@classmethod
@redirect_custom_log
@enter_current_session
Expand Down
2 changes: 1 addition & 1 deletion mars/services/task/execution/ray/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def execute_subtask(
# https://user-images.githubusercontent.com/12445254/168569524-f09e42a7-653a-4102-bdf0-cc1631b3168d.png
reducer_chunks = subtask_chunk_graph.successors(start_chunks[0])
reducer_operands = set(c.op for c in reducer_chunks)
if len(reducer_operands) != 1:
if len(reducer_operands) != 1: # pragma: no cover
raise ValueError(
f"Subtask {subtask_id} has more than 1 reduce operands: {subtask_chunk_graph.to_dot()}"
)
Expand Down
14 changes: 7 additions & 7 deletions mars/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,30 +587,30 @@ def test_web_serialize_lambda():


def test_get_func_token_values():
from ..utils import get_func_token_values
from ..utils import _get_func_token_values

assert get_func_token_values(test_get_func_token_values) == [
assert _get_func_token_values(test_get_func_token_values) == [
test_get_func_token_values.__code__.co_code
]
captured_vars = [1, 2, 3]

def closure_func(a, b):
return captured_vars

assert get_func_token_values(closure_func)[1][0] == captured_vars
assert get_func_token_values(partial(closure_func, 1))[0][0] == 1
assert get_func_token_values(partial(closure_func, 1))[-1][0] == captured_vars
assert _get_func_token_values(closure_func)[1][0] == captured_vars
assert _get_func_token_values(partial(closure_func, 1))[0][0] == 1
assert _get_func_token_values(partial(closure_func, 1))[-1][0] == captured_vars

from .._utils import ceildiv

assert get_func_token_values(ceildiv) == [ceildiv.__module__, ceildiv.__name__]
assert _get_func_token_values(ceildiv) == [ceildiv.__module__, ceildiv.__name__]

class Func:
def __call__(self, *args, **kwargs):
pass

func = Func()
assert get_func_token_values(func) == [func]
assert _get_func_token_values(func) == [func]


@pytest.mark.parametrize("id_length", [0, 5, 32, 63])
Expand Down
21 changes: 19 additions & 2 deletions mars/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import numbers
import operator
import os
import weakref

import cloudpickle as pickle
import pkgutil
import random
Expand Down Expand Up @@ -1617,7 +1619,22 @@ def __str__(self):
return new_exc_type().with_traceback(traceback)


def get_func_token_values(func):
_func_token_cache = weakref.WeakKeyDictionary()


def get_func_token(func):
try:
token = _func_token_cache.get(func)
if token is None:
fields = _get_func_token_values(func)
token = tokenize(*fields)
_func_token_cache[func] = token
return token
except TypeError: # cannot create weak reference to func like 'numpy.ufunc'
return tokenize(*_get_func_token_values(func))


def _get_func_token_values(func):
if hasattr(func, "__code__"):
tokens = [func.__code__.co_code]
if func.__closure__ is not None:
Expand All @@ -1630,7 +1647,7 @@ def get_func_token_values(func):
tokens.extend([func.args, func.keywords])
func = func.func
if hasattr(func, "__code__"):
tokens.extend(get_func_token_values(func))
tokens.extend(_get_func_token_values(func))
elif isinstance(func, types.BuiltinFunctionType):
tokens.extend([func.__module__, func.__name__])
else:
Expand Down