PERF-#4182, FIX-#4059: Add cell-wise execution for binary ops, fix bin ops for empty dataframes #4391

Merged
merged 12 commits into from
Jun 16, 2022
3 changes: 2 additions & 1 deletion docs/release_notes/release_notes-0.16.0.rst
@@ -7,8 +7,9 @@ Key Features and Updates

* Stability and Bugfixes
* FIX-#4543: Fix `read_csv` in case skiprows=<0, []> (#4544)
* FIX-#4059: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)
* Performance enhancements
*
* PERF-#4182: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)
* Benchmarking enhancements
*
* Refactor Codebase
34 changes: 27 additions & 7 deletions modin/core/dataframe/pandas/dataframe/dataframe.py
@@ -2507,18 +2507,38 @@ def binary_op(self, op, right_frame, join_type="outer"):
left_parts, right_parts, joined_index, row_lengths = self._copartition(
0, right_frame, join_type, sort=True
)
# unwrap list returned by `copartition`.
right_parts = right_parts[0]
new_frame = self._partition_mgr_cls.binary_operation(
1, left_parts, lambda l, r: op(l, r), right_parts
new_left_frame = self.__constructor__(
left_parts, joined_index, self.columns, row_lengths, self._column_widths
)
new_columns = self.columns.join(right_frame.columns, how=join_type)
new_right_frame = self.__constructor__(
right_parts[0],
joined_index,
right_frame.columns,
row_lengths,
right_frame._column_widths,
)

(
left_parts,
right_parts,
joined_columns,
column_widths,
) = new_left_frame._copartition(1, new_right_frame, join_type, sort=True)

new_frame = (
np.array([])
if len(left_parts) == 0 or len(right_parts[0]) == 0
else self._partition_mgr_cls.binary_operation(
left_parts, op, right_parts[0]
)
)

return self.__constructor__(
new_frame,
joined_index,
new_columns,
joined_columns,
row_lengths,
column_widths=self._column_widths_cache,
column_widths,
)
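The new `binary_op` flow — copartition along rows, rebuild both frames, copartition along columns, then zip partitions cell-wise — can be sketched with a toy model. This is only an illustration (plain nested lists of NumPy blocks and a no-op `copartition`), not Modin's real API:

```python
import numpy as np

def copartition(axis, left_parts, right_parts):
    # Toy stand-in for PandasDataframe._copartition: assume both operands
    # already share split points along `axis`, so aligning is a no-op here.
    return left_parts, right_parts

def binary_op(op, left_parts, right_parts):
    # Pass 1: align row splits (axis 0); pass 2: align column splits (axis 1).
    left_parts, right_parts = copartition(0, left_parts, right_parts)
    left_parts, right_parts = copartition(1, left_parts, right_parts)
    # Guard mirroring the empty-frame fix: no partitions -> empty result.
    if len(left_parts) == 0 or len(right_parts) == 0:
        return np.array([])
    # Cell-wise application: partition (i, j) on the left pairs with
    # partition (i, j) on the right, with no axis-wide repartitioning.
    return [
        [op(l, r) for l, r in zip(lrow, rrow)]
        for lrow, rrow in zip(left_parts, right_parts)
    ]

left = [[np.ones((2, 2)), np.ones((2, 1))]]
right = [[np.full((2, 2), 2.0), np.full((2, 1), 2.0)]]
result = binary_op(np.add, left, right)
print(result[0][0])  # every cell of the (0, 0) block is 1.0 + 2.0 = 3.0
```

After both copartition passes the two partition grids have identical shapes, which is what makes the simple nested `zip` sufficient.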

@lazy_metadata_decorator(apply_axis="both")
35 changes: 15 additions & 20 deletions modin/core/dataframe/pandas/partitioning/partition_manager.py
@@ -1242,44 +1242,39 @@ def compute_part_size(indexer, remote_part, part_idx, axis):

@classmethod
@wait_computations_if_benchmark_mode
def binary_operation(cls, axis, left, func, right):
def binary_operation(cls, left, func, right):
"""
Apply a function that requires two PandasDataframe objects.
Apply a function that requires two ``PandasDataframe`` objects.

Parameters
----------
axis : {0, 1}
The axis to apply the function over (0 - rows, 1 - columns).
left : np.ndarray
The partitions of left PandasDataframe.
The partitions of left ``PandasDataframe``.
func : callable
The function to apply.
right : np.ndarray
The partitions of right PandasDataframe.
The partitions of right ``PandasDataframe``.

Returns
-------
np.ndarray
A NumPy array with new partitions.
"""
if axis:
left_partitions = cls.row_partitions(left)
right_partitions = cls.row_partitions(right)
else:
left_partitions = cls.column_partitions(left)
right_partitions = cls.column_partitions(right)
[part.drain_call_queue() for part in right.flatten()]
Collaborator:

This looks like a case where we could pass the right partitions together with their respective call queues into the remote calls.

Contributor Author:

We can, but I don't see any benefit from applying the respective call queue of the right partition in the remote call here. That would be useful if we had lazy execution (though even that is questionable, as described below), but here we use `partition.apply`, which executes the remote calls right away.

On this line we start an early drain of the call queue in the background for each right partition and immediately materialize `right_partition._data`. Afterwards, during the serial `left_partition.apply` calls, the queued remote calls of each right partition will, with some probability, already have finished.

With your approach, draining the call queue of `right_partition` would happen only after draining the call queue of `left_partition` inside the remote call.

Also, by providing the right partition's call queue together with `right_partition._data` to `left_partition.apply`, we neither drain `right_partition.call_queue` nor materialize `right_partition._data`. If we work with the right dataframe further down in the code, we will run `drain_call_queue` again for each of its partitions. As a result, the same calls are executed twice in different places.

What benefits do you see in your suggestion, and how would we resolve the issue I described?

Collaborator:

These are certainly valid points, but there are other factors that can affect performance. For instance, if we are considering Ray, at least the following matter: 1. how many partitions the right array contains; 2. how much materialized data of the right partitions will end up in the in-process memory of the driver; 3. how much materialized data of the right partitions will end up in the plasma store.

We can experiment with this as part of a separate issue if we see good timings without passing the right partitions' call queues into the remote calls.

Contributor Author:

I still believe the main issue with the implementation you suggest is the duplication of processing for the right partitions: the first drain of the call queue happens in the remote call from the left partition, and the second happens for the right partition directly in another part of the code.

I mean the following flow:

df1 = pd.DataFrame(...)
df2 = pd.DataFrame(...).T  # Add a transpose operation to the call queue of df2's parts

result = df1 + df2  # Perform cell-wise execution. Each remote call for a left partition also runs the transpose for the right part. The parts of df2 aren't updated.
print(df2)  # Drain the call queue of df2's parts (the transpose operation) a second time

So I suggest experimenting with this new execution flow in a separate issue, because it possibly requires a lot of architectural changes.


func = cls.preprocess_func(func)
result = np.array(
return np.array(
[
left_partitions[i].apply(
func,
num_splits=NPartitions.get(),
other_axis_partition=right_partitions[i],
)
for i in range(len(left_partitions))
[
part.apply(
func,
right[row_idx][col_idx]._data,
)
for col_idx, part in enumerate(left[row_idx])
]
for row_idx in range(len(left))
]
)
return result if axis else result.T
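The thread above hinges on what draining a call queue does. Here is a toy model (illustrative names, not Modin's real partition classes) of the eager-drain strategy used in this PR: materialize the right operand's queued operations once, up front, so each `apply` on the left sees finished data:

```python
class ToyPartition:
    """Minimal stand-in for a Modin partition: data plus deferred calls."""

    def __init__(self, data):
        self._data = data
        self.call_queue = []

    def add_to_apply_calls(self, func):
        # Defer `func` instead of running it now (lazy execution).
        self.call_queue.append(func)
        return self

    def drain_call_queue(self):
        # Run all deferred calls and materialize the result in _data.
        for func in self.call_queue:
            self._data = func(self._data)
        self.call_queue.clear()

    def apply(self, func, other_data):
        # Eager execution: drain our own queue, then apply `func`.
        self.drain_call_queue()
        return ToyPartition(func(self._data, other_data))

left = [[ToyPartition(1), ToyPartition(2)]]
right = [[ToyPartition(10).add_to_apply_calls(lambda d: d * 2),
          ToyPartition(20).add_to_apply_calls(lambda d: d * 2)]]

# Eager drain of the right operand, mirroring
# `[part.drain_call_queue() for part in right.flatten()]` in the diff:
# each right partition's data is materialized once and stays materialized.
for row in right:
    for part in row:
        part.drain_call_queue()

result = [
    [part.apply(lambda l, r: l + r, right[i][j]._data)
     for j, part in enumerate(row)]
    for i, row in enumerate(left)
]
print([[p._data for p in row] for row in result])  # [[21, 42]]
```

Because the drain already emptied each right partition's queue, later uses of the right dataframe do not re-run the deferred operations — the double-work scenario described in the thread.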

@classmethod
@wait_computations_if_benchmark_mode
@@ -525,14 +525,12 @@ def apply_func_to_indices_both_axis(

@classmethod
@progress_bar_wrapper
def binary_operation(cls, axis, left, func, right):
def binary_operation(cls, left, func, right):
"""
Apply a function that requires partitions of two ``PandasOnRayDataframe`` objects.

Parameters
----------
axis : {0, 1}
The axis to apply the function over (0 - rows, 1 - columns).
left : np.ndarray
The partitions of left ``PandasOnRayDataframe``.
func : callable
@@ -546,5 +544,5 @@ def binary_operation(cls, axis, left, func, right):
A NumPy array with new partitions.
"""
return super(PandasOnRayDataframePartitionManager, cls).binary_operation(
axis, left, func, right
left, func, right
)
37 changes: 37 additions & 0 deletions modin/pandas/test/dataframe/test_binary.py
@@ -249,3 +249,40 @@ def test_duplicate_indexes():
modin_df2, pandas_df2 = create_test_dfs({"a": data, "b": data})
df_equals(modin_df1 / modin_df2, pandas_df1 / pandas_df2)
df_equals(modin_df1 / modin_df1, pandas_df1 / pandas_df1)


@pytest.mark.parametrize("subset_operand", ["left", "right"])
def test_mismatched_col_partitions(subset_operand):
data = [0, 1, 2, 3]
modin_df1, pandas_df1 = create_test_dfs({"a": data, "b": data})
modin_df_tmp, pandas_df_tmp = create_test_dfs({"c": data})

modin_df2 = pd.concat([modin_df1, modin_df_tmp], axis=1)
pandas_df2 = pandas.concat([pandas_df1, pandas_df_tmp], axis=1)

if subset_operand == "right":
modin_res = modin_df2 + modin_df1
pandas_res = pandas_df2 + pandas_df1
else:
modin_res = modin_df1 + modin_df2
pandas_res = pandas_df1 + pandas_df2

df_equals(modin_res, pandas_res)


@pytest.mark.parametrize("empty_operand", ["right", "left", "both"])
def test_empty_df(empty_operand):
modin_df, pandas_df = create_test_dfs([0, 1, 2, 0, 1, 2])
modin_df_empty, pandas_df_empty = create_test_dfs()

if empty_operand == "right":
modin_res = modin_df + modin_df_empty
pandas_res = pandas_df + pandas_df_empty
elif empty_operand == "left":
modin_res = modin_df_empty + modin_df
pandas_res = pandas_df_empty + pandas_df
else:
modin_res = modin_df_empty + modin_df_empty
pandas_res = pandas_df_empty + pandas_df_empty

df_equals(modin_res, pandas_res)
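For reference, the behavior the empty-frame fix has to reproduce comes from pandas' label alignment: adding an empty frame yields the union of row and column labels with all-NaN values. A minimal pandas-only check of that semantics (assuming stock pandas behavior):

```python
import pandas as pd

df = pd.DataFrame([0, 1, 2, 0, 1, 2])
empty = pd.DataFrame()

res = df + empty
# Alignment keeps the union of row/column labels, so the result has
# df's shape, but every value is NaN because `empty` contributes nothing.
print(res.shape)               # (6, 1)
print(res.isna().all().all())  # True
```

This is why returning `np.array([])` from the partition layer is only half the story — the resulting frame must still carry the joined index and columns so the NaN-filled shape matches pandas.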