From a5da4a89eeb3033c5c2acfa14ce1276c4b694f19 Mon Sep 17 00:00:00 2001
From: Weiwei Zhang <weiwzhang@berkeley.edu>
Date: Tue, 18 Sep 2018 13:39:40 -0700
Subject: [PATCH 1/2] added docs for data_manager.py starting line1250

---
 modin/data_management/data_manager.py | 112 ++++++++++++++++++++++++--
 1 file changed, 104 insertions(+), 8 deletions(-)

diff --git a/modin/data_management/data_manager.py b/modin/data_management/data_manager.py
index b3cd03015c8..04e9d1af066 100644
--- a/modin/data_management/data_manager.py
+++ b/modin/data_management/data_manager.py
@@ -1550,6 +1550,14 @@ def from_pandas(cls, df, block_partitions_cls):
 
     # __getitem__ methods
     def getitem_single_key(self, key):
+        """Get item for a single target index.
+
+        Args:
+            key: Target index by which to retrieve data.
+
+        Returns:
+            A new PandasDataManager.
+        """
         numeric_index = self.columns.get_indexer_for([key])
 
         new_data = self.getitem_column_array([key])
@@ -1562,7 +1570,14 @@ def getitem_single_key(self, key):
             return new_data.to_pandas()[key]
 
     def getitem_column_array(self, key):
+        """Get column data for target index.
+
+        Args:
+            key: Target index by which to retrieve data.
 
+        Returns:
+            A new PandasDataManager.
+        """
         # Convert to list for type checking
         numeric_indices = list(self.columns.get_indexer_for(key))
 
@@ -1580,6 +1595,14 @@ def getitem(df, internal_indices=[]):
         return self.__constructor__(result, self.index, new_columns, new_dtypes)
 
     def getitem_row_array(self, key):
+        """Get row data for target index.
+
+        Args:
+            key: Target index by which to retrieve data.
+
+        Returns:
+            A new PandasDataManager.
+        """
         # Convert to list for type checking
         numeric_indices = list(self.index.get_indexer_for(key))
 
@@ -1600,8 +1623,15 @@ def delitem(self, key):
         return self.drop(columns=[key])
 
     def drop(self, index=None, columns=None):
+        """Remove data for target index and columns.
 
+        Args:
+            index: Target index to drop.
+            columns: Target columns to drop.
 
+        Returns:
+            A new PandasDataManager.
+        """
         if index is None:
             new_data = self.data
             new_index = self.index
@@ -1637,8 +1667,16 @@ def delitem(df, internal_indices=[]):
     # return a new one from here and let the front end handle the inplace
     # update.
     def insert(self, loc, column, value):
+        """Get row data for target index.
 
+        Args:
+            loc: Insertion index.
+            column: Column labels to insert.
+            value: Dtype object values to insert.
 
+        Returns:
+            A new PandasDataManager.
+        """
         def insert(df, internal_indices=[]):
             internal_idx = internal_indices[0]
             df.insert(internal_idx, internal_idx, value, allow_duplicates=True)
@@ -1660,6 +1698,15 @@ def insert(df, internal_indices=[]):
     # There is a wide range of behaviors that are supported, so a lot of the
     # logic can get a bit convoluted.
     def apply(self, func, axis, *args, **kwargs):
+        """Apply func across given axis.
+
+        Args:
+            func: The function to apply.
+            axis: Target axis to apply the function along.
+
+        Returns:
+            A new PandasDataManager.
+        """
         if callable(func):
             return self._callable_func(func, axis, *args, **kwargs)
         elif isinstance(func, dict):
@@ -1670,6 +1717,15 @@ def apply(self, func, axis, *args, **kwargs):
             pass
 
     def _post_process_apply(self, result_data, axis, try_scale=True):
+        """Recompute the index after applying function.
+
+        Args:
+            result_data: a BlockPartitions object.
+            axis: Target axis along which function was applied.
+
+        Returns:
+            A new PandasDataManager.
+        """
         if try_scale:
             try:
                 index = self.compute_index(0, result_data, True)
@@ -1702,6 +1758,15 @@ def _post_process_apply(self, result_data, axis, try_scale=True):
         return self.__constructor__(result_data, index, columns)
 
     def _dict_func(self, func, axis, *args, **kwargs):
+        """Apply function to certain indices across given axis.
+
+        Args:
+            func: The function to apply.
+            axis: Target axis to apply the function along.
+
+        Returns:
+            A new PandasDataManager.
+        """
         if "axis" not in kwargs:
             kwargs["axis"] = axis
 
@@ -1728,7 +1793,15 @@ def dict_apply_builder(df, func_dict={}):
         return full_result
 
     def _list_like_func(self, func, axis, *args, **kwargs):
+        """Apply list-like function across given axis.
+
+        Args:
+            func: The function to apply.
+            axis: Target axis to apply the function along.
 
+        Returns:
+            A new PandasDataManager.
+        """
         func_prepared = self._prepare_method(lambda df: df.apply(func, *args, **kwargs))
         new_data = self.map_across_full_axis(axis, func_prepared)
 
@@ -1737,7 +1810,15 @@ def _list_like_func(self, func, axis, *args, **kwargs):
         return self.__constructor__(new_data, new_index, self.columns)
 
     def _callable_func(self, func, axis, *args, **kwargs):
+        """Apply a callable function across given axis.
 
+        Args:
+            func: The function to apply.
+            axis: Target axis to apply the function along.
+
+        Returns:
+            A new PandasDataManager.
+        """
         def callable_apply_builder(df, func, axis, index, *args, **kwargs):
             if not axis:
                 df.index = index
@@ -1763,8 +1844,9 @@ def callable_apply_builder(df, func, axis, index, *args, **kwargs):
     def _manual_repartition(self, axis, repartition_func, **kwargs):
         """This method applies all manual partitioning functions.
 
-        :param axis:
-        :param repartition_func:
+        Args:
+            axis: The axis to shuffle data along.
+            repartition_func: The function used to repartition data.
 
         Returns:
             A `BlockPartitions` object.
@@ -1787,6 +1869,14 @@ def groupby_agg_builder(df):
     # END Manual Partitioning methods
 
     def get_dummies(self, columns, **kwargs):
+        """Convert categorical variables to dummy variables for certain columns.
+
+        Args:
+            columns: The columns to convert.
+
+        Returns:
+            A new PandasDataManager.
+        """
         cls = type(self)
 
         # `columns` as None does not mean all columns, by default it means only
@@ -1899,9 +1989,12 @@ def global_idx_to_numeric_idx(self, axis, indices):
         """
         Note: this function involves making copies of the index in memory.
 
-        :param axis:
-        :param indices:
-        :return:
+        Args:
+            axis: Axis to extract indices.
+            indices: Indices to convert to numerical.
+
+        Returns:
+            An Index object.
         """
         assert axis in ['row', 'col', 'columns']
         if axis == 'row':
@@ -1932,8 +2025,9 @@ def __init__(self,
                  index_map_series: pandas.Series=None,
                  columns_map_series: pandas.Series=None):
         """
-        :param index_map_series: a Pandas Series Object mapping user-facing index to numeric index.
-        :param columns_map_series: a Pandas Series Object mapping user-facing index to numeric index.
+        Args:
+            index_map_series: a Pandas Series Object mapping user-facing index to numeric index.
+            columns_map_series: a Pandas Series Object mapping user-facing columns to numeric index.
         """
         assert index_map_series is not None
         assert columns_map_series is not None
@@ -1956,7 +2050,9 @@ def __constructor__(self, block_partitions_object: BlockPartitions, index: panda
     def _get_data(self) -> BlockPartitions:
         """
         Perform the map step
-        :return:
+
+        Returns:
+            A BlockPartitions object.
         """
         def iloc(partition, row_internal_indices, col_internal_indices):
             return partition.iloc[row_internal_indices, col_internal_indices]

From fbabef27c96bc9c4928367f9595a6d6ded0e3fa8 Mon Sep 17 00:00:00 2001
From: Weiwei Zhang <weiwzhang@berkeley.edu>
Date: Fri, 21 Sep 2018 17:18:14 -0700
Subject: [PATCH 2/2] added and revised some func docs

---
 modin/data_management/data_manager.py | 36 +++++++++++++++++++++------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/modin/data_management/data_manager.py b/modin/data_management/data_manager.py
index 04e9d1af066..673a513f311 100644
--- a/modin/data_management/data_manager.py
+++ b/modin/data_management/data_manager.py
@@ -1202,6 +1202,11 @@ def diff(self, **kwargs):
         return self.__constructor__(new_data, self.index, self.columns)
 
     def dropna(self, **kwargs):
+        """Returns a new DataManager with null values dropped along given axis.
+
+        Returns:
+            A new PandasDataManager with null values dropped.
+        """
         axis = kwargs.get("axis", 0)
         subset = kwargs.get("subset")
         thresh = kwargs.get("thresh")
@@ -1243,13 +1248,20 @@ def dropna(self, **kwargs):
         return self.drop(index=rm_from_index, columns=rm_from_columns)
 
     def eval(self, expr, **kwargs):
+        """Returns a new DataManager with expr evaluated on columns.
+
+        Args:
+            expr: The string expression to evaluate.
 
+        Returns:
+            A new PandasDataManager with new columns after applying expr.
+        """
         inplace = kwargs.get("inplace", False)
 
         columns = self.index if self._is_transposed else self.columns
         index = self.columns if self._is_transposed else self.index
 
-        # Dun eval on columns to determine result type
+        # Eval on an empty DataFrame with the same columns to determine whether the result type is a Series
         columns_copy = pandas.DataFrame(columns=self.columns)
         columns_copy = columns_copy.eval(expr, inplace=False, **kwargs)
         expect_series = isinstance(columns_copy, pandas.Series)
@@ -1281,6 +1293,11 @@ def eval_builder(df, **kwargs):
             return self.__constructor__(new_data, self.index, columns)
 
     def mode(self, **kwargs):
+        """Returns a new DataManager with modes calculated for each label along given axis.
+
+        Returns:
+            A new PandasDataManager with modes calculated.
+        """
         axis = kwargs.get("axis", 0)
         func = self._prepare_method(pandas.DataFrame.mode, **kwargs)
         new_data = self.map_across_full_axis(axis, func)
@@ -1300,8 +1317,11 @@ def mode(self, **kwargs):
         return self.__constructor__(final_data, new_index, new_columns, self._dtype_cache)
 
     def fillna(self, **kwargs):
+        """Returns a new DataManager with null values filled by given values or according to given method.
 
-
+        Returns:
+            A new PandasDataManager with null values filled.
+        """
         axis = kwargs.get("axis", 0)
         value = kwargs.get("value")
 
@@ -1570,10 +1590,10 @@ def getitem_single_key(self, key):
             return new_data.to_pandas()[key]
 
     def getitem_column_array(self, key):
-        """Get column data for target index.
+        """Get column data for target labels.
 
         Args:
-            key: Target index by which to retrieve data.
+            key: Target labels by which to retrieve data.
 
         Returns:
             A new PandasDataManager.
@@ -1595,10 +1615,10 @@ def getitem(df, internal_indices=[]):
         return self.__constructor__(result, self.index, new_columns, new_dtypes)
 
     def getitem_row_array(self, key):
-        """Get row data for target index.
+        """Get row data for target labels.
 
         Args:
-            key: Target index by which to retrieve data.
+            key: Target labels by which to retrieve data.
 
         Returns:
             A new PandasDataManager.
@@ -1667,7 +1687,7 @@ def delitem(df, internal_indices=[]):
     # return a new one from here and let the front end handle the inplace
     # update.
     def insert(self, loc, column, value):
-        """Get row data for target index.
+        """Insert new column data.
 
         Args:
             loc: Insertion index.
@@ -1675,7 +1695,7 @@ def insert(self, loc, column, value):
             value: Dtype object values to insert.
 
         Returns:
-            A new PandasDataManager.
+            A new PandasDataManager with new data inserted.
         """
         def insert(df, internal_indices=[]):
             internal_idx = internal_indices[0]