Skip to content

Commit

Permalink
Fix comments & tests
Browse files Browse the repository at this point in the history
  • Loading branch information
itholic committed Aug 30, 2019
1 parent d4068ca commit d35f53d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 31 deletions.
22 changes: 2 additions & 20 deletions databricks/koalas/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,8 @@
# just a truncated repr.
"display.max_rows": 1000, # TODO: None should support unlimited.

# This sets the default index type
# There are three types of default index that can be configured by `compute.default_index_type`
# environment variable.
# - sequence: It implements a sequence that increases one by one, by Window function without
# specifying partition. Therefore, it ends up with whole partition in single node.
# This index type should be avoided when the data is large. This is default.
# - distributed: It implements a monotonically increasing sequence simply by using
# Spark's `monotonically_increasing_id` function. If the index does not have to be
# a sequence that increases one by one, this index should be used.
# Performance-wise, this index almost does not have any penalty comparing to
# other index types. Note that we cannot use this type of index for combining
# two dataframes because it is not guaranteed to have the same indexes in two
# dataframes.
# - distributed-sequence: It implements a sequence that increases one by one, by group-by and
# group-map approach. It still generates the sequential index globally.
# If the default index must be the sequence in a large dataset, this
# index has to be used.
# Note that if more data are added to the data source after creating this index,
# then it does not guarantee the sequential index.
"compute.default_index_type": "sequence"
# This sets the default index type: one of "sequence", "distributed", or "distributed-sequence".
"compute.default_index_type": "sequence",
} # type: Dict[str, Any]


Expand Down
18 changes: 7 additions & 11 deletions databricks/koalas/tests/test_default_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os

import pandas as pd

from databricks import koalas as ks
from databricks.koalas.config import set_option, reset_option
from databricks.koalas.testing.utils import ReusedSQLTestCase, TestUtils


Expand All @@ -26,13 +25,12 @@ class OneByOneDefaultIndexTest(ReusedSQLTestCase, TestUtils):
@classmethod
def setUpClass(cls):
super(OneByOneDefaultIndexTest, cls).setUpClass()
cls.default_index = os.environ.get('DEFAULT_INDEX', 'sequence')
os.environ['DEFAULT_INDEX'] = 'sequence'
set_option('compute.default_index_type', 'sequence')

@classmethod
def tearDownClass(cls):
super(OneByOneDefaultIndexTest, cls).tearDownClass()
os.environ['DEFAULT_INDEX'] = cls.default_index
reset_option('compute.default_index_type')

def test_default_index(self):
sdf = self.spark.range(1000)
Expand All @@ -44,13 +42,12 @@ class DistributedOneByOneDefaultIndexTest(ReusedSQLTestCase, TestUtils):
@classmethod
def setUpClass(cls):
super(DistributedOneByOneDefaultIndexTest, cls).setUpClass()
cls.default_index = os.environ.get('DEFAULT_INDEX', 'sequence')
os.environ['DEFAULT_INDEX'] = 'distributed-sequence'
set_option('compute.default_index_type', 'distributed-sequence')

@classmethod
def tearDownClass(cls):
super(DistributedOneByOneDefaultIndexTest, cls).tearDownClass()
os.environ['DEFAULT_INDEX'] = cls.default_index
reset_option('compute.default_index_type')

def test_default_index(self):
sdf = self.spark.range(1000)
Expand All @@ -62,13 +59,12 @@ class DistributedDefaultIndexTest(ReusedSQLTestCase, TestUtils):
@classmethod
def setUpClass(cls):
super(DistributedDefaultIndexTest, cls).setUpClass()
cls.default_index = os.environ.get('DEFAULT_INDEX', 'sequence')
os.environ['DEFAULT_INDEX'] = 'distributed'
set_option('compute.default_index_type', 'distributed')

@classmethod
def tearDownClass(cls):
super(DistributedDefaultIndexTest, cls).tearDownClass()
os.environ['DEFAULT_INDEX'] = cls.default_index
reset_option('compute.default_index_type')

def test_default_index(self):
sdf = self.spark.range(1000)
Expand Down

0 comments on commit d35f53d

Please sign in to comment.