
Fix .loc to work properly when using 'slice' #1159

Merged
14 commits merged on Jan 8, 2020
61 changes: 49 additions & 12 deletions databricks/koalas/indexing.py
@@ -22,7 +22,7 @@
from pandas.api.types import is_list_like
from pyspark import sql as spark
from pyspark.sql import functions as F
from pyspark.sql.types import BooleanType
from pyspark.sql.types import BooleanType, StringType, LongType
from pyspark.sql.utils import AnalysisException

from databricks.koalas.internal import _InternalFrame, HIDDEN_COLUMNS, NATURAL_ORDER_COLUMN_NAME
@@ -233,7 +233,6 @@ class LocIndexer(_LocIndexerLike):

.. note:: Note that contrary to usual python slices, **both** the
start and the stop are included, and the step of the slice is not allowed.
In addition, with a slice, Koalas works as a filter between the range.

.. note:: With a list or array of labels for row selection,
Koalas behaves as a filter without reordering by the labels.
@@ -290,13 +289,9 @@ class LocIndexer(_LocIndexerLike):
Slice with labels for row and single label for column. As mentioned
above, note that both the start and stop of the slice are included.

Also note that the row for 'sidewinder' is included since 'sidewinder'
is between 'cobra' and 'viper'.

>>> df.loc['cobra':'viper', 'max_speed']
cobra 1
viper 4
sidewinder 7
cobra 1
viper 4
Name: max_speed, dtype: int64

Conditional that returns a boolean Series
@@ -400,16 +395,58 @@ def _select_rows(self, rows_sel):
# If slice is None - select everything, so nothing to do
return None, None
elif len(self._internal.index_columns) == 1:
sdf = self._kdf_or_kser._internal.sdf
index = self._kdf_or_kser.index
index_column = index.to_series()
index_data_type = index_column.spark_type
start = rows_sel.start
stop = rows_sel.stop
start_order_column = sdf[NATURAL_ORDER_COLUMN_NAME]
stop_order_column = sdf[NATURAL_ORDER_COLUMN_NAME]

# Look up the '__natural_order__' values of the start and stop labels
# so that the selection keeps the original row order.
start_and_stop = (
sdf.select(index_column._scol, NATURAL_ORDER_COLUMN_NAME)
.where((index_column._scol == start) | (index_column._scol == stop))
.collect())

start = [row[1] for row in start_and_stop if row[0] == start]
start = start[0] if len(start) > 0 else None

stop = [row[1] for row in start_and_stop if row[0] == stop]
stop = stop[-1] if len(stop) > 0 else None

# Assume we use the natural order by default.
start_order_column_type = LongType()
stop_order_column_type = LongType()

# If the index is not monotonically increasing or decreasing and the
# specified value does not exist in the index, raise a KeyError.
if start is None and rows_sel.start is not None:
if not (index.is_monotonic_increasing or index.is_monotonic_decreasing):
raise KeyError(rows_sel.start)
else:
start = rows_sel.start
start_order_column = index_column._scol
start_order_column_type = index_data_type
if stop is None and rows_sel.stop is not None:
if not (index.is_monotonic_increasing or index.is_monotonic_decreasing):
raise KeyError(rows_sel.stop)
else:
stop = rows_sel.stop
stop_order_column = index_column._scol
stop_order_column_type = index_data_type

# If start and stop are the same, simply select every row with that value.
if start == stop:
return index_column._scol == F.lit(rows_sel.start).cast(index_data_type), None

index_column = self._kdf_or_kser.index.to_series()
index_data_type = index_column.spark_type
cond = []
if start is not None:
cond.append(index_column._scol >= F.lit(start).cast(index_data_type))
cond.append(start_order_column >= F.lit(start).cast(start_order_column_type))
if stop is not None:
cond.append(index_column._scol <= F.lit(stop).cast(index_data_type))
cond.append(stop_order_column <= F.lit(stop).cast(stop_order_column_type))

if len(cond) > 0:
return reduce(lambda x, y: x & y, cond), None
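
For reference, here is a minimal sketch of the slicing semantics this hunk targets, written in plain pandas (the frames and values are made up for illustration): on a monotonic index with duplicated labels, a .loc slice keeps everything from the first occurrence of the start label through the last occurrence of the stop label, while a bound missing from a non-monotonic index raises a KeyError. The koalas code above mirrors this by resolving the bounds to '__natural_order__' values and filtering on that column, falling back to value comparison only when the label is absent but the index is monotonic.

```python
import pandas as pd

# Monotonic index with duplicated labels: .loc[:2] keeps every row up to
# and including the *last* occurrence of the label 2.
pdf = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6, 7, 8, 9]},
                   index=[0, 1, 1, 2, 2, 2, 3, 4, 5])
print(pdf.loc[:2])          # rows labelled 0, 1, 1, 2, 2, 2

# Non-monotonic index: a slice bound that is absent from the index cannot
# be resolved to a position, so a KeyError is raised.
pdf2 = pd.DataFrame({'x': [1, 4, 7]}, index=[10, 30, 20])
try:
    pdf2.loc[0:30]
except KeyError as exc:
    print('KeyError:', exc)
```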
5 changes: 1 addition & 4 deletions databricks/koalas/series.py
@@ -3441,10 +3441,7 @@ def truncate(self, before=None, after=None, copy=True):
if before > after:
raise ValueError("Truncate: %s must be after %s" % (after, before))

if indexes_increasing:
result = _col(self.to_frame()[before:after])
else:
result = _col(self.to_frame()[after:before])
result = _col(self.to_frame()[before:after])

return result.copy() if copy else result
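
For context on the truncate() change above: since the reworked .loc slice now handles both increasing and decreasing indexes, truncate() can delegate to a single [before:after] label slice. Below is a minimal pandas sketch of the expected behaviour on an increasing index (data made up for illustration).

```python
import pandas as pd

s = pd.Series([10, 20, 30, 40, 50], index=[1, 2, 3, 4, 5])

# truncate keeps the labels between `before` and `after`, inclusive,
# which matches the label slice .loc[2:4].
print(s.truncate(before=2, after=4))   # labels 2, 3, 4 -> values 20, 30, 40
print(s.loc[2:4])                      # the same rows via a label slice
```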

28 changes: 28 additions & 0 deletions databricks/koalas/tests/test_indexing.py
@@ -223,6 +223,34 @@ def test_loc(self):
self.assert_eq(kdf.loc[1000:], pdf.loc[1000:])
self.assert_eq(kdf.loc[-2000:-1000], pdf.loc[-2000:-1000])

# duplicated index test
pdf = pd.DataFrame(
[1, 2, 3, 4, 5, 6, 7, 8, 9],
index=[0, 1, 1, 2, 2, 2, 3, 4, 5])
kdf = ks.from_pandas(pdf)

self.assert_eq(repr(kdf.loc[:2]), repr(pdf.loc[:2]))

# test a string index where a slice bound ('z') is not present in the index
pdf = pd.DataFrame([1, 2, 3], index=['a', 'b', 'd']).loc['a':'z']
kdf = ks.from_pandas(pdf)

self.assert_eq(repr(kdf.loc['a':'z']), repr(pdf.loc['a':'z']))

# KeyError when the index is not monotonically increasing or decreasing
# and the specified values do not exist in the index
kdf = ks.DataFrame([[1, 2], [4, 5], [7, 8]],
index=['cobra', 'viper', 'sidewinder'])

self.assertRaises(KeyError, lambda: kdf.loc['cobra':'koalas'])
self.assertRaises(KeyError, lambda: kdf.loc['koalas':'viper'])

kdf = ks.DataFrame([[1, 2], [4, 5], [7, 8]],
index=[10, 30, 20])

self.assertRaises(KeyError, lambda: kdf.loc[0:30])
self.assertRaises(KeyError, lambda: kdf.loc[10:100])

def test_loc_non_informative_index(self):
pdf = pd.DataFrame({'x': [1, 2, 3, 4]}, index=[10, 20, 30, 40])
kdf = ks.from_pandas(pdf)