Commit b41a9c3

Use spark.write.csv in to_csv of Series and DataFrame
1 parent 28f29da commit b41a9c3
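
In short, `to_csv` now delegates to Spark's CSV writer whenever a path is given, and collects to pandas when it is not. A minimal usage sketch of the resulting behavior, assuming a running Spark session (the DataFrame and output path below are illustrative, not taken from the commit):

    import databricks.koalas as ks

    kdf = ks.DataFrame({'x': [1, 2, 3]})

    # No path: data is collected to the driver and pandas' to_csv
    # returns the CSV content as a string.
    csv_string = kdf.to_csv()

    # With a path: Spark writes a directory of part-... files under the
    # given path; num_files repartitions the data so that exactly that
    # many files are produced.
    kdf.to_csv(path='/tmp/to_csv_demo', num_files=1)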

File tree

4 files changed: +178 -155 lines changed

databricks/koalas/generic.py (+90 -31)
@@ -456,21 +456,24 @@ def to_numpy(self):
         """
         return self.to_pandas().values

-    def to_csv(self, path_or_buf=None, sep=',', na_rep='', columns=None, header=True,
-               index=True, encoding=None, quotechar='"', date_format=None, escapechar=None):
+    def to_csv(self, path=None, sep=',', na_rep='', columns=None, header=True,
+               quotechar='"', date_format=None, escapechar=None, num_files=None,
+               **kwargs):
         r"""
         Write object to a comma-separated values (csv) file.

-        .. note:: Spark writes files to HDFS by default.
-            If you want to save the file locally, you need to use path like below
-            `'files:/' + local paths` like 'files:/work/data.csv'. Otherwise,
-            you will write the file to the HDFS path where the spark program starts.
+        .. note:: Koalas `to_csv` writes files to a path or URI. Unlike pandas',
+            Koalas respects HDFS's property such as 'fs.default.name'.
+
+        .. note:: Koalas writes CSV files into the directory, `path`, and writes
+            multiple `part-...` files in the directory when `path` is specified.
+            This behaviour was inherited from Apache Spark. The number of files
+            can be controlled by `num_files`.

         Parameters
         ----------
-        path_or_buf : str or file handle, default None
-            File path or object, if None is provided the result is returned as
-            a string.
+        path : str, default None
+            File path. If None is provided the result is returned as a string.
         sep : str, default ','
             String of length 1. Field delimiter for the output file.
         na_rep : str, default ''
@@ -480,18 +483,20 @@ def to_csv(self, path_or_buf=None, sep=',', na_rep='', columns=None, header=True
         header : bool or list of str, default True
             Write out the column names. If a list of strings is given it is
             assumed to be aliases for the column names.
-        index : bool, default True
-            Write row names (index).
-        encoding : str, optional
-            A string representing the encoding to use in the output file,
-            defaults to 'utf-8'.
         quotechar : str, default '\"'
             String of length 1. Character used to quote fields.
         date_format : str, default None
             Format string for datetime objects.
         escapechar : str, default None
             String of length 1. Character used to escape `sep` and `quotechar`
             when appropriate.
+        num_files : the number of files to be written in `path` directory when
+            this is a path.
+        kwargs : keyword arguments for additional options specific to PySpark.
+            These kwargs are passed through to PySpark's CSV options. Check
+            the options in PySpark's API documentation for spark.write.csv(...).
+            They have higher priority and overwrite all other options.
+            This parameter only works when `path` is specified.

         See Also
         --------
@@ -500,40 +505,94 @@ def to_csv(self, path_or_buf=None, sep=',', na_rep='', columns=None, header=True
         DataFrame.to_table
         DataFrame.to_parquet
         DataFrame.to_spark_io
+
         Examples
         --------
         >>> df = ks.DataFrame(dict(
         ...    date=list(pd.date_range('2012-1-1 12:00:00', periods=3, freq='M')),
         ...    country=['KR', 'US', 'JP'],
         ...    code=[1, 2 ,3]), columns=['date', 'country', 'code'])
-        >>> df
-                         date country  code
-        0 2012-01-31 12:00:00      KR     1
-        1 2012-02-29 12:00:00      US     2
-        2 2012-03-31 12:00:00      JP     3
-        >>> df.to_csv(path=r'%s/to_csv/foo.csv' % path)
+        >>> df.sort_values(by="date")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+                          date country  code
+        ... 2012-01-31 12:00:00      KR     1
+        ... 2012-02-29 12:00:00      US     2
+        ... 2012-03-31 12:00:00      JP     3
+
+        >>> print(df.to_csv())  # doctest: +NORMALIZE_WHITESPACE
+        date,country,code
+        2012-01-31 12:00:00,KR,1
+        2012-02-29 12:00:00,US,2
+        2012-03-31 12:00:00,JP,3
+
+        >>> df.to_csv(path=r'%s/to_csv/foo.csv' % path, num_files=1)
+        >>> ks.read_csv(
+        ...    path=r'%s/to_csv/foo.csv' % path
+        ... ).sort_values(by="date")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+                          date country  code
+        ... 2012-01-31 12:00:00      KR     1
+        ... 2012-02-29 12:00:00      US     2
+        ... 2012-03-31 12:00:00      JP     3
+
+        In the case of a Series,
+
+        >>> print(df.date.to_csv())  # doctest: +NORMALIZE_WHITESPACE
+        date
+        2012-01-31 12:00:00
+        2012-02-29 12:00:00
+        2012-03-31 12:00:00
+
+        >>> df.date.to_csv(path=r'%s/to_csv/foo.csv' % path, num_files=1)
+        >>> ks.read_csv(
+        ...    path=r'%s/to_csv/foo.csv' % path
+        ... ).sort_values(by="date")  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
+                          date
+        ... 2012-01-31 12:00:00
+        ... 2012-02-29 12:00:00
+        ... 2012-03-31 12:00:00
         """
+        if path is None:
+            # If path is None, just collect and use pandas' to_csv.
+            kdf_or_ser = self
+            if isinstance(self, ks.DataFrame):
+                return kdf_or_ser.to_pandas().to_csv(
+                    None, sep=sep, na_rep=na_rep, columns=columns,
+                    header=header, quotechar=quotechar,
+                    date_format=date_format, escapechar=escapechar, index=False)
+            elif isinstance(self, ks.Series):
+                # pandas 0.23 does not seem to have a 'columns' parameter in Series' to_csv.
+                return kdf_or_ser.to_pandas().to_csv(
+                    None, sep=sep, na_rep=na_rep,
+                    header=header, quotechar=quotechar,
+                    date_format=date_format, escapechar=escapechar, index=False)
+            else:
+                raise TypeError('Constructor expects DataFrame or Series; however, '
+                                'got [%s]' % (self,))
+
         if columns is not None:
             data_columns = columns
         else:
             data_columns = self._internal.data_columns

-        if index:
-            index_columns = self._internal.index_columns
-        else:
-            index_columns = []
+        kdf = self
+        if isinstance(self, ks.Series):
+            kdf = self._kdf

         if isinstance(header, list):
-            sdf = self._sdf.select(index_columns +
-                                   [self._internal.scol_for(old_name).alias(new_name)
-                                    for (old_name, new_name) in zip(data_columns, header)])
+            sdf = kdf._sdf.select(
+                [self._internal.scol_for(old_name).alias(new_name)
+                 for (old_name, new_name) in zip(data_columns, header)])
             header = True
         else:
-            sdf = self._sdf.select(index_columns + data_columns)
+            sdf = kdf._sdf.select(data_columns)
+
+        if num_files is not None:
+            sdf = sdf.repartition(num_files)

-        sdf.write.csv(path=path_or_buf, sep=sep, nullValue=na_rep, header=header,
-                      encoding=encoding, quote=quotechar, dateFormat=date_format,
-                      charToEscapeQuoteEscaping=escapechar)
+        builder = sdf.write.mode("overwrite").options(
+            path=path, sep=sep, nullValue=na_rep, header=header,
+            quote=quotechar, dateFormat=date_format,
+            charToEscapeQuoteEscaping=escapechar)
+        builder.options(**kwargs).format("csv").save(path)

     def to_json(self, path_or_buf=None, orient=None, date_format=None,
                 double_precision=10, force_ascii=True, date_unit='ms',
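
One detail of the new writer chain deserves a note: `.options()` on a PySpark DataFrameWriter merges keys into a single option map, so the later `builder.options(**kwargs)` call replaces any named parameter that shares a key. A short sketch of that precedence, assuming an existing Spark DataFrame `sdf` and an illustrative output path:

    # sep=',' is set first by the named parameters ...
    builder = sdf.write.mode("overwrite").options(sep=',', nullValue='')
    # ... and a later .options() call with the same key wins, which is why
    # kwargs is documented as having higher priority than the named arguments.
    builder.options(sep='|').format("csv").save('/tmp/out')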

databricks/koalas/series.py (-79)
@@ -1016,85 +1016,6 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False):
         else:
             return kdf

-    def to_csv(self, path_or_buf=None, sep=',', na_rep='', columns=None, header=True,
-               index=True, encoding=None, quotechar='"', date_format=None, escapechar=None):
-        r"""
-        Write object to a comma-separated values (csv) file.
-
-        .. note:: Spark writes files to HDFS by default.
-            If you want to save the file locally, you need to use path like below
-            `'files:/' + local paths` like 'files:/work/data.csv'. Otherwise,
-            you will write the file to the HDFS path where the spark program starts.
-
-        Parameters
-        ----------
-        path_or_buf : str or file handle, default None
-            File path or object, if None is provided the result is returned as
-            a string.
-        sep : str, default ','
-            String of length 1. Field delimiter for the output file.
-        na_rep : str, default ''
-            Missing data representation.
-        columns : sequence, optional
-            Columns to write.
-        header : bool or list of str, default True
-            Write out the column names. If a list of strings is given it is
-            assumed to be aliases for the column names.
-        index : bool, default True
-            Write row names (index).
-        encoding : str, optional
-            A string representing the encoding to use in the output file,
-            defaults to 'utf-8'.
-        quotechar : str, default '\"'
-            String of length 1. Character used to quote fields.
-        date_format : str, default None
-            Format string for datetime objects.
-        escapechar : str, default None
-            String of length 1. Character used to escape `sep` and `quotechar`
-            when appropriate.
-
-        See Also
-        --------
-        read_csv
-        DataFrame.to_delta
-        DataFrame.to_table
-        DataFrame.to_parquet
-        DataFrame.to_spark_io
-        Examples
-        --------
-        >>> df = ks.DataFrame(dict(
-        ...    date=list(pd.date_range('2012-1-1 12:00:00', periods=3, freq='M')),
-        ...    country=['KR', 'US', 'JP'],
-        ...    code=[1, 2 ,3]), columns=['date', 'country', 'code'])
-        >>> df
-                         date country  code
-        0 2012-01-31 12:00:00      KR     1
-        1 2012-02-29 12:00:00      US     2
-        2 2012-03-31 12:00:00      JP     3
-        >>> df.to_csv(path=r'%s/to_csv/foo.csv' % path)
-        """
-        if columns is not None:
-            data_columns = columns
-        else:
-            data_columns = self._internal.data_columns
-
-        if index:
-            index_columns = self._internal.index_columns
-        else:
-            index_columns = []
-
-        if isinstance(header, list):
-            sdf = self._sdf.select(index_columns +
-                                   [self._internal.scol_for(old_name).alias(new_name)
-                                    for (old_name, new_name) in zip(data_columns, header)])
-            header = True
-        else:
-            sdf = self._sdf.select(index_columns + data_columns)
-
-        sdf.write.csv(path=path_or_buf, sep=sep, nullValue=na_rep, header=header,
-                      encoding=encoding, quote=quotechar, dateFormat=date_format,
-                      charToEscapeQuoteEscaping=escapechar)
-
     def to_frame(self, name=None) -> spark.DataFrame:
         """
         Convert Series to DataFrame.