@@ -314,7 +314,7 @@ def pickleFile(self, name, minPartitions=None):
314314 return RDD (self ._jsc .objectFile (name , minPartitions ), self ,
315315 BatchedSerializer (PickleSerializer ()))
316316
317- def textFile (self , name , minPartitions = None ):
317+ def textFile (self , name , minPartitions = None , use_unicode = True ):
318318 """
319319 Read a text file from HDFS, a local file system (available on all
320320 nodes), or any Hadoop-supported file system URI, and return it as an
@@ -329,9 +329,9 @@ def textFile(self, name, minPartitions=None):
329329 """
330330 minPartitions = minPartitions or min (self .defaultParallelism , 2 )
331331 return RDD (self ._jsc .textFile (name , minPartitions ), self ,
332- UTF8Deserializer ())
332+ UTF8Deserializer (use_unicode ))
333333
334- def wholeTextFiles (self , path , minPartitions = None ):
334+ def wholeTextFiles (self , path , minPartitions = None , use_unicode = True ):
335335 """
336336 Read a directory of text files from HDFS, a local file system
337337 (available on all nodes), or any Hadoop-supported file system
@@ -369,7 +369,7 @@ def wholeTextFiles(self, path, minPartitions=None):
369369 """
370370 minPartitions = minPartitions or self .defaultMinPartitions
371371 return RDD (self ._jsc .wholeTextFiles (path , minPartitions ), self ,
372- PairDeserializer (UTF8Deserializer (), UTF8Deserializer ()))
372+ PairDeserializer (UTF8Deserializer (use_unicode ), UTF8Deserializer (use_unicode )))
373373
374374 def _dictToJavaMap (self , d ):
375375 jm = self ._jvm .java .util .HashMap ()
0 commit comments