Skip to content

Commit 3aa349b

Browse files
author
Davies Liu
committed
add experimental notes
1 parent 24e84b6 commit 3aa349b

File tree

3 files changed

+14
-6
lines changed

3 files changed

+14
-6
lines changed

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,8 @@ class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging {
558558

559559

560560
/**
561+
* :: Experimental ::
562+
*
561563
* Get an RDD for a Hadoop-readable dataset as PortableDataStream for each file
562564
* (useful for binary data)
563565
*
@@ -600,6 +602,8 @@ class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging {
600602
}
601603

602604
/**
605+
* :: Experimental ::
606+
*
603607
* Load data from a flat binary file, assuming the length of each record is constant.
604608
*
605609
* @param path Directory to the input data files

core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,14 @@ import java.io.Closeable
2121
import java.util
2222
import java.util.{Map => JMap}
2323

24-
import java.io.DataInputStream
25-
26-
import org.apache.hadoop.io.{BytesWritable, LongWritable}
27-
import org.apache.spark.input.{PortableDataStream, FixedLengthBinaryInputFormat}
28-
2924
import scala.collection.JavaConversions
3025
import scala.collection.JavaConversions._
3126
import scala.language.implicitConversions
3227
import scala.reflect.ClassTag
3328

3429
import com.google.common.base.Optional
3530
import org.apache.hadoop.conf.Configuration
31+
import org.apache.spark.input.PortableDataStream
3632
import org.apache.hadoop.mapred.{InputFormat, JobConf}
3733
import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat}
3834

@@ -286,6 +282,8 @@ class JavaSparkContext(val sc: SparkContext)
286282
new JavaPairRDD(sc.binaryFiles(path, minPartitions))
287283

288284
/**
285+
* :: Experimental ::
286+
*
289287
* Read a directory of binary files from HDFS, a local file system (available on all nodes),
290288
* or any Hadoop-supported file system URI as a byte array. Each file is read as a single
291289
* record and returned in a key-value pair, where the key is the path of each file,
@@ -312,15 +310,19 @@ class JavaSparkContext(val sc: SparkContext)
312310
*
313311
* @note Small files are preferred; very large files are allowed but may cause bad performance.
314312
*/
313+
@Experimental
315314
def binaryFiles(path: String): JavaPairRDD[String, PortableDataStream] =
316315
new JavaPairRDD(sc.binaryFiles(path, defaultMinPartitions))
317316

318317
/**
318+
* :: Experimental ::
319+
*
319320
* Load data from a flat binary file, assuming the length of each record is constant.
320321
*
321322
* @param path Directory to the input data files
322323
* @return An RDD of data with values, represented as byte arrays
323324
*/
325+
@Experimental
324326
def binaryRecords(path: String, recordLength: Int): JavaRDD[Array[Byte]] = {
325327
new JavaRDD(sc.binaryRecords(path, recordLength))
326328
}

python/pyspark/context.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ def wholeTextFiles(self, path, minPartitions=None, use_unicode=True):
398398

399399
def binaryFiles(self, path, minPartitions=None):
400400
"""
401-
:: Developer API ::
401+
:: Experimental ::
402402
403403
Read a directory of binary files from HDFS, a local file system
404404
(available on all nodes), or any Hadoop-supported file system URI
@@ -415,6 +415,8 @@ def binaryFiles(self, path, minPartitions=None):
415415

416416
def binaryRecords(self, path, recordLength):
417417
"""
418+
:: Experimental ::
419+
418420
Load data from a flat binary file, assuming each record is a set of numbers
419421
with the specified numerical format (see ByteBuffer), and the number of
420422
bytes per record is constant.

0 commit comments

Comments
 (0)