|
17 | 17 |
|
18 | 18 | package org.apache.spark.sql.sources |
19 | 19 |
|
| 20 | +import org.apache.hadoop.conf.Configuration |
| 21 | + |
20 | 22 | import org.apache.spark.annotation.{DeveloperApi, Experimental} |
21 | 23 | import org.apache.spark.rdd.RDD |
22 | 24 | import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} |
@@ -245,12 +247,26 @@ trait CatalystScan { |
245 | 247 | /** |
246 | 248 | * ::Experimental:: |
247 | 249 | * [[OutputWriter]] is used together with [[FSBasedRelation]] for persisting rows to the |
248 | | - * underlying file system. An [[OutputWriter]] instance is created when a new output file is |
249 | | - * opened. This instance is used to persist rows to this single output file. |
| 250 | + * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor. |
| 251 | + * An [[OutputWriter]] instance is created and initialized when a new output file is opened on |
| 252 | + * executor side. This instance is used to persist rows to this single output file. |
250 | 253 | */ |
251 | 254 | @Experimental |
252 | 255 | abstract class OutputWriter { |
253 | | - def init(): Unit = () |
| 256 | + /** |
| 257 | + * Initializes this [[OutputWriter]] before any rows are persisted. |
| 258 | + * |
| 259 | + * @param path The file path to which this [[OutputWriter]] is supposed to write. |
| 260 | + * @param dataSchema Schema of the rows to be written. Partition columns are not included in the |
| 261 | + * schema if the corresponding relation is partitioned. |
| 262 | + * @param options Data source options inherited from driver side. |
| 263 | + * @param conf Hadoop configuration inherited from driver side. |
| 264 | + */ |
| 265 | + def init( |
| 266 | + path: String, |
| 267 | + dataSchema: StructType, |
| 268 | + options: java.util.Map[String, String], |
| 269 | + conf: Configuration): Unit = () |
254 | 270 |
|
255 | 271 | /** |
256 | 272 | * Persists a single row. Invoked on the executor side. When writing to dynamically partitioned |
@@ -341,5 +357,5 @@ abstract class FSBasedRelation extends BaseRelation { |
341 | 357 | * This method is responsible for producing a new [[OutputWriter]] for each newly opened output |
342 | 358 | * file on the executor side. |
343 | 359 | */ |
344 | | - def newOutputWriter(path: String): OutputWriter |
| 360 | + def outputWriterClass: Class[_ <: OutputWriter] |
345 | 361 | } |
0 commit comments