-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25241][SQL] Configurable empty values when reading/writing CSV files #22234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8b51800
ebd052b
17eaba6
3d3f178
bb28db9
0bcdb2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -79,7 +79,8 @@ private[csv] object CSVInferSchema { | |
| * point checking if it is an Int, as the final type must be Double or higher. | ||
| */ | ||
| def inferField(typeSoFar: DataType, field: String, options: CSVOptions): DataType = { | ||
| if (field == null || field.isEmpty || field == options.nullValue) { | ||
| if (field == null || field.isEmpty || field == options.nullValue || | ||
| field == options.emptyValueInRead) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wouldn't do this for now. It needs another review iteration. Let's revert this. |
||
| typeSoFar | ||
| } else { | ||
| typeSoFar match { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -117,6 +117,9 @@ class CSVOptions( | |
|
|
||
| val nullValue = parameters.getOrElse("nullValue", "") | ||
|
|
||
| val emptyValueInRead = parameters.getOrElse("emptyValue", "") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would just call it
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought that as well. Just for the sake of providing backwards compatibility as we already have in
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I had to give them different names because the default values are different. Ah, yeah, then it makes sense here. I read it too quickly. |
||
| val emptyValueInWrite = parameters.getOrElse("emptyValue", "\"\"") | ||
|
|
||
| val nanValue = parameters.getOrElse("nanValue", "NaN") | ||
|
|
||
| val positiveInf = parameters.getOrElse("positiveInf", "Inf") | ||
|
|
@@ -173,7 +176,7 @@ class CSVOptions( | |
| writerSettings.setIgnoreLeadingWhitespaces(ignoreLeadingWhiteSpaceFlagInWrite) | ||
| writerSettings.setIgnoreTrailingWhitespaces(ignoreTrailingWhiteSpaceFlagInWrite) | ||
| writerSettings.setNullValue(nullValue) | ||
| writerSettings.setEmptyValue("\"\"") | ||
| writerSettings.setEmptyValue(emptyValueInWrite) | ||
| writerSettings.setSkipEmptyLines(true) | ||
| writerSettings.setQuoteAllFields(quoteAll) | ||
| writerSettings.setQuoteEscapingEnabled(escapeQuotes) | ||
|
|
@@ -194,7 +197,7 @@ class CSVOptions( | |
| settings.setInputBufferSize(inputBufferSize) | ||
| settings.setMaxColumns(maxColumns) | ||
| settings.setNullValue(nullValue) | ||
| settings.setEmptyValue("") | ||
| settings.setEmptyValue(emptyValueInRead) | ||
| settings.setMaxCharsPerColumn(maxCharsPerColumn) | ||
| settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.STOP_AT_DELIMITER) | ||
| settings | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| year,make,model,comment,blank | ||
| "2012","Tesla","S","","" | ||
| 1997,Ford,E350,"Go get one now they are going fast", | ||
| 2015,Chevy,Volt,,"" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto for excluding.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should I revert both of these changes then, @HyukjinKwon?