Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,30 @@ private static synchronized Configuration addRequiredProjectionFields(Configurat
return configuration;
}

/**
* Hive will append read columns' ids to old columns' ids during getRecordReader. In some cases, e.g. SELECT COUNT(*),
* the read columns' id is an empty string and Hive will combine it with Hoodie required projection ids and becomes
* e.g. ",2,0,3" and will cause an error. Actually this method is a temporary solution because the real bug is from
* Hive. Hive has fixed this bug after 3.0.0, but the version before that would still face this problem. (HIVE-22438)
*/
private static Configuration cleanProjectionColumnIds(Configuration conf) {
synchronized (conf) {
String columnIds = conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
if (!columnIds.isEmpty() && columnIds.charAt(0) == ',') {
conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, columnIds.substring(1));
if (LOG.isDebugEnabled()) {
LOG.debug("The projection Ids: {" + columnIds + "} start with ','. First comma is removed");
}
}
}
return conf;
}

@Override
public RecordReader<NullWritable, ArrayWritable> getRecordReader(final InputSplit split, final JobConf job,
final Reporter reporter) throws IOException {

this.conf = cleanProjectionColumnIds(job);
LOG.info("Before adding Hoodie columns, Projections :" + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)
+ ", Ids :" + job.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));

Expand Down