Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
import java.util.stream.Collectors;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -315,7 +316,10 @@ protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> r
throws IOException {
logger.info("Cleaning path " + partitionPath);
FileSystem fs = getMetaClient().getFs();
FileStatus[] toBeDeleted = fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
// FileStatus[] toBeDeleted = fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
FileStatus[] toBeDeleted = StringUtils.isBlank(partitionPath)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FSUtils.getPartitionPath below is supposed to handle this.Can you use that method directly everywhere to be consistent. If it does not work, can you fix the method ?

public static Path getPartitionPath(Path basePath, String partitionPath) {
// FOr non-partitioned table, return only base-path
return ((partitionPath == null) || (partitionPath.isEmpty())) ? basePath :
new Path(basePath, partitionPath);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bvaradar
When I rebuilt hoodie with spark-2.3.3 and parquet-1.8.2 and avro-1.8.2 and run test class HoodieJavaApp, it reported error Can not create a Path from an empty string. But spark-2.1.0 didn't. Here is my version:

  <properties>
    <maven-dependency-plugin.version>2.10</maven-dependency-plugin.version>
    <maven-jar-plugin.version>2.6</maven-jar-plugin.version>
    <maven-surefire-plugin.version>2.19.1</maven-surefire-plugin.version>
    <maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
    <fasterxml.version>2.6.7</fasterxml.version>
    <parquet.version>1.8.2</parquet.version>
    <junit.version>4.11</junit.version>
    <mockito.version>1.9.5</mockito.version>
    <log4j.version>1.2.17</log4j.version>
    <joda.version>2.9.9</joda.version>
    <hadoop.version>2.7.3</hadoop.version>
    <hive.groupid>org.apache.hive</hive.groupid>
    <hive.version>2.3.1</hive.version>
    <metrics.version>4.0.2</metrics.version>
    <spark.version>2.3.3</spark.version>
    <avro.version>1.8.2</avro.version>
    <scala.version>2.11.8</scala.version>
    <scala.libversion>2.11</scala.libversion>
    <surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
    <thrift.version>0.12.0</thrift.version>
    <hbase.version>1.2.3</hbase.version>
    <codehaus-jackson.version>1.9.13</codehaus-jackson.version>
    <notice.dir>${project.basedir}</notice.dir>
    <notice.file>NOTICE.txt</notice.file>
  </properties>

I think I can write a common method in FsUtils.

? fs.listStatus(new Path(config.getBasePath()), filter) :
fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
for (FileStatus file : toBeDeleted) {
boolean success = fs.delete(file.getPath(), false);
results.put(file, success);
Expand All @@ -340,7 +344,10 @@ protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> r
}
return false;
};
FileStatus[] toBeDeleted = fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
// FileStatus[] toBeDeleted = fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
FileStatus[] toBeDeleted = StringUtils.isBlank(partitionPath)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here

? fs.listStatus(new Path(config.getBasePath()), filter) :
fs.listStatus(new Path(config.getBasePath(), partitionPath), filter);
for (FileStatus file : toBeDeleted) {
boolean success = fs.delete(file.getPath(), false);
results.put(file, success);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
Expand Down Expand Up @@ -216,7 +218,9 @@ private void ensurePartitionLoadedCorrectly(String partition) {
log.info("Building file system view for partition (" + partitionPathStr + ")");

// Create the path if it does not exist already
Path partitionPath = FSUtils.getPartitionPath(metaClient.getBasePath(), partitionPathStr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't FSUtils.getPartitionPath working ? Its better to use the method consistently in all places wherever we see
new Path(basePath, relativePartitionPath) to make things work for non-partitioned dataset

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Path partitionPath = StringUtils.isBlank(partitionPathStr)
? new Path(metaClient.getBasePath()) : new Path(metaClient.getBasePath(), partitionPathStr);
// Path partitionPath = new Path(metaClient.getBasePath(), partitionPathStr);
FSUtils.createPathIfNotExists(metaClient.getFs(), partitionPath);
long beginLsTs = System.currentTimeMillis();
FileStatus[] statuses = metaClient.getFs().listStatus(partitionPath);
Expand Down