Issue #TG-955 feat: Handle throttling of data exhaust requests - framework changes #97

Merged
@@ -66,4 +66,24 @@ class HadoopFileUtil {
if (deleteSource) srcFS.delete(srcDir, true) else true
} else false
}

/**
 * Get the size in bytes of a Hadoop folder or file.
 */
def size(file: String, conf: Configuration): Long = {
  val path = new Path(file)
  path.getFileSystem(conf).getContentSummary(path).getLength
}

/**
 * Get the size in bytes for each of the given files.
 */
def size(conf: Configuration, files: String*): Seq[(String, Long)] = {
  for (file <- files) yield {
    val path = new Path(file)
    (file, path.getFileSystem(conf).getContentSummary(path).getLength)
  }
}
}
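
For reference, a minimal usage sketch of the two new helpers, assuming HadoopFileUtil has a no-arg constructor (as the test's fileUtil suggests) and using illustrative paths:

import org.apache.hadoop.conf.Configuration

// Hypothetical usage of the new size helpers; the paths below are illustrative.
val fileUtil = new HadoopFileUtil()
val conf = new Configuration()

// Size in bytes of a single file or folder.
val bytes: Long = fileUtil.size("/tmp/reports/report.csv", conf)

// Sizes for several files at once, returned as (path, bytes) tuples.
val sizes: Seq[(String, Long)] = fileUtil.size(conf, "/tmp/reports/a.csv", "/tmp/reports/b.csv")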
@@ -29,6 +29,16 @@ class TestDatasetUtil extends BaseSpec with Matchers with MockFactory {
rdd3.head should be ("env1,22.1,3")
rdd3.last should be ("env1,32.1,4")

// get file size
val size = fileUtil.size("src/test/resources/test-report2.csv", sparkSession.sparkContext.hadoopConfiguration)
size should be (36)

// get multiple files size
val filesWithSize = fileUtil.size(sparkSession.sparkContext.hadoopConfiguration, "src/test/resources/test-report/env1.csv", "src/test/resources/test-report/env2.csv", "src/test/resources/test-report2.csv")
filesWithSize.size should be (3)
filesWithSize.head._1 should be ("src/test/resources/test-report/env1.csv")
filesWithSize.head._2 should be (31)

val rdd1 = sparkSession.sparkContext.parallelize(Seq(DruidSummary("2020-01-11","env1", 22.1, 3), DruidSummary("2020-01-11","env2", 20.1, 3)), 1)
val df1 = sparkSession.createDataFrame(rdd1)

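The PR title ties these helpers to throttling of data exhaust requests. As a hypothetical sketch (the quota logic below is not part of this diff), the multi-file overload could back a size check of this shape:

import org.apache.hadoop.conf.Configuration

// Hypothetical throttle check: sum the sizes of the requested files and
// compare against a byte quota. withinQuota and maxBytes are illustrative
// names, not part of this PR.
def withinQuota(conf: Configuration, maxBytes: Long, files: String*): Boolean = {
  val fileUtil = new HadoopFileUtil()
  val totalBytes = fileUtil.size(conf, files: _*).map(_._2).sum
  totalBytes <= maxBytes
}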