
Commit ea59a7f

[HUDI-2080] Move to ubuntu-18.04 for Azure CI (apache#3409)
Update the Azure CI Ubuntu image from 16.04 to 18.04, since 16.04 will be removed soon. Fixed some consistently failing tests:
* fix TestCOWDataSourceStorage and TestMORDataSourceStorage
* reset mocks
Also update the README badges.
Co-authored-by: Raymond Xu <[email protected]>
1 parent eb5e7ee commit ea59a7f

File tree

9 files changed: +321, -189 lines changed


README.md (+2, -2)

@@ -21,8 +21,8 @@ Hudi manages the storage of large analytical datasets on DFS (Cloud stores, HDFS
 
 <https://hudi.apache.org/>
 
-[![Build Status](https://travis-ci.com/apache/hudi.svg?branch=master)](https://travis-ci.com/apache/hudi)
-[![Build Status](https://dev.azure.com/apache-hudi-ci-org/apache-hudi-ci/_apis/build/status/apachehudi-ci.hudi-mirror?branchName=master)](https://dev.azure.com/apache-hudi-ci-org/apache-hudi-ci/_build/latest?definitionId=3&branchName=master)
+[![Build](https://github.com/apache/hudi/actions/workflows/bot.yml/badge.svg)](https://github.com/apache/hudi/actions/workflows/bot.yml)
+[![Test](https://dev.azure.com/apache-hudi-ci-org/apache-hudi-ci/_apis/build/status/apachehudi-ci.hudi-mirror?branchName=master)](https://dev.azure.com/apache-hudi-ci-org/apache-hudi-ci/_build/latest?definitionId=3&branchName=master)
 [![License](https://img.shields.io/badge/license-Apache%202-4EB1BA.svg)](https://www.apache.org/licenses/LICENSE-2.0.html)
 [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.hudi/hudi/badge.svg)](http://search.maven.org/#search%7Cga%7C1%7Cg%3A%22org.apache.hudi%22)
 [![Join on Slack](https://img.shields.io/badge/slack-%23hudi-72eff8?logo=slack&color=48c628&label=Join%20on%20Slack)](https://join.slack.com/t/apache-hudi/shared_invite/enQtODYyNDAxNzc5MTg2LTE5OTBlYmVhYjM0N2ZhOTJjOWM4YzBmMWU2MjZjMGE4NDc5ZDFiOGQ2N2VkYTVkNzU3ZDQ4OTI1NmFmYWQ0NzE)

azure-pipelines.yml (+1, -1)

@@ -19,7 +19,7 @@ trigger:
   - '*' # must quote since "*" is a YAML reserved character; we want a string
 
 pool:
-  vmImage: 'ubuntu-16.04'
+  vmImage: 'ubuntu-18.04'
 
 variables:
   MAVEN_CACHE_FOLDER: $(Pipeline.Workspace)/.m2/repository

hudi-client/hudi-client-common/src/test/java/org/apache/hudi/callback/http/TestCallbackHttpClient.java (+9, -1)

@@ -18,14 +18,16 @@
 
 package org.apache.hudi.callback.http;
 
+import org.apache.hudi.callback.client.http.HoodieWriteCommitHttpCallbackClient;
+
 import org.apache.http.StatusLine;
 import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.hudi.callback.client.http.HoodieWriteCommitHttpCallbackClient;
 import org.apache.log4j.AppenderSkeleton;
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.spi.LoggingEvent;
+import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.ArgumentCaptor;

@@ -39,6 +41,7 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;

@@ -63,6 +66,11 @@ public class TestCallbackHttpClient {
   @Mock
   StatusLine statusLine;
 
+  @AfterEach
+  void resetMocks() {
+    reset(appender, httpClient, httpResponse, statusLine);
+  }
+
   private void mockResponse(int statusCode) {
     when(statusLine.getStatusCode()).thenReturn(statusCode);
     when(httpResponse.getStatusLine()).thenReturn(statusLine);

hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/datadog/TestDatadogHttpClient.java (+7)

@@ -27,6 +27,7 @@
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.spi.LoggingEvent;
+import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.junit.jupiter.params.ParameterizedTest;

@@ -46,6 +47,7 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;

@@ -67,6 +69,11 @@ public class TestDatadogHttpClient {
   @Mock
   StatusLine statusLine;
 
+  @AfterEach
+  void resetMocks() {
+    reset(appender, httpClient, httpResponse, statusLine);
+  }
+
   private void mockResponse(int statusCode) {
     when(statusLine.getStatusCode()).thenReturn(statusCode);
     when(httpResponse.getStatusLine()).thenReturn(statusLine);

hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/datadog/TestDatadogReporter.java (+7)

@@ -28,6 +28,7 @@
 import org.apache.log4j.Level;
 import org.apache.log4j.Logger;
 import org.apache.log4j.spi.LoggingEvent;
+import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.extension.ExtendWith;
 import org.mockito.ArgumentCaptor;

@@ -41,6 +42,7 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.verify;
 
 @ExtendWith(MockitoExtension.class)

@@ -58,6 +60,11 @@ public class TestDatadogReporter {
   @Mock
   DatadogHttpClient client;
 
+  @AfterEach
+  void resetMocks() {
+    reset(appender, registry, client);
+  }
+
   @Test
   public void stopShouldCloseEnclosedClient() throws IOException {
     new DatadogReporter(registry, client, "foo", Option.empty(), Option.empty(),
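
The three test classes above all receive the same fix: an @AfterEach hook that calls Mockito's reset(...) on every shared mock, so stubbings and recorded interactions from one test method cannot leak into the next. Below is a minimal, self-contained sketch of that pattern for reference; the class name ExampleResetMocksTest and the List<String> mock are illustrative placeholders rather than Hudi code, and it assumes JUnit 5 (junit-jupiter) and Mockito (mockito-junit-jupiter) on the test classpath.

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.mockito.Mockito.reset;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.util.List;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

@ExtendWith(MockitoExtension.class)
public class ExampleResetMocksTest {

  // Illustrative collaborator; stands in for the httpClient/httpResponse/statusLine
  // (or appender/registry/client) mocks that the diffs above reset.
  @Mock
  List<String> statusMessages;

  @AfterEach
  void resetMocks() {
    // reset() clears all stubbings and recorded interactions on the given mocks,
    // so nothing configured or invoked in one test is still visible to the next.
    reset(statusMessages);
  }

  @Test
  void stubbedCallReturnsConfiguredValue() {
    when(statusMessages.get(0)).thenReturn("200 OK");
    assertEquals("200 OK", statusMessages.get(0));
    verify(statusMessages).get(0);
  }

  @Test
  void cleanMockHasNoLeftoverStubbing() {
    // With no stubbing in place, the mock returns Mockito's default (null here),
    // and only interactions made in this test are recorded.
    assertNull(statusMessages.get(0));
    verify(statusMessages).get(0);
  }
}

Whether a given test class strictly needs the reset depends on how its mocks are shared across tests (for example via global state such as logger appenders); the explicit @AfterEach reset simply guarantees a clean slate either way.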

hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala (-117)

@@ -156,123 +156,6 @@ class TestCOWDataSource extends HoodieClientTestBase {
     assertEquals(snapshotDF1.count() - inputDF2.count(), snapshotDF2.count())
   }
 
-
-  @ParameterizedTest
-  @ValueSource(booleans = Array(true, false))
-  def testCopyOnWriteStorage(isMetadataEnabled: Boolean) {
-    // Insert Operation
-    val records1 = recordsToStrings(dataGen.generateInserts("000", 100)).toList
-    val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
-    inputDF1.write.format("org.apache.hudi")
-      .options(commonOpts)
-      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(HoodieMetadataConfig.ENABLE.key, isMetadataEnabled)
-      .mode(SaveMode.Overwrite)
-      .save(basePath)
-
-    assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, basePath, "000"))
-    val commitInstantTime1 = HoodieDataSourceHelpers.latestCommit(fs, basePath)
-
-    // Snapshot query
-    val snapshotDF1 = spark.read.format("org.apache.hudi")
-      .option(HoodieMetadataConfig.ENABLE.key, isMetadataEnabled)
-      .load(basePath + "/*/*/*")
-    assertEquals(100, snapshotDF1.count())
-
-    // Upsert based on the written table with Hudi metadata columns
-    val verificationRowKey = snapshotDF1.limit(1).select("_row_key").first.getString(0)
-    val updateDf = snapshotDF1.filter(col("_row_key") === verificationRowKey).withColumn(verificationCol, lit(updatedVerificationVal))
-
-    updateDf.write.format("org.apache.hudi")
-      .options(commonOpts)
-      .option(HoodieMetadataConfig.ENABLE.key, isMetadataEnabled)
-      .mode(SaveMode.Append)
-      .save(basePath)
-    val commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, basePath)
-
-    val snapshotDF2 = spark.read.format("hudi")
-      .option(HoodieMetadataConfig.ENABLE.key, isMetadataEnabled)
-      .load(basePath + "/*/*/*")
-    assertEquals(100, snapshotDF2.count())
-    assertEquals(updatedVerificationVal, snapshotDF2.filter(col("_row_key") === verificationRowKey).select(verificationCol).first.getString(0))
-
-    // Upsert Operation without Hudi metadata columns
-    val records2 = recordsToStrings(dataGen.generateUpdates("001", 100)).toList
-    val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2 , 2))
-    val uniqueKeyCnt = inputDF2.select("_row_key").distinct().count()
-
-    inputDF2.write.format("org.apache.hudi")
-      .options(commonOpts)
-      .option(HoodieMetadataConfig.ENABLE.key, isMetadataEnabled)
-      .mode(SaveMode.Append)
-      .save(basePath)
-
-    val commitInstantTime3 = HoodieDataSourceHelpers.latestCommit(fs, basePath)
-    assertEquals(3, HoodieDataSourceHelpers.listCommitsSince(fs, basePath, "000").size())
-
-    // Snapshot Query
-    val snapshotDF3 = spark.read.format("org.apache.hudi")
-      .option(HoodieMetadataConfig.ENABLE.key, isMetadataEnabled)
-      .load(basePath + "/*/*/*")
-    assertEquals(100, snapshotDF3.count()) // still 100, since we only updated
-
-    // Read Incremental Query
-    // we have 2 commits, try pulling the first commit (which is not the latest)
-    val firstCommit = HoodieDataSourceHelpers.listCommitsSince(fs, basePath, "000").get(0)
-    val hoodieIncViewDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME.key, firstCommit)
-      .load(basePath)
-    assertEquals(100, hoodieIncViewDF1.count()) // 100 initial inserts must be pulled
-    var countsPerCommit = hoodieIncViewDF1.groupBy("_hoodie_commit_time").count().collect()
-    assertEquals(1, countsPerCommit.length)
-    assertEquals(firstCommit, countsPerCommit(0).get(0))
-
-    // Test incremental query has no instant in range
-    val emptyIncDF = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME.key, "001")
-      .load(basePath)
-    assertEquals(0, emptyIncDF.count())
-
-    // Upsert an empty dataFrame
-    val emptyRecords = recordsToStrings(dataGen.generateUpdates("002", 0)).toList
-    val emptyDF = spark.read.json(spark.sparkContext.parallelize(emptyRecords, 1))
-    emptyDF.write.format("org.apache.hudi")
-      .options(commonOpts)
-      .option(HoodieMetadataConfig.ENABLE.key, isMetadataEnabled)
-      .mode(SaveMode.Append)
-      .save(basePath)
-
-    // pull the latest commit
-    val hoodieIncViewDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime2)
-      .load(basePath)
-
-    assertEquals(uniqueKeyCnt, hoodieIncViewDF2.count()) // 100 records must be pulled
-    countsPerCommit = hoodieIncViewDF2.groupBy("_hoodie_commit_time").count().collect()
-    assertEquals(1, countsPerCommit.length)
-    assertEquals(commitInstantTime3, countsPerCommit(0).get(0))
-
-    // pull the latest commit within certain partitions
-    val hoodieIncViewDF3 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime2)
-      .option(DataSourceReadOptions.INCR_PATH_GLOB.key, "/2016/*/*/*")
-      .load(basePath)
-    assertEquals(hoodieIncViewDF2.filter(col("_hoodie_partition_path").contains("2016")).count(), hoodieIncViewDF3.count())
-
-    val timeTravelDF = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME.key, firstCommit)
-      .load(basePath)
-    assertEquals(100, timeTravelDF.count()) // 100 initial inserts must be pulled
-  }
-
   @Test def testOverWriteModeUseReplaceAction(): Unit = {
     val records1 = recordsToStrings(dataGen.generateInserts("001", 5)).toList
     val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
