Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GOBBLIN-2173] Avoid Adhoc flow spec addition for non leasable entity #4076

Merged
merged 8 commits into from
Nov 19, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.apache.gobblin.metrics.ServiceMetricNames;
import org.apache.gobblin.runtime.api.FlowSpec;
import org.apache.gobblin.runtime.api.FlowSpecSearchObject;
import org.apache.gobblin.runtime.api.TooSoonToRerunSameFlowException;
import org.apache.gobblin.runtime.api.SpecNotFoundException;
import org.apache.gobblin.runtime.spec_catalog.AddSpecResponse;
import org.apache.gobblin.runtime.spec_catalog.FlowCatalog;
Expand Down Expand Up @@ -256,6 +257,9 @@ public CreateKVResponse<ComplexResourceKey<FlowId, FlowStatusId>, FlowConfig> cr
responseMap = this.flowCatalog.put(flowSpec, true);
} catch (QuotaExceededException e) {
throw new RestLiServiceException(HttpStatus.S_503_SERVICE_UNAVAILABLE, e.getMessage());
} catch(TooSoonToRerunSameFlowException e) {
return new CreateKVResponse<>(new RestLiServiceException(HttpStatus.S_409_CONFLICT,
"FlowSpec with URI " + flowSpec.getUri() + " was previously launched within the lease consolidation period, no action will be taken"));
} catch (Throwable e) {
// TODO: Compilation errors should fall under throwable exceptions as well instead of checking for strings
log.warn(String.format("Failed to add flow configuration %s.%s to catalog due to", flowConfig.getId().getFlowGroup(), flowConfig.getId().getFlowName()), e);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.gobblin.runtime.api;

import lombok.Getter;


/**
* An exception thrown when another {@link FlowSpec} with same flow name and flow group
* is submitted within lease consolidation time.
*/
public class TooSoonToRerunSameFlowException extends RuntimeException {
  /** The flow spec whose submission triggered this exception; a getter is generated by Lombok's {@code @Getter}. */
  @Getter
  private final FlowSpec flowSpec;

  /**
   * Account for unwrapping within {@code FlowCatalog#updateOrAddSpecHelper}'s {@code CallbackResult} error handling
   * for {@code SpecCatalogListener}s.
   *
   * @param flowSpec the flow spec to record on both the outer and the inner exception
   * @return a {@code TooSoonToRerunSameFlowException} wrapped in another {@code TooSoonToRerunSameFlowException}
   */
  public static TooSoonToRerunSameFlowException wrappedOnce(FlowSpec flowSpec) {
    return new TooSoonToRerunSameFlowException(flowSpec, new TooSoonToRerunSameFlowException(flowSpec));
  }

  /**
   * Creates the exception without a cause.
   *
   * @param flowSpec the flow spec that was submitted; it is appended to the exception message
   */
  public TooSoonToRerunSameFlowException(FlowSpec flowSpec) {
    super(buildMessage(flowSpec));
    this.flowSpec = flowSpec;
  }

  /** restricted-access ctor: use {@link #wrappedOnce(FlowSpec)} instead */
  private TooSoonToRerunSameFlowException(FlowSpec flowSpec, Throwable cause) {
    super(buildMessage(flowSpec), cause);
    this.flowSpec = flowSpec;
  }

  /** Builds the single message text shared by both constructors, so the two can never drift apart. */
  private static String buildMessage(FlowSpec flowSpec) {
    return "Lease already occupied by another recent execution of this flow: " + flowSpec;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,15 @@ public interface DagManagementStateStore {
*/
void updateDagNode(Dag.DagNode<JobExecutionPlan> dagNode) throws IOException;

/**
 * Checks whether a flow with the same flow name and flow group has been launched recently.
 *
 * @param flowGroup flow group for the flow
 * @param flowName flow name for the flow
 * @param flowExecutionId flow execution id for the flow
 * @return {@code true} if a flow has been launched recently with the same flow name and flow group,
 *         {@code false} otherwise
 * @throws IOException NOTE(review): presumably when the underlying store cannot be queried - confirm against
 *         the implementations
 */
boolean existsCurrentlyLaunchingExecOfSameFlow(String flowGroup, String flowName, long flowExecutionId) throws IOException;

/**
* Returns the requested {@link org.apache.gobblin.service.modules.flowgraph.Dag.DagNode} and its {@link JobStatus}.
* Both params are returned as optional and are empty if not present in the store.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ public LeaseAttemptStatus tryAcquireLease(DagActionStore.LeaseParams leaseParams
throw new RuntimeException(String.format("Unexpected LeaseAttemptStatus (%s) for %s", leaseAttemptStatus.getClass().getName(), leaseParams));
}

/**
 * Forwards the check to the decorated {@link MultiActiveLeaseArbiter} and returns its answer unchanged.
 */
@Override
public boolean existsSimilarLeaseWithinConsolidationPeriod(DagActionStore.LeaseParams leaseParams) throws IOException {
  final boolean similarLeaseExists =
      decoratedMultiActiveLeaseArbiter.existsSimilarLeaseWithinConsolidationPeriod(leaseParams);
  return similarLeaseExists;
}

@Override
public boolean recordLeaseSuccess(LeaseAttemptStatus.LeaseObtainedStatus status)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,17 @@ public interface MultiActiveLeaseArbiter {
LeaseAttemptStatus tryAcquireLease(DagActionStore.LeaseParams leaseParams, boolean adoptConsensusFlowExecutionId)
throws IOException;

/**
 * Checks whether an entry for the same flow name and flow group exists within the lease consolidation period
 * (aka. epsilon).
 *
 * @param leaseParams uniquely identifies the flow, the present action upon it, the time the action
 * was triggered, and if the dag action event we're checking on is a reminder event
 * @return {@code true} if a lease for a recently launched flow already exists for the flow details in
 * {@code leaseParams}, {@code false} otherwise
 * @throws IOException NOTE(review): presumably when the lease store cannot be read - confirm against the
 * implementations
 */
phet marked this conversation as resolved.
Show resolved Hide resolved
boolean existsSimilarLeaseWithinConsolidationPeriod(DagActionStore.LeaseParams leaseParams)
throws IOException;

/**
* This method is used to indicate the owner of the lease has successfully completed required actions while holding
* the lease of the dag action event. It marks the lease as "no longer leasing", if the eventTimeMillis and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public class MySqlDagManagementStateStore implements DagManagementStateStore {
// todo - these two stores should merge
private DagStateStoreWithDagNodes dagStateStore;
private DagStateStoreWithDagNodes failedDagStateStore;
private MultiActiveLeaseArbiter multiActiveLeaseArbiter;
private final JobStatusRetriever jobStatusRetriever;
private boolean dagStoresInitialized = false;
private final UserQuotaManager quotaManager;
Expand All @@ -79,13 +80,14 @@ public class MySqlDagManagementStateStore implements DagManagementStateStore {

@Inject
public MySqlDagManagementStateStore(Config config, FlowCatalog flowCatalog, UserQuotaManager userQuotaManager,
JobStatusRetriever jobStatusRetriever, DagActionStore dagActionStore) {
JobStatusRetriever jobStatusRetriever, DagActionStore dagActionStore, MultiActiveLeaseArbiter multiActiveLeaseArbiter) {
this.quotaManager = userQuotaManager;
this.config = config;
this.flowCatalog = flowCatalog;
this.jobStatusRetriever = jobStatusRetriever;
this.dagManagerMetrics.activate();
this.dagActionStore = dagActionStore;
this.multiActiveLeaseArbiter = multiActiveLeaseArbiter;
}

// It should be called after topology spec map is set
Expand Down Expand Up @@ -168,6 +170,14 @@ public synchronized void updateDagNode(Dag.DagNode<JobExecutionPlan> dagNode)
this.dagStateStore.updateDagNode(dagNode);
}

/**
 * Builds a LAUNCH dag action for the given flow, stamps it with the current wall-clock time, and asks the
 * lease arbiter whether a similar lease exists within its consolidation period.
 */
@Override
public boolean existsCurrentlyLaunchingExecOfSameFlow(String flowGroup, String flowName, long flowExecutionId) throws IOException {
  final DagActionStore.DagAction launchAction =
      DagActionStore.DagAction.forFlow(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionType.LAUNCH);
  // the event time is "now": the question is whether a launch of this flow is already in progress at this moment
  final long nowMillis = System.currentTimeMillis();
  return multiActiveLeaseArbiter.existsSimilarLeaseWithinConsolidationPeriod(
      new DagActionStore.LeaseParams(launchAction, nowMillis));
}

@Override
public Optional<Dag<JobExecutionPlan>> getDag(Dag.DagId dagId) throws IOException {
return Optional.ofNullable(this.dagStateStore.getDag(dagId));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,12 @@ else if (leaseValidityStatus == 2) {
}
}

/**
 * Looks up the existing event info for {@code leaseParams} and reports whether it is flagged as within epsilon.
 * When no event info is found, the answer is {@code false}.
 */
@Override
public boolean existsSimilarLeaseWithinConsolidationPeriod(DagActionStore.LeaseParams leaseParams) throws IOException {
  Optional<GetEventInfoResult> existingEventInfo = getExistingEventInfo(leaseParams);
  // short-circuit: only consult the result when one was actually found
  return existingEventInfo.isPresent() && existingEventInfo.get().isWithinEpsilon();
}

/**
* Checks leaseArbiterTable for an existing entry for this dag action and event time
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import org.apache.gobblin.runtime.api.SpecCatalogListener;
import org.apache.gobblin.runtime.api.SpecProducer;
import org.apache.gobblin.runtime.api.TopologySpec;
import org.apache.gobblin.runtime.api.TooSoonToRerunSameFlowException;
import org.apache.gobblin.runtime.spec_catalog.AddSpecResponse;
import org.apache.gobblin.runtime.spec_catalog.TopologyCatalog;
import org.apache.gobblin.service.modules.flow.FlowUtils;
Expand All @@ -78,6 +79,7 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable {
protected final SpecCompiler specCompiler;
protected final TopologyCatalog topologyCatalog;
private final JobStatusRetriever jobStatusRetriever;
private final DagManagementStateStore dagManagementStateStore;

protected final MetricContext metricContext;

Expand All @@ -100,6 +102,7 @@ public Orchestrator(Config config, TopologyCatalog topologyCatalog, Optional<Log
this.topologyCatalog = topologyCatalog;
this.flowLaunchHandler = flowLaunchHandler;
this.sharedFlowMetricsSingleton = sharedFlowMetricsSingleton;
this.dagManagementStateStore = dagManagementStateStore;
this.jobStatusRetriever = jobStatusRetriever;
this.specCompiler = flowCompilationValidationHelper.getSpecCompiler();
// todo remove the need to set topology factory outside of constructor GOBBLIN-2056
Expand All @@ -125,6 +128,7 @@ public AddSpecResponse onAddSpec(Spec addedSpec) {
_log.info("Orchestrator - onAdd[Topology]Spec: " + addedSpec);
this.specCompiler.onAddSpec(addedSpec);
} else if (addedSpec instanceof FlowSpec) {
enforceNoRecentAdhocExecOfSameFlow((FlowSpec) addedSpec);
_log.info("Orchestrator - onAdd[Flow]Spec: " + addedSpec);
return this.specCompiler.onAddSpec(addedSpec);
} else {
Expand All @@ -133,6 +137,29 @@ public AddSpecResponse onAddSpec(Spec addedSpec) {
return new AddSpecResponse<>(null);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an FYI, this also gets called when a flow is updated. But since we check whether the flow is scheduled, and we don't expect users to update an adhoc flow, we should be fine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But since this can still be called for adhoc flows, it would be good to test what the behaviour is. No need to handle it specially, but to know what the behaviour is would be good.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the callout, will add it to the test suite.


/**
 * Enforces that a similar adhoc flow is not currently launching. Scheduled flows are not checked.
 *
 * @param flowSpec the flow spec being added; its config must define the flow group and flow name keys
 * @throws TooSoonToRerunSameFlowException (wrapped once) when the state store reports a currently launching
 *         execution of the same flow
 * @throws RuntimeException wrapping the {@link IOException} when the state store check itself fails
 */
private void enforceNoRecentAdhocExecOfSameFlow(FlowSpec flowSpec) {
  if (flowSpec.isScheduled()) {
    return;
  }

  Config flowConfig = flowSpec.getConfig();
  String flowGroup = flowConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY);
  String flowName = flowConfig.getString(ConfigurationKeys.FLOW_NAME_KEY);
  String flowId = flowGroup + "." + flowName;

  _log.info("Checking existing adhoc flow entry for " + flowId);
  try {
    if (dagManagementStateStore.existsCurrentlyLaunchingExecOfSameFlow(flowGroup, flowName, FlowUtils.getOrCreateFlowExecutionId(flowSpec))) {
      _log.warn("Another recent adhoc flow execution found for " + flowId);
      throw TooSoonToRerunSameFlowException.wrappedOnce(flowSpec);
    }
  } catch (IOException exception) {
    String errorMessage = "Unable to check whether similar flow exists " + flowId;
    // pass the exception to the logger so the stack trace is not lost from the error log
    _log.error(errorMessage, exception);
    throw new RuntimeException(errorMessage, exception);
  }
}

public void onDeleteSpec(URI deletedSpecURI, String deletedSpecVersion) {
onDeleteSpec(deletedSpecURI, deletedSpecVersion, new Properties());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Map;
import java.util.Set;

import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
Expand All @@ -47,8 +48,7 @@
import org.apache.gobblin.service.monitoring.JobStatusRetriever;
import org.apache.gobblin.util.CompletedFuture;

import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.*;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

Expand All @@ -59,6 +59,7 @@
public class MySqlDagManagementStateStoreTest {

private ITestMetastoreDatabase testDb;
private static MultiActiveLeaseArbiter leaseArbiter;
private MySqlDagManagementStateStore dagManagementStateStore;
private static final String TEST_USER = "testUser";
public static final String TEST_PASSWORD = "testPassword";
Expand All @@ -68,6 +69,7 @@ public class MySqlDagManagementStateStoreTest {
/**
 * Prepares the test metastore database and the state store under test.
 * The static {@code leaseArbiter} mock is not created here: {@code getDummyDMSS} assigns it to the very mock it
 * injects into the store, so any assignment made before that call would be overwritten and never used.
 */
@BeforeClass
public void setUp() throws Exception {
  // Setting up mock DB
  this.testDb = TestMetastoreDatabaseFactory.get();
  this.dagManagementStateStore = getDummyDMSS(this.testDb);
}
Expand All @@ -92,6 +94,22 @@ public static <T> boolean compareLists(List<T> list1, List<T> list2) {
return true;
}

/*
  When the lease arbiter reports a similar lease within the consolidation period,
  the state store must report a currently launching execution of the same flow.
 */
@Test
public void testExistsCurrentlyLaunchingSimilarFlowGivesTrue() throws Exception {
  doReturn(true).when(leaseArbiter)
      .existsSimilarLeaseWithinConsolidationPeriod(Mockito.any(DagActionStore.LeaseParams.class));
  final long executionId = System.currentTimeMillis();
  boolean launchingExecExists =
      dagManagementStateStore.existsCurrentlyLaunchingExecOfSameFlow("testGroup", "testFlow", executionId);
  Assert.assertTrue(launchingExecExists);
}

/*
  When the lease arbiter reports no similar lease within the consolidation period,
  the state store must report that no execution of the same flow is currently launching.
 */
@Test
public void testExistsCurrentlyLaunchingSimilarFlowGivesFalse() throws Exception {
  doReturn(false).when(leaseArbiter)
      .existsSimilarLeaseWithinConsolidationPeriod(Mockito.any(DagActionStore.LeaseParams.class));
  final long executionId = System.currentTimeMillis();
  boolean launchingExecExists =
      dagManagementStateStore.existsCurrentlyLaunchingExecOfSameFlow("testGroup", "testFlow", executionId);
  Assert.assertFalse(launchingExecExists);
}

@Test
public void testAddDag() throws Exception {
Dag<JobExecutionPlan> dag = DagTestUtils.buildDag("test", 12345L);
Expand Down Expand Up @@ -150,9 +168,11 @@ public static MySqlDagManagementStateStore getDummyDMSS(ITestMetastoreDatabase t
TopologySpec topologySpec = LaunchDagProcTest.buildNaiveTopologySpec(TEST_SPEC_EXECUTOR_URI);
URI specExecURI = new URI(TEST_SPEC_EXECUTOR_URI);
topologySpecMap.put(specExecURI, topologySpec);
MultiActiveLeaseArbiter multiActiveLeaseArbiter = Mockito.mock(MultiActiveLeaseArbiter.class);
leaseArbiter = multiActiveLeaseArbiter;
MySqlDagManagementStateStore dagManagementStateStore =
new MySqlDagManagementStateStore(config, null, null, jobStatusRetriever,
MysqlDagActionStoreTest.getTestDagActionStore(testMetastoreDatabase));
MysqlDagActionStoreTest.getTestDagActionStore(testMetastoreDatabase), multiActiveLeaseArbiter);
dagManagementStateStore.setTopologySpecMap(topologySpecMap);
return dagManagementStateStore;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
public class MysqlMultiActiveLeaseArbiterTest {
private static final long EPSILON = 10000L;
private static final long MORE_THAN_EPSILON = (long) (EPSILON * 1.1);
private static final long LESS_THAN_EPSILON = (long) (EPSILON * 0.90);
// NOTE: `sleep`ing this long SIGNIFICANTLY slows tests, but we need a large enough value that exec. variability won't cause spurious failure
private static final long LINGER = 20000L;
private static final long MORE_THAN_LINGER = (long) (LINGER * 1.1);
Expand All @@ -53,9 +54,12 @@ public class MysqlMultiActiveLeaseArbiterTest {
private static final String CONSTANTS_TABLE = "constants_store";
private static final String flowGroup = "testFlowGroup";
private static final String flowGroup2 = "testFlowGroup2";
private static final String flowGroup3 = "testFlowGroup3";
private static final String flowGroup4 = "testFlowGroup4";
private static final String flowName = "testFlowName";
private static final String jobName = "testJobName";
private static final long flowExecutionId = 12345677L;
private static final long flowExecutionId = 12345677213L;
private static final long flowExecutionId1 = 12345996546L;
private static final long eventTimeMillis = 1710451837L;
// Dag actions with the same flow info but different flow action types are considered unique
private static final DagActionStore.DagAction launchDagAction =
Expand All @@ -70,6 +74,18 @@ public class MysqlMultiActiveLeaseArbiterTest {
new DagActionStore.DagAction(flowGroup2, flowName, flowExecutionId, jobName, DagActionStore.DagActionType.LAUNCH);
private static final DagActionStore.LeaseParams
launchLeaseParams2 = new DagActionStore.LeaseParams(launchDagAction2, false, eventTimeMillis);
private static final DagActionStore.LeaseParams
launchLeaseParams3 = new DagActionStore.LeaseParams(new DagActionStore.DagAction(flowGroup3, flowName, flowExecutionId, jobName,
DagActionStore.DagActionType.LAUNCH), false, eventTimeMillis);
private static final DagActionStore.LeaseParams
launchLeaseParams3_similar = new DagActionStore.LeaseParams(new DagActionStore.DagAction(flowGroup3, flowName, flowExecutionId1, jobName,
DagActionStore.DagActionType.LAUNCH), false, eventTimeMillis);
private static final DagActionStore.LeaseParams
launchLeaseParams4 = new DagActionStore.LeaseParams(new DagActionStore.DagAction(flowGroup4, flowName, flowExecutionId, jobName,
DagActionStore.DagActionType.LAUNCH), false, eventTimeMillis);
private static final DagActionStore.LeaseParams
launchLeaseParams4_similar = new DagActionStore.LeaseParams(new DagActionStore.DagAction(flowGroup4, flowName, flowExecutionId1, jobName,
DagActionStore.DagActionType.LAUNCH), false, eventTimeMillis);
private static final Timestamp dummyTimestamp = new Timestamp(99999);
private ITestMetastoreDatabase testDb;
private MysqlMultiActiveLeaseArbiter mysqlMultiActiveLeaseArbiter;
Expand Down Expand Up @@ -201,6 +217,33 @@ public void testAcquireLeaseSingleParticipant() throws Exception {
<= sixthObtainedStatus.getLeaseAcquisitionTimestamp());
}

/*
  Verifies that a similar lease (same flow group and flow name, different flow execution id) is still reported
  when queried slightly less than epsilon after the first lease was obtained and handed to completeLeaseHelper.
 */
@Test
public void testExistsSimilarLeaseWithinConsolidationPeriod() throws Exception {
  LeaseAttemptStatus initialAttempt = mysqlMultiActiveLeaseArbiter.tryAcquireLease(launchLeaseParams3, true);
  Assert.assertTrue(initialAttempt instanceof LeaseAttemptStatus.LeaseObtainedStatus);
  completeLeaseHelper(launchLeaseParams3);

  // wait just short of the consolidation period before asking about the similar flow
  Thread.sleep(LESS_THAN_EPSILON);
  boolean similarLeaseFound =
      mysqlMultiActiveLeaseArbiter.existsSimilarLeaseWithinConsolidationPeriod(launchLeaseParams3_similar);
  Assert.assertTrue(similarLeaseFound);
}

/*
  Verifies that a similar lease (same flow group and flow name, different flow execution id) is no longer
  reported once more than epsilon has passed since the first lease was obtained and handed to completeLeaseHelper.
 */
@Test
public void testDoesNotExistsSimilarLeaseWithinConsolidationPeriod() throws Exception {
  LeaseAttemptStatus initialAttempt = mysqlMultiActiveLeaseArbiter.tryAcquireLease(launchLeaseParams4, true);
  Assert.assertTrue(initialAttempt instanceof LeaseAttemptStatus.LeaseObtainedStatus);
  completeLeaseHelper(launchLeaseParams4);

  // wait past the consolidation period before asking about the similar flow
  Thread.sleep(MORE_THAN_EPSILON);
  boolean similarLeaseFound =
      mysqlMultiActiveLeaseArbiter.existsSimilarLeaseWithinConsolidationPeriod(launchLeaseParams4_similar);
  Assert.assertFalse(similarLeaseFound);
}

/*
Tests attemptLeaseIfNewRow() method to ensure a new row is inserted if no row matches the primary key in the table.
If such a row does exist, the method should disregard the resulting SQL error and return 0 rows updated, indicating
Expand Down
Loading
Loading