-
Notifications
You must be signed in to change notification settings - Fork 749
[GOBBLIN-2173] Avoid Adhoc flow spec addition for non leasable entity #4076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
618f357
3fa68a1
30929f8
a782202
0981b82
7d2b49f
01cac22
b596948
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.gobblin.runtime.api; | ||
phet marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/** | ||
* A {@link RuntimeException} thrown when a lease cannot be acquired on the provided entity. | ||
*/ | ||
public class LeaseUnavailableException extends RuntimeException { | ||
phet marked this conversation as resolved.
Show resolved
Hide resolved
|
||
public LeaseUnavailableException(String message) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. beyond clearly naming for callers, impl-wise, this definitely relates to a flow, so that should be a ctor param. consider whether to allow a catcher to reach in to access the details as instance member(s) or merely to use internally in the ctor, to contextualize the |
||
super(message); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,6 +61,16 @@ public interface MultiActiveLeaseArbiter { | |
LeaseAttemptStatus tryAcquireLease(DagActionStore.LeaseParams leaseParams, boolean adoptConsensusFlowExecutionId) | ||
throws IOException; | ||
|
||
/** | ||
* This method checks if a lease can be acquired on the flow provided in the lease params and | ||
* returns true if an entry for the same flow does not exist within epsilon time | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. very reasonable method-level javadoc... but it turns out so, please add the class-level info. mentioning the name 'epsilon' is fine, but definitely also give it a more specific name, like "Lease Consolidation Period". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
* in leaseArbiterStore | ||
* @param leaseParams uniquely identifies the flow, the present action upon it, the time the action | ||
* was triggered, and if the dag action event we're checking on is a reminder event | ||
*/ | ||
phet marked this conversation as resolved.
Show resolved
Hide resolved
|
||
boolean canAcquireLeaseOnEntity(DagActionStore.LeaseParams leaseParams) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consider renaming this to |
||
throws IOException; | ||
|
||
/** | ||
* This method is used to indicate the owner of the lease has successfully completed required actions while holding | ||
* the lease of the dag action event. It marks the lease as "no longer leasing", if the eventTimeMillis and | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -362,6 +362,12 @@ else if (leaseValidityStatus == 2) { | |
} | ||
} | ||
|
||
@Override | ||
public boolean canAcquireLeaseOnEntity(DagActionStore.LeaseParams leaseParams) throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add javadoc.. something like: |
||
Optional<GetEventInfoResult> infoResult = getExistingEventInfo(leaseParams); | ||
return infoResult.isPresent() ? !infoResult.get().isWithinEpsilon() : true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. idiomatic:
|
||
} | ||
|
||
/** | ||
* Checks leaseArbiterTable for an existing entry for this dag action and event time | ||
*/ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,13 +17,16 @@ | |
|
||
package org.apache.gobblin.service.modules.orchestration; | ||
|
||
import com.linkedin.restli.common.HttpStatus; | ||
import com.linkedin.restli.server.RestLiServiceException; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please fix import ordering, see from : https://gobblin.apache.org/docs/developer-guide/CodingStyle/ |
||
import java.io.IOException; | ||
import java.net.URI; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Properties; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import org.apache.gobblin.runtime.api.LeaseUnavailableException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
@@ -78,6 +81,7 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { | |
protected final SpecCompiler specCompiler; | ||
protected final TopologyCatalog topologyCatalog; | ||
private final JobStatusRetriever jobStatusRetriever; | ||
private final DagManagementStateStore dagManagementStateStore; | ||
|
||
protected final MetricContext metricContext; | ||
|
||
|
@@ -100,6 +104,7 @@ public Orchestrator(Config config, TopologyCatalog topologyCatalog, Optional<Log | |
this.topologyCatalog = topologyCatalog; | ||
this.flowLaunchHandler = flowLaunchHandler; | ||
this.sharedFlowMetricsSingleton = sharedFlowMetricsSingleton; | ||
this.dagManagementStateStore = dagManagementStateStore; | ||
this.jobStatusRetriever = jobStatusRetriever; | ||
this.specCompiler = flowCompilationValidationHelper.getSpecCompiler(); | ||
// todo remove the need to set topology factory outside of constructor GOBBLIN-2056 | ||
|
@@ -125,6 +130,7 @@ public AddSpecResponse onAddSpec(Spec addedSpec) { | |
_log.info("Orchestrator - onAdd[Topology]Spec: " + addedSpec); | ||
this.specCompiler.onAddSpec(addedSpec); | ||
} else if (addedSpec instanceof FlowSpec) { | ||
validateAdhocFlowLeasability((FlowSpec) addedSpec); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: "validate"/"verify" are good for methods returning a boolean. the entire purpose of this |
||
_log.info("Orchestrator - onAdd[Flow]Spec: " + addedSpec); | ||
return this.specCompiler.onAddSpec(addedSpec); | ||
} else { | ||
|
@@ -133,6 +139,25 @@ public AddSpecResponse onAddSpec(Spec addedSpec) { | |
return new AddSpecResponse<>(null); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just an FYI, this also gets called during updating a flow. But since we have a condition of checking the flow is scheduled or not and we don't expect users to update an adhoc flow, we should be fine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But since this can still be called for adhoc flows, it would be good to test what the behaviour is. No need to handle it specially, but to know what the behaviour is would be good. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the callout, will add it for the test suite |
||
|
||
private void validateAdhocFlowLeasability(FlowSpec flowSpec) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add javadoc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
if (!flowSpec.isScheduled()) { | ||
Config flowConfig = flowSpec.getConfig(); | ||
String flowGroup = flowConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY); | ||
String flowName = flowConfig.getString(ConfigurationKeys.FLOW_NAME_KEY); | ||
DagActionStore.DagAction dagAction = DagActionStore.DagAction.forFlow(flowGroup, flowName, | ||
FlowUtils.getOrCreateFlowExecutionId(flowSpec), DagActionStore.DagActionType.LAUNCH); | ||
DagActionStore.LeaseParams leaseParams = new DagActionStore.LeaseParams(dagAction, System.currentTimeMillis()); | ||
try { | ||
if (!dagManagementStateStore.canAcquireLeaseOnEntity(leaseParams)) { | ||
throw new LeaseUnavailableException("Lease already occupied by another execution of this flow"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add an info log here with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added lease params which contains details of flow name and flow group |
||
} | ||
} catch (IOException exception) { | ||
_log.error(String.format("Failed to query leaseArbiterTable for existing flow details: %s", flowSpec), exception); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we called (anyway, the table's name is dynamically set in config). instead:
(also on the line below) |
||
throw new RuntimeException("Error querying leaseArbiterTable", exception); | ||
} | ||
} | ||
} | ||
|
||
public void onDeleteSpec(URI deletedSpecURI, String deletedSpecVersion) { | ||
onDeleteSpec(deletedSpecURI, deletedSpecVersion, new Properties()); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ | |
import java.util.Map; | ||
import java.util.Set; | ||
|
||
import org.mockito.Mockito; | ||
import org.testng.Assert; | ||
import org.testng.annotations.AfterClass; | ||
import org.testng.annotations.BeforeClass; | ||
|
@@ -59,6 +60,7 @@ | |
public class MySqlDagManagementStateStoreTest { | ||
|
||
private ITestMetastoreDatabase testDb; | ||
private static MultiActiveLeaseArbiter leaseArbiter; | ||
private MySqlDagManagementStateStore dagManagementStateStore; | ||
private static final String TEST_USER = "testUser"; | ||
public static final String TEST_PASSWORD = "testPassword"; | ||
|
@@ -68,6 +70,7 @@ public class MySqlDagManagementStateStoreTest { | |
@BeforeClass | ||
public void setUp() throws Exception { | ||
// Setting up mock DB | ||
this.leaseArbiter = mock(MultiActiveLeaseArbiter.class); | ||
this.testDb = TestMetastoreDatabaseFactory.get(); | ||
this.dagManagementStateStore = getDummyDMSS(this.testDb); | ||
} | ||
|
@@ -92,6 +95,16 @@ public static <T> boolean compareLists(List<T> list1, List<T> list2) { | |
return true; | ||
} | ||
|
||
@Test | ||
public void testcanAcquireLeaseOnEntity() throws Exception{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. camel case typo... (but anyway, |
||
Mockito.when(leaseArbiter.canAcquireLeaseOnEntity(Mockito.any(DagActionStore.LeaseParams.class))).thenReturn(true); | ||
String flowName = "testFlow"; | ||
String flowGroup = "testGroup"; | ||
DagActionStore.DagAction dagAction = new DagActionStore.DagAction(flowName, flowGroup, System.currentTimeMillis(), "testJob", DagActionStore.DagActionType.LAUNCH); | ||
DagActionStore.LeaseParams leaseParams = new DagActionStore.LeaseParams(dagAction); | ||
Assert.assertTrue(dagManagementStateStore.canAcquireLeaseOnEntity(leaseParams)); | ||
} | ||
|
||
@Test | ||
public void testAddDag() throws Exception { | ||
Dag<JobExecutionPlan> dag = DagTestUtils.buildDag("test", 12345L); | ||
|
@@ -150,9 +163,11 @@ public static MySqlDagManagementStateStore getDummyDMSS(ITestMetastoreDatabase t | |
TopologySpec topologySpec = LaunchDagProcTest.buildNaiveTopologySpec(TEST_SPEC_EXECUTOR_URI); | ||
URI specExecURI = new URI(TEST_SPEC_EXECUTOR_URI); | ||
topologySpecMap.put(specExecURI, topologySpec); | ||
MultiActiveLeaseArbiter multiActiveLeaseArbiter = Mockito.mock(MultiActiveLeaseArbiter.class); | ||
leaseArbiter = multiActiveLeaseArbiter; | ||
MySqlDagManagementStateStore dagManagementStateStore = | ||
new MySqlDagManagementStateStore(config, null, null, jobStatusRetriever, | ||
MysqlDagActionStoreTest.getTestDagActionStore(testMetastoreDatabase)); | ||
MysqlDagActionStoreTest.getTestDagActionStore(testMetastoreDatabase), multiActiveLeaseArbiter); | ||
dagManagementStateStore.setTopologySpecMap(topologySpecMap); | ||
return dagManagementStateStore; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
usually exception messages are designed for logging, more than for end-user consumption, so probably not appropriate to blindly return that. (it's sometimes done for a 5xx error, as above... but even that can be inadvisable.)
anyway, the 409 above might offer a better template:
(to provide N we may wish to tunnel the value of epsilon... or at least how many secs remain before a subsequent launch would be possible)
also: when do we want to `return` (as that 409 above does), vs. `throw`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated as discussed offline