-
Notifications
You must be signed in to change notification settings - Fork 749
Delete Launch Action Events After Processing #3837
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -103,19 +103,23 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { | |
| private Optional<FlowTriggerHandler> flowTriggerHandler; | ||
| @Getter | ||
| private final SharedFlowMetricsSingleton sharedFlowMetricsSingleton; | ||
| @Getter | ||
| private final Optional<DagActionStore> dagActionStore; | ||
|
|
||
| private final ClassAliasResolver<SpecCompiler> aliasResolver; | ||
|
|
||
| public Orchestrator(Config config, Optional<TopologyCatalog> topologyCatalog, Optional<DagManager> dagManager, | ||
| Optional<Logger> log, FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled, | ||
| Optional<FlowTriggerHandler> flowTriggerHandler, SharedFlowMetricsSingleton sharedFlowMetricsSingleton) { | ||
| Optional<FlowTriggerHandler> flowTriggerHandler, SharedFlowMetricsSingleton sharedFlowMetricsSingleton, | ||
| Optional<DagActionStore> dagActionStore) { | ||
| _log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass()); | ||
| this.aliasResolver = new ClassAliasResolver<>(SpecCompiler.class); | ||
| this.topologyCatalog = topologyCatalog; | ||
| this.dagManager = dagManager; | ||
| this.flowStatusGenerator = flowStatusGenerator; | ||
| this.flowTriggerHandler = flowTriggerHandler; | ||
| this.sharedFlowMetricsSingleton = sharedFlowMetricsSingleton; | ||
| this.dagActionStore = dagActionStore; | ||
| try { | ||
| String specCompilerClassName = ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_FLOWCOMPILER_CLASS; | ||
| if (config.hasPath(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWCOMPILER_CLASS_KEY)) { | ||
|
|
@@ -161,9 +165,9 @@ public Orchestrator(Config config, Optional<TopologyCatalog> topologyCatalog, Op | |
| @Inject | ||
| public Orchestrator(Config config, FlowStatusGenerator flowStatusGenerator, Optional<TopologyCatalog> topologyCatalog, | ||
| Optional<DagManager> dagManager, Optional<Logger> log, Optional<FlowTriggerHandler> flowTriggerHandler, | ||
| SharedFlowMetricsSingleton sharedFlowMetricsSingleton) { | ||
| SharedFlowMetricsSingleton sharedFlowMetricsSingleton, Optional<DagActionStore> dagActionStore) { | ||
| this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, flowTriggerHandler, | ||
| sharedFlowMetricsSingleton); | ||
| sharedFlowMetricsSingleton, dagActionStore); | ||
| } | ||
|
|
||
|
|
||
|
|
@@ -266,7 +270,7 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil | |
| return; | ||
| } | ||
|
|
||
| String flowExecutionId = flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD); | ||
| String flowExecutionId = TimingEventUtils.getFlowExecutionIdFromFlowMetadata(flowMetadata); | ||
| DagActionStore.DagAction flowAction = | ||
| new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.LAUNCH); | ||
| flowTriggerHandler.get().handleTriggerEvent(jobProps, flowAction, triggerTimestampMillis, isReminderEvent); | ||
|
|
@@ -292,7 +296,11 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil | |
|
|
||
| // Depending on if DagManager is present, handle execution | ||
| if (this.dagManager.isPresent()) { | ||
| submitFlowToDagManager((FlowSpec) spec, jobExecutionPlanDag); | ||
| DagActionStore.DagAction launchAction = | ||
| new DagActionStore.DagAction(flowGroup, flowName, | ||
| TimingEventUtils.getFlowExecutionIdFromFlowMetadata(flowMetadata), | ||
| DagActionStore.FlowActionType.LAUNCH); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could we raise up the |
||
| submitFlowToDagManager((FlowSpec) spec, jobExecutionPlanDag, launchAction); | ||
| } else { | ||
| // Schedule all compiled JobSpecs on their respective Executor | ||
| for (Dag.DagNode<JobExecutionPlan> dagNode : jobExecutionPlanDag.getNodes()) { | ||
|
|
@@ -335,22 +343,25 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil | |
| Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS); | ||
| } | ||
|
|
||
| public void submitFlowToDagManager(FlowSpec flowSpec, Optional<String> optionalFlowExecutionId) throws IOException, InterruptedException { | ||
| public void submitFlowToDagManager(FlowSpec flowSpec, DagActionStore.DagAction flowAction) throws IOException, InterruptedException { | ||
| Optional<Dag<JobExecutionPlan>> optionalJobExecutionPlanDag = | ||
| this.flowCompilationValidationHelper.createExecutionPlanIfValid(flowSpec, optionalFlowExecutionId); | ||
| this.flowCompilationValidationHelper.createExecutionPlanIfValid(flowSpec, | ||
| Optional.of(flowAction.getFlowExecutionId())); | ||
| if (optionalJobExecutionPlanDag.isPresent()) { | ||
| submitFlowToDagManager(flowSpec, optionalJobExecutionPlanDag.get()); | ||
| submitFlowToDagManager(flowSpec, optionalJobExecutionPlanDag.get(), flowAction); | ||
| } else { | ||
| _log.warn("Flow: {} submitted to dagManager failed to compile and produce a job execution plan dag", flowSpec); | ||
| Instrumented.markMeter(this.flowOrchestrationFailedMeter); | ||
| } | ||
| } | ||
|
|
||
| public void submitFlowToDagManager(FlowSpec flowSpec, Dag<JobExecutionPlan> jobExecutionPlanDag) | ||
| public void submitFlowToDagManager(FlowSpec flowSpec, Dag<JobExecutionPlan> jobExecutionPlanDag, | ||
| DagActionStore.DagAction launchAction) | ||
| throws IOException { | ||
| try { | ||
| //Send the dag to the DagManager. | ||
| // Send the dag to the DagManager and delete the action after persisting it to avoid redundant execution on start up | ||
| this.dagManager.get().addDag(jobExecutionPlanDag, true, true); | ||
| this.dagActionStore.get().deleteDagAction(launchAction); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what if the DM hasn't successfully handled it (e.g. the service fails after line 364, but before the DM actions complete)? would the action/event be lost?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. overall I recognize we're in an interim phase of our multi-leader journey, where we have yet to integrate the DagMgr w/ the MultiActiveLeaseArbiter... this is only temporary so let's not over-engineer I'm concerned however that this PR's approach presumes successful handling for non-durable/in-memory state. a safer pattern would be to remove the LAUNCH action from the DagAction store only after the DM has finished launching the flow execution.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's great news. we're all set then! sorry, FSR I thought it merely added the DAG to a queue, and hadn't recalled that it synchronously persists the DAG. |
||
| } catch (Exception ex) { | ||
| String failureMessage = "Failed to add Job Execution Plan due to: " + ex.getMessage(); | ||
| _log.warn("Orchestrator call - " + failureMessage, ex); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,6 +47,13 @@ static Map<String, String> getFlowMetadata(Config flowConfig) { | |
| return metadata; | ||
| } | ||
|
|
||
| /** | ||
| * Retrieves a flowExecutionId from flowMetadata map and returns dummy value if one is not set | ||
| */ | ||
| public static String getFlowExecutionIdFromFlowMetadata(Map<String, String> flowMetadata) { | ||
| return flowMetadata.getOrDefault(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, "<<no flowExecutionId>>"); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I really like the abstraction! as far as where this should live... don't we already have another utils class with static methods for extracting the flow group and flow name?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This class has methods relating to this map of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makes sense. I now see that the |
||
|
|
||
| static Map<String, String> getJobMetadata(Map<String, String> flowMetadata, JobExecutionPlan jobExecutionPlan) { | ||
| Map<String, String> jobMetadata = Maps.newHashMap(); | ||
| JobSpec jobSpec = jobExecutionPlan.getJobSpec(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,6 @@ | |
| package org.apache.gobblin.service.monitoring; | ||
|
|
||
| import com.google.common.annotations.VisibleForTesting; | ||
| import com.google.common.base.Optional; | ||
| import com.google.common.cache.CacheBuilder; | ||
| import com.google.common.cache.CacheLoader; | ||
| import com.google.common.cache.LoadingCache; | ||
|
|
@@ -168,7 +167,7 @@ protected void processMessage(DecodeableKafkaRecord message) { | |
| throw new RuntimeException(String.format("Received LAUNCH dagAction while not in multi-active scheduler " | ||
| + "mode for flowAction: %s", dagAction)); | ||
| } | ||
| submitFlowToDagManagerHelper(flowGroup, flowName, flowExecutionId); | ||
| submitFlowToDagManagerHelper(dagAction); | ||
| } else { | ||
| log.warn("Received unsupported dagAction {}. Expected to be a KILL, RESUME, or LAUNCH", dagActionType); | ||
| this.unexpectedErrors.mark(); | ||
|
|
@@ -195,15 +194,15 @@ protected void processMessage(DecodeableKafkaRecord message) { | |
| dagActionsSeenCache.put(changeIdentifier, changeIdentifier); | ||
| } | ||
|
|
||
| protected void submitFlowToDagManagerHelper(String flowGroup, String flowName, String flowExecutionId) { | ||
| protected void submitFlowToDagManagerHelper(DagActionStore.DagAction dagAction) { | ||
|
Comment on lines
-198
to
+195
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. excellent--much clearer method signature! |
||
| // Retrieve job execution plan by recompiling the flow spec to send to the DagManager | ||
| FlowId flowId = new FlowId().setFlowGroup(flowGroup).setFlowName(flowName); | ||
| FlowId flowId = new FlowId().setFlowGroup(dagAction.getFlowGroup()).setFlowName(dagAction.getFlowName()); | ||
| FlowSpec spec = null; | ||
| try { | ||
| URI flowUri = FlowSpec.Utils.createFlowSpecUri(flowId); | ||
| spec = (FlowSpec) flowCatalog.getSpecs(flowUri); | ||
| // Pass flowExecutionId to DagManager to be used for scheduled flows that do not already contain a flowExecutionId | ||
| this.orchestrator.submitFlowToDagManager(spec, Optional.of(flowExecutionId)); | ||
| this.orchestrator.submitFlowToDagManager(spec, dagAction); | ||
| } catch (URISyntaxException e) { | ||
| log.warn("Could not create URI object for flowId {}. Exception {}", flowId, e.getMessage()); | ||
| this.failedFlowLaunchSubmissions.mark(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the motivation behind changing this function here? From my point of view we should be enforcing the existence of a flow execution ID here as a dag must have an associated flow execution ID, otherwise we should block progression as the dag is likely corrupt
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see, I changed this to abstract away the use of the key. I could update the function to not provide a default if one doesn't exist, always expecting existence of flow execution id or revert this. What do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure in that case