-
Notifications
You must be signed in to change notification settings - Fork 749
[GOBBLIN-1984] Add consensus flowExecutionId to FlowSpec to use for compilation #3857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
8190591
6eaa5ca
70a2e64
465ed93
94a10fe
82fb861
ea8b1e0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.gobblin.runtime.api; | ||
|
|
||
| import com.typesafe.config.Config; | ||
| import java.net.URI; | ||
| import java.net.URISyntaxException; | ||
| import java.util.Properties; | ||
| import org.apache.gobblin.configuration.ConfigurationKeys; | ||
| import org.apache.gobblin.service.FlowId; | ||
| import org.testng.Assert; | ||
| import org.testng.annotations.Test; | ||
|
|
||
|
|
||
| public class FlowSpecTest { | ||
|
|
||
| /** | ||
| * Tests the addProperty() function to ensure that, after adding a property, the flowSpec exposes both its original | ||
| * properties and the newly added one through its Properties and Config views | ||
| * @throws URISyntaxException | ||
| */ | ||
| @Test | ||
| public void testAddProperty() throws URISyntaxException { | ||
| String flowGroup = "myGroup"; | ||
| String flowName = "myName"; | ||
| String flowExecutionId = "1234"; | ||
| FlowId flowId = new FlowId().setFlowGroup(flowGroup).setFlowName(flowName); | ||
| URI flowUri = FlowSpec.Utils.createFlowSpecUri(flowId); | ||
|
|
||
| // Create properties to be used as config | ||
| Properties properties = new Properties(); | ||
| properties.setProperty(ConfigurationKeys.FLOW_GROUP_KEY, flowGroup); | ||
| properties.setProperty(ConfigurationKeys.FLOW_NAME_KEY, flowName); | ||
| properties.setProperty(ConfigurationKeys.FLOW_IS_REMINDER_EVENT_KEY, "true"); | ||
|
|
||
| FlowSpec flowSpec = FlowSpec.builder(flowUri).withConfigAsProperties(properties).build(); | ||
| flowSpec.addProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, flowExecutionId); | ||
|
|
||
| Properties updatedProperties = flowSpec.getConfigAsProperties(); | ||
| Assert.assertEquals(updatedProperties.getProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY), flowExecutionId); | ||
| Assert.assertEquals(updatedProperties.getProperty(ConfigurationKeys.FLOW_GROUP_KEY), flowGroup); | ||
| Assert.assertEquals(updatedProperties.getProperty(ConfigurationKeys.FLOW_NAME_KEY), flowName); | ||
| Assert.assertEquals(updatedProperties.getProperty(ConfigurationKeys.FLOW_IS_REMINDER_EVENT_KEY), "true"); | ||
|
|
||
| Config updatedConfig = flowSpec.getConfig(); | ||
| Assert.assertEquals(updatedConfig.getString(ConfigurationKeys.FLOW_EXECUTION_ID_KEY), flowExecutionId); | ||
| Assert.assertEquals(updatedConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY), flowGroup); | ||
| Assert.assertEquals(updatedConfig.getString(ConfigurationKeys.FLOW_NAME_KEY), flowName); | ||
| Assert.assertEquals(updatedConfig.getString(ConfigurationKeys.FLOW_IS_REMINDER_EVENT_KEY), "true"); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -65,13 +65,10 @@ public final class FlowCompilationValidationHelper { | |
| * flowspec can be compiled. If the pre-conditions hold, then a JobExecutionPlan is constructed and returned to the | ||
| * caller. | ||
| * @param flowSpec | ||
| * @param optionalFlowExecutionId for scheduled (non-ad-hoc) flows, to pass the ID "laundered" via the DB; | ||
| * see: {@link MysqlMultiActiveLeaseArbiter javadoc section titled | ||
| * `Database event_timestamp laundering`} | ||
| * @return jobExecutionPlan dag if one can be constructed for the given flowSpec | ||
| */ | ||
| public Optional<Dag<JobExecutionPlan>> createExecutionPlanIfValid(FlowSpec flowSpec, | ||
| Optional<String> optionalFlowExecutionId) throws IOException, InterruptedException { | ||
| public Optional<Dag<JobExecutionPlan>> createExecutionPlanIfValid(FlowSpec flowSpec) | ||
| throws IOException, InterruptedException { | ||
| Config flowConfig = flowSpec.getConfig(); | ||
| String flowGroup = flowConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY); | ||
| String flowName = flowConfig.getString(ConfigurationKeys.FLOW_NAME_KEY); | ||
|
|
@@ -94,7 +91,7 @@ public Optional<Dag<JobExecutionPlan>> createExecutionPlanIfValid(FlowSpec flowS | |
| return Optional.absent(); | ||
| } | ||
|
|
||
| addFlowExecutionIdIfAbsent(flowMetadata, optionalFlowExecutionId, jobExecutionPlanDagOptional.get()); | ||
| addFlowExecutionIdIfAbsent(flowMetadata, jobExecutionPlanDagOptional.get()); | ||
| if (flowCompilationTimer.isPresent()) { | ||
| flowCompilationTimer.get().stop(flowMetadata); | ||
| } | ||
|
|
@@ -122,7 +119,7 @@ public Optional<Dag<JobExecutionPlan>> validateAndHandleConcurrentExecution(Conf | |
| sharedFlowMetricsSingleton.conditionallyUpdateFlowGaugeSpecState(spec, | ||
| SharedFlowMetricsSingleton.CompiledState.SKIPPED); | ||
| Instrumented.markMeter(sharedFlowMetricsSingleton.getSkippedFlowsMeter()); | ||
| if (!isScheduledFlow((FlowSpec) spec)) { | ||
| if (!((FlowSpec) spec).isScheduled()) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a scenario where
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good callout, from checking the code we only call these functions with a FlowSpec and otherwise throw an error. Changing it to FlowSpec. |
||
| // For ad-hoc flow, we might already increase quota, we need to decrease here | ||
| for (Dag.DagNode dagNode : jobExecutionPlanDag.getStartNodes()) { | ||
| quotaManager.releaseQuota(dagNode); | ||
|
|
@@ -181,32 +178,13 @@ public static void populateFlowCompilationFailedEventMessage(Optional<EventSubmi | |
| } | ||
|
|
||
| /** | ||
| * If it is a scheduled flow (which does not have flowExecutionId in the FlowSpec) and the flow compilation is | ||
| * successful, retrieve flowExecutionId from the JobSpec. | ||
| * If it is a scheduled flow run without multi-active scheduler configuration (where the FlowSpec does not have a | ||
| * flowExecutionId) and the flow compilation is successful, retrieve flowExecutionId from the JobSpec. | ||
| */ | ||
| public static void addFlowExecutionIdIfAbsent(Map<String,String> flowMetadata, | ||
| Dag<JobExecutionPlan> jobExecutionPlanDag) { | ||
| addFlowExecutionIdIfAbsent(flowMetadata, Optional.absent(), jobExecutionPlanDag); | ||
| } | ||
|
|
||
| /** | ||
| * If it is a scheduled flow (which does not have flowExecutionId in the FlowSpec) and the flow compilation is | ||
| * successful, add a flowExecutionId using the optional parameter if it exists otherwise retrieve it from the JobSpec. | ||
| */ | ||
| public static void addFlowExecutionIdIfAbsent(Map<String,String> flowMetadata, | ||
| Optional<String> optionalFlowExecutionId, Dag<JobExecutionPlan> jobExecutionPlanDag) { | ||
| if (optionalFlowExecutionId.isPresent()) { | ||
| flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, optionalFlowExecutionId.get()); | ||
| } | ||
| flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, | ||
| jobExecutionPlanDag.getNodes().get(0).getValue().getJobSpec().getConfigAsProperties().getProperty( | ||
| ConfigurationKeys.FLOW_EXECUTION_ID_KEY)); | ||
| } | ||
|
|
||
| /** | ||
| * Return true if the spec contains a schedule, false otherwise. | ||
| */ | ||
| public static boolean isScheduledFlow(FlowSpec spec) { | ||
| return spec.getConfigAsProperties().containsKey(ConfigurationKeys.JOB_SCHEDULE_KEY); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,7 @@ | |
| import lombok.Data; | ||
| import lombok.Getter; | ||
| import lombok.extern.slf4j.Slf4j; | ||
| import org.apache.gobblin.configuration.ConfigurationKeys; | ||
| import org.apache.gobblin.kafka.client.DecodeableKafkaRecord; | ||
| import org.apache.gobblin.metrics.ContextAwareGauge; | ||
| import org.apache.gobblin.metrics.ContextAwareMeter; | ||
|
|
@@ -276,8 +277,12 @@ protected void submitFlowToDagManagerHelper(DagActionStore.DagAction dagAction, | |
| try { | ||
| URI flowUri = FlowSpec.Utils.createFlowSpecUri(flowId); | ||
| spec = (FlowSpec) flowCatalog.getSpecs(flowUri); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should i.e. the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you're wondering how Java knows which to choose at runtime - given this is still an the difference here is the more-specific typing in cases where the to illustrate, these two are legal: and but this does NOT type-check: it not only saves us typing on casting, but, in situations where we've already successfully casted to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I updated this method but left the other methods that return |
||
| // Pass flowExecutionId to DagManager to be used for scheduled flows that do not already contain a flowExecutionId | ||
| this.orchestrator.submitFlowToDagManager(spec, dagAction); | ||
| /* Update the spec to contain the flowExecutionId from the dagAction for scheduled flows that do not already | ||
| contain a flowExecutionId. Adhoc flowSpecs are already consistent with the dagAction so there's no effective | ||
| change. It's crucial to adopt the consensus flowExecutionId here to prevent creating a new one during compilation. | ||
| */ | ||
| spec.addProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, dagAction.getFlowExecutionId()); | ||
| this.orchestrator.submitFlowToDagManager(spec); | ||
| } catch (URISyntaxException e) { | ||
| log.warn("Could not create URI object for flowId {}. Exception {}", flowId, e.getMessage()); | ||
| launchSubmissionMetricProxy.markFailure(); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to ensure these two (now modifiable) fields only ever change in lockstep, instances now require synchronization around all access of either field
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added the `volatile` keyword and a note about accessing both parameters with the `synchronized` keyword if there is multi-threaded access (which is unlikely since we'd probably only access one, not both). When trying to use the `synchronized` keyword on the two functions, it results in a number of `Inconsistent synchronization of org.apache.gobblin.runtime.api.FlowSpec.config; locked 50% of time` errors, so I had to remove the `@Data` annotation for the class and would have to add separate `@Getter/Setter/toString` etc. annotations for all the fields to make only `Config`/`ConfigAsProperties` synchronized. I don't think this is that useful, so I made the compromise above. Let me know.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably overkill but I think setting a rw lock would handle this right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
suggested to set `configAsProperties` and `config` as `private` and `volatile`, so one should always be forced to use this method to update them
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will also remove the findBugsMain error, because by annotating them `volatile` we are telling it that these fields can be changed from other threads.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I updated the fields to be private so they are only updated through the addProperty method, and because we don't expect this scenario to occur, I don't want to overcomplicate it with a rw lock for now. Future developers should be able to quickly notice this with the volatile keyword and documentation.