Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import com.linkedin.data.template.StringMap;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigValueFactory;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.net.URI;
import java.net.URISyntaxException;
Expand All @@ -35,6 +36,7 @@
import java.util.List;
import java.util.Properties;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.extern.slf4j.Slf4j;
Expand All @@ -56,6 +58,7 @@
*
*/
@Alpha
@AllArgsConstructor
@Data
@EqualsAndHashCode(exclude={"compilationErrors"})
@SuppressFBWarnings(value="SE_BAD_FIELD",
Expand All @@ -75,8 +78,8 @@ public class FlowSpec implements Configurable, Spec {
/** Human-readable description of the flow spec */
final String description;

/** Flow config as a typesafe config object */
final Config config;
/** Flow config as a typesafe config object which can be replaced */
Config config;

/** Flow config as a properties collection for backwards compatibility */
// Note that this property is not strictly necessary as it can be generated from the typesafe
Expand Down Expand Up @@ -125,6 +128,19 @@ public static FlowSpec.Builder builder(URI catalogURI, Properties flowProps) {
}
}

/**
* Add property to Config (also propagated to the Properties field)
* @param key
* @param value
*/
public void addProperty(String key, String value) {

@phet phet Jan 12, 2024

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to ensure these two (now modifiable) fields only ever change in lockstep, instances now require synchronization around all access of either field

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the volatile keyword and a note about accessing both fields with the synchronized keyword in case of multi-threaded access (which is unlikely, since we'd probably only access one, not both). When trying to use the synchronized keyword on the two functions, it results in a number of "Inconsistent synchronization of org.apache.gobblin.runtime.api.FlowSpec.config; locked 50% of time" errors, so I had to remove the @Data annotation for the class and would have to add separate @Getter/@Setter/@ToString etc. annotations for all the fields to make only Config/ConfigAsProperties synchronized. I don't think this is that useful, so I made the compromise above. Let me know.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably overkill but I think setting a rw lock would handle this right?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested to set configAsProperties and config as private and volatile, so one should always be forced to use this method to update them.

@arjun4084346 arjun4084346 Jan 12, 2024

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will also remove the findBugsMain error, because by annotating them volatile we are telling it that these fields can be changed from other threads.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated the fields to be private so they are only updated through the addProperty method, and because we don't expect this scenario to occur, I don't want to overcomplicate things with a rw lock for now. Future developers should be able to quickly notice this with the volatile keyword and documentation.

this.config = config.withValue(key, ConfigValueFactory.fromAnyRef(value));
// Make sure configAsProperties has been initialized and is updated
this.getConfigAsProperties();
this.configAsProperties.setProperty(key, value);

}

/**
 * Records a compilation error on this flow spec.
 *
 * Builds a {@code CompilationError} from the spec's current config (as returned by {@code getConfig()}) together
 * with the supplied arguments, and appends it to {@code compilationErrors}.
 *
 * @param src passed through to the {@code CompilationError}; presumably the source of the failed hop - confirm
 * @param dst passed through to the {@code CompilationError}; presumably the destination of the failed hop - confirm
 * @param errorMessage message describing the compilation failure
 * @param numberOfHops passed through to the {@code CompilationError} unchanged
 */
public void addCompilationError(String src, String dst, String errorMessage, int numberOfHops) {
  CompilationError compilationError = new CompilationError(getConfig(), src, dst, errorMessage, numberOfHops);
  this.compilationErrors.add(compilationError);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.gobblin.runtime.api;

import com.typesafe.config.Config;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Properties;
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.service.FlowId;
import org.testng.Assert;
import org.testng.annotations.Test;


public class FlowSpecTest {

  /**
   * Verifies that {@code addProperty()} adds the new key to the flowSpec while keeping the properties it was built
   * with. The result is checked through both views of the same spec: {@code getConfigAsProperties()} and
   * {@code getConfig()}.
   * @throws URISyntaxException if the flow spec URI cannot be constructed
   */
  @Test
  public void testAddProperty() throws URISyntaxException {
    final String group = "myGroup";
    final String name = "myName";
    final String executionId = "1234";
    URI specUri = FlowSpec.Utils.createFlowSpecUri(new FlowId().setFlowGroup(group).setFlowName(name));

    // Properties the spec is built with; the execution id is deliberately left out here
    Properties initialProps = new Properties();
    initialProps.setProperty(ConfigurationKeys.FLOW_GROUP_KEY, group);
    initialProps.setProperty(ConfigurationKeys.FLOW_NAME_KEY, name);
    initialProps.setProperty(ConfigurationKeys.FLOW_IS_REMINDER_EVENT_KEY, "true");

    FlowSpec spec = FlowSpec.builder(specUri).withConfigAsProperties(initialProps).build();
    spec.addProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, executionId);

    // Key/value pairs expected after the update: the added one first, then the original ones
    String[][] expectedEntries = {
        {ConfigurationKeys.FLOW_EXECUTION_ID_KEY, executionId},
        {ConfigurationKeys.FLOW_GROUP_KEY, group},
        {ConfigurationKeys.FLOW_NAME_KEY, name},
        {ConfigurationKeys.FLOW_IS_REMINDER_EVENT_KEY, "true"}
    };

    // The Properties view must contain every expected entry
    Properties propsAfterUpdate = spec.getConfigAsProperties();
    for (String[] entry : expectedEntries) {
      Assert.assertEquals(propsAfterUpdate.getProperty(entry[0]), entry[1]);
    }

    // The Config view must agree with the Properties view
    Config configAfterUpdate = spec.getConfig();
    for (String[] entry : expectedEntries) {
      Assert.assertEquals(configAfterUpdate.getString(entry[0]), entry[1]);
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,9 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil
Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS);
}

public void submitFlowToDagManager(FlowSpec flowSpec, DagActionStore.DagAction flowAction) throws IOException, InterruptedException {
public void submitFlowToDagManager(FlowSpec flowSpec) throws IOException, InterruptedException {
Optional<Dag<JobExecutionPlan>> optionalJobExecutionPlanDag =
this.flowCompilationValidationHelper.createExecutionPlanIfValid(flowSpec,
Optional.of(flowAction.getFlowExecutionId()));
this.flowCompilationValidationHelper.createExecutionPlanIfValid(flowSpec);
if (optionalJobExecutionPlanDag.isPresent()) {
submitFlowToDagManager(flowSpec, optionalJobExecutionPlanDag.get());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,10 @@ public final class FlowCompilationValidationHelper {
* flowspec can be compiled. If the pre-conditions hold, then a JobExecutionPlan is constructed and returned to the
* caller.
* @param flowSpec
* @param optionalFlowExecutionId for scheduled (non-ad-hoc) flows, to pass the ID "laundered" via the DB;
* see: {@link MysqlMultiActiveLeaseArbiter javadoc section titled
* `Database event_timestamp laundering`}
* @return jobExecutionPlan dag if one can be constructed for the given flowSpec
*/
public Optional<Dag<JobExecutionPlan>> createExecutionPlanIfValid(FlowSpec flowSpec,
Optional<String> optionalFlowExecutionId) throws IOException, InterruptedException {
public Optional<Dag<JobExecutionPlan>> createExecutionPlanIfValid(FlowSpec flowSpec)
throws IOException, InterruptedException {
Config flowConfig = flowSpec.getConfig();
String flowGroup = flowConfig.getString(ConfigurationKeys.FLOW_GROUP_KEY);
String flowName = flowConfig.getString(ConfigurationKeys.FLOW_NAME_KEY);
Expand All @@ -94,7 +91,7 @@ public Optional<Dag<JobExecutionPlan>> createExecutionPlanIfValid(FlowSpec flowS
return Optional.absent();
}

addFlowExecutionIdIfAbsent(flowMetadata, optionalFlowExecutionId, jobExecutionPlanDagOptional.get());
addFlowExecutionIdIfAbsent(flowMetadata, jobExecutionPlanDagOptional.get());
if (flowCompilationTimer.isPresent()) {
flowCompilationTimer.get().stop(flowMetadata);
}
Expand Down Expand Up @@ -122,7 +119,7 @@ public Optional<Dag<JobExecutionPlan>> validateAndHandleConcurrentExecution(Conf
sharedFlowMetricsSingleton.conditionallyUpdateFlowGaugeSpecState(spec,
SharedFlowMetricsSingleton.CompiledState.SKIPPED);
Instrumented.markMeter(sharedFlowMetricsSingleton.getSkippedFlowsMeter());
if (!isScheduledFlow((FlowSpec) spec)) {
if (!((FlowSpec) spec).isScheduled()) {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a scenario where spec isn't a flowspec here? Maybe it makes sense for the function to require a flowspec so we don't need to keep casting.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good callout, from checking the code we only call these functions with a FlowSpec and otherwise throw an error. Changing it to FlowSpec.

// For ad-hoc flow, we might already increase quota, we need to decrease here
for (Dag.DagNode dagNode : jobExecutionPlanDag.getStartNodes()) {
quotaManager.releaseQuota(dagNode);
Expand Down Expand Up @@ -181,32 +178,13 @@ public static void populateFlowCompilationFailedEventMessage(Optional<EventSubmi
}

/**
* If it is a scheduled flow (which does not have flowExecutionId in the FlowSpec) and the flow compilation is
* successful, retrieve flowExecutionId from the JobSpec.
* If it is a scheduled flow run without multi-active scheduler configuration (where the FlowSpec does not have a
* flowExecutionId) and the flow compilation is successful, retrieve flowExecutionId from the JobSpec.
*/
public static void addFlowExecutionIdIfAbsent(Map<String,String> flowMetadata,
Dag<JobExecutionPlan> jobExecutionPlanDag) {
addFlowExecutionIdIfAbsent(flowMetadata, Optional.absent(), jobExecutionPlanDag);
}

/**
* If it is a scheduled flow (which does not have flowExecutionId in the FlowSpec) and the flow compilation is
* successful, add a flowExecutionId using the optional parameter if it exists otherwise retrieve it from the JobSpec.
*/
public static void addFlowExecutionIdIfAbsent(Map<String,String> flowMetadata,
Optional<String> optionalFlowExecutionId, Dag<JobExecutionPlan> jobExecutionPlanDag) {
if (optionalFlowExecutionId.isPresent()) {
flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, optionalFlowExecutionId.get());
}
flowMetadata.putIfAbsent(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD,
jobExecutionPlanDag.getNodes().get(0).getValue().getJobSpec().getConfigAsProperties().getProperty(
ConfigurationKeys.FLOW_EXECUTION_ID_KEY));
}

/**
* Return true if the spec contains a schedule, false otherwise.
*/
public static boolean isScheduledFlow(FlowSpec spec) {
return spec.getConfigAsProperties().containsKey(ConfigurationKeys.JOB_SCHEDULE_KEY);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import lombok.Data;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.gobblin.configuration.ConfigurationKeys;
import org.apache.gobblin.kafka.client.DecodeableKafkaRecord;
import org.apache.gobblin.metrics.ContextAwareGauge;
import org.apache.gobblin.metrics.ContextAwareMeter;
Expand Down Expand Up @@ -276,8 +277,12 @@ protected void submitFlowToDagManagerHelper(DagActionStore.DagAction dagAction,
try {
URI flowUri = FlowSpec.Utils.createFlowSpecUri(flowId);
spec = (FlowSpec) flowCatalog.getSpecs(flowUri);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should FlowCatalog.getSpecs override w/ a covariant return type?

i.e. the FlowSpec in:

  @Override
  public FlowSpec getSpecs(URI uri) throws SpecNotFoundException {

@phet phet Jan 12, 2024

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you're wondering how Java knows which to choose at runtime - given this is still an @Override, per usual it's chosen dynamically based on the runtime type.

the difference here is the more-specific typing in cases where the SpecCatalog derived type can be guaranteed statically.

to illustrate, these two are legal:

URI uri = ...
SpecCatalog sc = ...
Spec s = sc.getSpecs(uri);

and

FlowSpecCatalog fsc = ...
FlowSpec fs = fsc.getSpecs(uri);

but this does NOT type-check:

SpecCatalog sc = ...
FlowSpec fs = sc.getSpecs(uri);

it not only saves us typing on casting, but, in situations where we've already successfully cast to FlowSpecCatalog, it precludes failure from a subsequent ClassCastException to FlowSpec

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated this method but left the other methods that return Collection<Spec> intact, so as to avoid iterating through each one to cast it to a FlowSpec.

// Pass flowExecutionId to DagManager to be used for scheduled flows that do not already contain a flowExecutionId
this.orchestrator.submitFlowToDagManager(spec, dagAction);
/* Update the spec to contain the flowExecutionId from the dagAction for scheduled flows that do not already
contain a flowExecutionId. Adhoc flowSpecs are already consistent with the dagAction so there's no effective
change. It's crucial to adopt the consensus flowExecutionId here to prevent creating a new one during compilation.
*/
spec.addProperty(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, dagAction.getFlowExecutionId());
this.orchestrator.submitFlowToDagManager(spec);
} catch (URISyntaxException e) {
log.warn("Could not create URI object for flowId {}. Exception {}", flowId, e.getMessage());
launchSubmissionMetricProxy.markFailure();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

package org.apache.gobblin.service.modules.utils;

import com.google.common.base.Optional;
import java.net.URISyntaxException;
import java.util.HashMap;
import org.apache.gobblin.metrics.event.TimingEvent;
Expand All @@ -35,7 +34,6 @@
public class FlowCompilationValidationHelperTest {
private String dagId = "testDag";
private Long jobSpecFlowExecutionId = 1234L;
private String newFlowExecutionId = "5678";
private String existingFlowExecutionId = "9999";
private Dag<JobExecutionPlan> jobExecutionPlanDag;

Expand All @@ -46,14 +44,13 @@ public void setup() throws URISyntaxException {
}

/*
Tests that addFlowExecutionIdIfAbsent adds flowExecutionId to a flowMetadata object when it is absent, prioritizing
the optional flowExecutionId over the one from the job spec
Tests that addFlowExecutionIdIfAbsent adds the jobSpec flowExecutionId to a flowMetadata object when it is absent
*/
@Test
public void testAddFlowExecutionIdWhenAbsent() {
HashMap<String, String> flowMetadata = new HashMap<>();
FlowCompilationValidationHelper.addFlowExecutionIdIfAbsent(flowMetadata, Optional.of(newFlowExecutionId), jobExecutionPlanDag);
Assert.assertEquals(flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD), newFlowExecutionId);
FlowCompilationValidationHelper.addFlowExecutionIdIfAbsent(flowMetadata, jobExecutionPlanDag);
Assert.assertEquals(flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD), String.valueOf(jobSpecFlowExecutionId));
}

/*
Expand All @@ -63,7 +60,7 @@ public void testAddFlowExecutionIdWhenAbsent() {
public void testSkipAddingFlowExecutionIdWhenPresent() {
HashMap<String, String> flowMetadata = new HashMap<>();
flowMetadata.put(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD, existingFlowExecutionId);
FlowCompilationValidationHelper.addFlowExecutionIdIfAbsent(flowMetadata, Optional.of(newFlowExecutionId), jobExecutionPlanDag);
FlowCompilationValidationHelper.addFlowExecutionIdIfAbsent(flowMetadata,jobExecutionPlanDag);
Assert.assertEquals(flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD), existingFlowExecutionId);
}
}