-
Notifications
You must be signed in to change notification settings - Fork 749
[GOBBLIN-1840] Helix Job scheduler should not try to replace running workflow if within configured time #3704
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
29f5c2d
45c87ff
df182ff
3c552b7
88b9a02
d75e522
4ddd7c5
0e7ba4d
c449a71
3160d8b
ae2b58c
e6ea195
6e8358c
a15afd8
bbe4a0b
701881e
cfdc115
b598455
32ea7ed
2496212
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,9 @@ | |
| package org.apache.gobblin.cluster; | ||
|
|
||
| import java.io.IOException; | ||
| import java.time.Duration; | ||
| import java.time.Instant; | ||
| import java.time.temporal.ChronoUnit; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
| import java.util.List; | ||
|
|
@@ -110,14 +113,16 @@ public class GobblinHelixJobScheduler extends JobScheduler implements StandardMe | |
|
|
||
| private boolean startServicesCompleted; | ||
| private final long helixJobStopTimeoutMillis; | ||
| private final Duration throttleTimeoutDuration; | ||
| private ConcurrentHashMap<String, Instant> jobStartTimeMap; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 to adjusting the map name. Pretty sure the key is the jobName, which refers to the Gobblin configuration job.name
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also just my opinion, but Instant (or I guess I don't have an opinion on changing the above duration to Millis long to fit the rest of the class. But Instant vs long is a big deal because long is hard to reason about. It often refers to epoch millis but you always have to add that epochmillis to the name of the map. As for
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have a strong preference between Instant and Timestamp/Long; the latter are more common on the GaaS side, so I was initially surprised. It is more important to update the map name. |
||
|
|
||
| public GobblinHelixJobScheduler(Config sysConfig, | ||
| HelixManager jobHelixManager, | ||
| Optional<HelixManager> taskDriverHelixManager, | ||
| EventBus eventBus, | ||
| Path appWorkDir, List<? extends Tag<?>> metadataTags, | ||
| SchedulerService schedulerService, | ||
| MutableJobCatalog jobCatalog) throws Exception { | ||
| HelixManager jobHelixManager, | ||
|
Peiyingy marked this conversation as resolved.
Outdated
|
||
| Optional<HelixManager> taskDriverHelixManager, | ||
| EventBus eventBus, | ||
| Path appWorkDir, List<? extends Tag<?>> metadataTags, | ||
| SchedulerService schedulerService, | ||
| MutableJobCatalog jobCatalog) throws Exception { | ||
|
|
||
| super(ConfigUtils.configToProperties(sysConfig), schedulerService); | ||
| this.commonJobProperties = ConfigUtils.configToProperties(ConfigUtils.getConfigOrEmpty(sysConfig, COMMON_JOB_PROPS)); | ||
|
|
@@ -131,28 +136,28 @@ public GobblinHelixJobScheduler(Config sysConfig, | |
| this.metricContext = Instrumented.getMetricContext(new org.apache.gobblin.configuration.State(properties), this.getClass()); | ||
|
|
||
| int metricsWindowSizeInMin = ConfigUtils.getInt(sysConfig, | ||
| ConfigurationKeys.METRIC_TIMER_WINDOW_SIZE_IN_MINUTES, | ||
| ConfigurationKeys.DEFAULT_METRIC_TIMER_WINDOW_SIZE_IN_MINUTES); | ||
| ConfigurationKeys.METRIC_TIMER_WINDOW_SIZE_IN_MINUTES, | ||
| ConfigurationKeys.DEFAULT_METRIC_TIMER_WINDOW_SIZE_IN_MINUTES); | ||
|
|
||
| this.launcherMetrics = new GobblinHelixJobLauncherMetrics("launcherInScheduler", | ||
| this.metricContext, | ||
| metricsWindowSizeInMin); | ||
| this.metricContext, | ||
| metricsWindowSizeInMin); | ||
|
|
||
| this.jobSchedulerMetrics = new GobblinHelixJobSchedulerMetrics(this.jobExecutor, | ||
| this.metricContext, | ||
| metricsWindowSizeInMin); | ||
| this.metricContext, | ||
| metricsWindowSizeInMin); | ||
|
|
||
| this.jobsMapping = new HelixJobsMapping(ConfigUtils.propertiesToConfig(properties), | ||
| PathUtils.getRootPath(appWorkDir).toUri(), | ||
| appWorkDir.toString()); | ||
|
|
||
| this.planningJobLauncherMetrics = new GobblinHelixPlanningJobLauncherMetrics("planningLauncherInScheduler", | ||
| this.metricContext, | ||
| metricsWindowSizeInMin, this.jobsMapping); | ||
| this.metricContext, | ||
| metricsWindowSizeInMin, this.jobsMapping); | ||
|
|
||
| this.helixMetrics = new GobblinHelixMetrics("helixMetricsInJobScheduler", | ||
| this.metricContext, | ||
| metricsWindowSizeInMin); | ||
| this.metricContext, | ||
| metricsWindowSizeInMin); | ||
|
|
||
| this.startServicesCompleted = false; | ||
|
|
||
|
|
@@ -162,14 +167,19 @@ public GobblinHelixJobScheduler(Config sysConfig, | |
| this.helixWorkflowListingTimeoutMillis = ConfigUtils.getLong(sysConfig, GobblinClusterConfigurationKeys.HELIX_WORKFLOW_LISTING_TIMEOUT_SECONDS, | ||
| GobblinClusterConfigurationKeys.DEFAULT_HELIX_WORKFLOW_LISTING_TIMEOUT_SECONDS) * 1000; | ||
|
|
||
| this.throttleTimeoutDuration = Duration.of(ConfigUtils.getLong(sysConfig, GobblinClusterConfigurationKeys.HELIX_JOB_SCHEDULING_THROTTLE_TIMEOUT_SECONDS_KEY, | ||
|
Peiyingy marked this conversation as resolved.
Outdated
|
||
| GobblinClusterConfigurationKeys.DEFAULT_HELIX_JOB_SCHEDULING_THROTTLE_TIMEOUT_SECONDS_KEY), ChronoUnit.SECONDS); | ||
|
|
||
| this.jobStartTimeMap = new ConcurrentHashMap<>(); | ||
|
|
||
| } | ||
|
|
||
| @Override | ||
| public Collection<StandardMetrics> getStandardMetricsCollection() { | ||
| return ImmutableList.of(this.launcherMetrics, | ||
| this.jobSchedulerMetrics, | ||
| this.planningJobLauncherMetrics, | ||
| this.helixMetrics); | ||
| this.jobSchedulerMetrics, | ||
| this.planningJobLauncherMetrics, | ||
| this.helixMetrics); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -188,9 +198,9 @@ public void scheduleJob(Properties jobProps, JobListener jobListener) throws Job | |
| } | ||
|
|
||
| scheduleJob(jobProps, | ||
| jobListener, | ||
| Maps.newHashMap(), | ||
| GobblinHelixJob.class); | ||
| jobListener, | ||
| Maps.newHashMap(), | ||
| GobblinHelixJob.class); | ||
|
|
||
| } catch (Exception e) { | ||
| throw new JobException("Failed to schedule job " + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), e); | ||
|
|
@@ -319,12 +329,14 @@ public void handleNewJobConfigArrival(NewJobConfigArrivalEvent newJobArrival) { | |
| if (jobProps.containsKey(ConfigurationKeys.JOB_SCHEDULE_KEY)) { | ||
| LOGGER.info("Scheduling job " + jobUri); | ||
| scheduleJob(jobProps, | ||
| new GobblinHelixJobLauncherListener(this.launcherMetrics)); | ||
| new GobblinHelixJobLauncherListener(this.launcherMetrics)); | ||
| } else { | ||
| LOGGER.info("No job schedule found, so running job " + jobUri); | ||
| this.jobExecutor.execute(new NonScheduledJobRunner(jobProps, | ||
| new GobblinHelixJobLauncherListener(this.launcherMetrics))); | ||
| new GobblinHelixJobLauncherListener(this.launcherMetrics))); | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Does not need a new line |
||
| this.jobStartTimeMap.put(jobUri, Instant.now()); | ||
| } catch (JobException je) { | ||
| LOGGER.error("Failed to schedule or run job " + jobUri, je); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update this log to say that you are resetting the clock |
||
| } | ||
|
|
@@ -333,6 +345,20 @@ public void handleNewJobConfigArrival(NewJobConfigArrivalEvent newJobArrival) { | |
| @Subscribe | ||
| public void handleUpdateJobConfigArrival(UpdateJobConfigArrivalEvent updateJobArrival) { | ||
| LOGGER.info("Received update for job configuration of job " + updateJobArrival.getJobName()); | ||
| String jobName = updateJobArrival.getJobName(); | ||
| boolean throttleEnabled = PropertiesUtils.getPropAsBoolean(updateJobArrival.getJobConfig(), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. usually booleans are easily identified with
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this default to false if the config is not provided? If not, provide a default value.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The default is provided by GobblinClusterConfigurationKeys.DEFAULT_HELIX_JOB_SCHEDULING_THROTTLE_ENABLED_KEY in the config file, which is set to false. Should I add an additional default safeguard here?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but you are not using that value here right? You want to provide
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the function of getPropAsBoolean, that is: so it will call |
||
| GobblinClusterConfigurationKeys.HELIX_JOB_SCHEDULING_THROTTLE_ENABLED_KEY, | ||
| String.valueOf(GobblinClusterConfigurationKeys.DEFAULT_HELIX_JOB_SCHEDULING_THROTTLE_ENABLED_KEY)); | ||
|
|
||
| if (throttleEnabled && this.jobStartTimeMap.containsKey(jobName)) { | ||
| Instant jobStartTime = this.jobStartTimeMap.get(jobName); | ||
| Duration workflowDuration = Duration.between(jobStartTime, Instant.now()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe |
||
| Duration difference = workflowDuration.minus(throttleTimeoutDuration); | ||
|
Peiyingy marked this conversation as resolved.
Outdated
|
||
| if (difference.isNegative()) { | ||
|
Peiyingy marked this conversation as resolved.
Outdated
|
||
| return; | ||
| } | ||
| } | ||
|
|
||
| try { | ||
| handleDeleteJobConfigArrival(new DeleteJobConfigArrivalEvent(updateJobArrival.getJobName(), | ||
| updateJobArrival.getJobConfig())); | ||
|
|
@@ -452,7 +478,7 @@ class NonScheduledJobRunner implements Runnable { | |
| private final Long creationTimeInMillis; | ||
|
|
||
| public NonScheduledJobRunner(Properties jobProps, | ||
| GobblinHelixJobLauncherListener jobListener) { | ||
| GobblinHelixJobLauncherListener jobListener) { | ||
|
|
||
| this.jobProps = jobProps; | ||
| this.jobListener = jobListener; | ||
|
|
@@ -470,4 +496,4 @@ public void run() { | |
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.