From b6b135ba1501ec60713f22ecf56cbcbc8a2629ec Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Mon, 15 May 2023 18:57:04 -0400 Subject: [PATCH 01/11] basic outline of changes to make, started SchedulerLeaseDeterminationStore --- .../configuration/ConfigurationKeys.java | 13 ++ .../gobblin/service/ServiceConfigKeys.java | 1 + .../gobblin/runtime/api/DagActionStore.java | 1 + ...MysqlSchedulerLeaseDeterminationStore.java | 147 ++++++++++++++++++ .../api/SchedulerLeaseDeterminationStore.java | 23 +++ .../dag_action_store/MysqlDagActionStore.java | 2 + .../scheduler/GobblinServiceJobScheduler.java | 2 + .../DagActionStoreChangeMonitor.java | 5 +- 8 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java diff --git a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java index f0e15bf94a4..ddb24b2a899 100644 --- a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java +++ b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java @@ -95,6 +95,19 @@ public class ConfigurationKeys { public static final int DEFAULT_LOAD_SPEC_BATCH_SIZE = 500; public static final String SKIP_SCHEDULING_FLOWS_AFTER_NUM_DAYS = "skip.scheduling.flows.after.num.days"; public static final int DEFAULT_NUM_DAYS_TO_SKIP_AFTER = 365; + // Scheduler lease determination store configuration + // TODO: multiActiveScheduler change here update values for the following keys and rename to more meaningful + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_JDBC_DRIVER_KEY = "state.store.db.jdbc.driver"; + public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_JDBC_DRIVER = 
"com.mysql.cj.jdbc.Driver"; + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_URL_KEY = "state.store.db.url"; + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_USER_KEY = "state.store.db.user"; + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_PASSWORD_KEY = "state.store.db.password"; + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = "state.store.db.table"; + public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = "gobblin_job_state"; + public static final String SCHEDULER_TRIGGER_EVENT_EPSILON_VALUE_MILLIS = ""; + public static final long DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS = 100; + public static final String SCHEDULER_TRIGGER_EVENT_LINGER_VALUE_MILLIS = ""; + public static final long DEFAULT_SCHEDULER_TRIGGER_EVENT_LINGER_MILLIS = 1000; // Job executor thread pool size public static final String JOB_EXECUTOR_THREAD_POOL_SIZE_KEY = "jobexecutor.threadpool.size"; diff --git a/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java index ef4323538d9..1c412d015a5 100644 --- a/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java +++ b/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java @@ -41,6 +41,7 @@ public class ServiceConfigKeys { public static final boolean DEFAULT_GOBBLIN_SERVICE_DAG_MANAGER_ENABLED = false; public static final String GOBBLIN_SERVICE_JOB_STATUS_MONITOR_ENABLED_KEY = GOBBLIN_SERVICE_PREFIX + "jobStatusMonitor.enabled"; public static final String GOBBLIN_SERVICE_WARM_STANDBY_ENABLED_KEY = GOBBLIN_SERVICE_PREFIX + "warmStandby.enabled"; + // TODO: multiActiveScheduler change here to turn on multi-active scheduler // If true, will mark up/down d2 servers on leadership so that all requests will be routed to the leader node public static final String GOBBLIN_SERVICE_D2_ONLY_ANNOUNCE_LEADER 
= GOBBLIN_SERVICE_PREFIX + "d2.onlyAnnounceLeader"; diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java index 5da8e6d31d8..b0dc4a05834 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java @@ -28,6 +28,7 @@ public interface DagActionStore { enum DagActionValue { KILL, RESUME + // TODO: multiActiveScheduler change here to add new action value and update constructor } @Getter diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java new file mode 100644 index 00000000000..b92e364bd2b --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java @@ -0,0 +1,147 @@ +package org.apache.gobblin.runtime.api; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; + +import com.typesafe.config.Config; + +import javax.sql.DataSource; + +import org.apache.gobblin.broker.SharedResourcesBrokerFactory; +import org.apache.gobblin.configuration.ConfigurationKeys; +import org.apache.gobblin.metastore.MysqlDataSourceFactory; +import org.apache.gobblin.service.ServiceConfigKeys; +import org.apache.gobblin.util.ConfigUtils; + + +/** + * TODO: multiActiveScheduler change here + */ +public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDeterminationStore { + public static final String CONFIG_PREFIX = "MysqlSchedulerLeaseDeterminationStore"; + + protected final DataSource dataSource; + private final String tableName; + private final long epsilon; + private final long 
linger; + protected static final String CONDITIONALLY_INSERT_TO_OBTAIN_LEASE_STATEMENT = "INSERT INTO %s (flow_group, " + + "flow_name, flow_execution_id, trigger_event_timestamp) VALUES (?, ?, ?, ?) WHERE NOT EXISTS (" + + "SELECT * FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND ABS(trigger_event_timestamp-?)" + + " <= %s)"; + protected static final String GET_PURSUANT_TIMESTAMP_STATEMENT = "SELECT pursuant_timestamp FROM %s WHERE flow_group=? " + + "AND flow_name=? AND flow_execution_id=? AND ABS(trigger_event_timestamp-?)"; + // TODO: Potentially use the following statement that obtains the lease with the insert, otherwise returns the original + // value, but it's a bit hard to reason about this statement working. + protected static final String ATTEMPT_OBTAINING_LEASE_ELSE_RETURN_EXISTING_ROW_STATEMENT = "SELECT * FROM " + + "(SELECT ? AS flow_group, ? AS flow_name, ? AS flow_execution_id, ? AS trigger_event_timestamp) AS new_rows" + + "WHERE EXISTS (SELECT * FROM %s WHERE %s.flow_group = new_rows.flow_group AND %s.flow_name = new_rows.flow_name)" + + " AND %s.flow_execution_id = new_rows.flow_execution_id AND ABS(trigger_event_timestamp-?) <= %s)"; + private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + "flow_group varchar(" + + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, " + + "trigger_event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + + "pursuant_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," + + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,trigger_event_timestamp)"; + + // Note: that union-ing the read value in the same query will require keeping track of the + // host number to see who did the insert and checking that. 
Instead we can conditionally + // write and read separately + + // OVERALL LOGIC + // check if event within epsilon exists + // QUERY 1: if one does exist return the row and check value of pursuant + // if pursuant == null someone else completed + // RETURN + // QUERY 2: else if pursuant + linger <= current_timestamp + // insert row yourself + // QUERY 2: else pursuant + linger > current_timestamp + // set reminder to check back after linger + // QUERY 1: else insert row urself + // RETURN + + // QUERY should check if row exists and return value if so + // if row does not exist then do write + // then based on what result we get and checking corresponding value do other stuff + /* + ``` + INSERT INTO mytable (column1, column2, column3) + SELECT 'value1', 'value2', 'value3' + FROM dual + WHERE NOT EXISTS ( + SELECT * + FROM mytable + WHERE column1 = 'value1' + AND column2 = 'value2' + AND column3 = 'value3' + ); + ``` + */ + public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { + if (config.hasPath(CONFIG_PREFIX)) { + config = config.getConfig(CONFIG_PREFIX).withFallback(config); + } else { + throw new IOException("Please specify the config for MysqlDagActionStore"); + } + + this.tableName = ConfigUtils.getString(config, ConfigurationKeys.SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY, + ConfigurationKeys.DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE); + this.epsilon = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_VALUE_MILLIS, + ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); + this.linger = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_VALUE_MILLIS, + ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); + + this.dataSource = MysqlDataSourceFactory.get(config, SharedResourcesBrokerFactory.getImplicitBroker()); + try (Connection connection = dataSource.getConnection(); + PreparedStatement createStatement = 
connection.prepareStatement(String.format(CREATE_TABLE_STATEMENT, tableName))) { + createStatement.executeUpdate(); + connection.commit(); + } catch (SQLException e) { + throw new IOException("Table creation failure for " + tableName, e); + } + } + + @Override + public boolean attemptLeaseOfLaunchEvent(String flowGroup, String flowName, String flowExecutionId, + Timestamp triggerTimestamp) + throws IOException { + try (Connection connection = this.dataSource.getConnection(); + PreparedStatement insertStatement = connection.prepareStatement( + String.format(CONDITIONALLY_INSERT_TO_OBTAIN_LEASE_STATEMENT, tableName, tableName, epsilon))) { + int i = 0; + insertStatement.setString(++i, flowGroup); + insertStatement.setString(++i, flowName); + insertStatement.setString(++i, flowExecutionId); + insertStatement.setTimestamp(++i, triggerTimestamp); + insertStatement.setString(++i, flowGroup); + insertStatement.setString(++i, flowName); + insertStatement.setString(++i, flowExecutionId); + insertStatement.setTimestamp(++i, triggerTimestamp); + i = insertStatement.executeUpdate(); + + if (i > 1) { + throw new IOException(String.format("Expect at most 1 row in table for a given trigger event. 
%s rows " + + "exist for the trigger flow event for table %s flow group: %s, flow name: %s flow execution id: %s " + + "and trigger timestamp: %s.", i, tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp)); + } + connection.commit(); + // Return whether this query obtained lease on trigger event and completed insert + return i == 1; + } catch (SQLException e) { + throw new IOException(String.format("Error encountered while trying to obtain lease on trigger flow event for " + + "table %s flow group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s", tableName, + flowGroup, flowName, flowExecutionId, triggerTimestamp, e)); + } + } + + @Override + public Timestamp getPursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, + Timestamp triggerTimestamp) + throws IOException { + return null; + } +} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java new file mode 100644 index 00000000000..94c8f39cfb3 --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java @@ -0,0 +1,23 @@ +package org.apache.gobblin.runtime.api; + +import java.io.IOException; +import java.sql.Timestamp; + + +public interface SchedulerLeaseDeterminationStore { + + /** + * + * @param flowGroup + * @param flowName + * @param flowExecutionId + * @param triggerTimestamp + * @return True if obtained lease and completed insert, False otherwise + */ + public boolean attemptLeaseOfLaunchEvent(String flowGroup, String flowName, String flowExecutionId, + Timestamp triggerTimestamp) throws IOException; + + public Timestamp getPursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, + Timestamp triggerTimestamp) throws IOException; + +} diff --git 
a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java index d3f4db11bbb..dd5f5766120 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java @@ -55,6 +55,8 @@ public class MysqlDagActionStore implements DagActionStore { private static final String DELETE_STATEMENT = "DELETE FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ?"; private static final String GET_STATEMENT = "SELECT flow_group, flow_name, flow_execution_id, dag_action FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ?"; private static final String GET_ALL_STATEMENT = "SELECT flow_group, flow_name, flow_execution_id, dag_action FROM %s"; + // TODO: multiActiveScheduler change here to update schema to include trigger event timestamp and pursuing timestamp, + // maybe update primary key? 
private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s (" + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, " diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java index 5ee5f978905..03004cd6b62 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java @@ -436,6 +436,8 @@ public synchronized void scheduleJob(Properties jobProps, JobListener jobListene } } + // TODO: multiActiveScheduler change here to use this in old state or with new config do the race to write? 
+ // define a new class to handle the nonblocking and race @Override public void runJob(Properties jobProps, JobListener jobListener) throws JobException { try { diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java index 732697038fd..3b2778150b8 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java @@ -163,7 +163,10 @@ protected void processMessage(DecodeableKafkaRecord message) { this.unexpectedErrors.mark(); } else if (operation.equals("DELETE")) { log.debug("Deleted flow group: {} name: {} executionId {} from DagActionStore", flowGroup, flowName, flowExecutionId); - } else { + } // TODO: multiActiveScheduler change here to add a case for a new launch flow action. We want to check if it is + // an execution that has been "won" by checking pursuant timestamp = null then pass to dag managers. the right one will + // actually launch it. if the config is NOT turned on we should not do any of this handling or receive these types of events + else { log.warn("Received unsupported change type of operation {}. 
Expected values to be in [INSERT, UPDATE, DELETE]", operation); this.unexpectedErrors.mark(); From ffed36c39805cf2709af9ec1259ecdc5455441ab Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Wed, 17 May 2023 15:25:50 -0400 Subject: [PATCH 02/11] multiple query options for store --- ...MysqlSchedulerLeaseDeterminationStore.java | 98 ++++++++++--------- .../api/SchedulerLeaseDeterminationStore.java | 32 ++++-- 2 files changed, 76 insertions(+), 54 deletions(-) diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java index b92e364bd2b..e6ae86f8809 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java @@ -28,29 +28,28 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete private final String tableName; private final long epsilon; private final long linger; + // TODO: define retention eventually on this table + // TODO: also add to primary key the type of event "launch" + // initialize table with one entry only if it doesn't exist. these configs + // another table with epsilon and linger then join the two tables protected static final String CONDITIONALLY_INSERT_TO_OBTAIN_LEASE_STATEMENT = "INSERT INTO %s (flow_group, " + "flow_name, flow_execution_id, trigger_event_timestamp) VALUES (?, ?, ?, ?) WHERE NOT EXISTS (" + "SELECT * FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND ABS(trigger_event_timestamp-?)" + " <= %s)"; protected static final String GET_PURSUANT_TIMESTAMP_STATEMENT = "SELECT pursuant_timestamp FROM %s WHERE flow_group=? " + "AND flow_name=? AND flow_execution_id=? 
AND ABS(trigger_event_timestamp-?)"; - // TODO: Potentially use the following statement that obtains the lease with the insert, otherwise returns the original - // value, but it's a bit hard to reason about this statement working. + protected static final String ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT = "INSERT INTO %s (flow_group, " + + "flow_name, flow_execution_id, trigger_event_timestamp) VALUES (?, ?, ?, ?) WHERE NOT EXISTS (" + + "SELECT * FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND ABS(trigger_event_timestamp-?)" + + " <= %s); SELECT ROW_COUNT() AS rows_inserted_count, pursuant_timestamp FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND " + + "ABS(trigger_event_timestamp-?)"; + + /* TODO: Potentially use the following statement that obtains the lease with the insert, otherwise returns the original + value, but it's a bit hard to reason about this statement working. protected static final String ATTEMPT_OBTAINING_LEASE_ELSE_RETURN_EXISTING_ROW_STATEMENT = "SELECT * FROM " + "(SELECT ? AS flow_group, ? AS flow_name, ? AS flow_execution_id, ? AS trigger_event_timestamp) AS new_rows" + "WHERE EXISTS (SELECT * FROM %s WHERE %s.flow_group = new_rows.flow_group AND %s.flow_name = new_rows.flow_name)" + " AND %s.flow_execution_id = new_rows.flow_execution_id AND ABS(trigger_event_timestamp-?) 
<= %s)"; - private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + "flow_group varchar(" - + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" - + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" - + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, " - + "trigger_event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " - + "pursuant_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," - + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,trigger_event_timestamp)"; - - // Note: that union-ing the read value in the same query will require keeping track of the - // host number to see who did the insert and checking that. Instead we can conditionally - // write and read separately // OVERALL LOGIC // check if event within epsilon exists @@ -63,24 +62,15 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete // set reminder to check back after linger // QUERY 1: else insert row urself // RETURN + */ + private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + "flow_group varchar(" + + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, " + + "trigger_event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + + "pursuant_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," + + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,trigger_event_timestamp)"; - // QUERY should check if row exists and return value if so - // if row does not exist then do write - // then based on what result we get and checking corresponding value do other stuff - /* - ``` - INSERT INTO mytable (column1, column2, column3) - SELECT 'value1', 'value2', 'value3' - FROM dual - WHERE NOT EXISTS ( - SELECT * - FROM mytable - WHERE column1 = 'value1' - AND column2 = 
'value2' - AND column3 = 'value3' - ); - ``` - */ public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { if (config.hasPath(CONFIG_PREFIX)) { config = config.getConfig(CONFIG_PREFIX).withFallback(config); @@ -106,12 +96,13 @@ public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { } @Override - public boolean attemptLeaseOfLaunchEvent(String flowGroup, String flowName, String flowExecutionId, - Timestamp triggerTimestamp) + public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, + String flowExecutionId, Timestamp triggerTimestamp) throws IOException { try (Connection connection = this.dataSource.getConnection(); PreparedStatement insertStatement = connection.prepareStatement( - String.format(CONDITIONALLY_INSERT_TO_OBTAIN_LEASE_STATEMENT, tableName, tableName, epsilon))) { + String.format(ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT, tableName, tableName, epsilon, + tableName))) { int i = 0; insertStatement.setString(++i, flowGroup); insertStatement.setString(++i, flowName); @@ -121,27 +112,42 @@ public boolean attemptLeaseOfLaunchEvent(String flowGroup, String flowName, Stri insertStatement.setString(++i, flowName); insertStatement.setString(++i, flowExecutionId); insertStatement.setTimestamp(++i, triggerTimestamp); - i = insertStatement.executeUpdate(); + insertStatement.setString(++i, flowGroup); + insertStatement.setString(++i, flowName); + insertStatement.setString(++i, flowExecutionId); + insertStatement.setTimestamp(++i, triggerTimestamp); + ResultSet resultSet = insertStatement.executeQuery(); + connection.commit(); - if (i > 1) { + if (!resultSet.next()) { + throw new IOException(String.format("Unexpected error where no result returned while trying to obtain lease. 
" + + "This error indicates that no row was inserted but also no entry existed for trigger flow event for " + + "table %s flow group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s", tableName, + flowGroup, flowName, flowExecutionId, triggerTimestamp)); + } + // If a row was inserted, then we have obtained the lease + int rowsUpdated = resultSet.getInt(0); + if (rowsUpdated == 1) { + return LeaseAttemptStatus.LEASE_OBTAINED; + } else if (rowsUpdated > 1) { throw new IOException(String.format("Expect at most 1 row in table for a given trigger event. %s rows " + "exist for the trigger flow event for table %s flow group: %s, flow name: %s flow execution id: %s " + "and trigger timestamp: %s.", i, tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp)); } - connection.commit(); - // Return whether this query obtained lease on trigger event and completed insert - return i == 1; + Timestamp pursuantTimestamp = resultSet.getTimestamp(1); + long currentTimeMillis = System.currentTimeMillis(); + // Another host has obtained lease + if (pursuantTimestamp == null) { + return LeaseAttemptStatus.LEASE_OBTAINED; + } else if (pursuantTimestamp.getTime() + linger <= currentTimeMillis) { + return LeaseAttemptStatus.PREVIOUS_LEASE_EXPIRED; + } + // pursuant + linger > current timestamp + return LeaseAttemptStatus.PREVIOUS_LEASE_VALID; } catch (SQLException e) { throw new IOException(String.format("Error encountered while trying to obtain lease on trigger flow event for " - + "table %s flow group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s", tableName, + + "table %s flow group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s", tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp, e)); } } - - @Override - public Timestamp getPursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, - Timestamp triggerTimestamp) - throws IOException { - return null; - } } diff --git 
a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java index 94c8f39cfb3..7a011cd247e 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java @@ -6,18 +6,34 @@ public interface SchedulerLeaseDeterminationStore { + // Enum is used to reason about the three possible scenarios that can result from an attempt to obtain a lease for a + // particular trigger event of a flow + enum LeaseAttemptStatus { + LEASE_OBTAINED, + PREVIOUS_LEASE_EXPIRED, + PREVIOUS_LEASE_VALID + } + + // Action to take on a particular flow + enum flowActionType { + LAUNCH, + RETRY, + CANCEL, + NEXT_HOP + } + /** - * + * This method attempts to insert an entry into store for a particular flow's trigger event if one does not already + * exist in the store for the same trigger event. Regardless of the outcome it also reads the pursuant timestamp of + * the entry for that trigger event (it could have pre-existed in the table or been newly added by the previous + * write). Based on the transaction results, it will return @LeaseAttemptStatus to determine the next action. 
* @param flowGroup * @param flowName * @param flowExecutionId - * @param triggerTimestamp - * @return True if obtained lease and completed insert, False otherwise + * @param triggerTimestamp is the time this flow is supposed to be launched + * @return LeaseAttemptStatus + * @throws IOException */ - public boolean attemptLeaseOfLaunchEvent(String flowGroup, String flowName, String flowExecutionId, + public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, Timestamp triggerTimestamp) throws IOException; - - public Timestamp getPursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, - Timestamp triggerTimestamp) throws IOException; - } From 0b4ebd8785adf681a85fb625fe5fe54651e458d7 Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Wed, 17 May 2023 16:04:11 -0400 Subject: [PATCH 03/11] add launch type as column --- ...MysqlSchedulerLeaseDeterminationStore.java | 31 ++++++++++--------- .../api/SchedulerLeaseDeterminationStore.java | 8 ++--- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java index e6ae86f8809..691c300a75b 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java @@ -32,17 +32,12 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete // TODO: also add to primary key the type of event "launch" // initialize table with one entry only if it doesn't exist. 
these configs // another table with epsilon and linger then join the two tables - protected static final String CONDITIONALLY_INSERT_TO_OBTAIN_LEASE_STATEMENT = "INSERT INTO %s (flow_group, " - + "flow_name, flow_execution_id, trigger_event_timestamp) VALUES (?, ?, ?, ?) WHERE NOT EXISTS (" - + "SELECT * FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND ABS(trigger_event_timestamp-?)" - + " <= %s)"; - protected static final String GET_PURSUANT_TIMESTAMP_STATEMENT = "SELECT pursuant_timestamp FROM %s WHERE flow_group=? " - + "AND flow_name=? AND flow_execution_id=? AND ABS(trigger_event_timestamp-?)"; protected static final String ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT = "INSERT INTO %s (flow_group, " - + "flow_name, flow_execution_id, trigger_event_timestamp) VALUES (?, ?, ?, ?) WHERE NOT EXISTS (" - + "SELECT * FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND ABS(trigger_event_timestamp-?)" - + " <= %s); SELECT ROW_COUNT() AS rows_inserted_count, pursuant_timestamp FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND " - + "ABS(trigger_event_timestamp-?)"; + + "flow_name, flow_execution_id, flow_action, trigger_event_timestamp) VALUES (?, ?, ?, ?, ?) WHERE NOT EXISTS (" + + "SELECT * FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND flow_action=? AND" + + "ABS(trigger_event_timestamp-?) <= %s); SELECT ROW_COUNT() AS rows_inserted_count, pursuant_timestamp FROM %s " + + "WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND flow_action=? AND" + + "ABS(trigger_event_timestamp-?) <= %s"; /* TODO: Potentially use the following statement that obtains the lease with the insert, otherwise returns the original value, but it's a bit hard to reason about this statement working. 
@@ -66,10 +61,10 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" - + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, " + + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, flow_action varchar(100) NOT NULL, " + "trigger_event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + "pursuant_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," - + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,trigger_event_timestamp)"; + + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,flow_action,trigger_event_timestamp)"; public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { if (config.hasPath(CONFIG_PREFIX)) { @@ -97,24 +92,30 @@ public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { @Override public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, - String flowExecutionId, Timestamp triggerTimestamp) + String flowExecutionId, FlowActionType flowActionType, Timestamp triggerTimestamp) throws IOException { try (Connection connection = this.dataSource.getConnection(); PreparedStatement insertStatement = connection.prepareStatement( String.format(ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT, tableName, tableName, epsilon, - tableName))) { + tableName, epsilon))) { int i = 0; + // Values to set in new row insertStatement.setString(++i, flowGroup); insertStatement.setString(++i, flowName); insertStatement.setString(++i, flowExecutionId); + insertStatement.setString(++i, flowActionType.toString()); insertStatement.setTimestamp(++i, triggerTimestamp); + // Values to check if existing row matches insertStatement.setString(++i, flowGroup); 
insertStatement.setString(++i, flowName); insertStatement.setString(++i, flowExecutionId); + insertStatement.setString(++i, flowActionType.toString()); insertStatement.setTimestamp(++i, triggerTimestamp); + // Values to make select statement to read row insertStatement.setString(++i, flowGroup); insertStatement.setString(++i, flowName); insertStatement.setString(++i, flowExecutionId); + insertStatement.setString(++i, flowActionType.toString()); insertStatement.setTimestamp(++i, triggerTimestamp); ResultSet resultSet = insertStatement.executeQuery(); connection.commit(); @@ -142,7 +143,7 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, } else if (pursuantTimestamp.getTime() + linger <= currentTimeMillis) { return LeaseAttemptStatus.PREVIOUS_LEASE_EXPIRED; } - // pursuant + linger > current timestamp + // Previous lease owner still has valid lease (pursuant + linger > current timestamp) return LeaseAttemptStatus.PREVIOUS_LEASE_VALID; } catch (SQLException e) { throw new IOException(String.format("Error encountered while trying to obtain lease on trigger flow event for " diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java index 7a011cd247e..ec18f5fdaf7 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java @@ -14,8 +14,8 @@ enum LeaseAttemptStatus { PREVIOUS_LEASE_VALID } - // Action to take on a particular flow - enum flowActionType { + // Action to take on a particular flow + enum FlowActionType { LAUNCH, RETRY, CANCEL, @@ -34,6 +34,6 @@ enum flowActionType { * @return LeaseAttemptStatus * @throws IOException */ - public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, 
String flowExecutionId, - Timestamp triggerTimestamp) throws IOException; + public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, + String flowExecutionId, FlowActionType flowActionType, Timestamp triggerTimestamp) throws IOException; } From 57962e35b57cc26e7c73d5c87975b259cebc3a19 Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Wed, 17 May 2023 18:10:51 -0400 Subject: [PATCH 04/11] wip for scheduler abstractions --- .../dag_action_store/MysqlDagActionStore.java | 2 -- .../service/modules/orchestration/Orchestrator.java | 13 +++++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java index dd5f5766120..d3f4db11bbb 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java @@ -55,8 +55,6 @@ public class MysqlDagActionStore implements DagActionStore { private static final String DELETE_STATEMENT = "DELETE FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ?"; private static final String GET_STATEMENT = "SELECT flow_group, flow_name, flow_execution_id, dag_action FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ?"; private static final String GET_ALL_STATEMENT = "SELECT flow_group, flow_name, flow_execution_id, dag_action FROM %s"; - // TODO: multiActiveScheduler change here to update schema to include trigger event timestamp and pursuing timestamp, - // maybe update primary key? 
private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s (" + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, " diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index c9a21560c38..49817a40d85 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -310,6 +310,11 @@ public void orchestrate(Spec spec) throws Exception { flowCompilationTimer.get().stop(flowMetadata); } + /* TODO: multiactiveScheduler change here + do all the quota checking and compilation above as normal but before passing to dag manager should go to another + function that checks config and either passes to DM directly or ends up going through scheduler lock contention + */ + // CALLS NEW METHOD newAbstraction()... if (this.dagManager.isPresent()) { try { //Send the dag to the DagManager. @@ -364,6 +369,14 @@ public void orchestrate(Spec spec) throws Exception { Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS); } + private void newAbstraction() { + // TODO: check config + // either do old way and pass to DM + + // do lock contention + // Is this a new class...? how can I abstract and make modular? or simply a util method? + } + /** * Check if a FlowSpec instance is allowed to run. 
* From 1ecab1d415387d9006fa0103b62663e54d942abc Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Thu, 25 May 2023 17:34:49 -0400 Subject: [PATCH 05/11] non blocking algo impl, dag action store updates --- .../configuration/ConfigurationKeys.java | 11 +- .../gobblin/service/ServiceConfigKeys.java | 2 +- .../gobblin/runtime/api/DagActionStore.java | 16 ++- ...MysqlSchedulerLeaseDeterminationStore.java | 66 ++++++--- .../api/SchedulerLeaseDeterminationStore.java | 11 +- .../dag_action_store/MysqlDagActionStore.java | 38 ++--- .../gobblin/scheduler/JobScheduler.java | 13 +- .../MysqlDagActionStoreTest.java | 35 +++-- .../modules/orchestration/DagManager.java | 2 +- .../modules/orchestration/Orchestrator.java | 38 +++-- .../SchedulerLeaseAlgoHandler.java | 130 ++++++++++++++++++ ...ecutionResourceHandlerWithWarmStandby.java | 31 ++--- .../scheduler/GobblinServiceJobScheduler.java | 16 ++- .../orchestration/OrchestratorTest.java | 4 +- .../GobblinServiceJobSchedulerTest.java | 3 +- 15 files changed, 308 insertions(+), 108 deletions(-) create mode 100644 gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java diff --git a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java index ddb24b2a899..fa3bfdc5a3e 100644 --- a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java +++ b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java @@ -104,10 +104,15 @@ public class ConfigurationKeys { public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_PASSWORD_KEY = "state.store.db.password"; public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = "state.store.db.table"; public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = "gobblin_job_state"; - public static final String 
SCHEDULER_TRIGGER_EVENT_EPSILON_VALUE_MILLIS = ""; + // TODO: fix these keys + public static final String SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY = "originalTriggerTimestampMillis"; + public static final String SCHEDULER_TRIGGER_TIMESTAMP_MILLIS_KEY = "triggerTimestampMillis"; + public static final String SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY = ""; public static final long DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS = 100; - public static final String SCHEDULER_TRIGGER_EVENT_LINGER_VALUE_MILLIS = ""; - public static final long DEFAULT_SCHEDULER_TRIGGER_EVENT_LINGER_MILLIS = 1000; + public static final String SCHEDULER_TRIGGER_EVENT_LINGER_SEC_KEY = ""; + public static final long DEFAULT_SCHEDULER_TRIGGER_EVENT_LINGER_SEC = 30; + public static final String SCHEDULER_STAGGERING_UPPER_BOUND_SEC_KEY = ""; + public static final int DEFAULT_SCHEDULER_STAGGERING_UPPER_BOUND_SEC = 5; // Job executor thread pool size public static final String JOB_EXECUTOR_THREAD_POOL_SIZE_KEY = "jobexecutor.threadpool.size"; diff --git a/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java index 1c412d015a5..21b32b58cc0 100644 --- a/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java +++ b/gobblin-api/src/main/java/org/apache/gobblin/service/ServiceConfigKeys.java @@ -41,7 +41,7 @@ public class ServiceConfigKeys { public static final boolean DEFAULT_GOBBLIN_SERVICE_DAG_MANAGER_ENABLED = false; public static final String GOBBLIN_SERVICE_JOB_STATUS_MONITOR_ENABLED_KEY = GOBBLIN_SERVICE_PREFIX + "jobStatusMonitor.enabled"; public static final String GOBBLIN_SERVICE_WARM_STANDBY_ENABLED_KEY = GOBBLIN_SERVICE_PREFIX + "warmStandby.enabled"; - // TODO: multiActiveScheduler change here to turn on multi-active scheduler + public static final String GOBBLIN_SERVICE_MULTI_ACTIVE_SCHEDULER_ENABLED_KEY = GOBBLIN_SERVICE_PREFIX + "multiActiveScheduler.enabled"; // 
If true, will mark up/down d2 servers on leadership so that all requests will be routed to the leader node public static final String GOBBLIN_SERVICE_D2_ONLY_ANNOUNCE_LEADER = GOBBLIN_SERVICE_PREFIX + "d2.onlyAnnounceLeader"; diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java index b0dc4a05834..762d405e469 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java @@ -27,8 +27,9 @@ public interface DagActionStore { enum DagActionValue { KILL, - RESUME - // TODO: multiActiveScheduler change here to add new action value and update constructor + RESUME, + // TODO: multiActiveScheduler change here: should flow actions be same as dag actions and be one enum or have overlap but not all the same? + LAUNCH } @Getter @@ -52,9 +53,10 @@ public DagAction(String flowGroup, String flowName, String flowExecutionId, DagA * @param flowGroup flow group for the dag action * @param flowName flow name for the dag action * @param flowExecutionId flow execution for the dag action + * @param dagActionValue the value of the dag action * @throws IOException */ - boolean exists(String flowGroup, String flowName, String flowExecutionId) throws IOException, SQLException; + boolean exists(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException, SQLException; /** * Persist the dag action in {@link DagActionStore} for durability @@ -71,21 +73,23 @@ public DagAction(String flowGroup, String flowName, String flowExecutionId, DagA * @param flowGroup flow group for the dag action * @param flowName flow name for the dag action * @param flowExecutionId flow execution for the dag action + * @param dagActionValue the value of the dag action * @throws IOException * @return true if we successfully delete one 
record, return false if the record does not exist */ - boolean deleteDagAction(String flowGroup, String flowName, String flowExecutionId) throws IOException; + boolean deleteDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException; /*** * Retrieve action value by the flow group, flow name and flow execution id from the {@link DagActionStore}. * @param flowGroup flow group for the dag action * @param flowName flow name for the dag action * @param flowExecutionId flow execution for the dag action + * @param dagActionValue the value of the dag action * @throws IOException Exception in retrieving the {@link DagAction}. * @throws SpecNotFoundException If {@link DagAction} being retrieved is not present in store. */ - DagAction getDagAction(String flowGroup, String flowName, String flowExecutionId) throws IOException, SpecNotFoundException, - SQLException; + DagAction getDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) + throws IOException, SpecNotFoundException, SQLException; /*** * Get all {@link DagAction}s from the {@link DagActionStore}. 
diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java index 691c300a75b..c76bcccbd2c 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java @@ -19,7 +19,7 @@ /** - * TODO: multiActiveScheduler change here + * TODO: multiActiveScheduler change here write doc for this class */ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDeterminationStore { public static final String CONFIG_PREFIX = "MysqlSchedulerLeaseDeterminationStore"; @@ -32,12 +32,15 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete // TODO: also add to primary key the type of event "launch" // initialize table with one entry only if it doesn't exist. these configs // another table with epsilon and linger then join the two tables + protected static final String WHERE_CLAUSE_TO_MATCH_ROW = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=? " + + "AND flow_action=? AND ABS(trigger_event_timestamp-?) <= %s"; protected static final String ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT = "INSERT INTO %s (flow_group, " + "flow_name, flow_execution_id, flow_action, trigger_event_timestamp) VALUES (?, ?, ?, ?, ?) WHERE NOT EXISTS (" - + "SELECT * FROM %s WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND flow_action=? AND" - + "ABS(trigger_event_timestamp-?) <= %s); SELECT ROW_COUNT() AS rows_inserted_count, pursuant_timestamp FROM %s " - + "WHERE flow_group=? AND flow_name=? AND flow_execution_id=? AND flow_action=? AND" - + "ABS(trigger_event_timestamp-?) 
<= %s"; + + "SELECT * FROM %s " + WHERE_CLAUSE_TO_MATCH_ROW + "; SELECT ROW_COUNT() AS rows_inserted_count, " + + "pursuant_timestamp FROM %s " + WHERE_CLAUSE_TO_MATCH_ROW; + + protected static final String UPDATE_PURSUANT_TIMESTAMP_STATEMENT = "UPDATE %s SET pursuant_timestamp = NULL " + + WHERE_CLAUSE_TO_MATCH_ROW; /* TODO: Potentially use the following statement that obtains the lease with the insert, otherwise returns the original value, but it's a bit hard to reason about this statement working. @@ -45,18 +48,6 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete + "(SELECT ? AS flow_group, ? AS flow_name, ? AS flow_execution_id, ? AS trigger_event_timestamp) AS new_rows" + "WHERE EXISTS (SELECT * FROM %s WHERE %s.flow_group = new_rows.flow_group AND %s.flow_name = new_rows.flow_name)" + " AND %s.flow_execution_id = new_rows.flow_execution_id AND ABS(trigger_event_timestamp-?) <= %s)"; - - // OVERALL LOGIC - // check if event within epsilon exists - // QUERY 1: if one does exist return the row and check value of pursuant - // if pursuant == null someone else completed - // RETURN - // QUERY 2: else if pursuant + linger <= current_timestamp - // insert row yourself - // QUERY 2: else pursuant + linger > current_timestamp - // set reminder to check back after linger - // QUERY 1: else insert row urself - // RETURN */ private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" @@ -75,9 +66,9 @@ public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { this.tableName = ConfigUtils.getString(config, ConfigurationKeys.SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY, ConfigurationKeys.DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE); - this.epsilon = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_VALUE_MILLIS, + this.epsilon = 
ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY, ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); - this.linger = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_VALUE_MILLIS, + this.linger = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY, ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); this.dataSource = MysqlDataSourceFactory.get(config, SharedResourcesBrokerFactory.getImplicitBroker()); @@ -92,8 +83,9 @@ public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { @Override public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, - String flowExecutionId, FlowActionType flowActionType, Timestamp triggerTimestamp) + String flowExecutionId, FlowActionType flowActionType, long triggerTimeMillis) throws IOException { + Timestamp triggerTimestamp = new Timestamp(triggerTimeMillis); try (Connection connection = this.dataSource.getConnection(); PreparedStatement insertStatement = connection.prepareStatement( String.format(ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT, tableName, tableName, epsilon, @@ -129,7 +121,10 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, // If a row was inserted, then we have obtained the lease int rowsUpdated = resultSet.getInt(0); if (rowsUpdated == 1) { - return LeaseAttemptStatus.LEASE_OBTAINED; + // TODO: write to dagactionstore then update pursuant to null + if (updatePursuantTimestamp(flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp)) { + return LeaseAttemptStatus.LEASE_OBTAINED; + } } else if (rowsUpdated > 1) { throw new IOException(String.format("Expect at most 1 row in table for a given trigger event. 
%s rows " + "exist for the trigger flow event for table %s flow group: %s, flow name: %s flow execution id: %s " @@ -137,7 +132,7 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, } Timestamp pursuantTimestamp = resultSet.getTimestamp(1); long currentTimeMillis = System.currentTimeMillis(); - // Another host has obtained lease + // Another host has obtained lease and no further steps required if (pursuantTimestamp == null) { return LeaseAttemptStatus.LEASE_OBTAINED; } else if (pursuantTimestamp.getTime() + linger <= currentTimeMillis) { @@ -151,4 +146,31 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, flowGroup, flowName, flowExecutionId, triggerTimestamp, e)); } } + + @Override + public boolean updatePursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, + FlowActionType flowActionType, Timestamp triggerTimestamp) + throws IOException { + try (Connection connection = this.dataSource.getConnection(); + PreparedStatement updateStatement = connection.prepareStatement( + String.format(UPDATE_PURSUANT_TIMESTAMP_STATEMENT, tableName))) { + int i = 0; + updateStatement.setString(++i, flowGroup); + updateStatement.setString(++i, flowName); + updateStatement.setString(++i, flowExecutionId); + updateStatement.setString(++i, flowActionType.toString()); + updateStatement.setTimestamp(++i, triggerTimestamp); + i = updateStatement.executeUpdate(); + connection.commit(); + + if (i != 1) { + LOG.warn("Expected to update 1 row's pursuant timestamp for a flow trigger event but instead updated {}", i); + } + return i >= 1; + } catch (SQLException e) { + throw new IOException(String.format("Encountered exception while trying to update pursuant timestamp to null for flowGroup: %s flowName: %s" + + "flowExecutionId: %s flowAction: %s triggerTimestamp: %s. 
Exception is %s", flowGroup, flowName, flowExecutionId, + flowActionType, triggerTimestamp), e); + } + } } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java index ec18f5fdaf7..70eece8f0cc 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java @@ -3,8 +3,12 @@ import java.io.IOException; import java.sql.Timestamp; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public interface SchedulerLeaseDeterminationStore { + static final Logger LOG = LoggerFactory.getLogger(SchedulerLeaseDeterminationStore.class); // Enum is used to reason about the three possible scenarios that can result from an attempt to obtain a lease for a // particular trigger event of a flow @@ -30,10 +34,13 @@ enum FlowActionType { * @param flowGroup * @param flowName * @param flowExecutionId - * @param triggerTimestamp is the time this flow is supposed to be launched + * @param triggerTimeMillis is the time this flow is supposed to be launched * @return LeaseAttemptStatus * @throws IOException */ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, - String flowExecutionId, FlowActionType flowActionType, Timestamp triggerTimestamp) throws IOException; + String flowExecutionId, FlowActionType flowActionType, long triggerTimeMillis) throws IOException; + + public boolean updatePursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, + FlowActionType flowActionType, Timestamp triggerTimestamp) throws IOException; } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java 
b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java index d3f4db11bbb..5b271a248e4 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java @@ -48,18 +48,18 @@ public class MysqlDagActionStore implements DagActionStore { protected final DataSource dataSource; private final String tableName; - private static final String EXISTS_STATEMENT = "SELECT EXISTS(SELECT * FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ?)"; + private static final String EXISTS_STATEMENT = "SELECT EXISTS(SELECT * FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ? AND dag_action = ?)"; - protected static final String INSERT_STATEMENT = "INSERT INTO %s (flow_group, flow_name, flow_execution_id, dag_action ) " + protected static final String INSERT_STATEMENT = "INSERT INTO %s (flow_group, flow_name, flow_execution_id, dag_action) " + "VALUES (?, ?, ?, ?)"; - private static final String DELETE_STATEMENT = "DELETE FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ?"; - private static final String GET_STATEMENT = "SELECT flow_group, flow_name, flow_execution_id, dag_action FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ?"; + private static final String DELETE_STATEMENT = "DELETE FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ? AND dag_action = ?"; + private static final String GET_STATEMENT = "SELECT flow_group, flow_name, flow_execution_id, dag_action FROM %s WHERE flow_group = ? AND flow_name =? AND flow_execution_id = ? 
AND dag_action = ?"; private static final String GET_ALL_STATEMENT = "SELECT flow_group, flow_name, flow_execution_id, dag_action FROM %s"; private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s (" + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, " + "dag_action varchar(100) NOT NULL, modified_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP NOT NULL, " - + "PRIMARY KEY (flow_group,flow_name,flow_execution_id))"; + + "PRIMARY KEY (flow_group,flow_name,flow_execution_id, dag_action))"; private final int getDagActionMaxRetries; @@ -86,7 +86,7 @@ public MysqlDagActionStore(Config config) throws IOException { } @Override - public boolean exists(String flowGroup, String flowName, String flowExecutionId) throws IOException, SQLException { + public boolean exists(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException, SQLException { ResultSet rs = null; try (Connection connection = this.dataSource.getConnection(); PreparedStatement existStatement = connection.prepareStatement(String.format(EXISTS_STATEMENT, tableName))) { @@ -94,12 +94,13 @@ public boolean exists(String flowGroup, String flowName, String flowExecutionId) existStatement.setString(++i, flowGroup); existStatement.setString(++i, flowName); existStatement.setString(++i, flowExecutionId); + existStatement.setString(++i, dagActionValue.toString()); rs = existStatement.executeQuery(); rs.next(); return rs.getBoolean(1); } catch (SQLException e) { - throw new IOException(String.format("Failure checking existence for table %s of flow with flow group:%s, flow name:%s and flow execution id:%s", - tableName, flowGroup, flowName, flowExecutionId), e); + throw new IOException(String.format("Failure checking existence 
for table %s of flow with flow group:%s, flow name:%s, flow execution id:%s and dagAction: %s", + tableName, flowGroup, flowName, flowExecutionId, dagActionValue), e); } finally { if (rs != null) { rs.close(); @@ -121,12 +122,12 @@ public void addDagAction(String flowGroup, String flowName, String flowExecution connection.commit(); } catch (SQLException e) { throw new IOException(String.format("Failure to adding action for table %s of flow with flow group:%s, flow name:%s and flow execution id:%s", - tableName, flowGroup, flowName, flowExecutionId), e); + tableName, flowGroup, flowName, flowExecutionId, dagActionValue), e); } } @Override - public boolean deleteDagAction(String flowGroup, String flowName, String flowExecutionId) throws IOException { + public boolean deleteDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException { try (Connection connection = this.dataSource.getConnection(); PreparedStatement deleteStatement = connection.prepareStatement(String.format(DELETE_STATEMENT, tableName))) { int i = 0; @@ -137,12 +138,12 @@ public boolean deleteDagAction(String flowGroup, String flowName, String flowExe connection.commit(); return result != 0; } catch (SQLException e) { - throw new IOException(String.format("Failure to delete action for table %s of flow with flow group:%s, flow name:%s and flow execution id:%s", + throw new IOException(String.format("Failure to delete action for table %s of flow with flow group:%s, flow name:%s, flow execution id:%s and dagAction: %s", tableName, flowGroup, flowName, flowExecutionId), e); } } - private DagAction getDagActionWithRetry(String flowGroup, String flowName, String flowExecutionId, ExponentialBackoff exponentialBackoff) + private DagAction getDagActionWithRetry(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue, ExponentialBackoff exponentialBackoff) throws IOException, SQLException { ResultSet rs = null; try 
(Connection connection = this.dataSource.getConnection(); @@ -151,20 +152,21 @@ private DagAction getDagActionWithRetry(String flowGroup, String flowName, Strin getStatement.setString(++i, flowGroup); getStatement.setString(++i, flowName); getStatement.setString(++i, flowExecutionId); + getStatement.setString(++i, dagActionValue.toString()); rs = getStatement.executeQuery(); if (rs.next()) { return new DagAction(rs.getString(1), rs.getString(2), rs.getString(3), DagActionValue.valueOf(rs.getString(4))); } else { if (exponentialBackoff.awaitNextRetryIfAvailable()) { - return getDagActionWithRetry(flowGroup, flowName, flowExecutionId, exponentialBackoff); + return getDagActionWithRetry(flowGroup, flowName, flowExecutionId, dagActionValue, exponentialBackoff); } else { - log.warn(String.format("Can not find dag action with flowGroup: %s, flowName: %s, flowExecutionId: %s",flowGroup, flowName, flowExecutionId)); + log.warn(String.format("Can not find dag action: %s with flowGroup: %s, flowName: %s, flowExecutionId: %s", dagActionValue, flowGroup, flowName, flowExecutionId)); return null; } } } catch (SQLException | InterruptedException e) { - throw new IOException(String.format("Failure get dag action from table %s of flow with flow group:%s, flow name:%s and flow execution id:%s", - tableName, flowGroup, flowName, flowExecutionId), e); + throw new IOException(String.format("Failure get dag action: %s from table %s of flow with flow group:%s, flow name:%s and flow execution id:%s", + dagActionValue, tableName, flowGroup, flowName, flowExecutionId), e); } finally { if (rs != null) { rs.close(); @@ -174,10 +176,10 @@ private DagAction getDagActionWithRetry(String flowGroup, String flowName, Strin } @Override - public DagAction getDagAction(String flowGroup, String flowName, String flowExecutionId) + public DagAction getDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException, SQLException { ExponentialBackoff 
exponentialBackoff = ExponentialBackoff.builder().initialDelay(GET_DAG_ACTION_INITIAL_WAIT_AFTER_FAILURE).maxRetries(this.getDagActionMaxRetries).build(); - return getDagActionWithRetry(flowGroup, flowName, flowExecutionId, exponentialBackoff); + return getDagActionWithRetry(flowGroup, flowName, flowExecutionId, dagActionValue, exponentialBackoff); } @Override diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java index 1ae73e9345b..671b37f0bf2 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java @@ -348,6 +348,7 @@ public void submitRunnableToExecutor(Runnable runnable) { * can be null if no callback is needed. * @param additionalJobData additional job data in a {@link Map} * @param jobClass Quartz job class + * @param triggerTimeMillis optionally include triggerTime * @throws JobException when there is anything wrong * with scheduling the job */ @@ -581,7 +582,7 @@ public void close() throws IOException { /** * Get a {@link org.quartz.Trigger} from the given job configuration properties. 
*/ - private Trigger getTrigger(JobKey jobKey, Properties jobProps) { + public Trigger getTrigger(JobKey jobKey, Properties jobProps) { // Build a trigger for the job with the given cron-style schedule return TriggerBuilder.newTrigger() .withIdentity(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), @@ -600,11 +601,17 @@ public static class GobblinJob extends BaseGobblinJob implements InterruptableJo @Override public void executeImpl(JobExecutionContext context) throws JobExecutionException { - LOG.info("Starting job " + context.getJobDetail().getKey()); - JobDataMap dataMap = context.getJobDetail().getJobDataMap(); + JobDetail jobDetail = context.getJobDetail(); + LOG.info("Starting job " + jobDetail.getKey()); + JobDataMap dataMap = jobDetail.getJobDataMap(); JobScheduler jobScheduler = (JobScheduler) dataMap.get(JOB_SCHEDULER_KEY); Properties jobProps = (Properties) dataMap.get(PROPERTIES_KEY); JobListener jobListener = (JobListener) dataMap.get(JOB_LISTENER_KEY); + // Obtain trigger timestamp from trigger to pass to jobProps + Trigger trigger = context.getTrigger(); + long triggerTimestampMillis = trigger.getPreviousFireTime().getTime(); + jobProps.setProperty(ConfigurationKeys.SCHEDULER_TRIGGER_TIMESTAMP_MILLIS_KEY, + String.valueOf(triggerTimestampMillis)); try { jobScheduler.runJob(jobProps, jobListener); diff --git a/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java b/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java index 0c65f224113..6f565b7bff5 100644 --- a/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java +++ b/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java @@ -62,39 +62,46 @@ public void setUp() throws Exception { @Test public void testAddAction() throws Exception { this.mysqlDagActionStore.addDagAction(flowGroup, flowName, 
flowExecutionId, DagActionStore.DagActionValue.KILL); - //Should not be able to add again when previous one exist + //Should not be able to add KILL again when previous one exist Assert.expectThrows(IOException.class, - () -> this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.RESUME)); - //Should be able to add un-exist one - this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.RESUME); + () -> this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); + //Should be able to add a RESUME action for same execution as well as KILL for another execution of the flow + this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.RESUME); + this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL); } @Test(dependsOnMethods = "testAddAction") public void testExists() throws Exception { - Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId)); - Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2)); - Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_3)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.RESUME)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL)); + Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_3, DagActionStore.DagActionValue.RESUME)); + Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_3, DagActionStore.DagActionValue.KILL)); } 
@Test(dependsOnMethods = "testExists") public void testGetAction() throws IOException, SQLException { - Assert.assertEquals(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL), this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId)); - Assert.assertEquals(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.RESUME), this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId_2)); + Assert.assertEquals(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL), + this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); + Assert.assertEquals(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL), + this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL)); Collection dagActions = this.mysqlDagActionStore.getDagActions(); - Assert.assertEquals(2, dagActions.size()); + Assert.assertEquals(3, dagActions.size()); HashSet set = new HashSet<>(); set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); + set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.RESUME)); set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.RESUME)); Assert.assertEquals(dagActions, set); } @Test(dependsOnMethods = "testGetAction") public void testDeleteAction() throws IOException, SQLException { - this.mysqlDagActionStore.deleteDagAction(flowGroup, flowName, flowExecutionId); + this.mysqlDagActionStore.deleteDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL); Assert.assertEquals(this.mysqlDagActionStore.getDagActions().size(), 1); - 
Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId)); - Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2)); - Assert.assertNull( this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId)); + Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.RESUME)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL)); + Assert.assertNull( this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); } } \ No newline at end of file diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java index f47cbde507a..090377adaec 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java @@ -580,7 +580,7 @@ public void run() { private void clearUpDagAction(DagId dagId) throws IOException { if (this.dagActionStore.isPresent()) { - this.dagActionStore.get().deleteDagAction(dagId.flowGroup, dagId.flowName, dagId.flowExecutionId); + this.dagActionStore.get().deleteDagAction(dagId.flowGroup, dagId.flowName, dagId.flowExecutionId, DagActionStore.DagActionValue.KILL); } } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index 49817a40d85..45a6c78ce0c 100644 --- 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -58,6 +58,7 @@ import org.apache.gobblin.metrics.event.TimingEvent; import org.apache.gobblin.runtime.api.FlowSpec; import org.apache.gobblin.runtime.api.JobSpec; +import org.apache.gobblin.runtime.api.SchedulerLeaseDeterminationStore; import org.apache.gobblin.runtime.api.Spec; import org.apache.gobblin.runtime.api.SpecCatalogListener; import org.apache.gobblin.runtime.api.SpecProducer; @@ -103,7 +104,8 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { private FlowStatusGenerator flowStatusGenerator; private UserQuotaManager quotaManager; - + private boolean isMultiActiveSchedulerEnabled; + private SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler; private final ClassAliasResolver aliasResolver; @@ -111,13 +113,16 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { public Orchestrator(Config config, Optional topologyCatalog, Optional dagManager, Optional log, - FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled) { + FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { _log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass()); this.aliasResolver = new ClassAliasResolver<>(SpecCompiler.class); this.topologyCatalog = topologyCatalog; this.dagManager = dagManager; this.flowStatusGenerator = flowStatusGenerator; - + this.isMultiActiveSchedulerEnabled = + config.hasPath(ServiceConfigKeys.GOBBLIN_SERVICE_MULTI_ACTIVE_SCHEDULER_ENABLED_KEY) ? 
+ config.getBoolean(ServiceConfigKeys.GOBBLIN_SERVICE_MULTI_ACTIVE_SCHEDULER_ENABLED_KEY) : false; + this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; try { String specCompilerClassName = ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_FLOWCOMPILER_CLASS; if (config.hasPath(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWCOMPILER_CLASS_KEY)) { @@ -160,8 +165,8 @@ public Orchestrator(Config config, Optional topologyCatalog, Op @Inject public Orchestrator(Config config, FlowStatusGenerator flowStatusGenerator, Optional topologyCatalog, - Optional dagManager, Optional log) { - this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true); + Optional dagManager, Optional log, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { + this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, schedulerLeaseAlgoHandler); } @@ -222,7 +227,10 @@ public void onUpdateSpec(Spec updatedSpec) { } - public void orchestrate(Spec spec) throws Exception { + /* + New Orchestrate method + */ + public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMillis) throws Exception { // Add below waiting because TopologyCatalog and FlowCatalog service can be launched at the same time this.topologyCatalog.get().getInitComplete().await(); @@ -310,6 +318,16 @@ public void orchestrate(Spec spec) throws Exception { flowCompilationTimer.get().stop(flowMetadata); } + // If multi-active scheduler is enabled do not pass onto DagManager + if (this.isMultiActiveSchedulerEnabled) { + String flowExecutionId = flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD); + boolean leaseAttemptSucceeded = schedulerLeaseAlgoHandler.handleNewTriggerEvent(jobProps, flowGroup, flowName, + flowExecutionId, SchedulerLeaseDeterminationStore.FlowActionType.LAUNCH, triggerTimestampMillis); + _log.info("scheduler attempted lease on flowGroup: %s, flowName: %s, flowExecutionId: %s, LAUNCH event for " + + "triggerTimestamp: %s that was " + (leaseAttemptSucceeded ? 
"" : "NOT") + "successful", flowGroup, + flowName, flowExecutionId, triggerTimestampMillis); + return; + } /* TODO: multiactiveScheduler change here do all the quota checking and compilation above as normal but before passing to dag manager should go to another function that checks config and either passes to DM directly or ends up going through scheduler lock contention @@ -369,14 +387,6 @@ public void orchestrate(Spec spec) throws Exception { Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS); } - private void newAbstraction() { - // TODO: check config - // either do old way and pass to DM - - // do lock contention - // Is this a new class...? how can I abstract and make modular? or simply a util method? - } - /** * Check if a FlowSpec instance is allowed to run. * diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java new file mode 100644 index 00000000000..6b68ed0eb0e --- /dev/null +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java @@ -0,0 +1,130 @@ +package org.apache.gobblin.service.modules.orchestration; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoUnit; +import java.util.Locale; +import java.util.Properties; +import java.util.Random; + +import org.quartz.CronScheduleBuilder; +import org.quartz.JobKey; +import org.quartz.SchedulerException; +import org.quartz.Trigger; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.typesafe.config.Config; + +import javax.inject.Inject; + +import org.apache.gobblin.configuration.ConfigurationKeys; +import org.apache.gobblin.runtime.api.SchedulerLeaseDeterminationStore; +import 
org.apache.gobblin.scheduler.JobScheduler; +import org.apache.gobblin.scheduler.SchedulerService; +import org.apache.gobblin.util.ConfigUtils; + + +public class SchedulerLeaseAlgoHandler { + private static final Logger LOG = LoggerFactory.getLogger(SchedulerLeaseAlgoHandler.class); + private final long linger; + private final int staggerUpperBoundSec; + private static Random random = new Random(); + protected SchedulerLeaseDeterminationStore leaseDeterminationStore; + protected JobScheduler jobScheduler; + protected SchedulerService schedulerService; + @Inject + public SchedulerLeaseAlgoHandler(Config config, SchedulerLeaseDeterminationStore leaseDeterminationStore, + JobScheduler jobScheduler, SchedulerService schedulerService) + throws IOException { + this.linger = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY, + ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); + this.staggerUpperBoundSec = ConfigUtils.getInt(config, + ConfigurationKeys.SCHEDULER_STAGGERING_UPPER_BOUND_SEC_KEY, + ConfigurationKeys.DEFAULT_SCHEDULER_STAGGERING_UPPER_BOUND_SEC); + this.leaseDeterminationStore = leaseDeterminationStore; + this.jobScheduler = jobScheduler; + this.schedulerService = schedulerService; + } + private SchedulerLeaseDeterminationStore schedulerLeaseDeterminationStore; + + /** + * This method is used in the multi-active scheduler case for one or more hosts to respond to a flow's trigger event + * by attempting a lease for the flow event. + * @param jobProps + * @param flowGroup + * @param flowName + * @param flowExecutionId + * @param flowActionType + * @param triggerTimeMillis + * @return true if this host obtained the lease for this flow's trigger event, false otherwise. 
+ * @throws IOException + */ + public boolean handleNewTriggerEvent(Properties jobProps, String flowGroup, String flowName, String flowExecutionId, + SchedulerLeaseDeterminationStore.FlowActionType flowActionType, long triggerTimeMillis) + throws IOException { + SchedulerLeaseDeterminationStore.LeaseAttemptStatus leaseAttemptStatus = + schedulerLeaseDeterminationStore.attemptInsertAndGetPursuantTimestamp(flowGroup, flowName, flowExecutionId, + flowActionType, triggerTimeMillis); + // TODO: add a log event or metric for each of these cases + switch (leaseAttemptStatus) { + case LEASE_OBTAINED: + return true; + case PREVIOUS_LEASE_EXPIRED: + // recursively try obtaining lease again immediately, stops when reaches one of the other cases + return handleNewTriggerEvent(jobProps, flowGroup, flowName, flowExecutionId, flowActionType, triggerTimeMillis); + case PREVIOUS_LEASE_VALID: + // TODO: potentially return pursuant timestamp here so we can use that instead of current + scheduleReminderForTriggerEvent(jobProps, flowGroup, flowName, flowExecutionId, flowActionType, triggerTimeMillis); + } + return false; + } + + /** + * This method is used by {@link SchedulerLeaseAlgoHandler.handleNewTriggerEvent} to schedule a reminder for itself to + * check on the other participant's progress during pursuing orchestration after the time the lease should expire. + * If the previous participant was successful, then no further action is taken otherwise we re-attempt pursuing + * orchestration ourselves. + * @param flowGroup + * @param flowName + * @param flowExecutionId + * @param flowActionType + * @param triggerTimeMillis + */ + protected void scheduleReminderForTriggerEvent(Properties jobProps, String flowGroup, String flowName, String flowExecutionId, + SchedulerLeaseDeterminationStore.FlowActionType flowActionType, long triggerTimeMillis) { + // Check-in `linger` time after the current timestamp which is "close-enough" to the time the pursuant attempted + // the flow action. 
We also add a small randomization to avoid 'thundering herd' issue + String cronExpression = createCronFromDelayPeriod(linger + random.nextInt(staggerUpperBoundSec)); + jobProps.setProperty(ConfigurationKeys.JOB_SCHEDULE_KEY, cronExpression); + // This timestamp is what will be used to identify the particular flow trigger event it's associated with + jobProps.setProperty(ConfigurationKeys.SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY, String.valueOf(triggerTimeMillis)); + JobKey key = new JobKey(flowName, flowGroup); + Trigger trigger = this.jobScheduler.getTrigger(key, jobProps); + try { + LOG.info("Attempting to add job reminder to Scheduler Service where job is %s trigger event %s and reminder is at " + + "%s.", key, triggerTimeMillis, trigger.getNextFireTime()); + this.schedulerService.getScheduler().scheduleJob(trigger); + } catch (SchedulerException e) { + LOG.warn("Failed to add job reminder due to SchedulerException for job %s trigger event %s ", key, triggerTimeMillis, e); + } + LOG.info(String.format("Scheduled reminder for job %s trigger event %s. Next run: %s.", key, triggerTimeMillis, trigger.getNextFireTime())); + } + + /** + * These methods should only be called from the Orchestrator or JobScheduler classes as it directly adds jobs to the + * Quartz scheduler + * @param delayPeriodSeconds + * @return + */ + protected static String createCronFromDelayPeriod(long delayPeriodSeconds) { + LocalDateTime now = LocalDateTime.now(); + LocalDateTime delaySecondsLater = now.plus(delayPeriodSeconds, ChronoUnit.SECONDS); + // TODO: potentially better way of generating cron expression that does not make it US dependent, this may not be an + // issue though. + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("ss mm HH dd MM ? 
yyyy", Locale.US); + return delaySecondsLater.format(formatter); + } + +} diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java index 3919a3a7d74..360fd291505 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java @@ -30,7 +30,6 @@ import javax.inject.Named; import lombok.extern.slf4j.Slf4j; import org.apache.gobblin.runtime.api.DagActionStore; -import org.apache.gobblin.runtime.api.SpecNotFoundException; import org.apache.gobblin.runtime.util.InjectionNames; import org.apache.gobblin.service.FlowExecutionResourceLocalHandler; import org.apache.gobblin.service.modules.core.GobblinServiceManager; @@ -54,27 +53,26 @@ public void resume(ComplexResourceKey quotaManager; + protected final SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler; @Getter protected final Map scheduledFlowSpecs; @Getter @@ -163,7 +166,7 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser Config config, Optional helixManager, Optional flowCatalog, Optional topologyCatalog, Orchestrator orchestrator, SchedulerService schedulerService, Optional quotaManager, Optional log, - @Named(InjectionNames.WARM_STANDBY_ENABLED) boolean warmStandbyEnabled) throws Exception { + @Named(InjectionNames.WARM_STANDBY_ENABLED) boolean warmStandbyEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { super(ConfigUtils.configToProperties(config), schedulerService); _log = log.isPresent() ? 
log.get() : LoggerFactory.getLogger(getClass()); @@ -179,6 +182,7 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser && config.hasPath(GOBBLIN_SERVICE_SCHEDULER_DR_NOMINATED); this.warmStandbyEnabled = warmStandbyEnabled; this.quotaManager = quotaManager; + this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; // Check that these metrics do not exist before adding, mainly for testing purpose which creates multiple instances // of the scheduler. If one metric exists, then the others should as well. MetricFilter filter = MetricFilter.contains(RuntimeMetrics.GOBBLIN_JOB_SCHEDULER_GET_SPECS_DURING_STARTUP_PER_SPEC_RATE_NANOS); @@ -200,9 +204,9 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser public GobblinServiceJobScheduler(String serviceName, Config config, FlowStatusGenerator flowStatusGenerator, Optional helixManager, Optional flowCatalog, Optional topologyCatalog, Optional dagManager, Optional quotaManager, - SchedulerService schedulerService, Optional log, boolean warmStandbyEnabled) throws Exception { + SchedulerService schedulerService, Optional log, boolean warmStandbyEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { this(serviceName, config, helixManager, flowCatalog, topologyCatalog, - new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log), schedulerService, quotaManager, log, warmStandbyEnabled); + new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log, schedulerLeaseAlgoHandler), schedulerService, quotaManager, log, warmStandbyEnabled, schedulerLeaseAlgoHandler); } public synchronized void setActive(boolean isActive) { @@ -442,7 +446,11 @@ public synchronized void scheduleJob(Properties jobProps, JobListener jobListene public void runJob(Properties jobProps, JobListener jobListener) throws JobException { try { Spec flowSpec = 
this.scheduledFlowSpecs.get(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY)); - this.orchestrator.orchestrate(flowSpec); + String triggerTimestampMillis = + jobProps.containsKey(ConfigurationKeys.SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY) + ? jobProps.getProperty(ConfigurationKeys.SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY, "0L"): + jobProps.getProperty(ConfigurationKeys.SCHEDULER_TRIGGER_TIMESTAMP_MILLIS_KEY,"0L"); + this.orchestrator.orchestrate(flowSpec, jobProps, Long.valueOf(triggerTimestampMillis)); } catch (Exception e) { throw new JobException("Failed to run Spec: " + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), e); } diff --git a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/OrchestratorTest.java b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/OrchestratorTest.java index b1cec2b3aa1..b0b45380c04 100644 --- a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/OrchestratorTest.java +++ b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/orchestration/OrchestratorTest.java @@ -341,13 +341,13 @@ public void doNotRegisterMetricsAdhocFlows() throws Exception { flowProps.put("gobblin.flow.destinationIdentifier", "destination"); flowProps.put("flow.allowConcurrentExecution", false); FlowSpec adhocSpec = new FlowSpec(URI.create("flow0/group0"), "1", "", ConfigUtils.propertiesToConfig(flowProps) , flowProps, Optional.absent(), Optional.absent()); - this.orchestrator.orchestrate(adhocSpec); + this.orchestrator.orchestrate(adhocSpec, flowProps, 0); String metricName = MetricRegistry.name(ServiceMetricNames.GOBBLIN_SERVICE_PREFIX, "group0", "flow0", ServiceMetricNames.COMPILED); Assert.assertNull(metricContext.getParent().get().getGauges().get(metricName)); flowProps.setProperty("job.schedule", "0/2 * * * * ?"); FlowSpec scheduledSpec = new FlowSpec(URI.create("flow0/group0"), "1", "", ConfigUtils.propertiesToConfig(flowProps) , 
flowProps, Optional.absent(), Optional.absent()); - this.orchestrator.orchestrate(scheduledSpec); + this.orchestrator.orchestrate(scheduledSpec, flowProps, 0); Assert.assertNotNull(metricContext.getParent().get().getGauges().get(metricName)); } } \ No newline at end of file diff --git a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java index 22f060bbd79..21b5dc0a4b5 100644 --- a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java +++ b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java @@ -60,6 +60,7 @@ import org.apache.gobblin.service.modules.orchestration.AbstractUserQuotaManager; import org.apache.gobblin.service.modules.orchestration.InMemoryUserQuotaManager; import org.apache.gobblin.service.modules.orchestration.Orchestrator; +import org.apache.gobblin.service.modules.orchestration.SchedulerLeaseAlgoHandler; import org.apache.gobblin.service.modules.orchestration.UserQuotaManager; import org.apache.gobblin.service.modules.spec.JobExecutionPlan; import org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory; @@ -348,7 +349,7 @@ public void testJobSchedulerAddFlowQuotaExceeded() throws Exception { SchedulerService schedulerService = new SchedulerService(new Properties()); // Mock a GaaS scheduler not in warm standby mode GobblinServiceJobScheduler scheduler = new GobblinServiceJobScheduler("testscheduler", - ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), false); + ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), 
Optional.absent(), false, new SchedulerLeaseAlgoHandler(ConfigFactory.empty(), // TODO )); schedulerService.startAsync().awaitRunning(); scheduler.startUp(); From 75b446fffee1e3e2a50293f4ff5084bb659ba843 Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Fri, 26 May 2023 15:32:29 -0400 Subject: [PATCH 06/11] DagActionMonitor changes to handle LAUNCH events --- .../main/avro/DagActionStoreChangeEvent.avsc | 5 + .../gobblin/runtime/api/DagActionStore.java | 2 +- ...MysqlSchedulerLeaseDeterminationStore.java | 81 +++++++++----- .../api/SchedulerLeaseDeterminationStore.java | 31 +++++- .../runtime/metrics/RuntimeMetrics.java | 1 + .../gobblin/scheduler/JobScheduler.java | 1 - .../modules/orchestration/DagManager.java | 3 +- .../modules/orchestration/Orchestrator.java | 8 +- .../SchedulerLeaseAlgoHandler.java | 4 +- .../orchestration/TimingEventUtils.java | 4 +- .../DagActionStoreChangeMonitor.java | 102 +++++++++++++----- .../DagActionStoreChangeMonitorFactory.java | 8 +- .../orchestration/OrchestratorTest.java | 4 +- 13 files changed, 183 insertions(+), 71 deletions(-) diff --git a/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc b/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc index 268f18ad049..8bd4fb301b2 100644 --- a/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc +++ b/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc @@ -23,6 +23,11 @@ "type" : "string", "doc" : "flow execution id for the dag action", "compliance" : "NONE" + }, { + "name" : "dagAction", + "type" : "string", + "doc" : "type of dag action", + "compliance" : "NONE" } ] } \ No newline at end of file diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java index 762d405e469..0933bfbea63 100644 --- 
a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java @@ -28,7 +28,7 @@ public interface DagActionStore { enum DagActionValue { KILL, RESUME, - // TODO: multiActiveScheduler change here: should flow actions be same as dag actions and be one enum or have overlap but not all the same? + // TODO: potentially combine this enum with {@link SchedulerLeaseDeterminationStore.FlowActionType} LAUNCH } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java index c76bcccbd2c..507b5c69b87 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.gobblin.runtime.api; import java.io.IOException; @@ -7,6 +24,7 @@ import java.sql.SQLException; import java.sql.Timestamp; +import com.google.inject.Inject; import com.typesafe.config.Config; import javax.sql.DataSource; @@ -18,20 +36,19 @@ import org.apache.gobblin.util.ConfigUtils; -/** - * TODO: multiActiveScheduler change here write doc for this class - */ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDeterminationStore { public static final String CONFIG_PREFIX = "MysqlSchedulerLeaseDeterminationStore"; protected final DataSource dataSource; + private final DagActionStore dagActionStore; private final String tableName; private final long epsilon; private final long linger; - // TODO: define retention eventually on this table - // TODO: also add to primary key the type of event "launch" - // initialize table with one entry only if it doesn't exist. these configs - // another table with epsilon and linger then join the two tables + /* TODO: + - define retention on this table + - initialize table with epsilon and linger if one already doesn't exist using these configs + - join with table above to ensure epsilon/linger values are consistent across hosts (in case hosts are deployed with different configs) + */ protected static final String WHERE_CLAUSE_TO_MATCH_ROW = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=? " + "AND flow_action=? AND ABS(trigger_event_timestamp-?) 
<= %s"; protected static final String ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT = "INSERT INTO %s (flow_group, " @@ -41,14 +58,6 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete protected static final String UPDATE_PURSUANT_TIMESTAMP_STATEMENT = "UPDATE %s SET pursuant_timestamp = NULL " + WHERE_CLAUSE_TO_MATCH_ROW; - - /* TODO: Potentially use the following statement that obtains the lease with the insert, otherwise returns the original - value, but it's a bit hard to reason about this statement working. - protected static final String ATTEMPT_OBTAINING_LEASE_ELSE_RETURN_EXISTING_ROW_STATEMENT = "SELECT * FROM " - + "(SELECT ? AS flow_group, ? AS flow_name, ? AS flow_execution_id, ? AS trigger_event_timestamp) AS new_rows" - + "WHERE EXISTS (SELECT * FROM %s WHERE %s.flow_group = new_rows.flow_group AND %s.flow_name = new_rows.flow_name)" - + " AND %s.flow_execution_id = new_rows.flow_execution_id AND ABS(trigger_event_timestamp-?) <= %s)"; - */ private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" @@ -57,11 +66,12 @@ public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDete + "pursuant_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,flow_action,trigger_event_timestamp)"; - public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { + @Inject + public MysqlSchedulerLeaseDeterminationStore(Config config, DagActionStore dagActionStore) throws IOException { if (config.hasPath(CONFIG_PREFIX)) { config = config.getConfig(CONFIG_PREFIX).withFallback(config); } else { - throw new IOException("Please specify the config for MysqlDagActionStore"); + throw new IOException("Please specify the config for 
MysqlSchedulerLeaseDeterminationStore"); } this.tableName = ConfigUtils.getString(config, ConfigurationKeys.SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY, @@ -79,6 +89,7 @@ public MysqlSchedulerLeaseDeterminationStore(Config config) throws IOException { } catch (SQLException e) { throw new IOException("Table creation failure for " + tableName, e); } + this.dagActionStore = dagActionStore; } @Override @@ -89,7 +100,7 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, try (Connection connection = this.dataSource.getConnection(); PreparedStatement insertStatement = connection.prepareStatement( String.format(ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT, tableName, tableName, epsilon, - tableName, epsilon))) { + epsilon))) { int i = 0; // Values to set in new row insertStatement.setString(++i, flowGroup); @@ -114,16 +125,30 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, if (!resultSet.next()) { throw new IOException(String.format("Unexpected error where no result returned while trying to obtain lease. 
" - + "This error indicates that no row was inserted but also no entry existed for trigger flow event for " - + "table %s flow group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s", tableName, - flowGroup, flowName, flowExecutionId, triggerTimestamp)); + + "This error indicates that no entry existed for trigger flow event for table %s flow group: %s, flow " + + "name: %s flow execution id: %s and trigger timestamp: %s when one should have been inserted", + tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp)); } // If a row was inserted, then we have obtained the lease int rowsUpdated = resultSet.getInt(0); if (rowsUpdated == 1) { - // TODO: write to dagactionstore then update pursuant to null - if (updatePursuantTimestamp(flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp)) { - return LeaseAttemptStatus.LEASE_OBTAINED; + // If the pursuing flow launch has been persisted to the {@link DagActionStore} we have completed lease obtainment + this.dagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.LAUNCH); + if (this.dagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.LAUNCH)) { + if (updatePursuantTimestamp(flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp)) { + // TODO: potentially add metric here to count number of flows scheduled by each scheduler + LOG.info("Host completed obtaining lease for flow group: %s, flow name: %s flow execution id: %s and " + + "trigger timestamp: %s", flowGroup, flowName, flowExecutionId, triggerTimestamp); + return LeaseAttemptStatus.LEASE_OBTAINED; + } else { + LOG.warn("Unable to update pursuant timestamp after persisting flow launch to DagActionStore for flow " + + "group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s.", flowGroup, flowName, + flowExecutionId, triggerTimestamp); + } + } else { + LOG.warn("Did not find flow launch action in DagActionStore after 
adding it for flow group: %s, flow name: " + + "%s flow execution id: %s and trigger timestamp: %s.", flowGroup, flowName, flowExecutionId, + triggerTimestamp); } } else if (rowsUpdated > 1) { throw new IOException(String.format("Expect at most 1 row in table for a given trigger event. %s rows " @@ -134,6 +159,8 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, long currentTimeMillis = System.currentTimeMillis(); // Another host has obtained lease and no further steps required if (pursuantTimestamp == null) { + LOG.info("Another host has already successfully obtained lease for flow group: %s, flow name: %s flow execution " + + "id: %s and trigger timestamp: %s", flowGroup, flowName, flowExecutionId, triggerTimeMillis); return LeaseAttemptStatus.LEASE_OBTAINED; } else if (pursuantTimestamp.getTime() + linger <= currentTimeMillis) { return LeaseAttemptStatus.PREVIOUS_LEASE_EXPIRED; @@ -168,9 +195,9 @@ public boolean updatePursuantTimestamp(String flowGroup, String flowName, String } return i >= 1; } catch (SQLException e) { - throw new IOException(String.format("Encountered exception while trying to update pursuant timestamp to null for flowGroup: %s flowName: %s" - + "flowExecutionId: %s flowAction: %s triggerTimestamp: %s. Exception is %s", flowGroup, flowName, flowExecutionId, - flowActionType, triggerTimestamp), e); + throw new IOException(String.format("Encountered exception while trying to update pursuant timestamp to null for " + + "flowGroup: %s flowName: %s flowExecutionId: %s flowAction: %s triggerTimestamp: %s. 
Exception is %s", + flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp), e); } } } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java index 70eece8f0cc..42377839bda 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.gobblin.runtime.api; import java.io.IOException; @@ -7,6 +24,11 @@ import org.slf4j.LoggerFactory; +/** + * Interface defines the two basic actions required for lease determination for each FlowActionType event for a flow. + * It is used by the {@link SchedulerLeaseAlgoHandler} to allow multiple scheduler's on different hosts to determine + * which scheduler is tasked with ensuring the FlowAction is taken for the trigger. 
+ */ public interface SchedulerLeaseDeterminationStore { static final Logger LOG = LoggerFactory.getLogger(SchedulerLeaseDeterminationStore.class); @@ -38,9 +60,14 @@ enum FlowActionType { * @return LeaseAttemptStatus * @throws IOException */ - public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, + LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType, long triggerTimeMillis) throws IOException; - public boolean updatePursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, + /** + * This method is used by `attemptInsertAndGetPursuantTimestamp` above to indicate the host has successfully completed + * actions necessary to confirm the lease of a flow trigger event. + * @return true if successfully updated, indicating no further actions need to be taken regarding this event. + */ + boolean updatePursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType, Timestamp triggerTimestamp) throws IOException; } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java index 3d9e9b5c55d..07c1a71e2dc 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java @@ -46,6 +46,7 @@ public class RuntimeMetrics { public static final String GOBBLIN_DAG_ACTION_STORE_MONITOR_KILLS_INVOKED = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".dagActionStoreMonitor.kills.invoked"; public static final String GOBBLIN_DAG_ACTION_STORE_MONITOR_MESSAGE_PROCESSED= ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".dagActionStoreMonitor.message.processed"; public static final String GOBBLIN_DAG_ACTION_STORE_MONITOR_RESUMES_INVOKED = 
ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".dagActionStoreMonitor.resumes.invoked"; + public static final String GOBBLIN_DAG_ACTION_STORE_MONITOR_FLOWS_LAUNCHED = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".dagActionStoreMonitor.flows.launched"; public static final String GOBBLIN_DAG_ACTION_STORE_MONITOR_UNEXPECTED_ERRORS = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".dagActionStoreMonitor.unexpected.errors"; public static final String GOBBLIN_DAG_ACTION_STORE_PRODUCE_TO_CONSUME_DELAY_MILLIS = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".dagActionStoreMonitor.produce.to.consume.delay"; diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java index 671b37f0bf2..24d96d2c089 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java @@ -348,7 +348,6 @@ public void submitRunnableToExecutor(Runnable runnable) { * can be null if no callback is needed. 
* @param additionalJobData additional job data in a {@link Map} * @param jobClass Quartz job class - * @param triggerTimeMillis optionally include triggerTime * @throws JobException when there is anything wrong * with scheduling the job */ diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java index 090377adaec..da7e928914e 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java @@ -277,8 +277,9 @@ protected void startUp() { * @param dag {@link Dag} to be added * @param persist whether to persist the dag to the {@link DagStateStore} * @param setStatus if true, set all jobs in the dag to pending + * Note this should only be called from the {@link Orchestrator} or {@link org.apache.gobblin.service.monitoring.DagActionStoreChangeMonitor} */ - synchronized void addDag(Dag dag, boolean persist, boolean setStatus) throws IOException { + public synchronized void addDag(Dag dag, boolean persist, boolean setStatus) throws IOException { if (persist) { //Persist the dag this.dagStateStore.writeCheckpoint(dag); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index 45a6c78ce0c..9bab032191b 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -318,7 +318,7 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil flowCompilationTimer.get().stop(flowMetadata); } - // If multi-active scheduler is enabled do not pass onto DagManager + // If 
multi-active scheduler is enabled do not pass onto DagManager, otherwise scheduler forwards it directly if (this.isMultiActiveSchedulerEnabled) { String flowExecutionId = flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD); boolean leaseAttemptSucceeded = schedulerLeaseAlgoHandler.handleNewTriggerEvent(jobProps, flowGroup, flowName, @@ -328,11 +328,7 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil flowName, flowExecutionId, triggerTimestampMillis); return; } - /* TODO: multiactiveScheduler change here - do all the quota checking and compilation above as normal but before passing to dag manager should go to another - function that checks config and either passes to DM directly or ends up going through scheduler lock contention - */ - // CALLS NEW METHOD newAbstraction()... + if (this.dagManager.isPresent()) { try { //Send the dag to the DagManager. diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java index 6b68ed0eb0e..589d7ab65f3 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java @@ -75,7 +75,6 @@ public boolean handleNewTriggerEvent(Properties jobProps, String flowGroup, Stri // recursively try obtaining lease again immediately, stops when reaches one of the other cases return handleNewTriggerEvent(jobProps, flowGroup, flowName, flowExecutionId, flowActionType, triggerTimeMillis); case PREVIOUS_LEASE_VALID: - // TODO: potentially return pursuant timestamp here so we can use that instead of current scheduleReminderForTriggerEvent(jobProps, flowGroup, flowName, flowExecutionId, flowActionType, triggerTimeMillis); } return false; @@ -121,8 +120,7 
@@ protected void scheduleReminderForTriggerEvent(Properties jobProps, String flowG protected static String createCronFromDelayPeriod(long delayPeriodSeconds) { LocalDateTime now = LocalDateTime.now(); LocalDateTime delaySecondsLater = now.plus(delayPeriodSeconds, ChronoUnit.SECONDS); - // TODO: potentially better way of generating cron expression that does not make it US dependent, this may not be an - // issue though. + // TODO: investigate potentially better way of generating cron expression that does not make it US dependent DateTimeFormatter formatter = DateTimeFormatter.ofPattern("ss mm HH dd MM ? yyyy", Locale.US); return delaySecondsLater.format(formatter); } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/TimingEventUtils.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/TimingEventUtils.java index 65b464c888a..99661305fd3 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/TimingEventUtils.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/TimingEventUtils.java @@ -30,8 +30,8 @@ import org.apache.gobblin.util.ConfigUtils; -class TimingEventUtils { - static Map getFlowMetadata(FlowSpec flowSpec) { +public class TimingEventUtils { + public static Map getFlowMetadata(FlowSpec flowSpec) { return getFlowMetadata(flowSpec.getConfig()); } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java index 3b2778150b8..e528e20aa68 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java @@ -18,10 +18,16 @@ package org.apache.gobblin.service.monitoring; import java.io.IOException; +import 
java.lang.reflect.InvocationTargetException; +import java.net.URI; +import java.net.URISyntaxException; import java.sql.SQLException; +import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.reflect.ConstructorUtils; + import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -33,11 +39,22 @@ import org.apache.gobblin.kafka.client.DecodeableKafkaRecord; import org.apache.gobblin.metrics.ContextAwareGauge; import org.apache.gobblin.metrics.ContextAwareMeter; +import org.apache.gobblin.metrics.event.EventSubmitter; +import org.apache.gobblin.metrics.event.TimingEvent; import org.apache.gobblin.runtime.api.DagActionStore; +import org.apache.gobblin.runtime.api.FlowSpec; import org.apache.gobblin.runtime.api.SpecNotFoundException; import org.apache.gobblin.runtime.kafka.HighLevelConsumer; import org.apache.gobblin.runtime.metrics.RuntimeMetrics; +import org.apache.gobblin.runtime.spec_catalog.FlowCatalog; +import org.apache.gobblin.service.FlowId; +import org.apache.gobblin.service.ServiceConfigKeys; +import org.apache.gobblin.service.modules.flow.SpecCompiler; +import org.apache.gobblin.service.modules.flowgraph.Dag; import org.apache.gobblin.service.modules.orchestration.DagManager; +import org.apache.gobblin.service.modules.orchestration.TimingEventUtils; +import org.apache.gobblin.service.modules.spec.JobExecutionPlan; +import org.apache.gobblin.util.ClassAliasResolver; /** @@ -52,6 +69,7 @@ public class DagActionStoreChangeMonitor extends HighLevelConsumer { // Metrics private ContextAwareMeter killsInvoked; private ContextAwareMeter resumesInvoked; + private ContextAwareMeter flowsLaunched; private ContextAwareMeter unexpectedErrors; private ContextAwareMeter messageProcessedMeter; private ContextAwareGauge produceToConsumeDelayMillis; // Reports delay from all partitions in one gauge @@ -71,17 +89,36 @@ public String 
load(String key) throws Exception { protected DagActionStore dagActionStore; protected DagManager dagManager; + protected SpecCompiler specCompiler; + protected FlowCatalog flowCatalog; + protected EventSubmitter eventSubmitter; // Note that the topic is an empty string (rather than null to avoid NPE) because this monitor relies on the consumer // client itself to determine all Kafka related information dynamically rather than through the config. public DagActionStoreChangeMonitor(String topic, Config config, DagActionStore dagActionStore, DagManager dagManager, - int numThreads) { + int numThreads, FlowCatalog flowCatalog) { // Differentiate group id for each host super(topic, config.withValue(GROUP_ID_KEY, ConfigValueFactory.fromAnyRef(DAG_ACTION_CHANGE_MONITOR_PREFIX + UUID.randomUUID().toString())), numThreads); this.dagActionStore = dagActionStore; this.dagManager = dagManager; + ClassAliasResolver aliasResolver = new ClassAliasResolver(SpecCompiler.class); + try { + String specCompilerClassName = ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_FLOWCOMPILER_CLASS; + if (config.hasPath(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWCOMPILER_CLASS_KEY)) { + specCompilerClassName = config.getString(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWCOMPILER_CLASS_KEY); + } + log.info("Using specCompiler class name/alias " + specCompilerClassName); + + this.specCompiler = (SpecCompiler) ConstructorUtils.invokeConstructor(Class.forName(aliasResolver.resolve( + specCompilerClassName)), config); + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException + | ClassNotFoundException e) { + throw new RuntimeException(e); + } + this.flowCatalog = flowCatalog; + this.eventSubmitter = new EventSubmitter.Builder(this.getMetricContext(), "org.apache.gobblin.service").build(); } @Override @@ -108,6 +145,7 @@ protected void processMessage(DecodeableKafkaRecord message) { String flowGroup = value.getFlowGroup(); String flowName = value.getFlowName(); 
String flowExecutionId = value.getFlowExecutionId(); + DagActionStore.DagActionValue dagAction = DagActionStore.DagActionValue.valueOf(value.getDagAction()); produceToConsumeDelayValue = calcMillisSince(produceTimestamp); log.debug("Processing Dag Action message for flow group: {} name: {} executionId: {} tid: {} operation: {} lag: {}", @@ -119,28 +157,8 @@ protected void processMessage(DecodeableKafkaRecord message) { return; } - // Retrieve the Dag Action taken from MySQL table unless operation is DELETE - DagActionStore.DagActionValue dagAction = null; - if (!operation.equals("DELETE")) { - try { - dagAction = dagActionStore.getDagAction(flowGroup, flowName, flowExecutionId).getDagActionValue(); - } catch (IOException e) { - log.error("Encountered IOException trying to retrieve dagAction for flow group: {} name: {} executionId: {}. " + "Exception: {}", flowGroup, flowName, flowExecutionId, e); - this.unexpectedErrors.mark(); - return; - } catch (SpecNotFoundException e) { - log.error("DagAction not found for flow group: {} name: {} executionId: {} Exception: {}", flowGroup, flowName, - flowExecutionId, e); - this.unexpectedErrors.mark(); - return; - } catch (SQLException throwables) { - log.error("Encountered SQLException trying to retrieve dagAction for flow group: {} name: {} executionId: {}. " + "Exception: {}", flowGroup, flowName, flowExecutionId, throwables); - return; - } - } - - // We only expert INSERT and DELETE operations done to this table. INSERTs correspond to resume or delete flow - // requests that have to be processed. DELETEs require no action. + // We only expect INSERT and DELETE operations done to this table. INSERTs correspond to any type of + // {@link DagActionStore.DagACtionValue} flow requests that have to be processed. DELETEs require no action. 
try { if (operation.equals("INSERT")) { if (dagAction.equals(DagActionStore.DagActionValue.RESUME)) { @@ -151,7 +169,10 @@ protected void processMessage(DecodeableKafkaRecord message) { log.info("Received insert dag action and about to send kill flow request"); dagManager.handleKillFlowRequest(flowGroup, flowName, Long.parseLong(flowExecutionId)); this.killsInvoked.mark(); - } else { + } else if (dagAction.equals(DagActionStore.DagActionValue.LAUNCH)) { + log.info("Received insert dag action and about to forward launch request to DagManager"); + submitFlowToDagManager(flowGroup, flowName); + }else { log.warn("Received unsupported dagAction {}. Expected to be a KILL or RESUME", dagAction); this.unexpectedErrors.mark(); return; @@ -180,15 +201,46 @@ protected void processMessage(DecodeableKafkaRecord message) { dagActionsSeenCache.put(changeIdentifier, changeIdentifier); } + protected void submitFlowToDagManager(String flowGroup, String flowName) { + // Retrieve job execution plan by recompiling the flow spec to send to the DagManager + FlowId flowId = new FlowId().setFlowGroup(flowGroup).setFlowName(flowName); + FlowSpec spec = null; + try { + URI flowUri = FlowSpec.Utils.createFlowSpecUri(flowId); + spec = (FlowSpec) flowCatalog.getSpecs(flowUri); + Dag jobExecutionPlanDag = specCompiler.compileFlow(spec); + //Send the dag to the DagManager. 
+ dagManager.addDag(jobExecutionPlanDag, true, true); + } catch (URISyntaxException e) { + log.warn("Could not create URI object for flowId {} due to error {}", flowId, e.getMessage()); + this.unexpectedErrors.mark(); + return; + } catch (SpecNotFoundException e) { + log.warn("Spec not found for flow group: {} name: {} Exception: {}", flowGroup, flowName, e); + this.unexpectedErrors.mark(); + return; + } catch (IOException e) { + Map flowMetadata = TimingEventUtils.getFlowMetadata(spec); + String failureMessage = "Failed to add Job Execution Plan due to: " + e.getMessage(); + flowMetadata.put(TimingEvent.METADATA_MESSAGE, failureMessage); + new TimingEvent(this.eventSubmitter, TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata); + log.warn("Failed to add Job Execution Plan for flow group: {} name: {} due to error {}", flowGroup, flowName, e); + this.unexpectedErrors.mark(); + return; + } + // Only mark this if the dag was successfully added + this.flowsLaunched.mark(); + } + @Override protected void createMetrics() { super.createMetrics(); this.killsInvoked = this.getMetricContext().contextAwareMeter(RuntimeMetrics.GOBBLIN_DAG_ACTION_STORE_MONITOR_KILLS_INVOKED); this.resumesInvoked = this.getMetricContext().contextAwareMeter(RuntimeMetrics.GOBBLIN_DAG_ACTION_STORE_MONITOR_RESUMES_INVOKED); + this.flowsLaunched = this.getMetricContext().contextAwareMeter(RuntimeMetrics.GOBBLIN_DAG_ACTION_STORE_MONITOR_FLOWS_LAUNCHED); this.unexpectedErrors = this.getMetricContext().contextAwareMeter(RuntimeMetrics.GOBBLIN_DAG_ACTION_STORE_MONITOR_UNEXPECTED_ERRORS); this.messageProcessedMeter = this.getMetricContext().contextAwareMeter(RuntimeMetrics.GOBBLIN_DAG_ACTION_STORE_MONITOR_MESSAGE_PROCESSED); this.produceToConsumeDelayMillis = this.getMetricContext().newContextAwareGauge(RuntimeMetrics.GOBBLIN_DAG_ACTION_STORE_PRODUCE_TO_CONSUME_DELAY_MILLIS, () -> produceToConsumeDelayValue); this.getMetricContext().register(this.produceToConsumeDelayMillis); } - } diff --git 
a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java index d4a0656b3f2..6f7e55113e5 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java @@ -26,6 +26,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.gobblin.runtime.api.DagActionStore; +import org.apache.gobblin.runtime.spec_catalog.FlowCatalog; import org.apache.gobblin.service.modules.orchestration.DagManager; import org.apache.gobblin.util.ConfigUtils; @@ -40,12 +41,15 @@ public class DagActionStoreChangeMonitorFactory implements Providerabsent(), Optional.of(logger)); + Optional.of(this.topologyCatalog), Optional.absent(), Optional.of(logger), this.mockSchedulerLeaseAlgoHandler); this.topologyCatalog.addListener(orchestrator); this.flowCatalog.addListener(orchestrator); // Start application From 888b8d19351d65bb7fb1cbece73f5d6cfd480f84 Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Fri, 26 May 2023 17:04:06 -0400 Subject: [PATCH 07/11] clean up comments, add docstrings --- .../MysqlSchedulerLeaseDeterminationStore.java | 16 ++++++++++------ .../dag_action_store/MysqlDagActionStore.java | 7 ++++--- .../gobblin/runtime/util/InjectionNames.java | 1 + .../core/GobblinServiceConfiguration.java | 4 ++++ .../core/GobblinServiceGuiceModule.java | 10 ++++++++++ .../modules/orchestration/Orchestrator.java | 16 +++++++--------- .../SchedulerLeaseAlgoHandler.java | 18 +++++++++++++++++- .../scheduler/GobblinServiceJobScheduler.java | 14 +++++++------- .../DagActionStoreChangeMonitor.java | 16 ++++++++++------ .../DagActionStoreChangeMonitorFactory.java | 11 +++++++++-- .../orchestration/OrchestratorTest.java | 2 +- .../GobblinServiceJobSchedulerTest.java | 6 +++--- 12 
files changed, 83 insertions(+), 38 deletions(-) diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java index 507b5c69b87..4dbc1584f04 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java @@ -99,7 +99,7 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, Timestamp triggerTimestamp = new Timestamp(triggerTimeMillis); try (Connection connection = this.dataSource.getConnection(); PreparedStatement insertStatement = connection.prepareStatement( - String.format(ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT, tableName, tableName, epsilon, + String.format(ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT, tableName, tableName, epsilon, tableName, epsilon))) { int i = 0; // Values to set in new row @@ -124,13 +124,14 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, connection.commit(); if (!resultSet.next()) { + resultSet.close(); throw new IOException(String.format("Unexpected error where no result returned while trying to obtain lease. 
" + "This error indicates that no entry existed for trigger flow event for table %s flow group: %s, flow " + "name: %s flow execution id: %s and trigger timestamp: %s when one should have been inserted", tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp)); } // If a row was inserted, then we have obtained the lease - int rowsUpdated = resultSet.getInt(0); + int rowsUpdated = resultSet.getInt(1); if (rowsUpdated == 1) { // If the pursuing flow launch has been persisted to the {@link DagActionStore} we have completed lease obtainment this.dagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.LAUNCH); @@ -139,6 +140,7 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, // TODO: potentially add metric here to count number of flows scheduled by each scheduler LOG.info("Host completed obtaining lease for flow group: %s, flow name: %s flow execution id: %s and " + "trigger timestamp: %s", flowGroup, flowName, flowExecutionId, triggerTimestamp); + resultSet.close(); return LeaseAttemptStatus.LEASE_OBTAINED; } else { LOG.warn("Unable to update pursuant timestamp after persisting flow launch to DagActionStore for flow " @@ -151,11 +153,13 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, triggerTimestamp); } } else if (rowsUpdated > 1) { + resultSet.close(); throw new IOException(String.format("Expect at most 1 row in table for a given trigger event. 
%s rows " + "exist for the trigger flow event for table %s flow group: %s, flow name: %s flow execution id: %s " + "and trigger timestamp: %s.", i, tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp)); } - Timestamp pursuantTimestamp = resultSet.getTimestamp(1); + Timestamp pursuantTimestamp = resultSet.getTimestamp(2); + resultSet.close(); long currentTimeMillis = System.currentTimeMillis(); // Another host has obtained lease and no further steps required if (pursuantTimestamp == null) { @@ -170,7 +174,7 @@ public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, } catch (SQLException e) { throw new IOException(String.format("Error encountered while trying to obtain lease on trigger flow event for " + "table %s flow group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s", tableName, - flowGroup, flowName, flowExecutionId, triggerTimestamp, e)); + flowGroup, flowName, flowExecutionId, triggerTimestamp), e); } } @@ -180,7 +184,7 @@ public boolean updatePursuantTimestamp(String flowGroup, String flowName, String throws IOException { try (Connection connection = this.dataSource.getConnection(); PreparedStatement updateStatement = connection.prepareStatement( - String.format(UPDATE_PURSUANT_TIMESTAMP_STATEMENT, tableName))) { + String.format(UPDATE_PURSUANT_TIMESTAMP_STATEMENT, tableName, epsilon))) { int i = 0; updateStatement.setString(++i, flowGroup); updateStatement.setString(++i, flowName); @@ -197,7 +201,7 @@ public boolean updatePursuantTimestamp(String flowGroup, String flowName, String } catch (SQLException e) { throw new IOException(String.format("Encountered exception while trying to update pursuant timestamp to null for " + "flowGroup: %s flowName: %s flowExecutionId: %s flowAction: %s triggerTimestamp: %s. 
Exception is %s", - flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp), e); + flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp, e)); } } } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java index 5b271a248e4..a71ab449298 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java @@ -121,8 +121,9 @@ public void addDagAction(String flowGroup, String flowName, String flowExecution insertStatement.executeUpdate(); connection.commit(); } catch (SQLException e) { - throw new IOException(String.format("Failure to adding action for table %s of flow with flow group:%s, flow name:%s and flow execution id:%s", - tableName, flowGroup, flowName, flowExecutionId, dagActionValue), e); + throw new IOException(String.format("Failure to adding action for table %s of flow with flow group: %s, flow name" + + ": %s, flow execution id: %s, and dag action: %s", tableName, flowGroup, flowName, flowExecutionId, + dagActionValue), e); } } @@ -139,7 +140,7 @@ public boolean deleteDagAction(String flowGroup, String flowName, String flowExe return result != 0; } catch (SQLException e) { throw new IOException(String.format("Failure to delete action for table %s of flow with flow group:%s, flow name:%s, flow execution id:%s and dagAction: %s", - tableName, flowGroup, flowName, flowExecutionId), e); + tableName, flowGroup, flowName, flowExecutionId, dagActionValue), e); } } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/util/InjectionNames.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/util/InjectionNames.java index d0e42f525f4..c293d165c65 100644 --- 
a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/util/InjectionNames.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/util/InjectionNames.java @@ -26,4 +26,5 @@ public final class InjectionNames { public static final String FORCE_LEADER = "forceLeader"; public static final String FLOW_CATALOG_LOCAL_COMMIT = "flowCatalogLocalCommit"; public static final String WARM_STANDBY_ENABLED = "statelessRestAPIEnabled"; + public static final String MULTI_ACTIVE_SCHEDULER_ENABLED = "multiActiveSchedulerEnabled"; } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceConfiguration.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceConfiguration.java index 0a3640da5bf..03081b3ba2d 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceConfiguration.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceConfiguration.java @@ -43,6 +43,9 @@ public class GobblinServiceConfiguration { @Getter private final boolean isWarmStandbyEnabled; + @Getter + private final boolean isMultiActiveSchedulerEnabled; + @Getter private final boolean isTopologyCatalogEnabled; @@ -107,6 +110,7 @@ public GobblinServiceConfiguration(String serviceName, String serviceId, Config } this.isWarmStandbyEnabled = ConfigUtils.getBoolean(config, ServiceConfigKeys.GOBBLIN_SERVICE_WARM_STANDBY_ENABLED_KEY, false); + this.isMultiActiveSchedulerEnabled = ConfigUtils.getBoolean(config, ServiceConfigKeys.GOBBLIN_SERVICE_MULTI_ACTIVE_SCHEDULER_ENABLED_KEY, false); this.isHelixManagerEnabled = config.hasPath(ServiceConfigKeys.ZK_CONNECTION_STRING_KEY); this.isDagManagerEnabled = diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java index 642f78f118e..7233ba40b40 100644 
--- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java @@ -20,7 +20,9 @@ import java.util.Objects; import org.apache.gobblin.runtime.api.DagActionStore; +import org.apache.gobblin.runtime.api.MysqlSchedulerLeaseDeterminationStore; import org.apache.gobblin.runtime.dag_action_store.MysqlDagActionStore; +import org.apache.gobblin.service.modules.orchestration.SchedulerLeaseAlgoHandler; import org.apache.gobblin.service.modules.orchestration.UserQuotaManager; import org.apache.gobblin.service.modules.restli.GobblinServiceFlowConfigV2ResourceHandlerWithWarmStandby; import org.apache.gobblin.service.modules.restli.GobblinServiceFlowExecutionResourceHandlerWithWarmStandby; @@ -147,6 +149,9 @@ public void configure(Binder binder) { binder.bindConstant() .annotatedWith(Names.named(InjectionNames.WARM_STANDBY_ENABLED)) .to(serviceConfig.isWarmStandbyEnabled()); + binder.bindConstant() + .annotatedWith(Names.named(InjectionNames.MULTI_ACTIVE_SCHEDULER_ENABLED)) + .to(serviceConfig.isMultiActiveSchedulerEnabled()); OptionalBinder.newOptionalBinder(binder, DagActionStore.class); if (serviceConfig.isWarmStandbyEnabled()) { binder.bind(DagActionStore.class).to(MysqlDagActionStore.class); @@ -241,6 +246,11 @@ public void configure(Binder binder) { binder.bind(DagActionStoreChangeMonitor.class).toProvider(DagActionStoreChangeMonitorFactory.class).in(Singleton.class); } + if (serviceConfig.isMultiActiveSchedulerEnabled()) { + binder.bind(MysqlSchedulerLeaseDeterminationStore.class); + binder.bind(SchedulerLeaseAlgoHandler.class); + } + binder.bind(GobblinServiceManager.class); binder.bind(ServiceDatabaseProvider.class).to(ServiceDatabaseProviderImpl.class); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index 9bab032191b..a5cc9a17c51 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -40,6 +40,7 @@ import javax.annotation.Nonnull; import javax.inject.Inject; +import javax.inject.Named; import javax.inject.Singleton; import lombok.Getter; import lombok.Setter; @@ -65,6 +66,7 @@ import org.apache.gobblin.runtime.api.TopologySpec; import org.apache.gobblin.runtime.spec_catalog.AddSpecResponse; import org.apache.gobblin.runtime.spec_catalog.TopologyCatalog; +import org.apache.gobblin.runtime.util.InjectionNames; import org.apache.gobblin.service.ServiceConfigKeys; import org.apache.gobblin.service.modules.flow.SpecCompiler; import org.apache.gobblin.service.modules.flowgraph.Dag; @@ -113,15 +115,13 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { public Orchestrator(Config config, Optional topologyCatalog, Optional dagManager, Optional log, - FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { + FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled, boolean isMultiActiveSchedulerEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { _log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass()); this.aliasResolver = new ClassAliasResolver<>(SpecCompiler.class); this.topologyCatalog = topologyCatalog; this.dagManager = dagManager; this.flowStatusGenerator = flowStatusGenerator; - this.isMultiActiveSchedulerEnabled = - config.hasPath(ServiceConfigKeys.GOBBLIN_SERVICE_MULTI_ACTIVE_SCHEDULER_ENABLED_KEY) ? 
- config.getBoolean(ServiceConfigKeys.GOBBLIN_SERVICE_MULTI_ACTIVE_SCHEDULER_ENABLED_KEY) : false; + this.isMultiActiveSchedulerEnabled = isMultiActiveSchedulerEnabled; this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; try { String specCompilerClassName = ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_FLOWCOMPILER_CLASS; @@ -165,8 +165,9 @@ public Orchestrator(Config config, Optional topologyCatalog, Op @Inject public Orchestrator(Config config, FlowStatusGenerator flowStatusGenerator, Optional topologyCatalog, - Optional dagManager, Optional log, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { - this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, schedulerLeaseAlgoHandler); + Optional dagManager, Optional log, @Named(InjectionNames.MULTI_ACTIVE_SCHEDULER_ENABLED) boolean multiActiveSchedulerEnabled, + SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { + this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, multiActiveSchedulerEnabled, schedulerLeaseAlgoHandler); } @@ -227,9 +228,6 @@ public void onUpdateSpec(Spec updatedSpec) { } - /* - New Orchestrate method - */ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMillis) throws Exception { // Add below waiting because TopologyCatalog and FlowCatalog service can be launched at the same time this.topologyCatalog.get().getInitComplete().await(); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java index 589d7ab65f3..8f4a206650b 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + 
* contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.gobblin.service.modules.orchestration; import java.io.IOException; @@ -8,7 +25,6 @@ import java.util.Properties; import java.util.Random; -import org.quartz.CronScheduleBuilder; import org.quartz.JobKey; import org.quartz.SchedulerException; import org.quartz.Trigger; diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java index 26e423dcd7a..b85c8f0d967 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobScheduler.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.net.URI; -import java.sql.Timestamp; import java.text.ParseException; import java.util.Calendar; import java.util.Collection; @@ -110,6 +109,7 @@ public class GobblinServiceJobScheduler extends JobScheduler implements SpecCata protected final Orchestrator orchestrator; protected final Boolean warmStandbyEnabled; protected final Optional quotaManager; + protected final Boolean multiActiveSchedulerEnabled; protected final 
SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler; @Getter protected final Map scheduledFlowSpecs; @@ -166,7 +166,8 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser Config config, Optional helixManager, Optional flowCatalog, Optional topologyCatalog, Orchestrator orchestrator, SchedulerService schedulerService, Optional quotaManager, Optional log, - @Named(InjectionNames.WARM_STANDBY_ENABLED) boolean warmStandbyEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { + @Named(InjectionNames.WARM_STANDBY_ENABLED) boolean warmStandbyEnabled, @Named(InjectionNames.MULTI_ACTIVE_SCHEDULER_ENABLED) boolean multiActiveSchedulerEnabled, + SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { super(ConfigUtils.configToProperties(config), schedulerService); _log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass()); @@ -182,6 +183,7 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser && config.hasPath(GOBBLIN_SERVICE_SCHEDULER_DR_NOMINATED); this.warmStandbyEnabled = warmStandbyEnabled; this.quotaManager = quotaManager; + this.multiActiveSchedulerEnabled = multiActiveSchedulerEnabled; this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; // Check that these metrics do not exist before adding, mainly for testing purpose which creates multiple instances // of the scheduler. If one metric exists, then the others should as well. 
@@ -204,9 +206,9 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser public GobblinServiceJobScheduler(String serviceName, Config config, FlowStatusGenerator flowStatusGenerator, Optional helixManager, Optional flowCatalog, Optional topologyCatalog, Optional dagManager, Optional quotaManager, - SchedulerService schedulerService, Optional log, boolean warmStandbyEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { + SchedulerService schedulerService, Optional log, boolean warmStandbyEnabled, boolean multiActiveSchedulerEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { this(serviceName, config, helixManager, flowCatalog, topologyCatalog, - new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log, schedulerLeaseAlgoHandler), schedulerService, quotaManager, log, warmStandbyEnabled, schedulerLeaseAlgoHandler); + new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log, multiActiveSchedulerEnabled, schedulerLeaseAlgoHandler), schedulerService, quotaManager, log, warmStandbyEnabled, multiActiveSchedulerEnabled, schedulerLeaseAlgoHandler); } public synchronized void setActive(boolean isActive) { @@ -440,8 +442,6 @@ public synchronized void scheduleJob(Properties jobProps, JobListener jobListene } } - // TODO: multiActiveScheduler change here to use this in old state or with new config do the race to write? - // define a new class to handle the nonblocking and race @Override public void runJob(Properties jobProps, JobListener jobListener) throws JobException { try { @@ -450,7 +450,7 @@ public void runJob(Properties jobProps, JobListener jobListener) throws JobExcep jobProps.containsKey(ConfigurationKeys.SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY) ? 
jobProps.getProperty(ConfigurationKeys.SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY, "0L"): jobProps.getProperty(ConfigurationKeys.SCHEDULER_TRIGGER_TIMESTAMP_MILLIS_KEY,"0L"); - this.orchestrator.orchestrate(flowSpec, jobProps, Long.valueOf(triggerTimestampMillis)); + this.orchestrator.orchestrate(flowSpec, jobProps, Long.parseLong(triggerTimestampMillis)); } catch (Exception e) { throw new JobException("Failed to run Spec: " + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), e); } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java index e528e20aa68..45ece3b91d5 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java @@ -21,7 +21,6 @@ import java.lang.reflect.InvocationTargetException; import java.net.URI; import java.net.URISyntaxException; -import java.sql.SQLException; import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; @@ -90,13 +89,14 @@ public String load(String key) throws Exception { protected DagManager dagManager; protected SpecCompiler specCompiler; + protected boolean isMultiActiveSchedulerEnabled; protected FlowCatalog flowCatalog; protected EventSubmitter eventSubmitter; // Note that the topic is an empty string (rather than null to avoid NPE) because this monitor relies on the consumer // client itself to determine all Kafka related information dynamically rather than through the config. 
public DagActionStoreChangeMonitor(String topic, Config config, DagActionStore dagActionStore, DagManager dagManager, - int numThreads, FlowCatalog flowCatalog) { + int numThreads, boolean isMultiActiveSchedulerEnabled, FlowCatalog flowCatalog) { // Differentiate group id for each host super(topic, config.withValue(GROUP_ID_KEY, ConfigValueFactory.fromAnyRef(DAG_ACTION_CHANGE_MONITOR_PREFIX + UUID.randomUUID().toString())), @@ -117,6 +117,7 @@ public DagActionStoreChangeMonitor(String topic, Config config, DagActionStore d | ClassNotFoundException e) { throw new RuntimeException(e); } + this.isMultiActiveSchedulerEnabled = isMultiActiveSchedulerEnabled; this.flowCatalog = flowCatalog; this.eventSubmitter = new EventSubmitter.Builder(this.getMetricContext(), "org.apache.gobblin.service").build(); } @@ -170,6 +171,12 @@ protected void processMessage(DecodeableKafkaRecord message) { dagManager.handleKillFlowRequest(flowGroup, flowName, Long.parseLong(flowExecutionId)); this.killsInvoked.mark(); } else if (dagAction.equals(DagActionStore.DagActionValue.LAUNCH)) { + // If multi-active scheduler is NOT turned on we should not receive these type of events + if (!this.isMultiActiveSchedulerEnabled) { + log.warn("Received LAUNCH dagAction while not in multi-active scheduler mode for flow group: {}, flow name:" + + "{}, execution id: {}, dagAction: {}", flowGroup, flowName, flowExecutionId, dagAction); + this.unexpectedErrors.mark(); + } log.info("Received insert dag action and about to forward launch request to DagManager"); submitFlowToDagManager(flowGroup, flowName); }else { @@ -184,10 +191,7 @@ protected void processMessage(DecodeableKafkaRecord message) { this.unexpectedErrors.mark(); } else if (operation.equals("DELETE")) { log.debug("Deleted flow group: {} name: {} executionId {} from DagActionStore", flowGroup, flowName, flowExecutionId); - } // TODO: multiActiveScheduler change here to add a case for a new launch flow action. 
We want to check if it is - // an execution that has been "won" by checking pursuant timestamp = null then pass to dag managers. the right one will - // actually launch it. if the config is NOT turned on we should do any of this handling or recieve these type of events - else { + } else { log.warn("Received unsupported change type of operation {}. Expected values to be in [INSERT, UPDATE, DELETE]", operation); this.unexpectedErrors.mark(); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java index 6f7e55113e5..5ac4c40f565 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java @@ -22,11 +22,14 @@ import com.typesafe.config.Config; import javax.inject.Inject; +import javax.inject.Named; import javax.inject.Provider; import lombok.extern.slf4j.Slf4j; import org.apache.gobblin.runtime.api.DagActionStore; import org.apache.gobblin.runtime.spec_catalog.FlowCatalog; +import org.apache.gobblin.runtime.util.InjectionNames; +import org.apache.gobblin.service.ServiceConfigKeys; import org.apache.gobblin.service.modules.orchestration.DagManager; import org.apache.gobblin.util.ConfigUtils; @@ -42,14 +45,18 @@ public class DagActionStoreChangeMonitorFactory implements Providerabsent(), Optional.of(logger), this.mockSchedulerLeaseAlgoHandler); + Optional.of(this.topologyCatalog), Optional.absent(), Optional.of(logger), false, this.mockSchedulerLeaseAlgoHandler); this.topologyCatalog.addListener(orchestrator); this.flowCatalog.addListener(orchestrator); // Start application diff --git a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java 
b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java index 21b5dc0a4b5..81562ecf03c 100644 --- a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java +++ b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java @@ -349,7 +349,7 @@ public void testJobSchedulerAddFlowQuotaExceeded() throws Exception { SchedulerService schedulerService = new SchedulerService(new Properties()); // Mock a GaaS scheduler not in warm standby mode GobblinServiceJobScheduler scheduler = new GobblinServiceJobScheduler("testscheduler", - ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), false, new SchedulerLeaseAlgoHandler(ConfigFactory.empty(), // TODO )); + ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), false, false, Mockito.mock(SchedulerLeaseAlgoHandler.class)); schedulerService.startAsync().awaitRunning(); scheduler.startUp(); @@ -367,7 +367,7 @@ public void testJobSchedulerAddFlowQuotaExceeded() throws Exception { //Mock a GaaS scheduler in warm standby mode, where we don't check quota GobblinServiceJobScheduler schedulerWithWarmStandbyEnabled = new GobblinServiceJobScheduler("testscheduler", - ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), true); + ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), true, false, Mockito.mock(SchedulerLeaseAlgoHandler.class)); 
schedulerWithWarmStandbyEnabled.startUp(); schedulerWithWarmStandbyEnabled.setActive(true); @@ -389,7 +389,7 @@ class TestGobblinServiceJobScheduler extends GobblinServiceJobScheduler { public TestGobblinServiceJobScheduler(String serviceName, Config config, Optional flowCatalog, Optional topologyCatalog, Orchestrator orchestrator, Optional quotaManager, SchedulerService schedulerService, boolean isWarmStandbyEnabled) throws Exception { - super(serviceName, config, Optional.absent(), flowCatalog, topologyCatalog, orchestrator, schedulerService, quotaManager, Optional.absent(), isWarmStandbyEnabled); + super(serviceName, config, Optional.absent(), flowCatalog, topologyCatalog, orchestrator, schedulerService, quotaManager, Optional.absent(), isWarmStandbyEnabled, false, Mockito.mock(SchedulerLeaseAlgoHandler.class)); if (schedulerService != null) { hasScheduler = true; } From 9fb1d555d59018336905c1b656e52ed0ddd719e1 Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Mon, 5 Jun 2023 16:56:30 -0700 Subject: [PATCH 08/11] redefined lease arbiter & algo handler to separate scheduler specific logic from general lease handler --- .../configuration/ConfigurationKeys.java | 26 +- .../main/avro/DagActionStoreChangeEvent.avsc | 15 +- .../gobblin/runtime/api/DagActionStore.java | 52 +-- .../runtime/api/LeaseAttemptStatus.java | 22 + .../runtime/api/LeaseObtainedStatus.java | 38 ++ .../runtime/api/LeasedToAnotherStatus.java | 39 ++ .../runtime/api/MultiActiveLeaseArbiter.java | 68 +++ .../api/MySQLMultiActiveLeaseArbiter.java | 389 ++++++++++++++++++ ...MysqlSchedulerLeaseDeterminationStore.java | 207 ---------- .../runtime/api/NoLongerLeasingStatus.java | 29 ++ .../api/SchedulerLeaseDeterminationStore.java | 73 ---- .../dag_action_store/MysqlDagActionStore.java | 57 ++- .../runtime/metrics/RuntimeMetrics.java | 2 + .../gobblin/scheduler/JobScheduler.java | 2 +- .../MysqlDagActionStoreTest.java | 42 +- .../core/GobblinServiceGuiceModule.java | 4 +- 
.../modules/orchestration/DagManager.java | 8 +- .../modules/orchestration/Orchestrator.java | 17 +- .../SchedulerLeaseAlgoHandler.java | 143 ++++--- ...ecutionResourceHandlerWithWarmStandby.java | 20 +- .../scheduler/GobblinServiceJobScheduler.java | 16 +- .../DagActionStoreChangeMonitor.java | 22 +- .../DagActionStoreChangeMonitorFactory.java | 7 +- .../orchestration/DagManagerFlowTest.java | 4 +- 24 files changed, 817 insertions(+), 485 deletions(-) create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MySQLMultiActiveLeaseArbiter.java delete mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java create mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/NoLongerLeasingStatus.java delete mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java diff --git a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java index fa3bfdc5a3e..0c877c0b6b3 100644 --- a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java +++ b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java @@ -96,21 +96,17 @@ public class ConfigurationKeys { public static final String SKIP_SCHEDULING_FLOWS_AFTER_NUM_DAYS = "skip.scheduling.flows.after.num.days"; public static final int DEFAULT_NUM_DAYS_TO_SKIP_AFTER = 365; // Scheduler lease determination store 
configuration - // TODO: multiActiveScheduler change here update values for the following keys and rename to more meaningful - public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_JDBC_DRIVER_KEY = "state.store.db.jdbc.driver"; - public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_JDBC_DRIVER = "com.mysql.cj.jdbc.Driver"; - public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_URL_KEY = "state.store.db.url"; - public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_USER_KEY = "state.store.db.user"; - public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_PASSWORD_KEY = "state.store.db.password"; - public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = "state.store.db.table"; - public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = "gobblin_job_state"; - // TODO: fix these keys - public static final String SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY = "originalTriggerTimestampMillis"; - public static final String SCHEDULER_TRIGGER_TIMESTAMP_MILLIS_KEY = "triggerTimestampMillis"; - public static final String SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY = ""; - public static final long DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS = 100; - public static final String SCHEDULER_TRIGGER_EVENT_LINGER_SEC_KEY = ""; - public static final long DEFAULT_SCHEDULER_TRIGGER_EVENT_LINGER_SEC = 30; + public static final String MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE_KEY = "multi.active.scheduler.constants.db.table"; + public static final String DEFAULT_MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE = "multi.active.scheduler."; + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = "scheduler.lease.determination.store.db.table"; + public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = "gobblin_scheduler_lease_determination_store"; + public static final String SCHEDULER_REMINDER_EVENT_TIMESTAMP_MILLIS_KEY = 
"reminderEventTimestampMillis"; + public static final String SCHEDULER_NEW_EVENT_TIMESTAMP_MILLIS_KEY = "newEventTimestampMillis"; + public static final String SCHEDULER_EVENT_EPSILON_MILLIS_KEY = ""; + public static final int DEFAULT_SCHEDULER_EVENT_EPSILON_MILLIS = 100; + // Note: linger should be on the order of seconds even though we measure in millis + public static final String SCHEDULER_EVENT_LINGER_MILLIS_KEY = ""; + public static final int DEFAULT_SCHEDULER_EVENT_LINGER_MILLIS = 30000; public static final String SCHEDULER_STAGGERING_UPPER_BOUND_SEC_KEY = ""; public static final int DEFAULT_SCHEDULER_STAGGERING_UPPER_BOUND_SEC = 5; diff --git a/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc b/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc index 8bd4fb301b2..b628f17146e 100644 --- a/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc +++ b/gobblin-metrics-libs/gobblin-metrics-base/src/main/avro/DagActionStoreChangeEvent.avsc @@ -25,7 +25,20 @@ "compliance" : "NONE" }, { "name" : "dagAction", - "type" : "string", + "type": { + "type": "enum", + "name": "DagActionValue", + "symbols": [ + "KILL", + "RESUME", + "LAUNCH" + ], + "symbolDocs": { + "KILL": "Kill the flow corresponding to this dag", + "RESUME": "Resume or start a new flow corresponding to this dag", + "LAUNCH": "Launch a new execution of the flow corresponding to this dag" + } + }, "doc" : "type of dag action", "compliance" : "NONE" } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java index 0933bfbea63..aa0e1237dcf 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java @@ -20,30 +20,31 @@ import java.io.IOException; import 
java.sql.SQLException; import java.util.Collection; -import lombok.EqualsAndHashCode; -import lombok.Getter; + +import lombok.Data; public interface DagActionStore { - enum DagActionValue { - KILL, - RESUME, - // TODO: potentially combine this enum with {@link SchedulerLeaseDeterminationStore.FlowActionType} - LAUNCH + enum FlowActionType { + KILL, // Kill invoked through API call + RESUME, // Resume flow invoked through API call + LAUNCH, // Launch new flow execution invoked adhoc or through scheduled trigger + RETRY, // Invoked through DagManager for flows configured to allow retries + CANCEL, // Invoked through DagManager if flow has been stuck in Orchestrated state for a while + ADVANCE // Launch next step in multi-hop dag } - @Getter - @EqualsAndHashCode + @Data class DagAction { String flowGroup; String flowName; String flowExecutionId; - DagActionValue dagActionValue; - public DagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) { + FlowActionType flowActionType; + public DagAction(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType) { this.flowGroup = flowGroup; this.flowName = flowName; this.flowExecutionId = flowExecutionId; - this.dagActionValue = dagActionValue; + this.flowActionType = flowActionType; } } @@ -53,43 +54,28 @@ public DagAction(String flowGroup, String flowName, String flowExecutionId, DagA * @param flowGroup flow group for the dag action * @param flowName flow name for the dag action * @param flowExecutionId flow execution for the dag action - * @param dagActionValue the value of the dag action + * @param flowActionType the value of the dag action * @throws IOException */ - boolean exists(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException, SQLException; + boolean exists(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType) throws IOException, SQLException; /** * 
Persist the dag action in {@link DagActionStore} for durability * @param flowGroup flow group for the dag action * @param flowName flow name for the dag action * @param flowExecutionId flow execution for the dag action - * @param dagActionValue the value of the dag action + * @param flowActionType the value of the dag action * @throws IOException */ - void addDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException; + void addDagAction(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType) throws IOException; /** * delete the dag action from {@link DagActionStore} - * @param flowGroup flow group for the dag action - * @param flowName flow name for the dag action - * @param flowExecutionId flow execution for the dag action - * @param dagActionValue the value of the dag action + * @param dagAction containing all information needed to identify dag and specific action value * @throws IOException * @return true if we successfully delete one record, return false if the record does not exist */ - boolean deleteDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException; - - /*** - * Retrieve action value by the flow group, flow name and flow execution id from the {@link DagActionStore}. - * @param flowGroup flow group for the dag action - * @param flowName flow name for the dag action - * @param flowExecutionId flow execution for the dag action - * @param dagActionValue the value of the dag action - * @throws IOException Exception in retrieving the {@link DagAction}. - * @throws SpecNotFoundException If {@link DagAction} being retrieved is not present in store.
- */ - DagAction getDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) - throws IOException, SpecNotFoundException, SQLException; + boolean deleteDagAction(DagAction dagAction) throws IOException; /*** * Get all {@link DagAction}s from the {@link DagActionStore}. diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java new file mode 100644 index 00000000000..dd4125ca9c1 --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.gobblin.runtime.api; + +public abstract class LeaseAttemptStatus { + protected LeaseAttemptStatus() {} +} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java new file mode 100644 index 00000000000..5a7fb93cb91 --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.runtime.api; + +import lombok.Getter; + +/* +The instance calling this method acquired the lease for the event in question. The class contains the `eventTimestamp` +associated with the lease as well as the time the lease was obtained by me or `myLeaseAcquisitionTimestamp`. 
+ */ +public class LeaseObtainedStatus extends LeaseAttemptStatus { + @Getter + private final long eventTimestamp; + + @Getter + private final long myLeaseAcquisitionTimestamp; + + protected LeaseObtainedStatus(long eventTimestamp, long myLeaseAcquisitionTimestamp) { + super(); + this.eventTimestamp = eventTimestamp; + this.myLeaseAcquisitionTimestamp = myLeaseAcquisitionTimestamp; + } +} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java new file mode 100644 index 00000000000..e4da8bf7485 --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.runtime.api; + +import lombok.Getter; + +/* +The event in question has been leased to another. This object contains `reminderEventTimeMillis` which is the event +timestamp the lease is associated with as well as `minimumReminderWaitMillis` the minimum amount of time to wait +before returning to check if the lease has completed or expired.
+ */ +public class LeasedToAnotherStatus extends LeaseAttemptStatus { + @Getter + private final long reminderEventTimeMillis; + + @Getter + private final long minimumReminderWaitMillis; + + protected LeasedToAnotherStatus(long reminderEventTimeMillis, long minimumReminderWaitMillis) { + super(); + this.reminderEventTimeMillis = reminderEventTimeMillis; + this.minimumReminderWaitMillis = minimumReminderWaitMillis; + } +} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java new file mode 100644 index 00000000000..dd0e9d506eb --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.runtime.api; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This interface defines a generic approach to a non-blocking, multiple active thread or host system, in which one or + * more active instances compete over ownership of a particular flow's event. 
The type of flow event in question does + * not impact the algorithm other than to uniquely identify the flow event. Each instance uses the interface to initiate + * an attempt at ownership over the flow event and receives a response indicating the status of the attempt. + * + * At a high level the lease arbiter works as follows: + * 1. Multiple instances receive knowledge of a flow action event to act upon + * 2. Each instance attempts to acquire rights or `a lease` to be the sole instance acting on the event by calling the + * tryAcquireLease method below and receives the resulting status. The status indicates whether this instance has + * a) acquired the lease -> then this instance will attempt to complete the lease + * b) another has acquired the lease -> then another will attempt to complete the lease + * c) flow event no longer needs to be acted upon -> terminal state + * 3. If another has acquired the lease, then the instance will check back in at the time of lease expiry to see if it + * needs to attempt the lease again [status (b) above]. + * 4. Once the instance which acquired the lease completes its work on the flow event, it calls completeLeaseUse to + * indicate to all other instances that the flow event no longer needs to be acted upon [status (c) above] + */ +public interface MultiActiveLeaseArbiter { + static final Logger LOG = LoggerFactory.getLogger(MultiActiveLeaseArbiter.class); + + /** + * This method attempts to insert an entry into store for a particular flow action event if one does not already + * exist in the store for the flow action or has expired. Regardless of the outcome it also reads the lease + * acquisition timestamp of the entry for that flow action event (it could have pre-existed in the table or been newly + * added by the previous write). Based on the transaction results, it will return @LeaseAttemptStatus to determine + * the next action. 
+ * @param flowAction uniquely identifies the flow + * @param eventTimeMillis is the time this flow action should occur + * @return LeaseAttemptStatus + * @throws IOException + */ + LeaseAttemptStatus tryAcquireLease(DagActionStore.DagAction flowAction, long eventTimeMillis) throws IOException; + + /** + * This method is used to indicate the owner of the lease has successfully completed required actions while holding + * the lease of the flow action event. It marks the lease as "no longer leasing", if the eventTimeMillis and + * leaseAcquisitionTimeMillis values have not changed since this owner acquired the lease (indicating the lease did + * not expire). + * @return true if successfully updated, indicating no further actions need to be taken regarding this event. + */ + boolean completeLeaseUse(DagActionStore.DagAction flowAction, long eventTimeMillis, long leaseAcquisitionTimeMillis) + throws IOException; +} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MySQLMultiActiveLeaseArbiter.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MySQLMultiActiveLeaseArbiter.java new file mode 100644 index 00000000000..81a449a8348 --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MySQLMultiActiveLeaseArbiter.java @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.runtime.api; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; + +import com.google.inject.Inject; +import com.typesafe.config.Config; +import com.zaxxer.hikari.HikariDataSource; + +import javax.sql.DataSource; + +import org.apache.gobblin.broker.SharedResourcesBrokerFactory; +import org.apache.gobblin.configuration.ConfigurationKeys; +import org.apache.gobblin.metastore.MysqlDataSourceFactory; +import org.apache.gobblin.service.ServiceConfigKeys; +import org.apache.gobblin.util.ConfigUtils; + + +/** + * MySQL based implementation of the {@link MultiActiveLeaseArbiter} which uses a MySQL store to resolve ownership of + * a flow event amongst multiple competing instances. A MySQL table is used to store flow identifying information as + * well as the flow action associated with it. It uses two additional values of the `event_timestamp` and + * `lease_acquisition_timestamp` to indicate an active lease, expired lease, and state of no longer leasing. The table + * schema is as follows: + * [flow_group | flow_name | flow_execution_id | flow_action | event_timestamp | lease_acquisition_timestamp] + * (----------------------primary key------------------------) + * We also maintain another table in the database with two constants that allow us to coordinate between instances and + * ensure they are using the same values to base their coordination off of. 
+ * [epsilon | linger] + * `epsilon` - time within which we consider two timestamps to be the same, to account for between-host clock drift + * `linger` - minimum time that must elapse before another host may attempt a lease on a flow event. It should be much greater + * than epsilon and encapsulate executor communication latency including retry attempts + * + * The `event_timestamp` is the time of the flow_action event request. + * ---Event consolidation--- + * Note that for the sake of simplification, we only allow one event associated with a particular flow's flow_action + * (i.e. only one LAUNCH of flow FOO, for example, but there can be a LAUNCH, KILL, & RESUME for flow FOO at once) during + * the time it takes to execute the flow action. In most cases, the execution time should be so negligible that this + * event consolidation of duplicate flow action requests is not noticed and even during executor downtime this behavior + * is acceptable as the user generally expects a timely execution of the most recent request rather than one execution + * per request. + * + * The `lease_acquisition_timestamp` is the time a host acquired ownership of this flow action, and it is valid for + * `linger` period of time after which it expires and any host can re-attempt ownership. In most cases, the original + * host should actually complete its work while having the lease and then set `lease_acquisition_timestamp` to NULL to indicate no + * further leasing should be done for the event.
+ */ +public class MySQLMultiActiveLeaseArbiter implements MultiActiveLeaseArbiter { + /** `j.u.Function` variant for an operation that may @throw IOException or SQLException: preserves method signature checked exceptions */ + @FunctionalInterface + protected interface CheckedFunction<T, R> { + R apply(T t) throws IOException, SQLException; + } + + public static final String CONFIG_PREFIX = "MySQLMultiActiveLeaseArbiter"; + + protected final DataSource dataSource; + private final String leaseArbiterTableName; + private final String constantsTableName; + private final int epsilon; + private final int linger; + protected static final String WHERE_CLAUSE_TO_MATCH_KEY = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=?" + + " AND flow_action=?"; + protected static final String WHERE_CLAUSE_TO_MATCH_ROW = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=?" + + " AND flow_action=? AND event_timestamp=? AND lease_acquisition_timestamp=?"; + + protected static final String SELECT_AFTER_INSERT_STATEMENT = "SELECT ROW_COUNT() AS rows_inserted_count, " + + "lease_acquisition_timestamp, linger FROM %s, %s " + WHERE_CLAUSE_TO_MATCH_KEY; + + // Does a cross join between the two tables to have epsilon and linger values available. Returns the following values: + // event_timestamp, lease_acquisition_timestamp, isWithinEpsilon (boolean if event_timestamp in table is within + // epsilon), leaseValidityStatus (1 if lease has not expired, 2 if expired, 3 if column is NULL or no longer leasing) + protected static final String GET_EVENT_INFO_STATEMENT = "SELECT event_timestamp, lease_acquisition_timestamp, " + + "abs(event_timestamp - ?)
<= epsilon as isWithinEpsilon, CASE " + + "WHEN CURRENT_TIMESTAMP < (lease_acquisition_timestamp + linger) then 1 " + + "WHEN CURRENT_TIMESTAMP >= (lease_acquisition_timestamp + linger) then 2 " + + "ELSE 3 END as leaseValidityStatus, linger FROM %s, %s " + WHERE_CLAUSE_TO_MATCH_KEY; + + // Insert or update row to acquire lease if values have not changed since the previous read + // Need to define three separate statements to handle cases where row does not exist or has null values to check + protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT = "INSERT INTO %s " + + "(flow_group, flow_name, flow_execution_id, flow_action, event_timestamp) VALUES (?, ?, ?, ?, ?) WHERE NOT " + + "EXISTS (SELECT * FROM %s " + WHERE_CLAUSE_TO_MATCH_KEY + "); " + SELECT_AFTER_INSERT_STATEMENT; + protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT = "UPDATE %s " + + "SET event_timestamp=? " + WHERE_CLAUSE_TO_MATCH_KEY + + " AND event_timestamp=? AND lease_acquisition_timestamp is NULL; " + SELECT_AFTER_INSERT_STATEMENT; + protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_MATCHING_ALL_COLS_STATEMENT = "UPDATE %s " + + "SET event_timestamp=? " + WHERE_CLAUSE_TO_MATCH_KEY + + " AND event_timestamp=?
AND lease_acquisition_timestamp=?; " + SELECT_AFTER_INSERT_STATEMENT; + + // Complete lease acquisition if values have not changed since lease was acquired + protected static final String CONDITIONALLY_COMPLETE_LEASE_STATEMENT = "UPDATE %s SET " + + "lease_acquisition_timestamp = NULL " + WHERE_CLAUSE_TO_MATCH_ROW; + + // TODO: define retention on this table + private static final String CREATE_LEASE_ARBITER_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s (" + + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, flow_action varchar(100) NOT NULL, " + + "event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + + "lease_acquisition_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," + + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,flow_action))"; + + private static final String CREATE_CONSTANTS_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s " + + "(epsilon INT, linger INT, PRIMARY KEY (epsilon, linger)); INSERT INTO %s (epsilon, linger) VALUES (?,?)"; + + @Inject + public MySQLMultiActiveLeaseArbiter(Config config) throws IOException { + if (config.hasPath(CONFIG_PREFIX)) { + config = config.getConfig(CONFIG_PREFIX).withFallback(config); + } else { + throw new IOException("Please specify the config for MySQLMultiActiveLeaseArbiter"); + } + + this.leaseArbiterTableName = ConfigUtils.getString(config, ConfigurationKeys.SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY, + ConfigurationKeys.DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE); + this.constantsTableName = ConfigUtils.getString(config, ConfigurationKeys.MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE_KEY, + ConfigurationKeys.DEFAULT_MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE); + this.epsilon = ConfigUtils.getInt(config, ConfigurationKeys.SCHEDULER_EVENT_EPSILON_MILLIS_KEY, +
ConfigurationKeys.DEFAULT_SCHEDULER_EVENT_EPSILON_MILLIS); + this.linger = ConfigUtils.getInt(config, ConfigurationKeys.SCHEDULER_EVENT_LINGER_MILLIS_KEY, + ConfigurationKeys.DEFAULT_SCHEDULER_EVENT_LINGER_MILLIS); + this.dataSource = MysqlDataSourceFactory.get(config, SharedResourcesBrokerFactory.getImplicitBroker()); + try (Connection connection = dataSource.getConnection(); + PreparedStatement createStatement = connection.prepareStatement(String.format( + CREATE_LEASE_ARBITER_TABLE_STATEMENT, leaseArbiterTableName))) { + createStatement.executeUpdate(); + connection.commit(); + } catch (SQLException e) { + throw new IOException("Table creation failure for " + leaseArbiterTableName, e); + } + withPreparedStatement(String.format(CREATE_CONSTANTS_TABLE_STATEMENT, this.constantsTableName, this.constantsTableName), + createStatement -> { + int i = 0; + createStatement.setInt(++i, epsilon); + createStatement.setInt(++i, linger); + return createStatement.executeUpdate();}, true); + } + + @Override + public LeaseAttemptStatus tryAcquireLease(DagActionStore.DagAction flowAction, long eventTimeMillis) + throws IOException { + String flowGroup = flowAction.getFlowGroup(); + String flowName = flowAction.getFlowName(); + String flowExecutionId = flowAction.getFlowExecutionId(); + Timestamp eventTimestamp = new Timestamp(eventTimeMillis); + + // Check table for an existing entry for this flow action and event time + ResultSet resultSet = withPreparedStatement( + String.format(GET_EVENT_INFO_STATEMENT, this.leaseArbiterTableName, this.constantsTableName), + getInfoStatement -> { + int i = 0; + getInfoStatement.setTimestamp(++i, eventTimestamp); + getInfoStatement.setString(++i, flowGroup); + getInfoStatement.setString(++i, flowName); + getInfoStatement.setString(++i, flowExecutionId); + getInfoStatement.setString(++i, flowAction.getFlowActionType().toString()); + return getInfoStatement.executeQuery(); + }, true); + + try { + // CASE 1: If no existing row for this flow action, then go
ahead and insert + if (!resultSet.next()) { + ResultSet rs = withPreparedStatement( + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT, this.leaseArbiterTableName, + this.leaseArbiterTableName, this.leaseArbiterTableName, this.constantsTableName), + insertStatement -> { + completeInsertPreparedStatement(insertStatement, flowAction, eventTimeMillis); + return insertStatement.executeQuery(); + }, true); + return handleResultFromAttemptedLeaseObtainment(rs, eventTimeMillis); + } + + // Extract values from result set + Timestamp dbEventTimestamp = resultSet.getTimestamp(1); + Timestamp dbLeaseAcquisitionTimestamp = resultSet.getTimestamp(2); + boolean isWithinEpsilon = resultSet.getBoolean(3); + int leaseValidityStatus = resultSet.getInt(4); + int dbLinger = resultSet.getInt(5); + + // Lease is valid + if (leaseValidityStatus == 1) { + // CASE 2: Same event, lease is valid + if (isWithinEpsilon) { + // Utilize db timestamp for reminder + return new LeasedToAnotherStatus(dbEventTimestamp.getTime(), + dbLeaseAcquisitionTimestamp.getTime() + dbLinger); + } + // CASE 3: Distinct event, lease is valid + // Utilize db timestamp for wait time, but be reminded of own event timestamp + return new LeasedToAnotherStatus(eventTimeMillis, + dbLeaseAcquisitionTimestamp.getTime() + dbLinger); + } + // CASE 4: Lease is out of date (regardless of whether same or distinct event) + else if (leaseValidityStatus == 2) { + if (isWithinEpsilon) { + LOG.warn("Lease should not be out of date for the same trigger event since epsilon << linger for flowAction" + + " {}, db eventTimestamp {}, db leaseAcquisitionTimestamp {}, linger {}", flowAction, + dbEventTimestamp, dbLeaseAcquisitionTimestamp, dbLinger); + } + // Use our event to acquire lease, check for previous db eventTimestamp and leaseAcquisitionTimestamp + ResultSet rs = withPreparedStatement( + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_MATCHING_ALL_COLS_STATEMENT, this.leaseArbiterTableName, + this.leaseArbiterTableName, 
this.constantsTableName), + updateStatement -> { + completeUpdatePreparedStatement(updateStatement, flowAction, eventTimeMillis, true, + true, dbEventTimestamp, dbLeaseAcquisitionTimestamp); + return updateStatement.executeQuery(); + }, true); + return handleResultFromAttemptedLeaseObtainment(rs, eventTimeMillis); + } // No longer leasing this event + // CASE 5: Same event, no longer leasing event in db: terminate + if (isWithinEpsilon) { + return new NoLongerLeasingStatus(); + } + // CASE 6: Distinct event, no longer leasing event in db + // Use our event to acquire lease, check for previous db eventTimestamp and NULL leaseAcquisitionTimestamp + ResultSet rs = withPreparedStatement( + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT, this.leaseArbiterTableName, + this.leaseArbiterTableName, this.constantsTableName), + updateStatement -> { + completeUpdatePreparedStatement(updateStatement, flowAction, eventTimeMillis, true, + false, dbEventTimestamp, null); + return updateStatement.executeQuery(); + }, true); + return handleResultFromAttemptedLeaseObtainment(rs, eventTimeMillis); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + /** + * Attempt lease by insert or update following a read based on the condition the state of the table has not changed + * since the read. Parse the result to return the corresponding status based on successful insert/update or not. 
+ * @param resultSet + * @param eventTimeMillis + * @return LeaseAttemptStatus + * @throws SQLException + * @throws IOException + */ + protected LeaseAttemptStatus handleResultFromAttemptedLeaseObtainment(ResultSet resultSet, long eventTimeMillis) + throws SQLException, IOException { + if (!resultSet.next()) { + throw new IOException("Expected num rows and lease_acquisition_timestamp returned from query but received nothing"); + } + int numRowsUpdated = resultSet.getInt(1); + long leaseAcquisitionTimeMillis = resultSet.getTimestamp(2).getTime(); + int dbLinger = resultSet.getInt(3); + if (numRowsUpdated == 1) { + return new LeaseObtainedStatus(eventTimeMillis, leaseAcquisitionTimeMillis); + } + // Another instance acquired lease in between + return new LeasedToAnotherStatus(eventTimeMillis, leaseAcquisitionTimeMillis + dbLinger); + } + + /** + * Complete the INSERT statement for a new flow action lease where the flow action is not present in the table + * @param statement + * @param flowAction + * @param eventTimeMillis + * @throws SQLException + */ + protected void completeInsertPreparedStatement(PreparedStatement statement, DagActionStore.DagAction flowAction, + long eventTimeMillis) throws SQLException { + int i = 0; + // Values to set in new row + statement.setString(++i, flowAction.getFlowGroup()); + statement.setString(++i, flowAction.getFlowName()); + statement.setString(++i, flowAction.getFlowExecutionId()); + statement.setString(++i, flowAction.getFlowActionType().toString()); + statement.setTimestamp(++i, new Timestamp(eventTimeMillis)); + // Values to check if existing row matches previous read + statement.setString(++i, flowAction.getFlowGroup()); + statement.setString(++i, flowAction.getFlowName()); + statement.setString(++i, flowAction.getFlowExecutionId()); + statement.setString(++i, flowAction.getFlowActionType().toString()); + // Values to select for return + statement.setString(++i, flowAction.getFlowGroup()); + statement.setString(++i, 
flowAction.getFlowName()); + statement.setString(++i, flowAction.getFlowExecutionId()); + statement.setString(++i, flowAction.getFlowActionType().toString()); + } + + /** + * Complete the UPDATE prepared statements for a flow action that already exists in the table that needs to be + * updated. + * @param statement + * @param flowAction + * @param eventTimeMillis + * @param needEventTimeCheck true if need to compare `originalEventTimestamp` with db event_timestamp + * @param needLeaseAcquisitionTimeCheck true if need to compare `originalLeaseAcquisitionTimestamp` with db one + * @param originalEventTimestamp value to compare to db one, null if not needed + * @param originalLeaseAcquisitionTimestamp value to compare to db one, null if not needed + * @throws SQLException + */ + protected void completeUpdatePreparedStatement(PreparedStatement statement, DagActionStore.DagAction flowAction, + long eventTimeMillis, boolean needEventTimeCheck, boolean needLeaseAcquisitionTimeCheck, + Timestamp originalEventTimestamp, Timestamp originalLeaseAcquisitionTimestamp) throws SQLException { + int i = 0; + // Value to update + statement.setTimestamp(++i, new Timestamp(eventTimeMillis)); + // Values to check if existing row matches previous read + statement.setString(++i, flowAction.getFlowGroup()); + statement.setString(++i, flowAction.getFlowName()); + statement.setString(++i, flowAction.getFlowExecutionId()); + statement.setString(++i, flowAction.getFlowActionType().toString()); + // Values that may be needed depending on the insert statement + if (needEventTimeCheck) { + statement.setTimestamp(++i, originalEventTimestamp); + } + if (needLeaseAcquisitionTimeCheck) { + statement.setTimestamp(++i, originalLeaseAcquisitionTimestamp); + } + // Values to select for return + statement.setString(++i, flowAction.getFlowGroup()); + statement.setString(++i, flowAction.getFlowName()); + statement.setString(++i, flowAction.getFlowExecutionId()); + statement.setString(++i, 
flowAction.getFlowActionType().toString()); + } + + @Override + public boolean completeLeaseUse(DagActionStore.DagAction flowAction, long eventTimeMillis, + long leaseAcquisitionTimeMillis) throws IOException { + String flowGroup = flowAction.getFlowGroup(); + String flowName = flowAction.getFlowName(); + String flowExecutionId = flowAction.getFlowExecutionId(); + DagActionStore.FlowActionType flowActionType = flowAction.getFlowActionType(); + return withPreparedStatement(String.format(CONDITIONALLY_COMPLETE_LEASE_STATEMENT, leaseArbiterTableName), + updateStatement -> { + int i = 0; + updateStatement.setString(++i, flowGroup); + updateStatement.setString(++i, flowName); + updateStatement.setString(++i, flowExecutionId); + updateStatement.setString(++i, flowActionType.toString()); + updateStatement.setTimestamp(++i, new Timestamp(eventTimeMillis)); + updateStatement.setTimestamp(++i, new Timestamp(leaseAcquisitionTimeMillis)); + int numRowsUpdated = updateStatement.executeUpdate(); + if (numRowsUpdated == 0) { + LOG.info("Multi-active lease arbiter lease attempt: [{}, eventTimestamp: {}] - FAILED to complete because " + + "lease expired or event cleaned up before host completed required actions", flowAction, + eventTimeMillis); + return false; + } + if (numRowsUpdated == 1) { + LOG.info("Multi-active lease arbiter lease attempt: [{}, eventTimestamp: {}] - COMPLETED, no longer leasing" + + " this event after this.", flowAction, eventTimeMillis); + return true; + } + throw new IOException(String.format("Attempt to complete lease use: [%s, eventTimestamp: %s] - updated more " + + "rows than expected", flowAction, eventTimeMillis)); + }, true); + } + + /** Abstracts recurring pattern around resource management and exception re-mapping.
*/ + protected <T> T withPreparedStatement(String sql, CheckedFunction<PreparedStatement, T> f, boolean shouldCommit) throws IOException { + try (Connection connection = this.dataSource.getConnection(); + PreparedStatement statement = connection.prepareStatement(sql)) { + T result = f.apply(statement); + if (shouldCommit) { + connection.commit(); + } + return result; + } catch (SQLException e) { + LOG.warn("Received SQL exception that can result from invalid connection. Checking if validation query is set {} Exception is {}", ((HikariDataSource) this.dataSource).getConnectionTestQuery(), e); + throw new IOException(e); + } + } +} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java deleted file mode 100644 index 4dbc1584f04..00000000000 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlSchedulerLeaseDeterminationStore.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package org.apache.gobblin.runtime.api; - -import java.io.IOException; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Timestamp; - -import com.google.inject.Inject; -import com.typesafe.config.Config; - -import javax.sql.DataSource; - -import org.apache.gobblin.broker.SharedResourcesBrokerFactory; -import org.apache.gobblin.configuration.ConfigurationKeys; -import org.apache.gobblin.metastore.MysqlDataSourceFactory; -import org.apache.gobblin.service.ServiceConfigKeys; -import org.apache.gobblin.util.ConfigUtils; - - -public class MysqlSchedulerLeaseDeterminationStore implements SchedulerLeaseDeterminationStore { - public static final String CONFIG_PREFIX = "MysqlSchedulerLeaseDeterminationStore"; - - protected final DataSource dataSource; - private final DagActionStore dagActionStore; - private final String tableName; - private final long epsilon; - private final long linger; - /* TODO: - - define retention on this table - - initialize table with epsilon and linger if one already doesn't exist using these configs - - join with table above to ensure epsilon/linger values are consistent across hosts (in case hosts are deployed with different configs) - */ - protected static final String WHERE_CLAUSE_TO_MATCH_ROW = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=? " - + "AND flow_action=? AND ABS(trigger_event_timestamp-?) <= %s"; - protected static final String ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT = "INSERT INTO %s (flow_group, " - + "flow_name, flow_execution_id, flow_action, trigger_event_timestamp) VALUES (?, ?, ?, ?, ?) 
WHERE NOT EXISTS (" - + "SELECT * FROM %s " + WHERE_CLAUSE_TO_MATCH_ROW + "; SELECT ROW_COUNT() AS rows_inserted_count, " - + "pursuant_timestamp FROM %s " + WHERE_CLAUSE_TO_MATCH_ROW; - - protected static final String UPDATE_PURSUANT_TIMESTAMP_STATEMENT = "UPDATE %s SET pursuant_timestamp = NULL " - + WHERE_CLAUSE_TO_MATCH_ROW; - private static final String CREATE_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + "flow_group varchar(" - + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" - + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" - + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, flow_action varchar(100) NOT NULL, " - + "trigger_event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " - + "pursuant_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," - + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,flow_action,trigger_event_timestamp)"; - - @Inject - public MysqlSchedulerLeaseDeterminationStore(Config config, DagActionStore dagActionStore) throws IOException { - if (config.hasPath(CONFIG_PREFIX)) { - config = config.getConfig(CONFIG_PREFIX).withFallback(config); - } else { - throw new IOException("Please specify the config for MysqlSchedulerLeaseDeterminationStore"); - } - - this.tableName = ConfigUtils.getString(config, ConfigurationKeys.SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY, - ConfigurationKeys.DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE); - this.epsilon = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY, - ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); - this.linger = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY, - ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); - - this.dataSource = MysqlDataSourceFactory.get(config, SharedResourcesBrokerFactory.getImplicitBroker()); - try (Connection connection = dataSource.getConnection(); - 
PreparedStatement createStatement = connection.prepareStatement(String.format(CREATE_TABLE_STATEMENT, tableName))) { - createStatement.executeUpdate(); - connection.commit(); - } catch (SQLException e) { - throw new IOException("Table creation failure for " + tableName, e); - } - this.dagActionStore = dagActionStore; - } - - @Override - public LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, - String flowExecutionId, FlowActionType flowActionType, long triggerTimeMillis) - throws IOException { - Timestamp triggerTimestamp = new Timestamp(triggerTimeMillis); - try (Connection connection = this.dataSource.getConnection(); - PreparedStatement insertStatement = connection.prepareStatement( - String.format(ATTEMPT_INSERT_AND_GET_PURSUANT_TIMESTAMP_STATEMENT, tableName, tableName, epsilon, tableName, - epsilon))) { - int i = 0; - // Values to set in new row - insertStatement.setString(++i, flowGroup); - insertStatement.setString(++i, flowName); - insertStatement.setString(++i, flowExecutionId); - insertStatement.setString(++i, flowActionType.toString()); - insertStatement.setTimestamp(++i, triggerTimestamp); - // Values to check if existing row matches - insertStatement.setString(++i, flowGroup); - insertStatement.setString(++i, flowName); - insertStatement.setString(++i, flowExecutionId); - insertStatement.setString(++i, flowActionType.toString()); - insertStatement.setTimestamp(++i, triggerTimestamp); - // Values to make select statement to read row - insertStatement.setString(++i, flowGroup); - insertStatement.setString(++i, flowName); - insertStatement.setString(++i, flowExecutionId); - insertStatement.setString(++i, flowActionType.toString()); - insertStatement.setTimestamp(++i, triggerTimestamp); - ResultSet resultSet = insertStatement.executeQuery(); - connection.commit(); - - if (!resultSet.next()) { - resultSet.close(); - throw new IOException(String.format("Unexpected error where no result returned while trying to 
obtain lease. " - + "This error indicates that no entry existed for trigger flow event for table %s flow group: %s, flow " - + "name: %s flow execution id: %s and trigger timestamp: %s when one should have been inserted", - tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp)); - } - // If a row was inserted, then we have obtained the lease - int rowsUpdated = resultSet.getInt(1); - if (rowsUpdated == 1) { - // If the pursuing flow launch has been persisted to the {@link DagActionStore} we have completed lease obtainment - this.dagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.LAUNCH); - if (this.dagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.LAUNCH)) { - if (updatePursuantTimestamp(flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp)) { - // TODO: potentially add metric here to count number of flows scheduled by each scheduler - LOG.info("Host completed obtaining lease for flow group: %s, flow name: %s flow execution id: %s and " - + "trigger timestamp: %s", flowGroup, flowName, flowExecutionId, triggerTimestamp); - resultSet.close(); - return LeaseAttemptStatus.LEASE_OBTAINED; - } else { - LOG.warn("Unable to update pursuant timestamp after persisting flow launch to DagActionStore for flow " - + "group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s.", flowGroup, flowName, - flowExecutionId, triggerTimestamp); - } - } else { - LOG.warn("Did not find flow launch action in DagActionStore after adding it for flow group: %s, flow name: " - + "%s flow execution id: %s and trigger timestamp: %s.", flowGroup, flowName, flowExecutionId, - triggerTimestamp); - } - } else if (rowsUpdated > 1) { - resultSet.close(); - throw new IOException(String.format("Expect at most 1 row in table for a given trigger event. 
%s rows " - + "exist for the trigger flow event for table %s flow group: %s, flow name: %s flow execution id: %s " - + "and trigger timestamp: %s.", i, tableName, flowGroup, flowName, flowExecutionId, triggerTimestamp)); - } - Timestamp pursuantTimestamp = resultSet.getTimestamp(2); - resultSet.close(); - long currentTimeMillis = System.currentTimeMillis(); - // Another host has obtained lease and no further steps required - if (pursuantTimestamp == null) { - LOG.info("Another host has already successfully obtained lease for flow group: %s, flow name: %s flow execution " - + "id: %s and trigger timestamp: %s", flowGroup, flowName, flowExecutionId, triggerTimeMillis); - return LeaseAttemptStatus.LEASE_OBTAINED; - } else if (pursuantTimestamp.getTime() + linger <= currentTimeMillis) { - return LeaseAttemptStatus.PREVIOUS_LEASE_EXPIRED; - } - // Previous lease owner still has valid lease (pursuant + linger > current timestamp) - return LeaseAttemptStatus.PREVIOUS_LEASE_VALID; - } catch (SQLException e) { - throw new IOException(String.format("Error encountered while trying to obtain lease on trigger flow event for " - + "table %s flow group: %s, flow name: %s flow execution id: %s and trigger timestamp: %s", tableName, - flowGroup, flowName, flowExecutionId, triggerTimestamp), e); - } - } - - @Override - public boolean updatePursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, - FlowActionType flowActionType, Timestamp triggerTimestamp) - throws IOException { - try (Connection connection = this.dataSource.getConnection(); - PreparedStatement updateStatement = connection.prepareStatement( - String.format(UPDATE_PURSUANT_TIMESTAMP_STATEMENT, tableName, epsilon))) { - int i = 0; - updateStatement.setString(++i, flowGroup); - updateStatement.setString(++i, flowName); - updateStatement.setString(++i, flowExecutionId); - updateStatement.setString(++i, flowActionType.toString()); - updateStatement.setTimestamp(++i, triggerTimestamp); - i = 
updateStatement.executeUpdate(); - connection.commit(); - - if (i != 1) { - LOG.warn("Expected to update 1 row's pursuant timestamp for a flow trigger event but instead updated {}", i); - } - return i >= 1; - } catch (SQLException e) { - throw new IOException(String.format("Encountered exception while trying to update pursuant timestamp to null for " - + "flowGroup: %s flowName: %s flowExecutionId: %s flowAction: %s triggerTimestamp: %s. Exception is %s", - flowGroup, flowName, flowExecutionId, flowActionType, triggerTimestamp, e)); - } - } -} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/NoLongerLeasingStatus.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/NoLongerLeasingStatus.java new file mode 100644 index 00000000000..e055b0a4e3d --- /dev/null +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/NoLongerLeasingStatus.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.gobblin.runtime.api; + +/* +This status is returned when a flow event was successfully leased and an instance completed the requirements for the +event, so no further leasing is required. 
+ */ +public class NoLongerLeasingStatus extends LeaseAttemptStatus { + + protected NoLongerLeasingStatus() { + super(); + } +} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java deleted file mode 100644 index 42377839bda..00000000000 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/SchedulerLeaseDeterminationStore.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.gobblin.runtime.api; - -import java.io.IOException; -import java.sql.Timestamp; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * Interface defines the two basic actions required for lease determination for each FlowActionType event for a flow. - * It is used by the {@link SchedulerLeaseAlgoHandler} to allow multiple scheduler's on different hosts to determine - * which scheduler is tasked with ensuring the FlowAction is taken for the trigger. 
- */ -public interface SchedulerLeaseDeterminationStore { - static final Logger LOG = LoggerFactory.getLogger(SchedulerLeaseDeterminationStore.class); - - // Enum is used to reason about the three possible scenarios that can result from an attempt to obtain a lease for a - // particular trigger event of a flow - enum LeaseAttemptStatus { - LEASE_OBTAINED, - PREVIOUS_LEASE_EXPIRED, - PREVIOUS_LEASE_VALID - } - - // Action to take on a particular flow - enum FlowActionType { - LAUNCH, - RETRY, - CANCEL, - NEXT_HOP - } - - /** - * This method attempts to insert an entry into store for a particular flow's trigger event if one does not already - * exist in the store for the same trigger event. Regardless of the outcome it also reads the pursuant timestamp of - * the entry for that trigger event (it could have pre-existed in the table or been newly added by the previous - * write). Based on the transaction results, it will return @LeaseAttemptStatus to determine the next action. - * @param flowGroup - * @param flowName - * @param flowExecutionId - * @param triggerTimeMillis is the time this flow is supposed to be launched - * @return LeaseAttemptStatus - * @throws IOException - */ - LeaseAttemptStatus attemptInsertAndGetPursuantTimestamp(String flowGroup, String flowName, - String flowExecutionId, FlowActionType flowActionType, long triggerTimeMillis) throws IOException; - - /** - * This method is used by `attemptInsertAndGetPursuantTimestamp` above to indicate the host has successfully completed - * actions necessary to confirm the lease of a flow trigger event. - * @return true if successfully updated, indicating no further actions need to be taken regarding this event. 
- */ - boolean updatePursuantTimestamp(String flowGroup, String flowName, String flowExecutionId, - FlowActionType flowActionType, Timestamp triggerTimestamp) throws IOException; -} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java index a71ab449298..ab5faee8ca0 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStore.java @@ -43,8 +43,6 @@ public class MysqlDagActionStore implements DagActionStore { public static final String CONFIG_PREFIX = "MysqlDagActionStore"; - private static final long GET_DAG_ACTION_INITIAL_WAIT_AFTER_FAILURE = 1000L; - protected final DataSource dataSource; private final String tableName; @@ -86,7 +84,7 @@ public MysqlDagActionStore(Config config) throws IOException { } @Override - public boolean exists(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws IOException, SQLException { + public boolean exists(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType) throws IOException, SQLException { ResultSet rs = null; try (Connection connection = this.dataSource.getConnection(); PreparedStatement existStatement = connection.prepareStatement(String.format(EXISTS_STATEMENT, tableName))) { @@ -94,13 +92,13 @@ public boolean exists(String flowGroup, String flowName, String flowExecutionId, existStatement.setString(++i, flowGroup); existStatement.setString(++i, flowName); existStatement.setString(++i, flowExecutionId); - existStatement.setString(++i, dagActionValue.toString()); + existStatement.setString(++i, flowActionType.toString()); rs = existStatement.executeQuery(); rs.next(); return rs.getBoolean(1); } catch (SQLException e) { - throw new 
IOException(String.format("Failure checking existence for table %s of flow with flow group:%s, flow name:%s, flow execution id:%s and dagAction: %s", - tableName, flowGroup, flowName, flowExecutionId, dagActionValue), e); + throw new IOException(String.format("Failure checking existence of DagAction: %s in table %s", + new DagAction(flowGroup, flowName, flowExecutionId, flowActionType), tableName), e); } finally { if (rs != null) { rs.close(); @@ -109,7 +107,7 @@ public boolean exists(String flowGroup, String flowName, String flowExecutionId, } @Override - public void addDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) + public void addDagAction(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType) throws IOException { try (Connection connection = this.dataSource.getConnection(); PreparedStatement insertStatement = connection.prepareStatement(String.format(INSERT_STATEMENT, tableName))) { @@ -117,34 +115,35 @@ public void addDagAction(String flowGroup, String flowName, String flowExecution insertStatement.setString(++i, flowGroup); insertStatement.setString(++i, flowName); insertStatement.setString(++i, flowExecutionId); - insertStatement.setString(++i, dagActionValue.toString()); + insertStatement.setString(++i, flowActionType.toString()); insertStatement.executeUpdate(); connection.commit(); } catch (SQLException e) { - throw new IOException(String.format("Failure to adding action for table %s of flow with flow group: %s, flow name" - + ": %s, flow execution id: %s, and dag action: %s", tableName, flowGroup, flowName, flowExecutionId, - dagActionValue), e); + throw new IOException(String.format("Failure adding action for DagAction: %s in table %s", + new DagAction(flowGroup, flowName, flowExecutionId, flowActionType), tableName), e); } } @Override - public boolean deleteDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) throws 
IOException { + public boolean deleteDagAction(DagAction dagAction) throws IOException { try (Connection connection = this.dataSource.getConnection(); PreparedStatement deleteStatement = connection.prepareStatement(String.format(DELETE_STATEMENT, tableName))) { int i = 0; - deleteStatement.setString(++i, flowGroup); - deleteStatement.setString(++i, flowName); - deleteStatement.setString(++i, flowExecutionId); + deleteStatement.setString(++i, dagAction.getFlowGroup()); + deleteStatement.setString(++i, dagAction.getFlowName()); + deleteStatement.setString(++i, dagAction.getFlowExecutionId()); + deleteStatement.setString(++i, dagAction.getFlowActionType().toString()); int result = deleteStatement.executeUpdate(); connection.commit(); return result != 0; } catch (SQLException e) { - throw new IOException(String.format("Failure to delete action for table %s of flow with flow group:%s, flow name:%s, flow execution id:%s and dagAction: %s", - tableName, flowGroup, flowName, flowExecutionId, dagActionValue), e); + throw new IOException(String.format("Failure deleting action for DagAction: %s in table %s", dagAction, + tableName), e); } } - private DagAction getDagActionWithRetry(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue, ExponentialBackoff exponentialBackoff) + // TODO: later change this to getDagActions relating to a particular flow execution if it makes sense + private DagAction getDagActionWithRetry(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType, ExponentialBackoff exponentialBackoff) throws IOException, SQLException { ResultSet rs = null; try (Connection connection = this.dataSource.getConnection(); @@ -153,21 +152,22 @@ private DagAction getDagActionWithRetry(String flowGroup, String flowName, Strin getStatement.setString(++i, flowGroup); getStatement.setString(++i, flowName); getStatement.setString(++i, flowExecutionId); - getStatement.setString(++i, dagActionValue.toString()); + 
getStatement.setString(++i, flowActionType.toString()); rs = getStatement.executeQuery(); if (rs.next()) { - return new DagAction(rs.getString(1), rs.getString(2), rs.getString(3), DagActionValue.valueOf(rs.getString(4))); + return new DagAction(rs.getString(1), rs.getString(2), rs.getString(3), FlowActionType.valueOf(rs.getString(4))); } else { if (exponentialBackoff.awaitNextRetryIfAvailable()) { - return getDagActionWithRetry(flowGroup, flowName, flowExecutionId, dagActionValue, exponentialBackoff); + return getDagActionWithRetry(flowGroup, flowName, flowExecutionId, flowActionType, exponentialBackoff); } else { - log.warn(String.format("Can not find dag action: %s with flowGroup: %s, flowName: %s, flowExecutionId: %s", dagActionValue, flowGroup, flowName, flowExecutionId)); + log.warn(String.format("Can not find dag action: %s with flowGroup: %s, flowName: %s, flowExecutionId: %s", + flowActionType, flowGroup, flowName, flowExecutionId)); return null; } } } catch (SQLException | InterruptedException e) { - throw new IOException(String.format("Failure get dag action: %s from table %s of flow with flow group:%s, flow name:%s and flow execution id:%s", - dagActionValue, tableName, flowGroup, flowName, flowExecutionId), e); + throw new IOException(String.format("Failure get %s from table %s", new DagAction(flowGroup, flowName, flowExecutionId, + flowActionType), tableName), e); } finally { if (rs != null) { rs.close(); @@ -176,13 +176,6 @@ private DagAction getDagActionWithRetry(String flowGroup, String flowName, Strin } - @Override - public DagAction getDagAction(String flowGroup, String flowName, String flowExecutionId, DagActionValue dagActionValue) - throws IOException, SQLException { - ExponentialBackoff exponentialBackoff = ExponentialBackoff.builder().initialDelay(GET_DAG_ACTION_INITIAL_WAIT_AFTER_FAILURE).maxRetries(this.getDagActionMaxRetries).build(); - return getDagActionWithRetry(flowGroup, flowName, flowExecutionId, dagActionValue, exponentialBackoff); 
- } - @Override public Collection getDagActions() throws IOException { HashSet result = new HashSet<>(); @@ -191,7 +184,7 @@ public Collection getDagActions() throws IOException { ResultSet rs = getAllStatement.executeQuery()) { while (rs.next()) { result.add( - new DagAction(rs.getString(1), rs.getString(2), rs.getString(3), DagActionValue.valueOf(rs.getString(4)))); + new DagAction(rs.getString(1), rs.getString(2), rs.getString(3), FlowActionType.valueOf(rs.getString(4)))); } if (rs != null) { rs.close(); diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java index 07c1a71e2dc..06bd97efd69 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java @@ -73,6 +73,8 @@ public class RuntimeMetrics { public static final String GOBBLIN_JOB_SCHEDULER_TOTAL_GET_SPEC_TIME_NANOS = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".jobScheduler.totalGetSpecTimeNanos"; public static final String GOBBLIN_JOB_SCHEDULER_TOTAL_ADD_SPEC_TIME_NANOS = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".jobScheduler.totalAddSpecTimeNanos"; public static final String GOBBLIN_JOB_SCHEDULER_NUM_JOBS_SCHEDULED_DURING_STARTUP = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".jobScheduler.numJobsScheduledDuringStartup"; + // Metrics Used to Track SchedulerLeaseAlgoHandlerProgress + public static final String GOBBLIN_SCHEDULER_LEASE_ALGO_HANDLER_NUM_LEASES_COMPLETED = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".schedulerLeaseAlgoHandler.numLeasesCompleted"; // Metadata keys public static final String TOPIC = "topic"; public static final String GROUP_ID = "groupId"; diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java index 
24d96d2c089..f979bc64875 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java @@ -609,7 +609,7 @@ public void executeImpl(JobExecutionContext context) // Obtain trigger timestamp from trigger to pass to jobProps Trigger trigger = context.getTrigger(); long triggerTimestampMillis = trigger.getPreviousFireTime().getTime(); - jobProps.setProperty(ConfigurationKeys.SCHEDULER_TRIGGER_TIMESTAMP_MILLIS_KEY, + jobProps.setProperty(ConfigurationKeys.SCHEDULER_NEW_EVENT_TIMESTAMP_MILLIS_KEY, String.valueOf(triggerTimestampMillis)); try { diff --git a/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java b/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java index 6f565b7bff5..3ab44ebbcf5 100644 --- a/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java +++ b/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java @@ -61,47 +61,43 @@ public void setUp() throws Exception { @Test public void testAddAction() throws Exception { - this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL); + this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.KILL); //Should not be able to add KILL again when previous one exist Assert.expectThrows(IOException.class, - () -> this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); + () -> this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.KILL)); //Should be able to add a RESUME action for same execution as well as KILL for another execution of the flow - this.mysqlDagActionStore.addDagAction(flowGroup, flowName, 
flowExecutionId, DagActionStore.DagActionValue.RESUME); - this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL); + this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.RESUME); + this.mysqlDagActionStore.addDagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.FlowActionType.KILL); } @Test(dependsOnMethods = "testAddAction") public void testExists() throws Exception { - Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); - Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.RESUME)); - Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL)); - Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_3, DagActionStore.DagActionValue.RESUME)); - Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_3, DagActionStore.DagActionValue.KILL)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.KILL)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.RESUME)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, DagActionStore.FlowActionType.KILL)); + Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_3, DagActionStore.FlowActionType.RESUME)); + Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_3, DagActionStore.FlowActionType.KILL)); } @Test(dependsOnMethods = "testExists") - public void testGetAction() throws IOException, SQLException { - Assert.assertEquals(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, 
DagActionStore.DagActionValue.KILL), - this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); - Assert.assertEquals(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL), - this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.KILL)); + public void testGetActions() throws IOException { Collection dagActions = this.mysqlDagActionStore.getDagActions(); Assert.assertEquals(3, dagActions.size()); HashSet set = new HashSet<>(); - set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); - set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.RESUME)); - set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.RESUME)); + set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.KILL)); + set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.RESUME)); + set.add(new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId_2, DagActionStore.FlowActionType.KILL)); Assert.assertEquals(dagActions, set); } - @Test(dependsOnMethods = "testGetAction") + @Test(dependsOnMethods = "testGetActions") public void testDeleteAction() throws IOException, SQLException { - this.mysqlDagActionStore.deleteDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL); - Assert.assertEquals(this.mysqlDagActionStore.getDagActions().size(), 1); - Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); - Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, DagActionStore.DagActionValue.RESUME)); - 
Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, 
DagActionStore.DagActionValue.KILL)); - Assert.assertNull( this.mysqlDagActionStore.getDagAction(flowGroup, flowName, flowExecutionId, DagActionStore.DagActionValue.KILL)); + this.mysqlDagActionStore.deleteDagAction( + new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.KILL)); + Assert.assertEquals(this.mysqlDagActionStore.getDagActions().size(), 2); + Assert.assertFalse(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.KILL)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.RESUME)); + Assert.assertTrue(this.mysqlDagActionStore.exists(flowGroup, flowName, flowExecutionId_2, DagActionStore.FlowActionType.KILL)); } } \ No newline at end of file diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java index 7233ba40b40..f1edc4c7479 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java @@ -20,7 +20,7 @@ import java.util.Objects; import org.apache.gobblin.runtime.api.DagActionStore; -import org.apache.gobblin.runtime.api.MysqlSchedulerLeaseDeterminationStore; +import org.apache.gobblin.runtime.api.MySQLMultiActiveLeaseArbiter; import org.apache.gobblin.runtime.dag_action_store.MysqlDagActionStore; import org.apache.gobblin.service.modules.orchestration.SchedulerLeaseAlgoHandler; import org.apache.gobblin.service.modules.orchestration.UserQuotaManager; @@ -247,7 +247,7 @@ public void configure(Binder binder) { } if (serviceConfig.isMultiActiveSchedulerEnabled()) { - binder.bind(MysqlSchedulerLeaseDeterminationStore.class); + binder.bind(MySQLMultiActiveLeaseArbiter.class); 
binder.bind(SchedulerLeaseAlgoHandler.class); } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java index da7e928914e..a4df52eb4f2 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java @@ -426,7 +426,7 @@ public synchronized void setActive(boolean active) { if (dagActionStore.isPresent()) { Collection dagActions = dagActionStore.get().getDagActions(); for (DagActionStore.DagAction action : dagActions) { - switch (action.getDagActionValue()) { + switch (action.getFlowActionType()) { case KILL: this.handleKillFlowEvent(new KillFlowEvent(action.getFlowGroup(), action.getFlowName(), Long.parseLong(action.getFlowExecutionId()))); break; @@ -434,7 +434,7 @@ public synchronized void setActive(boolean active) { this.handleResumeFlowEvent(new ResumeFlowEvent(action.getFlowGroup(), action.getFlowName(), Long.parseLong(action.getFlowExecutionId()))); break; default: - log.warn("Unsupported dagAction: " + action.getDagActionValue().toString()); + log.warn("Unsupported dagAction: " + action.getFlowActionType().toString()); } } } @@ -581,7 +581,9 @@ public void run() { private void clearUpDagAction(DagId dagId) throws IOException { if (this.dagActionStore.isPresent()) { - this.dagActionStore.get().deleteDagAction(dagId.flowGroup, dagId.flowName, dagId.flowExecutionId, DagActionStore.DagActionValue.KILL); + this.dagActionStore.get().deleteDagAction( + new DagActionStore.DagAction(dagId.flowGroup, dagId.flowName, dagId.flowExecutionId, + DagActionStore.FlowActionType.KILL)); } } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java 
b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index a5cc9a17c51..856664d2203 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -57,9 +57,9 @@ import org.apache.gobblin.metrics.Tag; import org.apache.gobblin.metrics.event.EventSubmitter; import org.apache.gobblin.metrics.event.TimingEvent; +import org.apache.gobblin.runtime.api.DagActionStore; import org.apache.gobblin.runtime.api.FlowSpec; import org.apache.gobblin.runtime.api.JobSpec; -import org.apache.gobblin.runtime.api.SchedulerLeaseDeterminationStore; import org.apache.gobblin.runtime.api.Spec; import org.apache.gobblin.runtime.api.SpecCatalogListener; import org.apache.gobblin.runtime.api.SpecProducer; @@ -319,15 +319,12 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil // If multi-active scheduler is enabled do not pass onto DagManager, otherwise scheduler forwards it directly if (this.isMultiActiveSchedulerEnabled) { String flowExecutionId = flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD); - boolean leaseAttemptSucceeded = schedulerLeaseAlgoHandler.handleNewTriggerEvent(jobProps, flowGroup, flowName, - flowExecutionId, SchedulerLeaseDeterminationStore.FlowActionType.LAUNCH, triggerTimestampMillis); - _log.info("scheduler attempted lease on flowGroup: %s, flowName: %s, flowExecutionId: %s, LAUNCH event for " - + "triggerTimestamp: %s that was " + (leaseAttemptSucceeded ? 
"" : "NOT") + "successful", flowGroup, - flowName, flowExecutionId, triggerTimestampMillis); - return; - } - - if (this.dagManager.isPresent()) { + DagActionStore.DagAction flowAction = + new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.LAUNCH); + schedulerLeaseAlgoHandler.handleNewSchedulerEvent(jobProps, flowAction, triggerTimestampMillis); + _log.info("Multi-active scheduler finished handling trigger event: [%s, triggerEventTimestamp: %s]", flowAction, + triggerTimestampMillis); + } else if (this.dagManager.isPresent()) { try { //Send the dag to the DagManager. this.dagManager.get().addDag(jobExecutionPlanDag, true, true); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java index 8f4a206650b..e256fcadc5b 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java @@ -18,7 +18,9 @@ package org.apache.gobblin.service.modules.orchestration; import java.io.IOException; +import java.sql.SQLException; import java.time.LocalDateTime; +import java.time.ZoneId; import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; import java.util.Locale; @@ -36,95 +38,129 @@ import javax.inject.Inject; import org.apache.gobblin.configuration.ConfigurationKeys; -import org.apache.gobblin.runtime.api.SchedulerLeaseDeterminationStore; +import org.apache.gobblin.instrumented.Instrumented; +import org.apache.gobblin.metrics.ContextAwareMeter; +import org.apache.gobblin.metrics.MetricContext; +import org.apache.gobblin.runtime.api.DagActionStore; +import org.apache.gobblin.runtime.api.LeaseAttemptStatus; +import 
org.apache.gobblin.runtime.api.MultiActiveLeaseArbiter; +import org.apache.gobblin.runtime.metrics.RuntimeMetrics; import org.apache.gobblin.scheduler.JobScheduler; import org.apache.gobblin.scheduler.SchedulerService; import org.apache.gobblin.util.ConfigUtils; +import org.apache.gobblin.runtime.api.LeaseObtainedStatus; +import org.apache.gobblin.runtime.api.LeasedToAnotherStatus; +/** + * Handler used to coordinate multiple hosts with enabled schedulers to respond to flow action events. It uses the + * {@link org.apache.gobblin.runtime.api.MySQLMultiActiveLeaseArbiter} to determine a single lease owner at a given time + * for a flow action event. After acquiring the lease, it persists the flow action event to the {@link DagActionStore} + * to be eventually acted upon by the host with the active DagManager. Once it has completed this action, it will mark + * the lease as completed by calling the + * {@link org.apache.gobblin.runtime.api.MySQLMultiActiveLeaseArbiter.completeLeaseUse} method. Hosts that do not gain + * the lease for the event, instead schedule a reminder using the {@link SchedulerService} to check back in on the + * previous lease owner's completion status after the lease should expire to ensure the event is handled in failure + * cases. 
+ */ public class SchedulerLeaseAlgoHandler { private static final Logger LOG = LoggerFactory.getLogger(SchedulerLeaseAlgoHandler.class); - private final long linger; private final int staggerUpperBoundSec; private static Random random = new Random(); - protected SchedulerLeaseDeterminationStore leaseDeterminationStore; + protected MultiActiveLeaseArbiter multiActiveLeaseArbiter; protected JobScheduler jobScheduler; protected SchedulerService schedulerService; + protected DagActionStore dagActionStore; + private MetricContext metricContext; + private ContextAwareMeter numLeasesCompleted; @Inject - public SchedulerLeaseAlgoHandler(Config config, SchedulerLeaseDeterminationStore leaseDeterminationStore, - JobScheduler jobScheduler, SchedulerService schedulerService) - throws IOException { - this.linger = ConfigUtils.getLong(config, ConfigurationKeys.SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS_KEY, - ConfigurationKeys.DEFAULT_SCHEDULER_TRIGGER_EVENT_EPSILON_MILLIS); + public SchedulerLeaseAlgoHandler(Config config, MultiActiveLeaseArbiter leaseDeterminationStore, + JobScheduler jobScheduler, SchedulerService schedulerService, DagActionStore dagActionStore) { this.staggerUpperBoundSec = ConfigUtils.getInt(config, ConfigurationKeys.SCHEDULER_STAGGERING_UPPER_BOUND_SEC_KEY, ConfigurationKeys.DEFAULT_SCHEDULER_STAGGERING_UPPER_BOUND_SEC); - this.leaseDeterminationStore = leaseDeterminationStore; + this.multiActiveLeaseArbiter = leaseDeterminationStore; this.jobScheduler = jobScheduler; this.schedulerService = schedulerService; + this.dagActionStore = dagActionStore; + this.metricContext = Instrumented.getMetricContext(new org.apache.gobblin.configuration.State(ConfigUtils.configToProperties(config)), + this.getClass()); + this.numLeasesCompleted = metricContext.contextAwareMeter(RuntimeMetrics.GOBBLIN_SCHEDULER_LEASE_ALGO_HANDLER_NUM_LEASES_COMPLETED); } - private SchedulerLeaseDeterminationStore schedulerLeaseDeterminationStore; /** - * This method is used in the multi-active 
scheduler case for one or more hosts to respond to a flow's trigger event - * by attempting a lease for the flow event. + * This method is used in the multi-active scheduler case for one or more hosts to respond to a flow action event + * by attempting a lease for the flow event and processing the result depending on the status of the attempt. * @param jobProps - * @param flowGroup - * @param flowName - * @param flowExecutionId - * @param flowActionType - * @param triggerTimeMillis - * @return true if this host obtained the lease for this flow's trigger event, false otherwise. + * @param flowAction + * @param eventTimeMillis * @throws IOException */ - public boolean handleNewTriggerEvent(Properties jobProps, String flowGroup, String flowName, String flowExecutionId, - SchedulerLeaseDeterminationStore.FlowActionType flowActionType, long triggerTimeMillis) + public void handleNewSchedulerEvent(Properties jobProps, DagActionStore.DagAction flowAction, long eventTimeMillis) throws IOException { - SchedulerLeaseDeterminationStore.LeaseAttemptStatus leaseAttemptStatus = - schedulerLeaseDeterminationStore.attemptInsertAndGetPursuantTimestamp(flowGroup, flowName, flowExecutionId, - flowActionType, triggerTimeMillis); + LeaseAttemptStatus leaseAttemptStatus = + multiActiveLeaseArbiter.tryAcquireLease(flowAction, eventTimeMillis); // TODO: add a log event or metric for each of these cases - switch (leaseAttemptStatus) { - case LEASE_OBTAINED: - return true; - case PREVIOUS_LEASE_EXPIRED: - // recursively try obtaining lease again immediately, stops when reaches one of the other cases - return handleNewTriggerEvent(jobProps, flowGroup, flowName, flowExecutionId, flowActionType, triggerTimeMillis); - case PREVIOUS_LEASE_VALID: - scheduleReminderForTriggerEvent(jobProps, flowGroup, flowName, flowExecutionId, flowActionType, triggerTimeMillis); + switch (leaseAttemptStatus.getClass().getSimpleName()) { + case "LeaseObtainedStatus": + finalizeLease((LeaseObtainedStatus) 
leaseAttemptStatus, flowAction); + break; + case "LeasedToAnotherStatus": + scheduleReminderForEvent(jobProps, (LeasedToAnotherStatus) leaseAttemptStatus, flowAction, eventTimeMillis); + break; + case "NoLongerLeasingStatus": + break; + default: + } + } + + // Called after obtaining a lease to persist the flow action to {@link DagActionStore} and mark the lease as done + private boolean finalizeLease(LeaseObtainedStatus status, DagActionStore.DagAction flowAction) { + try { + this.dagActionStore.addDagAction(flowAction.getFlowGroup(), flowAction.getFlowName(), + flowAction.getFlowExecutionId(), flowAction.getFlowActionType()); + if (this.dagActionStore.exists(flowAction.getFlowGroup(), flowAction.getFlowName(), + flowAction.getFlowExecutionId(), flowAction.getFlowActionType())) { + // If the flow action has been persisted to the {@link DagActionStore} we can close the lease + this.numLeasesCompleted.mark(); + return this.multiActiveLeaseArbiter.completeLeaseUse(flowAction, status.getEventTimestamp(), + status.getMyLeaseAcquisitionTimestamp()); + } + } catch (IOException | SQLException e) { + throw new RuntimeException(e); } + // TODO: should this return an error or print a warning log if failed to commit to dag action store? return false; } /** - * This method is used by {@link SchedulerLeaseAlgoHandler.handleNewTriggerEvent} to schedule a reminder for itself to - * check on the other participant's progress during pursuing orchestration after the time the lease should expire. - * If the previous participant was successful, then no further action is taken otherwise we re-attempt pursuing - * orchestration ourselves. 
- * @param flowGroup - * @param flowName - * @param flowExecutionId - * @param flowActionType - * @param triggerTimeMillis + * This method is used by {@link SchedulerLeaseAlgoHandler.handleNewSchedulerEvent} to schedule a reminder for itself + * to check on the other participant's progress to finish acting on a flow action after the time the lease should + * expire. + * @param jobProps + * @param status used to extract event to be reminded for and the minimum time after which reminder should occur + * @param originalEventTimeMillis the event timestamp we were originally handling + * @param flowAction */ - protected void scheduleReminderForTriggerEvent(Properties jobProps, String flowGroup, String flowName, String flowExecutionId, - SchedulerLeaseDeterminationStore.FlowActionType flowActionType, long triggerTimeMillis) { - // Check-in `linger` time after the current timestamp which is "close-enough" to the time the pursuant attempted - // the flow action. We also add a small randomization to avoid 'thundering herd' issue - String cronExpression = createCronFromDelayPeriod(linger + random.nextInt(staggerUpperBoundSec)); + private void scheduleReminderForEvent(Properties jobProps, LeasedToAnotherStatus status, + DagActionStore.DagAction flowAction, long originalEventTimeMillis) { + // Add a small randomization to the minimum reminder wait time to avoid 'thundering herd' issue + String cronExpression = createCronFromDelayPeriod(status.getMinimumReminderWaitMillis() + random.nextInt(staggerUpperBoundSec)); jobProps.setProperty(ConfigurationKeys.JOB_SCHEDULE_KEY, cronExpression); - // This timestamp is what will be used to identify the particular flow trigger event it's associated with - jobProps.setProperty(ConfigurationKeys.SCHEDULER_ORIGINAL_TRIGGER_TIMESTAMP_MILLIS_KEY, String.valueOf(triggerTimeMillis)); - JobKey key = new JobKey(flowName, flowGroup); + // Ensure we save the event timestamp that we're setting reminder for, in addition to our own event timestamp 
which may be different + jobProps.setProperty(ConfigurationKeys.SCHEDULER_REMINDER_EVENT_TIMESTAMP_MILLIS_KEY, String.valueOf(status.getReminderEventTimeMillis())); + jobProps.setProperty(ConfigurationKeys.SCHEDULER_NEW_EVENT_TIMESTAMP_MILLIS_KEY, String.valueOf(status.getReminderEventTimeMillis())); + JobKey key = new JobKey(flowAction.getFlowName(), flowAction.getFlowGroup()); Trigger trigger = this.jobScheduler.getTrigger(key, jobProps); try { - LOG.info("Attempting to add job reminder to Scheduler Service where job is %s trigger event %s and reminder is at " - + "%s.", key, triggerTimeMillis, trigger.getNextFireTime()); + LOG.info("Scheduler Lease Algo Handler - [%s, eventTimestamp: %s] - attempting to schedule reminder for event %s in %s millis", + flowAction, originalEventTimeMillis, status.getReminderEventTimeMillis(), trigger.getNextFireTime()); this.schedulerService.getScheduler().scheduleJob(trigger); } catch (SchedulerException e) { - LOG.warn("Failed to add job reminder due to SchedulerException for job %s trigger event %s ", key, triggerTimeMillis, e); + LOG.warn("Failed to add job reminder due to SchedulerException for job %s trigger event %s ", key, status.getReminderEventTimeMillis(), e); } - LOG.info(String.format("Scheduled reminder for job %s trigger event %s. 
Next run: %s.", key, triggerTimeMillis, trigger.getNextFireTime())); + LOG.info(String.format("Scheduler Lease Algo Handler - [%s, eventTimestamp: %s] - SCHEDULED REMINDER for event %s in %s millis", + flowAction, originalEventTimeMillis, status.getReminderEventTimeMillis(), trigger.getNextFireTime())); } /** @@ -134,11 +170,10 @@ protected void scheduleReminderForTriggerEvent(Properties jobProps, String flowG * @return */ protected static String createCronFromDelayPeriod(long delayPeriodSeconds) { - LocalDateTime now = LocalDateTime.now(); + LocalDateTime now = LocalDateTime.now(ZoneId.of("UTC")); LocalDateTime delaySecondsLater = now.plus(delayPeriodSeconds, ChronoUnit.SECONDS); // TODO: investigate potentially better way of generating cron expression that does not make it US dependent DateTimeFormatter formatter = DateTimeFormatter.ofPattern("ss mm HH dd MM ? yyyy", Locale.US); return delaySecondsLater.format(formatter); } - } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java index 360fd291505..d75465f48ef 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java @@ -54,25 +54,25 @@ public void resume(ComplexResourceKey Date: Wed, 7 Jun 2023 19:09:08 -0700 Subject: [PATCH 09/11] Address second round of review comments --- .../configuration/ConfigurationKeys.java | 20 ++-- .../gobblin/runtime/api/DagActionStore.java | 8 +- .../runtime/api/LeaseAttemptStatus.java | 22 ---- .../runtime/api/LeaseObtainedStatus.java | 38 ------- .../runtime/api/LeasedToAnotherStatus.java | 39 ------- 
.../runtime/api/MultiActiveLeaseArbiter.java | 77 +++++++++---- ...java => MysqlMultiActiveLeaseArbiter.java} | 102 ++++++++--------- .../runtime/api/NoLongerLeasingStatus.java | 29 ----- .../runtime/metrics/RuntimeMetrics.java | 2 +- .../gobblin/runtime/util/InjectionNames.java | 2 + .../gobblin/scheduler/JobScheduler.java | 7 +- .../MysqlDagActionStoreTest.java | 2 +- .../core/GobblinServiceGuiceModule.java | 16 +-- .../modules/orchestration/DagManager.java | 14 +-- ...goHandler.java => FlowTriggerHandler.java} | 106 +++++++++--------- .../modules/orchestration/Orchestrator.java | 62 ++++++---- ...ecutionResourceHandlerWithWarmStandby.java | 34 +++--- .../scheduler/GobblinServiceJobScheduler.java | 34 ++---- .../DagActionStoreChangeMonitor.java | 51 ++------- .../DagActionStoreChangeMonitorFactory.java | 9 +- .../SpecStoreChangeMonitorFactory.java | 2 +- .../orchestration/DagManagerFlowTest.java | 2 +- .../orchestration/OrchestratorTest.java | 8 +- .../GobblinServiceJobSchedulerTest.java | 11 +- 24 files changed, 286 insertions(+), 411 deletions(-) delete mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java delete mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java delete mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java rename gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/{MySQLMultiActiveLeaseArbiter.java => MysqlMultiActiveLeaseArbiter.java} (87%) delete mode 100644 gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/NoLongerLeasingStatus.java rename gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/{SchedulerLeaseAlgoHandler.java => FlowTriggerHandler.java} (59%) diff --git a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java index 0c877c0b6b3..e763c37faf2 
100644 --- a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java +++ b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java @@ -96,19 +96,19 @@ public class ConfigurationKeys { public static final String SKIP_SCHEDULING_FLOWS_AFTER_NUM_DAYS = "skip.scheduling.flows.after.num.days"; public static final int DEFAULT_NUM_DAYS_TO_SKIP_AFTER = 365; // Scheduler lease determination store configuration - public static final String MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE_KEY = "multi.active.scheduler.constants.db.table"; - public static final String DEFAULT_MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE = "multi.active.scheduler."; - public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = "scheduler.lease.determination.store.db.table"; - public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = "gobblin_scheduler_lease_determination_store"; - public static final String SCHEDULER_REMINDER_EVENT_TIMESTAMP_MILLIS_KEY = "reminderEventTimestampMillis"; - public static final String SCHEDULER_NEW_EVENT_TIMESTAMP_MILLIS_KEY = "newEventTimestampMillis"; - public static final String SCHEDULER_EVENT_EPSILON_MILLIS_KEY = ""; + public static final String MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE_KEY = "MysqlMultiActiveLeaseArbiter.constantsTable"; + public static final String DEFAULT_MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE = "MysqlMultiActiveLeaseArbiter.gobblin_multi_active_scheduler_constants_store"; + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = "MysqlMultiActiveLeaseArbiter.schedulerLeaseArbiterTable"; + public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = "MysqlMultiActiveLeaseArbiter.gobblin_scheduler_lease_determination_store"; + public static final String SCHEDULER_EVENT_TO_REVISIT_TIMESTAMP_MILLIS_KEY = "eventToRevisitTimestampMillis"; + public static final String SCHEDULER_EVENT_TO_TRIGGER_TIMESTAMP_MILLIS_KEY = 
"triggerEventTimestampMillis"; + public static final String SCHEDULER_EVENT_EPSILON_MILLIS_KEY = "MysqlMultiActiveLeaseArbiter.epsilonMillis"; public static final int DEFAULT_SCHEDULER_EVENT_EPSILON_MILLIS = 100; // Note: linger should be on the order of seconds even though we measure in millis - public static final String SCHEDULER_EVENT_LINGER_MILLIS_KEY = ""; + public static final String SCHEDULER_EVENT_LINGER_MILLIS_KEY = "MysqlMultiActiveLeaseArbiter.lingerMillis"; public static final int DEFAULT_SCHEDULER_EVENT_LINGER_MILLIS = 30000; - public static final String SCHEDULER_STAGGERING_UPPER_BOUND_SEC_KEY = ""; - public static final int DEFAULT_SCHEDULER_STAGGERING_UPPER_BOUND_SEC = 5; + public static final String SCHEDULER_MAX_BACKOFF_MILLIS_KEY = "MysqlMultiActiveLeaseArbiter.maxBackoffMillis"; + public static final int DEFAULT_SCHEDULER_MAX_BACKOFF_MILLIS = 5000; // Job executor thread pool size public static final String JOB_EXECUTOR_THREAD_POOL_SIZE_KEY = "jobexecutor.threadpool.size"; diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java index aa0e1237dcf..90e8db6d280 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java @@ -21,6 +21,7 @@ import java.sql.SQLException; import java.util.Collection; +import lombok.AllArgsConstructor; import lombok.Data; @@ -35,17 +36,12 @@ enum FlowActionType { } @Data + @AllArgsConstructor class DagAction { String flowGroup; String flowName; String flowExecutionId; FlowActionType flowActionType; - public DagAction(String flowGroup, String flowName, String flowExecutionId, FlowActionType flowActionType) { - this.flowGroup = flowGroup; - this.flowName = flowName; - this.flowExecutionId = flowExecutionId; - this.flowActionType = flowActionType; - } } diff --git 
a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java deleted file mode 100644 index dd4125ca9c1..00000000000 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseAttemptStatus.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.gobblin.runtime.api; - -public abstract class LeaseAttemptStatus { - protected LeaseAttemptStatus() {} -} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java deleted file mode 100644 index 5a7fb93cb91..00000000000 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeaseObtainedStatus.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.gobblin.runtime.api; - -import lombok.Getter; - -/* -The instance calling this method acquired the lease for the event in question. The class contains the `eventTimestamp` -associated with the lease as well as the time the lease was obtained by me or `myLeaseAcquisitionTimestamp`. - */ -public class LeaseObtainedStatus extends LeaseAttemptStatus { - @Getter - private final long eventTimestamp; - - @Getter - private final long myLeaseAcquisitionTimestamp; - - protected LeaseObtainedStatus(long eventTimestamp, long myLeaseAcquisitionTimestamp) { - super(); - this.eventTimestamp = eventTimestamp; - this.myLeaseAcquisitionTimestamp = myLeaseAcquisitionTimestamp; - } -} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java deleted file mode 100644 index e4da8bf7485..00000000000 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/LeasedToAnotherStatus.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.gobblin.runtime.api; - -import lombok.Getter; - -/* -The event in question has been leased to another. This object contains `reminderEventTimestamp` which is the event -timestamp the lease is associated with as well as `minimumReminderWaitMillis` the minimum amount of time to wait -before returning to check if the lease has completed or expired. - */ -public class LeasedToAnotherStatus extends LeaseAttemptStatus { - @Getter - private final long reminderEventTimeMillis; - - @Getter - private final long minimumReminderWaitMillis; - - protected LeasedToAnotherStatus(long reminderEventTimeMillis, long minimumReminderWaitMillis) { - super(); - this.reminderEventTimeMillis = reminderEventTimeMillis; - this.minimumReminderWaitMillis = minimumReminderWaitMillis; - } -} diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java index dd0e9d506eb..1f45c1cb352 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java @@ -19,38 +19,38 @@ import java.io.IOException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import lombok.Data; + /** * This interface defines a generic approach to a 
non-blocking, multiple active thread or host system, in which one or - * more active instances compete over ownership of a particular flow's event. The type of flow event in question does - * not impact the algorithm other than to uniquely identify the flow event. Each instance uses the interface to initiate - * an attempt at ownership over the flow event and receives a response indicating the status of the attempt. + * more active participants compete to take responsibility for a particular flow's event. The type of flow event in + * question does not impact the algorithm other than to uniquely identify the flow event. Each participant uses the + * interface to initiate an attempt at ownership over the flow event and receives a response indicating the status of + * the attempt. * * At a high level the lease arbiter works as follows: - * 1. Multiple instances receive knowledge of a flow action event to act upon - * 2. Each instance attempts to acquire rights or `a lease` to be the sole instance acting on the event by calling the - * tryAcquireLease method below and receives the resulting status. The status indicates whether this instance has - * a) acquired the lease -> then this instance will attempt to complete the lease - * b) another has acquired the lease -> then another will attempt to complete the lease - * c) flow event no longer needs to be acted upon -> terminal state - * 3. If another has acquired the lease, then the instance will check back in at the time of lease expiry to see if it - * needs to attempt the lease again [status (b) above]. - * 4. Once the instance which acquired the lease completes its work on the flow event, it calls completeLeaseUse to - * indicate to all other instances that the flow event no longer needs to be acted upon [status (c) above] + * 1. Multiple participants independently learn of a flow action event to act upon + * 2. 
Each participant attempts to acquire rights or `a lease` to be the sole participant acting on the event by + * calling the tryAcquireLease method below and receives the resulting status. The status indicates whether this + * participant has + * a) LeaseObtainedStatus -> this participant will attempt to carry out the required action before the lease expires + * b) LeasedToAnotherStatus -> another will attempt to carry out the required action before the lease expires + * c) NoLongerLeasingStatus -> flow event no longer needs to be acted upon or terminal state + * 3. If another participant has acquired the lease before this one could, then the present participant must check back + * in at the time of lease expiry to see if it needs to attempt the lease again [status (b) above]. + * 4. Once the participant which acquired the lease completes its work on the flow event, it calls recordLeaseSuccess + * to indicate to all other participants that the flow event no longer needs to be acted upon [status (c) above] */ public interface MultiActiveLeaseArbiter { - static final Logger LOG = LoggerFactory.getLogger(MultiActiveLeaseArbiter.class); - /** * This method attempts to insert an entry into store for a particular flow action event if one does not already * exist in the store for the flow action or has expired. Regardless of the outcome it also reads the lease * acquisition timestamp of the entry for that flow action event (it could have pre-existed in the table or been newly - * added by the previous write). Based on the transaction results, it will return @LeaseAttemptStatus to determine - * the next action. - * @param flowAction uniquely identifies the flow - * @param eventTimeMillis is the time this flow action should occur + * added by the previous write). Based on the transaction results, it will return {@link LeaseAttemptStatus} to + * determine the next action. 
+ * @param flowAction uniquely identifies the flow and the present action upon it + * @param eventTimeMillis is the time this flow action was triggered * @return LeaseAttemptStatus * @throws IOException */ @@ -62,7 +62,38 @@ public interface MultiActiveLeaseArbiter { * leaseAcquisitionTimeMillis values have not changed since this owner acquired the lease (indicating the lease did * not expire). * @return true if successfully updated, indicating no further actions need to be taken regarding this event. + * false if failed to update the lease properly, the caller should continue seeking to acquire the lease as + * if any actions it did successfully accomplish, do not count */ - boolean completeLeaseUse(DagActionStore.DagAction flowAction, long eventTimeMillis, long leaseAcquisitionTimeMillis) - throws IOException; + boolean recordLeaseSuccess(DagActionStore.DagAction flowAction, LeaseObtainedStatus status) throws IOException; + + /* + Object used to encapsulate status of lease acquisition attempt and derived should contain information specific to + the status that results. + */ + abstract class LeaseAttemptStatus {} + + class NoLongerLeasingStatus extends LeaseAttemptStatus {} + + /* + The participant calling this method acquired the lease for the event in question. The class contains the `eventTimestamp` + associated with the lease as well as the time the caller obtained the lease or `leaseAcquisitionTimestamp`. + */ + @Data + class LeaseObtainedStatus extends LeaseAttemptStatus { + private final long eventTimestamp; + private final long leaseAcquisitionTimestamp; + } + + /* + This flow action event already has a valid lease owned by another host. 
+ */ + @Data + class LeasedToAnotherStatus extends LeaseAttemptStatus { + // the timestamp the lease is associated with, but it may be a different timestamp for the same flow action + // (a previous participant of the event) + private final long eventTimeMillis; + // the minimum amount of time to wait before returning to check if the lease has completed or expired + private final long minimumLingerDurationMillis; +} } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MySQLMultiActiveLeaseArbiter.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java similarity index 87% rename from gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MySQLMultiActiveLeaseArbiter.java rename to gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java index 81a449a8348..51633dd55b3 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MySQLMultiActiveLeaseArbiter.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java @@ -29,6 +29,7 @@ import com.zaxxer.hikari.HikariDataSource; import javax.sql.DataSource; +import lombok.extern.slf4j.Slf4j; import org.apache.gobblin.broker.SharedResourcesBrokerFactory; import org.apache.gobblin.configuration.ConfigurationKeys; @@ -39,13 +40,13 @@ /** * MySQL based implementation of the {@link MultiActiveLeaseArbiter} which uses a MySQL store to resolve ownership of - * a flow event amongst multiple competing instances. A MySQL table is used to store flow identifying information as + * a flow event amongst multiple competing participants. A MySQL table is used to store flow identifying information as * well as the flow action associated with it. It uses two additional values of the `event_timestamp` and * `lease_acquisition_timestamp` to indicate an active lease, expired lease, and state of no longer leasing. 
The table * schema is as follows: * [flow_group | flow_name | flow_execution_id | flow_action | event_timestamp | lease_acquisition_timestamp] * (----------------------primary key------------------------) - * We also maintain another table in the database with two constants that allow us to coordinate between instances and + * We also maintain another table in the database with two constants that allow us to coordinate between participants and * ensure they are using the same values to base their coordination off of. * [epsilon | linger] * `epsilon` - time within we consider to timestamps to be the same, to account for between-host clock drift @@ -66,24 +67,37 @@ * host should actually complete its work while having the lease and then mark the flow action as NULL to indicate no * further leasing should be done for the event. */ -public class MySQLMultiActiveLeaseArbiter implements MultiActiveLeaseArbiter { +@Slf4j +public class MysqlMultiActiveLeaseArbiter implements MultiActiveLeaseArbiter { /** `j.u.Function` variant for an operation that may @throw IOException or SQLException: preserves method signature checked exceptions */ @FunctionalInterface protected interface CheckedFunction { R apply(T t) throws IOException, SQLException; } - public static final String CONFIG_PREFIX = "MySQLMultiActiveLeaseArbiter"; + public static final String CONFIG_PREFIX = "MysqlMultiActiveLeaseArbiter"; protected final DataSource dataSource; private final String leaseArbiterTableName; private final String constantsTableName; private final int epsilon; private final int linger; + + // TODO: define retention on this table + private static final String CREATE_LEASE_ARBITER_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" + + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, 
flow_action varchar(100) NOT NULL, " + + "event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + + "lease_acquisition_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," + + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,flow_action))"; + + private static final String CREATE_CONSTANTS_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s " + + "(epsilon INT, linger INT), PRIMARY KEY (epsilon, linger); INSERT INTO %s (epsilon, linger) VALUES (?,?)"; protected static final String WHERE_CLAUSE_TO_MATCH_KEY = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=?" + " AND flow_action=?"; - protected static final String WHERE_CLAUSE_TO_MATCH_ROW = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=?" - + " AND flow_action=? AND event_timestamp=? AND lease_acquisition_timestamp=?"; + protected static final String WHERE_CLAUSE_TO_MATCH_ROW = WHERE_CLAUSE_TO_MATCH_KEY + + " AND event_timestamp=? AND lease_acquisition_timestamp=?"; protected static final String SELECT_AFTER_INSERT_STATEMENT = "SELECT ROW_COUNT() AS rows_inserted_count, " + "lease_acquisition_timestamp, linger FROM %s, %s " + WHERE_CLAUSE_TO_MATCH_KEY; @@ -100,8 +114,8 @@ protected interface CheckedFunction { // Insert or update row to acquire lease if values have not changed since the previous read // Need to define three separate statements to handle cases where row does not exist or has null values to check protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT = "INSERT INTO %s " - + "(flow_group, flow_name, flow_execution_id, flow_action, event_timestamp) VALUES (?, ?, ?, ?, ?) WHERE NOT " - + "EXISTS (SELECT * FROM %s " + WHERE_CLAUSE_TO_MATCH_KEY + "); " + SELECT_AFTER_INSERT_STATEMENT; + + "(flow_group, flow_name, flow_execution_id, flow_action, event_timestamp) VALUES (?, ?, ?, ?, ?); " + + SELECT_AFTER_INSERT_STATEMENT; protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT = "UPDATE %s " + "SET event_timestamp=?" 
+ WHERE_CLAUSE_TO_MATCH_KEY + " AND event_timestamp=? AND lease_acquisition_timestamp is NULL; " + SELECT_AFTER_INSERT_STATEMENT; @@ -113,24 +127,13 @@ protected interface CheckedFunction { protected static final String CONDITIONALLY_COMPLETE_LEASE_STATEMENT = "UPDATE %s SET " + "lease_acquisition_timestamp = NULL " + WHERE_CLAUSE_TO_MATCH_ROW; - // TODO: define retention on this table - private static final String CREATE_LEASE_ARBITER_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %S (" - + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" - + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" - + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, flow_action varchar(100) NOT NULL, " - + "event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " - + "lease_acquisition_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," - + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,flow_action))"; - - private static final String CREATE_CONSTANTS_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s " - + "(epsilon INT, linger INT), PRIMARY KEY (epsilon, linger); INSERT INTO %s (epsilon, linger) VALUES (?,?)"; - @Inject - public MySQLMultiActiveLeaseArbiter(Config config) throws IOException { + public MysqlMultiActiveLeaseArbiter(Config config) throws IOException { if (config.hasPath(CONFIG_PREFIX)) { config = config.getConfig(CONFIG_PREFIX).withFallback(config); } else { - throw new IOException("Please specify the config for MySQLMultiActiveLeaseArbiter"); + throw new IOException(String.format("Please specify the config for MysqlMultiActiveLeaseArbiter using prefix %s " + + "before all properties", CONFIG_PREFIX)); } this.leaseArbiterTableName = ConfigUtils.getString(config, ConfigurationKeys.SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY, @@ -161,21 +164,16 @@ public MySQLMultiActiveLeaseArbiter(Config config) throws IOException { @Override public LeaseAttemptStatus 
tryAcquireLease(DagActionStore.DagAction flowAction, long eventTimeMillis) throws IOException { - String flowGroup = flowAction.getFlowGroup(); - String flowName = flowAction.getFlowName(); - String flowExecutionId = flowAction.getFlowExecutionId(); - Timestamp eventTimestamp = new Timestamp(eventTimeMillis); - // Check table for an existing entry for this flow action and event time ResultSet resultSet = withPreparedStatement( String.format(GET_EVENT_INFO_STATEMENT, this.leaseArbiterTableName, this.constantsTableName), getInfoStatement -> { int i = 0; - getInfoStatement.setTimestamp(i, eventTimestamp); - getInfoStatement.setString(i, flowGroup); - getInfoStatement.setString(i, flowName); - getInfoStatement.setString(i, flowExecutionId); - getInfoStatement.setString(i, flowAction.getFlowActionType().toString()); + getInfoStatement.setTimestamp(++i, new Timestamp(eventTimeMillis)); + getInfoStatement.setString(++i, flowAction.getFlowGroup()); + getInfoStatement.setString(++i, flowAction.getFlowName()); + getInfoStatement.setString(++i, flowAction.getFlowExecutionId()); + getInfoStatement.setString(++i, flowAction.getFlowActionType().toString()); return getInfoStatement.executeQuery(); }, true); @@ -184,7 +182,7 @@ public LeaseAttemptStatus tryAcquireLease(DagActionStore.DagAction flowAction, l if (!resultSet.next()) { ResultSet rs = withPreparedStatement( String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT, this.leaseArbiterTableName, - this.leaseArbiterTableName, this.leaseArbiterTableName, this.constantsTableName), + this.leaseArbiterTableName, this.constantsTableName), insertStatement -> { completeInsertPreparedStatement(insertStatement, flowAction, eventTimeMillis); return insertStatement.executeQuery(); @@ -199,23 +197,27 @@ public LeaseAttemptStatus tryAcquireLease(DagActionStore.DagAction flowAction, l int leaseValidityStatus = resultSet.getInt(4); int dbLinger = resultSet.getInt(5); + // CASE 2: If our event timestamp is older than the last event 
in db, then skip this trigger + if (eventTimeMillis < dbEventTimestamp.getTime()) { + return new NoLongerLeasingStatus(); + } // Lease is valid if (leaseValidityStatus == 1) { - // CASE 2: Same event, lease is valid + // CASE 3: Same event, lease is valid if (isWithinEpsilon) { // Utilize db timestamp for reminder return new LeasedToAnotherStatus(dbEventTimestamp.getTime(), dbLeaseAcquisitionTimestamp.getTime() + dbLinger); } - // CASE 3: Distinct event, lease is valid + // CASE 4: Distinct event, lease is valid // Utilize db timestamp for wait time, but be reminded of own event timestamp return new LeasedToAnotherStatus(eventTimeMillis, dbLeaseAcquisitionTimestamp.getTime() + dbLinger); } - // CASE 4: Lease is out of date (regardless of whether same or distinct event) + // CASE 5: Lease is out of date (regardless of whether same or distinct event) else if (leaseValidityStatus == 2) { if (isWithinEpsilon) { - LOG.warn("Lease should not be out of date for the same trigger event since epsilon << linger for flowAction" + log.warn("Lease should not be out of date for the same trigger event since epsilon << linger for flowAction" + " {}, db eventTimestamp {}, db leaseAcquisitionTimestamp {}, linger {}", flowAction, dbEventTimestamp, dbLeaseAcquisitionTimestamp, dbLinger); } @@ -230,11 +232,11 @@ else if (leaseValidityStatus == 2) { }, true); return handleResultFromAttemptedLeaseObtainment(rs, eventTimeMillis); } // No longer leasing this event - // CASE 5: Same event, no longer leasing event in db: terminate + // CASE 6: Same event, no longer leasing event in db: terminate if (isWithinEpsilon) { return new NoLongerLeasingStatus(); } - // CASE 6: Distinct event, no longer leasing event in db + // CASE 7: Distinct event, no longer leasing event in db // Use our event to acquire lease, check for previous db eventTimestamp and NULL leaseAcquisitionTimestamp ResultSet rs = withPreparedStatement( String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT, 
this.leaseArbiterTableName, @@ -270,7 +272,7 @@ protected LeaseAttemptStatus handleResultFromAttemptedLeaseObtainment(ResultSet if (numRowsUpdated == 1) { return new LeaseObtainedStatus(eventTimeMillis, leaseAcquisitionTimeMillis); } - // Another instance acquired lease in between + // Another participant acquired lease in between return new LeasedToAnotherStatus(eventTimeMillis, leaseAcquisitionTimeMillis + dbLinger); } @@ -340,8 +342,8 @@ protected void completeUpdatePreparedStatement(PreparedStatement statement, DagA } @Override - public boolean completeLeaseUse(DagActionStore.DagAction flowAction, long eventTimeMillis, - long leaseAcquisitionTimeMillis) throws IOException { + public boolean recordLeaseSuccess(DagActionStore.DagAction flowAction, LeaseObtainedStatus status) + throws IOException { String flowGroup = flowAction.getFlowGroup(); String flowName = flowAction.getFlowName(); String flowExecutionId = flowAction.getFlowExecutionId(); @@ -353,22 +355,22 @@ public boolean completeLeaseUse(DagActionStore.DagAction flowAction, long eventT updateStatement.setString(++i, flowName); updateStatement.setString(++i, flowExecutionId); updateStatement.setString(++i, flowActionType.toString()); - updateStatement.setTimestamp(++i, new Timestamp(eventTimeMillis)); - updateStatement.setTimestamp(++i, new Timestamp(leaseAcquisitionTimeMillis)); + updateStatement.setTimestamp(++i, new Timestamp(status.getEventTimestamp())); + updateStatement.setTimestamp(++i, new Timestamp(status.getLeaseAcquisitionTimestamp())); int numRowsUpdated = updateStatement.executeUpdate(); if (numRowsUpdated == 0) { - LOG.info("Multi-active lease arbiter lease attempt: [%s, eventTimestamp: %s] - FAILED to complete because " + log.info("Multi-active lease arbiter lease attempt: [%s, eventTimestamp: %s] - FAILED to complete because " + "lease expired or event cleaned up before host completed required actions", flowAction, - eventTimeMillis); + status.getEventTimestamp()); return false; } if( 
numRowsUpdated == 1) { - LOG.info("Multi-active lease arbiter lease attempt: [%s, eventTimestamp: %s] - COMPLETED, no longer leasing" - + " this event after this.", flowAction, eventTimeMillis); + log.info("Multi-active lease arbiter lease attempt: [%s, eventTimestamp: %s] - COMPLETED, no longer leasing" + + " this event after this.", flowAction, status.getEventTimestamp()); return true; }; throw new IOException(String.format("Attempt to complete lease use: [%s, eventTimestamp: %s] - updated more " - + "rows than expected", flowAction, eventTimeMillis)); + + "rows than expected", flowAction, status.getEventTimestamp())); }, true); } @@ -382,7 +384,7 @@ protected T withPreparedStatement(String sql, CheckedFunction long triggerTimestampMillis = trigger.getPreviousFireTime().getTime(); - jobProps.setProperty(ConfigurationKeys.SCHEDULER_NEW_EVENT_TIMESTAMP_MILLIS_KEY, + jobProps.setProperty(ConfigurationKeys.SCHEDULER_EVENT_TO_TRIGGER_TIMESTAMP_MILLIS_KEY, String.valueOf(triggerTimestampMillis)); try { diff --git a/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java b/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java index 3ab44ebbcf5..255dd07898f 100644 --- a/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java +++ b/gobblin-runtime/src/test/java/org/apache/gobblin/runtime/dag_action_store/MysqlDagActionStoreTest.java @@ -90,7 +90,7 @@ public void testGetActions() throws IOException { Assert.assertEquals(dagActions, set); } - @Test(dependsOnMethods = "testGetAction") + @Test(dependsOnMethods = "testGetActions") public void testDeleteAction() throws IOException, SQLException { this.mysqlDagActionStore.deleteDagAction( new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.KILL)); diff --git 
a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java index f1edc4c7479..c0f140a9fe4 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/core/GobblinServiceGuiceModule.java @@ -20,9 +20,10 @@ import java.util.Objects; import org.apache.gobblin.runtime.api.DagActionStore; -import org.apache.gobblin.runtime.api.MySQLMultiActiveLeaseArbiter; +import org.apache.gobblin.runtime.api.MultiActiveLeaseArbiter; +import org.apache.gobblin.runtime.api.MysqlMultiActiveLeaseArbiter; import org.apache.gobblin.runtime.dag_action_store.MysqlDagActionStore; -import org.apache.gobblin.service.modules.orchestration.SchedulerLeaseAlgoHandler; +import org.apache.gobblin.service.modules.orchestration.FlowTriggerHandler; import org.apache.gobblin.service.modules.orchestration.UserQuotaManager; import org.apache.gobblin.service.modules.restli.GobblinServiceFlowConfigV2ResourceHandlerWithWarmStandby; import org.apache.gobblin.service.modules.restli.GobblinServiceFlowExecutionResourceHandlerWithWarmStandby; @@ -164,6 +165,12 @@ public void configure(Binder binder) { binder.bind(FlowExecutionResourceHandler.class).to(GobblinServiceFlowExecutionResourceHandler.class); } + OptionalBinder.newOptionalBinder(binder, MultiActiveLeaseArbiter.class); + OptionalBinder.newOptionalBinder(binder, FlowTriggerHandler.class); + if (serviceConfig.isMultiActiveSchedulerEnabled()) { + binder.bind(MysqlMultiActiveLeaseArbiter.class); + binder.bind(FlowTriggerHandler.class); + } binder.bind(FlowConfigsResource.class); binder.bind(FlowConfigsV2Resource.class); @@ -246,11 +253,6 @@ public void configure(Binder binder) { 
binder.bind(DagActionStoreChangeMonitor.class).toProvider(DagActionStoreChangeMonitorFactory.class).in(Singleton.class); } - if (serviceConfig.isMultiActiveSchedulerEnabled()) { - binder.bind(MySQLMultiActiveLeaseArbiter.class); - binder.bind(SchedulerLeaseAlgoHandler.class); - } - binder.bind(GobblinServiceManager.class); binder.bind(ServiceDatabaseProvider.class).to(ServiceDatabaseProviderImpl.class); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java index a4df52eb4f2..28e7f5c8421 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java @@ -579,11 +579,10 @@ public void run() { } } - private void clearUpDagAction(DagId dagId) throws IOException { + private void clearUpDagAction(DagId dagId, DagActionStore.FlowActionType flowActionType) throws IOException { if (this.dagActionStore.isPresent()) { this.dagActionStore.get().deleteDagAction( - new DagActionStore.DagAction(dagId.flowGroup, dagId.flowName, dagId.flowExecutionId, - DagActionStore.FlowActionType.KILL)); + new DagActionStore.DagAction(dagId.flowGroup, dagId.flowName, dagId.flowExecutionId, flowActionType)); } } @@ -595,13 +594,13 @@ private void beginResumingDag(DagId dagIdToResume) throws IOException { String dagId= dagIdToResume.toString(); if (!this.failedDagIds.contains(dagId)) { log.warn("No dag found with dagId " + dagId + ", so cannot resume flow"); - clearUpDagAction(dagIdToResume); + clearUpDagAction(dagIdToResume, DagActionStore.FlowActionType.RESUME); return; } Dag dag = this.failedDagStateStore.getDag(dagId); if (dag == null) { log.error("Dag " + dagId + " was found in memory but not found in failed dag state store"); - clearUpDagAction(dagIdToResume); + clearUpDagAction(dagIdToResume, 
DagActionStore.FlowActionType.RESUME); return; } @@ -652,7 +651,7 @@ private void finishResumingDags() throws IOException { if (dagReady) { this.dagStateStore.writeCheckpoint(dag.getValue()); this.failedDagStateStore.cleanUp(dag.getValue()); - clearUpDagAction(DagManagerUtils.generateDagId(dag.getValue())); + clearUpDagAction(DagManagerUtils.generateDagId(dag.getValue()), DagActionStore.FlowActionType.RESUME); this.failedDagIds.remove(dag.getKey()); this.resumingDags.remove(dag.getKey()); initialize(dag.getValue()); @@ -681,7 +680,8 @@ private void cancelDag(DagId dagId) throws ExecutionException, InterruptedExcept } else { log.warn("Did not find Dag with id {}, it might be already cancelled/finished.", dagToCancel); } - clearUpDagAction(dagId); + // Called after a KILL request is received + clearUpDagAction(dagId, DagActionStore.FlowActionType.KILL); } private void cancelDagNode(DagNode dagNodeToCancel) throws ExecutionException, InterruptedException { diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java similarity index 59% rename from gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java rename to gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java index e256fcadc5b..ba639f6b003 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/SchedulerLeaseAlgoHandler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java @@ -18,7 +18,6 @@ package org.apache.gobblin.service.modules.orchestration; import java.io.IOException; -import java.sql.SQLException; import java.time.LocalDateTime; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -30,42 +29,39 @@ import org.quartz.JobKey; 
import org.quartz.SchedulerException; import org.quartz.Trigger; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import com.typesafe.config.Config; import javax.inject.Inject; +import lombok.extern.slf4j.Slf4j; import org.apache.gobblin.configuration.ConfigurationKeys; import org.apache.gobblin.instrumented.Instrumented; import org.apache.gobblin.metrics.ContextAwareMeter; import org.apache.gobblin.metrics.MetricContext; import org.apache.gobblin.runtime.api.DagActionStore; -import org.apache.gobblin.runtime.api.LeaseAttemptStatus; import org.apache.gobblin.runtime.api.MultiActiveLeaseArbiter; +import org.apache.gobblin.runtime.api.MysqlMultiActiveLeaseArbiter; import org.apache.gobblin.runtime.metrics.RuntimeMetrics; import org.apache.gobblin.scheduler.JobScheduler; import org.apache.gobblin.scheduler.SchedulerService; import org.apache.gobblin.util.ConfigUtils; -import org.apache.gobblin.runtime.api.LeaseObtainedStatus; -import org.apache.gobblin.runtime.api.LeasedToAnotherStatus; /** * Handler used to coordinate multiple hosts with enabled schedulers to respond to flow action events. It uses the - * {@link org.apache.gobblin.runtime.api.MySQLMultiActiveLeaseArbiter} to determine a single lease owner at a given time + * {@link MysqlMultiActiveLeaseArbiter} to determine a single lease owner at a given time * for a flow action event. After acquiring the lease, it persists the flow action event to the {@link DagActionStore} * to be eventually acted upon by the host with the active DagManager. Once it has completed this action, it will mark * the lease as completed by calling the - * {@link org.apache.gobblin.runtime.api.MySQLMultiActiveLeaseArbiter.completeLeaseUse} method. Hosts that do not gain + * MysqlMultiActiveLeaseArbiter.recordLeaseSuccess method. 
Hosts that do not gain * the lease for the event, instead schedule a reminder using the {@link SchedulerService} to check back in on the * previous lease owner's completion status after the lease should expire to ensure the event is handled in failure * cases. */ -public class SchedulerLeaseAlgoHandler { - private static final Logger LOG = LoggerFactory.getLogger(SchedulerLeaseAlgoHandler.class); - private final int staggerUpperBoundSec; +@Slf4j +public class FlowTriggerHandler { + private final int schedulerMaxBackoffMillis; private static Random random = new Random(); protected MultiActiveLeaseArbiter multiActiveLeaseArbiter; protected JobScheduler jobScheduler; @@ -73,19 +69,20 @@ public class SchedulerLeaseAlgoHandler { protected DagActionStore dagActionStore; private MetricContext metricContext; private ContextAwareMeter numLeasesCompleted; + @Inject - public SchedulerLeaseAlgoHandler(Config config, MultiActiveLeaseArbiter leaseDeterminationStore, + // TODO: should multiActiveLeaseArbiter and DagActionStore be optional? 
+ public FlowTriggerHandler(Config config, MultiActiveLeaseArbiter leaseDeterminationStore, JobScheduler jobScheduler, SchedulerService schedulerService, DagActionStore dagActionStore) { - this.staggerUpperBoundSec = ConfigUtils.getInt(config, - ConfigurationKeys.SCHEDULER_STAGGERING_UPPER_BOUND_SEC_KEY, - ConfigurationKeys.DEFAULT_SCHEDULER_STAGGERING_UPPER_BOUND_SEC); + this.schedulerMaxBackoffMillis = ConfigUtils.getInt(config, ConfigurationKeys.SCHEDULER_MAX_BACKOFF_MILLIS_KEY, + ConfigurationKeys.DEFAULT_SCHEDULER_MAX_BACKOFF_MILLIS); this.multiActiveLeaseArbiter = leaseDeterminationStore; this.jobScheduler = jobScheduler; this.schedulerService = schedulerService; this.dagActionStore = dagActionStore; this.metricContext = Instrumented.getMetricContext(new org.apache.gobblin.configuration.State(ConfigUtils.configToProperties(config)), this.getClass()); - this.numLeasesCompleted = metricContext.contextAwareMeter(RuntimeMetrics.GOBBLIN_SCHEDULER_LEASE_ALGO_HANDLER_NUM_LEASES_COMPLETED); + this.numLeasesCompleted = metricContext.contextAwareMeter(RuntimeMetrics.GOBBLIN_SCHEDULER_LEASE_ALGO_HANDLER_NUM_FLOWS_SUBMITTED); } /** @@ -96,82 +93,79 @@ public SchedulerLeaseAlgoHandler(Config config, MultiActiveLeaseArbiter leaseDet * @param eventTimeMillis * @throws IOException */ - public void handleNewSchedulerEvent(Properties jobProps, DagActionStore.DagAction flowAction, long eventTimeMillis) + public void handleTriggerEvent(Properties jobProps, DagActionStore.DagAction flowAction, long eventTimeMillis) throws IOException { - LeaseAttemptStatus leaseAttemptStatus = + MultiActiveLeaseArbiter.LeaseAttemptStatus leaseAttemptStatus = multiActiveLeaseArbiter.tryAcquireLease(flowAction, eventTimeMillis); // TODO: add a log event or metric for each of these cases - switch (leaseAttemptStatus.getClass().getSimpleName()) { - case "LeaseObtainedStatus": - finalizeLease((LeaseObtainedStatus) leaseAttemptStatus, flowAction); - break; - case "LeasedToAnotherStatus": - 
scheduleReminderForEvent(jobProps, (LeasedToAnotherStatus) leaseAttemptStatus, flowAction, eventTimeMillis); - break; - case "NoLongerLeasingStatus": - break; - default: + if (leaseAttemptStatus instanceof MultiActiveLeaseArbiter.LeaseObtainedStatus) { + persistFlowAction((MultiActiveLeaseArbiter.LeaseObtainedStatus) leaseAttemptStatus, flowAction); + return; + } else if (leaseAttemptStatus instanceof MultiActiveLeaseArbiter.LeasedToAnotherStatus) { + scheduleReminderForEvent(jobProps, (MultiActiveLeaseArbiter.LeasedToAnotherStatus) leaseAttemptStatus, flowAction, + eventTimeMillis); + } else if (leaseAttemptStatus instanceof MultiActiveLeaseArbiter.NoLongerLeasingStatus) { + return; } + log.warn("Received type of leaseAttemptStatus: {} not handled by this method", leaseAttemptStatus.getClass().getName()); } // Called after obtaining a lease to persist the flow action to {@link DagActionStore} and mark the lease as done - private boolean finalizeLease(LeaseObtainedStatus status, DagActionStore.DagAction flowAction) { + private boolean persistFlowAction(MultiActiveLeaseArbiter.LeaseObtainedStatus status, DagActionStore.DagAction flowAction) { try { this.dagActionStore.addDagAction(flowAction.getFlowGroup(), flowAction.getFlowName(), flowAction.getFlowExecutionId(), flowAction.getFlowActionType()); - if (this.dagActionStore.exists(flowAction.getFlowGroup(), flowAction.getFlowName(), - flowAction.getFlowExecutionId(), flowAction.getFlowActionType())) { - // If the flow action has been persisted to the {@link DagActionStore} we can close the lease - this.numLeasesCompleted.mark(); - return this.multiActiveLeaseArbiter.completeLeaseUse(flowAction, status.getEventTimestamp(), - status.getMyLeaseAcquisitionTimestamp()); - } - } catch (IOException | SQLException e) { + // If the flow action has been persisted to the {@link DagActionStore} we can close the lease + this.numLeasesCompleted.mark(); + return this.multiActiveLeaseArbiter.recordLeaseSuccess(flowAction, status); + 
} catch (IOException e) { throw new RuntimeException(e); } - // TODO: should this return an error or print a warning log if failed to commit to dag action store? - return false; } /** - * This method is used by {@link SchedulerLeaseAlgoHandler.handleNewSchedulerEvent} to schedule a reminder for itself - * to check on the other participant's progress to finish acting on a flow action after the time the lease should - * expire. + * This method is used by FlowTriggerHandler.handleTriggerEvent to schedule a reminder for itself to check on + * the other participant's progress to finish acting on a flow action after the time the lease should expire. * @param jobProps * @param status used to extract event to be reminded for and the minimum time after which reminder should occur * @param originalEventTimeMillis the event timestamp we were originally handling * @param flowAction */ - private void scheduleReminderForEvent(Properties jobProps, LeasedToAnotherStatus status, + private void scheduleReminderForEvent(Properties jobProps, MultiActiveLeaseArbiter.LeasedToAnotherStatus status, DagActionStore.DagAction flowAction, long originalEventTimeMillis) { // Add a small randomization to the minimum reminder wait time to avoid 'thundering herd' issue - String cronExpression = createCronFromDelayPeriod(status.getMinimumReminderWaitMillis() + random.nextInt(staggerUpperBoundSec)); + String cronExpression = createCronFromDelayPeriod(status.getMinimumLingerDurationMillis() + + random.nextInt(schedulerMaxBackoffMillis)); jobProps.setProperty(ConfigurationKeys.JOB_SCHEDULE_KEY, cronExpression); - // Ensure we save the event timestamp that we're setting reminder for, in addition to our own event timestamp which may be different - jobProps.setProperty(ConfigurationKeys.SCHEDULER_REMINDER_EVENT_TIMESTAMP_MILLIS_KEY, String.valueOf(status.getReminderEventTimeMillis())); - jobProps.setProperty(ConfigurationKeys.SCHEDULER_NEW_EVENT_TIMESTAMP_MILLIS_KEY,
String.valueOf(status.getReminderEventTimeMillis())); + // Ensure we save the event timestamp that we're setting reminder for to have for debugging purposes + // in addition to the event we want to initiate + jobProps.setProperty(ConfigurationKeys.SCHEDULER_EVENT_TO_REVISIT_TIMESTAMP_MILLIS_KEY, + String.valueOf(status.getEventTimeMillis())); + jobProps.setProperty(ConfigurationKeys.SCHEDULER_EVENT_TO_TRIGGER_TIMESTAMP_MILLIS_KEY, + String.valueOf(originalEventTimeMillis)); JobKey key = new JobKey(flowAction.getFlowName(), flowAction.getFlowGroup()); - Trigger trigger = this.jobScheduler.getTrigger(key, jobProps); + // Create a new trigger for the flow in job scheduler that is set to fire at the minimum reminder wait time calculated + Trigger trigger = this.jobScheduler.createTriggerForJob(key, jobProps); try { - LOG.info("Scheduler Lease Algo Handler - [%s, eventTimestamp: %s] - attempting to schedule reminder for event %s in %s millis", - flowAction, originalEventTimeMillis, status.getReminderEventTimeMillis(), trigger.getNextFireTime()); + log.info("Scheduler Lease Algo Handler - [{}, eventTimestamp: {}] - attempting to schedule reminder for event {} in {} millis", + flowAction, originalEventTimeMillis, status.getEventTimeMillis(), trigger.getNextFireTime()); this.schedulerService.getScheduler().scheduleJob(trigger); } catch (SchedulerException e) { - LOG.warn("Failed to add job reminder due to SchedulerException for job %s trigger event %s ", key, status.getReminderEventTimeMillis(), e); + log.warn("Failed to add job reminder due to SchedulerException for job {} trigger event {} ", key, status.getEventTimeMillis(), e); } - LOG.info(String.format("Scheduler Lease Algo Handler - [%s, eventTimestamp: %s] - SCHEDULED REMINDER for event %s in %s millis", - flowAction, originalEventTimeMillis, status.getReminderEventTimeMillis(), trigger.getNextFireTime())); + log.info(String.format("Scheduler Lease Algo Handler - [%s, eventTimestamp: %s] - SCHEDULED REMINDER for
event %s in %s millis", + flowAction, originalEventTimeMillis, status.getEventTimeMillis(), trigger.getNextFireTime())); } /** * These methods should only be called from the Orchestrator or JobScheduler classes as it directly adds jobs to the * Quartz scheduler - * @param delayPeriodSeconds + * @param delayPeriodMillis * @return */ - protected static String createCronFromDelayPeriod(long delayPeriodSeconds) { + protected static String createCronFromDelayPeriod(long delayPeriodMillis) { LocalDateTime now = LocalDateTime.now(ZoneId.of("UTC")); - LocalDateTime delaySecondsLater = now.plus(delayPeriodSeconds, ChronoUnit.SECONDS); + LocalDateTime delaySecondsLater = now.plus(delayPeriodMillis, ChronoUnit.MILLIS); // TODO: investigate potentially better way of generating cron expression that does not make it US dependent DateTimeFormatter formatter = DateTimeFormatter.ofPattern("ss mm HH dd MM ? yyyy", Locale.US); return delaySecondsLater.format(formatter); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index 856664d2203..09eff7b04d1 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -40,7 +40,6 @@ import javax.annotation.Nonnull; import javax.inject.Inject; -import javax.inject.Named; import javax.inject.Singleton; import lombok.Getter; import lombok.Setter; @@ -66,7 +65,6 @@ import org.apache.gobblin.runtime.api.TopologySpec; import org.apache.gobblin.runtime.spec_catalog.AddSpecResponse; import org.apache.gobblin.runtime.spec_catalog.TopologyCatalog; -import org.apache.gobblin.runtime.util.InjectionNames; import org.apache.gobblin.service.ServiceConfigKeys; import org.apache.gobblin.service.modules.flow.SpecCompiler; import 
org.apache.gobblin.service.modules.flowgraph.Dag; @@ -106,8 +104,7 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { private FlowStatusGenerator flowStatusGenerator; private UserQuotaManager quotaManager; - private boolean isMultiActiveSchedulerEnabled; - private SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler; + private Optional schedulerLeaseAlgoHandler; private final ClassAliasResolver aliasResolver; @@ -115,13 +112,12 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { public Orchestrator(Config config, Optional topologyCatalog, Optional dagManager, Optional log, - FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled, boolean isMultiActiveSchedulerEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { + FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled, Optional schedulerLeaseAlgoHandler) { _log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass()); this.aliasResolver = new ClassAliasResolver<>(SpecCompiler.class); this.topologyCatalog = topologyCatalog; this.dagManager = dagManager; this.flowStatusGenerator = flowStatusGenerator; - this.isMultiActiveSchedulerEnabled = isMultiActiveSchedulerEnabled; this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; try { String specCompilerClassName = ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_FLOWCOMPILER_CLASS; @@ -165,9 +161,8 @@ public Orchestrator(Config config, Optional topologyCatalog, Op @Inject public Orchestrator(Config config, FlowStatusGenerator flowStatusGenerator, Optional topologyCatalog, - Optional dagManager, Optional log, @Named(InjectionNames.MULTI_ACTIVE_SCHEDULER_ENABLED) boolean multiActiveSchedulerEnabled, - SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) { - this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, multiActiveSchedulerEnabled, schedulerLeaseAlgoHandler); + Optional dagManager, Optional log, Optional schedulerLeaseAlgoHandler) { + 
this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, schedulerLeaseAlgoHandler); } @@ -317,26 +312,27 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil } // If multi-active scheduler is enabled do not pass onto DagManager, otherwise scheduler forwards it directly - if (this.isMultiActiveSchedulerEnabled) { + if (schedulerLeaseAlgoHandler.isPresent()) { + // If triggerTimestampMillis is 0, then it was not set by the job trigger handler, and we cannot handle this event + if (triggerTimestampMillis == 0L) { + _log.warn("Skipping execution of spec: {} because missing trigger timestamp in job properties", + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY)); + flowMetadata.put(TimingEvent.METADATA_MESSAGE, "Flow orchestration skipped because no trigger timestamp " + + "associated with flow action."); + if (this.eventSubmitter.isPresent()) { + new TimingEvent(this.eventSubmitter.get(), TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata); + } + return; + } + String flowExecutionId = flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD); DagActionStore.DagAction flowAction = new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, DagActionStore.FlowActionType.LAUNCH); - schedulerLeaseAlgoHandler.handleNewSchedulerEvent(jobProps, flowAction, triggerTimestampMillis); + schedulerLeaseAlgoHandler.get().handleTriggerEvent(jobProps, flowAction, triggerTimestampMillis); _log.info("Multi-active scheduler finished handling trigger event: [%s, triggerEventTimestamp: %s]", flowAction, triggerTimestampMillis); } else if (this.dagManager.isPresent()) { - try { - //Send the dag to the DagManager. 
- this.dagManager.get().addDag(jobExecutionPlanDag, true, true); - } catch (Exception ex) { - if (this.eventSubmitter.isPresent()) { - // pronounce failed before stack unwinds, to ensure flow not marooned in `COMPILED` state; (failure likely attributable to DB connection/failover) - String failureMessage = "Failed to add Job Execution Plan due to: " + ex.getMessage(); - flowMetadata.put(TimingEvent.METADATA_MESSAGE, failureMessage); - new TimingEvent(this.eventSubmitter.get(), TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata); - } - throw ex; - } + submitFlowToDagManager((FlowSpec) spec, Optional.of(jobExecutionPlanDag)); } else { // Schedule all compiled JobSpecs on their respective Executor for (Dag.DagNode dagNode : jobExecutionPlanDag.getNodes()) { @@ -378,6 +374,26 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS); } + public void submitFlowToDagManager(FlowSpec flowSpec, Optional> jobExecutionPlanDag) + throws IOException { + if (!jobExecutionPlanDag.isPresent()) { + jobExecutionPlanDag = Optional.of(specCompiler.compileFlow(flowSpec)); + } + try { + //Send the dag to the DagManager. + this.dagManager.get().addDag(jobExecutionPlanDag.get(), true, true); + } catch (Exception ex) { + if (this.eventSubmitter.isPresent()) { + // pronounce failed before stack unwinds, to ensure flow not marooned in `COMPILED` state; (failure likely attributable to DB connection/failover) + String failureMessage = "Failed to add Job Execution Plan due to: " + ex.getMessage(); + Map flowMetadata = TimingEventUtils.getFlowMetadata(flowSpec); + flowMetadata.put(TimingEvent.METADATA_MESSAGE, failureMessage); + new TimingEvent(this.eventSubmitter.get(), TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata); + } + throw ex; + } + } + /** * Check if a FlowSpec instance is allowed to run. 
* diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java index d75465f48ef..0c9bbe2da4b 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java @@ -55,9 +55,8 @@ public void resume(ComplexResourceKey quotaManager; - protected final Boolean multiActiveSchedulerEnabled; - protected final SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler; + protected final Optional schedulerLeaseAlgoHandler; @Getter protected final Map scheduledFlowSpecs; @Getter @@ -166,8 +165,8 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser Config config, Optional helixManager, Optional flowCatalog, Optional topologyCatalog, Orchestrator orchestrator, SchedulerService schedulerService, Optional quotaManager, Optional log, - @Named(InjectionNames.WARM_STANDBY_ENABLED) boolean warmStandbyEnabled, @Named(InjectionNames.MULTI_ACTIVE_SCHEDULER_ENABLED) boolean multiActiveSchedulerEnabled, - SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { + @Named(InjectionNames.WARM_STANDBY_ENABLED) boolean warmStandbyEnabled, + Optional schedulerLeaseAlgoHandler) throws Exception { super(ConfigUtils.configToProperties(config), schedulerService); _log = log.isPresent() ? 
log.get() : LoggerFactory.getLogger(getClass()); @@ -183,7 +182,6 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser && config.hasPath(GOBBLIN_SERVICE_SCHEDULER_DR_NOMINATED); this.warmStandbyEnabled = warmStandbyEnabled; this.quotaManager = quotaManager; - this.multiActiveSchedulerEnabled = multiActiveSchedulerEnabled; this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; // Check that these metrics do not exist before adding, mainly for testing purpose which creates multiple instances // of the scheduler. If one metric exists, then the others should as well. @@ -204,11 +202,13 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser } public GobblinServiceJobScheduler(String serviceName, Config config, FlowStatusGenerator flowStatusGenerator, - Optional helixManager, - Optional flowCatalog, Optional topologyCatalog, Optional dagManager, Optional quotaManager, - SchedulerService schedulerService, Optional log, boolean warmStandbyEnabled, boolean multiActiveSchedulerEnabled, SchedulerLeaseAlgoHandler schedulerLeaseAlgoHandler) throws Exception { + Optional helixManager, Optional flowCatalog, Optional topologyCatalog, + Optional dagManager, Optional quotaManager, SchedulerService schedulerService, + Optional log, boolean warmStandbyEnabled, Optional schedulerLeaseAlgoHandler) + throws Exception { this(serviceName, config, helixManager, flowCatalog, topologyCatalog, - new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log, multiActiveSchedulerEnabled, schedulerLeaseAlgoHandler), schedulerService, quotaManager, log, warmStandbyEnabled, multiActiveSchedulerEnabled, schedulerLeaseAlgoHandler); + new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log, schedulerLeaseAlgoHandler), + schedulerService, quotaManager, log, warmStandbyEnabled, schedulerLeaseAlgoHandler); } public synchronized void setActive(boolean isActive) { @@ -446,24 +446,14 @@ public 
synchronized void scheduleJob(Properties jobProps, JobListener jobListene public void runJob(Properties jobProps, JobListener jobListener) throws JobException { try { Spec flowSpec = this.scheduledFlowSpecs.get(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY)); - String triggerTimestampMillis = extractTriggerTimestampMillis(jobProps); + String triggerTimestampMillis = jobProps.getProperty( + ConfigurationKeys.SCHEDULER_EVENT_TO_TRIGGER_TIMESTAMP_MILLIS_KEY, "0"); this.orchestrator.orchestrate(flowSpec, jobProps, Long.parseLong(triggerTimestampMillis)); } catch (Exception e) { throw new JobException("Failed to run Spec: " + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), e); } } - /* - Helper method used to extract the trigger timestamp from Properties object. If key for `original` trigger exists, then - we use that because this is a reminder event and the actual event trigger is the time we wanted to be reminded of the - original trigger. - */ - public static String extractTriggerTimestampMillis(Properties jobProps) { - return jobProps.containsKey(ConfigurationKeys.SCHEDULER_REMINDER_EVENT_TIMESTAMP_MILLIS_KEY) - ?
jobProps.getProperty(ConfigurationKeys.SCHEDULER_REMINDER_EVENT_TIMESTAMP_MILLIS_KEY, "0L"): - jobProps.getProperty(ConfigurationKeys.SCHEDULER_NEW_EVENT_TIMESTAMP_MILLIS_KEY,"0L"); - } - /** * * @param addedSpec spec to be added diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java index 6bbe231e345..99d5e00694c 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java @@ -18,15 +18,12 @@ package org.apache.gobblin.service.monitoring; import java.io.IOException; -import java.lang.reflect.InvocationTargetException; import java.net.URI; import java.net.URISyntaxException; -import java.util.Map; import java.util.UUID; import java.util.concurrent.TimeUnit; -import org.apache.commons.lang3.reflect.ConstructorUtils; - +import com.google.common.base.Optional; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -38,8 +35,6 @@ import org.apache.gobblin.kafka.client.DecodeableKafkaRecord; import org.apache.gobblin.metrics.ContextAwareGauge; import org.apache.gobblin.metrics.ContextAwareMeter; -import org.apache.gobblin.metrics.event.EventSubmitter; -import org.apache.gobblin.metrics.event.TimingEvent; import org.apache.gobblin.runtime.api.DagActionStore; import org.apache.gobblin.runtime.api.FlowSpec; import org.apache.gobblin.runtime.api.SpecNotFoundException; @@ -47,13 +42,8 @@ import org.apache.gobblin.runtime.metrics.RuntimeMetrics; import org.apache.gobblin.runtime.spec_catalog.FlowCatalog; import org.apache.gobblin.service.FlowId; -import org.apache.gobblin.service.ServiceConfigKeys; -import org.apache.gobblin.service.modules.flow.SpecCompiler; -import 
org.apache.gobblin.service.modules.flowgraph.Dag; import org.apache.gobblin.service.modules.orchestration.DagManager; -import org.apache.gobblin.service.modules.orchestration.TimingEventUtils; -import org.apache.gobblin.service.modules.spec.JobExecutionPlan; -import org.apache.gobblin.util.ClassAliasResolver; +import org.apache.gobblin.service.modules.orchestration.Orchestrator; /** @@ -88,38 +78,23 @@ public String load(String key) throws Exception { protected DagActionStore dagActionStore; protected DagManager dagManager; - protected SpecCompiler specCompiler; + protected Orchestrator orchestrator; protected boolean isMultiActiveSchedulerEnabled; protected FlowCatalog flowCatalog; - protected EventSubmitter eventSubmitter; // Note that the topic is an empty string (rather than null to avoid NPE) because this monitor relies on the consumer // client itself to determine all Kafka related information dynamically rather than through the config. public DagActionStoreChangeMonitor(String topic, Config config, DagActionStore dagActionStore, DagManager dagManager, - int numThreads, boolean isMultiActiveSchedulerEnabled, FlowCatalog flowCatalog) { + int numThreads, FlowCatalog flowCatalog, Orchestrator orchestrator, boolean isMultiActiveSchedulerEnabled) { // Differentiate group id for each host super(topic, config.withValue(GROUP_ID_KEY, ConfigValueFactory.fromAnyRef(DAG_ACTION_CHANGE_MONITOR_PREFIX + UUID.randomUUID().toString())), numThreads); this.dagActionStore = dagActionStore; this.dagManager = dagManager; - ClassAliasResolver aliasResolver = new ClassAliasResolver(SpecCompiler.class); - try { - String specCompilerClassName = ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_FLOWCOMPILER_CLASS; - if (config.hasPath(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWCOMPILER_CLASS_KEY)) { - specCompilerClassName = config.getString(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWCOMPILER_CLASS_KEY); - } - log.info("Using specCompiler class name/alias " + specCompilerClassName); - - 
this.specCompiler = (SpecCompiler) ConstructorUtils.invokeConstructor(Class.forName(aliasResolver.resolve( - specCompilerClassName)), config); - } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException - | ClassNotFoundException e) { - throw new RuntimeException(e); - } - this.isMultiActiveSchedulerEnabled = isMultiActiveSchedulerEnabled; this.flowCatalog = flowCatalog; - this.eventSubmitter = new EventSubmitter.Builder(this.getMetricContext(), "org.apache.gobblin.service").build(); + this.orchestrator = orchestrator; + this.isMultiActiveSchedulerEnabled = isMultiActiveSchedulerEnabled; } @Override @@ -131,7 +106,7 @@ protected void assignTopicPartitions() { @Override /* - This class is multi-threaded and this message will be called by multiple threads, however any given message will be + This class is multithreaded and this message will be called by multiple threads, however any given message will be partitioned and processed by only one thread (and corresponding queue). */ protected void processMessage(DecodeableKafkaRecord message) { @@ -180,7 +155,7 @@ protected void processMessage(DecodeableKafkaRecord message) { new DagActionStore.DagAction(flowGroup, flowName, flowExecutionId, dagActionType))); } log.info("Received insert dag action and about to forward launch request to DagManager"); - submitFlowToDagManager(flowGroup, flowName); + submitFlowToDagManagerHelper(flowGroup, flowName); } else { log.warn("Received unsupported dagAction {}. 
Expected to be a KILL, RESUME, or LAUNCH", dagActionType); this.unexpectedErrors.mark(); @@ -207,16 +182,14 @@ protected void processMessage(DecodeableKafkaRecord message) { dagActionsSeenCache.put(changeIdentifier, changeIdentifier); } - protected void submitFlowToDagManager(String flowGroup, String flowName) { + protected void submitFlowToDagManagerHelper(String flowGroup, String flowName) { // Retrieve job execution plan by recompiling the flow spec to send to the DagManager FlowId flowId = new FlowId().setFlowGroup(flowGroup).setFlowName(flowName); FlowSpec spec = null; try { URI flowUri = FlowSpec.Utils.createFlowSpecUri(flowId); spec = (FlowSpec) flowCatalog.getSpecs(flowUri); - Dag jobExecutionPlanDag = specCompiler.compileFlow(spec); - //Send the dag to the DagManager. - dagManager.addDag(jobExecutionPlanDag, true, true); + this.orchestrator.submitFlowToDagManager(spec, Optional.absent()); } catch (URISyntaxException e) { log.warn("Could not create URI object for flowId {} due to error {}", flowId, e.getMessage()); this.unexpectedErrors.mark(); @@ -226,10 +199,6 @@ protected void submitFlowToDagManager(String flowGroup, String flowName) { this.unexpectedErrors.mark(); return; } catch (IOException e) { - Map flowMetadata = TimingEventUtils.getFlowMetadata(spec); - String failureMessage = "Failed to add Job Execution Plan due to: " + e.getMessage(); - flowMetadata.put(TimingEvent.METADATA_MESSAGE, failureMessage); - new TimingEvent(this.eventSubmitter, TimingEvent.FlowTimings.FLOW_FAILED).stop(flowMetadata); log.warn("Failed to add Job Execution Plan for flow group: {} name: {} due to error {}", flowGroup, flowName, e); this.unexpectedErrors.mark(); return; diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java index 7b54c3fb03e..5806949a8b1 100644 --- 
a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitorFactory.java @@ -30,6 +30,7 @@ import org.apache.gobblin.runtime.spec_catalog.FlowCatalog; import org.apache.gobblin.runtime.util.InjectionNames; import org.apache.gobblin.service.modules.orchestration.DagManager; +import org.apache.gobblin.service.modules.orchestration.Orchestrator; import org.apache.gobblin.util.ConfigUtils; @@ -44,15 +45,18 @@ public class DagActionStoreChangeMonitorFactory implements Providerabsent(), Optional.of(logger), false, this.mockSchedulerLeaseAlgoHandler); + this.mockStatusGenerator, Optional.of(this.topologyCatalog), Optional.absent(), Optional.of(logger), + Optional.of(this._mockFlowTriggerHandler)); this.topologyCatalog.addListener(orchestrator); this.flowCatalog.addListener(orchestrator); // Start application diff --git a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java index 81562ecf03c..6db4a83e33f 100644 --- a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java +++ b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/scheduler/GobblinServiceJobSchedulerTest.java @@ -60,7 +60,7 @@ import org.apache.gobblin.service.modules.orchestration.AbstractUserQuotaManager; import org.apache.gobblin.service.modules.orchestration.InMemoryUserQuotaManager; import org.apache.gobblin.service.modules.orchestration.Orchestrator; -import org.apache.gobblin.service.modules.orchestration.SchedulerLeaseAlgoHandler; +import org.apache.gobblin.service.modules.orchestration.FlowTriggerHandler; import org.apache.gobblin.service.modules.orchestration.UserQuotaManager; import 
org.apache.gobblin.service.modules.spec.JobExecutionPlan; import org.apache.gobblin.service.modules.spec.JobExecutionPlanDagFactory; @@ -349,7 +349,8 @@ public void testJobSchedulerAddFlowQuotaExceeded() throws Exception { SchedulerService schedulerService = new SchedulerService(new Properties()); // Mock a GaaS scheduler not in warm standby mode GobblinServiceJobScheduler scheduler = new GobblinServiceJobScheduler("testscheduler", - ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), false, false, Mockito.mock(SchedulerLeaseAlgoHandler.class)); + ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), false, Optional.of(Mockito.mock( + FlowTriggerHandler.class))); schedulerService.startAsync().awaitRunning(); scheduler.startUp(); @@ -367,7 +368,8 @@ public void testJobSchedulerAddFlowQuotaExceeded() throws Exception { //Mock a GaaS scheduler in warm standby mode, where we don't check quota GobblinServiceJobScheduler schedulerWithWarmStandbyEnabled = new GobblinServiceJobScheduler("testscheduler", - ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), true, false, Mockito.mock(SchedulerLeaseAlgoHandler.class)); + ConfigFactory.empty(), Optional.absent(), Optional.of(flowCatalog), null, mockOrchestrator, schedulerService, Optional.of(new InMemoryUserQuotaManager(quotaConfig)), Optional.absent(), true, Optional.of(Mockito.mock( + FlowTriggerHandler.class))); schedulerWithWarmStandbyEnabled.startUp(); schedulerWithWarmStandbyEnabled.setActive(true); @@ -389,7 +391,8 @@ class TestGobblinServiceJobScheduler extends GobblinServiceJobScheduler { public 
TestGobblinServiceJobScheduler(String serviceName, Config config, Optional flowCatalog, Optional topologyCatalog, Orchestrator orchestrator, Optional quotaManager, SchedulerService schedulerService, boolean isWarmStandbyEnabled) throws Exception { - super(serviceName, config, Optional.absent(), flowCatalog, topologyCatalog, orchestrator, schedulerService, quotaManager, Optional.absent(), isWarmStandbyEnabled, false, Mockito.mock(SchedulerLeaseAlgoHandler.class)); + super(serviceName, config, Optional.absent(), flowCatalog, topologyCatalog, orchestrator, schedulerService, quotaManager, Optional.absent(), isWarmStandbyEnabled, Optional.of(Mockito.mock( + FlowTriggerHandler.class))); if (schedulerService != null) { hasScheduler = true; } From 8e434cbd888470caa4412b44cced752973f956d0 Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Wed, 14 Jun 2023 10:42:36 -0700 Subject: [PATCH 10/11] Cleanup in response to review, fix failing test --- .../configuration/ConfigurationKeys.java | 17 ++--- .../gobblin/runtime/api/DagActionStore.java | 10 ++- .../runtime/api/MultiActiveLeaseArbiter.java | 22 +++--- .../api/MysqlMultiActiveLeaseArbiter.java | 71 +++++++++---------- .../runtime/metrics/RuntimeMetrics.java | 4 +- .../gobblin/scheduler/JobScheduler.java | 2 +- .../modules/orchestration/DagManager.java | 10 +-- .../orchestration/FlowTriggerHandler.java | 54 +++++++------- .../modules/orchestration/Orchestrator.java | 27 +++---- ...ecutionResourceHandlerWithWarmStandby.java | 12 ++-- .../scheduler/GobblinServiceJobScheduler.java | 14 ++-- .../DagActionStoreChangeMonitor.java | 3 +- 12 files changed, 126 insertions(+), 120 deletions(-) diff --git a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java index e763c37faf2..b155e8089bb 100644 --- a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java +++ 
b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java @@ -96,18 +96,19 @@ public class ConfigurationKeys { public static final String SKIP_SCHEDULING_FLOWS_AFTER_NUM_DAYS = "skip.scheduling.flows.after.num.days"; public static final int DEFAULT_NUM_DAYS_TO_SKIP_AFTER = 365; // Scheduler lease determination store configuration - public static final String MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE_KEY = "MysqlMultiActiveLeaseArbiter.constantsTable"; - public static final String DEFAULT_MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE = "MysqlMultiActiveLeaseArbiter.gobblin_multi_active_scheduler_constants_store"; - public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = "MysqlMultiActiveLeaseArbiter.schedulerLeaseArbiterTable"; - public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = "MysqlMultiActiveLeaseArbiter.gobblin_scheduler_lease_determination_store"; + public static final String MYSQL_LEASE_ARBITER_PREFIX = "MysqlMultiActiveLeaseArbiter"; + public static final String MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE_KEY = MYSQL_LEASE_ARBITER_PREFIX + ".constantsTable"; + public static final String DEFAULT_MULTI_ACTIVE_SCHEDULER_CONSTANTS_DB_TABLE = MYSQL_LEASE_ARBITER_PREFIX + ".gobblin_multi_active_scheduler_constants_store"; + public static final String SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY = MYSQL_LEASE_ARBITER_PREFIX + ".schedulerLeaseArbiterTable"; + public static final String DEFAULT_SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE = MYSQL_LEASE_ARBITER_PREFIX + ".gobblin_scheduler_lease_determination_store"; public static final String SCHEDULER_EVENT_TO_REVISIT_TIMESTAMP_MILLIS_KEY = "eventToRevisitTimestampMillis"; public static final String SCHEDULER_EVENT_TO_TRIGGER_TIMESTAMP_MILLIS_KEY = "triggerEventTimestampMillis"; - public static final String SCHEDULER_EVENT_EPSILON_MILLIS_KEY = "MysqlMultiActiveLeaseArbiter.epsilonMillis"; - public static final int 
DEFAULT_SCHEDULER_EVENT_EPSILON_MILLIS = 100; + public static final String SCHEDULER_EVENT_EPSILON_MILLIS_KEY = MYSQL_LEASE_ARBITER_PREFIX + ".epsilonMillis"; + public static final int DEFAULT_SCHEDULER_EVENT_EPSILON_MILLIS = 5000; // Note: linger should be on the order of seconds even though we measure in millis - public static final String SCHEDULER_EVENT_LINGER_MILLIS_KEY = "MysqlMultiActiveLeaseArbiter.lingerMillis"; + public static final String SCHEDULER_EVENT_LINGER_MILLIS_KEY = MYSQL_LEASE_ARBITER_PREFIX + ".lingerMillis"; public static final int DEFAULT_SCHEDULER_EVENT_LINGER_MILLIS = 30000; - public static final String SCHEDULER_MAX_BACKOFF_MILLIS_KEY = "MysqlMultiActiveLeaseArbiter.maxBackoffMillis"; + public static final String SCHEDULER_MAX_BACKOFF_MILLIS_KEY = MYSQL_LEASE_ARBITER_PREFIX + ".maxBackoffMillis"; public static final int DEFAULT_SCHEDULER_MAX_BACKOFF_MILLIS = 5000; // Job executor thread pool size diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java index 90e8db6d280..a1a0ea237e6 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/DagActionStore.java @@ -21,7 +21,6 @@ import java.sql.SQLException; import java.util.Collection; -import lombok.AllArgsConstructor; import lombok.Data; @@ -36,12 +35,11 @@ enum FlowActionType { } @Data - @AllArgsConstructor class DagAction { - String flowGroup; - String flowName; - String flowExecutionId; - FlowActionType flowActionType; + final String flowGroup; + final String flowName; + final String flowExecutionId; + final FlowActionType flowActionType; } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java index 1f45c1cb352..ab9e03599b5 
100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MultiActiveLeaseArbiter.java @@ -36,7 +36,7 @@ * participant has * a) LeaseObtainedStatus -> this participant will attempt to carry out the required action before the lease expires * b) LeasedToAnotherStatus -> another will attempt to carry out the required action before the lease expires - * c) NoLongerLeasingStatus -> flow event no longer needs to be acted upon or terminal state + * c) NoLongerLeasingStatus -> flow event no longer needs to be acted upon (terminal state) * 3. If another participant has acquired the lease before this one could, then the present participant must check back * in at the time of lease expiry to see if it needs to attempt the lease again [status (b) above]. * 4. Once the participant which acquired the lease completes its work on the flow event, it calls recordLeaseSuccess @@ -65,10 +65,10 @@ public interface MultiActiveLeaseArbiter { * false if failed to update the lease properly, the caller should continue seeking to acquire the lease as * if any actions it did successfully accomplish, do not count */ - boolean recordLeaseSuccess(DagActionStore.DagAction flowAction, LeaseObtainedStatus status) throws IOException; + boolean recordLeaseSuccess(LeaseObtainedStatus status) throws IOException; /* - Object used to encapsulate status of lease acquisition attempt and derived should contain information specific to + Class used to encapsulate status of lease acquisition attempt and derivations should contain information specific to the status that results. */ abstract class LeaseAttemptStatus {} @@ -76,24 +76,28 @@ abstract class LeaseAttemptStatus {} class NoLongerLeasingStatus extends LeaseAttemptStatus {} /* - The participant calling this method acquired the lease for the event in question. 
The class contains the `eventTimestamp` - associated with the lease as well as the time the caller obtained the lease or `leaseAcquisitionTimestamp`. + The participant calling this method acquired the lease for the event in question. The class contains the + `eventTimestamp` associated with the lease as well as the time the caller obtained the lease or + `leaseAcquisitionTimestamp`. */ @Data class LeaseObtainedStatus extends LeaseAttemptStatus { + private final DagActionStore.DagAction flowAction; private final long eventTimestamp; private final long leaseAcquisitionTimestamp; } /* - This flow action event already has a valid lease owned by another host. + This flow action event already has a valid lease owned by another participant. + `eventTimeMillis` is the timestamp the lease is associated with, which may be a different timestamp for the same flow + action corresponding to the same instance of the event or a distinct one. + `minimumLingerDurationMillis` is the minimum amount of time to wait before this participant should return to check if + the lease has completed or expired */ @Data class LeasedToAnotherStatus extends LeaseAttemptStatus { - // the timestamp the lease is associated with, but it may be a different timestamp for the same flow action - // (a previous participant of the event) + private final DagActionStore.DagAction flowAction; private final long eventTimeMillis; - // the minimum amount of time to wait before returning to check if the lease has completed or expired private final long minimumLingerDurationMillis; } } diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java index 51633dd55b3..f1765e2e712 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java +++ 
b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java @@ -75,8 +75,6 @@ protected interface CheckedFunction { R apply(T t) throws IOException, SQLException; } - public static final String CONFIG_PREFIX = "MysqlMultiActiveLeaseArbiter"; - protected final DataSource dataSource; private final String leaseArbiterTableName; private final String constantsTableName; @@ -88,20 +86,17 @@ protected interface CheckedFunction { + "flow_group varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, flow_name varchar(" + ServiceConfigKeys.MAX_FLOW_GROUP_LENGTH + ") NOT NULL, " + "flow_execution_id varchar(" + ServiceConfigKeys.MAX_FLOW_EXECUTION_ID_LENGTH + ") NOT NULL, flow_action varchar(100) NOT NULL, " - + "event_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " + + "event_timestamp TIMESTAMP, " + "lease_acquisition_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP," + "PRIMARY KEY (flow_group,flow_name,flow_execution_id,flow_action))"; - private static final String CREATE_CONSTANTS_TABLE_STATEMENT = "CREATE TABLE IF NOT EXISTS %s " + "(epsilon INT, linger INT), PRIMARY KEY (epsilon, linger); INSERT INTO %s (epsilon, linger) VALUES (?,?)"; protected static final String WHERE_CLAUSE_TO_MATCH_KEY = "WHERE flow_group=? AND flow_name=? AND flow_execution_id=?" + " AND flow_action=?"; protected static final String WHERE_CLAUSE_TO_MATCH_ROW = WHERE_CLAUSE_TO_MATCH_KEY + " AND event_timestamp=? AND lease_acquisition_timestamp=?"; - protected static final String SELECT_AFTER_INSERT_STATEMENT = "SELECT ROW_COUNT() AS rows_inserted_count, " + "lease_acquisition_timestamp, linger FROM %s, %s " + WHERE_CLAUSE_TO_MATCH_KEY; - // Does a cross join between the two tables to have epsilon and linger values available. 
Returns the following values: // event_timestamp, lease_acquisition_timestamp, isWithinEpsilon (boolean if event_timestamp in table is within // epsilon), leaseValidityStatus (1 if lease has not expired, 2 if expired, 3 if column is NULL or no longer leasing) @@ -110,30 +105,27 @@ protected interface CheckedFunction { + "WHEN CURRENT_TIMESTAMP < (lease_acquisition_timestamp + linger) then 1" + "WHEN CURRENT_TIMESTAMP >= (lease_acquisition_timestamp + linger) then 2" + "ELSE 3 END as leaseValidityStatus, linger FROM %s, %s " + WHERE_CLAUSE_TO_MATCH_KEY; - // Insert or update row to acquire lease if values have not changed since the previous read // Need to define three separate statements to handle cases where row does not exist or has null values to check protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT = "INSERT INTO %s " - + "(flow_group, flow_name, flow_execution_id, flow_action, event_timestamp) VALUES (?, ?, ?, ?, ?); " - + SELECT_AFTER_INSERT_STATEMENT; + + "(flow_group, flow_name, flow_execution_id, flow_action, event_timestamp) VALUES (?, ?, ?, ?, ?)"; protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT = "UPDATE %s " + "SET event_timestamp=?" + WHERE_CLAUSE_TO_MATCH_KEY - + " AND event_timestamp=? AND lease_acquisition_timestamp is NULL; " + SELECT_AFTER_INSERT_STATEMENT; + + " AND event_timestamp=? AND lease_acquisition_timestamp is NULL"; protected static final String CONDITIONALLY_ACQUIRE_LEASE_IF_MATCHING_ALL_COLS_STATEMENT = "UPDATE %s " + "SET event_timestamp=?" + WHERE_CLAUSE_TO_MATCH_ROW - + " AND event_timestamp=? AND lease_acquisition_timestamp=?; " + SELECT_AFTER_INSERT_STATEMENT; - + + " AND event_timestamp=? 
AND lease_acquisition_timestamp=?"; // Complete lease acquisition if values have not changed since lease was acquired protected static final String CONDITIONALLY_COMPLETE_LEASE_STATEMENT = "UPDATE %s SET " + "lease_acquisition_timestamp = NULL " + WHERE_CLAUSE_TO_MATCH_ROW; @Inject public MysqlMultiActiveLeaseArbiter(Config config) throws IOException { - if (config.hasPath(CONFIG_PREFIX)) { - config = config.getConfig(CONFIG_PREFIX).withFallback(config); + if (config.hasPath(ConfigurationKeys.MYSQL_LEASE_ARBITER_PREFIX)) { + config = config.getConfig(ConfigurationKeys.MYSQL_LEASE_ARBITER_PREFIX).withFallback(config); } else { throw new IOException(String.format("Please specify the config for MysqlMultiActiveLeaseArbiter using prefix %s " - + "before all properties", CONFIG_PREFIX)); + + "before all properties", ConfigurationKeys.MYSQL_LEASE_ARBITER_PREFIX)); } this.leaseArbiterTableName = ConfigUtils.getString(config, ConfigurationKeys.SCHEDULER_LEASE_DETERMINATION_STORE_DB_TABLE_KEY, @@ -181,21 +173,21 @@ public LeaseAttemptStatus tryAcquireLease(DagActionStore.DagAction flowAction, l // CASE 1: If no existing row for this flow action, then go ahead and insert if (!resultSet.next()) { ResultSet rs = withPreparedStatement( - String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT, this.leaseArbiterTableName, - this.leaseArbiterTableName, this.constantsTableName), + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT + "; " + SELECT_AFTER_INSERT_STATEMENT, + this.leaseArbiterTableName, this.leaseArbiterTableName, this.constantsTableName), insertStatement -> { completeInsertPreparedStatement(insertStatement, flowAction, eventTimeMillis); return insertStatement.executeQuery(); }, true); - return handleResultFromAttemptedLeaseObtainment(rs, eventTimeMillis); + return handleResultFromAttemptedLeaseObtainment(rs, flowAction, eventTimeMillis); } // Extract values from result set - Timestamp dbEventTimestamp = resultSet.getTimestamp(1); - Timestamp 
dbLeaseAcquisitionTimestamp = resultSet.getTimestamp(2); - boolean isWithinEpsilon = resultSet.getBoolean(3); - int leaseValidityStatus = resultSet.getInt(4); - int dbLinger = resultSet.getInt(5); + Timestamp dbEventTimestamp = resultSet.getTimestamp("event_timestamp"); + Timestamp dbLeaseAcquisitionTimestamp = resultSet.getTimestamp("lease_acquisition_timestamp"); + boolean isWithinEpsilon = resultSet.getBoolean("isWithinEpsilon"); + int leaseValidityStatus = resultSet.getInt("leaseValidityStatus"); + int dbLinger = resultSet.getInt("linger"); // CASE 2: If our event timestamp is older than the last event in db, then skip this trigger if (eventTimeMillis < dbEventTimestamp.getTime()) { @@ -206,13 +198,13 @@ public LeaseAttemptStatus tryAcquireLease(DagActionStore.DagAction flowAction, l // CASE 3: Same event, lease is valid if (isWithinEpsilon) { // Utilize db timestamp for reminder - return new LeasedToAnotherStatus(dbEventTimestamp.getTime(), - dbLeaseAcquisitionTimestamp.getTime() + dbLinger); + return new LeasedToAnotherStatus(flowAction, dbEventTimestamp.getTime(), + dbLeaseAcquisitionTimestamp.getTime() + dbLinger - System.currentTimeMillis()); } // CASE 4: Distinct event, lease is valid // Utilize db timestamp for wait time, but be reminded of own event timestamp - return new LeasedToAnotherStatus(eventTimeMillis, - dbLeaseAcquisitionTimestamp.getTime() + dbLinger); + return new LeasedToAnotherStatus(flowAction, eventTimeMillis, + dbLeaseAcquisitionTimestamp.getTime() + dbLinger - System.currentTimeMillis()); } // CASE 5: Lease is out of date (regardless of whether same or distinct event) else if (leaseValidityStatus == 2) { @@ -223,14 +215,15 @@ else if (leaseValidityStatus == 2) { } // Use our event to acquire lease, check for previous db eventTimestamp and leaseAcquisitionTimestamp ResultSet rs = withPreparedStatement( - String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_MATCHING_ALL_COLS_STATEMENT, this.leaseArbiterTableName, - this.leaseArbiterTableName, 
this.constantsTableName), + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_MATCHING_ALL_COLS_STATEMENT + "; " + + SELECT_AFTER_INSERT_STATEMENT, this.leaseArbiterTableName, this.leaseArbiterTableName, + this.constantsTableName), updateStatement -> { completeUpdatePreparedStatement(updateStatement, flowAction, eventTimeMillis, true, true, dbEventTimestamp, dbLeaseAcquisitionTimestamp); return updateStatement.executeQuery(); }, true); - return handleResultFromAttemptedLeaseObtainment(rs, eventTimeMillis); + return handleResultFromAttemptedLeaseObtainment(rs, flowAction, eventTimeMillis); } // No longer leasing this event // CASE 6: Same event, no longer leasing event in db: terminate if (isWithinEpsilon) { @@ -239,14 +232,15 @@ else if (leaseValidityStatus == 2) { // CASE 7: Distinct event, no longer leasing event in db // Use our event to acquire lease, check for previous db eventTimestamp and NULL leaseAcquisitionTimestamp ResultSet rs = withPreparedStatement( - String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT, this.leaseArbiterTableName, - this.leaseArbiterTableName, this.constantsTableName), + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT + "; " + + SELECT_AFTER_INSERT_STATEMENT, this.leaseArbiterTableName, this.leaseArbiterTableName, + this.constantsTableName), updateStatement -> { completeUpdatePreparedStatement(updateStatement, flowAction, eventTimeMillis, true, false, dbEventTimestamp, null); return updateStatement.executeQuery(); }, true); - return handleResultFromAttemptedLeaseObtainment(rs, eventTimeMillis); + return handleResultFromAttemptedLeaseObtainment(rs, flowAction, eventTimeMillis); } catch (SQLException e) { throw new RuntimeException(e); } @@ -261,7 +255,8 @@ else if (leaseValidityStatus == 2) { * @throws SQLException * @throws IOException */ - protected LeaseAttemptStatus handleResultFromAttemptedLeaseObtainment(ResultSet resultSet, long eventTimeMillis) + protected LeaseAttemptStatus 
handleResultFromAttemptedLeaseObtainment(ResultSet resultSet, + DagActionStore.DagAction flowAction, long eventTimeMillis) throws SQLException, IOException { if (!resultSet.next()) { throw new IOException("Expected num rows and lease_acquisition_timestamp returned from query but received nothing"); @@ -270,10 +265,11 @@ protected LeaseAttemptStatus handleResultFromAttemptedLeaseObtainment(ResultSet long leaseAcquisitionTimeMillis = resultSet.getTimestamp(2).getTime(); int dbLinger = resultSet.getInt(3); if (numRowsUpdated == 1) { - return new LeaseObtainedStatus(eventTimeMillis, leaseAcquisitionTimeMillis); + return new LeaseObtainedStatus(flowAction, eventTimeMillis, leaseAcquisitionTimeMillis); } // Another participant acquired lease in between - return new LeasedToAnotherStatus(eventTimeMillis, leaseAcquisitionTimeMillis + dbLinger); + return new LeasedToAnotherStatus(flowAction, eventTimeMillis, + leaseAcquisitionTimeMillis + dbLinger - System.currentTimeMillis()); } /** @@ -342,8 +338,9 @@ protected void completeUpdatePreparedStatement(PreparedStatement statement, DagA } @Override - public boolean recordLeaseSuccess(DagActionStore.DagAction flowAction, LeaseObtainedStatus status) + public boolean recordLeaseSuccess(LeaseObtainedStatus status) throws IOException { + DagActionStore.DagAction flowAction = status.getFlowAction(); String flowGroup = flowAction.getFlowGroup(); String flowName = flowAction.getFlowName(); String flowExecutionId = flowAction.getFlowExecutionId(); diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java index b545ae014c5..dfccb0c0715 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/metrics/RuntimeMetrics.java @@ -73,8 +73,8 @@ public class RuntimeMetrics { public static final String 
GOBBLIN_JOB_SCHEDULER_TOTAL_GET_SPEC_TIME_NANOS = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".jobScheduler.totalGetSpecTimeNanos"; public static final String GOBBLIN_JOB_SCHEDULER_TOTAL_ADD_SPEC_TIME_NANOS = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".jobScheduler.totalAddSpecTimeNanos"; public static final String GOBBLIN_JOB_SCHEDULER_NUM_JOBS_SCHEDULED_DURING_STARTUP = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".jobScheduler.numJobsScheduledDuringStartup"; - // Metrics Used to Track SchedulerLeaseAlgoHandlerProgress - public static final String GOBBLIN_SCHEDULER_LEASE_ALGO_HANDLER_NUM_FLOWS_SUBMITTED = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".schedulerLeaseAlgoHandler.numFlowsSubmitted"; + // Metrics Used to Track flowTriggerHandlerProgress + public static final String GOBBLIN_FLOW_TRIGGER_HANDLER_NUM_FLOWS_SUBMITTED = ServiceMetricNames.GOBBLIN_SERVICE_PREFIX + ".flowTriggerHandler.numFlowsSubmitted"; // Metadata keys public static final String TOPIC = "topic"; public static final String GROUP_ID = "groupId"; diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java index 297779e9761..56b1ac8c045 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/scheduler/JobScheduler.java @@ -581,7 +581,7 @@ public void close() throws IOException { /** * Get a {@link org.quartz.Trigger} from the given job configuration properties. 
*/ - public Trigger createTriggerForJob(JobKey jobKey, Properties jobProps) { + public static Trigger createTriggerForJob(JobKey jobKey, Properties jobProps) { // Build a trigger for the job with the given cron-style schedule return TriggerBuilder.newTrigger() .withIdentity(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java index 28e7f5c8421..80da8a9e99d 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/DagManager.java @@ -579,7 +579,7 @@ public void run() { } } - private void clearUpDagAction(DagId dagId, DagActionStore.FlowActionType flowActionType) throws IOException { + private void removeDagActionFromStore(DagId dagId, DagActionStore.FlowActionType flowActionType) throws IOException { if (this.dagActionStore.isPresent()) { this.dagActionStore.get().deleteDagAction( new DagActionStore.DagAction(dagId.flowGroup, dagId.flowName, dagId.flowExecutionId, flowActionType)); @@ -594,13 +594,13 @@ private void beginResumingDag(DagId dagIdToResume) throws IOException { String dagId= dagIdToResume.toString(); if (!this.failedDagIds.contains(dagId)) { log.warn("No dag found with dagId " + dagId + ", so cannot resume flow"); - clearUpDagAction(dagIdToResume, DagActionStore.FlowActionType.RESUME); + removeDagActionFromStore(dagIdToResume, DagActionStore.FlowActionType.RESUME); return; } Dag dag = this.failedDagStateStore.getDag(dagId); if (dag == null) { log.error("Dag " + dagId + " was found in memory but not found in failed dag state store"); - clearUpDagAction(dagIdToResume, DagActionStore.FlowActionType.RESUME); + removeDagActionFromStore(dagIdToResume, DagActionStore.FlowActionType.RESUME); return; } @@ -651,7 +651,7 @@ 
private void finishResumingDags() throws IOException { if (dagReady) { this.dagStateStore.writeCheckpoint(dag.getValue()); this.failedDagStateStore.cleanUp(dag.getValue()); - clearUpDagAction(DagManagerUtils.generateDagId(dag.getValue()), DagActionStore.FlowActionType.RESUME); + removeDagActionFromStore(DagManagerUtils.generateDagId(dag.getValue()), DagActionStore.FlowActionType.RESUME); this.failedDagIds.remove(dag.getKey()); this.resumingDags.remove(dag.getKey()); initialize(dag.getValue()); @@ -681,7 +681,7 @@ private void cancelDag(DagId dagId) throws ExecutionException, InterruptedExcept log.warn("Did not find Dag with id {}, it might be already cancelled/finished.", dagToCancel); } // Called after a KILL request is received - clearUpDagAction(dagId, DagActionStore.FlowActionType.KILL); + removeDagActionFromStore(dagId, DagActionStore.FlowActionType.KILL); } private void cancelDagNode(DagNode dagNodeToCancel) throws ExecutionException, InterruptedException { diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java index ba639f6b003..a0803bb55b8 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java @@ -54,35 +54,31 @@ * for a flow action event. After acquiring the lease, it persists the flow action event to the {@link DagActionStore} * to be eventually acted upon by the host with the active DagManager. Once it has completed this action, it will mark * the lease as completed by calling the - * MysqlMultiActiveLeaseArbiter.recordLeaseSuccess method. 
Hosts that do not gain - * the lease for the event, instead schedule a reminder using the {@link SchedulerService} to check back in on the - * previous lease owner's completion status after the lease should expire to ensure the event is handled in failure - * cases. + * {@link MysqlMultiActiveLeaseArbiter.recordLeaseSuccess()} method. Hosts that do not gain the lease for the event, + * instead schedule a reminder using the {@link SchedulerService} to check back in on the previous lease owner's + * completion status after the lease should expire to ensure the event is handled in failure cases. */ @Slf4j public class FlowTriggerHandler { private final int schedulerMaxBackoffMillis; private static Random random = new Random(); protected MultiActiveLeaseArbiter multiActiveLeaseArbiter; - protected JobScheduler jobScheduler; protected SchedulerService schedulerService; protected DagActionStore dagActionStore; private MetricContext metricContext; - private ContextAwareMeter numLeasesCompleted; + private ContextAwareMeter numFlowsSubmitted; @Inject - // TODO: should multiActiveLeaseArbiter and DagActionStore be optional? 
public FlowTriggerHandler(Config config, MultiActiveLeaseArbiter leaseDeterminationStore, - JobScheduler jobScheduler, SchedulerService schedulerService, DagActionStore dagActionStore) { + SchedulerService schedulerService, DagActionStore dagActionStore) { this.schedulerMaxBackoffMillis = ConfigUtils.getInt(config, ConfigurationKeys.SCHEDULER_MAX_BACKOFF_MILLIS_KEY, ConfigurationKeys.DEFAULT_SCHEDULER_MAX_BACKOFF_MILLIS); this.multiActiveLeaseArbiter = leaseDeterminationStore; - this.jobScheduler = jobScheduler; this.schedulerService = schedulerService; this.dagActionStore = dagActionStore; this.metricContext = Instrumented.getMetricContext(new org.apache.gobblin.configuration.State(ConfigUtils.configToProperties(config)), this.getClass()); - this.numLeasesCompleted = metricContext.contextAwareMeter(RuntimeMetrics.GOBBLIN_SCHEDULER_LEASE_ALGO_HANDLER_NUM_FLOWS_SUBMITTED); + this.numFlowsSubmitted = metricContext.contextAwareMeter(RuntimeMetrics.GOBBLIN_FLOW_TRIGGER_HANDLER_NUM_FLOWS_SUBMITTED); } /** @@ -99,40 +95,50 @@ public void handleTriggerEvent(Properties jobProps, DagActionStore.DagAction flo multiActiveLeaseArbiter.tryAcquireLease(flowAction, eventTimeMillis); // TODO: add a log event or metric for each of these cases if (leaseAttemptStatus instanceof MultiActiveLeaseArbiter.LeaseObtainedStatus) { - persistFlowAction((MultiActiveLeaseArbiter.LeaseObtainedStatus) leaseAttemptStatus, flowAction); + MultiActiveLeaseArbiter.LeaseObtainedStatus leaseObtainedStatus = (MultiActiveLeaseArbiter.LeaseObtainedStatus) leaseAttemptStatus; + if (persistFlowAction(leaseObtainedStatus)) { + return; + } + // If persisting the flow action failed, then we set another trigger for this event to occur immediately to + // re-attempt handling the event + scheduleReminderForEvent(jobProps, new MultiActiveLeaseArbiter.LeasedToAnotherStatus(flowAction, + leaseObtainedStatus.getEventTimestamp(), 0L), eventTimeMillis); return; } else if (leaseAttemptStatus instanceof 
MultiActiveLeaseArbiter.LeasedToAnotherStatus) { - scheduleReminderForEvent(jobProps, (MultiActiveLeaseArbiter.LeasedToAnotherStatus) leaseAttemptStatus, flowAction, + scheduleReminderForEvent(jobProps, (MultiActiveLeaseArbiter.LeasedToAnotherStatus) leaseAttemptStatus, eventTimeMillis); + return; } else if (leaseAttemptStatus instanceof MultiActiveLeaseArbiter.NoLongerLeasingStatus) { return; } - log.warn("Received type of leaseAttemptStatus: {} not handled by this method", leaseAttemptStatus.getClass().getName()); + throw new RuntimeException(String.format("Received type of leaseAttemptStatus: %s not handled by this method", + leaseAttemptStatus.getClass().getName())); } // Called after obtaining a lease to persist the flow action to {@link DagActionStore} and mark the lease as done - private boolean persistFlowAction(MultiActiveLeaseArbiter.LeaseObtainedStatus status, DagActionStore.DagAction flowAction) { + private boolean persistFlowAction(MultiActiveLeaseArbiter.LeaseObtainedStatus status) { try { + DagActionStore.DagAction flowAction = status.getFlowAction(); this.dagActionStore.addDagAction(flowAction.getFlowGroup(), flowAction.getFlowName(), flowAction.getFlowExecutionId(), flowAction.getFlowActionType()); // If the flow action has been persisted to the {@link DagActionStore} we can close the lease - this.numLeasesCompleted.mark(); - return this.multiActiveLeaseArbiter.recordLeaseSuccess(flowAction, status); + this.numFlowsSubmitted.mark(); + return this.multiActiveLeaseArbiter.recordLeaseSuccess(status); } catch (IOException e) { throw new RuntimeException(e); } } /** - * This method is used by FlowTriggerHandler.handleNewSchedulerEvent to schedule a reminder for itself to check on + * This method is used by {@link FlowTriggerHandler.handleTriggerEvent} to schedule a self-reminder to check on * the other participant's progress to finish acting on a flow action after the time the lease should expire. 
* @param jobProps * @param status used to extract event to be reminded for and the minimum time after which reminder should occur * @param originalEventTimeMillis the event timestamp we were originally handling - * @param flowAction */ private void scheduleReminderForEvent(Properties jobProps, MultiActiveLeaseArbiter.LeasedToAnotherStatus status, - DagActionStore.DagAction flowAction, long originalEventTimeMillis) { + long originalEventTimeMillis) { + DagActionStore.DagAction flowAction = status.getFlowAction(); // Add a small randomization to the minimum reminder wait time to avoid 'thundering herd' issue String cronExpression = createCronFromDelayPeriod(status.getMinimumLingerDurationMillis() + random.nextInt(schedulerMaxBackoffMillis)); @@ -145,15 +151,15 @@ private void scheduleReminderForEvent(Properties jobProps, MultiActiveLeaseArbit String.valueOf(originalEventTimeMillis)); JobKey key = new JobKey(flowAction.getFlowName(), flowAction.getFlowGroup()); // Create a new trigger for the flow in job scheduler that is set to fire at the minimum reminder wait time calculated - Trigger trigger = this.jobScheduler.createTriggerForJob(key, jobProps); + Trigger trigger = JobScheduler.createTriggerForJob(key, jobProps); try { - log.info("Scheduler Lease Algo Handler - [%s, eventTimestamp: %s] - attempting to schedule reminder for event %s in %s millis", + log.info("Flow Trigger Handler - [%s, eventTimestamp: %s] - attempting to schedule reminder for event %s in %s millis", flowAction, originalEventTimeMillis, status.getEventTimeMillis(), trigger.getNextFireTime()); this.schedulerService.getScheduler().scheduleJob(trigger); } catch (SchedulerException e) { log.warn("Failed to add job reminder due to SchedulerException for job %s trigger event %s ", key, status.getEventTimeMillis(), e); } - log.info(String.format("Scheduler Lease Algo Handler - [%s, eventTimestamp: %s] - SCHEDULED REMINDER for event %s in %s millis", + log.info(String.format("Flow Trigger Handler - [%s, 
eventTimestamp: %s] - SCHEDULED REMINDER for event %s in %s millis", flowAction, originalEventTimeMillis, status.getEventTimeMillis(), trigger.getNextFireTime())); } @@ -165,9 +171,9 @@ private void scheduleReminderForEvent(Properties jobProps, MultiActiveLeaseArbit */ protected static String createCronFromDelayPeriod(long delayPeriodMillis) { LocalDateTime now = LocalDateTime.now(ZoneId.of("UTC")); - LocalDateTime delaySecondsLater = now.plus(delayPeriodMillis, ChronoUnit.MILLIS); + LocalDateTime timeToScheduleReminder = now.plus(delayPeriodMillis, ChronoUnit.MILLIS); // TODO: investigate potentially better way of generating cron expression that does not make it US dependent DateTimeFormatter formatter = DateTimeFormatter.ofPattern("ss mm HH dd MM ? yyyy", Locale.US); - return delaySecondsLater.format(formatter); + return timeToScheduleReminder.format(formatter); } } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index 09eff7b04d1..4fc23c0d1a8 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -104,7 +104,7 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { private FlowStatusGenerator flowStatusGenerator; private UserQuotaManager quotaManager; - private Optional schedulerLeaseAlgoHandler; + private Optional flowTriggerHandler; private final ClassAliasResolver aliasResolver; @@ -112,13 +112,13 @@ public class Orchestrator implements SpecCatalogListener, Instrumentable { public Orchestrator(Config config, Optional topologyCatalog, Optional dagManager, Optional log, - FlowStatusGenerator flowStatusGenerator, boolean instrumentationEnabled, Optional schedulerLeaseAlgoHandler) { + FlowStatusGenerator flowStatusGenerator, 
boolean instrumentationEnabled, Optional flowTriggerHandler) { _log = log.isPresent() ? log.get() : LoggerFactory.getLogger(getClass()); this.aliasResolver = new ClassAliasResolver<>(SpecCompiler.class); this.topologyCatalog = topologyCatalog; this.dagManager = dagManager; this.flowStatusGenerator = flowStatusGenerator; - this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; + this.flowTriggerHandler = flowTriggerHandler; try { String specCompilerClassName = ServiceConfigKeys.DEFAULT_GOBBLIN_SERVICE_FLOWCOMPILER_CLASS; if (config.hasPath(ServiceConfigKeys.GOBBLIN_SERVICE_FLOWCOMPILER_CLASS_KEY)) { @@ -161,8 +161,8 @@ public Orchestrator(Config config, Optional topologyCatalog, Op @Inject public Orchestrator(Config config, FlowStatusGenerator flowStatusGenerator, Optional topologyCatalog, - Optional dagManager, Optional log, Optional schedulerLeaseAlgoHandler) { - this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, schedulerLeaseAlgoHandler); + Optional dagManager, Optional log, Optional flowTriggerHandler) { + this(config, topologyCatalog, dagManager, log, flowStatusGenerator, true, flowTriggerHandler); } @@ -312,7 +312,7 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil } // If multi-active scheduler is enabled do not pass onto DagManager, otherwise scheduler forwards it directly - if (schedulerLeaseAlgoHandler.isPresent()) { + if (flowTriggerHandler.isPresent()) { // If triggerTimestampMillis is 0, then it was not set by the job trigger handler, and we cannot handle this event if (triggerTimestampMillis == 0L) { _log.warn("Skipping execution of spec: {} because missing trigger timestamp in job properties", @@ -328,11 +328,11 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil String flowExecutionId = flowMetadata.get(TimingEvent.FlowEventConstants.FLOW_EXECUTION_ID_FIELD); DagActionStore.DagAction flowAction = new DagActionStore.DagAction(flowGroup, flowName, 
flowExecutionId, DagActionStore.FlowActionType.LAUNCH); - schedulerLeaseAlgoHandler.get().handleTriggerEvent(jobProps, flowAction, triggerTimestampMillis); + flowTriggerHandler.get().handleTriggerEvent(jobProps, flowAction, triggerTimestampMillis); _log.info("Multi-active scheduler finished handling trigger event: [%s, triggerEventTimestamp: %s]", flowAction, triggerTimestampMillis); } else if (this.dagManager.isPresent()) { - submitFlowToDagManager((FlowSpec) spec, Optional.of(jobExecutionPlanDag)); + submitFlowToDagManager((FlowSpec) spec, jobExecutionPlanDag); } else { // Schedule all compiled JobSpecs on their respective Executor for (Dag.DagNode dagNode : jobExecutionPlanDag.getNodes()) { @@ -374,14 +374,15 @@ public void orchestrate(Spec spec, Properties jobProps, long triggerTimestampMil Instrumented.updateTimer(this.flowOrchestrationTimer, System.nanoTime() - startTime, TimeUnit.NANOSECONDS); } - public void submitFlowToDagManager(FlowSpec flowSpec, Optional> jobExecutionPlanDag) + public void submitFlowToDagManager(FlowSpec flowSpec) + throws IOException { + submitFlowToDagManager(flowSpec, specCompiler.compileFlow(flowSpec)); + } + public void submitFlowToDagManager(FlowSpec flowSpec, Dag jobExecutionPlanDag) throws IOException { - if (!jobExecutionPlanDag.isPresent()) { - jobExecutionPlanDag = Optional.of(specCompiler.compileFlow(flowSpec)); - } try { //Send the dag to the DagManager. 
- this.dagManager.get().addDag(jobExecutionPlanDag.get(), true, true); + this.dagManager.get().addDag(jobExecutionPlanDag, true, true); } catch (Exception ex) { if (this.eventSubmitter.isPresent()) { // pronounce failed before stack unwinds, to ensure flow not marooned in `COMPILED` state; (failure likely attributable to DB connection/failover) diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java index 0c9bbe2da4b..0b5d1cdc7e9 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/restli/GobblinServiceFlowExecutionResourceHandlerWithWarmStandby.java @@ -55,7 +55,7 @@ public void resume(ComplexResourceKey quotaManager; - protected final Optional schedulerLeaseAlgoHandler; + protected final Optional flowTriggerHandler; @Getter protected final Map scheduledFlowSpecs; @Getter @@ -166,7 +166,7 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser Optional helixManager, Optional flowCatalog, Optional topologyCatalog, Orchestrator orchestrator, SchedulerService schedulerService, Optional quotaManager, Optional log, @Named(InjectionNames.WARM_STANDBY_ENABLED) boolean warmStandbyEnabled, - Optional schedulerLeaseAlgoHandler) throws Exception { + Optional flowTriggerHandler) throws Exception { super(ConfigUtils.configToProperties(config), schedulerService); _log = log.isPresent() ? 
log.get() : LoggerFactory.getLogger(getClass()); @@ -182,7 +182,7 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser && config.hasPath(GOBBLIN_SERVICE_SCHEDULER_DR_NOMINATED); this.warmStandbyEnabled = warmStandbyEnabled; this.quotaManager = quotaManager; - this.schedulerLeaseAlgoHandler = schedulerLeaseAlgoHandler; + this.flowTriggerHandler = flowTriggerHandler; // Check that these metrics do not exist before adding, mainly for testing purpose which creates multiple instances // of the scheduler. If one metric exists, then the others should as well. MetricFilter filter = MetricFilter.contains(RuntimeMetrics.GOBBLIN_JOB_SCHEDULER_GET_SPECS_DURING_STARTUP_PER_SPEC_RATE_NANOS); @@ -204,11 +204,11 @@ public GobblinServiceJobScheduler(@Named(InjectionNames.SERVICE_NAME) String ser public GobblinServiceJobScheduler(String serviceName, Config config, FlowStatusGenerator flowStatusGenerator, Optional helixManager, Optional flowCatalog, Optional topologyCatalog, Optional dagManager, Optional quotaManager, SchedulerService schedulerService, - Optional log, boolean warmStandbyEnabled, Optional schedulerLeaseAlgoHandler) + Optional log, boolean warmStandbyEnabled, Optional flowTriggerHandler) throws Exception { this(serviceName, config, helixManager, flowCatalog, topologyCatalog, - new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log, schedulerLeaseAlgoHandler), - schedulerService, quotaManager, log, warmStandbyEnabled, schedulerLeaseAlgoHandler); + new Orchestrator(config, flowStatusGenerator, topologyCatalog, dagManager, log, flowTriggerHandler), + schedulerService, quotaManager, log, warmStandbyEnabled, flowTriggerHandler); } public synchronized void setActive(boolean isActive) { @@ -447,7 +447,7 @@ public void runJob(Properties jobProps, JobListener jobListener) throws JobExcep try { Spec flowSpec = this.scheduledFlowSpecs.get(jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY)); String triggerTimestampMillis = 
jobProps.getProperty( - ConfigurationKeys.SCHEDULER_EVENT_TO_TRIGGER_TIMESTAMP_MILLIS_KEY, "0L"); + ConfigurationKeys.SCHEDULER_EVENT_TO_TRIGGER_TIMESTAMP_MILLIS_KEY, "0"); this.orchestrator.orchestrate(flowSpec, jobProps, Long.parseLong(triggerTimestampMillis)); } catch (Exception e) { throw new JobException("Failed to run Spec: " + jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY), e); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java index 99d5e00694c..fae40c9bc90 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java @@ -23,7 +23,6 @@ import java.util.UUID; import java.util.concurrent.TimeUnit; -import com.google.common.base.Optional; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -189,7 +188,7 @@ protected void submitFlowToDagManagerHelper(String flowGroup, String flowName) { try { URI flowUri = FlowSpec.Utils.createFlowSpecUri(flowId); spec = (FlowSpec) flowCatalog.getSpecs(flowUri); - this.orchestrator.submitFlowToDagManager(spec, Optional.absent()); + this.orchestrator.submitFlowToDagManager(spec); } catch (URISyntaxException e) { log.warn("Could not create URI object for flowId {} due to error {}", flowId, e.getMessage()); this.unexpectedErrors.mark(); From b6b78a56e7d96ba54c040be8eea50b7224b14bed Mon Sep 17 00:00:00 2001 From: Urmi Mustafi Date: Thu, 15 Jun 2023 09:42:11 -0700 Subject: [PATCH 11/11] small clean ups --- .../api/MysqlMultiActiveLeaseArbiter.java | 19 +++++++++++-------- .../orchestration/FlowTriggerHandler.java | 6 +++--- .../modules/orchestration/Orchestrator.java | 1 + .../DagActionStoreChangeMonitor.java | 2 +- 4 files 
changed, 16 insertions(+), 12 deletions(-) diff --git a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java index f1765e2e712..8a40c71b2e5 100644 --- a/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java +++ b/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/api/MysqlMultiActiveLeaseArbiter.java @@ -169,12 +169,15 @@ public LeaseAttemptStatus tryAcquireLease(DagActionStore.DagAction flowAction, l return getInfoStatement.executeQuery(); }, true); + String formattedSelectAfterInsertStatement = + String.format(SELECT_AFTER_INSERT_STATEMENT, this.leaseArbiterTableName, this.constantsTableName); try { // CASE 1: If no existing row for this flow action, then go ahead and insert if (!resultSet.next()) { + String formattedAcquireLeaseNewRowStatement = + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT, this.leaseArbiterTableName); ResultSet rs = withPreparedStatement( - String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_NEW_ROW_STATEMENT + "; " + SELECT_AFTER_INSERT_STATEMENT, - this.leaseArbiterTableName, this.leaseArbiterTableName, this.constantsTableName), + formattedAcquireLeaseNewRowStatement + "; " + formattedSelectAfterInsertStatement, insertStatement -> { completeInsertPreparedStatement(insertStatement, flowAction, eventTimeMillis); return insertStatement.executeQuery(); @@ -214,10 +217,10 @@ else if (leaseValidityStatus == 2) { dbEventTimestamp, dbLeaseAcquisitionTimestamp, dbLinger); } // Use our event to acquire lease, check for previous db eventTimestamp and leaseAcquisitionTimestamp + String formattedAcquireLeaseIfMatchingAllStatement = + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_MATCHING_ALL_COLS_STATEMENT, this.leaseArbiterTableName); ResultSet rs = withPreparedStatement( - String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_MATCHING_ALL_COLS_STATEMENT + "; " - + 
SELECT_AFTER_INSERT_STATEMENT, this.leaseArbiterTableName, this.leaseArbiterTableName, - this.constantsTableName), + formattedAcquireLeaseIfMatchingAllStatement + "; " + formattedSelectAfterInsertStatement, updateStatement -> { completeUpdatePreparedStatement(updateStatement, flowAction, eventTimeMillis, true, true, dbEventTimestamp, dbLeaseAcquisitionTimestamp); @@ -231,10 +234,10 @@ else if (leaseValidityStatus == 2) { } // CASE 7: Distinct event, no longer leasing event in db // Use our event to acquire lease, check for previous db eventTimestamp and NULL leaseAcquisitionTimestamp + String formattedAcquireLeaseIfFinishedStatement = + String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT, this.leaseArbiterTableName); ResultSet rs = withPreparedStatement( - String.format(CONDITIONALLY_ACQUIRE_LEASE_IF_FINISHED_LEASING_STATEMENT + "; " - + SELECT_AFTER_INSERT_STATEMENT, this.leaseArbiterTableName, this.leaseArbiterTableName, - this.constantsTableName), + formattedAcquireLeaseIfFinishedStatement + "; " + formattedSelectAfterInsertStatement, updateStatement -> { completeUpdatePreparedStatement(updateStatement, flowAction, eventTimeMillis, true, false, dbEventTimestamp, null); diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java index a0803bb55b8..42ab0af96fa 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/FlowTriggerHandler.java @@ -116,14 +116,14 @@ public void handleTriggerEvent(Properties jobProps, DagActionStore.DagAction flo } // Called after obtaining a lease to persist the flow action to {@link DagActionStore} and mark the lease as done - private boolean persistFlowAction(MultiActiveLeaseArbiter.LeaseObtainedStatus status) { + 
private boolean persistFlowAction(MultiActiveLeaseArbiter.LeaseObtainedStatus leaseStatus) { try { - DagActionStore.DagAction flowAction = status.getFlowAction(); + DagActionStore.DagAction flowAction = leaseStatus.getFlowAction(); this.dagActionStore.addDagAction(flowAction.getFlowGroup(), flowAction.getFlowName(), flowAction.getFlowExecutionId(), flowAction.getFlowActionType()); // If the flow action has been persisted to the {@link DagActionStore} we can close the lease this.numFlowsSubmitted.mark(); - return this.multiActiveLeaseArbiter.recordLeaseSuccess(status); + return this.multiActiveLeaseArbiter.recordLeaseSuccess(leaseStatus); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java index 4fc23c0d1a8..a0c19678955 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/orchestration/Orchestrator.java @@ -378,6 +378,7 @@ public void submitFlowToDagManager(FlowSpec flowSpec) throws IOException { submitFlowToDagManager(flowSpec, specCompiler.compileFlow(flowSpec)); } + public void submitFlowToDagManager(FlowSpec flowSpec, Dag jobExecutionPlanDag) throws IOException { try { diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java index fae40c9bc90..456851612af 100644 --- a/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java +++ b/gobblin-service/src/main/java/org/apache/gobblin/service/monitoring/DagActionStoreChangeMonitor.java @@ -105,7 +105,7 @@ protected void assignTopicPartitions() { @Override /* - This class is 
multithreaded and this message will be called by multiple threads, however any given message will be + This class is multithreaded and this method will be called by multiple threads, however any given message will be partitioned and processed by only one thread (and corresponding queue). */ protected void processMessage(DecodeableKafkaRecord message) {