Skip to content

Commit

Permalink
Add warm restart support (#13)
Browse files Browse the repository at this point in the history
* Add warm restart support

* Prevent hard reinit when warm start
  • Loading branch information
kcudnik committed May 3, 2016
1 parent f41c54e commit 1f9baf4
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 22 deletions.
9 changes: 8 additions & 1 deletion syncd/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
AM_CPPFLAGS =
AM_CPPFLAGS += -I$(top_srcdir)/common

bin_PROGRAMS = syncd
bin_PROGRAMS = syncd syncd_request_shutdown

if DEBUG
DBGFLAGS = -ggdb -DDEBUG
Expand Down Expand Up @@ -33,3 +33,10 @@ syncd_CPPFLAGS = $(DBGFLAGS) $(AM_CPPFLAGS) $(CFLAGS_COMMON) \
-I/usr/include/sai $(SAIFLAGS)
syncd_LDADD = -lhiredis -lswsscommon -lsai -lpthread


syncd_request_shutdown_SOURCES = syncd_request_shutdown.cpp

syncd_request_shutdown_CPPFLAGS = $(DBGFLAGS) $(AM_CPPFLAGS) $(CFLAGS_COMMON)

syncd_request_shutdown_LDADD = -lhiredis -lswsscommon -lpthread

2 changes: 2 additions & 0 deletions syncd/profile.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SAI_WARM_BOOT_READ_FILE=/var/cache/sai_warmboot.bin
SAI_WARM_BOOT_WRITE_FILE=/var/cache/sai_warmboot.bin
129 changes: 112 additions & 17 deletions syncd/syncd.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
#include <thread>
#include "syncd.h"

#include <getopt.h>

std::mutex g_mutex;

swss::RedisClient *g_redisClient = NULL;
Expand Down Expand Up @@ -858,7 +855,7 @@ void sendResponse(sai_status_t status)

std::vector<swss::FieldValueTuple> entry;

SWSS_LOG_INFO("sending response: %s", strStatus.c_str());
SWSS_LOG_NOTICE("sending response: %s", strStatus.c_str());

notifySyncdResponse->send(strStatus, "", entry);
}
Expand Down Expand Up @@ -904,6 +901,7 @@ struct cmdOptions
{
int countersThreadIntervalInSeconds;
bool diagShell;
bool warmStart;
bool disableCountersThread;
std::string profileMapFile;
};
Expand All @@ -924,39 +922,59 @@ cmdOptions handleCmdLine(int argc, char **argv)
{
{ "diag", no_argument, 0, 'd' },
{ "nocounters", no_argument, 0, 'N' },
{ "warmStart", no_argument, 0, 'w' },
{ "profile", required_argument, 0, 'p' },
{ "countersInterval", required_argument, 0, 'i' },
{ 0, 0, 0, 0 }
};

int option_index = 0;

int c = getopt_long(argc, argv, "dNp:i:", long_options, &option_index);
int c = getopt_long(argc, argv, "dNwp:i:", long_options, &option_index);

if (c == -1)
break;

switch (c)
{
case 'N':
SWSS_LOG_INFO("disable counters thread");
SWSS_LOG_NOTICE("disable counters thread");
options.disableCountersThread = true;
break;

case 'd':
SWSS_LOG_INFO("enable diag shell");
SWSS_LOG_NOTICE("enable diag shell");
options.diagShell = true;
break;

case 'p':
SWSS_LOG_INFO("profile map file: %s", optarg);
SWSS_LOG_NOTICE("profile map file: %s", optarg);
options.profileMapFile = std::string(optarg);
break;

case 'i':
SWSS_LOG_INFO("counters thread interval: %s", optarg);
options.countersThreadIntervalInSeconds =
std::max(defaultCountersThreadIntervalInSeconds, std::stoi(std::string(optarg)));
{
SWSS_LOG_NOTICE("counters thread interval: %s", optarg);

int interval = std::stoi(std::string(optarg));

if (interval == 0)
{
// use zero interval to disable counters thread
options.disableCountersThread = true;
}
else
{
options.countersThreadIntervalInSeconds =
std::max(defaultCountersThreadIntervalInSeconds, interval);
}

break;
}

case 'w':
SWSS_LOG_NOTICE("warm start request");
options.warmStart = true;
break;

case '?':
Expand Down Expand Up @@ -992,6 +1010,9 @@ void handleProfileMap(const std::string& profileMapFile)

while(getline(profile, line))
{
if (line.size() > 0 && (line[0] == '#' || line[0] == ';'))
continue;

size_t pos = line.find("=");

if (pos == std::string::npos)
Expand All @@ -1009,6 +1030,34 @@ void handleProfileMap(const std::string& profileMapFile)
}
}

// Pops a single pending notification from the restart query channel and
// translates its operation name into a warm-restart hint.
//
// restartQuery - notification consumer from which one message is popped
//
// Returns true only when the popped operation is "WARM". Returns false for
// "COLD" and for any other operation name (the latter is logged as a warning
// and treated as COLD).
bool handleRestartQuery(swss::NotificationConsumer &restartQuery)
{
    SWSS_LOG_ENTER();

    std::string op;
    std::string data;
    std::vector<swss::FieldValueTuple> values;

    restartQuery.pop(op, data, values);

    // op is a string, so it must be formatted with %s; handing a char
    // pointer to a %d conversion is undefined behavior.
    SWSS_LOG_DEBUG("op = %s", op.c_str());

    if (op == "COLD")
    {
        SWSS_LOG_NOTICE("received COLD switch shutdown event");
        return false;
    }

    if (op == "WARM")
    {
        SWSS_LOG_NOTICE("received WARM switch shutdown event");
        return true;
    }

    // anything else is unexpected; fall back to the safe (cold) behavior
    SWSS_LOG_WARN("received '%s' unknown switch shutdown event, assuming COLD", op.c_str());
    return false;
}

int main(int argc, char **argv)
{
swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG);
Expand All @@ -1028,6 +1077,7 @@ int main(int argc, char **argv)

swss::ConsumerTable *asicState = new swss::ConsumerTable(db, "ASIC_STATE");
swss::NotificationConsumer *notifySyncdQuery = new swss::NotificationConsumer(db, "NOTIFYSYNCDREQUERY");
swss::NotificationConsumer *restartQuery = new swss::NotificationConsumer(db, "RESTARTQUERY");

// at the end we can't use producer consumer concept since
// if one process restarts there may be something in the queue
Expand All @@ -1042,6 +1092,22 @@ int main(int argc, char **argv)
gProfileMap[SAI_KEY_INIT_CONFIG_FILE] = mlnx_config_file;
#endif /* MLNX_SAI */

if (options.warmStart)
{
const char *warmBootReadFile = profile_get_value(0, SAI_KEY_WARM_BOOT_READ_FILE);

SWSS_LOG_NOTICE("using warmBootReadFile: '%s'", warmBootReadFile);

if (warmBootReadFile == NULL || access(warmBootReadFile, F_OK) == -1)
{
SWSS_LOG_WARN("user requested warmStart but warmBootReadFile is not specified or not accesible, forcing cold start");

options.warmStart = false;
}
}

gProfileMap[SAI_KEY_WARM_BOOT] = options.warmStart ? "1" : "0";

sai_api_initialize(0, (service_method_table_t*)&test_services);

populate_sai_apis();
Expand All @@ -1060,34 +1126,37 @@ int main(int argc, char **argv)

if (options.diagShell)
{
SWSS_LOG_INFO("starting bcm diag shell thread");
SWSS_LOG_NOTICE("starting bcm diag shell thread");

std::thread bcm_diag_shell_thread = std::thread(sai_diag_shell);
bcm_diag_shell_thread.detach();
}

#endif /* BRCMSAI */

SWSS_LOG_INFO("syncd started");
SWSS_LOG_NOTICE("syncd started");

bool warmRestartHint = false;

try
{
onSyncdStart();

SWSS_LOG_INFO("syncd listening for events");
onSyncdStart(options.warmStart);

if (options.disableCountersThread == false)
{
SWSS_LOG_INFO("starting counters thread");
SWSS_LOG_NOTICE("starting counters thread");

startCountersThread(options.countersThreadIntervalInSeconds);
}

SWSS_LOG_NOTICE("syncd listening for events");

swss::Select s;

s.addSelectable(getRequest);
s.addSelectable(asicState);
s.addSelectable(notifySyncdQuery);
s.addSelectable(restartQuery);

while(true)
{
Expand All @@ -1097,6 +1166,12 @@ int main(int argc, char **argv)

int result = s.select(&sel, &fd);

if (sel == restartQuery)
{
warmRestartHint = handleRestartQuery(*restartQuery);
break;
}

if (sel == notifySyncdQuery)
{
notifySyncd(*notifySyncdQuery);
Expand All @@ -1112,9 +1187,29 @@ int main(int argc, char **argv)
catch(const std::exception &e)
{
SWSS_LOG_ERROR("Runtime error: %s", e.what());

exit(EXIT_FAILURE);
}

endCountersThread();

if (warmRestartHint)
{
const char *warmBootWriteFile = profile_get_value(0, SAI_KEY_WARM_BOOT_WRITE_FILE);

SWSS_LOG_NOTICE("using warmBootWriteFile: '%s'", warmBootWriteFile);

if (warmBootWriteFile == NULL)
{
SWSS_LOG_WARN("user requested warm shutdown but warmBootWriteFile is not specified, forcing cold shutdown");

warmRestartHint = false;
}
}

sai_switch_api->shutdown_switch(warmRestartHint);

SWSS_LOG_NOTICE("calling api uninitialize");

sai_api_uninitialize();
}
5 changes: 4 additions & 1 deletion syncd/syncd.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
#include <stdexcept>
#include <thread>
#include <mutex>
#include <thread>

#include <unistd.h>
#include <execinfo.h>
#include <signal.h>
#include <getopt.h>

#include "string.h"
extern "C" {
Expand All @@ -24,6 +26,7 @@ extern "C" {
#include "swss/consumertable.h"
#include "swss/notificationconsumer.h"
#include "swss/notificationproducer.h"
#include "swss/selectableevent.h"
#include "swss/select.h"
#include "swss/scheme.h"
#include "swss/logger.h"
Expand All @@ -44,7 +47,7 @@ extern "C" {

extern std::mutex g_mutex;

void onSyncdStart();
void onSyncdStart(bool warmStart);
void hardReinit();

sai_object_id_t replaceVidToRid(const sai_object_id_t &virtual_object_id);
Expand Down
14 changes: 12 additions & 2 deletions syncd/syncd_counters.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "syncd.h"
#include <condition_variable>

void collectCounters(swss::Table &countersTable,
const std::vector<sai_port_stat_counter_t> &supportedCounters)
Expand Down Expand Up @@ -84,6 +85,9 @@ std::vector<sai_port_stat_counter_t> getSupportedCounters(sai_object_id_t portId
static volatile bool g_runCountersThread = false;
static std::shared_ptr<std::thread> g_countersThread = NULL;

static std::mutex mtx_sleep;
static std::condition_variable cv_sleep;

void collectCountersThread(int intervalInSeconds)
{
SWSS_LOG_ENTER();
Expand All @@ -103,8 +107,8 @@ void collectCountersThread(int intervalInSeconds)
{
collectCounters(countersTable, supportedCounters);

// collect counters every second
sleep(intervalInSeconds);
std::unique_lock<std::mutex> lk(mtx_sleep);
cv_sleep.wait_for(lk, std::chrono::seconds(intervalInSeconds));
}
}

Expand All @@ -123,8 +127,14 @@ void endCountersThread()

g_runCountersThread = false;

cv_sleep.notify_all();

if (g_countersThread != NULL)
{
SWSS_LOG_NOTICE("counters thread join");

g_countersThread->join();
}

SWSS_LOG_NOTICE("counters thread ended");
}
10 changes: 9 additions & 1 deletion syncd/syncd_reinit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ void helperCheckVlanId()
g_redisClient->hset(strKey, "NULL", "NULL");
}

void onSyncdStart()
void onSyncdStart(bool warmStart)
{
SWSS_LOG_ENTER();

Expand All @@ -554,5 +554,13 @@ void onSyncdStart()

helperCheckPortIds();

if (warmStart)
{
SWSS_LOG_NOTICE("skipping hard reinit since WARM start was performed");
return;
}

SWSS_LOG_NOTICE("performing hard reinit since COLD start was performed");

hardReinit();
}
Loading

0 comments on commit 1f9baf4

Please sign in to comment.