Skip to content

Commit ff5efe8

Browse files
authored
[eventd] Fix eventd UT flakiness (#17055)
### Why I did it Fix flakiness of eventd UT - run sub after capture service starts ##### Work item tracking - Microsoft ADO **(number only)**:25650744 #### How I did it Run sub socket after capture socket is initialized #### How to verify it Pipeline
1 parent c6602c9 commit ff5efe8

File tree

5 files changed

+63
-59
lines changed

5 files changed

+63
-59
lines changed

src/sonic-eventd/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ endif
2929
-include rsyslog_plugin/subdir.mk
3030
-include rsyslog_plugin_tests/subdir.mk
3131

32-
all: sonic-eventd eventd-tool rsyslog-plugin
32+
all: sonic-eventd eventd-tests eventd-tool rsyslog-plugin rsyslog-plugin-tests
3333

3434
sonic-eventd: $(OBJS)
3535
@echo 'Building target: $@'

src/sonic-eventd/src/eventd.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -546,9 +546,9 @@ capture_service::set_control(capture_control_t ctrl, event_serialized_lst_t *lst
546546
switch(ctrl) {
547547
case INIT_CAPTURE:
548548
m_thr = thread(&capture_service::do_capture, this);
549-
for(int i=0; !m_cap_run && (i < 100); ++i) {
549+
for(int i=0; !m_cap_run && (i < CAPTURE_SERVICE_POLLING_RETRIES); ++i) {
550550
/* Wait max a second for thread to init */
551-
this_thread::sleep_for(chrono::milliseconds(10));
551+
this_thread::sleep_for(chrono::milliseconds(CAPTURE_SERVICE_POLLING_DURATION));
552552
}
553553
RET_ON_ERR(m_cap_run, "Failed to init capture");
554554
m_ctrl = ctrl;

src/sonic-eventd/src/eventd.h

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ typedef enum {
2121

2222
#define EVENTS_STATS_FIELD_NAME "value"
2323
#define STATS_HEARTBEAT_MIN 300
24+
#define CAPTURE_SERVICE_POLLING_DURATION 10
25+
#define CAPTURE_SERVICE_POLLING_RETRIES 100
2426

2527
/*
2628
* Started by eventd_service.

src/sonic-eventd/tests/eventd_ut.cpp

+48-24
Original file line numberDiff line numberDiff line change
@@ -152,25 +152,23 @@ static const test_data_t ldata[] = {
152152

153153

154154
void run_cap(void *zctx, bool &term, string &read_source,
155-
int &cnt)
155+
int &cnt, bool &should_read_control)
156156
{
157157
void *mock_cap = zmq_socket (zctx, ZMQ_SUB);
158158
string source;
159159
internal_event_t ev_int;
160160
int block_ms = 200;
161161
int i=0;
162-
static int proxy_finished_init = false;
163162

164163
EXPECT_TRUE(NULL != mock_cap);
165164
EXPECT_EQ(0, zmq_connect(mock_cap, get_config(CAPTURE_END_KEY).c_str()));
166165
EXPECT_EQ(0, zmq_setsockopt(mock_cap, ZMQ_SUBSCRIBE, "", 0));
167166
EXPECT_EQ(0, zmq_setsockopt(mock_cap, ZMQ_RCVTIMEO, &block_ms, sizeof (block_ms)));
168167

169-
if(!proxy_finished_init) {
168+
if(should_read_control) {
170169
zmq_msg_t msg;
171170
zmq_msg_init(&msg);
172-
EXPECT_EQ(1, zmq_msg_recv(&msg, mock_cap, 0)); // Subscription message
173-
proxy_finished_init = true;
171+
EXPECT_NE(1, zmq_msg_recv(&msg, mock_cap, 0)); // Subscription message should be read by do_capture
174172
}
175173

176174
while(!term) {
@@ -227,10 +225,10 @@ void run_pub(void *mock_pub, const string wr_source, internal_events_lst_t &lst)
227225
}
228226
}
229227

230-
231228
TEST(eventd, proxy)
232229
{
233230
printf("Proxy TEST started\n");
231+
bool should_read_control = false;
234232
bool term_sub = false;
235233
bool term_cap = false;
236234
string rd_csource, rd_source, wr_source("hello");
@@ -247,12 +245,12 @@ TEST(eventd, proxy)
247245
/* Starting proxy */
248246
EXPECT_EQ(0, pxy->init());
249247

248+
/* capture in a thread */
249+
thread thrc(&run_cap, zctx, ref(term_cap), ref(rd_csource), ref(rd_cevts_sz), ref(should_read_control));
250+
250251
/* subscriber in a thread */
251252
thread thr(&run_sub, zctx, ref(term_sub), ref(rd_source), ref(rd_evts), ref(rd_evts_sz));
252253

253-
/* capture in a thread */
254-
thread thrc(&run_cap, zctx, ref(term_cap), ref(rd_csource), ref(rd_cevts_sz));
255-
256254
/* Init pub connection */
257255
void *mock_pub = init_pub(zctx);
258256

@@ -275,9 +273,6 @@ TEST(eventd, proxy)
275273
}
276274
this_thread::sleep_for(chrono::milliseconds(1000));
277275

278-
delete pxy;
279-
pxy = NULL;
280-
281276
term_sub = true;
282277
term_cap = true;
283278

@@ -287,6 +282,18 @@ TEST(eventd, proxy)
287282
EXPECT_EQ(rd_cevts_sz, wr_evts.size());
288283

289284
zmq_close(mock_pub);
285+
286+
/* Do control test */
287+
288+
should_read_control = true;
289+
290+
/* capture in a thread */
291+
thread thrcc(&run_cap, zctx, ref(term_cap), ref(rd_csource), ref(rd_cevts_sz), ref(should_read_control));
292+
293+
delete pxy;
294+
pxy = NULL;
295+
296+
thrcc.join();
290297
zmq_ctx_term(zctx);
291298

292299
/* Provide time for async proxy removal to complete */
@@ -295,7 +302,6 @@ TEST(eventd, proxy)
295302
printf("eventd_proxy is tested GOOD\n");
296303
}
297304

298-
299305
TEST(eventd, capture)
300306
{
301307
printf("Capture TEST started\n");
@@ -329,9 +335,6 @@ TEST(eventd, capture)
329335
/* Starting proxy */
330336
EXPECT_EQ(0, pxy->init());
331337

332-
/* Run subscriber; Else publisher will drop events on floor, with no subscriber. */
333-
thread thr_sub(&run_sub, zctx, ref(term_sub), ref(sub_source), ref(sub_evts), ref(sub_evts_sz));
334-
335338
/* Create capture service */
336339
capture_service *pcap = new capture_service(zctx, cache_max, &stats_instance);
337340

@@ -341,6 +344,9 @@ TEST(eventd, capture)
341344
/* Initialize the capture */
342345
EXPECT_EQ(0, pcap->set_control(INIT_CAPTURE));
343346

347+
/* Run subscriber; Else publisher will drop events on floor, with no subscriber. */
348+
thread thr_sub(&run_sub, zctx, ref(term_sub), ref(sub_source), ref(sub_evts), ref(sub_evts_sz));
349+
344350
EXPECT_TRUE(init_cache > 1);
345351
EXPECT_TRUE((cache_max+3) < (int)ARRAY_SIZE(ldata));
346352

@@ -473,9 +479,6 @@ TEST(eventd, captureCacheMax)
473479
/* Starting proxy */
474480
EXPECT_EQ(0, pxy->init());
475481

476-
/* Run subscriber; Else publisher will drop events on floor, with no subscriber. */
477-
thread thr_sub(&run_sub, zctx, ref(term_sub), ref(sub_source), ref(sub_evts), ref(sub_evts_sz));
478-
479482
/* Create capture service */
480483
capture_service *pcap = new capture_service(zctx, cache_max, &stats_instance);
481484

@@ -484,6 +487,9 @@ TEST(eventd, captureCacheMax)
484487

485488
EXPECT_TRUE(init_cache > 1);
486489

490+
/* Run subscriber; Else publisher will drop events on floor, with no subscriber. */
491+
thread thr_sub(&run_sub, zctx, ref(term_sub), ref(sub_source), ref(sub_evts), ref(sub_evts_sz));
492+
487493
/* Collect few serailized strings of events for startup cache */
488494
for(int i=0; i < init_cache; ++i) {
489495
internal_event_t ev(create_ev(ldata[i]));
@@ -595,6 +601,7 @@ TEST(eventd, service)
595601
}
596602

597603
thread thread_service(&run_eventd_service);
604+
this_thread::sleep_for(chrono::milliseconds(CAPTURE_SERVICE_POLLING_DURATION * CAPTURE_SERVICE_POLLING_RETRIES));
598605

599606
/* Need client side service to interact with server side */
600607
EXPECT_EQ(0, service.init_client(zctx));
@@ -610,7 +617,7 @@ TEST(eventd, service)
610617
string wr_source("hello");
611618

612619
/* Test service startup caching */
613-
event_serialized_lst_t evts_start, evts_read;
620+
event_serialized_lst_t evts_start, evts_read, polled_events;
614621

615622
for(int i=0; i<wr_sz; ++i) {
616623
string evt_str;
@@ -624,15 +631,32 @@ TEST(eventd, service)
624631
/* Publish events. */
625632
run_pub(mock_pub, wr_source, wr_evts);
626633

627-
/* Published events must have been captured. Give a pause, to ensure sent. */
628-
this_thread::sleep_for(chrono::milliseconds(200));
634+
int max_polling_duration = 2000;
635+
int polling_interval = 100;
636+
auto poll_start_ts = chrono::steady_clock::now();
637+
638+
while(true) {
639+
auto current_ts = chrono::steady_clock::now();
640+
if(chrono::duration_cast<chrono::milliseconds>(current_ts - poll_start_ts).count() >= max_polling_duration) {
641+
break;
642+
}
643+
event_serialized_lst_t read_events;
644+
service.cache_read(read_events);
645+
polled_events.insert(polled_events.end(), read_events.begin(), read_events.end());
646+
if (!read_events.empty()) {
647+
break;
648+
}
649+
this_thread::sleep_for(chrono::milliseconds(polling_interval));
650+
}
629651

630652
EXPECT_EQ(0, service.cache_stop());
631653

632-
/* Read the cache; expect wr_sz events */
654+
/* Read remaining events in cache, if any */
633655
EXPECT_EQ(0, service.cache_read(evts_read));
634656

635-
EXPECT_EQ(evts_read, evts_start);
657+
polled_events.insert(polled_events.end(), evts_read.begin(), evts_read.end());
658+
659+
EXPECT_EQ(polled_events, evts_start);
636660

637661
zmq_close(mock_pub);
638662
}

src/sonic-eventd/tests/main.cpp

+10-32
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "gtest/gtest.h"
22
#include "dbconnector.h"
33
#include <iostream>
4+
#include <stdexcept>
45

56
using namespace std;
67
using namespace swss;
@@ -20,57 +21,34 @@ class SwsscommonEnvironment : public ::testing::Environment {
2021
// Override this to define how to set up the environment
2122
void SetUp() override {
2223
// by default , init should be false
23-
cout<<"Default : isInit = "<<SonicDBConfig::isInit()<<endl;
24+
cout << "Default : isInit = " << SonicDBConfig::isInit() << endl;
2425
EXPECT_FALSE(SonicDBConfig::isInit());
2526

26-
// load nonexisting file, should throw exception with NO file existing
27-
try
28-
{
29-
cout<<"INIT: loading nonexisting db config file"<<endl;
30-
SonicDBConfig::initialize(nonexisting_file);
31-
}
32-
catch (exception &e)
33-
{
34-
EXPECT_TRUE(strstr(e.what(), "Sonic database config file doesn't exist"));
35-
}
27+
EXPECT_THROW(SonicDBConfig::initialize(nonexisting_file), runtime_error);
28+
3629
EXPECT_FALSE(SonicDBConfig::isInit());
3730

3831
// load local config file, init should be true
3932
SonicDBConfig::initialize(existing_file);
40-
cout<<"INIT: load local db config file, isInit = "<<SonicDBConfig::isInit()<<endl;
33+
cout << "INIT: load local db config file, isInit = " << SonicDBConfig::isInit() << endl;
4134
EXPECT_TRUE(SonicDBConfig::isInit());
4235

4336
// Test the database_global.json file
4437
// by default , global_init should be false
45-
cout<<"Default : isGlobalInit = "<<SonicDBConfig::isGlobalInit()<<endl;
38+
cout << "Default : isGlobalInit = " << SonicDBConfig::isGlobalInit() << endl;
4639
EXPECT_FALSE(SonicDBConfig::isGlobalInit());
4740

4841
// Call an API which actually needs the data populated by SonicDBConfig::initializeGlobalConfig
49-
try
50-
{
51-
cout<<"INIT: Invoking SonicDBConfig::getDbId(APPL_DB, asic0)"<<endl;
52-
SonicDBConfig::getDbId(TEST_DB, TEST_NAMESPACE);
53-
}
54-
catch (exception &e)
55-
{
56-
EXPECT_TRUE(strstr(e.what(), "Initialize global DB config using API SonicDBConfig::initializeGlobalConfig"));
57-
}
42+
EXPECT_THROW(SonicDBConfig::getDbId(TEST_DB, TEST_NAMESPACE), runtime_error);
5843

5944
// load local global file, init should be true
6045
SonicDBConfig::initializeGlobalConfig(global_existing_file);
61-
cout<<"INIT: load global db config file, isInit = "<<SonicDBConfig::isGlobalInit()<<endl;
46+
cout << "INIT: load global db config file, isInit = " << SonicDBConfig::isGlobalInit() << endl;
6247
EXPECT_TRUE(SonicDBConfig::isGlobalInit());
6348

6449
// Call an API with wrong namespace passed
65-
try
66-
{
67-
cout<<"INIT: Invoking SonicDBConfig::getDbId(APPL_DB, invalid)"<<endl;
68-
SonicDBConfig::getDbId(TEST_DB, INVALID_NAMESPACE);
69-
}
70-
catch (exception &e)
71-
{
72-
EXPECT_TRUE(strstr(e.what(), "Namespace invalid is not a valid namespace name in config file"));
73-
}
50+
cout << "INIT: Invoking SonicDBConfig::getDbId(APPL_DB, invalid)" << endl;
51+
EXPECT_THROW(SonicDBConfig::getDbId(TEST_DB, INVALID_NAMESPACE), out_of_range);
7452

7553
// Get this info handy
7654
try

0 commit comments

Comments
 (0)