Skip to content

Commit c568ba8

Browse files
dvlasenkdcantrell
authored andcommitted
[new] inspect_virus: use parallel collector to run clamav on all CPUs
On a 8-thread CPU (Intel Tigerlake laptop), in test run a-la rpminspect -c /usr/share/rpminspect/fedora.yaml -p rawhide -Dv -k -t VERIFY -a x86_64,noarch,src -T virus kernel-6.8.10-200.fc39 this cuts "virus inspection" run time from 19 to 8 minutes. Signed-off-by: Denys Vlasenko <[email protected]>
1 parent b89d8ca commit c568ba8

File tree

2 files changed

+156
-27
lines changed

2 files changed

+156
-27
lines changed

lib/inspect_virus.c

+146-27
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,38 @@
1111
#include <sys/types.h>
1212
#include <clamav.h>
1313
#include "rpminspect.h"
14+
#include "parallel.h"
1415

1516
static struct cl_engine *engine = NULL;
1617
#ifndef CL_SCAN_STDOPT
1718
struct cl_scan_options clamav_opts;
1819
#endif
1920

20-
static struct result_params params;
21+
static parallel_t *col = NULL;
2122

22-
static bool virus_driver(struct rpminspect *ri, rpmfile_entry_t *file)
23+
/* these variables have different values in each child */
24+
static unsigned child_no = 0;
25+
static unsigned file_no = (unsigned)-1;
26+
static int virus_countdown = 4000;
27+
static int write_fd;
28+
29+
static bool virus_driver(struct rpminspect *ri __attribute__((unused)), rpmfile_entry_t *file)
2330
{
24-
bool result = true;
2531
int r = 0;
2632
const char *virus = NULL;
2733

34+
/* cyclically count from 0 to NCHILDREN-1 */
35+
file_no++;
36+
37+
if (file_no == col->max_pids) {
38+
file_no = 0;
39+
}
40+
41+
/* handle only every Nth file */
42+
if (file_no != child_no) {
43+
return true;
44+
}
45+
2846
/* only check regular files */
2947
if (!S_ISREG(file->st.st_mode)) {
3048
return true;
@@ -36,32 +54,37 @@ static bool virus_driver(struct rpminspect *ri, rpmfile_entry_t *file)
3654
#else
3755
r = cl_scanfile(file->fullpath, &virus, NULL, engine, CL_SCAN_STDOPT);
3856
#endif
57+
#if 0 /* debug: uncomment for error injection - test that virus detection indeed works */
58+
if (child_no == 0 && file_no == 0 && virus_countdown == 4000) {
59+
virus = "b0g0virus";
60+
r = CL_VIRUS;
61+
}
62+
#endif
63+
64+
if (r != CL_CLEAN && r != CL_VIRUS) {
65+
/* unexpected failure, bail out */
66+
errx(EXIT_FAILURE, "*** cl_scanfile(%s): %s", file->localpath, cl_strerror(r));
67+
}
3968

4069
if (r == CL_VIRUS) {
41-
params.severity = get_secrule_result_severity(ri, file, SECRULE_VIRUS);
42-
43-
if (params.severity != RESULT_NULL && params.severity != RESULT_SKIP) {
44-
if (params.severity == RESULT_INFO) {
45-
params.waiverauth = NOT_WAIVABLE;
46-
params.verb = VERB_OK;
47-
} else {
48-
params.waiverauth = WAIVABLE_BY_SECURITY;
49-
params.verb = VERB_FAILED;
50-
result = false;
51-
}
70+
if (!virus || !virus[0]) {
71+
/* "nameless" virus? probably clamav bug, and our code would break on such: bail out */
72+
errx(EXIT_FAILURE, "*** cl_scanfile(%s): virus with no name???", file->localpath);
73+
}
5274

53-
params.arch = get_rpm_header_arch(file->rpm_header);
54-
params.file = file->localpath;
55-
params.remedy = get_remedy(REMEDY_VIRUS);
56-
xasprintf(&params.msg, _("Virus detected in %s in the %s package on %s: %s"), file->localpath, headerGetString(file->rpm_header, RPMTAG_NAME), params.arch, virus);
57-
add_result(ri, &params);
58-
free(params.msg);
75+
/* Cap the number of reported infections.
76+
* Receiving buffer has sanity limit, and we'd abort if it is exceeded.
77+
* If we see thousands of "infected" files, we probably aren't
78+
* interested in every one of them anyway.
79+
*/
80+
if (virus_countdown != 0) {
81+
virus_countdown--;
82+
full_write(write_fd, virus, strlen(virus) + 1);
83+
full_write(write_fd, &file, sizeof(file));
5984
}
60-
} else if (r != CL_CLEAN) {
61-
warnx("*** cl_scanfile(%s): %s", file->localpath, cl_strerror(r));
6285
}
6386

64-
return result;
87+
return true;
6588
}
6689

6790
bool inspect_virus(struct rpminspect *ri)
@@ -72,8 +95,8 @@ bool inspect_virus(struct rpminspect *ri)
7295
struct dirent *de = NULL;
7396
char *cvdpath = NULL;
7497
struct cl_cvd *cvd = NULL;
75-
bool result = false;
7698
int r = 0;
99+
struct result_params params;
77100
unsigned int loaded_signatures; /* unused, exists to make cl_load() happy */
78101

79102
/* initialize clamav */
@@ -189,11 +212,107 @@ bool inspect_virus(struct rpminspect *ri)
189212
params.msg = NULL;
190213
free(params.details);
191214
params.details = NULL;
192-
193-
/* run the virus check on each file */
194215
params.header = NAME_VIRUS;
195216
params.noun = _("virus or malware in ${FILE} on ${ARCH}");
196-
result = foreach_peer_file(ri, NAME_VIRUS, virus_driver);
217+
218+
/* fork $NCPUS children */
219+
fflush(NULL);
220+
col = new_parallel(0); /* 0: will have one child per CPU */
221+
222+
for (child_no = 0; child_no < col->max_pids; child_no++) {
223+
pid_t pid;
224+
int pipefd[2];
225+
226+
if (pipe(pipefd)) {
227+
err(EXIT_FAILURE, "pipe"); /* fatal */
228+
}
229+
230+
int rnd = rand();
231+
pid = fork();
232+
233+
if (pid < 0) {
234+
err(EXIT_FAILURE, "fork"); /* fatal */
235+
}
236+
237+
if (pid == 0) {
238+
/* child */
239+
close(pipefd[0]);
240+
write_fd = pipefd[1];
241+
242+
/* "If you’re using libclamav with a forking daemon you
243+
* should call srand() inside a forked child before making
244+
* any calls to the libclamav functions" - clamav docs
245+
*/
246+
srand(child_no ^ rnd);
247+
248+
/* run the virus check on each Nth file, then exit */
249+
foreach_peer_file(ri, NAME_VIRUS, virus_driver);
250+
_exit(0);
251+
}
252+
253+
/* parent */
254+
/* insert the child into collector */
255+
close(pipefd[1]);
256+
insert_new_pid_and_fd(col, pid, pipefd[0]);
257+
} /* forking N children */
258+
259+
/* Let all children run, collecting their outputs.
260+
* When any one of them finish, process its output.
261+
* Repeat until all of them exit.
262+
*/
263+
bool result = true;
264+
parallel_slot_t *slot;
265+
while ((slot = collect_one(col)) != NULL) {
266+
int r = slot->exit_status;
267+
268+
if (!WIFEXITED(r)) {
269+
errx(EXIT_FAILURE, "cl_scanfile() killed by signal %u", WTERMSIG(r));
270+
}
271+
272+
if (WEXITSTATUS(r) != 0) {
273+
errx(EXIT_FAILURE, "cl_scanfile() exited with %u", WEXITSTATUS(r));
274+
}
275+
276+
char *output = slot->output;
277+
278+
if (output) {
279+
/* consume all pairs of ("virusname",rpmfile_entry_t pointer) in output */
280+
while (output[0]) {
281+
rpmfile_entry_t *file;
282+
const char *virus = output;
283+
284+
output += strlen(virus) + 1;
285+
memcpy(&file, output, sizeof(file)); /* copy unaligned bytes */
286+
output += sizeof(file);
287+
288+
params.severity = get_secrule_result_severity(ri, file, SECRULE_VIRUS);
289+
290+
if (params.severity != RESULT_NULL && params.severity != RESULT_SKIP) {
291+
if (params.severity == RESULT_INFO) {
292+
params.waiverauth = NOT_WAIVABLE;
293+
params.verb = VERB_OK;
294+
} else {
295+
params.waiverauth = WAIVABLE_BY_SECURITY;
296+
params.verb = VERB_FAILED;
297+
result = false;
298+
}
299+
300+
params.arch = get_rpm_header_arch(file->rpm_header);
301+
params.file = file->localpath;
302+
params.remedy = get_remedy(REMEDY_VIRUS);
303+
xasprintf(&params.msg, _("Virus detected in %s in the %s package on %s: %s"), file->localpath, headerGetString(file->rpm_header, RPMTAG_NAME), params.arch, virus);
304+
add_result(ri, &params);
305+
free(params.msg);
306+
}
307+
} /* while (there is unprocessed output from this child) */
308+
309+
free(slot->output);
310+
slot->output = NULL; /* avoid double-free in delete_parallel() */
311+
}
312+
} /* while (waiting for a child to finish) */
313+
314+
delete_parallel(col, /*signal:*/ 0);
315+
col = NULL;
197316

198317
/* hope the result is always this */
199318
if (result) {

src/rpminspect.c

+10
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,16 @@ int main(int argc, char **argv)
390390
/* Be friendly to "rpminspect ... 2>&1 | tee" use case */
391391
setlinebuf(stdout);
392392

393+
/* clamav uses rand() to generate random temporary file names,
394+
* call srand() to avoid name collisions.
395+
* (In fact, source of clamav I examined isn't that bad, it calls
396+
* srand itself once: "srand(curtime_microsec + clock() + rand())"
397+
* but just to be safe, make sure _this_ call to rand() ^^^^^^^^
398+
* isn't deterministic).
399+
* Mixing in PID ensures that even two simultaneously
400+
* started rpminspect instances get differing seeds */
401+
srand(time(NULL) ^ getpid());
402+
393403
/* SIGABRT handler since we use abort() in some failure cases */
394404
abrt.sa_handler = sigabrt_handler;
395405
sigemptyset(&abrt.sa_mask);

0 commit comments

Comments
 (0)