From 2b1cde778318564d4fc040773d45d0adc07da73a Mon Sep 17 00:00:00 2001 From: George Dunlap Date: Wed, 11 Oct 2017 18:49:42 +0100 Subject: [PATCH] fuzz/x86_emulate: Take multiple test files for inputs Finding aggregate coverage for a set of test files means running each afl-generated test case through the harness. At the moment, this is done by re-executing afl-harness-cov with each input file. When a large number of test cases have been generated, this can take a significant amount of time; a recent test with 30k total files generated by 4 parallel fuzzers took over 7 minutes. The vast majority of this time is taken up with 'exec', however. Since the harness is already designed to loop over multiple inputs for llvm "persistent mode", just allow it to take a large number of inputs on the same command line when *not* running in llvm "persistent mode". Then the command can be efficiently executed like this: ls */queue/id* | xargs $path/afl-harness-cov For the above-mentioned test on 30k files, the time to generate coverage data was reduced from 7 minutes to under 30 seconds. Signed-off-by: George Dunlap Acked-by: Jan Beulich Acked-by: Andrew Cooper --- tools/fuzz/README.afl | 7 ++++++ .../x86_instruction_emulator/afl-harness.c | 25 +++++++++++++------ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/fuzz/README.afl b/tools/fuzz/README.afl index 8b58b8cdea..a59564985a 100644 --- a/tools/fuzz/README.afl +++ b/tools/fuzz/README.afl @@ -49,6 +49,13 @@ generate coverage data. To do this, use the target `afl-cov`: $ make afl-cov #produces afl-harness-cov +In order to speed up the process of checking total coverage, +`afl-harness-cov` can take several test inputs on its command-line; +the speed-up effect should be similar to that of using afl-clang-fast. 
+You can use xargs to do this most efficiently, like so: + + $ ls queue/id* | xargs $path/afl-harness-cov + NOTE: Please also note that the coverage instrumentation hard-codes the absolute path for the instrumentation read and write files in the binary; so coverage data will always show up in the build directory no diff --git a/tools/fuzz/x86_instruction_emulator/afl-harness.c b/tools/fuzz/x86_instruction_emulator/afl-harness.c index 31ae1daef1..e0c56aadf7 100644 --- a/tools/fuzz/x86_instruction_emulator/afl-harness.c +++ b/tools/fuzz/x86_instruction_emulator/afl-harness.c @@ -16,6 +16,7 @@ int main(int argc, char **argv) { size_t size; FILE *fp = NULL; + int max, count; setbuf(stdin, NULL); setbuf(stdout, NULL); @@ -42,8 +43,7 @@ int main(int argc, char **argv) break; case '?': - usage: - printf("Usage: %s $FILE | [--min-input-size]\n", argv[0]); + printf("Usage: %s $FILE [$FILE...] | [--min-input-size]\n", argv[0]); exit(-1); break; @@ -54,10 +54,13 @@ int main(int argc, char **argv) } } - if ( optind == argc ) /* No positional parameters. Use stdin. */ + max = argc - optind; + + if ( !max ) /* No positional parameters. Use stdin. */ + { + max = 1; fp = stdin; - else if ( optind != (argc - 1) ) - goto usage; + } if ( LLVMFuzzerInitialize(&argc, &argv) ) exit(-1); @@ -65,12 +68,15 @@ int main(int argc, char **argv) #ifdef __AFL_HAVE_MANUAL_CONTROL __AFL_INIT(); - while ( __AFL_LOOP(1000) ) + for( count = 0; __AFL_LOOP(1000); ) +#else + for( count = 0; count < max; count++ ) #endif { if ( fp != stdin ) /* If not using stdin, open the provided file. */ { - fp = fopen(argv[optind], "rb"); + printf("Opening file %s\n", argv[optind + count]); + fp = fopen(argv[optind + count], "rb"); if ( fp == NULL ) { perror("fopen"); @@ -100,7 +106,10 @@ int main(int argc, char **argv) if ( !feof(fp) ) { printf("Input too large\n"); - exit(-1); + /* Don't exit if we're doing batch processing */ + if ( max == 1 ) + exit(-1); + continue; } if ( fp != stdin ) -- 2.30.2