Ensembling and seed synchronization API support

* Fine-grained compilation support and argparsing
* Refactored fuzzers, added better fuzzer process handling
* Add seed synchronization API support for frontends and API
This commit is contained in:
ex0dus-0x
2019-07-23 15:33:24 -04:00
parent 9b78a5a393
commit 542440c74f
4 changed files with 393 additions and 92 deletions

View File

@@ -15,11 +15,16 @@
import os import os
import sys import sys
import logging
import argparse import argparse
from .frontend import DeepStateFrontend, FrontendError from .frontend import DeepStateFrontend, FrontendError
L = logging.getLogger("deepstate.frontend.afl")
L.setLevel(os.environ.get("DEEPSTATE_LOG", "INFO").upper())
class AFL(DeepStateFrontend): class AFL(DeepStateFrontend):
""" Defines default AFL fuzzer frontend """ """ Defines default AFL fuzzer frontend """
@@ -30,19 +35,25 @@ class AFL(DeepStateFrontend):
def parse_args(cls): def parse_args(cls):
parser = argparse.ArgumentParser(description="Use AFL as a back-end for DeepState.") parser = argparse.ArgumentParser(description="Use AFL as a back-end for DeepState.")
# Compilation/instrumentation support
compile_group = parser.add_argument_group("compilation and instrumentation arguments") compile_group = parser.add_argument_group("compilation and instrumentation arguments")
compile_group.add_argument("--compile_test", type=str, help="Path to DeepState test harness for compilation.") compile_group.add_argument("--compile_test", type=str, help="Path to DeepState test harness for compilation.")
compile_group.add_argument("--compiler_args", default=[], nargs='+', help="Compiler flags (excluding -o) to pass to compiler.") compile_group.add_argument("--compiler_args", type=str, help="Linker flags (space seperated) to include for external libraries.")
compile_group.add_argument("--out_test_name", type=str, default="out", help="Set name of generated instrumented binary.") compile_group.add_argument("--out_test_name", type=str, default="out", help="Set name of generated instrumented binary.")
# Execution options
parser.add_argument("--dictionary", type=str, help="Optional fuzzer dictionary for AFL.") parser.add_argument("--dictionary", type=str, help="Optional fuzzer dictionary for AFL.")
parser.add_argument("--mem_limit", type=int, default=50, help="Child process memory limit in MB (default is 50).") parser.add_argument("--mem_limit", type=int, default=50, help="Child process memory limit in MB (default is 50).")
parser.add_argument("--file", type=str, help="Input file read by fuzzed program, if any.") parser.add_argument("--file", type=str, help="Input file read by fuzzed program, if any.")
parser.add_argument("--dirty_mode", action='store_true', help="Fuzz without deterministic steps.") # AFL execution modes
parser.add_argument("--dumb_mode", action='store_true', help="Fuzz without instrumentation.") parser.add_argument("--dirty_mode", action="store_true", help="Fuzz without deterministic steps.")
parser.add_argument("--qemu_mode", action='store_true', help="Fuzz with QEMU mode.") parser.add_argument("--dumb_mode", action="store_true", help="Fuzz without instrumentation.")
parser.add_argument("--crash_explore", action='store_true', help="Fuzz with crash exploration.") parser.add_argument("--qemu_mode", action="store_true", help="Fuzz with QEMU mode.")
parser.add_argument("--crash_explore", action="store_true", help="Fuzz with crash exploration.")
# Misc. post-processing
parser.add_argument("--post_stats", action="store_true", help="Output post-fuzzing stats.")
cls.parser = parser cls.parser = parser
return super(AFL, cls).parse_args() return super(AFL, cls).parse_args()
@@ -51,24 +62,35 @@ class AFL(DeepStateFrontend):
def compile(self): def compile(self):
args = self._ARGS args = self._ARGS
lib_path = "/usr/local/lib/" lib_path = "/usr/local/lib/libdeepstate_AFL.a"
if not os.path.isfile(lib_path + "libdeepstate_AFL.a"): L.debug(f"Static library path: {lib_path}")
if not os.path.isfile(lib_path):
raise RuntimeError("no AFL-instrumented DeepState static library found in {}".format(lib_path)) raise RuntimeError("no AFL-instrumented DeepState static library found in {}".format(lib_path))
compiler_args = [args.compile_test, "-std=c++11"] + args.compiler_args + \ flags = ["-ldeepstate_AFL"]
["-ldeepstate_AFL", "-o", args.out_test_name + ".afl"] if args.compiler_args:
flags += [arg for arg in args.compiler_args.split(" ")]
compiler_args = ["-std=c++11", args.compile_test] + flags + \
["-o", args.out_test_name + ".afl"]
super().compile(compiler_args) super().compile(compiler_args)
def pre_exec(self): def pre_exec(self):
"""
Perform argparse and environment-related sanity checks.
"""
# check if core dump pattern is set as `core`
with open("/proc/sys/kernel/core_pattern") as f:
if not "core" in f.read():
raise FrontendError("No core dump pattern set. Execute 'echo core | sudo tee /proc/sys/kernel/core_pattern'")
super().pre_exec() super().pre_exec()
args = self._ARGS args = self._ARGS
if args.compile_test:
self.compile()
sys.exit(0)
# require input seeds if we aren't in dumb mode, or we are using crash mode # require input seeds if we aren't in dumb mode, or we are using crash mode
if not args.dumb_mode or args.crash_mode: if not args.dumb_mode or args.crash_mode:
if not args.input_seeds: if not args.input_seeds:
@@ -91,12 +113,15 @@ class AFL(DeepStateFrontend):
args = self._ARGS args = self._ARGS
cmd_dict = { cmd_dict = {
"-i": args.input_seeds,
"-o": args.output_test_dir, "-o": args.output_test_dir,
"-t": str(args.timeout), "-t": str(args.timeout),
"-m": str(args.mem_limit) "-m": str(args.mem_limit)
} }
# since this is optional for AFL's dumb fuzzing
if args.input_seeds:
cmd_dict["-i"] = args.input_seeds
# check if we are using one of AFL's many "modes" # check if we are using one of AFL's many "modes"
if args.dirty_mode: if args.dirty_mode:
cmd_dict["-d"] = None cmd_dict["-d"] = None
@@ -126,29 +151,77 @@ class AFL(DeepStateFrontend):
return cmd_dict return cmd_dict
@property @property
def stats(self): def stats(self):
pass """
Retrieves and parses the stats file produced by AFL
"""
args = self._ARGS
stat_file = args.output_test_dir + "/fuzzer_stats"
with open(stat_file, "r") as sf:
lines = sf.readlines()
# TODO stats = {
def ensemble(self): "last_update": None,
"start_time": None,
"fuzzer_pid": None,
"cycles_done": None,
"execs_done": None,
"execs_per_sec": None,
"paths_total": None,
"paths_favored": None,
"paths_found": None,
"paths_imported": None,
"max_depth": None,
"cur_path": None,
"pending_favs": None,
"pending_total": None,
"variable_paths": None,
"stability": None,
"bitmap_cvg": None,
"unique_crashes": None,
"unique_hangs": None,
"last_path": None,
"last_crash": None,
"last_hang": None,
"execs_since_crash": None,
"exec_timeout": None,
"afl_banner": None,
"afl_version": None,
"command_line": None
}
# get original stats for l in lines:
orig_stats = self.stats for k in stats.keys():
if k in l:
stats[k] = l[19:].strip(": %\r\n")
return stats
# update stored stats at current point of execution
self._update_stats()
if stats["last_update"] != orig_stats["last_update"]: def _sync_seeds(self, mode, src, dest, excludes=["orig", ".state"]):
self.sync_seeds() super()._sync_seeds(mode, src, dest, excludes=excludes)
else:
self.get_seeds()
def post_exec(self):
    """
    AFL post-execution hook.

    When the `--post_stats` flag was given, prints every entry of `self.stats`
    under an "AFL RUN STATS" banner, with the stat name upper-cased and its
    underscores turned into spaces. Does nothing otherwise.
    (TODO) crash triaging with seeds from both sync_dir and local queue.
    """
    # nothing to report unless the user asked for post-fuzzing stats
    if not self._ARGS.post_stats:
        return

    print("\nAFL RUN STATS:\n")
    for name, value in self.stats.items():
        label = name.upper().replace("_", " ")
        print(f"{label}:\t\t\t{value}")
def main(): def main():
fuzzer = AFL() fuzzer = AFL()
args = fuzzer.parse_args() fuzzer.parse_args()
fuzzer.run() fuzzer.run()
return 0 return 0

View File

@@ -15,10 +15,17 @@
import os import os
import sys import sys
import pipes
import logging
import argparse import argparse
import subprocess
from .frontend import DeepStateFrontend, FrontendError from .frontend import DeepStateFrontend, FrontendError
L = logging.getLogger("deepstate.frontend.angora")
L.setLevel(os.environ.get("DEEPSTATE_LOG", "INFO").upper())
class Angora(DeepStateFrontend): class Angora(DeepStateFrontend):
FUZZER = "angora_fuzzer" FUZZER = "angora_fuzzer"
@@ -30,12 +37,12 @@ class Angora(DeepStateFrontend):
compile_group = parser.add_argument_group("compilation and instrumentation arguments") compile_group = parser.add_argument_group("compilation and instrumentation arguments")
compile_group.add_argument("--compile_test", type=str, help="Path to DeepState test harness for compilation.") compile_group.add_argument("--compile_test", type=str, help="Path to DeepState test harness for compilation.")
compile_group.add_argument("--ignored_taints", type=str, help="Path to ignored function calls for taint analysis.") compile_group.add_argument("--ignore_calls", type=str, help="Path to static/shared libraries (colon seperated) for functions to blackbox for taint analysis.")
compile_group.add_argument("--compiler_args", default=[], nargs='+', help="Compiler flags (excluding -o) to pass to compiler.") compile_group.add_argument("--compiler_args", type=str, help="Linker flags (space seperated) to include for external libraries.")
compile_group.add_argument("--out_test_name", type=str, default="test", help="Set name for generated *.taint and *.fast binaries.") compile_group.add_argument("--out_test_name", type=str, default="test", help="Set name for generated *.taint and *.fast binaries.")
parser.add_argument("taint_binary", nargs="?", type=str, help="Path to binary compiled with taint tracking.") parser.add_argument("taint_binary", nargs="?", type=str, help="Path to binary compiled with taint tracking.")
parser.add_argument("--mode", type=str, default="llvm", help="Specifies binary instrumentation framework used (either llvm or pin).") parser.add_argument("--mode", type=str, default="llvm", choices=["llvm", "pin"], help="Specifies binary instrumentation framework used (either llvm or pin).")
parser.add_argument("--no_afl", action='store_true', help="Disables AFL mutation strategies being used.") parser.add_argument("--no_afl", action='store_true', help="Disables AFL mutation strategies being used.")
parser.add_argument("--no_exploration", action='store_true', help="Disables context-sensitive input bytes mutation.") parser.add_argument("--no_exploration", action='store_true', help="Disables context-sensitive input bytes mutation.")
@@ -45,37 +52,77 @@ class Angora(DeepStateFrontend):
def compile(self): def compile(self):
args = self._ARGS args = self._ARGS
no_taints = args.ignored_taints
env = os.environ.copy() env = os.environ.copy()
# check if static libraries exist # check if static libraries exist
lib_path = "/usr/local/lib/" lib_path = "/usr/local/lib/"
L.debug(f"Static library path: {lib_path}")
if not os.path.isfile(lib_path + "libdeepstate_fast.a"): if not os.path.isfile(lib_path + "libdeepstate_fast.a"):
raise RuntimeError("no Angora branch-instrumented DeepState static library found in {}".format(lib_path)) raise RuntimeError("no Angora branch-instrumented DeepState static library found in {}".format(lib_path))
if not os.path.isfile(lib_path + "libdeepstate_taint.a"): if not os.path.isfile(lib_path + "libdeepstate_taint.a"):
raise RuntimeError("no Angora taint-tracked DeepState static library found in {}".format(lib_path)) raise RuntimeError("no Angora taint-tracked DeepState static library found in {}".format(lib_path))
# generate ignored functions output for taint tracking
# set envvar to file with ignored lib functions for taint tracking # set envvar to file with ignored lib functions for taint tracking
if no_taints: if args.ignore_calls:
if os.path.isfile(no_taints):
env["ANGORA_TAINT_RULE_LIST"] = os.path.abspath(no_taints)
# generate instrumented binary libpath = [path for path in args.ignore_calls.split(":")]
fast_args = [args.compile_test] + args.compiler_args + \ L.debug(f"Ignoring library objects: {libpath}")
["-ldeepstate_fast", "-o", args.out_test_name + ".fast"]
out_file = "abilist.txt"
# TODO(alan): more robust library check
ignore_bufs = []
for path in libpath:
if not os.path.isfile(path):
raise FrontendError(f"Library `{path}` to blackbox was not a valid library path.")
# instantiate command to call, but store output to buffer
cmd = [os.getenv("ANGORA") + "/tools/gen_library_abilist.sh", path, "discard"]
L.debug(f"Compilation command: {cmd}")
out = subprocess.check_output(cmd)
ignore_bufs += [out]
# write all to final out_file
with open(out_file, "wb") as f:
for buf in ignore_bufs:
f.write(buf)
# set envvar for fuzzer compilers
env["ANGORA_TAINT_RULE_LIST"] = os.path.abspath(out_file)
# make a binary with light instrumentation
fast_flags = ["-ldeepstate_fast"]
if args.compiler_args:
fast_flags += [arg for arg in args.compiler_args.split(" ")]
fast_args = ["-std=c++11", args.compile_test] + fast_flags + \
["-o", args.out_test_name + ".fast"]
L.info("Compiling {args.binary} for Angora with light instrumentation")
super().compile(compiler_args=fast_args, env=env) super().compile(compiler_args=fast_args, env=env)
# make a binary with taint tracking information # make a binary with taint tracking information
taint_flags = ["-ldeepstate_taint"]
if args.compiler_args:
taint_flags += [arg for arg in args.compiler_args.split(' ')]
if args.mode == "pin": if args.mode == "pin":
env["USE_PIN"] = "1" env["USE_PIN"] = "1"
else: else:
env["USE_TRACK"] = "1" env["USE_TRACK"] = "1"
taint_args = [args.compile_test] + args.compiler_args + \ taint_args = ["-std=c++11", args.compile_test] + taint_flags + \
["-ldeepstate_taint", "-o", args.out_test_name + ".taint"] ["-o", args.out_test_name + ".taint"]
L.info("Compiling {args.binary} for Angora with taint tracking")
super().compile(compiler_args=taint_args, env=env) super().compile(compiler_args=taint_args, env=env)
return 0
def pre_exec(self): def pre_exec(self):
@@ -83,11 +130,6 @@ class Angora(DeepStateFrontend):
args = self._ARGS args = self._ARGS
if args.compile_test:
print("COMPILING DEEPSTATE HARNESS FOR FUZZING...")
self.compile()
sys.exit(0)
# since base method checks for args.binary by default # since base method checks for args.binary by default
if not args.taint_binary: if not args.taint_binary:
self.parser.print_help() self.parser.print_help()
@@ -97,6 +139,7 @@ class Angora(DeepStateFrontend):
raise FrontendError("Must provide -i/--input_seeds option for Angora.") raise FrontendError("Must provide -i/--input_seeds option for Angora.")
seeds = os.path.abspath(args.input_seeds) seeds = os.path.abspath(args.input_seeds)
L.debug(f"Seed path: {seeds}")
if not os.path.exists(seeds): if not os.path.exists(seeds):
os.mkdir(seeds) os.mkdir(seeds)
@@ -105,6 +148,8 @@ class Angora(DeepStateFrontend):
if len([name for name in os.listdir(seeds)]) == 0: if len([name for name in os.listdir(seeds)]) == 0:
raise FrontendError(f"No seeds present in directory {seeds}") raise FrontendError(f"No seeds present in directory {seeds}")
if os.path.exists(args.output_test_dir):
raise FrontendError(f"Remove previous `{args.output_test_dir}` output directory before running Angora.")
@property @property

View File

@@ -13,14 +13,20 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import glob
import os import os
import shutil
import subprocess
import sys import sys
import glob
import shutil
import logging
import subprocess
from .frontend import DeepStateFrontend, FrontendError from .frontend import DeepStateFrontend, FrontendError
L = logging.getLogger("deepstate.frontend.eclipser")
L.setLevel(os.environ.get("DEEPSTATE_LOG", "INFO").upper())
class Eclipser(DeepStateFrontend): class Eclipser(DeepStateFrontend):
""" """
Eclipser front-end implemented with a base DeepStateFrontend object Eclipser front-end implemented with a base DeepStateFrontend object
@@ -31,18 +37,15 @@ class Eclipser(DeepStateFrontend):
def print_help(self): def print_help(self):
"""
Overrides default interface for calling for help.
"""
subprocess.call(["dotnet", self.fuzzer, "fuzz", "--help"]) subprocess.call(["dotnet", self.fuzzer, "fuzz", "--help"])
def pre_exec(self): def pre_exec(self):
super().pre_exec() super().pre_exec()
args = self._ARGS out = self._ARGS.output_test_dir
L.debug(f"Output test directory: {out}")
out = args.output_test_dir
if not os.path.exists(out): if not os.path.exists(out):
print("Creating output directory.") print("Creating output directory.")
os.mkdir(out) os.mkdir(out)
@@ -66,7 +69,7 @@ class Eclipser(DeepStateFrontend):
"fuzz": None, "fuzz": None,
"-p": args.binary, "-p": args.binary,
"-t": str(args.timeout), "-t": str(args.timeout),
"-o": args.output_test_dir + "/run", "-o": args.output_test_dir,
"--src": "file", "--src": "file",
"--fixfilepath": "eclipser.input", "--fixfilepath": "eclipser.input",
"--initarg": " ".join(deepargs), "--initarg": " ".join(deepargs),
@@ -74,19 +77,25 @@ class Eclipser(DeepStateFrontend):
} }
if args.input_seeds is not None: if args.input_seeds is not None:
cmd_dict["-i"] = args.input_seeds cmd_dict["--initseedsdir"] = args.input_seeds
return cmd_dict return cmd_dict
def ensemble(self):
    """
    Run the base ensemble step using the `testcase/` directory inside the
    output test directory as the local seed queue.
    """
    super().ensemble(local_queue=self._ARGS.output_test_dir + "/testcase/")
def post_exec(self): def post_exec(self):
""" """
Decode and minimize testcases after fuzzing. Decode and minimize testcases after fuzzing.
""" """
out = self._ARGS.output_test_dir out = self._ARGS.output_test_dir
subprocess.call(["dotnet", self.fuzzer, "decode", "-i", out + "/run/testcase", "-o", out + "/decoded"]) L.info("Performing post-processing decoding on testcases and crashes")
subprocess.call(["dotnet", self.fuzzer, "decode", "-i", out + "/run/crash", "-o", out + "/decoded"]) subprocess.call(["dotnet", self.fuzzer, "decode", "-i", out + "/testcase", "-o", out + "/decoded"])
subprocess.call(["dotnet", self.fuzzer, "decode", "-i", out + "/crash", "-o", out + "/decoded"])
for f in glob.glob(out + "/decoded/decoded_files/*"): for f in glob.glob(out + "/decoded/decoded_files/*"):
shutil.copy(f, out) shutil.copy(f, out)
shutil.rmtree(out + "/decoded") shutil.rmtree(out + "/decoded")

View File

@@ -17,14 +17,17 @@ import logging
logging.basicConfig() logging.basicConfig()
import os import os
import sys
import time import time
import sys
import subprocess import subprocess
import threading
import argparse import argparse
import functools import functools
L = logging.getLogger("deepstate.frontend") L = logging.getLogger("deepstate.frontend")
L.setLevel(logging.INFO) L.setLevel(os.environ.get("DEEPSTATE_LOG", "INFO").upper())
class FrontendError(Exception): class FrontendError(Exception):
pass pass
@@ -81,7 +84,7 @@ class DeepStateFrontend(object):
# use first compiler executable if multiple exists # use first compiler executable if multiple exists
self.compiler = compiler_paths[0] self.compiler = compiler_paths[0]
L.info(f"Initialized compiler: {self.compiler}") L.debug(f"Initialized compiler: {self.compiler}")
# in case name supplied as `bin/fuzzer`, strip executable name # in case name supplied as `bin/fuzzer`, strip executable name
@@ -93,9 +96,9 @@ class DeepStateFrontend(object):
# use first fuzzer executable path if multiple exists # use first fuzzer executable path if multiple exists
self.fuzzer = fuzzer_paths[0] self.fuzzer = fuzzer_paths[0]
L.info(f"Initialized fuzzer path: {self.fuzzer}") L.debug(f"Initialized fuzzer path: {self.fuzzer}")
self.start_time = int(time.time()) self._start_time = int(time.time())
self._on = False self._on = False
@@ -120,20 +123,16 @@ class DeepStateFrontend(object):
if self.compiler is None: if self.compiler is None:
raise FrontendError(f"No compiler specified for compile-time instrumentation.") raise FrontendError(f"No compiler specified for compile-time instrumentation.")
L.info(f"Compiling test harness `{self._ARGS.compile_test}` with {self.compiler}") # initialize compiler envvars
env["CC"] = self.compiler env["CC"] = self.compiler
env["CXX"] = self.compiler env["CXX"] = self.compiler
L.debug(f"CC={env['CC']} and CXX={env['CXX']}") L.debug(f"CC={env['CC']} and CXX={env['CXX']}")
if custom_cmd is not None: # initialize command with prepended compiler
compile_cmd = custom_cmd compile_cmd = [self.compiler] + compiler_args
else:
compile_cmd = [self.compiler] + compiler_args
L.debug(f"Compilation command: {str(compile_cmd)}") L.debug(f"Compilation command: {str(compile_cmd)}")
L.info(f"Compiling test harness `{self._ARGS.compile_test}` with {self.compiler}")
try: try:
ps = subprocess.Popen(compile_cmd, env=env) ps = subprocess.Popen(compile_cmd, env=env)
ps.communicate() ps.communicate()
@@ -156,24 +155,40 @@ class DeepStateFrontend(object):
self.print_help() self.print_help()
sys.exit(0) sys.exit(0)
# if compile_test is an existing argument, call compile for user
if hasattr(args, "compile_test"):
if args.compile_test:
self.compile()
sys.exit(0)
# manually check if binary positional argument was passed
if args.binary is None: if args.binary is None:
self.print_help() self.parser.print_help()
print("\nError: Target binary not specified.")
sys.exit(1) sys.exit(1)
L.debug(f"Target binary: {args.binary}") L.debug(f"Target binary: {args.binary}")
if not args.output_test_dir: # no sanity check, since some fuzzers require optional input seeds
raise FrontendError("No output test directory path specified.") if args.input_seeds:
L.debug(f"Input seeds directory: {args.input_seeds}")
L.debug(f"Output directory: {args.output_test_dir}") L.debug(f"Output directory: {args.output_test_dir}")
# check if we in ensemble mode, and initialize directory
if args.enable_sync:
if not os.path.isdir(args.sync_dir):
L.info("Initializing sync directory for ensembling")
os.mkdir(args.sync_dir)
L.debug(f"Sync directory: {args.sync_dir}")
@staticmethod @staticmethod
def _dict_to_cmd(cmd_dict): def _dict_to_cmd(cmd_dict):
""" """
provides an interface for constructing proper command to be passed Helper that provides an interface for constructing proper command to be passed
to cli executable. to fuzzer executable. This takes a dict that maps a str argument flag to a value,
and transforms it into list.
:param cmd_dict: dict with keys as cli flags and values as arguments :param cmd_dict: dict with keys as cli flags and values as arguments
""" """
@@ -193,6 +208,7 @@ class DeepStateFrontend(object):
:param compiler: if necessary, a compiler that is invoked before fuzzer executable (ie `dotnet`) :param compiler: if necessary, a compiler that is invoked before fuzzer executable (ie `dotnet`)
""" """
args = self._ARGS
# call pre_exec for any checks/inits before execution # call pre_exec for any checks/inits before execution
L.info("Calling pre_exec before fuzzing") L.info("Calling pre_exec before fuzzing")
@@ -208,35 +224,188 @@ class DeepStateFrontend(object):
if compiler: if compiler:
command.insert(0, compiler) command.insert(0, compiler)
L.info(f"Executing command `{str(command)}`") L.info(f"Executing command `{str(command)}` in {args.jobs} fuzzer(s)")
# TODO(alan): other stuff before calling cmd # exec fuzzer
L.info(f"Fuzzer start time: {self.start_time}") L.info(f"Fuzzer start time: {self._start_time}")
self._on = True self._on = True
# TODO(alan): output to standardized logger with uniform pretty-printing
def output_reader(proc):
for line in iter(proc.stdout.readline, b''):
print("{}".format(line.decode("utf-8")), end='')
try: try:
ps = subprocess.Popen(command)
ps.communicate() # if we are syncing seeds, we background the AFL process but still process output
except BaseException as e: # to the foreground, while handling seed synchronization in a loop
if args.enable_sync:
self.proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
t = threading.Thread(target=output_reader, args=(self.proc,))
t.start()
# do not ensemble as fuzzer initializes
time.sleep(5)
self.sync_count = 0
L.info(f"Starting fuzzer with seed synchronization with PID `{self.proc.pid}`")
while self._is_alive():
L.info(f"Performing sync cycle {self.sync_count}")
time.sleep(args.sync_cycle)
self.ensemble()
self.sync_count += 1
# if not syncing, start regular foreground child process with regular thread for consistency
else:
self.proc = subprocess.Popen(command)
t = threading.Thread()
t.start()
L.info(f"Starting fuzzer normally with PID `{self.proc.pid}`")
self.proc.communicate()
except OSError as e:
raise FrontendError(f"{self.fuzzer} run interrupted due to exception {e}.") raise FrontendError(f"{self.fuzzer} run interrupted due to exception {e}.")
self._off = True except KeyboardInterrupt:
L.info(f"Fuzzer end time: {self.start_time}") self._kill()
t.join()
self.exec_time = round(time.time() - self._start_time, 2)
L.info(f"Fuzzer exec time: {self.exec_time}s")
# do post-fuzz operations # do post-fuzz operations
if hasattr(self, 'post_exec') and callable(getattr(self, 'post_exec')): if hasattr(self, "post_exec") and callable(getattr(self, "post_exec")):
L.info("Calling post-exec for fuzzer post-processing") L.info("Calling post-exec for fuzzer post-processing")
self.post_exec() self.post_exec()
# TODO def _is_alive(self):
def sync_seeds(self, path): """
pass Checks to see if fuzzer PID is running, but tossing SIGT (0) to see if we can
interact. Ideally used in an event loop during a running process.
"""
if self._on:
return True
try:
os.kill(self.proc.pid, 0)
except (OSError, ProcessLookupError):
return False
return True
def _kill(self):
    """
    Terminate the fuzzer child process and mark the frontend as no longer running.

    Sends terminate to `self.proc`, waits for it to exit, then clears `self._on`.
    Can be used forcefully if a KeyboardInterrupt falls through and the process
    continues execution.

    :raises FrontendError: if no process handle (`self.proc`) was ever created
    """
    if hasattr(self, "proc"):
        fuzzer_proc = self.proc
        fuzzer_proc.terminate()
        # reap the child before flagging the frontend as stopped
        fuzzer_proc.wait()
        self._on = False
        return

    raise FrontendError("Attempted to kill non-running PID.")
@property
def stats(self):
    """
    Fuzzer statistics hook. The base implementation always raises; a frontend
    subclass is expected to override it and return its own parsed feedback.

    :raises NotImplementedError: always, in this base implementation
    """
    reason = "Must implement in frontend subclass."
    raise NotImplementedError(reason)
def _sync_seeds(self, mode, src, dest, excludes=None):
    """
    Helper that invokes rsync to synchronize seeds between two queue directories.
    Blocks until rsync has finished, so consecutive sync calls never overlap.

    TODO(alan): implement functionality for syncing across servers.
    TODO(alan): consider implementing "native" syncing alongside current "rsync mode".

    :param mode: str representing mode (either 'GET' or 'PUSH'). 'PUSH' copies from `src`
                 into `dest`; 'GET' runs rsync in the reverse direction (`dest` into `src`)
    :param src: path to source queue
    :param dest: path to destination queue
    :param excludes: optional list of string patterns for paths to ignore when rsync-ing
                     (None or empty means nothing is excluded)
    :raises FrontendError: if `mode` is unknown, or if rsync cannot be started or
                           exits with a non-zero status
    """
    if mode not in ("GET", "PUSH"):
        raise FrontendError(f"Unknown mode for seed syncing: `{mode}`")

    rsync_cmd = ["rsync", "-racz", "--ignore-existing"]

    # subclass should invoke with list of pattern ignores
    if excludes:
        rsync_cmd += [f"--exclude={e}" for e in excludes]

    # TODO: determine other necessary arguments
    if mode == "GET":
        rsync_cmd += [dest, src]
    else:
        rsync_cmd += [src, dest]

    L.debug(f"rsync command: {rsync_cmd}")
    try:
        # run() waits for rsync and, with check=True, raises CalledProcessError on a
        # non-zero exit; a bare Popen() would neither wait for nor report failures
        subprocess.run(rsync_cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        raise FrontendError(f"Seed synchronization with rsync failed due to exception {e}.") from e
@staticmethod
def _queue_len(queue_path):
    """
    Count the directory entries found directly inside `queue_path`.

    :param queue_path: path to a seed queue directory
    :return: number of entries reported by `os.listdir` for that directory
    """
    entries = os.listdir(queue_path)
    return len(entries)
def ensemble(self, local_queue=None, global_queue=None):
    """
    Base method for ensemble fuzzing with seed synchronization; performs one sync pass
    between the fuzzer's local queue and the shared (global) queue. Subclasses should
    add any extra logic for deciding whether to sync/get seeds, as if in an event loop.

    :param local_queue: path to the fuzzer's own seed queue; defaults to the `queue/`
                        directory inside the output test directory
    :param global_queue: path to the shared seed queue; defaults to the sync directory
    """
    opts = self._ARGS

    shared_dir = (opts.sync_dir + "/") if global_queue is None else global_queue
    shared_count = DeepStateFrontend._queue_len(shared_dir)
    L.debug(f"Global seed queue: {shared_dir} with {shared_count} files")

    own_dir = (opts.output_test_dir + "/queue/") if local_queue is None else local_queue
    own_count = DeepStateFrontend._queue_len(own_dir)
    L.debug(f"Fuzzer local seed queue: {own_dir} with {own_count} files")

    # sanity check: an empty global queue is first populated from the local queue
    if shared_count == 0 and own_count > 0:
        L.info("Nothing in global queue, pushing seeds from local queue")
        self._sync_seeds("PUSH", own_dir, shared_dir)
        return

    # get seeds from the fuzzer into the global queue, rsync will deal with duplicates
    # TODO: rename sync seeds to arbitrary filenames in queue
    self._sync_seeds("GET", shared_dir, own_dir)

    # push seeds from the global queue to the local one, rsync will deal with duplicates
    self._sync_seeds("PUSH", shared_dir, own_dir)
_ARGS = None _ARGS = None
@classmethod @classmethod
def parse_args(cls): def parse_args(cls):
"""
Default base argument parser for DeepState frontends. Comprises of default arguments all
frontends must implement to maintain consistency in executables. Users can inherit this
method to extend and add own arguments or override for outstanding deviations in fuzzer CLIs.
"""
if cls._ARGS: if cls._ARGS:
return cls._ARGS return cls._ARGS
@@ -250,23 +419,28 @@ class DeepStateFrontend(object):
description="Use fuzzer as back-end for DeepState.") description="Use fuzzer as back-end for DeepState.")
# Target binary (not required, as we enforce manual checks in pre_exec) # Target binary (not required, as we enforce manual checks in pre_exec)
parser.add_argument("binary", nargs='?', type=str, help="Path to the test binary to run.") parser.add_argument("binary", nargs="?", type=str, help="Path to the test binary to run.")
# Input/output workdirs # Input/output workdirs
parser.add_argument("-i", "--input_seeds", type=str, help="Directory with seed inputs.") parser.add_argument("-i", "--input_seeds", type=str, help="Directory with seed inputs.")
parser.add_argument("-o", "--output_test_dir", type=str, default="out", help="Directory where tests will be saved.") parser.add_argument("-o", "--output_test_dir", type=str, default=f"out", help="Directory where tests will be saved.")
# Fuzzer execution options # Fuzzer execution options
parser.add_argument("-t", "--timeout", type=int, default=3600, help="How long to fuzz.") parser.add_argument("-t", "--timeout", type=int, default=3600, help="How long to fuzz.")
parser.add_argument("-j", "--jobs", type=int, default=1, help="How many worker processes to spawn.")
parser.add_argument("-s", "--max_input_size", type=int, default=8192, help="Maximum input size.") parser.add_argument("-s", "--max_input_size", type=int, default=8192, help="Maximum input size.")
parser.add_argument("-j", "--jobs", type=int, default=1, help="How many worker processes to spawn.")
# Parallel / Ensemble Fuzzing
parser.add_argument("--enable_sync", action="store_true", help="Enable seed synchronization.")
parser.add_argument("--sync_dir", type=str, default="out_sync", help="Directory for seed synchronization.")
parser.add_argument("--sync_cycle", type=int, default=5, help="Time between sync cycle.")
parser.add_argument("--sync_crashes", action="store_true", help="Sync crashes between local and global queue.")
parser.add_argument("--sync_hangs", action="store_true", help="Sync hanging input between local and global queue.")
# Miscellaneous options # Miscellaneous options
parser.add_argument("--fuzzer_help", action='store_true', help="Show fuzzer command line options.") parser.add_argument("--fuzzer_help", action="store_true", help="Show fuzzer command line options.")
parser.add_argument("--which_test", type=str, help="Which test to run (equivalent to --input_which_test).") parser.add_argument("--which_test", type=str, help="Which test to run (equivalent to --input_which_test).")
parser.add_argument("--args", default=[], nargs=argparse.REMAINDER, help="Overrides DeepState arguments to pass to test(s).") parser.add_argument("--args", default=[], nargs=argparse.REMAINDER, help="Overrides DeepState arguments to pass to test(s).")
cls._ARGS = parser.parse_args() cls._ARGS = parser.parse_args()
cls.parser = parser cls.parser = parser
return cls._ARGS