import os
import sys
import time
import types
import logging
import tempfile
import functools
import cPickle
from multiprocessing import Manager, Pool
from multiprocessing import Process

from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

from core.executor import Executor, State, AbandonState
from core.parser import parse
from core.smtlib import solver, Expression, Operators, SolverException, Array
from core.smtlib import BitVec, Bool
from models import linux, decree, windows
from utils import gdb, qemu
from core.smtlib import ConstraintSet

logger = logging.getLogger('MANTICORE')


class _ContextFilter(logging.Filter):
    '''
    Logging filter that injects a 'stateid' field into every record so the
    log format string can reference %(stateid)s.
    '''

    def filter(self, record):
        if hasattr(self, 'stateid') and isinstance(self.stateid, int):
            record.stateid = '[%d]' % self.stateid
        else:
            record.stateid = ''
        return True


# A single shared filter instance. Logger.addFilter() ignores a filter object
# that is already attached, so re-running Manticore._init_logging() no longer
# stacks extra filters that would reset record.stateid to ''.
_CONTEXT_FILTER = _ContextFilter()


def _logger_set_state(log, stateid):
    ''' Record |stateid| on the context filter(s) attached to |log|. '''
    for flt in log.filters:
        if isinstance(flt, _ContextFilter):
            flt.stateid = stateid


def makeDecree(args):
    '''
    Build the initial State for a DECREE binary.

    Reads args.programs (joined with ',') and args.data. The concrete data is
    transmitted on the model input first, followed by 14 symbolic bytes
    labelled 'RECEIVE'.
    '''
    constraints = ConstraintSet()
    model = decree.SDecree(constraints, ','.join(args.programs))
    initial_state = State(constraints, model)
    logger.info('Loading program %s', args.programs)

    # The concrete prefix is transmitted unconditionally; the guard below was
    # already disabled in the original code.
    #if args.data != '':
    #    logger.info('Starting with concrete input: {}'.format(args.data))
    model.input.transmit(args.data)
    model.input.transmit(initial_state.symbolicate_buffer('+'*14, label='RECEIVE'))
    return initial_state


def makeLinux(program, arguments, environment, concrete_start = ''):
    '''
    Build the initial State for a Linux ELF binary.

    |arguments| and |environment| are lists of strings; both are rewritten
    IN PLACE with the result of State.symbolicate_buffer() for each entry.
    |concrete_start| is transmitted on the model input ahead of a 256-byte
    symbolic buffer labelled 'STDIN'.
    '''
    logger.info('Loading program %s', program)

    constraints = ConstraintSet()
    # NOTE(review): ('symbolic.txt') is a plain string, not a 1-tuple. Left
    # unchanged because SLinux's handling of symbolic_files is not visible
    # here -- confirm whether ('symbolic.txt',) was intended.
    model = linux.SLinux(constraints, program, argv=arguments, envp=environment,
                         symbolic_files=('symbolic.txt'))
    initial_state = State(constraints, model)

    if concrete_start != '':
        logger.info('Starting with concrete input: {}'.format(concrete_start))

    for i, argument in enumerate(arguments):
        arguments[i] = initial_state.symbolicate_buffer(
            argument, label='ARGV%d' % (i+1), string=True)

    for i, variable in enumerate(environment):
        environment[i] = initial_state.symbolicate_buffer(
            variable, label='ENV%d' % (i+1), string=True)

    model.input.transmit(concrete_start)

    #set stdin input...
    model.input.transmit(initial_state.symbolicate_buffer('+'*256, label='STDIN'))

    return initial_state


def makeWindows(args):
    '''
    Build the initial State for a Windows minidump.

    Fields read from |args|: size, buffer, programs, context, workspace,
    offset, data (hex string) and maxsymb. The buffer described by
    args.buffer/args.size is overwritten, starting at args.offset, with the
    decoded concrete data followed by up to args.maxsymb symbolic bytes.
    '''
    assert args.size is not None, "Need to specify buffer size"
    assert args.buffer is not None, "Need to specify buffer base address"
    logger.debug('Loading program %s', args.programs)

    additional_context = None
    if args.context:
        with open(args.context, "r") as addl_context_file:
            # SECURITY: unpickling can execute arbitrary code; only pass
            # context files that come from a trusted source.
            additional_context = cPickle.loads(addl_context_file.read())
            logger.debug('Additional context loaded with contents {}'.format(additional_context)) #DEBUG

    constraints = ConstraintSet()
    model = windows.SWindows(constraints, args.programs[0], additional_context,
                             snapshot_folder=args.workspace)

    #This will interpret the buffer specification written in INTEL ASM. (It may dereference pointers)
    data_size = parse(args.size, model.current.read_bytes, model.current.read_register)
    data_ptr = parse(args.buffer, model.current.read_bytes, model.current.read_register)

    logger.debug('Buffer at %x size %d bytes)', data_ptr, data_size)
    buf_str = "".join(model.current.read_bytes(data_ptr, data_size))
    logger.debug('Original buffer: %s', buf_str.encode('hex'))

    offset = args.offset
    concrete_data = args.data.decode('hex')
    concrete_end = offset + len(concrete_data)
    assert data_size >= concrete_end

    # Symbolic region: whatever is left after the concrete prefix, capped by
    # args.maxsymb.
    size = min(args.maxsymb, data_size - concrete_end)
    symb = constraints.new_array(name='RAWMSG', index_max=size)

    model.current.write_bytes(data_ptr + offset, concrete_data)
    model.current.write_bytes(data_ptr + concrete_end, [symb[i] for i in xrange(size)])

    logger.debug('First %d bytes are left concrete', offset)
    logger.debug('followed by %d bytes of concrete start', len(concrete_data))
    hex_head = "".join(model.current.read_bytes(data_ptr, concrete_end))
    logger.debug('Hexdump head: %s', hex_head.encode('hex'))
    logger.debug('Total symbolic characters inserted: %d', size)
    logger.debug('followed by %d bytes of unmodified concrete bytes at end.',
                 (data_size - concrete_end) - size)
    hex_tail = "".join(map(chr, model.current.read_bytes(data_ptr + concrete_end + size,
                                                         data_size - (concrete_end + size))))
    logger.debug('Hexdump tail: %s', hex_tail.encode('hex'))
    logger.info("Starting PC is: {:08x}".format(model.current.PC))

    return State(constraints, model)


def binary_type(path):
    '''
    Given a path to a binary, return a string representation of its type.
    i.e. ELF, PE, DECREE, QNX

    Raises NotImplementedError when the first four bytes match none of the
    known magic values.
    '''
    # Binary mode: the magic must be compared byte-for-byte, with no newline
    # translation.
    with open(path, 'rb') as f:
        magic = f.read(4)

    if magic == b'\x7fELF':
        return 'ELF'
    elif magic == b'MDMP':
        return 'PE'
    elif magic == b'\x7fCGC':
        return 'DECREE'
    else:
        raise NotImplementedError("Binary {} not supported.".format(path))


def issymbolic(value):
    '''
    Helper to determine whether a value read from memory is symbolic.
    '''
    return isinstance(value, Expression)


class Manticore(object):
    '''
    Front end that builds the initial state for a binary, wires hooks and
    assertions into an Executor, and runs it in worker processes.
    '''

    def __init__(self, binary_path, args=None, verbose=False):
        '''
        binary_path: path to an existing ELF / minidump / DECREE file.
        args: argv list (Linux) or an argparse-style namespace (other
              platforms); defaults to a fresh empty list.
        verbose: accepted for compatibility; not used here.
        '''
        assert os.path.isfile(binary_path)

        # A fresh list per instance: makeLinux() rewrites argv entries in
        # place, so a shared default list would leak between instances.
        if args is None:
            args = []

        self._binary = binary_path
        self._binary_type = binary_type(binary_path)
        self._argv = args # args.programs[1:]
        self._env = {}
        # Will be set to a temporary directory if not set before running start()
        self._workspace_path = None
        self._policy = 'random'
        self._coverage_file = None
        self._memory_errors = None
        self._should_profile = False
        self._workers = []
        # XXX(yan) '_args' will be removed soon; exists currently to ease porting
        self._args = args
        self._time_started = 0
        self._num_processes = 1
        self._begun_trace = False
        self._assertions = {}
        self._model_hooks = {}
        self._hooks = {}
        self._running = False
        self._arch = None
        self._log_debug = False
        self._log_file = '/dev/stdout'
        self._concrete_data = ''
        self._dumpafter = 0
        self._maxstates = 0
        self._maxstorage = 0
        self._verbosity = 0

        manager = Manager()
        self._context = manager.dict()

        # XXX(yan) This is a bit obtuse; once PE support is updated this should
        # be refactored out
        # Always define the attribute so non-ELF binaries reach the explicit
        # "not supported" errors instead of an AttributeError.
        self._binary_obj = None
        if self._binary_type == 'ELF':
            # The handle is deliberately left open; it is handed to ELFFile.
            self._binary_obj = ELFFile(open(self._binary, 'rb'))

        self._init_logging()

    def _init_logging(self):
        '''
        Configure root logging and attach the state-id filter, the level and
        a setState() helper to every logger that currently exists.
        '''
        fmt_str = '%(asctime)s: [%(process)d]%(stateid)s %(name)s:%(levelname)s: %(message)s'

        log_level = logging.DEBUG if self._log_debug else logging.WARNING

        # NOTE(review): logging.basicConfig() is documented to do nothing
        # once the root logger has handlers, so calls after the first one
        # (e.g. from the log_file setter) likely do not re-point the output.
        logging.basicConfig(filename=self._log_file, format=fmt_str, level=log_level)

        for name, log in logging.Logger.manager.loggerDict.items():
            # loggerDict also holds logging.PlaceHolder entries for
            # intermediate dotted names; they have no addFilter/setLevel.
            if not isinstance(log, logging.Logger):
                continue
            log.addFilter(_CONTEXT_FILTER)
            log.setLevel(log_level)
            log.setState = types.MethodType(_logger_set_state, log)

    @property
    def log_file(self):
        return self._log_file

    @log_file.setter
    def log_file(self, path):
        if self._log_file == path:
            return

        # '-' is shorthand for standard output.
        if path == '-':
            path = '/dev/stdout'

        self._log_file = path

        self._init_logging()

    # XXX(yan): args is a temporary hack to include while we continue moving
    # non-Linux platforms to new-style arg handling.
    @property
    def args(self):
        return self._args

    @args.setter
    def args(self, args):
        self._args = args

    @property
    def should_profile(self):
        return self._should_profile

    @should_profile.setter
    def should_profile(self, enable_profiling):
        self._should_profile = enable_profiling

    @property
    def concrete_data(self):
        return self._concrete_data

    @concrete_data.setter
    def concrete_data(self, data):
        self._concrete_data = data

    @property
    def maxstates(self):
        return self._maxstates

    @maxstates.setter
    def maxstates(self, max_states):
        self._maxstates = max_states

    @property
    def dumpafter(self):
        return self._dumpafter

    @dumpafter.setter
    def dumpafter(self, dump_after):
        self._dumpafter = dump_after

    @property
    def maxstorage(self):
        return self._maxstorage

    @maxstorage.setter
    def maxstorage(self, max_storage):
        self._maxstorage = max_storage

    @property
    def log_debug(self):
        return self._log_debug

    @log_debug.setter
    def log_debug(self, debug):
        if self._log_debug == debug:
            return

        self._log_debug = debug

        self._init_logging()

    @property
    def verbosity(self):
        return self._verbosity

    @verbosity.setter
    def verbosity(self, setting):
        '''
        Apply per-logger levels for verbosity |setting|. Out-of-range values
        are clamped when selecting levels, but the raw value is stored.
        '''
        levels = [[],
                  [('EXECUTOR', logging.INFO)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG), ('CPU', logging.DEBUG)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG), ('CPU', logging.DEBUG)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG), ('CPU', logging.DEBUG), ('SMTLIB', logging.DEBUG)]]

        # Takes a value and ensures it's in a certain range
        def clamp(val, minimum, maximum):
            return sorted((minimum, val, maximum))[1]

        clamped = clamp(setting, 0, len(levels) - 1)
        if clamped != setting:
            logger.debug("%s not between 0 and %d, forcing to %d", setting, len(levels) - 1, clamped)

        for log_type, level in levels[clamped]:
            logging.getLogger(log_type).setLevel(level)

        self._verbosity = setting

    def add_hook(self, pc, callback):
        '''
        Add a callback to be invoked on executing a program counter. Pass 'None'
        for pc to invoke callback on every instruction.

        The callback is called as callback(context, state), where context is
        the manager-backed dict created in __init__.
        '''
        def _inner(state):
            callback(self._context, state)

        self._hooks.setdefault(pc, set()).add(_inner)

    def _get_symbol_address(self, symbol):
        '''
        Return the address of |symbol| within the binary.

        Returns None when no symbol table contains |symbol|. Raises
        NotImplementedError when no ELF object was loaded for the binary.
        '''
        if self._binary_obj is None:
            raise NotImplementedError("Symbols aren't supported")

        for section in self._binary_obj.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            symbols = section.get_symbol_by_name(symbol)
            if len(symbols) == 0:
                continue

            return symbols[0].entry['st_value']

    def _make_state(self, path):
        '''
        Build the initial State with the factory matching the binary type.
        |path| is only used in the error message.
        '''
        if self._binary_type == 'ELF':
            # Linux
            env = ['%s=%s'%(k,v) for k,v in self._env.items()]
            state = makeLinux(self._binary, self._argv, env, self._concrete_data)
        elif self._binary_type == 'PE':
            # Windows
            state = makeWindows(self._args)
        elif self._binary_type == 'DECREE':
            # Decree
            state = makeDecree(self._args)
        else:
            raise NotImplementedError("Binary {} not supported.".format(path))

        return state

    @property
    def workspace(self):
        # Created lazily so a caller can still pick a directory before start().
        if self._workspace_path is None:
            self._workspace_path = self._make_workspace()

        return self._workspace_path

    @workspace.setter
    def workspace(self, path):
        assert not self._running, "Can't set workspace if Manticore is running."

        if os.path.exists(path):
            assert os.path.isdir(path)
        else:
            os.mkdir(path)

        self._workspace_path = path

    def _make_workspace(self):
        ''' Make working directory '''
        return tempfile.mkdtemp(prefix="pse_", dir='./')

    @property
    def workers(self):
        return self._num_processes

    @workers.setter
    def workers(self, n):
        assert not self._running, "Can't set workers if Manticore is running."
        self._num_processes = n

    @property
    def policy(self):
        return self._policy

    @policy.setter
    def policy(self, policy):
        assert not self._running, "Can't set policy if Manticore is running."
        self._policy = policy

    @property
    def coverage_file(self):
        return self._coverage_file

    @coverage_file.setter
    def coverage_file(self, path):
        assert not self._running, "Can't set coverage file if Manticore is running."
        self._coverage_file = path

    @property
    def memory_errors_file(self):
        return self._memory_errors

    @memory_errors_file.setter
    def memory_errors_file(self, path):
        assert not self._running, "Can't set memory errors if Manticore is running."
        self._memory_errors = path

    @property
    def env(self):
        return self._env

    @env.setter
    def env(self, env):
        '''
        Update environment variables from |env|. Use repeated '+' chars for
        symbolic values.
        '''
        assert isinstance(env, dict)
        assert not self._running, "Can't set process env if Manticore is running."

        # Merges into the existing mapping rather than replacing it.
        self._env.update(env)

    def env_add(self, key, value, overwrite=True):
        '''
        Set environment variable |key| to |value|. An existing entry is kept
        untouched when |overwrite| is false.
        '''
        if overwrite or key not in self._env:
            self._env[key] = value

    @property
    def arch(self):
        '''
        Architecture name ('i386', 'x86_64' or 'arm'), derived from the ELF
        object on first access and cached afterwards.

        Raises NotImplementedError for non-ELF binaries or an architecture
        outside that set.
        '''
        assert self._binary is not None

        if self._arch is not None:
            return self._arch

        if self._binary_obj is None:
            raise NotImplementedError("Architecture detection isn't supported")

        arch = self._binary_obj.get_machine_arch()
        if arch == 'x86':
            self._arch = 'i386'
        elif arch == 'x64':
            self._arch = 'x86_64'
        elif arch == 'ARM':
            self._arch = 'arm'
        else:
            # Raising a bare string is itself a TypeError on Python >= 2.6.
            raise NotImplementedError("Unsupported architecture: %s"%(arch, ))

        return self._arch

    def _start_workers(self, num_processes):
        ''' Spawn |num_processes| processes, each running the executor. '''
        assert num_processes > 0, "Must have more than 0 worker processes"

        logger.info("Starting %d processes.", num_processes)

        for _ in range(num_processes):
            p = Process(target=self._executor.run, args=())
            self._workers.append(p)
            p.start()

    def _join_workers(self):
        '''
        Join every worker. On Ctrl-C the executor is shut down, the worker is
        terminated when possible, and it is re-queued so it still gets joined.
        '''
        while len(self._workers) > 0:
            w = self._workers.pop()
            try:
                w.join()
            except KeyboardInterrupt:
                self._executor.shutdown()
                # multiprocessing.dummy.Process does not support terminate
                if hasattr(w, 'terminate'):
                    w.terminate()

                self._workers.append(w)

    ############################################################################
    # Model hooks + callback
    ############################################################################

    def apply_model_hooks(self, path):
        '''
        Register model hooks listed in the file at |path|.

        Each non-blank line holds three space-separated fields:
        "<address> <calling-convention-name> <dotted name under models>".
        '''
        #TODO(yan): Simplify the partial function application

        # Imported straight from main.py; this will be re-written once the new
        # event code is in place.
        import core.cpu
        import importlib
        import models

        with open(path, 'r') as fnames:
            for line in fnames.readlines():
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue

                address, cc_name, name = line.split(' ')
                cc = getattr(core.cpu.x86.ABI, cc_name)
                fmodel = models
                name_parts = name.split('.')
                importlib.import_module("models.{}".format(name_parts[0]))
                for n in name_parts:
                    fmodel = getattr(fmodel,n)
                assert fmodel != models
                logger.debug("[+] Hooking 0x%x %s %s", int(address,0), cc_name, name )
                def cb_function(cc, fmodel, state):
                    cc(fmodel)(state.model)
                # partial() binds this iteration's cc/fmodel, avoiding the
                # late-binding closure pitfall.
                cb = functools.partial(cb_function, cc, fmodel)
                # TODO(yan) this should be a dict
                self._model_hooks.setdefault(int(address,0), set()).add(cb)

    def _model_hook_callback(self, state, pc):
        ''' Invoke every model hook registered for |pc|. '''
        if pc not in self._model_hooks:
            return

        for cb in self._model_hooks[pc]:
            cb(state)

    ############################################################################
    # Assertion hooks + callback
    ############################################################################

    def load_assertions(self, path):
        '''
        Load assertions from the file at |path|. Each non-blank line is
        "<hex pc> <expression>"; a repeated pc replaces the earlier entry.
        '''
        with open(path, 'r') as f:
            for line in f.readlines():
                if not line.strip():
                    # Tolerate blank lines instead of failing in int().
                    continue

                pc_text, _, program = line.partition(' ')
                pc = int(pc_text, 16)
                if pc in self._assertions:
                    logger.debug("Repeated PC in assertions file %s", path)
                self._assertions[pc] = program

    def start(self):
        '''
        Start Manticore, creating all necessary support classes.

        Blocks until every worker process has been joined.
        '''
        assert not self._running, "Manticore is already running."
        args = self._args

        replay = None
        if hasattr(args, 'replay') and args.replay is not None:
            with open(args.replay, 'r') as freplay:
                replay = [int(entry, 16) for entry in freplay.readlines()]

        state = self._make_state(self._binary)

        self._executor = Executor(state,
                                  workspace=self.workspace,
                                  policy=self._policy,
                                  dumpafter=self.dumpafter,
                                  maxstates=self.maxstates,
                                  maxstorage=self.maxstorage,
                                  replay=replay,
                                  dumpstats=self.should_profile)

        # Only subscribe the callbacks that have something registered.
        if self._hooks:
            self._executor.will_execute_pc += self._hook_callback

        if self._model_hooks:
            self._executor.will_execute_pc += self._model_hook_callback

        if self._assertions:
            self._executor.will_execute_pc += self._assertions_callback

        if self.should_profile:
            self._executor.profiling = True

        self._time_started = time.time()

        self._running = True

        try:
            self._start_workers(self._num_processes)

            self._join_workers()
        finally:
            self._running = False

    def terminate(self):
        'Gracefully terminate the currently-executing Manticore run.'
        self._executor.shutdown()

    def _assertions_callback(self, state, pc):
        '''
        Check the assertion registered for |pc|. Raises AbandonState when it
        cannot be true under the state's constraints; otherwise adds it to
        those constraints.
        '''
        if pc not in self._assertions:
            return

        program = self._assertions[pc]

        #This will interpret the buffer specification written in INTEL ASM.
        # (It may dereference pointers)
        assertion = parse(program, state.cpu.read, state.cpu.read_register)
        if not solver.can_be_true(state.constraints, assertion):
            logger.info(str(state.cpu))
            logger.info("Assertion %x -> {%s} does not hold. Aborting state.",
                        state.cpu.pc, program)
            raise AbandonState()

        #Everything is good add it.
        state.constraints.add(assertion)

    def _hook_callback(self, state, pc):
        'Invoke all registered generic hooks'

        # Ignore symbolic pc.
        # TODO(yan): Should we ask the solver if any of the hooks are possible,
        # and execute those that are?
        if not isinstance(pc, (int, long)):
            return

        # Invoke all pc-specific hooks
        for cb in self._hooks.get(pc, []):
            cb(state)

        # Invoke all pc-agnostic hooks
        for cb in self._hooks.get(None, []):
            cb(state)

    def dump_stats(self):
        '''
        Write the coverage / memory-error files when configured, ask the
        executor to dump its stats, log a summary, and write visited.txt and
        command.sh into the workspace.
        '''
        if self.coverage_file is not None:
            with open(self.coverage_file, "w") as f:
                fmt = "0x{:016x}\n"
                for m in self._executor.visited:
                    f.write(fmt.format(m[1]))

        if self.memory_errors_file is not None:
            # Previously opened the undefined name 'args.errorfile'.
            with open(self.memory_errors_file, "w") as f:
                fmt = "0x{:016x}\n"
                for m in self._executor.errors:
                    f.write(fmt.format(m))

        self._executor.dumpStats()

        # Sample the clock once so 'Total time' and 'IPS' agree.
        elapsed = time.time() - self._time_started

        logger.info('Results dumped in %s', self.workspace)
        logger.info('Instructions executed: %d', self._executor.count)
        logger.info('Coverage: %d different instructions executed', len(self._executor.visited))
        logger.info('Number of paths covered %r', State.state_count())
        logger.info('Total time: %s', elapsed)
        logger.info('IPS: %d', self._executor.count/elapsed)

        visited = ['%d:%08x'%site for site in self._executor.visited]
        with open(os.path.join(self.workspace,'visited.txt'),'w') as f:
            for entry in sorted(visited):
                f.write(entry + '\n')

        with open(os.path.join(self.workspace,'command.sh'),'w') as f:
            f.write(' '.join(sys.argv))