manticore/manticore.py
Yan dde79a0bab Remove ManticoreControl object [#180] (#4)
* Remove ManticoreControl object
* Some changes were brought in from dev-symbolicate-api
* Add Manticore.terminate()
* Add State.abandon()
* Update sample scripts
* Remove ctl from README
* Fix tests
* Bring in changes from dev-symbolicate-api
* Lower-case wildcard
* string -> cstring
* abandon() docstring
* Rename "name" to "label"
* Remove obsolete comment
* Make NUL a possible value for the last byte of a cstring
* Fix AbandonState and add example binary&script
* name -> label in tests, manticore.py
* Ignore .DS_Store
* Update symbolicate_buffer docstring
2017-02-14 14:54:52 -05:00

import os
import sys
import time
import types
import logging
import tempfile
import functools
import cPickle  # needed by makeWindows() below

from multiprocessing import Manager, Pool
from multiprocessing import Process

from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

from core.executor import Executor, State, AbandonState
from core.parser import parse
from core.smtlib import solver, Expression, Operators, SolverException, Array
from core.smtlib import BitVec, Bool
from core.smtlib import ConstraintSet
from models import linux, decree, windows
from utils import gdb, qemu

logger = logging.getLogger('MANTICORE')

def makeDecree(args):
    constraints = ConstraintSet()
    model = decree.SDecree(constraints, ','.join(args.programs))
    initial_state = State(constraints, model)
    logger.info('Loading program %s', args.programs)

    #if args.data != '':
    #    logger.info('Starting with concrete input: {}'.format(args.data))
    model.input.transmit(args.data)
    model.input.transmit(initial_state.symbolicate_buffer('+'*14, label='RECEIVE'))

    return initial_state

def makeLinux(program, arguments, environment, concrete_start=''):
    logger.info('Loading program %s', program)

    constraints = ConstraintSet()
    # Note: the trailing comma makes symbolic_files a one-element tuple rather
    # than a bare string.
    model = linux.SLinux(constraints, program, argv=arguments, envp=environment,
                         symbolic_files=('symbolic.txt',))
    initial_state = State(constraints, model)

    if concrete_start != '':
        logger.info('Starting with concrete input: {}'.format(concrete_start))

    for i in xrange(len(arguments)):
        arguments[i] = initial_state.symbolicate_buffer(arguments[i], label='ARGV%d' % (i+1), string=True)

    for i in xrange(len(environment)):
        environment[i] = initial_state.symbolicate_buffer(environment[i], label='ENV%d' % (i+1), string=True)

    model.input.transmit(concrete_start)

    # set stdin input...
    model.input.transmit(initial_state.symbolicate_buffer('+'*256, label='STDIN'))

    return initial_state

def makeWindows(args):
    assert args.size is not None, "Need to specify buffer size"
    assert args.buffer is not None, "Need to specify buffer base address"
    logger.debug('Loading program %s', args.programs)

    additional_context = None
    if args.context:
        with open(args.context, "r") as addl_context_file:
            additional_context = cPickle.loads(addl_context_file.read())
        logger.debug('Additional context loaded with contents {}'.format(additional_context)) #DEBUG

    constraints = ConstraintSet()
    model = windows.SWindows(constraints, args.programs[0], additional_context, snapshot_folder=args.workspace)

    # This will interpret the buffer specification written in INTEL ASM. (It may dereference pointers)
    data_size = parse(args.size, model.current.read_bytes, model.current.read_register)
    data_ptr = parse(args.buffer, model.current.read_bytes, model.current.read_register)

    logger.debug('Buffer at %x, size %d bytes', data_ptr, data_size)
    buf_str = "".join(model.current.read_bytes(data_ptr, data_size))
    logger.debug('Original buffer: %s', buf_str.encode('hex'))

    offset = args.offset
    concrete_data = args.data.decode('hex')
    assert data_size >= offset + len(concrete_data)
    size = min(args.maxsymb, data_size - offset - len(concrete_data))
    symb = constraints.new_array(name='RAWMSG', index_max=size)

    model.current.write_bytes(data_ptr + offset, concrete_data)
    model.current.write_bytes(data_ptr + offset + len(concrete_data), [symb[i] for i in xrange(size)])

    logger.debug('First %d bytes are left concrete', offset)
    logger.debug('followed by %d bytes of concrete start', len(concrete_data))
    hex_head = "".join(model.current.read_bytes(data_ptr, offset+len(concrete_data)))
    logger.debug('Hexdump head: %s', hex_head.encode('hex'))
    logger.debug('Total symbolic characters inserted: %d', size)
    logger.debug('followed by %d bytes of unmodified concrete bytes at end.', (data_size-offset-len(concrete_data))-size)
    hex_tail = "".join(map(chr, model.current.read_bytes(data_ptr+offset+len(concrete_data)+size, data_size-(offset+len(concrete_data)+size))))
    logger.debug('Hexdump tail: %s', hex_tail.encode('hex'))
    logger.info("Starting PC is: {:08x}".format(model.current.PC))

    return State(constraints, model)
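
# Note on makeWindows(): it expects an argparse-style namespace. Judging from
# the attribute accesses above, that namespace needs at least: programs,
# workspace, context (optional path to a pickled context), buffer, size,
# offset, data (a hex-encoded string) and maxsymb.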

def binary_type(path):
    '''
    Given a path to a binary, return a string representation of its type.
      i.e. ELF, PE, DECREE, QNX
    '''
    magic = None
    with open(path) as f:
        magic = f.read(4)

    if magic == '\x7fELF':
        return 'ELF'
    elif magic == 'MDMP':
        return 'PE'
    elif magic == '\x7fCGC':
        return 'DECREE'
    else:
        raise NotImplementedError("Binary {} not supported.".format(path))
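
# Illustrative only: binary_type() keys off the first four bytes of the file,
# so for an ordinary ELF executable (hypothetical path) one would expect:
#
#     >>> binary_type('/bin/ls')
#     'ELF'
#
# Anything without a recognized magic value raises NotImplementedError.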

def issymbolic(value):
    '''
    Helper to determine whether a value read from memory is symbolic.
    '''
    return isinstance(value, Expression)

class Manticore(object):

    def __init__(self, binary_path, args=[], verbose=False):
        assert os.path.isfile(binary_path)

        self._binary = binary_path
        self._binary_type = binary_type(binary_path)
        self._argv = args  # args.programs[1:]
        self._env = {}
        # Will be set to a temporary directory if not set before running start()
        self._workspace_path = None
        self._policy = 'random'
        self._coverage_file = None
        self._memory_errors = None
        self._should_profile = False
        self._workers = []
        # XXX(yan) '_args' will be removed soon; exists currently to ease porting
        self._args = args
        self._time_started = 0
        self._num_processes = 1
        self._begun_trace = False
        self._assertions = {}
        self._model_hooks = {}
        self._hooks = {}
        self._running = False
        self._arch = None
        self._log_debug = False
        self._log_file = '/dev/stdout'
        self._concrete_data = ''
        self._dumpafter = 0
        self._maxstates = 0
        self._maxstorage = 0
        self._verbosity = 0

        manager = Manager()
        self._context = manager.dict()

        # XXX(yan) This is a bit obtuse; once PE support is updated this should
        # be refactored out
        if self._binary_type == 'ELF':
            self._binary_obj = ELFFile(file(self._binary))

        self._init_logging()

    def _init_logging(self):
        fmt_str = '%(asctime)s: [%(process)d]%(stateid)s %(name)s:%(levelname)s: %(message)s'

        if self._log_debug:
            log_level = logging.DEBUG
        else:
            log_level = logging.WARNING

        logging.basicConfig(filename=self._log_file, format=fmt_str, level=log_level)

        def loggerSetState(logger, stateid):
            logger.filters[0].stateid = stateid

        class ContextFilter(logging.Filter):
            '''
            This is a filter which injects contextual information into the log.
            '''
            def filter(self, record):
                if hasattr(self, 'stateid') and isinstance(self.stateid, int):
                    record.stateid = '[%d]' % self.stateid
                else:
                    record.stateid = ''
                return True

        ctxfilter = ContextFilter()

        for name, logger in logging.Logger.manager.loggerDict.items():
            logger.addFilter(ctxfilter)
            logger.setLevel(log_level)
            logger.setState = types.MethodType(loggerSetState, logger)

    @property
    def log_file(self):
        return self._log_file

    @log_file.setter
    def log_file(self, path):
        if self._log_file == path:
            return

        if path == '-':
            path = '/dev/stdout'

        self._log_file = path
        self._init_logging()

    # XXX(yan): args is a temporary hack to include while we continue moving
    # non-Linux platforms to new-style arg handling.
    @property
    def args(self):
        return self._args

    @args.setter
    def args(self, args):
        self._args = args

    @property
    def should_profile(self):
        return self._should_profile

    @should_profile.setter
    def should_profile(self, enable_profiling):
        self._should_profile = enable_profiling

    @property
    def concrete_data(self):
        return self._concrete_data

    @concrete_data.setter
    def concrete_data(self, data):
        self._concrete_data = data

    @property
    def maxstates(self):
        return self._maxstates

    @maxstates.setter
    def maxstates(self, max_states):
        self._maxstates = max_states

    @property
    def dumpafter(self):
        return self._dumpafter

    @dumpafter.setter
    def dumpafter(self, dump_after):
        self._dumpafter = dump_after

    @property
    def maxstorage(self):
        return self._maxstorage

    @maxstorage.setter
    def maxstorage(self, max_storage):
        self._maxstorage = max_storage

    @property
    def log_debug(self):
        return self._log_debug

    @log_debug.setter
    def log_debug(self, debug):
        if self._log_debug == debug:
            return

        self._log_debug = debug
        self._init_logging()

    @property
    def verbosity(self):
        return self._verbosity

    @verbosity.setter
    def verbosity(self, setting):
        levels = [[],
                  [('EXECUTOR', logging.INFO)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG), ('CPU', logging.DEBUG)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG), ('CPU', logging.DEBUG)],
                  [('EXECUTOR', logging.DEBUG), ('MODEL', logging.DEBUG), ('CPU', logging.DEBUG),
                   ('SMTLIB', logging.DEBUG)]]

        # Takes a value and ensures it's in a certain range
        def clamp(val, minimum, maximum):
            return sorted((minimum, val, maximum))[1]

        clamped = clamp(setting, 0, len(levels) - 1)
        if clamped != setting:
            logger.debug("%s not between 0 and %d, forcing to %d", setting, len(levels) - 1, clamped)

        for log_type, level in levels[clamped]:
            logging.getLogger(log_type).setLevel(level)

        self._verbosity = setting
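
    # For reference (derived from the table above): verbosity is an int in
    # [0, 5]; e.g. setting it to 2 raises the EXECUTOR and MODEL loggers to
    # DEBUG, while 5 additionally enables CPU and SMTLIB debug output.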

    def add_hook(self, pc, callback):
        '''
        Add a callback to be invoked on executing a program counter. Pass 'None'
        for pc to invoke callback on every instruction.
        '''
        def _inner(state):
            callback(self._context, state)

        self._hooks.setdefault(pc, set()).add(_inner)
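
    # A minimal usage sketch (the address below is hypothetical): per _inner()
    # above, the callback receives the shared context dict and the current
    # State.
    #
    #     def on_entry(context, state):
    #         context['hits'] = context.get('hits', 0) + 1
    #
    #     m.add_hook(0x400500, on_entry)   # or pc=None to fire on every instruction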

    def _get_symbol_address(self, symbol):
        '''
        Return the address of |symbol| within the binary
        '''
        if self._binary_obj is None:
            raise NotImplementedError("Symbols aren't supported")

        for section in self._binary_obj.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue

            symbols = section.get_symbol_by_name(symbol)
            if len(symbols) == 0:
                continue

            return symbols[0].entry['st_value']

    def _make_state(self, path):
        if self._binary_type == 'ELF':
            # Linux
            env = ['%s=%s' % (k, v) for k, v in self._env.items()]
            state = makeLinux(self._binary, self._argv, env, self._concrete_data)
        elif self._binary_type == 'PE':
            # Windows
            state = makeWindows(self._args)
        elif self._binary_type == 'DECREE':
            # Decree
            state = makeDecree(self._args)
        else:
            raise NotImplementedError("Binary {} not supported.".format(path))

        return state

    @property
    def workspace(self):
        if self._workspace_path is None:
            self._workspace_path = self._make_workspace()

        return self._workspace_path

    @workspace.setter
    def workspace(self, path):
        assert not self._running, "Can't set workspace if Manticore is running."

        if os.path.exists(path):
            assert os.path.isdir(path)
        else:
            os.mkdir(path)

        self._workspace_path = path

    def _make_workspace(self):
        ''' Make working directory '''
        return tempfile.mkdtemp(prefix="pse_", dir='./')

    @property
    def workers(self):
        return self._num_processes

    @workers.setter
    def workers(self, n):
        assert not self._running, "Can't set workers if Manticore is running."
        self._num_processes = n

    @property
    def policy(self):
        return self._policy

    @policy.setter
    def policy(self, policy):
        assert not self._running, "Can't set policy if Manticore is running."
        self._policy = policy

    @property
    def coverage_file(self):
        return self._coverage_file

    @coverage_file.setter
    def coverage_file(self, path):
        assert not self._running, "Can't set coverage file if Manticore is running."
        self._coverage_file = path

    @property
    def memory_errors_file(self):
        return self._memory_errors

    @memory_errors_file.setter
    def memory_errors_file(self, path):
        assert not self._running, "Can't set memory errors if Manticore is running."
        self._memory_errors = path

    @property
    def env(self):
        return self._env

    @env.setter
    def env(self, env):
        '''
        Update environment variables from |env|. Use repeated '+' chars for
        symbolic values.
        '''
        assert isinstance(env, dict)
        assert not self._running, "Can't set process env if Manticore is running."

        self._env.update(env)
        return self._env
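
    # Example (per the docstring above and makeLinux(), which symbolicates ENV
    # entries): a run of '+' characters becomes symbolic bytes, so the
    # following marks the value of FOO as fully symbolic:
    #
    #     m.env = {'FOO': '++++++++'}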

    def env_add(self, key, value, overwrite=True):
        if key in self._env:
            if overwrite:
                self._env[key] = value
        else:
            self._env[key] = value

    @property
    def arch(self):
        assert self._binary is not None

        if self._arch is not None:
            return self._arch

        arch = self._binary_obj.get_machine_arch()

        if arch == 'x86':
            self._arch = 'i386'
        elif arch == 'x64':
            self._arch = 'x86_64'
        elif arch == 'ARM':
            self._arch = 'arm'
        else:
            raise NotImplementedError("Unsupported architecture: %s" % (arch,))

        return self._arch

    def _start_workers(self, num_processes):
        assert num_processes > 0, "Must have more than 0 worker processes"
        logger.info("Starting %d processes.", num_processes)

        for _ in range(num_processes):
            p = Process(target=self._executor.run, args=())
            self._workers.append(p)
            p.start()

    def _join_workers(self):
        while len(self._workers) > 0:
            w = self._workers.pop()
            try:
                w.join()
            except KeyboardInterrupt, e:
                self._executor.shutdown()
                # multiprocessing.dummy.Process does not support terminate
                if hasattr(w, 'terminate'):
                    w.terminate()
                self._workers.append(w)

    ############################################################################
    # Model hooks + callback
    ############################################################################

    def apply_model_hooks(self, path):
        # TODO(yan): Simplify the partial function application

        # Imported straight from main.py; this will be re-written once the new
        # event code is in place.
        import core.cpu
        import importlib
        import models

        with open(path, 'r') as fnames:
            for line in fnames.readlines():
                address, cc_name, name = line.strip().split(' ')
                cc = getattr(core.cpu.x86.ABI, cc_name)

                fmodel = models
                name_parts = name.split('.')
                importlib.import_module("models.{}".format(name_parts[0]))
                for n in name_parts:
                    fmodel = getattr(fmodel, n)
                assert fmodel != models

                logger.debug("[+] Hooking 0x%x %s %s", int(address, 0), cc_name, name)

                def cb_function(cc, fmodel, state):
                    cc(fmodel)(state.model)

                cb = functools.partial(cb_function, cc, fmodel)

                # TODO(yan) this should be a dict
                self._model_hooks.setdefault(int(address, 0), set()).add(cb)
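
    # Model-hook file format (inferred from the parsing above): one hook per
    # line, "<address> <calling-convention> <dotted model name>", where the
    # calling convention must name an attribute of core.cpu.x86.ABI and the
    # dotted name must resolve inside the models package. A purely
    # hypothetical example line:
    #
    #     0x8048450 CDECL libc.strlen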

    def _model_hook_callback(self, state, pc):
        if pc not in self._model_hooks:
            return

        for cb in self._model_hooks[pc]:
            cb(state)

    ############################################################################
    # Assertion hooks + callback
    ############################################################################

    def load_assertions(self, path):
        with open(path, 'r') as f:
            for line in f.readlines():
                pc = int(line.split(' ')[0], 16)
                if pc in self._assertions:
                    logger.debug("Repeated PC in assertions file %s", path)
                self._assertions[pc] = ' '.join(line.split(' ')[1:])
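
    # Assertion file format (inferred from the parsing above): one entry per
    # line, a hex program counter followed by an expression string that is
    # stored verbatim and only parsed by core.parser.parse() when that PC is
    # reached (see _assertions_callback below). A hypothetical line, with the
    # exact expression grammar left to core.parser:
    #
    #     400a2d EAX == 0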

    def start(self):
        '''
        Start Manticore, creating all necessary support classes.
        '''
        assert not self._running, "Manticore is already running."
        args = self._args

        replay = None
        if hasattr(args, 'replay') and args.replay is not None:
            with open(args.replay, 'r') as freplay:
                replay = map(lambda x: int(x, 16), freplay.readlines())

        state = self._make_state(self._binary)

        self._executor = Executor(state,
                                  workspace=self.workspace,
                                  policy=self._policy,
                                  dumpafter=self.dumpafter,
                                  maxstates=self.maxstates,
                                  maxstorage=self.maxstorage,
                                  replay=replay,
                                  dumpstats=self.should_profile)

        if self._hooks:
            self._executor.will_execute_pc += self._hook_callback
        if self._model_hooks:
            self._executor.will_execute_pc += self._model_hook_callback
        if self._assertions:
            self._executor.will_execute_pc += self._assertions_callback

        if self.should_profile:
            self._executor.profiling = True

        self._time_started = time.time()
        self._running = True

        try:
            self._start_workers(self._num_processes)
            self._join_workers()
        finally:
            self._running = False

    def terminate(self):
        'Gracefully terminate the currently-executing Manticore run.'
        self._executor.shutdown()

    def _assertions_callback(self, state, pc):
        if pc not in self._assertions:
            return

        from core.parser import parse

        program = self._assertions[pc]

        # This will interpret the buffer specification written in INTEL ASM.
        # (It may dereference pointers)
        assertion = parse(program, state.cpu.read, state.cpu.read_register)

        if not solver.can_be_true(state.constraints, assertion):
            logger.info(str(state.cpu))
            logger.info("Assertion %x -> {%s} does not hold. Aborting state.",
                        state.cpu.pc, program)
            raise AbandonState()

        # Everything is good; add it.
        state.constraints.add(assertion)

    def _hook_callback(self, state, pc):
        'Invoke all registered generic hooks'

        # Ignore symbolic pc.
        # TODO(yan): Should we ask the solver if any of the hooks are possible,
        # and execute those that are?
        if not isinstance(pc, (int, long)):
            return

        # Invoke all pc-specific hooks
        for cb in self._hooks.get(pc, []):
            cb(state)

        # Invoke all pc-agnostic hooks
        for cb in self._hooks.get(None, []):
            cb(state)

    def dump_stats(self):
        if self.coverage_file is not None:
            with open(self.coverage_file, "w") as f:
                fmt = "0x{:016x}\n"
                for m in self._executor.visited:
                    f.write(fmt.format(m[1]))

        if self.memory_errors_file is not None:
            with open(self.memory_errors_file, "w") as f:
                fmt = "0x{:016x}\n"
                for m in self._executor.errors:
                    f.write(fmt.format(m))

        self._executor.dumpStats()

        logger.info('Results dumped in %s', self.workspace)
        logger.info('Instructions executed: %d', self._executor.count)
        logger.info('Coverage: %d different instructions executed', len(self._executor.visited))
        logger.info('Number of paths covered %r', State.state_count())
        logger.info('Total time: %s', time.time() - self._time_started)
        logger.info('IPS: %d', self._executor.count / (time.time() - self._time_started))

        visited = ['%d:%08x' % site for site in self._executor.visited]
        with file(os.path.join(self.workspace, 'visited.txt'), 'w') as f:
            for entry in sorted(visited):
                f.write(entry + '\n')

        with file(os.path.join(self.workspace, 'command.sh'), 'w') as f:
            f.write(' '.join(sys.argv))
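
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): a minimal
# driver wiring the Manticore class above together. The target binary comes
# from the command line; the hook address below is purely hypothetical and
# would need to be replaced with a real program counter in your target.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    def reached_checkpoint(context, state):
        # Hook callbacks receive the shared context dict and the current State.
        print 'hit checkpoint at %#x' % state.cpu.pc

    m = Manticore(sys.argv[1])                 # path to an ELF/DECREE/minidump target
    m.verbosity = 1                            # EXECUTOR logger at INFO
    m.workers = 2                              # number of worker processes
    m.env = {'HOME': '++++++++'}               # '+' runs become symbolic bytes
    m.add_hook(0x400760, reached_checkpoint)   # hypothetical address
    m.start()
    m.dump_stats()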