From 2916d7e3aebd662ecbf6883eb280f6f31a9b1fb9 Mon Sep 17 00:00:00 2001
From: Yan
Date: Mon, 5 Jun 2017 16:16:54 -0400
Subject: [PATCH] Support tracing (#247)

* Script for generating syscall tables
* Add generated syscall table
* Reintroduce tracing script
* Add configuration options needed by verify.py
* Clean up verify; remove dependency on experimental after_hook
* trace experiments
* reorg verify.py
* Update after merge
* Remove Manticore param
* Remove unused vars
* Use regfile api; redo last_instr check
* Fix gdb->mcore name discrepancy
* Move kwargs to explicit args for Linux/SLinux
* Maintain options in makeLinux to not overcomplicate the Manticore class
* Address merge issues
* remove debug stmt
* Reintroduce options
* Revert linux.py/manticore.py from master
* Use the qemu -s and -R flags
* Import syscalls table from master
* And import extract_syscalls.py script
* Fix verify reference
* Move syscall to arg
* Update register references
* Simplify last instruction check
* Add logging filter to TRACE logger as well
* Be consistent with state synchronization
* Be explicit about gdb types
* Improve mmap debug output
* Return error if ioctl is not implemented
* Fix syscall sync
* Make logging more self-contained
* Use errno const in ioctl impl
---
 manticore/platforms/linux.py |  10 +-
 scripts/gdb.py               | 180 ++++++++++++++++++++++++++
 scripts/qemu.py              | 120 +++++++++++++++++
 scripts/verify.py            | 242 +++++++++++++++++++++++++++++++++++
 4 files changed, 550 insertions(+), 2 deletions(-)
 create mode 100644 scripts/gdb.py
 create mode 100644 scripts/qemu.py
 create mode 100644 scripts/verify.py

diff --git a/manticore/platforms/linux.py b/manticore/platforms/linux.py
index 7320f41..dd3aaaa 100644
--- a/manticore/platforms/linux.py
+++ b/manticore/platforms/linux.py
@@ -1167,7 +1167,7 @@ class Linux(Platform):
         if fd > 2:
             return self.files[fd].ioctl(request, argp)
         else:
-            return 0
+            return -errno.EINVAL
 
 
     def sys_open(self, buf, flags, mode):
@@ -1952,10 +1952,16 @@ class SLinux(Linux):
                 #FIXME Check if file should be symbolic input and do as with fd0
                 result = cpu.memory.mmapFile(address, size, perms, self.files[fd].name, offset)
 
+        actually_mapped = '0x{:016x}'.format(result)
+        if address is None or result != address:
+            address = address or 0
+            actually_mapped += ' [requested: 0x{:016x}]'.format(address)
+
         if (flags & 0x10 !=0) and result != address:
             cpu.memory.munmap(result, size)
             result = -1
-        logger.debug("sys_mmap(0x%016x, 0x%x, %s, %x, %d) - (%r)", result, size, perms, flags, fd, prot)
+
+        logger.debug("sys_mmap(%s, 0x%x, %s, %x, %d) - (0x%x)", actually_mapped, size, perms, flags, fd, result)
         return result
 
 
diff --git a/scripts/gdb.py b/scripts/gdb.py
new file mode 100644
index 0000000..e4eafae
--- /dev/null
+++ b/scripts/gdb.py
@@ -0,0 +1,180 @@
+import copy
+import traceback
+import os
+import sys
+import time
+import subprocess
+
+count = 0
+
+prompt = ''
+subproc = None
+_arch = None
+
+def drain():
+    '''
+    Read gdb's output up to (but not including) the next prompt.
+
+    :return: Text printed by gdb before the prompt
+    :rtype: str
+    :raises RuntimeError: If gdb's output ends before a prompt is seen
+    '''
+    str_buffer = ''
+    while not str_buffer.endswith(prompt):
+        c = subproc.stdout.read(1)
+        if not c:
+            # EOF: gdb is gone, and appending '' forever would never terminate
+            raise RuntimeError("gdb exited unexpectedly")
+        str_buffer += c
+    return str_buffer[:-len(prompt)]
+
+def start(arch, argv, port=1234, _prompt='(gdb) '):
+    '''
+    Spawn gdb-multiarch on argv[0] and attach it to a remote stub.
+
+    :param str arch: Target architecture (currently unused)
+    :param list argv: Program and its arguments; only argv[0] is used
+    :param int port: Port passed to gdb's 'target remote'
+    :param str _prompt: Prompt string that terminates each gdb reply
+    :raises RuntimeError: If gdb-multiarch cannot be launched
+    '''
+    global prompt, subproc
+    prompt = _prompt
+    gdb = 'gdb-multiarch'
+    try:
+        subproc = subprocess.Popen([gdb, argv[0]],
+                                   stdin=subprocess.PIPE,
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.STDOUT)
+    except OSError:
+        msg = "'{}' binary not found in PATH (needed for tracing)".format(gdb)
+        raise RuntimeError(msg)
+
+    drain()
+    #correspond('set architecture {}\n'.format(arch))
+    correspond('file {}\n'.format(argv[0]))
+    correspond('target remote :{}\n'.format(port))
+    correspond('set pagination off\n')
+
+def correspond(text):
+    """Communicate with the child process without closing stdin."""
+    subproc.stdin.write(text)
+    subproc.stdin.flush()
+    return drain()
+
+def getInstruction():
+    '''Return the first line of gdb's disassembly at the current pc.'''
+    return correspond('x/i $pc\n').split('\n')[0]
+
+def getR(reg):
+    '''
+    Read a register through gdb's print command.
+
+    :param str reg: Register name without the leading '$'
+    :return: Register value
+    :rtype: long
+    '''
+    reg = "$"+reg
+    if "XMM" in reg:
+        reg = reg+".uint128"
+        val = correspond('p %s\n'%reg.lower()).split("=")[-1].split("\n")[0]
+        if "0x" in val:
+            return long(val.split("0x")[-1],16)
+        else:
+            return long(val)
+    if "FLAG" in reg:
+        reg = "(unsigned) "+reg
+    # RnB / RnW are read as the low 8 / 16 bits of Rn
+    if reg in ['$R%dB'%i for i in range(16)] :
+        reg = reg[:-1] + "&0xff"
+    if reg in ['$R%dW'%i for i in range(16)] :
+        reg = reg[:-1] + "&0xffff"
+    val = correspond('p /x %s\n'%reg.lower())
+    val = val.split("0x")[-1]
+    return long(val.split("\n")[0],16)
+
+def getCanonicalRegisters():
+    '''
+    Parse the output of gdb's 'info reg'.
+
+    :return: Register name -> value; cpsr is reduced to its NZCV bits
+    :rtype: dict
+    '''
+    reg_output = correspond('info reg\n')
+    registers = {}
+    for line in reg_output.split("\n"):
+        line = line.strip()
+        if not line:
+            continue
+        name, hex_val = line.split()[:2]
+        if name != 'cpsr':
+            registers[name] = long(hex_val, 0)
+        else:
+            # We just want the NZCV flags
+            registers[name] = int(hex_val, 0) & 0xF0000000
+    return registers
+
+def setR(reg, value):
+    correspond('set $%s = %s\n'%(reg.lower(), long(value)))
+
+def stepi():
+    #print subproc.correspond("x/i $pc\n")
+    correspond("stepi\n")
+
+def getM(m):
+    '''Read the giant (8-byte) word at address expression m.'''
+    return long(correspond('x/xg %s\n'%m).strip().split('\t')[-1], 0)
+
+def getPid():
+    return int(correspond('info proc\n').split("\n")[0].split(" ")[-1])
+
+def getStack():
+    '''
+    Return the bounds of entry [-3] of /proc/PID/maps split on newlines.
+
+    :return: (start, end) addresses parsed from that entry
+    :rtype: tuple
+    '''
+    with open("/proc/%s/maps" % getPid()) as f:
+        maps = f.read().split("\n")
+    start_addr, end_addr = [int(x, 16) for x in maps[-3].split(" ")[0].split('-')]
+    return start_addr, end_addr
+
+def setByte(addr, val):
+    cmdstr = 'set {{char}}{} = {}'.format(addr, ord(val))
+    correspond(cmdstr + '\n')
+
+def getByte(m):
+    arch = get_arch()
+    mask = {'i386': 0xffffffff,
+            'armv7': 0xffffffff,
+            'amd64': 0xffffffffffffffff}[arch]
+    return int(correspond("x/1bx %d\n"%(m&mask)).split("\t")[-1].split("\n")[0][2:],16)
+
+def get_entry():
+    a=correspond('info target\n')
+    return long(a[a.find("Entry point:"):].split('\n')[0].split(' ')[-1][2:],16)
+
+def get_arch():
+    '''
+    Detect (and cache) the target architecture from gdb's 'info target'.
+
+    :return: One of 'i386', 'amd64' or 'armv7'
+    :rtype: str
+    :raises NotImplementedError: If the file format is not recognized
+    '''
+    global _arch
+    if _arch is not None:
+        return _arch
+    infotarget = correspond('info target\n')
+    if 'elf32-i386' in infotarget:
+        _arch = 'i386'
+    elif 'elf64-x86-64' in infotarget:
+        _arch = 'amd64'
+    elif 'elf32-littlearm' in infotarget:
+        _arch = 'armv7'
+    else:
+        print infotarget
+        # NotImplemented is a constant, not an exception; raising it is a TypeError
+        raise NotImplementedError("Unrecognized target")
+    return _arch
diff --git a/scripts/qemu.py b/scripts/qemu.py
new file mode 100644
index 0000000..fd60908
--- /dev/null
+++ b/scripts/qemu.py
@@ -0,0 +1,120 @@
+import copy
+import traceback
+import os
+import sys
+import time
+import subprocess
+import logging
+
+
+logger = logging.getLogger("QEMU")
+
+count = 0
+
+subproc = None
+stats = None
+_arch = None
+
+def get_lines(n=1):
+    '''
+    Read n newline-terminated lines from qemu's output.
+
+    :param int n: Number of lines to read
+    :return: The lines, each including its trailing newline
+    :rtype: list
+    :raises RuntimeError: If qemu's output ends before n lines were read
+    '''
+    lines = []
+    str_buffer = ''
+    received_lines = 0
+    while received_lines < n:
+        c = subproc.stdout.read(1)
+        if not c:
+            # EOF: qemu exited; without this the loop would spin forever
+            raise RuntimeError("qemu exited unexpectedly")
+        str_buffer += c
+        if c == '\n':
+            lines.append(str_buffer)
+            str_buffer = ''
+            received_lines += 1
+
+    return lines
+
+def parse_mmu_debug_output(s):
+    '''
+    Parse the header qemu-user prints when run with '-d mmu'.
+
+    :param list s: Output lines; consumed (emptied) by this function
+    :return: Parsed values, with the memory maps under the 'maps' key
+    :rtype: dict
+    '''
+    d = {}
+
+    # Get guest address space
+    d['reserved'] = int(s.pop(0).split()[1], 0)
+    d['host_mmap_min_addr'] = int(s.pop(0).split('=')[1], 0)
+    d['guest_base'] = int(s.pop(0).split()[1], 0)
+
+    # get rid of mapping heading
+    s.pop(0)
+    d['maps'] = []
+
+    # 's and' guards against running off the end when nothing follows the maps
+    while s and '-' in s[0]:
+        line = s.pop(0)
+        addr_range, size, protections = line.split()
+        start, end = addr_range.split('-')
+        d['maps'].append((int(start, 16),
+                          int(end, 16),
+                          int(size, 16),
+                          protections))
+
+    while s:
+        line = s.pop(0)
+        if not line.strip():
+            continue
+        var, addr = line.split()
+        d[var] = int(addr, 0)
+
+    return d
+
+
+def start(arch, argv, port=1234, va_size=0xc0000000, stack_size=0x20000):
+    '''
+    Launch qemu-user waiting for a gdb connection and record its memory layout.
+
+    :param str arch: Suffix of the qemu binary to run ('qemu-ARCH')
+    :param list argv: Program and arguments to run under qemu
+    :param int port: Port for qemu's gdb stub (-g)
+    :param int va_size: Reserved guest virtual address space (-R)
+    :param int stack_size: Guest stack size (-s)
+    '''
+    global subproc, stats
+    aslr_file = '/proc/sys/kernel/randomize_va_space'
+    try:
+        with open(aslr_file, 'r') as f:
+            if f.read().strip() != '0':
+                logger.warning("Disable ASLR before running qemu-user")
+                logger.warning(" sudo sh -c 'echo 0 > %s'", aslr_file)
+    except IOError:
+        # The check is advisory only; don't abort the trace if it can't be read
+        logger.debug("Could not read %s", aslr_file)
+
+    args = ['qemu-%s'%(arch,), '-g', port, '-d', 'mmu', '-R', va_size, '-s', stack_size] + argv
+    args = map(str, args)
+    print("Running: %s"%(' '.join(args),))
+    subproc = subprocess.Popen(args, stdin=subprocess.PIPE,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.STDOUT)
+    mmu_debug_output = get_lines(16)
+
+    stats = parse_mmu_debug_output(mmu_debug_output)
+    for m in stats['maps']:
+        print '{:x}-{:x}, {}, {}'.format(*m)
+
+def correspond(text):
+    """Communicate with the child process without closing stdin."""
+    if text:
+        subproc.stdin.write(text)
+        subproc.stdin.flush()
+    return get_lines()
diff --git a/scripts/verify.py b/scripts/verify.py
new file mode 100644
index 0000000..25b6159
--- /dev/null
+++ b/scripts/verify.py
@@ -0,0 +1,242 @@
+from manticore import Manticore
+from manticore.platforms import linux_syscalls
+
+import logging
+
+from sys import argv, exit
+import struct
+import qemu
+import gdb
+
+logger = logging.getLogger('TRACE')
+
+## We need to keep some complex objects in between hook invocations so we keep them
+## as globals. Tracing is inherently a single-threaded process, so using a
+## manticore context would be heavier than needed.
+stack_top = 0xc0000000
+stack_size = 0x20000
+initialized = False
+last_instruction = None
+in_helper = False
+
+def init_logging():
+    '''Add a filter that sets an empty stateid on every TRACE log record.'''
+    class ContextFilter(logging.Filter):
+        def filter(self, record):
+            record.stateid = ''
+            return True
+    logger.addFilter(ContextFilter())
+
+def dump_gdb(cpu, addr, count):
+    '''Print gdb's (g) and mcore's (m) value at every 4th address in the range.'''
+    for offset in range(addr, addr+count, 4):
+        val = int(gdb.getM(offset) & 0xffffffff)
+        val2 = int(cpu.read_int(offset))
+        print '{:x}: g{:08x} m{:08x}'.format(offset, val, val2)
+
+def cmp_regs(cpu, should_print=False):
+    '''
+    Compare registers from a remote gdb session to current mcore.
+
+    :param manticore.core.cpu Cpu: Current cpu
+    :param bool should_print: Whether to print values to stdout
+    :return: Whether or not any differences were detected
+    :rtype: bool
+    '''
+    differing = False
+    gdb_regs = gdb.getCanonicalRegisters()
+    for name in sorted(gdb_regs):
+        vg = gdb_regs[name]
+        if name.endswith('psr'):
+            name = 'apsr'
+        v = cpu.read_register(name.upper())
+        if should_print:
+            logger.debug('{} gdb:{:x} mcore:{:x}'.format(name, vg, v))
+        if vg != v:
+            if should_print:
+                logger.warning('^^ unequal')
+            differing = True
+    if differing:
+        logger.debug(qemu.correspond(None))
+    return differing
+
+def pre_mcore(state):
+    # Start recording memory writes
+    if state.cpu.instruction.mnemonic.lower() == 'svc':
+        state.cpu.memory.push_record_writes()
+
+def post_mcore(state, last_instruction):
+    '''
+    Handle syscalls (import memory) and bail if we diverge
+    '''
+    global in_helper
+
+    # Synchronize qemu state to manticore's after a system call
+    if last_instruction.mnemonic.lower() == 'svc':
+        # Synchronize all writes that have happened
+        writes = state.cpu.memory.pop_record_writes()
+        if writes:
+            logger.debug("Got %d writes", len(writes))
+        for addr, val in writes:
+            gdb.setByte(addr, val[0])
+
+        # Write return val to gdb
+        gdb_r0 = gdb.getR('R0')
+        if gdb_r0 != state.cpu.R0:
+            logger.debug("Writing 0x{:x} to R0 (overwriting 0x{:x})".format(
+                state.cpu.R0, gdb_r0))
+        for reg in state.cpu.canonical_registers:
+            if reg.endswith('PSR') or reg in ('R15', 'PC'):
+                continue
+            val = state.cpu.read_register(reg)
+            gdb.setR(reg, val)
+
+
+    # Ignore Linux kernel helpers
+    if (state.cpu.PC >> 16) == 0xffff:
+        in_helper = True
+        return
+
+    # If we executed a few instructions of a helper, we need to sync Manticore's
+    # state to GDB as soon as we stop executing a helper.
+    if in_helper:
+        for reg in state.cpu.canonical_registers:
+            if reg.endswith('PSR'):
+                continue
+            # Don't sync pc
+            if reg == 'R15':
+                continue
+            gdb.setR(reg, state.cpu.read_register(reg))
+        in_helper = False
+
+    if cmp_regs(state.cpu):
+        cmp_regs(state.cpu, should_print=True)
+        state.abandon()
+
+def pre_qemu(state):
+    # Nop for now, might need to do future sync state
+    pass
+
+def post_qemu(state, last_mnemonic):
+    if last_mnemonic.lower() == 'svc':
+        sync_svc(state)
+
+def sync_svc(state):
+    '''
+    Mirror some service calls in manticore. Happens after qemu executed a SVC
+    instruction, but before manticore did.
+    '''
+    syscall = state.cpu.R7 # Grab idx from manticore since qemu could have exited
+    name = linux_syscalls.armv7[syscall]
+
+    logger.debug("Syncing syscall: {}".format(name))
+
+    try:
+        # Make sure mmap returns the same address
+        if 'mmap' in name:
+            returned = gdb.getR('R0')
+            logger.debug("Syncing mmap ({:x})".format(returned))
+            state.cpu.write_register('R0', returned)
+        if 'exit' in name:
+            return
+    except ValueError:
+        for reg in state.cpu.canonical_registers:
+            print '{}: {:x}'.format(reg, state.cpu.read_register(reg))
+        raise
+
+def initialize(state):
+    '''
+    Synchronize the stack and register state (manticore->qemu)
+    '''
+    stack_bottom = min(state.cpu.SP, gdb.getR('SP'))
+    # Report the range that is actually copied, not just mcore's share of it
+    logger.debug("Copying {} bytes in the stack..".format(stack_top - stack_bottom))
+    for address in range(stack_bottom, stack_top):
+        b = state.cpu.read_int(address, 8)
+        gdb.setByte(address, chr(b))
+
+    logger.debug("Done")
+
+    # Qemu fd's start at 5, ours at 3. Add two filler fds
+    mcore_stdout = state.platform.files[1]
+    state.platform.files.append(mcore_stdout)
+    state.platform.files.append(mcore_stdout)
+
+    # Sync gdb's regs
+    for gdb_reg in gdb.getCanonicalRegisters():
+        if gdb_reg.endswith('psr'):
+            mcore_reg = 'APSR'
+        else:
+            mcore_reg = gdb_reg.upper()
+        value = state.cpu.read_register(mcore_reg)
+        gdb.setR(gdb_reg, value)
+
+def verify(argv):
+    '''
+    Run argv under qemu and Manticore in lockstep, comparing registers.
+
+    :param list argv: Program to trace followed by its arguments
+    '''
+    logger.debug("Verifying program \"{}\"".format(argv))
+
+    # Address and stack_size are from linux.py
+    # TODO(yan): Refactor these constants into a reachable value in platform
+    qemu.start('arm', argv, va_size=stack_top, stack_size=stack_size)
+    gdb.start('arm', argv)
+
+    m = Manticore(argv[0], argv[1:])
+    m.verbosity = 2
+
+    init_logging()
+    logger.setLevel(logging.DEBUG)
+
+    @m.hook(None)
+    def on_instruction(state):
+        '''
+        Handle all the hooks for each instruction executed. Ordered as:
+
+        pre_qemu
+         * qemu exec *
+        post_qemu
+
+        // svc synchronization happens here (mmap specifically)
+
+        pre_mcore
+         * mcore exec *
+        post_mcore
+
+        // all memory written in a mcore syscall gets moved to qemu here
+        '''
+        global initialized, last_instruction
+
+        # Initialize our state to QEMU's
+        if not initialized:
+            initialize(state)
+            initialized = True
+
+        if last_instruction:
+            post_mcore(state, last_instruction)
+
+        # Kernel helpers are inline in QEMU; do nothing
+        if (state.cpu.PC >> 16) == 0xffff:
+            return
+
+        pre_qemu(state)
+        last_mnemonic = [x.strip() for x in gdb.getInstruction().split(':')][1].split('\t')[0]
+        gdb.stepi()
+        post_qemu(state, last_mnemonic)
+
+        last_instruction = state.cpu.instruction
+        pre_mcore(state)
+
+    m.run()
+
+if __name__ == "__main__":
+    args = argv[1:]
+
+    if len(args) == 0:
+        print "usage: python {} PROGRAM1 ...".format(argv[0])
+        # Missing arguments is an error; exit() alone would report success
+        exit(1)
+
+    verify(args)