Support tracing (#247)

* Script for generating syscall tables

* Add generated syscall table

* Reintroduce tracing script

* Add configuration options needed by verify.py

* Clean up verify; remove dependency on experimental after_hook

* trace experiments

* reorg verify.py

* Update after merge

* Remove Manticore param

* Remove unused vars

* Use regfile api; redo last_instr check

* Fix gdb->mcore name discrepancy

* Move kwargs to explicit args for Linux/SLinux

 * Maintain options in makeLinux to not overcomplicate the Manticore
   class

* Address merge issues

* remove debug stmt

* Reintroduce options

* Revert linux.py/manticore.py from master

* Use the qemu -s and -R flags

* Import syscalls table from master

* And import extract_syscalls.py script

* Fix verify reference

* Move syscall to arg

* Update register references

* Simplify last instruction check

* Add logging filter to TRACE logger as well

* Be consistent with state synchronization

* Be explicit about gdb types

* Improve mmap debug output

* Return error if ioctl is not implemented

* Fix syscall sync

* Make logging more self-contained

* Use errno const in ioctl impl
This commit is contained in:
Yan 2017-06-05 16:16:54 -04:00 committed by GitHub
parent 14499f7ba8
commit 2916d7e3ae
4 changed files with 462 additions and 2 deletions

View File

@ -1167,7 +1167,7 @@ class Linux(Platform):
if fd > 2:
return self.files[fd].ioctl(request, argp)
else:
return 0
return -errno.EINVAL
def sys_open(self, buf, flags, mode):
@ -1952,10 +1952,16 @@ class SLinux(Linux):
#FIXME Check if file should be symbolic input and do as with fd0
result = cpu.memory.mmapFile(address, size, perms, self.files[fd].name, offset)
actually_mapped = '0x{:016x}'.format(result)
if address is None or result != address:
address = address or 0
actually_mapped += ' [requested: 0x{:016x}]'.format(address)
if (flags & 0x10 !=0) and result != address:
cpu.memory.munmap(result, size)
result = -1
logger.debug("sys_mmap(0x%016x, 0x%x, %s, %x, %d) - (%r)", result, size, perms, flags, fd, prot)
logger.debug("sys_mmap(%s, 0x%x, %s, %x, %d) - (0x%x)", actually_mapped, size, perms, flags, fd, result)
return result

128
scripts/gdb.py Normal file
View File

@ -0,0 +1,128 @@
import copy
import traceback
import os
import sys
import time
import subprocess
# Not referenced by any function visible in this module.
count = 0
# Prompt string that marks the end of every gdb reply; set by start().
prompt = ''
# Handle of the gdb child process; set by start().
subproc = None
# Architecture name cached by get_arch() after its first successful lookup.
_arch = None
def drain():
    """Read gdb's output up to (and excluding) the next prompt.

    Characters are read one at a time from the child's stdout until the
    accumulated text ends with the module-level ``prompt``.

    :return: Everything gdb printed before the prompt
    :rtype: str
    :raises RuntimeError: If gdb closes its output before printing a prompt
    """
    str_buffer = ''
    while not str_buffer.endswith(prompt):
        c = subproc.stdout.read(1)
        if not c:
            # EOF means gdb exited. read() keeps returning '' from here on, so
            # without this check the loop would spin forever.
            raise RuntimeError(
                "gdb exited before printing a prompt; output so far: "
                "{!r}".format(str_buffer))
        str_buffer += c
    return str_buffer[:-len(prompt)]
def start(arch, argv, port=1234, _prompt='(gdb) '):
    """Spawn ``gdb-multiarch`` on ``argv[0]`` and attach it to a remote stub.

    :param arch: Target architecture; currently unused (the command that
        would consume it is disabled below)
    :param list argv: Command line of the traced program; only ``argv[0]`` is used
    :param int port: TCP port of the remote gdb stub to connect to
    :param str _prompt: Prompt gdb prints when it is ready for input
    :raises RuntimeError: If launching gdb fails with an OSError
    """
    global prompt, subproc
    prompt = _prompt
    gdb = 'gdb-multiarch'
    program = argv[0]
    pipes = dict(stdin=subprocess.PIPE,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.STDOUT)
    try:
        subproc = subprocess.Popen([gdb, program], **pipes)
    except OSError:
        raise RuntimeError(
            "'{}' binary not found in PATH (needed for tracing)".format(gdb))

    # Consume the start-up banner so later replies pair up with their commands.
    drain()
    #correspond('set architecture {}\n'.format(arch))
    setup_commands = (
        'file {}\n'.format(program),
        'target remote :{}\n'.format(port),
        'set pagination off\n',
    )
    for command in setup_commands:
        correspond(command)
def correspond(text):
    """Send ``text`` to gdb and return its reply, leaving stdin open."""
    gdb_in = subproc.stdin
    gdb_in.write(text)
    gdb_in.flush()
    reply = drain()
    return reply
def getInstruction():
    """Return the first line of gdb's ``x/i $pc`` output."""
    listing = correspond('x/i $pc\n')
    first_line = listing.split('\n')[0]
    return first_line
def getR(reg):
    """Read register ``reg`` through gdb's ``p`` command and return an integer.

    Names containing ``XMM`` are read via their ``.uint128`` view, names
    containing ``FLAG`` are cast to unsigned, and ``R<n>B`` / ``R<n>W`` names
    are read as the parent register masked to 8 / 16 bits.
    """
    expr = "$" + reg
    if "XMM" in expr:
        expr += ".uint128"
        reply = correspond('p %s\n' % expr.lower())
        text = reply.split("=")[-1].split("\n")[0]
        if "0x" not in text:
            return long(text)
        return long(text.split("0x")[-1], 16)

    if "FLAG" in expr:
        expr = "(unsigned) " + expr
    byte_names = ['$R%dB' % i for i in range(16)]
    word_names = ['$R%dW' % i for i in range(16)]
    if expr in byte_names:
        # Drop the size suffix and mask the full register instead.
        expr = expr[:-1] + "&0xff"
    if expr in word_names:
        expr = expr[:-1] + "&0xffff"
    hex_digits = correspond('p /x %s\n' % expr.lower()).split("0x")[-1]
    return long(hex_digits.split("\n")[0], 16)
def getCanonicalRegisters():
    """Parse gdb's ``info reg`` output into a ``{name: value}`` dict.

    The first two whitespace-separated fields of every non-blank line are
    taken as the register name and its value. ``cpsr`` is reduced to its top
    four bits.
    """
    registers = {}
    for raw_line in correspond('info reg\n').split("\n"):
        fields = raw_line.strip().split()
        if not fields:
            continue
        name, hex_val = fields[:2]
        if name == 'cpsr':
            # We just want the NZCV flags
            registers[name] = int(hex_val, 0) & 0xF0000000
        else:
            registers[name] = long(hex_val, 0)
    return registers
def setR(reg, value):
    """Assign ``value`` (coerced to an integer) to gdb register ``reg``."""
    target = reg.lower()
    number = long(value)
    correspond('set $%s = %s\n' % (target, number))
def stepi():
    """Send gdb's ``stepi`` command and wait for the next prompt."""
    command = "stepi\n"
    correspond(command)
def getM(m):
    """Read a giant (8-byte) word from the debuggee with gdb's ``x/xg``.

    :param m: Address (or gdb expression) to examine
    :return: The value after the last tab of gdb's reply, parsed as an integer
    :rtype: long
    :raises ValueError: If gdb's reply does not end in a parsable number
    """
    # The former try/except only re-raised the exception (discarding its
    # original traceback) and was followed by an unreachable ``return 0``,
    # so errors now simply propagate untouched.
    reply = correspond('x/xg %s\n' % m)
    return long(reply.strip().split('\t')[-1], 0)
def getPid():
    """Return the integer at the end of the first line of ``info proc``."""
    first_line = correspond('info proc\n').split("\n")[0]
    pid_field = first_line.split(" ")[-1]
    return int(pid_field)
def getStack():
    """Return the address range of one mapping from ``/proc/<pid>/maps``.

    The pid is the last field of the first line of gdb's ``info proc``. The
    entry used is the third-from-last element of the maps file split on
    newlines.
    NOTE(review): presumably that entry is the stack mapping -- confirm.

    :return: ``(start, end)`` addresses of that mapping. Earlier versions
        computed this pair but discarded it and returned ``None``.
    :rtype: tuple
    """
    pid = correspond('info proc\n').split("\n")[0].split(" ")[-1]
    # ``with`` closes the maps file; it used to be left open.
    with open("/proc/%s/maps" % pid) as maps_file:
        maps = maps_file.read().split("\n")
    start, end = [int(x, 16) for x in maps[-3].split(" ")[0].split('-')]
    return start, end
def setByte(addr, val):
    """Store the character ``val`` as one ``char`` at ``addr`` in the debuggee."""
    byte_value = ord(val)
    command = 'set {{char}}{} = {}'.format(addr, byte_value)
    correspond(command + '\n')
def getByte(m):
    """Read one byte of debuggee memory at address ``m``.

    The address is first masked to the pointer width of the architecture
    reported by ``get_arch()``.
    """
    address_masks = {'i386': 0xffffffff,
                     'armv7': 0xffffffff,
                     'amd64': 0xffffffffffffffff}
    mask = address_masks[get_arch()]
    reply = correspond("x/1bx %d\n" % (m & mask))
    hex_byte = reply.split("\t")[-1].split("\n")[0]
    # Skip the leading "0x" before parsing.
    return int(hex_byte[2:], 16)
def get_entry():
    """Return the entry point address parsed from gdb's ``info target``."""
    info = correspond('info target\n')
    entry_line = info[info.find("Entry point:"):].split('\n')[0]
    hex_text = entry_line.split(' ')[-1]
    # Skip the leading "0x" before parsing.
    return long(hex_text[2:], 16)
def get_arch():
    """Return the debuggee's architecture name, caching it in ``_arch``.

    The name is derived from the object format that appears in gdb's
    ``info target`` output.

    :return: ``'i386'``, ``'amd64'`` or ``'armv7'``
    :rtype: str
    :raises NotImplementedError: If none of the known formats is reported
    """
    global _arch
    if _arch is not None:
        return _arch
    infotarget = correspond('info target\n')
    # Checked in the same order as before.
    known_formats = (
        ('elf32-i386', 'i386'),
        ('elf64-x86-64', 'amd64'),
        ('elf32-littlearm', 'armv7'),
    )
    for marker, arch_name in known_formats:
        if marker in infotarget:
            _arch = arch_name
            return _arch
    print(infotarget)
    # ``NotImplemented`` is a comparison sentinel, not an exception; raising
    # it produced a TypeError instead of the intended error.
    raise NotImplementedError(
        "unsupported target format in gdb 'info target' output")

92
scripts/qemu.py Normal file
View File

@ -0,0 +1,92 @@
import copy
import traceback
import os
import sys
import time
import subprocess
import logging
# Module logger for messages about the QEMU child process.
logger = logging.getLogger("QEMU")
# Not referenced by any function visible in this module.
count = 0
# Popen handle of the qemu-user child process; set by start().
subproc = None
# Parsed "-d mmu" start-up information (see parse_mmu_debug_output); set by start().
stats = None
# Not referenced by any function visible in this module.
_arch = None
def get_lines(n=1):
    """Read up to ``n`` lines from the QEMU child's stdout.

    :param int n: Number of lines to wait for
    :return: The lines read, each with its trailing newline. Fewer than ``n``
        lines (the last possibly without a newline) are returned if QEMU
        closes its output first.
    :rtype: list
    """
    lines = []
    while len(lines) < n:
        line = subproc.stdout.readline()
        if not line:
            # EOF means QEMU exited. The previous per-character loop never
            # noticed this and spun forever waiting for a newline.
            break
        lines.append(line)
    return lines
def parse_mmu_debug_output(s):
    """Parse the start-up text QEMU prints when run with ``-d mmu``.

    The lines are expected in this order: a "reserved" line whose second
    field is a number, a ``host mmap_min_addr=<n>`` line, a guest-base line
    whose second field is a number, one heading line, any number of
    ``<start>-<end> <size> <prot>`` mapping lines (hex), then ``<name>
    <value>`` pairs. Blank lines among the pairs are ignored.

    :param list s: Output lines. The list is consumed: every parsed line is
        popped off the front.
    :return: Dict with ``reserved``, ``host_mmap_min_addr``, ``guest_base``,
        ``maps`` (a list of ``(start, end, size, protections)`` tuples) and
        one entry per trailing ``<name> <value>`` pair
    :rtype: dict
    """
    d = {}

    # Get guest address space
    d['reserved'] = int(s.pop(0).split()[1], 0)
    d['host_mmap_min_addr'] = int(s.pop(0).split('=')[1], 0)
    d['guest_base'] = int(s.pop(0).split()[1], 0)

    # get rid of mapping heading
    s.pop(0)

    d['maps'] = []
    # Check ``s`` first so input that ends with the mapping table does not
    # raise IndexError on ``s[0]``.
    while s and '-' in s[0]:
        line = s.pop(0)
        addr_range, size, protections = line.split()
        start, end = addr_range.split('-')
        d['maps'].append((int(start, 16),
                          int(end, 16),
                          int(size, 16),
                          protections))

    while s:
        line = s.pop(0)
        # Lines arrive with their newline attached, so a blank line is "\n"
        # (truthy); test the stripped text instead.
        if not line.strip():
            continue
        var, addr = line.split()
        d[var] = int(addr, 0)

    return d
def start(arch, argv, port=1234, va_size=0xc0000000, stack_size=0x20000):
    """Launch ``qemu-<arch>`` on ``argv`` with a gdb stub and MMU logging.

    The child is stored in the module-level ``subproc``. Its first 16 output
    lines are parsed with ``parse_mmu_debug_output`` into the module-level
    ``stats``, and the guest mappings found there are printed.

    :param str arch: Suffix of the qemu-user binary to run (``qemu-<arch>``)
    :param list argv: Command line of the program to trace
    :param int port: Value passed to qemu's ``-g`` option
    :param int va_size: Value passed to qemu's ``-R`` option
    :param int stack_size: Value passed to qemu's ``-s`` option
    """
    global subproc, stats
    aslr_file = '/proc/sys/kernel/randomize_va_space'
    try:
        with open(aslr_file, 'r') as f:
            aslr_enabled = f.read().strip() != '0'
    except EnvironmentError:
        # The check only produces a warning, so an unreadable or missing
        # sysctl file must not abort tracing. The former ``try/finally: pass``
        # let that error propagate.
        aslr_enabled = False
    if aslr_enabled:
        logger.warning("Disable ASLR before running qemu-user")
        logger.warning("  sudo sh -c 'echo 0 > %s'", aslr_file)

    args = ['qemu-%s'%(arch,), '-g', port, '-d', 'mmu', '-R', va_size, '-s', stack_size] + argv
    args = [str(arg) for arg in args]
    print("Running: %s"%(' '.join(args),))
    subproc = subprocess.Popen(args, stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)

    # NOTE(review): assumes the "-d mmu" preamble is exactly 16 lines long --
    # confirm against the qemu version in use.
    mmu_debug_output = get_lines(16)
    stats = parse_mmu_debug_output(mmu_debug_output)
    for m in stats['maps']:
        print('{:x}-{:x}, {}, {}'.format(*m))
def correspond(text):
    """Optionally send ``text`` to QEMU's stdin, then return one output line.

    A falsy ``text`` (e.g. ``None``) skips the write and only reads. The
    result is the single-element list produced by ``get_lines()``.
    """
    if text:
        qemu_in = subproc.stdin
        qemu_in.write(text)
        qemu_in.flush()
    reply = get_lines()
    return reply

234
scripts/verify.py Normal file
View File

@ -0,0 +1,234 @@
from manticore import Manticore
from manticore.platforms import linux_syscalls
import logging
from sys import argv, exit
import struct
import qemu
import gdb
logger = logging.getLogger('TRACE')
## We need to keep some complex objects in between hook invocations so we keep them
## as globals. Tracing is inherently a single-threaded process, so using a
## manticore context would be heavier than needed.
# Passed to qemu.start() as va_size; also the upper bound of the stack copy in initialize().
stack_top = 0xc0000000
# Passed to qemu.start() as stack_size.
stack_size = 0x20000
# Set to True once the instruction hook has called initialize().
initialized = False
# state.cpu.instruction as recorded by the previous invocation of the instruction hook.
last_instruction = None
# Set by post_mcore() while Manticore's PC is in the 0xffffXXXX range.
in_helper = False
def init_logging():
    '''
    Install a filter on the TRACE logger that sets ``stateid`` to an empty
    string on every record and lets the record through.
    '''
    # NOTE(review): presumably a log format elsewhere references
    # %(stateid)s -- confirm.
    class ContextFilter(logging.Filter):
        def filter(self, record):
            record.stateid = ''
            return True

    stateid_filter = ContextFilter()
    logger.addFilter(stateid_filter)
def dump_gdb(cpu, addr, count):
    '''
    Print gdb's and Manticore's view of memory side by side.

    :param cpu: Cpu whose ``read_int`` supplies Manticore's value
    :param int addr: First address to dump
    :param int count: Number of bytes to cover, stepping 4 at a time
    '''
    offset = addr
    end = addr + count
    while offset < end:
        # gdb.getM reads a giant word; keep only its low 32 bits.
        gdb_word = int(gdb.getM(offset) & 0xffffffff)
        mcore_word = int(cpu.read_int(offset))
        print('{:x}: g{:08x} m{:08x}'.format(offset, gdb_word, mcore_word))
        offset += 4
def cmp_regs(cpu, should_print=False):
    '''
    Compare registers from a remote gdb session to current mcore.

    Every register gdb reports is looked up in Manticore under its
    upper-cased name; names ending in ``psr`` are looked up as ``APSR``.
    When a difference is found, one line of QEMU output is logged at debug
    level.

    :param manticore.core.cpu Cpu: Current cpu
    :param bool should_print: Whether to log each register's values
    :return: Whether or not any differences were detected
    :rtype: bool
    '''
    gdb_regs = gdb.getCanonicalRegisters()
    mismatch = False
    for gdb_name in sorted(gdb_regs):
        gdb_val = gdb_regs[gdb_name]
        name = 'apsr' if gdb_name.endswith('psr') else gdb_name
        mcore_val = cpu.read_register(name.upper())
        if should_print:
            logger.debug('{} gdb:{:x} mcore:{:x}'.format(name, gdb_val, mcore_val))
        if gdb_val != mcore_val:
            if should_print:
                logger.warning('^^ unequal')
            mismatch = True

    if mismatch:
        logger.debug(qemu.correspond(None))
    return mismatch
def pre_mcore(state):
    '''
    Start recording memory writes if the instruction Manticore is about to
    execute is an ``svc``.
    '''
    cpu = state.cpu
    mnemonic = cpu.instruction.mnemonic.lower()
    if mnemonic == 'svc':
        cpu.memory.push_record_writes()
def post_mcore(state, last_instruction):
'''
Handle syscalls (import memory) and bail if we diverge

:param state: Manticore state to inspect and, on divergence, abandon
:param last_instruction: Instruction recorded by the previous hook invocation
'''
# NOTE(review): indentation was lost in this copy of the file, so the block
# structure implied by the comments below is inferred -- confirm it against
# the real source. In particular, verify whether the register loop after the
# R0 check runs unconditionally or only when R0 differs.
global in_helper
# Synchronize qemu state to manticore's after a system call
if last_instruction.mnemonic.lower() == 'svc':
# Synchronize all writes that have happened
# (pops the recording that pre_mcore started with push_record_writes)
writes = state.cpu.memory.pop_record_writes()
if writes:
logger.debug("Got %d writes", len(writes))
for addr, val in writes:
# Only the first element of each recorded value is written.
gdb.setByte(addr, val[0])
# Write return val to gdb
gdb_r0 = gdb.getR('R0')
if gdb_r0 != state.cpu.R0:
logger.debug("Writing 0x{:x} to R0 (overwriting 0x{:x})".format(
state.cpu.R0, gdb.getR('R0')))
# Copy Manticore's registers to gdb, skipping status registers and the PC.
for reg in state.cpu.canonical_registers:
if reg.endswith('PSR') or reg in ('R15', 'PC'):
continue
val = state.cpu.read_register(reg)
gdb.setR(reg, val)
# Ignore Linux kernel helpers
if (state.cpu.PC >> 16) == 0xffff:
in_helper = True
return
# If we executed a few instructions of a helper, we need to sync Manticore's
# state to GDB as soon as we stop executing a helper.
if in_helper:
for reg in state.cpu.canonical_registers:
if reg.endswith('PSR'):
continue
# Don't sync pc
if reg == 'R15':
continue
gdb.setR(reg, state.cpu.read_register(reg))
in_helper = False
# First pass detects a mismatch quietly; the second logs every register
# before the state is abandoned.
if cmp_regs(state.cpu):
cmp_regs(state.cpu, should_print=True)
state.abandon()
def pre_qemu(state):
    '''
    Hook run before QEMU executes an instruction; currently does nothing.
    '''
    # Nop for now, might need to do future sync state
    return None
def post_qemu(state, last_mnemonic):
    '''
    After QEMU has executed an instruction, mirror its syscall effects into
    Manticore when that instruction was an ``svc``.
    '''
    if last_mnemonic.lower() != 'svc':
        return
    sync_svc(state)
def sync_svc(state):
    '''
    Mirror some service calls in manticore. Happens after qemu executed a SVC
    instruction, but before manticore did.

    For syscalls whose name contains ``mmap``, gdb's R0 is copied into
    Manticore's R0. On a ValueError every canonical register is printed
    before the error is re-raised.
    '''
    cpu = state.cpu
    syscall = cpu.R7  # Grab idx from manticore since qemu could have exited
    name = linux_syscalls.armv7[syscall]
    logger.debug("Syncing syscall: {}".format(name))

    try:
        # Make sure mmap returns the same address
        if 'mmap' in name:
            returned = gdb.getR('R0')
            logger.debug("Syncing mmap ({:x})".format(returned))
            cpu.write_register('R0', returned)
        # Nothing is mirrored for exit-like syscalls.
        if 'exit' in name:
            return
    except ValueError:
        for reg in cpu.canonical_registers:
            print('{}: {:x}'.format(reg, cpu.read_register(reg)))
        raise
def initialize(state):
    '''
    Synchronize the stack and register state (manticore->qemu)

    Copies every byte from the lower of the two stack pointers up to
    ``stack_top`` into gdb, appends two extra file entries to Manticore's
    platform, then writes Manticore's value of every register gdb reports
    back into gdb.
    '''
    cpu = state.cpu
    logger.debug("Copying {} bytes in the stack..".format(stack_top - cpu.SP))
    stack_bottom = min(cpu.SP, gdb.getR('SP'))
    for address in range(stack_bottom, stack_top):
        byte = cpu.read_int(address, 8)
        gdb.setByte(address, chr(byte))
    logger.debug("Done")

    # Qemu fd's start at 5, ours at 3. Add two filler fds
    files = state.platform.files
    mcore_stdout = files[1]
    files.append(mcore_stdout)
    files.append(mcore_stdout)

    # Sync gdb's regs
    for gdb_reg in gdb.getCanonicalRegisters():
        mcore_reg = 'APSR' if gdb_reg.endswith('psr') else gdb_reg.upper()
        gdb.setR(gdb_reg, cpu.read_register(mcore_reg))
def verify(argv):
    '''
    Run ``argv`` under QEMU (driven through gdb) and under Manticore in
    lockstep, comparing register state after every instruction.

    :param list argv: Program path followed by its arguments
    '''
    logger.debug("Verifying program \"{}\"".format(argv))

    # Address and stack_size are from linux.py
    # TODO(yan): Refactor these constants into a reachable value in platform
    qemu.start('arm', argv, va_size=stack_top, stack_size=stack_size)
    gdb.start('arm', argv)

    program, program_args = argv[0], argv[1:]
    mcore = Manticore(program, program_args)
    mcore.verbosity = 2

    init_logging()
    logger.setLevel(logging.DEBUG)

    @mcore.hook(None)
    def on_instruction(state):
        '''
        Handle all the hooks for each instruction executed. Ordered as:

        pre_qemu
         * qemu exec *
        post_qemu

        // svc synchronization happens here (mmap specifically)

        pre_mcore
         * mcore exec *
        post_mcore

        // all memory written in a mcore syscall gets moved to qemu here
        '''
        global initialized, last_instruction

        # Initialize our state to QEMU's
        if not initialized:
            initialize(state)
            initialized = True

        # The previous instruction has now run in Manticore; check it.
        if last_instruction:
            post_mcore(state, last_instruction)

        # Kernel helpers are inline in QEMU; do nothing
        in_kernel_helper = (state.cpu.PC >> 16) == 0xffff
        if in_kernel_helper:
            return

        pre_qemu(state)

        # gdb prints "<address>:\t<mnemonic>\t<operands>"; keep the mnemonic.
        after_colon = gdb.getInstruction().split(':')[1].strip()
        last_mnemonic = after_colon.split('\t')[0]
        gdb.stepi()

        post_qemu(state, last_mnemonic)

        last_instruction = state.cpu.instruction
        pre_mcore(state)

    mcore.run()
if __name__ == "__main__":
    # Everything after the script name is the traced program's command line.
    args = argv[1:]
    if not args:
        print("usage: python {} PROGRAM1 ...".format(argv[0]))
        # A missing program is a usage error, so exit with a non-zero status
        # (the bare exit() reported success).
        exit(1)

    verify(args)