Linux platform refactoring (#264)

* Clean up model syscall invocation
* Move read_string to Cpu
* Move push/pop helpers to Cpu
* Reorg Linux initialization
* Update linux test
* fstat64 test harness
* assert read_string only considers concrete bytes
* Ensure that correct aliases exist during ctor
* Improve alias check
This commit is contained in:
Yan 2017-05-23 15:30:58 -04:00 committed by GitHub
parent 475f786f21
commit 3c977719f0
3 changed files with 199 additions and 130 deletions

View File

@ -12,6 +12,7 @@ import inspect
import sys
import types
import logging
import StringIO
logger = logging.getLogger("CPU")
register_logger = logging.getLogger("REGISTERS")
@ -324,6 +325,10 @@ class Cpu(object):
self._md.syntax = 0
self.instruction = None
# Ensure that regfile created STACK/PC aliases
assert 'STACK' in self._regfile
assert 'PC' in self._regfile
def __getstate__(self):
state = {}
state['regfile'] = self._regfile
@ -477,6 +482,78 @@ class Cpu(object):
result.append(Operators.CHR(self.read_int( where+i, 8)))
return result
def read_string(self, where, max_length=None):
    '''
    Read a NUL-terminated concrete buffer from memory.

    Bytes are fetched one at a time with `read_int(addr, 8)` until a NUL byte
    is seen or `max_length` bytes have been collected. The NUL terminator is
    not part of the returned string.

    :param int where: Address to read string from
    :param int max_length:
        The size in bytes to cap the string at, or None [default] for no
        limit.
    :return: string read
    :rtype: str
    :raises AssertionError: if a byte that is read is symbolic
    '''
    s = StringIO.StringIO()
    remaining = max_length
    # Test the cap *before* touching memory: a bounded read must never
    # dereference the byte just past the requested window, which may be
    # unmapped or symbolic. `!= 0` (rather than `> 0`) keeps the historical
    # behaviour for None and for negative caps, which never reach zero.
    while remaining is None or remaining != 0:
        c = self.read_int(where, 8)
        assert not issymbolic(c), "read_string only supports concrete bytes"
        if c == 0:
            break
        s.write(Operators.CHR(c))
        where += 1
        if remaining is not None:
            remaining -= 1
    return s.getvalue()
def push_bytes(self, data):
    '''
    Push `data` onto the stack.

    The stack pointer is lowered by `len(data)` first; the bytes are then
    stored at the new top of the stack.

    :param str data: Data to write
    :return: The stack pointer after the push
    '''
    size = len(data)
    self.STACK = self.STACK - size
    self.write_bytes(self.STACK, data)
    return self.STACK
def pop_bytes(self, nbytes):
    '''
    Pop `nbytes` bytes off the stack.

    The bytes are read from the current top of the stack and the stack
    pointer is then raised by `nbytes`.

    :param int nbytes: How many bytes to read
    :return: Data read from the stack
    '''
    popped = self.read_bytes(self.STACK, nbytes)
    self.STACK = self.STACK + nbytes
    return popped
def push_int(self, value):
    '''
    Decrement the stack pointer and write `value` to the stack.

    The stack pointer moves down by one machine word, i.e.
    `address_bit_size // 8` bytes, and `value` is stored at the new top with
    `write_int` (no explicit size is passed, so its default applies).

    :param int value: The value to write
    :return: New stack pointer
    '''
    # Floor division keeps the stack pointer an integer even where `/` is
    # true division (Python 3, or `from __future__ import division`).
    self.STACK -= self.address_bit_size // 8
    self.write_int(self.STACK, value)
    return self.STACK
def pop_int(self):
    '''
    Read a value from the stack and increment the stack pointer.

    The value is fetched with `read_int` at the current top of the stack (no
    explicit size is passed, so its default applies) and the stack pointer
    then moves up by one machine word, i.e. `address_bit_size // 8` bytes.

    :return: Value read
    '''
    value = self.read_int(self.STACK)
    # Floor division keeps the stack pointer an integer even where `/` is
    # true division (Python 3, or `from __future__ import division`).
    self.STACK += self.address_bit_size // 8
    return value
#######################################
# Decoder
@abstractmethod

View File

@ -5,7 +5,7 @@ import weakref
import errno
import os, struct
from ..utils.helpers import issymbolic
from ..core.cpu.abstractcpu import Interruption, Syscall, ConcretizeRegister
from ..core.cpu.abstractcpu import Interruption, Syscall, ConcretizeArgument
from ..core.cpu.cpufactory import CpuFactory
from ..core.memory import SMemory32, SMemory64, Memory32, Memory64
from ..core.smtlib import Operators, ConstraintSet
@ -31,14 +31,6 @@ def perms_from_elf(elf_flags):
def perms_from_protflags(prot_flags):
    '''
    Translate protection flag bits into a three-character permission string.

    Only the low three bits of `prot_flags` are considered: bit 0 maps to
    'r', bit 1 to 'w' and bit 2 to 'x'. A cleared bit is rendered as a space
    so that every result is exactly three characters wide (e.g. 'rw ',
    'r x').

    :param int prot_flags: Protection flags
    :return: Permission string such as 'rwx' or 'r  '
    :rtype: str
    '''
    # Every entry must be three characters wide: position 0 is read,
    # position 1 is write, position 2 is execute.
    return ['   ', 'r  ', ' w ', 'rw ', '  x', 'r x', ' wx', 'rwx'][prot_flags & 7]
class SymbolicSyscallArgument(Exception):
    '''
    Raised by a syscall implementation when one of its arguments is symbolic.

    Carries the index of the offending argument (`reg_num`), a human readable
    `message` and the name of the concretization `policy`.
    '''

    def __init__(self, reg_num, message='Concretizing syscall argument', policy='SAMPLED'):
        super(SymbolicSyscallArgument, self).__init__(message)
        self.policy = policy
        self.message = message
        self.reg_num = reg_num
class File(object):
def __init__(self, *args, **kwargs):
#Todo: assert file is seekable otherwise we should save what was
@ -272,16 +264,35 @@ class Linux(Platform):
:param list envp: The ENV variables.
'''
super(Linux, self).__init__(program)
argv = [] if argv is None else argv
envp = [] if envp is None else envp
self.program = program
self.clocks = 0
self.files = []
self.syscall_trace = []
self.syscall_arg_regs = []
self.files = []
if program != None:
self.elf = ELFFile(file(program))
self.arch = {'x86': 'i386', 'x64': 'amd64', 'ARM': 'armv7'}[self.elf.get_machine_arch()]
self._init_cpu(self.arch)
self._init_fds()
self._execve(program, argv, envp)
@classmethod
def empty_platform(cls, arch):
    '''
    Build a platform that has no ELF loaded.

    The instance is constructed with `None` as its program, after which the
    CPU for `arch` and the file descriptors are initialized through
    `_init_cpu` and `_init_fds`.

    :param str arch: The architecture of the new platform
    :rtype: Linux
    '''
    bare = cls(None)
    bare._init_cpu(arch)
    bare._init_fds()
    return bare
def _init_fds(self):
# open standard files stdin, stdout, stderr
logger.debug("Opening file descriptors (0,1,2)")
self.input = Socket()
@ -303,16 +314,29 @@ class Linux(Platform):
assert self._open(stdout) == 1
assert self._open(stderr) == 2
#Load process and setup socketpairs
arch = {'x86': 'i386', 'x64': 'amd64', 'ARM': 'armv7'}[ELFFile(file(program)).get_machine_arch()]
def _init_cpu(self, arch):
    '''
    Create the CPU for `arch` and the ABI helpers bound to it.

    Sets `self.procs` to a single-element list holding the new CPU, selects
    it as the current one (`self._current = 0`) and stores the function and
    syscall ABI objects obtained from `CpuFactory`.

    :param str arch: Architecture name, passed to `_mk_proc` and `CpuFactory`
    '''
    cpu = self._mk_proc(arch)
    self.procs = [cpu]
    # Assigned once; the repeated assignment after the ABI setup was redundant.
    self._current = 0
    self._function_abi = CpuFactory.get_function_abi(cpu, 'linux', arch)
    self._syscall_abi = CpuFactory.get_syscall_abi(cpu, 'linux', arch)
def _execve(self, program, argv, envp):
'''
Load `program` and establish program state, such as stack and arguments.
:param program str: The ELF binary to load
:param argv list: argv array
:param envp list: envp array
'''
argv = [] if argv is None else argv
envp = [] if envp is None else envp
logger.debug("Loading {} as a {} elf".format(program,self.arch))
self.load(program)
self._arch_specific_init(arch)
self._arch_specific_init()
self._stack_top = self.current.STACK
self.setup_stack([program]+argv, envp)
@ -366,7 +390,6 @@ class Linux(Platform):
state['elf_brk'] = self.elf_brk
state['auxv'] = self.auxv
state['program'] = self.program
state['syscall_arg_regs'] = self.syscall_arg_regs
state['functionabi'] = self._function_abi
state['syscallabi'] = self._syscall_abi
if hasattr(self, '_arm_tls_memory'):
@ -416,25 +439,11 @@ class Linux(Platform):
self.elf_brk = state['elf_brk']
self.auxv = state['auxv']
self.program = state['program']
self.syscall_arg_regs = state['syscall_arg_regs']
self._function_abi = state['functionabi']
self._syscall_abi = state['syscallabi']
if '_arm_tls_memory' in state:
self._arm_tls_memory = state['_arm_tls_memory']
def _read_string(self, buf):
    """
    Reads a null terminated concrete buffer from memory.

    At most 1024 bytes starting at `buf` are examined; the terminating null
    byte is not included in the result.
    :todo: FIX. move to cpu or memory
    """
    cpu = self.current
    filename = ""
    offset = 0
    while offset < 1024:
        c = Operators.CHR(cpu.read_int(buf + offset, 8))
        if c == '\x00':
            break
        filename += c
        offset += 1
    return filename
def _init_arm_kernel_helpers(self):
'''
ARM kernel helpers
@ -567,18 +576,6 @@ class Linux(Platform):
# stack from the original top
cpu.STACK = self._stack_top
# TODO cpu.STACK_push_bytes() pls
def push_bytes(data):
cpu.STACK -= len(data)
cpu.write_bytes(cpu.STACK, data)
return cpu.STACK
def push_int(value):
cpu.STACK -= cpu.address_bit_size/8
cpu.write_int(cpu.STACK, value, cpu.address_bit_size)
return cpu.STACK
auxv = self.auxv
logger.debug("Setting argv, envp and auxv.")
logger.debug("\tArguments: %s"%repr(argv))
@ -597,12 +594,12 @@ class Linux(Platform):
#end envp marker empty string
for evar in envp:
push_bytes('\x00')
envplst.append(push_bytes(evar))
cpu.push_bytes('\x00')
envplst.append(cpu.push_bytes(evar))
for arg in argv:
push_bytes('\x00')
argvlst.append(push_bytes(arg))
for arg in argv:
cpu.push_bytes('\x00')
argvlst.append(cpu.push_bytes(arg))
#Put all auxv strings into the string stack area.
@ -610,7 +607,7 @@ class Linux(Platform):
for name, value in auxv.items():
if hasattr(value, '__len__'):
push_bytes(value)
cpu.push_bytes(value)
auxv[name]=cpu.STACK
#The "secure execution" mode of secure_getenv() is controlled by the
@ -642,34 +639,35 @@ class Linux(Platform):
'AT_SYSINFO_EHDR': 33, #Pointer to the global system page used for system calls and other nice things.
}
#AT_NULL
push_int(0)
push_int(0)
cpu.push_int(0)
cpu.push_int(0)
for name, val in auxv.items():
push_int(val)
push_int(auxvnames[name])
cpu.push_int(val)
cpu.push_int(auxvnames[name])
# NULL ENVP
push_int(0)
cpu.push_int(0)
for var in reversed(envplst): # ENVP n
push_int(var)
cpu.push_int(var)
envp = cpu.STACK
# NULL ARGV
push_int(0)
cpu.push_int(0)
for arg in reversed(argvlst): # Argv n
push_int(arg)
cpu.push_int(arg)
argv = cpu.STACK
#ARGC
push_int(len(argvlst))
cpu.push_int(len(argvlst))
def load(self, filename):
'''
Loads an ELF program in memory and prepares the initial CPU state.
Creates the stack and loads the environment variables and the arguments in it.
:param filename: pathname of the file to be executed.
:param filename: pathname of the file to be executed. (used for auxv)
:raises error:
- 'Not matching cpu': if the program is compiled for a different architecture
- 'Not matching memory': if the program is compiled for a different address size
@ -678,8 +676,8 @@ class Linux(Platform):
#load elf See binfmt_elf.c
#read the ELF object file
cpu = self.current
elf = ELFFile(file(filename))
arch = {'x86':'i386','x64':'amd64', 'ARM': 'armv7'}[elf.get_machine_arch()]
elf = self.elf
arch = self.arch
addressbitsize = {'x86':32, 'x64':64, 'ARM': 32}[elf.get_machine_arch()]
logger.debug("Loading %s as a %s elf"%(filename, arch))
@ -891,35 +889,26 @@ class Linux(Platform):
self.end_data = end_data
self.elf_brk = real_elf_brk
at_random = cpu.push_bytes('A'*16)
at_execfn = cpu.push_bytes(filename+'\x00')
#put auxv strings in stack
# TODO move into cpu as cpu.stack_push(), possibly removing the need for stack_sub, stack_add?
def push_bytes( value ):
cpu.STACK -= len(value)
cpu.write_bytes(cpu.STACK, value)
return cpu.STACK
at_random = push_bytes('A'*16)
at_execfn = push_bytes(filename+'\x00')
auxv = {}
auxv['AT_PHDR'] = load_addr+elf.header.e_phoff # Program headers for program
auxv['AT_PHENT'] = elf.header.e_phentsize # Size of program header entry
auxv['AT_PHNUM'] = elf.header.e_phnum # Number of program headers
auxv['AT_PAGESZ'] = cpu.memory.page_size # System page size
auxv['AT_BASE'] = interpreter_base # Base address of interpreter
auxv['AT_FLAGS'] = elf.header.e_flags # Flags
auxv['AT_ENTRY'] = elf_entry # Entry point of program
auxv['AT_UID'] = 1000 # Real uid
auxv['AT_EUID'] = 1000 # Effective uid
auxv['AT_GID'] = 1000 # Real gid
auxv['AT_EGID'] = 1000 # Effective gid
auxv['AT_CLKTCK'] = 100 # Frequency of times()
auxv['AT_HWCAP'] = 0 # Machine-dependent hints about processor capabilities.
auxv['AT_RANDOM'] = at_random # Address of 16 random bytes.
auxv['AT_EXECFN'] = at_execfn # Filename of executable.
self.auxv = auxv
self.auxv = {
'AT_PHDR' : load_addr+elf.header.e_phoff, # Program headers for program
'AT_PHENT' : elf.header.e_phentsize, # Size of program header entry
'AT_PHNUM' : elf.header.e_phnum, # Number of program headers
'AT_PAGESZ' : cpu.memory.page_size, # System page size
'AT_BASE' : interpreter_base, # Base address of interpreter
'AT_FLAGS' : elf.header.e_flags, # Flags
'AT_ENTRY' : elf_entry, # Entry point of program
'AT_UID' : 1000, # Real uid
'AT_EUID' : 1000, # Effective uid
'AT_GID' : 1000, # Real gid
'AT_EGID' : 1000, # Effective gid
'AT_CLKTCK' : 100, # Frequency of times()
'AT_HWCAP' : 0, # Machine-dependent hints about processor capabilities.
'AT_RANDOM' : at_random, # Address of 16 random bytes.
'AT_EXECFN' : at_execfn, # Filename of executable.
}
def _open(self, f):
'''
@ -1156,7 +1145,7 @@ class Linux(Platform):
# buf: address of zero-terminated pathname
# flags/access: file access bits
# perms: file permission mode
filename = self._read_string(buf)
filename = self.current.read_string(buf)
try :
if os.path.abspath(filename).startswith('/proc/self'):
if filename == '/proc/self/exe':
@ -1223,7 +1212,7 @@ class Linux(Platform):
'''
if bufsize <= 0:
return -errno.EINVAL
filename = self._read_string(path)
filename = self.current.read_string(path)
if filename == '/proc/self/exe':
data = os.path.abspath(self.program)
else:
@ -1419,7 +1408,7 @@ class Linux(Platform):
logger.debug("sys_set_tid_address(%016x) -> 0", tidptr)
return 1000 #tha pid
def sys_faccessat(self, dirfd, pathname, mode, flags):
# Reads the pathname string from memory, logs the call and returns -1.
# NOTE(review): `filename` is assigned twice in a row, once through
# self._read_string and once through self.current.read_string; the first
# value is discarded. Presumably these are the pre- and post-change lines of
# the same statement shown together -- confirm against the real file.
filename = self._read_string(pathname)
filename = self.current.read_string(pathname)
# NOTE(review): the log message reports "-> 0" but the value returned is -1.
logger.debug("sys_faccessat(%016x, %s, %x, %x) -> 0", dirfd, filename, mode, flags)
return -1
@ -1693,31 +1682,18 @@ class Linux(Platform):
self.sys_close(fd)
return ret
def _arch_specific_init(self, arch):
assert arch in {'i386', 'amd64', 'armv7'}
def _arch_specific_init(self):
assert self.arch in {'i386', 'amd64', 'armv7'}
self._arch_reg_init(arch)
if arch == 'i386':
self.syscall_arg_regs = ['EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP']
elif arch == 'amd64':
self.syscall_arg_regs = ['RDI', 'RSI', 'RDX', 'R10', 'R8', 'R9']
elif arch == 'armv7':
self.syscall_arg_regs = ['R0', 'R1', 'R2', 'R3', 'R4', 'R5', 'R6']
self._init_arm_kernel_helpers()
def _arch_reg_init(self, arch):
if arch in {'i386', 'amd64'}:
x86_defaults = {
'CS': 0x23,
'SS': 0x2b,
'DS': 0x2b,
'ES': 0x2b,
}
# Establish segment registers for x86 arches
if self.arch in {'i386', 'amd64'}:
x86_defaults = { 'CS': 0x23, 'SS': 0x2b, 'DS': 0x2b, 'ES': 0x2b, }
for reg, val in x86_defaults.iteritems():
self.current.regfile.write(reg, val)
if self.arch == 'armv7':
self._init_arm_kernel_helpers()
@staticmethod
def _interp_total_size(interp):
'''
@ -1781,26 +1757,19 @@ class SLinux(Linux):
#Dispatchers...
def syscall(self):
    '''
    Dispatch a syscall, converting symbolic-argument errors.

    Delegates to the parent implementation. When it raises
    SymbolicSyscallArgument, PC is moved back by the size of the current
    instruction and a ConcretizeRegister is raised for the register that
    `syscall_arg_regs` lists at the reported argument index, forwarding the
    original message and policy.
    '''
    try:
        return super(SLinux, self).syscall()
    except SymbolicSyscallArgument as e:
        # Rewind PC by one instruction; presumably so the syscall is
        # re-executed once the register has been concretized.
        self.current.PC -= self.current.instruction.size
        target_reg = self.syscall_arg_regs[e.reg_num]
        raise ConcretizeRegister(target_reg, e.message, e.policy)
def sys_read(self, fd, buf, count):
# Symbolic-aware wrapper around the parent sys_read. Each argument is tested
# with issymbolic(); if it is symbolic an exception carrying the argument's
# position (0 = fd, 1 = buf, 2 = count) is raised, otherwise the call is
# forwarded unchanged to the parent class.
# NOTE(review): every check below is followed by two raise statements, so
# the second one (ConcretizeArgument) is unreachable as written. Presumably
# these are the pre- and post-change lines of the same statement shown
# together -- confirm against the real file.
if issymbolic(fd):
logger.debug("Ask to read from a symbolic file descriptor!!")
raise SymbolicSyscallArgument(0)
raise ConcretizeArgument(0)
if issymbolic(buf):
logger.debug("Ask to read to a symbolic buffer")
raise SymbolicSyscallArgument(1)
raise ConcretizeArgument(1)
if issymbolic(count):
logger.debug("Ask to read a symbolic number of bytes ")
raise SymbolicSyscallArgument(2)
raise ConcretizeArgument(2)
# All three arguments are concrete here.
return super(SLinux, self).sys_read(fd, buf, count)
@ -1922,15 +1891,15 @@ class SLinux(Linux):
def sys_write(self, fd, buf, count):
# Symbolic-aware wrapper around the parent sys_write. Each argument is tested
# with issymbolic(); if it is symbolic an exception carrying the argument's
# position (0 = fd, 1 = buf, 2 = count) is raised, otherwise the call is
# forwarded unchanged to the parent class.
# NOTE(review): every check below is followed by two raise statements, so
# the second one (ConcretizeArgument) is unreachable as written. Presumably
# these are the pre- and post-change lines of the same statement shown
# together -- confirm against the real file.
if issymbolic(fd):
logger.debug("Ask to write to a symbolic file descriptor!!")
raise SymbolicSyscallArgument(0)
raise ConcretizeArgument(0)
if issymbolic(buf):
logger.debug("Ask to write to a symbolic buffer")
raise SymbolicSyscallArgument(1)
raise ConcretizeArgument(1)
if issymbolic(count):
logger.debug("Ask to write a symbolic number of bytes ")
raise SymbolicSyscallArgument(2)
raise ConcretizeArgument(2)
# All three arguments are concrete here.
return super(SLinux, self).sys_write(fd, buf, count)

View File

@ -1,6 +1,7 @@
import os
import unittest
from manticore.platforms import linux
from manticore.platforms import linux, linux_syscalls
class LinuxTest(unittest.TestCase):
@ -37,10 +38,10 @@ class LinuxTest(unittest.TestCase):
envp_ptr = argv_ptr + len(real_argv)*8 + 8
for i, arg in enumerate(real_argv):
self.assertEqual(self.linux._read_string(cpu.read_int(argv_ptr + i*8)), arg)
self.assertEqual(cpu.read_string(cpu.read_int(argv_ptr + i*8)), arg)
for i, env in enumerate(envp):
self.assertEqual(self.linux._read_string(cpu.read_int(envp_ptr + i*8)), env)
self.assertEqual(cpu.read_string(cpu.read_int(envp_ptr + i*8)), env)
def test_load_maps(self):
mappings = self.linux.current.memory.mappings()
@ -57,3 +58,25 @@ class LinuxTest(unittest.TestCase):
self.assertEqual(first_map_name, '/bin/ls')
self.assertEqual(second_map_name, '/bin/ls')
def test_syscall_fstat(self):
    '''
    Drive fstat64 through the syscall dispatcher on an ELF-less ARM platform.

    Maps a scratch page, pushes a filename on the stack, opens it, loads the
    syscall number and arguments into R7/R0/R1 and invokes `syscall()`. The
    resulting stat buffer is only dumped as hex, not checked.
    '''
    nr_fstat64 = 197

    # Create a minimal state
    model = linux.SLinux.empty_platform('armv7')
    model.current.memory.mmap(0x1000, 0x1000, 'rw ')
    model.current.SP = 0x2000-4

    # open a file
    filename = model.current.push_bytes('/bin/true\x00')
    # 0o600 is the octal spelling accepted by both Python 2.6+ and Python 3
    fd = model.sys_open(filename, os.O_RDONLY, 0o600)

    # Place the stat buffer below the pushed filename
    stat = model.current.SP - 0x100
    model.current.R0 = fd
    model.current.R1 = stat
    model.current.R7 = nr_fstat64
    # assertEqual matches the rest of this test case; assertEquals is a
    # deprecated alias.
    self.assertEqual(linux_syscalls.armv7[nr_fstat64], 'sys_fstat64')
    model.syscall()

    print(''.join(model.current.read_bytes(stat, 100)).encode('hex'))