Basic Caching for Binja IL (#467)

* basic caching (wip)

* moved execute in binja and cleanup of self.instruction refs

* did_emulate_insn and get_current_llil_func renaming

* refactor for hasattr
This commit is contained in:
Theofilos Petsios 2017-08-23 14:46:17 -04:00 committed by GitHub
parent b32379d3d4
commit 1b653be9b3
3 changed files with 108 additions and 61 deletions

View File

@ -723,36 +723,28 @@ class Cpu(Eventful):
if insn.address != self.PC:
return
name = self.canonicalize_instruction_name(insn)
def fallback_to_emulate(*operands):
text_bytes = ' '.join('%02x'%x for x in insn.bytes)
logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",
insn.address, text_bytes, insn.mnemonic, insn.op_str)
self.publish('will_emulate_instruction', insn)
self.emulate(insn)
self.publish('did_emulate_instruction', insn)
implementation = getattr(self, name, fallback_to_emulate)
if logger.level == logging.DEBUG :
logger.debug(self.render_instruction(insn))
for l in self.render_registers():
register_logger.debug(l)
self._insn_implementation(insn)
self._icount += 1
self.publish('did_execute_instruction', insn)
def fallback_to_emulate(self, *operands):
    """Emulate the current instruction when no handler method exists.

    Logs the instruction as unimplemented, then wraps ``self.emulate`` in
    the ``will_emulate_instruction`` / ``did_emulate_instruction`` events.
    ``operands`` is accepted so this can stand in for a regular handler,
    but it is not used.
    """
    # Read self.instruction exactly once and reuse the result below.
    current = self.instruction
    hex_bytes = ' '.join(['%02x' % byte for byte in current.bytes])
    logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",
                current.address, hex_bytes, current.mnemonic, current.op_str)

    self.publish('will_emulate_instruction', current)
    self.emulate(current)
    self.publish('did_emulate_instruction', current)
def _insn_implementation(self, insn):
name = self.canonicalize_instruction_name(insn)
implementation = getattr(self, name, self.fallback_to_emulate)
implementation(*insn.operands)
self.update_pc()
self._icount += 1
# Hook meant to be overridden by subclasses that need to adjust the PC.
def update_pc(self):
    """Do nothing; the default implementation leaves the PC untouched."""
    return None
self.publish('did_execute_instruction', insn)
def emulate(self, insn):
'''

View File

@ -17,6 +17,7 @@ from ..smtlib import Operators, BitVecConstant, operator
from ...utils.helpers import issymbolic
logger = logging.getLogger("CPU")
register_logger = logging.getLogger("REGISTERS")
class BinjaRegisterFile(RegisterFile):
@ -343,7 +344,7 @@ class BinjaCpu(Cpu):
c = self.memory[address]
if issymbolic(c):
assert isinstance(c, BitVec) and c.size == 8
assert isinstance(c, BitVec) and c.size == 8
if isinstance(c, Constant):
c = chr(c.value)
else:
@ -380,6 +381,81 @@ class BinjaCpu(Cpu):
insn.operands = self._wrap_operands(insn.operands)
return insn
def execute(self):
    '''
    Decode, and execute one instruction pointed by register PC.

    Publishes ``will_decode_instruction``, ``will_execute_instruction`` and
    ``did_execute_instruction`` around the dispatch. When the disassembler
    is a ``BinjaILDisasm``, the PC is advanced here unless the IL
    instruction set it itself or more IL instructions are still queued.

    :raises ConcretizeRegister: if PC is symbolic
    :raises InvalidMemoryAccess: if PC fails the ``'x'`` access check
    '''
    if issymbolic(self.PC):
        raise ConcretizeRegister(self, 'PC', policy='ALL')

    if not self.memory.access_ok(self.PC, 'x'):
        raise InvalidMemoryAccess(self.PC, 'x')

    self.publish('will_decode_instruction', self.PC)

    insn = self.decode_instruction(self.PC)
    # Remember where this instruction started; the PC bump below is
    # computed from this value rather than from the live PC.
    self._last_pc = self.PC

    self.publish('will_execute_instruction', insn)

    # FIXME (theo) why just return here?
    if insn.address != self.PC:
        return

    name = self.canonicalize_instruction_name(insn)

    def fallback_to_emulate(*operands):
        if (isinstance(self.disasm, BinjaILDisasm) and
                isinstance(insn, cs.CsInsn)):
            # if we got a capstone instruction using BinjaILDisasm, it means
            # this instruction is not implemented. Fallback to Capstone
            self.FALLBACK(name, *operands)
            # XXX after this point self.PC != self._last_pc but that is
            # OK because we will update the PC properly
        else:
            text_bytes = ' '.join('%02x'%x for x in insn.bytes)
            logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",
                        insn.address, text_bytes, insn.mnemonic, insn.op_str)
            self.publish('will_emulate_instruction', insn)
            self.emulate(insn)
            self.publish('did_emulate_instruction', insn)

    # Use the handler method named after the instruction if there is one,
    # otherwise the closure above.
    implementation = getattr(self, name, fallback_to_emulate)

    # NOTE(review): this compares the logger's own level, so it only fires
    # when DEBUG was set explicitly on this logger (not when inherited).
    if logger.level == logging.DEBUG:
        logger.debug(self.render_instruction(insn))
        for l in self.render_registers():
            register_logger.debug(l)

    assert (self.PC == self._last_pc or
            (isinstance(insn, BinjaILDisasm.BinjaILInstruction) and
             insn.sets_pc))

    implementation(*insn.operands)

    # In case we are executing IL instructions, we could iteratively
    # invoke multiple instructions due to the tree form, thus we only
    # want to increment the PC once, based on its previous position.
    # For CALLS and JUMPS the PC should have been set automatically,
    # so there is no need to do anything.
    #
    # Only the PC bump is specific to BinjaILDisasm. It is guarded by a
    # condition instead of an early return so that the instruction count
    # and the 'did_execute_instruction' event below are never skipped.
    if isinstance(self.disasm, BinjaILDisasm):
        # don't bump the PC if we are in an LLIL that has set it,
        # or if there are pending IL insn in the queue. This is because
        # for cases where we have other il instructions in the queue,
        # such as when we get a divu insn, the PC + size will point
        # to the next assembly instruction and not the next LLIL
        #
        # we might be executing a Capstone instruction at this point
        # if we context-switched, so check the sets_pc attr
        if not (isinstance(insn, BinjaILDisasm.BinjaILInstruction) and
                (insn.sets_pc or self.disasm.il_queue)):
            self.PC = self._last_pc + insn.size

    self._icount += 1
    self.publish('did_execute_instruction', insn)
def update_platform_cpu_regs(self):
for pl_reg, binja_reg in self.regfile.pl2b_map.items():
if isinstance(binja_reg, tuple) or binja_reg is None: continue
@ -463,37 +539,6 @@ class BinjaCpu(Cpu):
return [BinjaOperand(self, self.disasm.disasm_il, op)
for op in operands]
def fallback_to_emulate(self, *operands):
    """Handle an instruction that has no dedicated handler method.

    A Capstone instruction seen while disassembling through
    ``BinjaILDisasm`` is routed to ``self.FALLBACK``; every other case is
    delegated to the parent class implementation.
    """
    il_disasm_active = isinstance(self.disasm, BinjaILDisasm)
    if not (il_disasm_active and isinstance(self.instruction, cs.CsInsn)):
        super(BinjaCpu, self).fallback_to_emulate(*operands)
        return

    # Getting a Capstone instruction out of the IL disassembler means the
    # instruction is not implemented, so fall back to Capstone.
    handler_name = self.canonicalize_instruction_name(self.instruction)
    self.FALLBACK(handler_name, *operands)
def update_pc(self):
    """Advance the PC past the current instruction when the IL did not.

    Does nothing unless the disassembler is a ``BinjaILDisasm``.
    """
    # IL instructions come in tree form, so several of them may run for a
    # single machine instruction; the PC must be bumped only once, from
    # the position recorded in self._last_pc. CALLs and JUMPs are expected
    # to have set the PC already.
    if not isinstance(self.disasm, BinjaILDisasm):
        return

    # Leave the PC alone when the LLIL set it, or while IL instructions
    # are still queued: with pending IL (e.g. for a divu insn) PC + size
    # would point at the next assembly instruction, not the next LLIL.
    #
    # After a context switch this may be a Capstone instruction, which is
    # why the presence of the sets_pc attribute is checked first.
    if (hasattr(self.instruction, "sets_pc") and
            (self.instruction.sets_pc or self.disasm.il_queue)):
        return

    self.PC = self._last_pc + self.instruction.size
# XXX this is currently not active because a bunch of flag-setting
# LLIL are not implemented by Binja :(
def update_flags_from_il(cpu, il):
@ -1245,6 +1290,7 @@ def x86_calculate_cmp_flags(cpu, size, res, left_v, right_v):
's': _sign_flag(res, size),
'o': _overflow_flag(res, right_v, left_v, size)
}
cpu.update_flags(flags)
def x86_update_logic_flags(cpu, result, size):

View File

@ -95,6 +95,7 @@ class BinjaILDisasm(Disasm):
self.unimpl_cache = set()
self.func_cache = dict()
self.llil_func_cache = dict()
# for all UNIMPL insn and other hard times
# FIXME generalize for other archs
@ -124,13 +125,9 @@ class BinjaILDisasm(Disasm):
# clear the queue (e.g., we might be here because of a CALL)
del self.il_queue[:]
from binaryninja import Architecture, LowLevelILFunction
func = LowLevelILFunction(self.view.arch)
func.current_address = pc
self.disasm_insn_size = (self.view.arch.
get_instruction_low_level_il(code, pc, func))
func, size = self._llil_func_info(code, pc)
self.current_llil_func = func
self.disasm_insn_size = size
self.il_queue = [(i, func[i]) for i in xrange(len(func))]
return self.il_queue.pop(0)[1]
@ -141,6 +138,18 @@ class BinjaILDisasm(Disasm):
return (il.operation == enums.LowLevelILOperation.LLIL_UNIMPL or
il.operation == enums.LowLevelILOperation.LLIL_UNIMPL_MEM)
def _llil_func_info(self, code, pc):
if pc in self.llil_func_cache:
return self.llil_func_cache[pc]
from binaryninja import Architecture, LowLevelILFunction
# FIXME
func = LowLevelILFunction(Architecture['x86_64'])
func.current_address = pc
size = self.view.arch.get_instruction_low_level_il(code, pc, func)
self.llil_func_cache[pc] = (func, size)
return func, size
# XXX will be removed once we no longer rely on view
def _get_current_func(self, pc):
if pc in self.func_cache: