Basic Caching for Binja IL (#467)

* basic caching (wip)
* moved execute in binja and cleanup of self.instruction refs
* did_emulate_insn and get_current_llil_func renaming
* refactor for hasattr
2017-08-23 14:46:17 -04:00 · 2017-08-23 14:46:17 -04:00 · 1b653be9b3
commit 1b653be9b3
parent b32379d3d4
3 changed files with 108 additions and 61 deletions
--- a/manticore/core/cpu/abstractcpu.py
+++ b/manticore/core/cpu/abstractcpu.py
@ -723,36 +723,28 @@ class Cpu(Eventful):
        if insn.address != self.PC:
            return

+        name = self.canonicalize_instruction_name(insn)
+
+        def fallback_to_emulate(*operands):
+            text_bytes = ' '.join('%02x'%x for x in insn.bytes)
+            logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",
+                        insn.address, text_bytes, insn.mnemonic, insn.op_str)
+
+            self.publish('will_emulate_instruction', insn)
+            self.emulate(insn)
+            self.publish('did_emulate_instruction', insn)
+
+        implementation = getattr(self, name, fallback_to_emulate)
+
        if logger.level == logging.DEBUG :
            logger.debug(self.render_instruction(insn))
            for l in self.render_registers():
                register_logger.debug(l)

-        self._insn_implementation(insn)
-        self._icount += 1
-        self.publish('did_execute_instruction', insn)
-
-    def fallback_to_emulate(self, *operands):
-        insn = self.instruction
-        text_bytes = ' '.join('%02x'%x for x in insn.bytes)
-        logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",
-                insn.address, text_bytes, insn.mnemonic, insn.op_str)
-
-        self.publish('will_emulate_instruction', insn)
-        self.emulate(insn)
-
-        self.publish('did_emulate_instruction', insn)
-
-    def _insn_implementation(self, insn):
-        name = self.canonicalize_instruction_name(insn)
-
-        implementation = getattr(self, name, self.fallback_to_emulate)
        implementation(*insn.operands)
-        self.update_pc()
+        self._icount += 1

-    # to be overriden if needed
-    def update_pc(self):
-        pass
+        self.publish('did_execute_instruction', insn)

    def emulate(self, insn):
        '''
--- a/manticore/core/cpu/binja.py
+++ b/manticore/core/cpu/binja.py
@ -17,6 +17,7 @@ from ..smtlib import Operators, BitVecConstant, operator
 from ...utils.helpers import issymbolic

 logger = logging.getLogger("CPU")
+register_logger = logging.getLogger("REGISTERS")


 class BinjaRegisterFile(RegisterFile):
@ -343,7 +344,7 @@ class BinjaCpu(Cpu):
            c = self.memory[address]

            if issymbolic(c):
-                assert isinstance(c, BitVec) and  c.size == 8
+                assert isinstance(c, BitVec) and c.size == 8
                if isinstance(c, Constant):
                    c = chr(c.value)
                else:
@ -380,6 +381,81 @@ class BinjaCpu(Cpu):
        insn.operands = self._wrap_operands(insn.operands)
        return insn

+    def execute(self):
+        '''
+        Decode and execute the single instruction pointed to by register PC.
+
+        Publishes 'will_decode_instruction', 'will_execute_instruction' and
+        'did_execute_instruction'; the emulation fallback additionally
+        publishes 'will_emulate_instruction' / 'did_emulate_instruction'.
+        The decoded instruction is kept in the local ``insn`` and the PC
+        update for Binja IL is performed inline after the implementation
+        has run.
+
+        :raises ConcretizeRegister: if PC is symbolic
+        :raises InvalidMemoryAccess: if PC is not executable memory
+        '''
+        if issymbolic(self.PC):
+            raise ConcretizeRegister(self, 'PC', policy='ALL')
+
+        if not self.memory.access_ok(self.PC, 'x'):
+            raise InvalidMemoryAccess(self.PC, 'x')
+
+        self.publish('will_decode_instruction', self.PC)
+
+        insn = self.decode_instruction(self.PC)
+        # remember where this step started; the PC bump below is relative
+        # to this value
+        self._last_pc = self.PC
+
+        self.publish('will_execute_instruction', insn)
+
+        # FIXME (theo) why just return here?
+        if insn.address != self.PC:
+            return
+
+        name = self.canonicalize_instruction_name(insn)
+
+        def fallback_to_emulate(*operands):
+            # Used when the CPU has no method named ``name``.
+            if (isinstance(self.disasm, BinjaILDisasm) and
+                    isinstance(insn, cs.CsInsn)):
+                # if we got a capstone instruction using BinjaILDisasm, it means
+                # this instruction is not implemented. Fallback to Capstone
+                self.FALLBACK(name, *operands)
+                # XXX after this point self.PC != self._last_pc but that is
+                # OK because we will update the PC properly
+            else:
+                text_bytes = ' '.join('%02x'%x for x in insn.bytes)
+                logger.info("Unimplemented instruction: 0x%016x:\t%s\t%s\t%s",
+                            insn.address, text_bytes, insn.mnemonic, insn.op_str)
+
+                self.publish('will_emulate_instruction', insn)
+                self.emulate(insn)
+                self.publish('did_emulate_instruction', insn)
+
+        implementation = getattr(self, name, fallback_to_emulate)
+
+        if logger.level == logging.DEBUG:
+            logger.debug(self.render_instruction(insn))
+            for l in self.render_registers():
+                register_logger.debug(l)
+
+        assert (self.PC == self._last_pc or
+                (isinstance(insn, BinjaILDisasm.BinjaILInstruction) and
+                 insn.sets_pc))
+
+        implementation(*insn.operands)
+
+        # In case we are executing IL instructions, we could iteratively
+        # invoke multiple instructions due to the tree form, thus we only
+        # want to increment the PC once, based on its previous position.
+        # For CALLS and JUMPS the PC should have been set automatically
+        # so there is no need to do anything. With any other disassembler
+        # no PC update is performed here at all.
+        if isinstance(self.disasm, BinjaILDisasm):
+            # don't bump the PC if we are in an LLIL that has set it,
+            # or if there are pending IL insn in the queue. This is because
+            # for cases where we have other il instructions in the queue,
+            # such as when we get a divu insn, the PC + size will point
+            # to the next assembly instruction and not the next LLIL
+            #
+            # we might be executing a Capstone instruction at this point
+            # if we context-switched, so check the instruction type before
+            # touching sets_pc
+            is_llil = isinstance(insn, BinjaILDisasm.BinjaILInstruction)
+            if not (is_llil and (insn.sets_pc or self.disasm.il_queue)):
+                self.PC = self._last_pc + insn.size
+
+        # Bookkeeping must run for every executed instruction; it must not
+        # be skipped when the disassembler is not BinjaILDisasm.
+        self._icount += 1
+        self.publish('did_execute_instruction', insn)
+
    def update_platform_cpu_regs(self):
        for pl_reg, binja_reg in self.regfile.pl2b_map.items():
            if isinstance(binja_reg, tuple) or binja_reg is None: continue
@ -463,37 +539,6 @@ class BinjaCpu(Cpu):
        return [BinjaOperand(self, self.disasm.disasm_il, op)
                for op in operands]

-    def fallback_to_emulate(self, *operands):
-        if (isinstance(self.disasm, BinjaILDisasm) and
-                isinstance(self.instruction, cs.CsInsn)):
-            # if we got a capstone instruction using BinjaILDisasm, it means
-            # this instruction is not implemented. Fallback to Capstone
-            name = self.canonicalize_instruction_name(self.instruction)
-            self.FALLBACK(name, *operands)
-        else:
-            super(BinjaCpu, self).fallback_to_emulate(*operands)
-
-    def update_pc(self):
-        # In case we are executing IL instructions, we could iteratively
-        # invoke multiple instructions due to the tree form, thus we only
-        # want to increment the PC once, based on its previous position
-        # for CALLS and JUMPS the PC should have been set automatically
-        # so no need to do anything. Also, if there are pending instruction
-        if not isinstance(self.disasm, BinjaILDisasm):
-            return
-
-        # don't bump the PC if we are in an LLIL that has set it,
-        # or if there are pending IL insn in the queue. This is because
-        # for cases where we have other il instructions in the queue,
-        # such as when we get a divu insn, the PC + size will point
-        # to the next assembly instruction and not the next LLIL
-        #
-        # we might be executing a Capstone instruction at this point
-        # if we context-switched, so check the sets_pc attr
-        if not (hasattr(self.instruction, "sets_pc") and
-                (self.instruction.sets_pc or self.disasm.il_queue)):
-            self.PC = self._last_pc + self.instruction.size
-
    # XXX this is currently not active because a bunch of flag-setting
    # LLIL are not implemented by Binja :(
    def update_flags_from_il(cpu, il):
@ -1245,6 +1290,7 @@ def x86_calculate_cmp_flags(cpu, size, res, left_v, right_v):
        's': _sign_flag(res, size),
        'o': _overflow_flag(res, right_v, left_v, size)
    }
+
    cpu.update_flags(flags)

 def x86_update_logic_flags(cpu, result, size):
--- a/manticore/core/cpu/disasm.py
+++ b/manticore/core/cpu/disasm.py
@ -95,6 +95,7 @@ class BinjaILDisasm(Disasm):

        self.unimpl_cache = set()
        self.func_cache = dict()
+        self.llil_func_cache = dict()

        # for all UNIMPL insn and other hard times
        # FIXME generalize for other archs
@ -124,13 +125,9 @@ class BinjaILDisasm(Disasm):
                # clear the queue (e.g., we might be here because of a CALL)
                del self.il_queue[:]

-        from binaryninja import Architecture, LowLevelILFunction
-
-        func = LowLevelILFunction(self.view.arch)
-        func.current_address = pc
-        self.disasm_insn_size = (self.view.arch.
-                                 get_instruction_low_level_il(code, pc, func))
+        func, size = self._llil_func_info(code, pc)
        self.current_llil_func = func
+        self.disasm_insn_size = size
        self.il_queue = [(i, func[i]) for i in xrange(len(func))]
        return self.il_queue.pop(0)[1]

@ -141,6 +138,18 @@ class BinjaILDisasm(Disasm):
        return (il.operation == enums.LowLevelILOperation.LLIL_UNIMPL or
                il.operation == enums.LowLevelILOperation.LLIL_UNIMPL_MEM)

+    def _llil_func_info(self, code, pc):
+        '''
+        Return the lifted LLIL function and size for the instruction at
+        ``pc``, lifting it only on the first request.
+
+        :param code: bytes passed to the architecture's lifter
+        :param pc: address of the instruction; also used as the cache key
+        :return: tuple ``(func, size)`` where ``func`` is the
+                 LowLevelILFunction that received the lifted IL and
+                 ``size`` is the value returned by
+                 ``get_instruction_low_level_il``
+        '''
+        # NOTE: the cache is keyed on pc alone, so a later call with
+        # different ``code`` for the same pc gets the first result back.
+        if pc in self.llil_func_cache:
+            return self.llil_func_cache[pc]
+
+        from binaryninja import LowLevelILFunction
+        # Create the IL function for the same architecture that performs
+        # the lifting, rather than a hard-coded x86_64.
+        arch = self.view.arch
+        func = LowLevelILFunction(arch)
+        func.current_address = pc
+        size = arch.get_instruction_low_level_il(code, pc, func)
+        self.llil_func_cache[pc] = (func, size)
+        return func, size
+
    # XXX will be removed once we no longer rely on view
    def _get_current_func(self, pc):
        if pc in self.func_cache: