Symbolic Execution For Deobfuscation The Basics
The Theory Behind Angr and Symbolic Execution and Control Flow Flattening
Overview
- DFS and BFS graph traversal tutorial
- OST2 - Reverse Engineering 3201: Symbolic Analysis
- Deobfuscation: recovering an OLLVM-protected program (QuarksLab)
- Automated Detection of Control-flow Flattening (Tim Blazytko)
- Hex-Rays Microcode API vs. Obfuscating Compiler
- What is SSA Static Single Assigment
- Assembly Instructions Data Sheet
- IDA IR Explorer (Lucid)
- Dissecting LLVM Obfuscator Part 1
- Stack-based graph traversal ≠ depth first search
Labelling The Dispatcher
- start at the entry of the fn and assume this is an obb and has a STATE (in some cases the STATE is passed as an argument so this is a bad assumption)
- use symbolic execution to get the next bb
- if the next bb constraints depend on our STATE register this is a dispatcher (cf) bb, label this bb (entry to dispatcher)
- if the next bb constaints are equal to a STATE register this next bb is an obb, label the next bb (exit from dispatcher)
- if there is no branch (no constraints) keep the previous label, label this bb
- if there are no next bb this is an END bb
The example we will be using is an obfuscated Emotet binary eeb13cd51faa7c23d9a40241d03beb239626fbf3efe1dbbfa3994fc10dea0827
import angr, claripy
from queue import Queue
import struct
import logging
logging.getLogger('angr').setLevel('ERROR')
BINARY_PATH = '/tmp/emotet.bin'
# Save some info about each bb
# bb_states[bb_address] = {"is_obb":true/false, "end":<end of bb>}
bb_states = {}
bb_visited = set()
fn_start = 0x10008784
fn_end = 0x100099D2
state_register = 'ebx'
project = angr.Project(BINARY_PATH, load_options={'auto_load_libs': False})
# TODO: We should explicately add the state since we know it (main)
initial_state = project.factory.call_state(addr=fn_start)
# Use this setting to skip calls instead of a hook
initial_state.options.add(angr.options.CALLLESS)
# Setup simulation manager
simgr = project.factory.simgr(initial_state)
# We are going to start with an assumption that the first BB is in an obb
# Since we are using a queue we need to track this rolling state along with the address of the bb
# bb_info = {address:<>, prev_is_obb:true/false}
# Use a queue for BFS
q = Queue()
# Push function start onto queue
# Set the sim manager for the next state to remove any concrete value
first_state = project.factory.blank_state(addr=fn_start)
first_state.options.add(angr.options.CALLLESS)
bb_info = {'address':fn_start, 'prev_is_obb':True, 'state':first_state}
q.put(bb_info)
## TODO: currently we don't handle when the first bb in a transition from a cf to obb doesn't set the STATE,
# our algorithm will think this is just another cf bb
# Walk the queue
while not q.empty():
bb_info = q.get()
bb_address = bb_info.get('address')
prev_bb_is_obb = bb_info.get('prev_is_obb')
new_state = bb_info.get('state')
bb_end_address = new_state.block().size + new_state.block().addr
print(f"BB: {hex(bb_address)}")
#print(project.factory.block(simgr.active[0].addr).pp())
print(f"BB end:{hex(bb_end_address)}")
## Check if bb is already labeled in bb_states if it is just generate and push
# the next states don't relabel it
if bb_address not in bb_states:
if prev_bb_is_obb:
# Reset the state for any transition from an obb
new_state = project.factory.blank_state(addr=bb_address)
new_state.options.add(angr.options.CALLLESS)
# Use successors to determine info about this block
if len(list(project.factory.successors(new_state))) == 0:
# If there are no successors this is an end bb so it must be an obb
bb_states[bb_address] = {'is_obb':True, 'end':bb_end_address}
print(f"This is an end block")
elif len(list(project.factory.successors(new_state))) == 1:
# If there is only next bb then there is no condition
print(f"Only one successor: {hex(project.factory.successors(new_state)[0].addr)}")
if prev_bb_is_obb:
# If we are in an obb keep the obb label
bb_states[bb_address] = {'is_obb':True, 'end':bb_end_address}
print(f"Previous bb was obb this one is too")
else:
bb_states[bb_address] = {'is_obb':False, 'end':bb_end_address}
print(f"Previous bb was cf this one is too")
else:
# If there are multiple next bb then there is a condition
# Determin if the branch depends on state
print(f"Multiple successors")
print(f"\t{hex(project.factory.successors(new_state)[0].addr)}")
print(f"\t{hex(project.factory.successors(new_state)[1].addr)}")
flag_depends_on_state = False
for next_bb in project.factory.successors(new_state):
print(f"Constraints {hex(next_bb.addr)}:")
for constraint in next_bb.solver.constraints:
print(f"\t{constraint}")
for v in constraint.variables:
if 'reg_'+ state_register in v:
flag_depends_on_state = True
print(f"\t\t Depends on STATE")
# If the constraint is an == this means next_bb must be an obb
# Preemptively add it to the bb_states
if constraint.op == '__eq__':
print(f"\t\t **Equals STATE next bb must be obb")
next_bb_addr = next_bb.addr
next_bb_end = next_bb_addr + next_bb.block().size
# make sure not to re-label stuff this is sort of like an alread seen list too
if next_bb_addr not in bb_states:
print(f"\t\t ++ adding obb label")
bb_states[next_bb_addr] = {'is_obb':True, 'end':next_bb_end}
else:
print(f"\t\t !! bb already labeled !!")
# If the branch depends on state then this is a cf block
if flag_depends_on_state:
bb_states[bb_address] = {'is_obb':False, 'end':bb_end_address}
prev_bb_is_obb = False
print(f"Control flow depends on STATE - must be cf")
else:
# use previous state
bb_states[bb_address] = {'is_obb':prev_bb_is_obb, 'end':bb_end_address}
print(f"Control does not depend on STATE - keep previouse state {prev_bb_is_obb}")
else:
print(f"Already labeled bb - skipping!")
# Set the next prev state based on the saved one
if bb_states[bb_address].get('is_obb'):
prev_bb_is_obb = True
else:
prev_bb_is_obb = False
# new_state = project.factory.blank_state(addr=bb_address)
# new_state.options.add(angr.options.CALLLESS)
# Mark this bb as visited
bb_visited.add(bb_address)
# Use successors to push next bb on queue
for next_bb in project.factory.successors(new_state):
next_address = next_bb.addr
if next_address not in bb_visited:
if next_bb.regs.get(state_register).uninitialized:
bb_info = {'address':next_address, 'prev_is_obb':prev_bb_is_obb, 'state':next_bb}
else:
# Reset it all
new_next_bb = project.factory.blank_state(addr=next_address)
new_next_bb.options.add(angr.options.CALLLESS)
bb_info = {'address':next_address, 'prev_is_obb':prev_bb_is_obb, 'state':new_next_bb}
q.put(bb_info)
print("========================")
print("DONE!")
# If there is a jmp to the middle of a bb angr doesn't split it into two bb, this causes issues where a "single" bb
# in the view of anger is actually two different types of bb
# To normalize these what we need to do is split the bottom parts off the any non-normalized bb and set the type of
# the top part of the block to be the same as the previous block
# Sort the bb by address
bb_states_sorted = {key:bb_states[key] for key in sorted(bb_states.keys())}
# For each bb search for bb that end after it and truncate them
# Also update their type to match the previous type
for bb_address in bb_states_sorted:
for ptr in bb_states_sorted:
if ptr >= bb_address:
# We have passed our bb, not more potential unnormalized bb for this address
break
if bb_address < bb_states_sorted[ptr].get('end'):
# Truncate the block
bb_states_sorted[ptr]['end'] = bb_address
# Update block type using previous block
for prev_addr in bb_states_sorted:
if ptr == bb_states_sorted[prev_addr].get('end'):
bb_states_sorted[ptr]['is_obb'] = bb_states_sorted[prev_addr].get('is_obb')
break
bb_states_sorted
Label The IDA Basic Blocks
import idaapi
import idautils
import idc
from queue import Queue
import struct
bb_states = {268470148: {'is_obb': True, 'end': 268470548},
268470548: {'is_obb': True, 'end': 268470945},
268470945: {'is_obb': True, 'end': 268471345},
268471345: {'is_obb': True, 'end': 268471739},
268471739: {'is_obb': True, 'end': 268472135},
268472135: {'is_obb': True, 'end': 268472528},
268472528: {'is_obb': True, 'end': 268472925},
268472925: {'is_obb': True, 'end': 268473318},
268473318: {'is_obb': True, 'end': 268473658},
268473658: {'is_obb': True, 'end': 268473673},
268473673: {'is_obb': False, 'end': 268473685},
268473685: {'is_obb': False, 'end': 268473691},
268473691: {'is_obb': False, 'end': 268473699},
268473699: {'is_obb': False, 'end': 268473711},
268473711: {'is_obb': False, 'end': 268473715},
268473715: {'is_obb': False, 'end': 268473723},
268473723: {'is_obb': True, 'end': 268473749},
268473749: {'is_obb': True, 'end': 268473769},
268473769: {'is_obb': True, 'end': 268473794},
268473794: {'is_obb': True, 'end': 268473827},
268473827: {'is_obb': True, 'end': 268473877},
268473877: {'is_obb': True, 'end': 268474018},
268474018: {'is_obb': True, 'end': 268474070},
268474070: {'is_obb': True, 'end': 268474098},
268474098: {'is_obb': True, 'end': 268474101},
268474101: {'is_obb': True, 'end': 268474106},
268474106: {'is_obb': True, 'end': 268474131},
268474131: {'is_obb': True, 'end': 268474162},
268474162: {'is_obb': True, 'end': 268474192},
268474192: {'is_obb': True, 'end': 268474238},
268474238: {'is_obb': True, 'end': 268474287},
268474287: {'is_obb': True, 'end': 268474309},
268474309: {'is_obb': True, 'end': 268474317},
268474317: {'is_obb': True, 'end': 268474342},
268474342: {'is_obb': True, 'end': 268474416},
268474416: {'is_obb': True, 'end': 268474459},
268474459: {'is_obb': True, 'end': 268474467},
268474467: {'is_obb': True, 'end': 268474506},
268474506: {'is_obb': True, 'end': 268474516},
268474516: {'is_obb': False, 'end': 268474528},
268474528: {'is_obb': False, 'end': 268474532},
268474532: {'is_obb': False, 'end': 268474540},
268474540: {'is_obb': True, 'end': 268474584},
268474584: {'is_obb': True, 'end': 268474587},
268474587: {'is_obb': True, 'end': 268474600},
268474600: {'is_obb': True, 'end': 268474628},
268474628: {'is_obb': True, 'end': 268474764},
268474764: {'is_obb': True, 'end': 268474776},
268474776: {'is_obb': True, 'end': 268474783},
268474783: {'is_obb': True, 'end': 268474788},
268474788: {'is_obb': True, 'end': 268474810},
268474810: {'is_obb': True, 'end': 268474817},
268474817: {'is_obb': True, 'end': 268474822},
268474822: {'is_obb': False, 'end': 268474834},
268474834: {'is_obb': True, 'end': 268474839}}
def set_bb_color(ea_start, ea_end, color_value):
ea_end = prev_head(ea_end)
ptr = ea_start
while ptr <= ea_end :
set_color(ptr, CIC_ITEM, color_value)
ptr = next_head(ptr)
for b in bb_states:
print(f"{hex(b)} - {hex(bb_states[b].get('end'))}: {bb_states[b].get('is_obb')}")
if bb_states[b].get('is_obb'):
set_bb_color(b, bb_states[b].get('end'), 0x00ff00)
else:
set_bb_color(b, bb_states[b].get('end'), 0x00A5ff)