Emotet Deobfuscation
Removing CFF Obfuscation From Emotet Using Angr and Symbolic Execution
- Overview
- Approach For Identifying Original Basic Blocks (OBB) - Assembly/IDA Only
- Deobfuscation With Symbolic Execution
- Approach For Identifying Original Basic Blocks (OBB) Using Symbolic Execution
Approach For Identifying Original Basic Blocks (OBB) - Assembly/IDA Only
Shout out to @mrexodia full credit goes to him for this approach!
We are doing a breadth-first search through bb. This is specific to the binary we are analyzing, we just noticed that the jz/jnz is used for dispatcher control flow (it will differ for other binaries). For our search algorithm we will rely on this to mark a transition from the cf to an obb.
The generic algorithm.
- Assume two states (not the same as the cff states) in obb and in cf.
- Walk the graph in a breadth first search (BFS) and track your current state.
- When you are in cf you can transition to in obb on the positive branch of a jz or the negative branch of a jnz.
- If you are in obb you don't exit until you hit an already identified in obb bb.
- Mark each bb as you go
This works because we are doing a BFS and the CFF forces a loop back to the dispatcher, so we are guaranteed to have already seen the dispatcher in cf bb before we reach the end of the first in obb.
The specific algorithm.
- start at the dispatcher entry, this is the first dispatcher block
- for each next bb if it doesn't end in a jz/jnz then we mark as dispatcher and search forward
- if we are in a dispatcher and end with a jz the TRUE points to a obb and the FALSE points to another dispatcher
- if we are in a dispatcher and end with a jnz the FALSE points to a obb and the TRUE points to another dispatcher
- mark the blocks and continue our search BFS
- if we are in an obb mark every next bb as an obb until we see a dispatcher block (then end that trace)
import idaapi
import idautils
import idc
from queue import Queue
import struct
# Label every basic block of the obfuscated function as either an original
# basic block (obb) or a dispatcher / control-flow block, using a
# breadth-first walk over IDA's flowchart.
#
# bb_states[start_address] = {'obb': True/False}
#   True  -> original basic block (obb)
#   False -> dispatcher block
bb_states = {}
# Start addresses of blocks whose successors have already been expanded
bb_visited = set()
fn_start = 0x10008784
fn_end = 0x100099D2
dispatcher_start = 0x1000953A
function = idaapi.get_func(fn_start)
flowchart = idaapi.FlowChart(function)
# The walk starts at the first successor of the function's entry block and
# treats it as the first dispatcher block.
# NOTE(review): dispatcher_start is defined above but is not used to pick this
# block - confirm the first successor really is the dispatcher entry.
dispatcher_flowchart = list(flowchart[0].succs())[0]
# FIFO queue drives the BFS
q = Queue()
# Seed the queue with the dispatcher entry, labelled as dispatcher
q.put(dispatcher_flowchart)
bb_states[dispatcher_flowchart.start_ea] = {'obb': False}
# Walk through bb
while not q.empty():
    bb_flowchart = q.get()
    bb_start = bb_flowchart.start_ea
    # Label assigned to this block when it was queued
    bb_info = bb_states[bb_start]
    #print(f"-> {hex(bb_start)} {bb_info}")
    if bb_start in bb_visited:
        # Already expanded - a block can be queued by several predecessors
        continue
    bb_visited.add(bb_start)
    # Materialise the successors once; succs() is a generator
    successors = list(bb_flowchart.succs())
    if not successors:
        # Exit block, nothing to propagate
        continue
    # A dispatcher block that ends in jz/jnz is the only place where the
    # label can switch from dispatcher to obb. Everything else (single
    # successor, any branch inside an obb, any other conditional jump in the
    # dispatcher) simply hands its own label to its successors.
    dispatcher_branch = None
    if len(successors) > 1:
        # Address of the head preceding the block end, i.e. the branch
        bb_end = idc.prev_head(bb_flowchart.end_ea)
        if not bb_info.get('obb'):
            branch_mnem = idc.print_insn_mnem(bb_end)
            if branch_mnem in ('jz', 'jnz'):
                dispatcher_branch = branch_mnem
                # Operand 0 of the jump is the taken (condition true) target
                true_bb_address = idc.get_operand_value(bb_end, 0)
    for next_bb_flowchart in successors:
        next_bb_start = next_bb_flowchart.start_ea
        # If we have already expanded it, ignore
        if next_bb_start in bb_visited:
            continue
        if dispatcher_branch is None:
            # No transition possible here: propagate the current label
            next_is_obb = bb_info.get('obb')
        else:
            # jz : taken target is an obb, fall-through is more dispatcher
            # jnz: taken target is more dispatcher, fall-through is an obb
            branch_taken = next_bb_start == true_bb_address
            next_is_obb = branch_taken == (dispatcher_branch == 'jz')
        q.put(next_bb_flowchart)
        bb_states[next_bb_start] = {'obb': next_is_obb}
#### ALL this for debugging
# Add color to bb just for debugging
def set_bb_color(ea, flowchart, color_value):
    """Colour every item of the basic block that contains ``ea``.

    Debugging aid only. The first block of ``flowchart`` whose
    ``[start_ea, end_ea)`` range contains ``ea`` is coloured and the search
    stops; if no block contains ``ea`` nothing happens.

    Args:
        ea: Any address inside the basic block to colour.
        flowchart: Iterable of basic blocks exposing ``start_ea``/``end_ea``
            (the script passes an ``idaapi.FlowChart``).
        color_value: Colour value handed unchanged to ``idc.set_color``.
    """
    for block in flowchart:
        if block.start_ea <= ea < block.end_ea:
            # Last head inside the block; end_ea itself is exclusive.
            # Computed once since it does not change while colouring.
            last_head = idc.prev_head(block.end_ea)
            ptr = block.start_ea
            while ptr <= last_head:
                idc.set_color(ptr, idc.CIC_ITEM, color_value)
                ptr = idc.next_head(ptr)
            break
# Debug report: print the label of every block and paint it in IDA.
# What to verify by eye:
# - all bb should be in the visited set
# - each bb should have a type associated with it in the bb_states
for bb_addr in bb_states:
    print(f"{hex(bb_addr)}: {bb_states[bb_addr]}")
    is_obb = bb_states[bb_addr].get('obb')
    # Green for obb, orange for dispatcher
    set_bb_color(bb_addr, flowchart, 0x00ff00 if is_obb else 0x00A5ff)
Approach For Identifying Original Basic Blocks (OBB) Using Symbolic Execution
The drawback of the assembly approach is that the analyst must first identify what condition causes a transition between the obb and cf blocks. This is a manual process. If we want to fully automate this in a generic way we need to use symbolic execution to identify which bb are cf and which are obb.
For this approach the analyst must still identify how the state is tracked (register) but heuristics can also be used to do this automatically. Once we identify the state we can use the same algorithm above, but instead of using a jz/jnz assembly compare to test for a transition between a cf and obb block we can check our symbolic execution predicate to see if the state is an equation or a constant (i.e. does the state change in the bb or is it constant). If it is constant then we know the state was not modified so this is a cf block; if the state can change then this is an obb. We must also track if the state has changed and how it was changed to determine if the bb has transitioned from a cf to an obb.
Glossary
- cf - control flow
- bb - basic block
- obb - original basic block (these are from before the obfuscation)
- cf bb - control flow basic block also known as part of the dispatcher or "obfuscation code"
- symbolic execution - a process of transforming bb assembly code into mathematical/logical equations that can be solved
- concolic execution - a mix of symbolic and real (emulation) execution, i.e. we sub in some concrete values to constrain the execution path where they exist in the code
- BFS - breadth first search
The Algorithm
assumption we don't want to use concolic execution, we just want symbolic execution because we want to trace every branch and we don't want a concrete STATE value we want a symbol (I think???)
- on the entry to each bb reset the STATE to be a symbol (I think this will force symbolic execution)
- for each bb step we check the following
- is the predicate an equation or a constant (ie. does it depend on the STATE). If it depends on the STATE then it must be a CF bb.
- if it's a constant or the predicate an equation does NOT depend on the STATE then it must be the same type of bb as the previous one
- check the STATE symbol, if it has been assigned a constant or an equation then this is a obb (if it's not undefined)
- maintain a rolling label (cf/obb) for the bb as you step, propagate the label to each sequential bb until one of the above conditions is met
- for each branch (new angr state) push this onto a queue and use a bfs approach
- stop execution when we reach a labeled bb
#
#
# BUGS!!
#
# This is a work in progress, we have at least two bugs that cascade to create a bad graph
# 1) we need to fix the logic so we correctly identify the first cf bb
# 2) we need to figure out why angr chooses such weird bb breaks, like why break at random spots???
#
##############################################################################################
import angr, claripy
from queue import Queue
import struct
import logging
logging.getLogger('angr').setLevel('ERROR')
# Label every basic block reachable from fn_start as an original basic block
# (obb) or a control-flow/dispatcher block by symbolically executing one
# block at a time with angr and walking the successors breadth-first.
BINARY_PATH = '/tmp/emotet.bin'
# Save some info about each bb
# bb_states[bb_address] = {"is_obb": True/False}
bb_states = {}
# Addresses of blocks that have already been executed and labelled
bb_visited = set()
fn_start = 0x10008784
fn_end = 0x100099D2
dispatcher_start = 0x1000953A
project = angr.Project(BINARY_PATH, load_options={'auto_load_libs': False})
# TODO: We should explicitly add the state since we know it (main)
initial_state = project.factory.call_state(addr=fn_start)
# Use this setting to skip calls instead of a hook
initial_state.options.add(angr.options.CALLLESS)
# Setup simulation manager
# NOTE(review): simgr is not used by the walk below, which builds a fresh
# blank state per block instead.
simgr = project.factory.simgr(initial_state)
# We start with the assumption that the first BB is in an obb.
# Each queue entry carries the rolling label along with the block address:
# bb_info = {'address': <addr>, 'prev_is_obb': True/False}
# Use a queue for BFS
q = Queue()
# Push function start onto queue
bb_info = {'address': fn_start, 'prev_is_obb': True}
q.put(bb_info)
## TODO: currently we don't handle when the first bb in a transition from a cf to obb doesn't set the STATE,
# our algorithm will think this is just another cf bb
# NOTE(review): prev_bb_is_obb is only ever cleared below, never set back to
# True when a block reached from cf is labelled obb - confirm whether the
# label handed to successors should follow the block's own label.
# Walk the queue
while not q.empty():
    bb_info = q.get()
    bb_address = bb_info.get('address')
    prev_bb_is_obb = bb_info.get('prev_is_obb')
    # A block can be queued by several predecessors before it is first
    # processed; stop once we reach an already labelled bb instead of
    # executing and relabelling it again.
    if bb_address in bb_visited:
        continue
    print(f"BB: {hex(bb_address)}")
    #print(project.factory.block(simgr.active[0].addr).pp())
    # Start from a blank state at this block so no concrete value is carried
    # over from earlier blocks
    new_state = project.factory.blank_state(addr=bb_address)
    new_state.options.add(angr.options.CALLLESS)
    # Execute the block once and reuse the resulting successor states for
    # both the classification and the queueing below
    next_states = list(project.factory.successors(new_state))
    if not next_states:
        # If there are no successors this is an end bb so it must be an obb
        bb_states[bb_address] = {'is_obb': True}
    elif len(next_states) == 1:
        # Only one next bb, so there is no condition
        if prev_bb_is_obb:
            # If we are in an obb keep the obb label
            bb_states[bb_address] = {'is_obb': True}
        else:
            # If we are in a cf we need to test whether the block changed the
            # STATE (tracked in ebx); if it did, this moves us into an obb
            next_bb = next_states[0]
            if next_bb.regs.ebx.uninitialized:
                bb_states[bb_address] = {'is_obb': False}
                prev_bb_is_obb = False
            else:
                bb_states[bb_address] = {'is_obb': True}
    else:
        # Multiple next bb means there is a condition.
        # Determine if the branch depends on the STATE by looking for an ebx
        # variable in the constraints of any successor.
        flag_depends_on_state = False
        for next_bb in next_states:
            for constraint in next_bb.solver.constraints:
                if any('reg_ebx' in v for v in constraint.variables):
                    flag_depends_on_state = True
        if flag_depends_on_state:
            # If the branch depends on state then this is a cf block
            bb_states[bb_address] = {'is_obb': False}
            prev_bb_is_obb = False
        else:
            bb_states[bb_address] = {'is_obb': True}
    # Mark this bb as visited
    bb_visited.add(bb_address)
    # Push every not yet visited successor together with the rolling label
    for next_bb in next_states:
        next_address = next_bb.addr
        if next_address not in bb_visited:
            bb_info = {'address': next_address, 'prev_is_obb': prev_bb_is_obb}
            q.put(bb_info)
print("========================")
print("DONE!")
# Bare expression kept from the original (presumably shows the result when run
# as a notebook cell)
bb_states