Emotet Deobfuscation Generic Solution
Removing obfsucation from Emotet with a generic solution
- Overview
- References
- IDA Script - Label The Basic Blocks
- Obtain STATE and NEXT-STATE Information For Each Basic Block
- Patching Ideas
- Failures and Next Steps
import angr, claripy
from queue import Queue
import struct
import logging
logging.getLogger('angr').setLevel('ERROR')
# maintain a state counter
state_count = 0
# maintain a list of entry bb for states
entry_bb_list = []
# bb_tree[bb_address] = {states[], end, children=[] }
bb_tree = {}
# Use a queue for BFS
# The queue contains state_info = {'state_value':0, 'sim_state':initial_state}
q = Queue()
# Function information
BINARY_PATH = '/tmp/emotet.bin'
fn_start = 0x10008784
fn_end = 0x100099D2
state_register = 'ebx'
# Start angr project
project = angr.Project(BINARY_PATH, load_options={'auto_load_libs': False})
# Setup function initial state on queue
initial_state = project.factory.call_state(addr=fn_start)
# Use this setting to skip calls instead of a hook
initial_state.options.add(angr.options.CALLLESS)
state_count += 1
state_info = {'state_value':0, 'sim_state':initial_state}
entry_bb_list.append(fn_start)
q.put(state_info)
# Walk the queue
while not q.empty():
state_info = q.get()
state_value = state_info.get('state_value')
sim_state = state_info.get('sim_state')
bb_address = sim_state.block().addr
bb_end_address = bb_address + sim_state.block().size
print(f"\n==============================\n")
print(f"BB: {hex(bb_address)} - {hex(bb_end_address)}")
print(f"\t State Value: {state_value}")
# If we have already processed this bb for the same state disregard it
if bb_address in bb_tree:
if state_value in bb_tree[bb_address].get('states'):
print(f"\t !! Already processed for this state - END !!")
continue
# Get successors
successors = project.factory.successors(sim_state)
# Get children
children = [s.addr for s in successors]
print(f"\t Children:")
for c in children:
print(f"\t\t {hex(c)}")
# Add info to bb tree
if bb_address in bb_tree:
bb_tree[bb_address]['states'].append(state_value)
bb_tree[bb_address]['children'] += children
else:
bb_tree[bb_address] = {'states':[state_value], 'end':bb_end_address, 'children':children }
# Generate the successors and push them onto the queue
for successor in successors:
print(f"\n\t Successor: {hex(successor.addr)}")
if not successor.regs.get(state_register).uninitialized:
# If STATE is set reset the sim_state
# Set clean sim_state
new_sim_state = project.factory.blank_state(addr=successor.addr)
new_sim_state.options.add(angr.options.CALLLESS)
for reg_name in ['eax','ecx','edx','ebx','esp','ebp','esi','edi']:
if reg_name != state_register:
new_sim_state.memory.store(new_sim_state.regs.get(reg_name), successor.regs.get(reg_name))
# Add to queue
state_info = {'state_value':state_value, 'sim_state':new_sim_state}
q.put(state_info)
print(f"\t\t STATE registers set - reset sim state")
print(f"\t\t Added to queue ->")
else:
# If constraints for successor include == STATE
# check if we have already seen the successor in entry_bb_list if we have don't push it else add it
flag_queue_successor = True
state_info = {'state_value':state_value, 'sim_state':successor}
for constraint in successor.solver.constraints:
for constraint_variable in constraint.variables:
if 'reg_'+ state_register in constraint_variable:
if constraint.op == '__eq__':
print(f"\t\t State entry bb found!")
# This is a state entry bb
if successor.addr in entry_bb_list:
# if it is already in entry_bb_list then don't add it to the queue
flag_queue_successor = False
print(f"\t\t\t Already processed this state - END!")
else:
# Setup new entry state for queue
new_state_value = state_count
state_count += 1
# Set clean sim_state
new_sim_state = project.factory.blank_state(addr=successor.addr)
new_sim_state.options.add(angr.options.CALLLESS)
for reg_name in ['eax','ecx','edx','ebx','esp','ebp','esi','edi']:
if reg_name != state_register:
new_sim_state.memory.store(new_sim_state.regs.get(reg_name), successor.regs.get(reg_name))
state_info = {'state_value':new_state_value, 'sim_state':new_sim_state}
entry_bb_list.append(successor.addr)
print(f"\t\t\t New state: {new_state_value}")
if flag_queue_successor:
print(f"\t\t Added to queue ->")
# If we are ok to add the successor to the queue add it now
q.put(state_info)
print("** Completed initial analysis **")
# Normalize bb_tree and combine states for overlapping blocks
# Basic block normalization
# If there is a jmp to the middle of a bb angr doesn't split it into two bb, this causes issues where a "single" bb
# in the view of anger is actually two different types of bb
# To normalize these what we need to do is split the bottom parts off the any non-normalized bb and set the type of
# the top part of the block to be the same as the previous block
# bb_tree[bb_address] = {states[], end, children=[] }
# Sort the bb by address
bb_tree_sorted = {key:bb_tree[key] for key in sorted(bb_tree.keys())}
# For each bb search for bb that end after it and truncate them
# Also update their type to match the previous type
for bb_address in bb_tree_sorted:
bb_tree_sorted[bb_address]['children'] = list(set(bb_tree_sorted[bb_address]['children']))
for ptr in bb_tree_sorted:
if ptr >= bb_address:
# We have passed our bb, not more potential unnormalized bb for this address
break
if bb_address < bb_tree_sorted[ptr].get('end'):
# Truncate the block
bb_tree_sorted[ptr]['end'] = bb_address
# Update the truncated block children with only next block
bb_tree_sorted[ptr]['children'] = [bb_address]
# Update the overlapped block states by combining both state lists
bb_tree_sorted[bb_address]['states'] = list(set(bb_tree_sorted[bb_address]['states'] + bb_tree_sorted[ptr]['states']))
# Print sorted bb_tree
for bb_address in bb_tree_sorted:
print(f"{hex(bb_address)} - States: {bb_tree_sorted[bb_address].get('states')} Children: {[hex(c) for c in set(bb_tree_sorted[bb_address].get('children'))]} ")
# Walk bb_tree and group bb by state, identify state end blocks
# bb_state_map[addr] = {'is_obb': True, 'end': 268470548}
bb_state_map = {}
for bb_address in bb_tree_sorted:
if len(bb_tree_sorted[bb_address]['children']) == 0:
# This is an end block mark it as obb
bb_state_map[bb_address] = {'is_obb': True, 'end': bb_tree_sorted[bb_address]['end']}
elif len(bb_tree_sorted[bb_address]['states']) == 1:
# This is an obb
bb_state_map[bb_address] = {'is_obb': True, 'end': bb_tree_sorted[bb_address]['end']}
else:
bb_state_map[bb_address] = {'is_obb': False, 'end': bb_tree_sorted[bb_address]['end']}
IDA Script - Label The Basic Blocks
import idaapi
import idautils
import idc
from queue import Queue
import struct
bb_states = {268470148: {'is_obb': True, 'end': 268470548},
268470548: {'is_obb': True, 'end': 268470945},
268470945: {'is_obb': True, 'end': 268471345},
268471345: {'is_obb': True, 'end': 268471739},
268471739: {'is_obb': True, 'end': 268472135},
268472135: {'is_obb': True, 'end': 268472528},
268472528: {'is_obb': True, 'end': 268472925},
268472925: {'is_obb': True, 'end': 268473318},
268473318: {'is_obb': True, 'end': 268473658},
268473658: {'is_obb': False, 'end': 268473673},
268473673: {'is_obb': False, 'end': 268473685},
268473685: {'is_obb': False, 'end': 268473691},
268473691: {'is_obb': False, 'end': 268473699},
268473699: {'is_obb': False, 'end': 268473711},
268473711: {'is_obb': False, 'end': 268473715},
268473715: {'is_obb': False, 'end': 268473723},
268473723: {'is_obb': True, 'end': 268473749},
268473749: {'is_obb': True, 'end': 268473769},
268473769: {'is_obb': True, 'end': 268473794},
268473794: {'is_obb': True, 'end': 268473827},
268473827: {'is_obb': True, 'end': 268473877},
268473877: {'is_obb': True, 'end': 268474018},
268474018: {'is_obb': True, 'end': 268474070},
268474070: {'is_obb': True, 'end': 268474098},
268474098: {'is_obb': True, 'end': 268474101},
268474101: {'is_obb': False, 'end': 268474106},
268474106: {'is_obb': False, 'end': 268474131},
268474131: {'is_obb': True, 'end': 268474162},
268474162: {'is_obb': True, 'end': 268474192},
268474192: {'is_obb': True, 'end': 268474238},
268474238: {'is_obb': True, 'end': 268474287},
268474287: {'is_obb': True, 'end': 268474309},
268474309: {'is_obb': True, 'end': 268474317},
268474317: {'is_obb': True, 'end': 268474342},
268474342: {'is_obb': True, 'end': 268474416},
268474416: {'is_obb': True, 'end': 268474459},
268474459: {'is_obb': True, 'end': 268474467},
268474467: {'is_obb': True, 'end': 268474506},
268474506: {'is_obb': True, 'end': 268474516},
268474516: {'is_obb': False, 'end': 268474528},
268474528: {'is_obb': False, 'end': 268474532},
268474532: {'is_obb': False, 'end': 268474540},
268474540: {'is_obb': True, 'end': 268474584},
268474584: {'is_obb': True, 'end': 268474587},
268474587: {'is_obb': True, 'end': 268474600},
268474600: {'is_obb': True, 'end': 268474628},
268474628: {'is_obb': True, 'end': 268474764},
268474764: {'is_obb': True, 'end': 268474776},
268474776: {'is_obb': True, 'end': 268474783},
268474783: {'is_obb': True, 'end': 268474788},
268474788: {'is_obb': True, 'end': 268474810},
268474810: {'is_obb': True, 'end': 268474817},
268474817: {'is_obb': True, 'end': 268474822},
268474822: {'is_obb': False, 'end': 268474834},
268474834: {'is_obb': True, 'end': 268474839}}
def set_bb_color(ea_start, ea_end, color_value):
ea_end = prev_head(ea_end)
ptr = ea_start
while ptr <= ea_end :
set_color(ptr, CIC_ITEM, color_value)
ptr = next_head(ptr)
for b in bb_states:
print(f"{hex(b)} - {hex(bb_states[b].get('end'))}: {bb_states[b].get('is_obb')}")
if bb_states[b].get('is_obb'):
set_bb_color(b, bb_states[b].get('end'), 0x00ff00)
else:
set_bb_color(b, bb_states[b].get('end'), 0x00A5ff)
# Color the bb entry
set_color(b, CIC_ITEM, 0xc0c0c0)
Obtain STATE and NEXT-STATE Information For Each Basic Block
- loop through all bb
- start with STATE = 0 in obb
- from obb run until you hit a cf bb - push branches onto queue
- note the STATE register VALUE - this is the NEXT STATE VALUE
- use the STATE register value to derive the FLAGS constraints (this will be used for patching)
- note the STATE end bb
- from cf run until you hit an obb
- assign the NEXT STATE VALUE to this obb and note the STATE entry bb
- end at obb you have alreay seen
- end at end block
state_map['state_value'] = {'state_value': None
'entry_bb': None
'exit_bb': None
'next_states': [ {'state_value':None, 'flags': None }, ]
}
# state_table[state_value] = {
# 'entry':<entry_bb_address>,
# 'exit':<exit_bb_address>,
# 'next_states':[{'state_value':<>, 'flags':<constrait_flags>},]
# }
state_table = {}
## Build list of cf and obb blocks
cf_bb_list = []
obb_list = []
for bb_address in bb_state_map:
if bb_state_map[bb_address].get('is_obb'):
obb_list.append(bb_address)
else:
cf_bb_list.append(bb_address)
# Use a queue for BFS
# The queue contains state_info = {
# 'state_value':0,
# 'sim_state':initial_state,
# 'entry':true/false,
# 'condition':<none / jz / etc.>
# }
q = Queue()
# Function information
BINARY_PATH = '/tmp/emotet.bin'
fn_start = 0x10008784
fn_end = 0x100099D2
state_register = 'ebx'
# Start angr project
project = angr.Project(BINARY_PATH, load_options={'auto_load_libs': False})
# Setup function initial state on queue
initial_state = project.factory.call_state(addr=fn_start)
# Use this setting to skip calls instead of a hook
initial_state.options.add(angr.options.CALLLESS)
# Add first state to state table (call it state 0)
state_table[0] = {'entry':fn_start, 'exit':None,'next_states':[]}
# Push state info for first state onto queue
state_info = {'state_value':0, 'sim_state':initial_state, 'entry':True, 'condition':None }
q.put(state_info)
# Walk the queue
while not q.empty():
state_info = q.get()
state_value = state_info.get('state_value')
state_entry = state_info.get('entry')
old_sim_state = state_info.get('sim_state')
# Flag to control end of state and error end
state_end = False
error_end = False
sim_state = old_sim_state
# # Setup blank state but carry regsiters
# # Set clean sim_state
# if not state_entry:
# sim_state = old_sim_state
# else:
# sim_state = project.factory.blank_state(addr=old_sim_state.addr)
# sim_state.options.add(angr.options.CALLLESS)
# for reg_name in ['eax','ecx','edx','ebx','esp','ebp','esi','edi']:
# if reg_name != state_register:
# sim_state.memory.store(sim_state.regs.get(reg_name), old_sim_state.regs.get(reg_name))
print(f"\n\n\n========================\n")
print(f"Walking obbs for state {hex(state_value)} - Entry: {state_entry}")
# Walk until cf or branch or dead
loop_limit_count = 0
while loop_limit_count <= 1000:
loop_limit_count += 1
print(f"obb walk: {hex(sim_state.addr)}")
#print(f"STATE:{sim_state.regs.ebx} - FLAGS: {sim_state.regs.flags.variables}")
successors = list(project.factory.successors(sim_state))
# print(f"Successors:")
# for s in successors:
# print(f"\t{hex(s.addr)} - STATE:{s.regs.get(state_register)}")
if len(successors) == 0:
# dead - mark this bb as exit - continue next in queue
state_table[state_value]['exit'] = sim_state.addr
state_end = True
print(f"State {hex(state_value)} exit at bb: {hex(sim_state.addr)}")
break
elif len(successors) > 1:
# Check each one to see if it is a cf
# Assumption: there should never be a branch from an obb into the dispatcher
if successors[0].addr in cf_bb_list and successors[1].addr in cf_bb_list:
print(f"ERROR bb: {hex(sim_state.addr)} - obb branch both successors are cf")
error_end = True
break
elif successors[0].addr in cf_bb_list and successors[1].addr not in cf_bb_list:
print(f"ERROR bb: {hex(sim_state.addr)} - obb branch successor[0] are cf")
error_end = True
break
elif successors[0].addr not in cf_bb_list and successors[1].addr in cf_bb_list:
print(f"ERROR bb: {hex(sim_state.addr)} - obb branch successor[1] are cf")
error_end = True
break
else:
# branch - push second on queue and continue walking
print("Branch found!")
# if the condition is an == we can save it in the hope that it might be useful
# when determining a state later -- this is a hack and we don't propogate conditions for
# multiple obb branches -- there should be a better way to do this???
successors[0].solver.simplify()
successors[1].solver.simplify()
constraints_0 = []
constraints_1 = []
for cc in successors[0].solver.constraints:
if not cc.concrete:
constraints_0.append(cc)
for cc in successors[1].solver.constraints:
if not cc.concrete:
constraints_1.append(cc)
if len(constraints_0) == 1 and len(constraints_1):
if constraints_0[0].op == '__eq__':
print(f"{hex(successors[0].addr)} is __eq__ pushing to queue")
sim_state = successors[1]
queue_state = successors[0].copy()
branch_condition = 'jz'
elif constraints_1[0].op == '__eq__':
print(f"{hex(successors[1].addr)} is __eq__ pushing to queue")
sim_state = successors[0]
queue_state = successors[1].copy()
branch_condition = 'jz'
else:
print(f"Constraint is not __eq__ not propogating condition")
sim_state = successors[0]
queue_state = successors[1].copy()
branch_condition = None
else:
print(f"More than one constraint not propogating condition")
print(constraints_0)
print(constraints_1)
sim_state = successors[0]
queue_state = successors[1].copy()
branch_condition = None
print(f"Add bb: {hex(queue_state.addr)} to queue - continue with bb: {hex(sim_state.addr)}")
branch_state_info = {'state_value':state_value, 'sim_state':queue_state, 'entry':False, 'condition':branch_condition}
q.put(branch_state_info)
else:
# Check if the sccessor is outside of the function, this is some crazy angr thing with
# following ret instructions instead of dying
if successors[0].addr < fn_start or successors[0].addr > fn_end:
# This is an end state - mark this bb as exit - continue next in queue
state_table[state_value]['exit'] = sim_state.addr
state_end = True
print(f"State {hex(state_value)} return at bb: {hex(sim_state.addr)}")
break
if successors[0].addr in cf_bb_list:
successor = successors[0]
# cf - get state value (next state)
# - get flags and add to state table next states
next_state_values = successor.solver.eval_upto(successor.regs.get(state_register), 4)
print(f"{successor.regs.get(state_register)}")
######### DEBUG Constrain Next State #########
conditions_map = {}
# Looking for ast in the form IF (foo __eq__ bar) then STATE1 else STATE2
state_reg_ast = successor.regs.get(state_register)
if not state_reg_ast.concrete:
if state_reg_ast.op == 'If' and not state_reg_ast.args[1].symbolic and not state_reg_ast.args[1].symbolic:
if state_reg_ast.args[0].op == '__eq__':
print(f"{hex(successor.solver.eval(state_reg_ast.args[1]))} constrained by jz")
conditions_map[successor.solver.eval(state_reg_ast.args[1])] = 'jz'
print(f"{hex(successor.solver.eval(state_reg_ast.args[2]))} unconstrained")
else:
print(f"!!!! If condition {state_reg_ast.args[0].op} not handled")
else:
print("!!!! Top level condition to complex to handle")
else:
# If this is concrete test if it has a prior condition
if state_info.get('condition', None) is None:
print(" * Concrete * ")
else:
print(f" * Queue condtion {state_info.get('condition')} * ")
conditions_map[next_state_values[0]] = 'jz'
##########################################
# Save info to state table
state_table[state_value]['exit'] = sim_state.addr
for next_state_value in next_state_values:
state_table[state_value]['next_states'].append({'state_value':next_state_value, 'condition':conditions_map.get(next_state_value,None)})
print(f"State: {hex(state_value)} -> Next States {[hex(s) for s in next_state_values]}")
sim_state = successors[0]
break
else:
sim_state = successors[0]
# Check state end flag
if state_end:
continue
if error_end:
break
print(f"\n------------------\n")
frozen_sim_state = sim_state
# For each state match with obb and push onto queue
# - if seen - continue next in queue
# - if not seen walk to obb
for next_state_value in next_state_values:
print(f"Walking dispatcher for state {hex(next_state_value)}")
if state_entry and next_state_value in state_table:
# We have already processed this skip
print(f"Already processed {hex(next_state_value)} - skip!")
continue
# Setup sim_state with concrete state_value
sim_state = frozen_sim_state.copy()
sim_state.regs.__setattr__(state_register, next_state_value)
# Walk until obb
loop_limit_count = 0
while loop_limit_count <= 1000:
loop_limit_count += 1
print(f"cf walk: {hex(sim_state.addr)}")
successors = list(project.factory.successors(sim_state))
# print(f"\t Successors: {[hex(s.addr) for s in successors]}")
if len(successors) == 0:
# dead - error! no path for state through dispatcher
print(f"ERROR no path through dispatcher for state {hex(next_state_value)} - end bb: {hex(sim_state.addr)}")
error_end = True
break
elif len(successors) > 1:
# branch - error! duplicate dispatcher paths
print(f"ERROR duplicate paths through dispatcher for state {hex(next_state_value)} - branch bb: {hex(sim_state.addr)}")
for s in successors:
for c in s.solver.constraints:
print(f"\t{c}")
error_end = True
break
else:
if successors[0].addr in obb_list:
print(f"Found matching obb: {hex(successors[0].addr)}")
# obb - add state and entry to state_table
state_table[next_state_value] = {'entry':successors[0].addr, 'exit':None, 'next_states':[]}
# - push to queue - continue next in queue
new_state_info = {'state_value':next_state_value, 'sim_state':successors[0].copy(), 'entry':True, 'condition':None}
q.put(new_state_info)
break
else:
sim_state = successors[0]
if error_end:
break
if error_end:
break
for state in state_table:
print(f"{hex(state)} \tEntry:{hex(state_table[state]['entry'])} Exit:{hex(state_table[state]['exit'])} -> ")
for next_state in state_table[state]['next_states']:
print(f"\t\t\t\t\t\t\t{hex(next_state.get('state_value'))}: {next_state.get('condition')} ")
print(bb_state_map)
print('==============')
print(state_table)
Patching Ideas
Assumptions:
- There is enough code cave space (dispatcher code) that we can implement our jump table
- Any short jumps from an obb remain in the state (no patch needed) or jump to a code cave
** Note we may need to first process states that have an exit_bb that doesn't have a jmp as the final instruction so we can check for code caves at the next address
** We might want to proactivly patch out all cf blocks with nops to make the process easier to identify unused caves
For each state in the state map.
- if there is only one next-state
- lookup next-state entry_bb if it immediate follows state exit_bb do nothing
- if it does not immediatly follow check if state exit_bb has a jmp as the final instruction
- if it does then simply alter the address to point to the next-state entry_bb
- if it doesn't then find the next available code cave - if it doesn't immediatly follow the bb we are f-ed!!
- if there is more than one next-state again check for a jmp as the final state instruction or a code cave following the bb
- if there is enough space to add a conditional jmp and an unconditiona jmp add them in directly
- if not then add ajmp to a larger code cave
import idaapi
import idautils
import idc
import struct
# Function information
BINARY_PATH = '/tmp/emotet.bin'
fn_start = 0x10008784
fn_end = 0x100099D2
# Map of all the bb and their type (obb / cf)
# bb_state_map[addr] = {'is_obb': True, 'end': 268470548}
bb_state_map = {268470148: {'is_obb': True, 'end': 268470548}, 268470548: {'is_obb': True, 'end': 268470945}, 268470945: {'is_obb': True, 'end': 268471345}, 268471345: {'is_obb': True, 'end': 268471739}, 268471739: {'is_obb': True, 'end': 268472135}, 268472135: {'is_obb': True, 'end': 268472528}, 268472528: {'is_obb': True, 'end': 268472925}, 268472925: {'is_obb': True, 'end': 268473318}, 268473318: {'is_obb': True, 'end': 268473658}, 268473658: {'is_obb': False, 'end': 268473673}, 268473673: {'is_obb': False, 'end': 268473685}, 268473685: {'is_obb': False, 'end': 268473691}, 268473691: {'is_obb': False, 'end': 268473699}, 268473699: {'is_obb': False, 'end': 268473711}, 268473711: {'is_obb': False, 'end': 268473715}, 268473715: {'is_obb': False, 'end': 268473723}, 268473723: {'is_obb': True, 'end': 268473749}, 268473749: {'is_obb': True, 'end': 268473769}, 268473769: {'is_obb': True, 'end': 268473794}, 268473794: {'is_obb': True, 'end': 268473827}, 268473827: {'is_obb': True, 'end': 268473877}, 268473877: {'is_obb': True, 'end': 268474018}, 268474018: {'is_obb': True, 'end': 268474070}, 268474070: {'is_obb': True, 'end': 268474098}, 268474098: {'is_obb': True, 'end': 268474101}, 268474101: {'is_obb': False, 'end': 268474106}, 268474106: {'is_obb': False, 'end': 268474131}, 268474131: {'is_obb': True, 'end': 268474162}, 268474162: {'is_obb': True, 'end': 268474192}, 268474192: {'is_obb': True, 'end': 268474238}, 268474238: {'is_obb': True, 'end': 268474287}, 268474287: {'is_obb': True, 'end': 268474309}, 268474309: {'is_obb': True, 'end': 268474317}, 268474317: {'is_obb': True, 'end': 268474342}, 268474342: {'is_obb': True, 'end': 268474416}, 268474416: {'is_obb': True, 'end': 268474459}, 268474459: {'is_obb': True, 'end': 268474467}, 268474467: {'is_obb': True, 'end': 268474506}, 268474506: {'is_obb': True, 'end': 268474516}, 268474516: {'is_obb': False, 'end': 268474528}, 268474528: {'is_obb': False, 'end': 268474532}, 268474532: {'is_obb': False, 'end': 268474540}, 268474540: {'is_obb': True, 'end': 268474584}, 268474584: {'is_obb': True, 'end': 268474587}, 268474587: {'is_obb': True, 'end': 268474600}, 268474600: {'is_obb': True, 'end': 268474628}, 268474628: {'is_obb': True, 'end': 268474764}, 268474764: {'is_obb': True, 'end': 268474776}, 268474776: {'is_obb': True, 'end': 268474783}, 268474783: {'is_obb': True, 'end': 268474788}, 268474788: {'is_obb': True, 'end': 268474810}, 268474810: {'is_obb': True, 'end': 268474817}, 268474817: {'is_obb': True, 'end': 268474822}, 268474822: {'is_obb': False, 'end': 268474834}, 268474834: {'is_obb': True, 'end': 268474839}}
# Map of the states their entry and exit and the next states
# state_table['state_value'] = {
# 'entry': None
# 'exit': None
# 'next_states': [ {'state_value':None, 'condition': None }, ]
state_table = {0: {'entry': 268470148, 'exit': 268473318, 'next_states': [{'state_value': 161707569, 'condition': None}]}, 161707569: {'entry': 268474817, 'exit': 268474817, 'next_states': [{'state_value': 66940794, 'condition': None}]}, 66940794: {'entry': 268474131, 'exit': 268474309, 'next_states': [{'state_value': 227043414, 'condition': None}, {'state_value': 148128813, 'condition': 'jz'}]}, 227043414: {'entry': 268474834, 'exit': 268474587, 'next_states': []}, 148128813: {'entry': 268473769, 'exit': 268474098, 'next_states': [{'state_value': 64685583, 'condition': 'jz'}, {'state_value': 240590470, 'condition': None}]}, 64685583: {'entry': 268474317, 'exit': 268474459, 'next_states': [{'state_value': 151019129, 'condition': 'jz'}, {'state_value': 240590470, 'condition': None}]}, 240590470: {'entry': 268474540, 'exit': 268474584, 'next_states': []}, 151019129: {'entry': 268473723, 'exit': 268473749, 'next_states': [{'state_value': 240590470, 'condition': 'jz'}, {'state_value': 180166165, 'condition': None}]}, 180166165: {'entry': 268474600, 'exit': 268474810, 'next_states': [{'state_value': 156164867, 'condition': None}, {'state_value': 240590470, 'condition': 'jz'}]}, 156164867: {'entry': 268474467, 'exit': 268474506, 'next_states': [{'state_value': 240590470, 'condition': None}]}}
# Track states that have been patched
patched_states = []
# Nop all of the cf blocks - create our code caves
for bb_addr in bb_state_map:
if not bb_state_map[bb_addr]['is_obb']:
# Patch it
idaapi.patch_bytes(bb_addr, b'\x90'*(bb_state_map[bb_addr]['end'] - bb_addr))
# TODO: when we patch we should check to make sure there are enough nops to actually patch
# Find all state exit bb that don't end with a patchable (jmp) instruction and add control flow in the following code cave
# - if there is no following code cave !!! ERROR we can't proceed this is not patchable in-place
for state_value in state_table:
if len(state_table[state_value]['next_states']) == 0:
# This is an end state skip it
continue
#print(f"STATE: {hex(state_value)}")
exit_bb_start = state_table[state_value]['exit']
entry_bb_start = state_table[state_value]['entry']
#print(f"entry_bb_start: {hex(entry_bb_start)}")
#print(f"exit_bb_start: {hex(exit_bb_start)}")
# Check the final instruction of the exit bb
exit_bb_end = bb_state_map[exit_bb_start]['end']
exit_bb_end_head = prev_head(exit_bb_end)
exit_bb_final_mnemonic = print_insn_mnem(exit_bb_end_head)
## Assume you can't have conditional jumps from the state into the dispatcher
if exit_bb_final_mnemonic != 'jmp' and 'ret' not in exit_bb_final_mnemonic :
# Check if there is only one next state and it is contiguous
next_states = state_table[state_value]['next_states']
if len(next_states) == 0:
# This is an end block who cares
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - No need to patch this it's an end block")
patched_states.append(state_value)
continue
elif len(next_states) == 1:
# Check if the state is contiguous
next_state_value = next_states[0]['state_value']
next_state_entry = state_table[next_state_value]['entry']
if next_state_entry == exit_bb_end:
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - No need to patch this it's contiguous")
patched_states.append(state_value)
continue
# If we are here we need a code cave following our bb
# Check is there space for a code cave
if print_insn_mnem(exit_bb_end) == 'nop':
print(f"This bb needs to eat the code cave!!! - {hex(exit_bb_end_head)}")
patched_states.append(state_value)
# Directly add the jmps
if len(next_states) == 1:
# Directly patch one jmp
patch_address = exit_bb_end
next_state_value = next_states[0]['state_value']
next_state_entry = state_table[next_state_value]['entry']
jmp_rel = next_state_entry - (patch_address + 5)
patch_jmp = b'\xe9' + struct.pack('<i',jmp_rel)
idaapi.patch_bytes(patch_address, patch_jmp)
else:
# Find the condition and patch first then patch other
jmp_condition = None
conditional_address = None
unconditional_address = None
for next_state in next_states:
if next_state['condition'] is None:
unconditional_address = state_table[next_state['state_value']]['entry']
else:
conditional_address = state_table[next_state['state_value']]['entry']
jmp_condition = next_state['condition']
# Set up the patch jumps
# TODO: For now we will hard code the codition as a jz
patch_jmp_cond_start = exit_bb_end
jmp_rel_statisfied = conditional_address - (patch_jmp_cond_start + 6)
patch_jmp_start = patch_jmp_cond_start + 6
jmp_rel = unconditional_address - (patch_jmp_start + 5)
patch_jmp_condition = b'\x0f\x84' + struct.pack('<i',jmp_rel_statisfied)
patch_jmp = b'\xe9' + struct.pack('<i',jmp_rel)
patch_bytes = patch_jmp_condition + patch_jmp
idaapi.patch_bytes(exit_bb_end, patch_bytes)
else:
# NO? Abort! This won't work
# If this is the case then it doesn't need to be patched
# Else check for code cave
print(f"ABORT!! STATE: {hex(state_value)} Entry: {hex(entry_bb_start)}")
abort_flag = False
# Find all state exit bb that end with a short jmp - these need to be patched first they depend on the code cave proximity
for state_value in state_table:
if len(state_table[state_value]['next_states']) == 0:
# This is an end state skip it
continue
if abort_flag:
break
if state_value in patched_states:
continue
exit_bb_start = state_table[state_value]['exit']
entry_bb_start = state_table[state_value]['entry']
# Check the final instruction of the exit bb
exit_bb_end = bb_state_map[exit_bb_start]['end']
exit_bb_end_head = prev_head(exit_bb_end)
exit_bb_final_mnemonic = print_insn_mnem(exit_bb_end_head)
## Assume you can't have conditional jumps from the state into the dispatcher
if exit_bb_final_mnemonic == 'jmp':
# Check the number of bytes
next_instruction_ea = next_head(exit_bb_end_head)
instruction_count = next_instruction_ea - exit_bb_end_head
if instruction_count < 5:
# This is a short jump
# Follow the jump this will be our patch area
short_jmp_address = get_operand_value(exit_bb_end_head,0)
# Check if this a single jump to another state so we don't need to patch
next_states = state_table[state_value]['next_states']
if len(next_states) == 1:
# Check if the state is at the jump address
next_state_address = state_table[next_states[0]['state_value']]['entry']
if next_state_address == short_jmp_address:
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - Short jump to next state no patch")
patched_states.append(state_value)
continue
# If we got here we need to add a patch
# Check if this address is available as a code cave if not adjust it
patched_states.append(state_value)
ptr_short_jmp_address = short_jmp_address
while print_insn_mnem(ptr_short_jmp_address) != 'nop':
# We need to adjust the jmp to the nearest code cave
# Loop until we hit the next section of nops
ptr_short_jmp_address = next_head(ptr_short_jmp_address)
# Error if we run out of space in the function
if ptr_short_jmp_address >= fn_end:
print(f"ABORT - STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - short jmp nop finder hit end of function with no suitable code cave found!")
abort_flag = True
break
# Check to see if we need to update the jmp diplacement
if ptr_short_jmp_address != short_jmp_address:
original_displacement = struct.unpack('b',get_bytes(exit_bb_end_head + 1, 1))[0]
original_displacement += ptr_short_jmp_address - short_jmp_address
try:
idaapi.patch_bytes(exit_bb_end_head + 1, struct.pack('b',original_displacement))
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - Short jump updated jump to code cave: {hex(ptr_short_jmp_address)}")
except:
print(f"ABORT - STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - short jmp code cave adjustment is too far!")
abort_flag = True
break
# If we get here we can assume we are ready to patch at the ptr_short_jmp_address
if len(next_states) == 1:
# Directly patch one jmp
patch_address = ptr_short_jmp_address
next_state_value = next_states[0]['state_value']
next_state_entry = state_table[next_state_value]['entry']
jmp_rel = next_state_entry - (patch_address + 5)
patch_jmp = b'\xe9' + struct.pack('<i',jmp_rel)
idaapi.patch_bytes(patch_address, patch_jmp)
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - Short jump patched with one state at {hex(patch_address)}")
else:
# Find the condition and patch first then patch other
jmp_condition = None
conditional_address = None
unconditional_address = None
for next_state in next_states:
if next_state['condition'] is None:
unconditional_address = state_table[next_state['state_value']]['entry']
else:
conditional_address = state_table[next_state['state_value']]['entry']
jmp_condition = next_state['condition']
# Set up the patch jumps
# TODO: For now we will hard code the codition as a jz
patch_jmp_cond_start = ptr_short_jmp_address
jmp_rel_statisfied = conditional_address - (patch_jmp_cond_start + 6)
patch_jmp_start = patch_jmp_cond_start + 6
jmp_rel = unconditional_address - (patch_jmp_start + 5)
patch_jmp_condition = b'\x0f\x84' + struct.pack('<i',jmp_rel_statisfied)
patch_jmp = b'\xe9' + struct.pack('<i',jmp_rel)
patch_bytes = patch_jmp_condition + patch_jmp
idaapi.patch_bytes(ptr_short_jmp_address, patch_bytes)
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - Short jump patched with one two states at {hex(ptr_short_jmp_address)}")
# Patch all remaining state exits
for state_value in state_table:
if len(state_table[state_value]['next_states']) == 0:
# This is an end state skip it
continue
if abort_flag:
break
if state_value in patched_states:
continue
exit_bb_start = state_table[state_value]['exit']
entry_bb_start = state_table[state_value]['entry']
# Get address of final instruction
exit_bb_end = bb_state_map[exit_bb_start]['end']
exit_bb_end_head = prev_head(exit_bb_end)
next_states = state_table[state_value]['next_states']
if len(next_states) == 1:
# Directly patch one jmp
patch_address = exit_bb_end_head
next_state_value = next_states[0]['state_value']
next_state_entry = state_table[next_state_value]['entry']
jmp_rel = next_state_entry - (patch_address + 5)
patch_jmp = b'\xe9' + struct.pack('<i',jmp_rel)
idaapi.patch_bytes(patch_address, patch_jmp)
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - patched with one state at {hex(patch_address)}")
else:
# Find the condition and patch first then patch other
jmp_condition = None
conditional_address = None
unconditional_address = None
for next_state in next_states:
if next_state['condition'] is None:
unconditional_address = state_table[next_state['state_value']]['entry']
else:
conditional_address = state_table[next_state['state_value']]['entry']
jmp_condition = next_state['condition']
if unconditional_address is None or conditional_address is None:
print(f"conditions error!! {next_states}")
abort_flag = True
# Set up the patch jumps
# TODO: For now we will hard code the codition as a jz
# Find a code cave to insert our jumps - search from the fn start to end
code_cave_ptr = fn_start
while code_cave_ptr <= fn_end:
# Find a code cave with enough nops
if print_insn_mnem(code_cave_ptr) == 'nop':
if get_bytes(code_cave_ptr, 11) == b'\x90'*11:
break
# Find next head
code_cave_ptr = next_head(code_cave_ptr)
# Check to make sure we didn't run out of room in the function
if code_cave_ptr >= fn_end:
print(f"ABORT - STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - no code cave found!")
abort_flag = True
break
# Patch the jump to point to our new code cave
patch_address = exit_bb_end_head
new_jmp_address = code_cave_ptr
jmp_rel = new_jmp_address - (patch_address + 5)
patch_jmp = b'\xe9' + struct.pack('<i',jmp_rel)
idaapi.patch_bytes(patch_address, patch_jmp)
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - moved our jump to point to a code cave: {hex(code_cave_ptr)}")
patch_jmp_cond_start = code_cave_ptr
jmp_rel_statisfied = conditional_address - (patch_jmp_cond_start + 6)
patch_jmp_start = patch_jmp_cond_start + 6
jmp_rel = unconditional_address - (patch_jmp_start + 5)
patch_jmp_condition = b'\x0f\x84' + struct.pack('<i',jmp_rel_statisfied)
patch_jmp = b'\xe9' + struct.pack('<i',jmp_rel)
patch_bytes = patch_jmp_condition + patch_jmp
idaapi.patch_bytes(code_cave_ptr, patch_bytes)
print(f"STATE: {hex(state_value)} Entry: {hex(entry_bb_start)} - patched multiple states at code cave: {hex(code_cave_ptr)}")
Failures and Next Steps
We made a bad assumption when we derived conditional next states. Our assumption was that we could rely on the FLAGS that were used to set the state to still be intact by the time the state exited so all we would need to do is add a conditional jmp based on the FLAGS. This turned out to not be the case for all states so our conditional jumps were incorrectly influenced by other code in the state.
Hack #1
One hack that we tried was to iterate through all insructions in the state and whenever the STATE register was invovled we replaced the instruction with an instruction to save the FLAGS in the STATE register. The assumption was that the last move into the STATE register would be the conditional one since it would overwrite the unconditional STATE.
9c -> push flags
5b -> pop ebx (store FLAGS in state register)
Then before we added our conditional jump we moved the flags back into the FLAGS register. The idea was that now our conditional jump would be based on the same FLAGS that were used to set the STATE earlier in the state basedic blocks.
53 -> push ebx (state register withe stored FLAGS)
9d -> pop flags
This does work except IDA is unable to simplify this logic so the end result is some random looking conditions that the analyst must investigate manually to recover the control flow. Not good!