Emulating Themida
A simple proof-of-concept (PoC) emulator for virtualized code
- Overview
- Sample
- PEmulator
- Emulate The Virtualized Function
- Where Did It Crash?
- Fake LocalAlloc
- Where Did It Crash?
- Fake wsprintfA
- How About A Clean Exit?
- Finally!
Overview
These notes are for a recent stream we did with the folks over at Off by One Security. You can find a recording on their channel here: Emulating Obfuscated Code.
Sample
The sample we are analyzing is an older version of Socks5Systems: c9278f17730a4078d3b28e349d31dbdab961d8b61aab7b710f088d0f03a033c8
(available on UnpacMe).
PEmulator
The PE loader script for Unicorn can be found here: pemulator.py
import pemulator
import struct
from unicorn import *
from unicorn.x86_const import *
from pemulator import Emulator
# Read target and load into emulator.
# The context manager closes the file handle as soon as the bytes are read
# instead of leaving it open until garbage collection.
with open('sock_virt.bin', 'rb') as target_file:
    file_data = target_file.read()
emu = Emulator(file_data, trace=True)
# Workspace: scratch mappings are handed out starting 0x1000 bytes past the
# upper bound reported by the loader.
workspace_start = emu.get_memory_upper_bound() + 0x1000
print(f"Workspace start: {hex(workspace_start)}")
# Setup Host and Return buffers (one 0x1000-byte mapping each)
ptr_result = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
ptr_host = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
# Fill host buffer with any domain string (not really needed)
emu.emu.mem_write(ptr_host, b'test.com')
# Setup arguments ecx=host, edx=result
emu.emu.reg_write(UC_X86_REG_ECX, ptr_host)
emu.emu.reg_write(UC_X86_REG_EDX, ptr_result)
# Setup fake return: write a sentinel return address at the current ESP so
# emulation can be stopped when the function returns to it.
esp = emu.emu.reg_read(UC_X86_REG_ESP)
emu.emu.mem_write(esp, struct.pack('<I', 0xdeadbeef))
# Fake proc features no XMM
emu.emu.mem_write(0x100360b0, struct.pack('<I', 0x2))
# Load live globals - big hack lol
# (kept for reference: how g_buff was originally sliced out of good_data.bin)
#data_buff = open("good_data.bin",'rb').read()
#g_buff_offset = 0x100373F0 - 0x10033000
#g_buff = data_buff[g_buff_offset:g_buff_offset+0x4*5]
# For the demo these are the IPs copied from the global data
g_buff = b'\x98Y\xc6\xd6\xc21^\xc2\x8db\xea\x1f-\x9b\xfaZQ\x1f\xc5&'
emu.emu.mem_write(0x100373F0, g_buff)
# Start at the virtualized function; stop when execution reaches the
# sentinel return address written above.
virt_fn = 0x10005727
emu.emu.emu_start(virt_fn, 0xdeadbeef)
# Ask the loader for the IAT address of kernel32.dll's LocalAlloc import
# and print it.
localalloc_iat = emu.get_iat_address_from_import(
    'kernel32.dll',
    'localalloc',
)
print("LocalAlloc: " + hex(localalloc_iat))
import pemulator
import struct
from unicorn import *
from unicorn.x86_const import *
from pemulator import Emulator
# Read target and load into emulator.
# The context manager closes the file handle as soon as the bytes are read
# instead of leaving it open until garbage collection.
with open('sock_virt.bin', 'rb') as target_file:
    file_data = target_file.read()
emu = Emulator(file_data, trace=True)
# Workspace: scratch mappings are handed out starting 0x1000 bytes past the
# upper bound reported by the loader.
workspace_start = emu.get_memory_upper_bound() + 0x1000
print(f"Workspace start: {hex(workspace_start)}")
# Setup Host and Return buffers (one 0x1000-byte mapping each)
ptr_result = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
ptr_host = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
# Fill host buffer with any domain string (not really needed)
emu.emu.mem_write(ptr_host, b'test.com')
# Setup arguments ecx=host, edx=result
emu.emu.reg_write(UC_X86_REG_ECX, ptr_host)
emu.emu.reg_write(UC_X86_REG_EDX, ptr_result)
# Setup fake return: write a sentinel return address at the current ESP so
# emulation can be stopped when the function returns to it.
esp = emu.emu.reg_read(UC_X86_REG_ESP)
emu.emu.mem_write(esp, struct.pack('<I', 0xdeadbeef))
# Fake proc features no XMM
emu.emu.mem_write(0x100360b0, struct.pack('<I', 0x2))
# Load live globals - big hack lol
# (kept for reference: how g_buff was originally sliced out of good_data.bin)
#data_buff = open("good_data.bin",'rb').read()
#g_buff_offset = 0x100373F0 - 0x10033000
#g_buff = data_buff[g_buff_offset:g_buff_offset+0x4*5]
# For the demo these are the IPs copied from the global data
g_buff = b'\x98Y\xc6\xd6\xc21^\xc2\x8db\xea\x1f-\x9b\xfaZQ\x1f\xc5&'
emu.emu.mem_write(0x100373F0, g_buff)
# Setup API hooks
# Map fake API addresses: addresses inside this mapping are written into the
# IAT so that API calls land somewhere a code hook can intercept them.
emu.emu.mem_map(0xfeed0000, 0x1000)
## Hook LocalAlloc
localalloc_iat = emu.get_iat_address_from_import('kernel32.dll','localalloc')
print(f"LocalAlloc IAT: {hex(localalloc_iat)}")
def localalloc_hook(uc: Uc, address: int, size: int, user_data: object) -> None:
    """Stand in for kernel32!LocalAlloc while the virtualized code runs.

    Registered as a UC_HOOK_CODE callback on the fake LocalAlloc address, so
    it fires when emulated code calls through the patched IAT slot.  It logs
    the return address and the two stack arguments, maps fresh memory from
    the global workspace, returns the new pointer in EAX, and then simulates
    the return by popping the return address plus both arguments.

    The mapping is sized from arg2 (uBytes in the LocalAlloc prototype),
    rounded up to whole 0x1000-byte pages with a minimum of one page, so
    requests larger than a page no longer run off the end of the mapping.

    Args:
        uc: Unicorn instance that triggered the hook.
        address: Address of the hooked instruction (unused).
        size: Size of the hooked instruction (unused).
        user_data: Value supplied at hook registration (unused).
    """
    global workspace_start
    page_size = 0x1000

    esp = uc.reg_read(UC_X86_REG_ESP)
    # Stack on entry: [esp] return address, [esp+4] arg1, [esp+8] arg2.
    ret_addr, arg1, arg2 = struct.unpack('<3I', uc.mem_read(esp, 12))
    print(f"Return address: {hex(ret_addr)}")
    print(f"Arg1: {hex(arg1)}")
    print(f"Arg2: {hex(arg2)}")

    # Fake a local alloc: Unicorn mappings must be page-granular, so round
    # the requested byte count up and never map less than one page.
    alloc_size = max(page_size, (arg2 + page_size - 1) & ~(page_size - 1))
    ptr_new_mem = workspace_start
    uc.mem_map(ptr_new_mem, alloc_size)
    workspace_start += alloc_size

    # Return new mem ptr in eax
    uc.reg_write(UC_X86_REG_EAX, ptr_new_mem)
    # Clean stack and ret: 4 bytes of return address + 2 * 4 bytes of args
    uc.reg_write(UC_X86_REG_ESP, esp + 12)
    # Set EIP to return
    uc.reg_write(UC_X86_REG_EIP, ret_addr)
# Point the LocalAlloc IAT slot at a fake address and trap execution there
# with a code hook limited to that single address.
fake_localalloc = 0xfeed0001
emu.emu.mem_write(localalloc_iat, fake_localalloc.to_bytes(4, 'little'))
emu.emu.hook_add(
    UC_HOOK_CODE,
    localalloc_hook,
    begin=fake_localalloc,
    end=fake_localalloc,
)
# Emulate from the virtualized function until the 0xdeadbeef address is hit.
virt_fn = 0x10005727
emu.emu.emu_start(virt_fn, 0xdeadbeef)
# Ask the loader for the IAT address of user32.dll's wsprintfA import
# and print it.
wsprintfa_iat = emu.get_iat_address_from_import(
    'user32.dll',
    'wsprintfa',
)
print("wsprintfA: " + hex(wsprintfa_iat))
import pemulator
import struct
from unicorn import *
from unicorn.x86_const import *
from pemulator import Emulator
# Read target and load into emulator.
# The context manager closes the file handle as soon as the bytes are read
# instead of leaving it open until garbage collection.
with open('sock_virt.bin', 'rb') as target_file:
    file_data = target_file.read()
emu = Emulator(file_data, trace=True)
# Workspace: scratch mappings are handed out starting 0x1000 bytes past the
# upper bound reported by the loader.
workspace_start = emu.get_memory_upper_bound() + 0x1000
print(f"Workspace start: {hex(workspace_start)}")
# Setup Host and Return buffers (one 0x1000-byte mapping each)
ptr_result = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
ptr_host = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
# Fill host buffer with any domain string (not really needed)
emu.emu.mem_write(ptr_host, b'test.com')
# Setup arguments ecx=host, edx=result
emu.emu.reg_write(UC_X86_REG_ECX, ptr_host)
emu.emu.reg_write(UC_X86_REG_EDX, ptr_result)
# Setup fake return: write a sentinel return address at the current ESP so
# emulation can be stopped when the function returns to it.
esp = emu.emu.reg_read(UC_X86_REG_ESP)
emu.emu.mem_write(esp, struct.pack('<I', 0xdeadbeef))
# Fake proc features no XMM
emu.emu.mem_write(0x100360b0, struct.pack('<I', 0x2))
# Load live globals - big hack lol
# (kept for reference: how g_buff was originally sliced out of good_data.bin)
#data_buff = open("good_data.bin",'rb').read()
#g_buff_offset = 0x100373F0 - 0x10033000
#g_buff = data_buff[g_buff_offset:g_buff_offset+0x4*5]
# For the demo these are the IPs copied from the global data
g_buff = b'\x98Y\xc6\xd6\xc21^\xc2\x8db\xea\x1f-\x9b\xfaZQ\x1f\xc5&'
emu.emu.mem_write(0x100373F0, g_buff)
# Setup API hooks
# Map fake API addresses: addresses inside this mapping are written into the
# IAT so that API calls land somewhere a code hook can intercept them.
emu.emu.mem_map(0xfeed0000, 0x1000)
## Hook LocalAlloc
localalloc_iat = emu.get_iat_address_from_import('kernel32.dll','localalloc')
print(f"LocalAlloc IAT: {hex(localalloc_iat)}")
def localalloc_hook(uc: Uc, address: int, size: int, user_data: object) -> None:
    """Stand in for kernel32!LocalAlloc while the virtualized code runs.

    Registered as a UC_HOOK_CODE callback on the fake LocalAlloc address, so
    it fires when emulated code calls through the patched IAT slot.  It logs
    the return address and the two stack arguments, maps fresh memory from
    the global workspace, returns the new pointer in EAX, and then simulates
    the return by popping the return address plus both arguments.

    The mapping is sized from arg2 (uBytes in the LocalAlloc prototype),
    rounded up to whole 0x1000-byte pages with a minimum of one page, so
    requests larger than a page no longer run off the end of the mapping.

    Args:
        uc: Unicorn instance that triggered the hook.
        address: Address of the hooked instruction (unused).
        size: Size of the hooked instruction (unused).
        user_data: Value supplied at hook registration (unused).
    """
    global workspace_start
    page_size = 0x1000

    esp = uc.reg_read(UC_X86_REG_ESP)
    # Stack on entry: [esp] return address, [esp+4] arg1, [esp+8] arg2.
    ret_addr, arg1, arg2 = struct.unpack('<3I', uc.mem_read(esp, 12))
    print(f"Return address: {hex(ret_addr)}")
    print(f"Arg1: {hex(arg1)}")
    print(f"Arg2: {hex(arg2)}")

    # Fake a local alloc: Unicorn mappings must be page-granular, so round
    # the requested byte count up and never map less than one page.
    alloc_size = max(page_size, (arg2 + page_size - 1) & ~(page_size - 1))
    ptr_new_mem = workspace_start
    uc.mem_map(ptr_new_mem, alloc_size)
    workspace_start += alloc_size

    # Return new mem ptr in eax
    uc.reg_write(UC_X86_REG_EAX, ptr_new_mem)
    # Clean stack and ret: 4 bytes of return address + 2 * 4 bytes of args
    uc.reg_write(UC_X86_REG_ESP, esp + 12)
    # Set EIP to return
    uc.reg_write(UC_X86_REG_EIP, ret_addr)
# Point the LocalAlloc IAT slot at a fake address and trap execution there
# with a code hook limited to that single address.
fake_localalloc = 0xfeed0001
emu.emu.mem_write(localalloc_iat, fake_localalloc.to_bytes(4, 'little'))
emu.emu.hook_add(
    UC_HOOK_CODE,
    localalloc_hook,
    begin=fake_localalloc,
    end=fake_localalloc,
)
def wsprintfa_hook(uc: Uc, address: int, size: int, user_data: object) -> None:
    """Stand in for user32!wsprintfA and log the IP address being formatted.

    Registered as a UC_HOOK_CODE callback on the fake wsprintfA address.  It
    logs the return address, the first two stack arguments, 11 bytes read
    from the address in arg2, and the next four stack dwords joined as a
    dotted IP address.  Nothing is written to the destination buffer and EAX
    is left untouched; the hook only pops the return address and resumes at
    the caller.

    All register writes go through the ``uc`` handle passed to the callback
    rather than the module-level ``emu`` object.

    Args:
        uc: Unicorn instance that triggered the hook.
        address: Address of the hooked instruction (unused).
        size: Size of the hooked instruction (unused).
        user_data: Value supplied at hook registration (unused).
    """
    esp = uc.reg_read(UC_X86_REG_ESP)
    ret_addr = struct.unpack('<I', uc.mem_read(esp, 4))[0]
    print(f"Return address: {hex(ret_addr)}")
    arg1 = struct.unpack('<I', uc.mem_read(esp + 4, 4))[0]
    print(f"Arg1: {hex(arg1)}")
    arg2 = struct.unpack('<I', uc.mem_read(esp + 8, 4))[0]
    print(f"Arg2: {hex(arg2)}")
    # NOTE(review): 11 bytes presumably matches the length of the format
    # string at arg2 (e.g. "%d.%d.%d.%d") - confirm against the sample.
    arg2_string = uc.mem_read(arg2, 11)
    print(f"Arg2 String: {arg2_string}")
    # Read ip octets: four consecutive dwords following the two pointer args
    oct1, oct2, oct3, oct4 = struct.unpack('<4I', uc.mem_read(esp + 12, 16))
    ip_address = f"{oct1}.{oct2}.{oct3}.{oct4}"
    print(f"IP Address: {ip_address}")
    # Clean stack and ret: only the return address is popped; wsprintfA is a
    # varargs API, so the caller removes its own arguments.
    uc.reg_write(UC_X86_REG_ESP, esp + 4)
    # Set EIP to return
    uc.reg_write(UC_X86_REG_EIP, ret_addr)
## Hook wsprintfA
# Resolve the IAT slot against the emulator instance created for this run,
# the same way the LocalAlloc slot is resolved, instead of relying on a
# value left over from an earlier cell.
wsprintfa_iat = emu.get_iat_address_from_import('user32.dll','wsprintfa')
fake_wsprintfa = 0xfeed0002
emu.emu.mem_write(wsprintfa_iat, struct.pack('<I', fake_wsprintfa))
emu.emu.hook_add(UC_HOOK_CODE, wsprintfa_hook, begin=fake_wsprintfa, end=fake_wsprintfa)
# Emulate from the virtualized function until the 0xdeadbeef address is hit.
virt_fn = 0x10005727
emu.emu.emu_start(virt_fn, 0xdeadbeef)
# Ask the loader for the IAT address of kernel32.dll's
# QueryPerformanceCounter import and print it.
queryperformancecounter_iat = emu.get_iat_address_from_import(
    'kernel32.dll',
    'QueryPerformanceCounter',
)
print("QueryPerformanceCounter: " + hex(queryperformancecounter_iat))
import pemulator
import struct
from unicorn import *
from unicorn.x86_const import *
from pemulator import Emulator
# Read target and load into emulator (instruction tracing disabled here).
# The context manager closes the file handle as soon as the bytes are read
# instead of leaving it open until garbage collection.
with open('sock_virt.bin', 'rb') as target_file:
    file_data = target_file.read()
emu = Emulator(file_data, trace=False)
# Workspace: scratch mappings are handed out starting 0x1000 bytes past the
# upper bound reported by the loader.
workspace_start = emu.get_memory_upper_bound() + 0x1000
print(f"Workspace start: {hex(workspace_start)}")
# Setup Host and Return buffers (one 0x1000-byte mapping each)
ptr_result = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
ptr_host = workspace_start
emu.emu.mem_map(workspace_start, 0x1000)
workspace_start += 0x1000
# Fill host buffer with any domain string (not really needed)
emu.emu.mem_write(ptr_host, b'test.com')
# Setup arguments ecx=host, edx=result
emu.emu.reg_write(UC_X86_REG_ECX, ptr_host)
emu.emu.reg_write(UC_X86_REG_EDX, ptr_result)
# Setup fake return: write a sentinel return address at the current ESP
esp = emu.emu.reg_read(UC_X86_REG_ESP)
emu.emu.mem_write(esp, struct.pack('<I', 0xdeadbeef))
# Fake proc features no XMM
emu.emu.mem_write(0x100360b0, struct.pack('<I', 0x2))
# Load live globals - big hack lol
# (kept for reference: how g_buff was originally sliced out of good_data.bin)
#data_buff = open("good_data.bin",'rb').read()
#g_buff_offset = 0x100373F0 - 0x10033000
#g_buff = data_buff[g_buff_offset:g_buff_offset+0x4*5]
# For the demo these are the IPs copied from the global data
g_buff = b'\x98Y\xc6\xd6\xc21^\xc2\x8db\xea\x1f-\x9b\xfaZQ\x1f\xc5&'
emu.emu.mem_write(0x100373F0, g_buff)
# Setup API hooks
# Map fake API addresses: addresses inside this mapping are written into the
# IAT so that API calls land somewhere a code hook can intercept them.
emu.emu.mem_map(0xfeed0000, 0x1000)
## Hook LocalAlloc
localalloc_iat = emu.get_iat_address_from_import('kernel32.dll','localalloc')
print(f"LocalAlloc IAT: {hex(localalloc_iat)}")
def localalloc_hook(uc: Uc, address: int, size: int, user_data: object) -> None:
    """Stand in for kernel32!LocalAlloc while the virtualized code runs.

    Registered as a UC_HOOK_CODE callback on the fake LocalAlloc address, so
    it fires when emulated code calls through the patched IAT slot.  It logs
    the return address and the two stack arguments, maps fresh memory from
    the global workspace, returns the new pointer in EAX, and then simulates
    the return by popping the return address plus both arguments.

    The mapping is sized from arg2 (uBytes in the LocalAlloc prototype),
    rounded up to whole 0x1000-byte pages with a minimum of one page, so
    requests larger than a page no longer run off the end of the mapping.

    Args:
        uc: Unicorn instance that triggered the hook.
        address: Address of the hooked instruction (unused).
        size: Size of the hooked instruction (unused).
        user_data: Value supplied at hook registration (unused).
    """
    global workspace_start
    page_size = 0x1000

    esp = uc.reg_read(UC_X86_REG_ESP)
    # Stack on entry: [esp] return address, [esp+4] arg1, [esp+8] arg2.
    ret_addr, arg1, arg2 = struct.unpack('<3I', uc.mem_read(esp, 12))
    print(f"Return address: {hex(ret_addr)}")
    print(f"Arg1: {hex(arg1)}")
    print(f"Arg2: {hex(arg2)}")

    # Fake a local alloc: Unicorn mappings must be page-granular, so round
    # the requested byte count up and never map less than one page.
    alloc_size = max(page_size, (arg2 + page_size - 1) & ~(page_size - 1))
    ptr_new_mem = workspace_start
    uc.mem_map(ptr_new_mem, alloc_size)
    workspace_start += alloc_size

    # Return new mem ptr in eax
    uc.reg_write(UC_X86_REG_EAX, ptr_new_mem)
    # Clean stack and ret: 4 bytes of return address + 2 * 4 bytes of args
    uc.reg_write(UC_X86_REG_ESP, esp + 12)
    # Set EIP to return
    uc.reg_write(UC_X86_REG_EIP, ret_addr)
# Point the LocalAlloc IAT slot at a fake address and trap execution there
# with a code hook limited to that single address.
fake_localalloc = 0xfeed0001
emu.emu.mem_write(localalloc_iat, fake_localalloc.to_bytes(4, 'little'))
emu.emu.hook_add(
    UC_HOOK_CODE,
    localalloc_hook,
    begin=fake_localalloc,
    end=fake_localalloc,
)
def wsprintfa_hook(uc: Uc, address: int, size: int, user_data: object) -> None:
    """Stand in for user32!wsprintfA and log the IP address being formatted.

    Registered as a UC_HOOK_CODE callback on the fake wsprintfA address.  It
    logs the return address, the first two stack arguments, 11 bytes read
    from the address in arg2, and the next four stack dwords joined as a
    dotted IP address.  Nothing is written to the destination buffer and EAX
    is left untouched; the hook only pops the return address and resumes at
    the caller.

    All register writes go through the ``uc`` handle passed to the callback
    rather than the module-level ``emu`` object.

    Args:
        uc: Unicorn instance that triggered the hook.
        address: Address of the hooked instruction (unused).
        size: Size of the hooked instruction (unused).
        user_data: Value supplied at hook registration (unused).
    """
    esp = uc.reg_read(UC_X86_REG_ESP)
    ret_addr = struct.unpack('<I', uc.mem_read(esp, 4))[0]
    print(f"Return address: {hex(ret_addr)}")
    arg1 = struct.unpack('<I', uc.mem_read(esp + 4, 4))[0]
    print(f"Arg1: {hex(arg1)}")
    arg2 = struct.unpack('<I', uc.mem_read(esp + 8, 4))[0]
    print(f"Arg2: {hex(arg2)}")
    # NOTE(review): 11 bytes presumably matches the length of the format
    # string at arg2 (e.g. "%d.%d.%d.%d") - confirm against the sample.
    arg2_string = uc.mem_read(arg2, 11)
    print(f"Arg2 String: {arg2_string}")
    # Read ip octets: four consecutive dwords following the two pointer args
    oct1, oct2, oct3, oct4 = struct.unpack('<4I', uc.mem_read(esp + 12, 16))
    ip_address = f"{oct1}.{oct2}.{oct3}.{oct4}"
    # Blank lines around the result make it stand out in the output
    print(f"\nIP Address: {ip_address}\n")
    # Clean stack and ret: only the return address is popped; wsprintfA is a
    # varargs API, so the caller removes its own arguments.
    uc.reg_write(UC_X86_REG_ESP, esp + 4)
    # Set EIP to return
    uc.reg_write(UC_X86_REG_EIP, ret_addr)
## Hook wsprintfA
# Resolve the IAT slot against the emulator instance created for this run,
# the same way the LocalAlloc and QueryPerformanceCounter slots are
# resolved, instead of relying on a value left over from an earlier cell.
wsprintfa_iat = emu.get_iat_address_from_import('user32.dll','wsprintfa')
fake_wsprintfa = 0xfeed0002
emu.emu.mem_write(wsprintfa_iat, struct.pack('<I', fake_wsprintfa))
emu.emu.hook_add(UC_HOOK_CODE, wsprintfa_hook, begin=fake_wsprintfa, end=fake_wsprintfa)
## Set end state on QueryPerformanceCounter
# The QueryPerformanceCounter IAT slot is pointed at a fake address that has
# no hook; that address is used only as the emulation stop address below.
queryperformancecounter_iat = emu.get_iat_address_from_import('kernel32.dll','QueryPerformanceCounter')
fake_queryperformancecounter = 0xfeed0003
emu.emu.mem_write(queryperformancecounter_iat, struct.pack('<I', fake_queryperformancecounter))
# Terminate on QueryPerformanceCounter: emulation ends as soon as execution
# reaches the fake address, i.e. when the code calls that import.
virt_fn = 0x10005727
emu.emu.emu_start(virt_fn, fake_queryperformancecounter)