ADVObfuscator
Automated string decryption
Overview
ADVobfuscator is a C++ string obfuscation library that is commonly used in malware (most famously by Conti ransomware). We are going to attempt to identify and decrypt strings protected with ADVobfuscator using some simple Python scripting and the Unicorn emulator.
References
Decryption
Our approach is simple: we first attempt to signature the ADV inline strings using a byte pattern left by the decryption loop, then we use Unicorn to emulate the string decryption (a similar approach to our Garble and XORSTR decryption tools).
ADV Loop Signature
The loop code appears to be very similar for each string.
8A 44 0C 08 mov al, [esp+ecx+68h+var_60]
2C 09 sub al, 9
88 44 0C 08 mov [esp+ecx+68h+var_60], al
41 inc ecx
83 F9 03 cmp ecx, 3
72 F0 jb short loc_4012F0
8D 44 24 08 lea eax, [esp+68h+var_60]
Only the registers change (and, in some builds, the conditional jump: `jb` 0x72 vs. `jl` 0x7c).
41 83 f9 ?? 72
42 83 fa ?? 72
4? 83 f? ?? 72
41 83 f9 1a 7c
from unicorn import *
from unicorn.x86_const import *
import struct
from capstone import *
from capstone.x86 import *
# Shared 32-bit x86 Capstone disassembler; the trace hook uses it to decode
# each emulated instruction.
cs = Cs(CS_ARCH_X86, CS_MODE_32)
cs.detail = True

# Hex-encoded sample stub used for the test call to decrypt() further down.
# Its tail (41 83 f9 05 72 f0) is the inc/cmp/jb loop signature.
code = bytes.fromhex(
    'c64424146583c408c644240d6633c9c644240e73c644240f79c644241078c6442411008a44240c8a440c0c2c0488440c0c4183f90572f0'
)

# Address and size of the region decrypt() maps as the emulated stack.
stack_base = 0x00100000
stack_size = 0x00100000

# State shared between decrypt() and the trace hook: a copy of the stack taken
# at the first `cmp`, and the resolved offset of the string inside the stack.
stack_snapshot = None
stack_string_offset = None
def trace(uc, address, size, user_data):
    """Unicorn code hook that locates the decoded string on the emulated stack.

    Each executed instruction is disassembled with the module-level ``cs``;
    only ``cmp`` instructions are acted upon:

    * on the first ``cmp`` seen, a copy of the whole stack region is stored
      in ``stack_snapshot``;
    * on later ``cmp`` instructions, while ``stack_string_offset`` is still
      unset, the live stack is compared with that snapshot and
      ``stack_string_offset`` becomes the index one byte before the first
      difference.

    Results are published through the ``stack_snapshot`` and
    ``stack_string_offset`` globals; the hook returns nothing.
    """
    global stack_string_offset
    global stack_snapshot

    raw = uc.mem_read(address, size)
    current = next(cs.disasm(raw, address))
    if current.mnemonic != 'cmp':
        return

    if stack_snapshot is None:
        # First loop compare: remember what the stack looks like right now.
        stack_snapshot = uc.mem_read(stack_base, stack_size)
        return

    if stack_string_offset is not None:
        # String already located; nothing left to do.
        return

    # The first byte that changed since the snapshot is presumably the second
    # character of the string (the first one was already decoded when the
    # snapshot was taken), hence the -1.
    live_stack = uc.mem_read(stack_base, stack_size)
    for position, value in enumerate(live_stack):
        if value != stack_snapshot[position]:
            stack_string_offset = position - 1
            break
def decrypt(code, string_size):
    """Emulate a string-decryption stub and return the decoded bytes.

    A fresh 32-bit x86 Unicorn instance is created with three zero-filled
    regions (stack, code, scratch data). ``code`` is written to the start of
    the code region and executed from its first byte until execution reaches
    the address just past its last byte. The ``trace`` hook runs on every
    instruction and records, in the ``stack_string_offset`` global, where the
    decoded string sits inside the stack region.

    Args:
        code: Raw x86 machine code of the stub, ending with the loop tail.
        string_size: Number of bytes to read back from the stack.

    Returns:
        The bytes read from the stack with all NUL bytes removed (the
        bytes-like object produced by ``uc.mem_read(...).replace``).

    Raises:
        ValueError: If the hook never located the string, i.e. fewer than two
            ``cmp`` instructions executed or the stack did not change between
            them.
        Exception: Any error raised by Unicorn during mapping or emulation is
            propagated unchanged.
    """
    global stack_snapshot
    global stack_string_offset

    # Reset state left over from a previous call; the trace hook fills it in.
    stack_snapshot = None
    stack_string_offset = None

    uc = Uc(UC_ARCH_X86, UC_MODE_32)

    # Setup the stack: ESP/EBP start in the middle of the region so the stub
    # can address both above and below the stack pointer.
    esp = stack_base + (stack_size // 2)
    uc.mem_map(stack_base, stack_size)
    uc.mem_write(stack_base, b"\x00" * stack_size)
    uc.reg_write(UC_X86_REG_ESP, esp)
    uc.reg_write(UC_X86_REG_EBP, esp)

    # Setup code
    target_base = 0x00400000
    target_size = 0x00100000
    target_end = target_base + len(code)
    uc.mem_map(target_base, target_size, UC_PROT_ALL)
    uc.mem_write(target_base, b"\x00" * target_size)
    uc.mem_write(target_base, code)

    # Extra zero-filled region mapped at a separate address.
    data_base = 0x00600000
    data_size = 0x00100000
    uc.mem_map(data_base, data_size, UC_PROT_ALL)
    uc.mem_write(data_base, b"\x00" * data_size)

    uc.hook_add(UC_HOOK_CODE, trace, None)
    uc.emu_start(target_base, target_end, 0, 0)

    # Without this check the read below fails with an opaque
    # "int + NoneType" TypeError.
    if stack_string_offset is None:
        raise ValueError("decryption loop did not modify the stack; string not located")

    stack_data = uc.mem_read(stack_base + stack_string_offset, string_size)
    return stack_data.replace(b'\x00', b'')
# Smoke test: emulate the sample stub held in `code` and read back 5 bytes of
# decoded string from the stack.
decrypt(code, 5)
import re
from capstone import *
from capstone.x86 import *
def filter_bytes(data):
    """Return the tail of ``data`` that follows its last control-flow instruction.

    ``data`` is disassembled linearly as 32-bit x86 starting at offset 0, with
    Capstone's skipdata mode enabled. Each instruction whose mnemonic starts
    with ``j``, ``call``, ``int`` or ``ret`` moves the start marker to the
    byte immediately after it, so the returned slice contains no such
    instruction as decoded by this sweep.

    Args:
        data: Candidate machine-code bytes preceding a decryption loop.

    Returns:
        ``data`` from the end of the last matching instruction onwards, or all
        of ``data`` if none was found.
    """
    md = Cs(CS_ARCH_X86, CS_MODE_32)
    md.detail = True
    md.skipdata = True

    branch_prefixes = ('j', 'call', 'int', 'ret')
    code_start = 0
    for insn in md.disasm(data, 0):
        if insn.mnemonic.startswith(branch_prefixes):
            code_start = insn.address + insn.size

    # Slice the argument itself, not the module-level `code` variable.
    return data[code_start:]
cs = Cs(CS_ARCH_X86, CS_MODE_32)
cs.detail = True

with open('/tmp/ObfuscatedStrings.exe', 'rb') as sample:
    file_data = sample.read()

# Grab all register variants of the decryption loop
#   inc  ecx
#   cmp  ecx, 3
#   jb   short loc_4012F0
# 4? 83 f? ?? 72
egg = rb'[\x40-\x43\x46]\x83[\xf8-\xfb\xfe].[\x72\x7c].'

# re.DOTALL is required: the two wildcard bytes are the string length and the
# jump displacement, and without the flag '.' never matches 0x0a, so e.g.
# 10-byte strings would be silently skipped.
for m in re.finditer(egg, file_data, re.DOTALL):
    hit_offset = m.start()
    data = m.group()
    # Immediate operand of the cmp = loop bound = string length.
    str_len = data[3]
    # Heuristic: look back 40 bytes per character for the stack-string setup.
    code_start = hit_offset - (40 * str_len)
    # The guessed start may land mid-instruction, so try 16 consecutive
    # offsets until one emulates cleanly.
    for i in range(16):
        # Clamp at 0 so hits near the start of the file do not turn into a
        # negative index that slices from the end of the file.
        code = file_data[max(code_start + i, 0):hit_offset]
        code = filter_bytes(code) + data
        try:
            out = decrypt(code, str_len)
        except Exception:
            # A misaligned or bogus candidate is expected to fail emulation;
            # move on rather than inspecting a missing or stale `out`.
            continue
        if out.isascii():
            print(out)
            break
TODO
Currently this works well for 32-bit ADVobfuscator, but there are many ADV-like string encryption routines that it could be extended to handle if we implemented support for globals.
Globals
In many algorithms the string is stored in a global and moved onto the stack using the xmm registers. To handle this we need to...
- Map the PE data/rdata section into memory
- Adjust the unicorn addresses to match the mapped PE file
- Run! (not sure about xmm/ymm unicorn support)
64-bit
- New egg to find the loop
- Update Unicorn to use 64-bit mode