Extended ADVObfuscator
Extending our ADV automated string decryption tool to handle custom cases
Overview
ADVobfuscator is a C++ string obfuscation library that is commonly used in malware (most famously by Conti ransomware). We are going to attempt to identify and decrypt strings protected with ADV using some simple Python scripting and the Unicorn emulator.
We created an ADV decryption tool that can handle pure ADV but we want to extend this to handle custom variants.
References
Samples
The following test samples match our loop detection. They are available on UnpacMe (must be signed in).
- 765d19b4728008c1589f222d1fa49f1cb7310204c7a4574eb9f930d0544bed7b adv2.bin
- 3a987fd51423f186242c3fbbdab59113c11d4ac67109e90ab948d5d0591699fb adv3.bin
- a08c766724927d41cf29f736eca1ef557ba45debd3e29fa066180ec66426dc4f adv6.bin
- 4e0e4660d283270ae7abac2520b0bbd19324ff879c079ddb771c072bc7bbf60e amawhat.bin
Decryption
Our approach is simple: we will first attempt to signature-match the ADV inline strings using a pattern left by the decryption loop, then use Unicorn to emulate the string decryption (a similar approach to our Garble and XORSTR decryption tools).
ADV Loop Signature
The loop code appears to be very similar for each string.
8A 44 0C 08 mov al, [esp+ecx+68h+var_60]
2C 09 sub al, 9
88 44 0C 08 mov [esp+ecx+68h+var_60], al
41 inc ecx
83 F9 03 cmp ecx, 3
72 F0 jb short loc_4012F0
8D 44 24 08 lea eax, [esp+68h+var_60]
Only the registers change.
41 83 f9 ?? 72
42 83 fa ?? 72
4? 83 f? ?? 72
41 83 f9 ?? 7c
from unicorn import *
from unicorn.x86_const import *
import struct
from capstone import *
from capstone.x86 import *
# Module-level 32-bit x86 disassembler. The trace() hook uses it to decode each
# emulated instruction; detail mode is enabled because trace() inspects operands.
cs = Cs(CS_ARCH_X86, CS_MODE_32)
cs.detail = True
class EmulatorData:
    """Describe the image layout that decrypt() maps into the emulator.

    Holds the image base plus the RVA, mapped size and raw bytes of the
    .text, .data and .rdata sections. Only the image base and the .text
    RVA/size have non-None defaults; everything else stays None until a
    caller fills it in.
    """

    def __init__(self):
        # Image base and .text placement used when nothing has been parsed.
        self.base = 0x00400000
        self.text_section_rva, self.text_section_size = 0x1000, 0x100000
        self.text_section_data = None

        # .data is optional: decrypt() only maps it when the RVA is set.
        self.data_section_rva = self.data_section_size = self.data_section_data = None

        # .rdata is optional as well and handled the same way.
        self.rdata_section_rva = self.rdata_section_size = self.rdata_section_data = None
# Sample x86 snippet passed to decrypt() as a quick check. It ends with the
# loop tail `inc ecx / cmp ecx, 5 / jb` (41 83 f9 05 72 f0).
code = bytes.fromhex('c64424146583c408c644240d6633c9c644240e73c644240f79c644241078c6442411008a44240c8a440c0c2c0488440c0c4183f90572f0')
# Copy of the emulated stack captured by trace(); reset by decrypt() on every run.
stack_snapshot = None
# Offset from stack_base that trace() records for the start of the string;
# None until trace() has found it.
stack_string_offset = None
# Address and size of the region decrypt() maps as the emulated stack.
stack_base = 0x00100000
stack_size = 0x00100000
# Immediate value trace() looks for in `cmp <op>, imm`; set by decrypt() to the
# expected string length.
g_string_size = 0
def trace(uc, address, size, user_data):
    """Unicorn code hook that finds where the loop writes on the emulated stack.

    Each executed instruction is disassembled. Only `cmp <op>, imm` with
    imm equal to g_string_size is acted on:

    * the first time it is seen, the whole stack region is snapshotted;
    * the next time, the stack is compared with that snapshot and the index
      of the first differing byte, minus one, is stored in
      stack_string_offset.

    Once stack_string_offset is set the hook does nothing further.

    Args:
        uc: The Unicorn instance running the emulation.
        address: Address of the instruction about to execute.
        size: Size in bytes of that instruction.
        user_data: Unused.
    """
    global stack_string_offset
    global stack_snapshot
    global g_string_size

    insn = next(cs.disasm(uc.mem_read(address, size), address))
    # Uncomment to follow execution instruction by instruction:
    #print(f"{address:#010x}:\t{insn.mnemonic}\t{insn.op_str}")

    # Ignore everything except the loop-bound comparison.
    if insn.mnemonic != 'cmp':
        return
    bound = insn.operands[1]
    if bound.type != X86_OP_IMM or bound.value.imm != g_string_size:
        return

    # First hit: remember what the stack looked like.
    if stack_snapshot is None:
        stack_snapshot = uc.mem_read(stack_base, stack_size)
        return

    # The offset has already been determined on an earlier hit.
    if stack_string_offset is not None:
        return

    # Later hit: the first byte that changed since the snapshot was written by
    # this loop iteration, so the string is taken to start one byte before it.
    stack_now = uc.mem_read(stack_base, stack_size)
    for position, (current, previous) in enumerate(zip(stack_now, stack_snapshot)):
        if current != previous:
            stack_string_offset = position - 1
            break
def decrypt(code, string_size, emulator_data=None):
    """Emulate a code snippet and read back the string its loop leaves on the stack.

    The snippet is written at the start of the text mapping and run from its
    first byte to its end with trace() installed as a code hook. trace()
    records where on the stack the loop wrote; that location is then read.

    Args:
        code: Raw 32-bit x86 bytes to execute.
        string_size: Loop bound trace() searches for in the loop's `cmp`, and
            the number of bytes initially read back from the stack.
        emulator_data: Image layout (base address and section contents). When
            omitted, a fresh EmulatorData() with its defaults is used.

    Returns:
        The bytes read from the stack with every null byte removed, or None
        if trace() never determined a stack offset.

    Emulation errors raised by Unicorn are not caught here.
    """
    global stack_snapshot
    global stack_string_offset
    global g_string_size

    # Build the default per call instead of sharing one instance created at
    # function-definition time.
    if emulator_data is None:
        emulator_data = EmulatorData()

    # Reset the state shared with the trace() hook.
    g_string_size = string_size
    stack_snapshot = None
    stack_string_offset = None

    base = emulator_data.base
    uc = Uc(UC_ARCH_X86, UC_MODE_32)

    # Setup the stack: ESP/EBP start in the middle of the region so accesses
    # on either side of ESP stay mapped.
    ESP = stack_base + (stack_size // 2)
    uc.mem_map(stack_base, stack_size)
    uc.mem_write(stack_base, b"\x00" * stack_size)
    uc.reg_write(UC_X86_REG_ESP, ESP)
    uc.reg_write(UC_X86_REG_EBP, ESP)

    # Setup code: map the whole text region, zero it, then place the snippet
    # at its start.
    target_base = base + emulator_data.text_section_rva
    target_size = emulator_data.text_section_size
    target_end = target_base + len(code)
    uc.mem_map(target_base, target_size, UC_PROT_ALL)
    uc.mem_write(target_base, b"\x00" * target_size)
    uc.mem_write(target_base, code)

    # Optional sections are mapped only when the caller supplied them.
    if emulator_data.data_section_rva is not None:
        data_section_base = base + emulator_data.data_section_rva
        data_section_size = emulator_data.data_section_size
        uc.mem_map(data_section_base, data_section_size, UC_PROT_ALL)
        uc.mem_write(data_section_base, emulator_data.data_section_data)

    if emulator_data.rdata_section_rva is not None:
        rdata_section_base = base + emulator_data.rdata_section_rva
        rdata_section_size = emulator_data.rdata_section_size
        uc.mem_map(rdata_section_base, rdata_section_size, UC_PROT_ALL)
        uc.mem_write(rdata_section_base, emulator_data.rdata_section_data)

    # trace() disassembles with the module-level `cs`, so no local
    # disassembler is needed here.
    uc.hook_add(UC_HOOK_CODE, trace, None)
    # Timeout and instruction count are both passed as 0.
    uc.emu_start(target_base, target_end, 0, 0)

    if stack_string_offset is None:
        return None

    stack_data = uc.mem_read(stack_base + stack_string_offset, string_size)
    # If our string starts with a null byte assume this is a wide string:
    # step one byte back and read twice as many bytes.
    if stack_data[0] == 0:
        stack_data = uc.mem_read(stack_base + stack_string_offset - 1, string_size * 2)
    # A null second byte triggers a re-read starting two bytes before the
    # recorded offset.
    # NOTE(review): this check also fires after the re-read above; confirm
    # whether it was meant to be an alternative (elif) rather than a follow-up.
    if stack_data[1] == 0:
        stack_data = uc.mem_read(stack_base + stack_string_offset - 2, string_size * 2)
    return stack_data.replace(b'\x00', b'')
# Quick check of decrypt() on the sample snippet in `code`; 5 is the loop bound
# from its trailing `cmp ecx, 5`.
decrypt(code, 5)
import re
from capstone import *
from capstone.x86 import *
import pefile
def filter_bytes(data):
    """Trim and patch a candidate code snippet so it can be emulated on its own.

    The bytes are disassembled linearly from offset 0 (undecodable bytes are
    skipped rather than ending the walk) and two adjustments are made:

    * The start of usable code is moved past every `call*`, `int*` and
      `ret*` instruction, and, once a second jump has been seen since the
      last such instruction, to just after the previous jump.
    * Every jump whose target lies beyond the end of `data` is overwritten
      with NOPs. Jumps without an immediate target (register or memory
      operand) cannot be resolved here and are NOPed as well.

    Args:
        data: Raw 32-bit x86 bytes preceding the decryption loop tail.

    Returns:
        A tuple `(code_start, patched)` where `code_start` is the offset in
        `data` at which the kept code begins and `patched` is the patched
        bytes from that offset onward.
    """
    cs = Cs(CS_ARCH_X86, CS_MODE_32)
    cs.detail = True
    # Keep walking over bytes that do not decode instead of stopping early.
    cs.skipdata = True

    code_start = 0
    last_jump = None
    # The generator keeps iterating over the bytes object it was given, so
    # rebinding `data` below does not disturb the walk; patches keep the
    # length unchanged, so offsets stay valid.
    for insn in cs.disasm(data, 0):
        address = insn.address
        insn_end = address + insn.size

        # Control leaves the snippet here, so nothing before it is useful.
        if insn.mnemonic.startswith(('call', 'int', 'ret')):
            code_start = insn_end
            last_jump = None

        if insn.mnemonic[0] == 'j':
            if last_jump is not None:
                code_start = last_jump
            last_jump = insn_end

            target = insn.operands[0]
            # Only an immediate operand holds a target address; reading
            # `.value.imm` on a register/memory operand yields a meaningless
            # number, so such jumps are treated as leaving the snippet.
            leaves_snippet = target.type != X86_OP_IMM or target.value.imm > len(data)
            if leaves_snippet:
                data = data[:address] + b'\x90' * insn.size + data[insn_end:]

    return code_start, data[code_start:]
def section_align(size, alignment=0x1000):
    """Round `size` up to the next multiple of `alignment`.

    Args:
        size: Size in bytes to round up.
        alignment: Positive alignment granularity in bytes. Defaults to
            0x1000, the value this function previously hard-coded.

    Returns:
        The smallest multiple of `alignment` that is >= `size`; a size that
        is already aligned is returned unchanged.

    Raises:
        ValueError: If `alignment` is not positive.
    """
    if alignment <= 0:
        raise ValueError("alignment must be a positive integer")
    # Ceiling division via negated floor division, then scale back up.
    return -(-size // alignment) * alignment
# Module-level disassembler (32-bit x86, detail mode) used by the trace() hook.
cs = Cs(CS_ARCH_X86, CS_MODE_32)
cs.detail = True

# Read the sample; the context manager closes the file handle right away.
with open('/tmp/adv6.bin', 'rb') as sample_file:
    file_data = sample_file.read()
pe = pefile.PE(data=file_data)

emulator_data = EmulatorData()

# Parse out PE sections
emulator_data.base = pe.OPTIONAL_HEADER.ImageBase
for s in pe.sections:
    if s.Name[:5] == b'.text':
        emulator_data.text_section_rva = s.VirtualAddress
        emulator_data.text_section_size = section_align(s.Misc_VirtualSize)
        emulator_data.text_section_data = s.get_data()
    if s.Name[:5] == b'.data':
        emulator_data.data_section_rva = s.VirtualAddress
        emulator_data.data_section_size = section_align(s.Misc_VirtualSize)
        emulator_data.data_section_data = s.get_data()
    if s.Name[:6] == b'.rdata':
        emulator_data.rdata_section_rva = s.VirtualAddress
        emulator_data.rdata_section_size = section_align(s.Misc_VirtualSize)
        emulator_data.rdata_section_data = s.get_data()

# text_section_rva has a non-None default, so the section bytes are what
# actually tell us whether a .text section was found.
if emulator_data.text_section_data is None:
    raise ValueError("No .text section found in sample")

# Grab all register variants of the decryption loop
#   inc ecx
#   cmp ecx, 3
#   jb short loc_4012F0
#   4? 83 f? ?? 72
egg = rb'[\x40-\x43\x46]\x83[\xf8-\xfb\xfe].[\x72\x7c].'
# DOTALL is required: without it `.` does not match byte 0x0A, so loops over
# 10-character strings (cmp reg, 0Ah) would never be found.
egg_re = re.compile(egg, re.DOTALL)

for m in egg_re.finditer(emulator_data.text_section_data):
    hit_offset = m.start()
    data = m.group()
    # The cmp immediate is the loop bound, i.e. the string length.
    str_len = data[3]
    # Look back 40 bytes per character for the code that builds the string,
    # clamped to the start of the section.
    code_start = max(hit_offset - (40 * str_len), 0)

    last_filter_offset = None
    # The lookback may start in the middle of an instruction, so try up to
    # 16 consecutive start offsets.
    for i in range(16):
        code = emulator_data.text_section_data[code_start + i:hit_offset]
        filter_offset, tmp_code = filter_bytes(code)
        # Skip start offsets that filter down to the snippet already tried.
        if last_filter_offset == code_start + filter_offset + i:
            continue
        last_filter_offset = code_start + filter_offset + i
        # Re-attach the matched loop tail after the filtered setup code.
        code = tmp_code + data
        try:
            out = decrypt(code, str_len, emulator_data=emulator_data)
        except Exception:
            # Best effort: a failed emulation just means this start offset was
            # wrong. Ctrl-C (KeyboardInterrupt) is deliberately not swallowed.
            continue
        if out is not None and out.isascii():
            print(f"{out}")
            break
# test= 0x0041A8E1 - emulator_data.text_section_rva - emulator_data.base
# out = None
# hit_offset = test
# print(f"Testing hit {hex(hit_offset + emulator_data.text_section_rva + emulator_data.base)}")
# data = bytes.fromhex('4183f92c7ce7')
# str_len = data[3]
# #print(f"String length {str_len}")
# code_start = hit_offset - (40 * str_len)
# if code_start < 0:
# code_start = 0
# # print("## loop")
# # for insn in cs.disasm(data, 0):
# # print(f"{address:#010x}:\t{insn.mnemonic}\t{insn.op_str}")
# # print("## ##")
# # for i in range(16):
# i = 5
# print(f"Code start: {hex(code_start + i + emulator_data.text_section_rva + emulator_data.base)}")
# code = emulator_data.text_section_data[code_start+i:hit_offset]
# code = filter_bytes(code) + data
# try:
# out = decrypt(code, str_len, emulator_data=emulator_data)
# except Exception as e:
# print(f"Failed {e}")
# if out.isascii():
# print(out)