Overview

ADVobfuscator is a C++ string obfuscation library that is commonly used in malware (most famously by Conti ransomware). We are going to attempt to identify and decrypt strings protected with ADV using some simple Python scripting and the Unicorn emulator.

References

Decryption

Our approach is simple: we will first attempt to signature the ADV inline strings using a pattern left by the decryption loop, then we will use Unicorn to emulate the string decryption (a similar approach to our Garble and XORSTR decryption tools).

ADV Loop Signature

The loop code appears to be very similar for each string.

8A 44 0C 08                             mov     al, [esp+ecx+68h+var_60]
2C 09                                   sub     al, 9
88 44 0C 08                             mov     [esp+ecx+68h+var_60], al
41                                      inc     ecx
83 F9 03                                cmp     ecx, 3
72 F0                                   jb      short loc_4012F0
8D 44 24 08                             lea     eax, [esp+68h+var_60]

Only the registers change.

41 83 f9 ?? 72
42 83 fa ?? 72

4? 83 f? ?? 72

41 83 f9 1a 7c

Emulation Decryption

from unicorn import *
from unicorn.x86_const import *
import struct
from capstone import *
from capstone.x86 import *

# Module-level Capstone disassembler for 32-bit x86; the trace hook below
# looks up `cs` as a global when it disassembles each executed instruction.
cs = Cs(CS_ARCH_X86, CS_MODE_32)
cs.detail = True

# Sample machine code used for the single-string demo call further down
# (the notebook output for that call shows the recovered string b'about').
code = bytes.fromhex('c64424146583c408c644240d6633c9c644240e73c644240f79c644241078c6442411008a44240c8a440c0c2c0488440c0c4183f90572f0')

# State shared between decrypt() and the trace() hook:
#   stack_snapshot       - copy of the stack region taken at the first `cmp`
#   stack_string_offset  - offset (from stack_base) where the string starts
#   stack_base/stack_size - address and length of the emulated stack mapping
stack_snapshot = None
stack_string_offset = None
stack_base = 0x00100000
stack_size = 0x00100000

def trace(uc, address, size, user_data):
    """Unicorn code hook that locates the decrypted string on the stack.

    Every executed instruction is disassembled; only ``cmp`` instructions
    trigger any work. The first ``cmp`` seen stores a snapshot of the whole
    stack region. Each later ``cmp`` (until an offset has been found) diffs
    the current stack against that snapshot and records the position just
    before the first changed byte in the global ``stack_string_offset``.

    Args:
        uc: the emulator instance invoking the hook.
        address: address of the instruction about to execute.
        size: size in bytes of that instruction.
        user_data: unused.
    """
    global stack_snapshot, stack_string_offset

    insn = next(cs.disasm(uc.mem_read(address, size), address))
    #print(f"{address:#010x}:\t{insn.mnemonic}\t{insn.op_str}")
    if insn.mnemonic != 'cmp':
        return

    if stack_snapshot is None:
        # First compare: remember what the stack looks like right now.
        stack_snapshot = uc.mem_read(stack_base, stack_size)
        return

    if stack_string_offset is not None:
        # String already located; nothing more to do.
        return

    # Later compare: the first byte that changed since the snapshot is
    # presumably the second character of the string, hence the -1.
    current = uc.mem_read(stack_base, stack_size)
    first_changed = next(
        (pos for pos, (now, before) in enumerate(zip(current, stack_snapshot))
         if now != before),
        None,
    )
    if first_changed is not None:
        stack_string_offset = first_changed - 1
        


def decrypt(code, string_size):
    """Emulate a string-decryption snippet and return the recovered bytes.

    A fresh 32-bit x86 Unicorn instance is created with three zero-filled
    regions (stack, code, scratch data). ``code`` is written at the start of
    the code region and executed from its first byte until execution reaches
    the address just past its last byte. The ``trace`` hook runs on every
    instruction and records, in the global ``stack_string_offset``, where the
    string lives inside the stack region.

    Args:
        code: raw machine code containing the string setup and decrypt loop.
        string_size: number of bytes to read back from the stack.

    Returns:
        A bytearray holding ``string_size`` bytes read from the located stack
        offset, with all NUL bytes removed.

    Raises:
        ValueError: if emulation finished without the hook locating the
            string on the stack.
        unicorn.UcError: if emulation itself fails (e.g. invalid memory
            access or invalid instruction).
    """
    global stack_snapshot
    global stack_string_offset

    # Reset the hook's shared state so results from a previous run cannot leak
    # into this one.
    stack_snapshot = None
    stack_string_offset = None
    uc = Uc(UC_ARCH_X86, UC_MODE_32)

    # Setup the stack; ESP/EBP start in the middle of the region so both
    # positive and negative displacements stay inside mapped memory.
    ESP = stack_base + (stack_size // 2)
    uc.mem_map(stack_base, stack_size)
    uc.mem_write(stack_base, b"\x00" * stack_size)

    uc.reg_write(UC_X86_REG_ESP, ESP)
    uc.reg_write(UC_X86_REG_EBP, ESP)

    # Setup code
    target_base = 0x00400000
    target_size = 0x00100000
    target_end = target_base + len(code)

    uc.mem_map(target_base, target_size, UC_PROT_ALL)
    uc.mem_write(target_base, b"\x00" * target_size)
    uc.mem_write(target_base, code)

    # Scratch data region
    data_base = 0x00600000
    data_size = 0x00100000

    uc.mem_map(data_base, data_size, UC_PROT_ALL)
    uc.mem_write(data_base, b"\x00" * data_size)

    uc.hook_add(UC_HOOK_CODE, trace, None)
    uc.emu_start(target_base, target_end, 0, 0)

    # Without this check a missing offset surfaces as an opaque
    # "unsupported operand" TypeError on the address arithmetic below.
    if stack_string_offset is None:
        raise ValueError("decrypted string was not located on the emulated stack")

    stack_data = uc.mem_read(stack_base + stack_string_offset, string_size)
    return stack_data.replace(b'\x00', b'')
    
# Demo: emulate the sample snippet and read back a 5-byte string.
decrypt(code, 5)

bytearray(b'about')

import re
from capstone import *
from capstone.x86 import *

def filter_bytes(data):
    """Return the tail of ``data`` that follows its last control-flow instruction.

    ``data`` is disassembled linearly from offset 0 as 32-bit x86 (with
    Capstone's skipdata mode enabled so undecodable bytes do not stop the
    sweep). Every instruction whose mnemonic starts with ``j``, ``call``,
    ``int`` or ``ret`` moves the start marker to the byte right after it, so
    the returned slice contains no such instruction from that sweep.

    Args:
        data: candidate machine-code bytes preceding a decryption loop.

    Returns:
        ``data`` from the end of the last matching instruction onward, or all
        of ``data`` if no such instruction was found.
    """
    #print("##############filter")
    cs = Cs(CS_ARCH_X86, CS_MODE_32)
    cs.detail = True
    cs.skipdata = True
    code_start = 0
    for insn in cs.disasm(data, 0):
        #print(f"{insn.address:#010x}:\t{insn.mnemonic}\t{insn.op_str}")
        mnemonic = insn.mnemonic
        if mnemonic[0] == 'j' or mnemonic.startswith(('call', 'int', 'ret')):
            code_start = insn.address + insn.size
    #print(f"############## new start {hex(code_start)}")
    # Slice the argument itself; slicing a module-level buffer here would
    # silently return bytes unrelated to what was just disassembled.
    return data[code_start:]
    


# Module-level disassembler; the emulation trace hook resolves `cs` from
# module scope.
cs = Cs(CS_ARCH_X86, CS_MODE_32)
cs.detail = True

with open('/tmp/ObfuscatedStrings.exe', 'rb') as sample:
    file_data = sample.read()

# Grab all register variants of the decryption loop
# inc     ecx
# cmp     ecx, 3
# jb      short loc_4012F0
# 4? 83 f? ?? 72   (the short jump may also be 7c)
# re.DOTALL is required: without it `.` never matches byte 0x0A, so loops
# with a string length of 10 or a jump displacement of 0x0A would be missed.
egg = re.compile(rb'[\x40-\x43\x46]\x83[\xf8-\xfb\xfe].[\x72\x7c].', re.DOTALL)

for m in egg.finditer(file_data):
    hit_offset = m.start()
    #print(f"Testing hit {hex(hit_offset)}")
    data = m.group()
    # The cmp immediate is the loop bound, i.e. the string length.
    str_len = data[3]
    #print(f"String length {str_len}")
    # Heuristic look-back window of 40 bytes per character; clamp at the start
    # of the file so a negative index cannot wrap around to the file's end.
    code_start = max(0, hit_offset - (40 * str_len))
    #print(f"Code start: {hex(code_start)}")
    # Slide the start forward a byte at a time to recover from a disassembly
    # that began in the middle of an instruction.
    for i in range(16):
        code = file_data[code_start+i:hit_offset]
        code = filter_bytes(code) + data
        try:
            out = decrypt(code, str_len)
        except Exception:
            # Best-effort brute force: a failed emulation just means this
            # start offset was wrong. Move on instead of falling through and
            # testing a stale (or undefined) result.
            continue
        if out.isascii():
            print(out)
            break

bytearray(b'ok')
bytearray(b'you')
bytearray(b'can')
bytearray(b'decrypt')
bytearray(b'one')
bytearray(b'string')
bytearray(b'but')
bytearray(b'how')
bytearray(b'about')
bytearray(b'10?')
bytearray(b'congratulations!!!!')

TODO

Currently this works well for 32-bit ADVobfuscator, but there are many ADV-like string encryption routines that this could be extended to handle if we implemented globals support.

Globals

In many algorithms the string is stored in a global and moved onto the stack using the xmm registers. To handle this we need to...

Map the PE data/rdata section into memory
Adjust the unicorn addresses to match the mapped PE file
Run! (not sure about xmm/ymm unicorn support)

64-bit

New egg to find the loop
Update Unicorn to use 64-bit mode