Reversing Tips With Python3
Tips for reverse engineering with Python3
- Overview
- Tip 1 - Use Jupyter Notebooks and A Git Repo
- Tip 2 - Remember Byte Strings Are Not Strings
- Example for Tips 3-5
- Tip 3 - Hex Encoding Binary Data
- Tip 4 - Use Struct To Convert Between Data and Types
- Tip 5 - Use Custom Struct Class To Parse Binary Data
- Tip 6 - Parse Stream With BytesIO
- Tip 7 - Using Fundamental Data Types With ctypes
- Tip 8 - Parse Binary Data Straight To Python Structure
- Tip 9 - Call C Function from DLL
import binascii
# The same four characters, once as text (str) and once as raw bytes.
string_example = "test"
byte_array_example = b"test"

# str -> bytes: text has to be encoded with a named codec
print(string_example.encode('utf-8'))

# bytes -> str: raw bytes have to be decoded with a named codec
print(byte_array_example.decode('utf-8'))

# Sample blob shared by Tips 3-5, written out field by field.
example_data = (
    b'\x02\x00\x00\x00'        # little-endian uint32 count of strings: 2
    b'\x00'                    # string 1: wide flag off
    b'\x04\x00\x00\x00'        # string 1: length 4
    b'test'                    # string 1: one byte per character
    b'\x01'                    # string 2: wide flag on
    b'\x04\x00\x00\x00'        # string 2: length 4
    b't\x00e\x00s\x00t\x00'    # string 2: two bytes per character
)
def unhex(hex_string):
    """Decode hexadecimal text into the raw bytes it represents.

    Args:
        hex_string: Hex digits, either as ``str`` or as a bytes-like object.

    Returns:
        bytes: The decoded binary data.

    Raises:
        binascii.Error: If the input has an odd number of digits or
            contains a non-hexadecimal character.
    """
    import binascii

    # isinstance (rather than type() ==) so str subclasses are treated as
    # text as well.
    if isinstance(hex_string, str):
        hex_string = hex_string.encode('utf-8')
    return binascii.unhexlify(hex_string)
def tohex(data):
    """Return the hexadecimal representation of *data*.

    Args:
        data: The value to encode. A ``str`` is first encoded as UTF-8;
            a bytes-like object is used as-is.

    Returns:
        bytes: Two lowercase hex digits per input byte.
    """
    import binascii

    # isinstance (rather than type() ==) so str subclasses are encoded too;
    # hexlify itself rejects text and would raise TypeError.
    if isinstance(data, str):
        data = data.encode('utf-8')
    return binascii.hexlify(data)
import struct

# Tip 3: dump the sample blob as hex text.
print("This is hex encoded data: %r" % tohex(example_data))

# Tip 4: the blob starts with a little-endian unsigned 32-bit count.
(number_of_strings,) = struct.unpack_from('<I', example_data)
print("Number of strings: %d" % number_of_strings)

# Tip 5: a single serialized string record used by the class example.
example_string = b'\x00\x04\x00\x00\x00test'
class EXAMPLE_STRING:
    """One length-prefixed string record.

    Serialized layout, as read and written by this class:
        1 byte   wide flag (struct '?'); true means the text is UTF-16LE
        4 bytes  little-endian uint32 length
        payload  ``length`` bytes decoded as UTF-8, or ``length * 2`` bytes
                 decoded as UTF-16LE when the wide flag is set
    """

    def __init__(self):
        self.is_wide_string = False
        self.string_length = 0
        # The decoded text is a str, so start from an empty str (not b'');
        # otherwise pack() on a fresh instance fails on bytes.encode.
        self.string = ''

    def from_buffer_copy(self, data):
        """Populate this record from the beginning of *data*.

        Args:
            data: Bytes with the record at offset 0. Anything after the
                record is ignored.

        Returns:
            int: Size of the record in bytes as declared by its header
            (5 header bytes plus the payload size).

        Raises:
            struct.error: If *data* is shorter than the 5-byte header.
            UnicodeDecodeError: If the payload is not valid in the codec
                selected by the wide flag.
        """
        ptr = 0
        self.is_wide_string = struct.unpack_from('?', data, ptr)[0]
        ptr += 1
        self.string_length = struct.unpack_from('<I', data, ptr)[0]
        ptr += 4
        if self.is_wide_string:
            # Wide text uses two bytes per unit of string_length.
            payload_size = self.string_length * 2
            codec = 'utf-16le'
        else:
            payload_size = self.string_length
            codec = 'utf-8'
        self.string = data[ptr:ptr + payload_size].decode(codec)
        return ptr + payload_size

    def pack(self):
        """Serialize this record back to bytes.

        The stored ``string_length`` is written unchanged (it is not
        recomputed from ``string``); the text is encoded according to the
        current ``is_wide_string`` flag.

        Returns:
            bytes: Header followed by the encoded text.
        """
        codec = 'utf-16le' if self.is_wide_string else 'utf-8'
        # '<' disables padding, so '?I' is exactly 1 + 4 header bytes.
        header = struct.pack('<?I', self.is_wide_string, self.string_length)
        return header + self.string.encode(codec)
# Round-trip demo: parse one serialized record, inspect its fields, then
# re-serialize it as a wide string.
print("Example string data: %r" % example_string)
es = EXAMPLE_STRING()
es.from_buffer_copy(example_string)
print("Example is wide: %s" % es.is_wide_string)
print("Example string length: %d" % es.string_length)
print("Example string: %s" % es.string)
# Flipping the flag only changes how pack() encodes the text; the stored
# string_length is written back as-is.
es.is_wide_string = True
print("Example string data converted to wide: %r" % es.pack())
class EXAMPLE_STRINGS:
    """A counted sequence of EXAMPLE_STRING records.

    Serialized layout: a little-endian uint32 record count followed by that
    many EXAMPLE_STRING records stored back to back.
    """

    def __init__(self):
        self.length = 0
        self.strings = []

    def from_buffer_copy(self, data):
        """Populate this container from the beginning of *data*.

        Any records held from a previous parse are discarded first, so
        ``strings`` always matches the buffer parsed last.

        Args:
            data: Bytes with the count at offset 0 followed by the records.

        Raises:
            struct.error: If *data* ends before the count or a record header.
        """
        ptr = 0
        self.length = struct.unpack_from('<I', data, ptr)[0]
        ptr += 4
        # Start from an empty list; appending to the old one would mix
        # records from earlier buffers with the new ones.
        self.strings = []
        for _ in range(self.length):
            record = EXAMPLE_STRING()
            record.from_buffer_copy(data[ptr:])
            # The record's size is taken from its re-serialized form.
            ptr += len(record.pack())
            self.strings.append(record)

    def pack(self):
        """Serialize the count and every held record.

        The stored ``length`` is written unchanged (it is not recomputed
        from ``strings``).

        Returns:
            bytes: The count followed by each record's packed bytes.
        """
        header = struct.pack('<I', self.length)
        return header + b''.join(s.pack() for s in self.strings)
# Parse the whole sample blob: a count followed by that many string records.
# Note that `es` is rebound here from a single record to the container.
es = EXAMPLE_STRINGS()
es.from_buffer_copy(example_data)
print("Number of strings: %d" % es.length)
# The two records differ in their wide flag.
print("First string is wide: %s" % es.strings[0].is_wide_string)
print("First string: %s" % es.strings[0].string)
print("Second string is wide: %s" % es.strings[1].is_wide_string)
print("Second string: %s" % es.strings[1].string)
import io
import struct

example_string = b'\x00\x04\x00\x00\x00test'
bstream = io.BytesIO(example_string)  # wrap the bytes in a file-like stream

# Every read() consumes bytes and advances the stream position, so no manual
# offset bookkeeping is needed.
(is_wide_string,) = struct.unpack('?', bstream.read(1))
(string_length,) = struct.unpack('<I', bstream.read(4))
# Keep the header value in string_length and track the payload size in bytes
# separately (wide text uses two bytes per unit).
byte_count = string_length * 2 if is_wide_string else string_length
strings = bstream.read(byte_count).decode('utf-16le' if is_wide_string else 'utf-8')
print(f'is_wide_string: {is_wide_string}')
print(f'string_length: {string_length}')
print(f'string: {strings}', end="\n\n")

# Tip 6.1 - Using lambda
bstream.seek(0, 0)  # rewind to the beginning of the stream
read_byte = lambda bs: bs.read(1)[0]
read_dword = lambda bs: struct.unpack("<I", bs.read(4))[0]
read_utf16le = lambda bs, s: bs.read(s).decode("utf-16le")
read_utf8 = lambda bs, s: bs.read(s).decode("utf-8")

# Drive the parse from the values just read with the helpers, not from the
# variables left over from the section above.
wide_flag = read_byte(bstream)
char_count = read_dword(bstream)
print(f'is_wide_string: {wide_flag}')
print(f'string_length: {char_count}')
if wide_flag:
    print(f'string: {read_utf16le(bstream, char_count * 2)}')
else:
    print(f'string: {read_utf8(bstream, char_count)}')
Tip 7 - Using Fundamental Data Types With ctypes
e.g., useful for reimplementing a C pseudocode algorithm in Python
Reading the ctypes documentation is highly recommended for reverse engineers: https://docs.python.org/3/library/ctypes.html
# Part 1: a plain Python int has no fixed width, so it does not behave like a
# 32-bit register.
print("WRONG :")
DWORD = 0xFFFFFFFF
print(f"DWORD = {hex(DWORD)}")
DWORD += 1  # grows past 32 bits instead of wrapping to 0
print(f"DWORD+1 = {hex(DWORD)} ; WRONG! DWORD size is 4 bytes")
DWORD = 0x0
print(f"DWORD = {hex(DWORD)}")
DWORD -= 1  # becomes the negative number -1 instead of 0xFFFFFFFF
print(f"DWORD-1 = {hex(DWORD)} ; HUM ?!", end="\n\n")
import ctypes
# Part 2: ctypes.c_uint32 stores exactly 32 bits, so values assigned through
# .value wrap around the way a C unsigned 32-bit integer does.
print("GOOD :")
DWORD = ctypes.c_uint32(0xFFFFFFFF)
print(f"DWORD = {hex(DWORD.value)}")
DWORD.value += 1  # wraps to 0
print(f"DWORD+1 = {hex(DWORD.value)} ; GOOD!")
DWORD = ctypes.c_uint32(0x0)
print(f"DWORD = {hex(DWORD.value)}")
DWORD.value -= 1  # wraps to 0xFFFFFFFF
print(f"DWORD-1 = {hex(DWORD.value)}", end="\n\n")
# CAST DATA
# Reinterpret the first two bytes of the 32-bit value as a 16-bit value.
EAX = ctypes.c_uint32(0xAAAABBBB)
AX = ctypes.cast(ctypes.addressof(EAX), ctypes.POINTER(ctypes.c_uint16)) # view the same memory as uint16
print("EAX =", hex(EAX.value))
# NOTE(review): the first two bytes are the low half (0xBBBB) only on a
# little-endian host -- confirm for the target platform.
print("AX =", hex(AX.contents.value))
from ctypes import Structure
from ctypes import wintypes
import io
class UNICODE_STRING(Structure):
    """ctypes layout for a counted wide string with inline storage.

    Two USHORT header fields are followed by a fixed 1024-element WCHAR
    array embedded directly in the structure.
    """
    _fields_ = [
        ('Length', wintypes.USHORT),
        ('MaximumLength', wintypes.USHORT),
        ('Buffer', wintypes.WCHAR*1024) # Fixed capacity: at most 1024 wide chars; the author warns that longer input crashes
        # NOTE(review): a bounded copy into the structure would more likely
        # truncate than crash -- confirm.
        # To keep the example concise, the buffer length is not managed
        # dynamically.
    ]
# UNICODE_STRING data extracted from memory/raw file:
# two 2-byte header values (0x000F and 0x0011 when read little-endian)
# followed by 2-byte characters and zero padding.
data = b'\x0F\x00\x11\x00\x48\x00\x65\x00\x6c\x00\x6c\x00\x6f\x00\x20\x00\x74\x00\x68\x00\x65\x00\x20\x00\x77\x00\x6f\x00\x72\x00\x6c\x00\x64\x00\x00\x00\x00\x00'
stream = io.BytesIO(data)
unicode_stru = UNICODE_STRING()
# readinto() copies the stream's bytes straight into the structure's memory,
# filling the fields in declaration order.
stream.readinto(unicode_stru) # deserialization bin to struct
# NOTE(review): Buffer decodes as intended only where WCHAR is 2 bytes wide
# (presumably Windows) -- confirm on other platforms.
print(f"L:{unicode_stru.Length} Max:{unicode_stru.MaximumLength} Str:{unicode_stru.Buffer}")
# Call a function exported by a DLL (kernel32) through ctypes.windll.
from ctypes import wintypes, windll, byref
# First call with a zero-sized buffer: the return value is used as the size
# of the buffer to allocate.
length = windll.kernel32.GetLogicalDriveStringsW(0,0) # total length required
# Allocate length*2 bytes (two bytes per wide character) as a CHAR array so
# the result can be inspected through .raw.
data_string = (wintypes.CHAR*(length*2))() # array of char to access in raw format
windll.kernel32.GetLogicalDriveStringsW(length, byref(data_string)) # call api
print("raw: " + str(data_string.raw))
# Entries are split on a pair of zero bytes, then decoded as UTF-16LE.
# NOTE(review): splitting raw UTF-16 bytes on b'\x00\x00' depends on the
# right-to-left scan of rsplit to stay aligned on 2-byte units; decoding
# first and splitting on '\0' would be sturdier -- confirm on Windows.
for drive in data_string.raw.rsplit(b'\x00\x00'):
    print(drive.decode("utf-16le"))