rev-idapython

Installation
SKILL.md

rev-idapython - IDAPython / IDALib Script Reference

IDAPython script snippets for IDA interactive use and IDALib headless analysis. Use as reference when generating IDAPython code.

  • IDAPython: scripts run inside IDA GUI (Script Command, plugin, or IDC console)
  • IDALib: headless mode introduced in IDA 9.0 — run analysis scripts without opening the IDA GUI

Common API

Register Operations

idc.get_reg_value('rax')          # read a register value by name
idaapi.set_reg_val("rax", 1234)   # write a value into a register by name

Debug Memory Operations

idc.read_dbg_byte(addr)          # read 1 byte from debuggee memory
idc.read_dbg_memory(addr, size)  # read `size` raw bytes from debuggee memory
idc.read_dbg_dword(addr)         # read 4 bytes from debuggee memory
idc.read_dbg_qword(addr)         # read 8 bytes from debuggee memory
idc.patch_dbg_byte(addr, val)    # write 1 byte into debuggee memory
idc.add_bpt(0x409437)          # add breakpoint
idaapi.get_imagebase()         # get image base address

Local Memory Operations (read and patch the IDB database)

idc.get_qword(addr)              # read 8 bytes
idc.patch_qword(addr, val)       # patch 8 bytes
idc.patch_dword(addr, val)       # patch 4 bytes
idc.patch_word(addr, val)        # patch 2 bytes
idc.patch_byte(addr, val)        # patch 1 byte
idc.get_db_byte(addr)            # read 1 byte from the database
idc.get_bytes(addr, size)        # read `size` raw bytes
idaapi.get_dword(addr)           # read 4 bytes
idc.get_strlit_contents          # read string literal; call as get_strlit_contents(ea, length=-1, strtype=STRTYPE_C)

Disassembly

# Module-qualified so the call also works outside the interactive console,
# where idc names are not star-imported.
idc.GetDisasm(addr)              # get disassembly text
idc.next_head(ea)                # get next instruction address
idc.create_insn(addr)            # c, Make Code
ida_bytes.create_strlit          # create string, same as 'A' key
ida_funcs.add_func(addr)         # p, create function
idc.del_items(addr)              # U, undefine

Address Conversion

idc.get_name_ea(0, '_sub_6051')  # get address by function name; first argument is the referring ("from") address

Function Operations

ida_funcs.get_func(ea)           # look up the function descriptor covering `ea`

# walk every function start address and print it with its name
for func_ea in idautils.Functions():
    func_name = idc.get_func_name(func_ea)
    print("0x%x, %s" % (func_ea, func_name))

Code Snippets

Byte Pattern Search

import ida_bytes
import ida_idaapi
import ida_funcs
import idc

# Usage:
#   find_bytes_list("90 90 90 90 90")
#   find_bytes_list("55 ??")
def find_bytes_list(bytes_pattern):
    """Return every address at which `bytes_pattern` matches.

    The search restarts one byte past each hit, so overlapping matches
    are reported too. The scan ends when the search yields BADADDR.
    """
    matches = []
    cursor = 0
    while True:
        hit = idc.find_bytes(bytes_pattern, cursor)
        if hit == ida_idaapi.BADADDR:
            return matches
        matches.append(hit)
        cursor = hit + 1

Appcall - Call Debuggee Functions

# One alias for the Appcall helper so every example below uses the same name.
Appcall = ida_idd.Appcall

# test check_passwd(char *passwd) -> int
passwd = Appcall.byref("MyFirstGuess")
res = Appcall.check_passwd(passwd)
if res.value == 0:
    print("Good passwd !")
else:
    print("Bad passwd...")

# Explicitly create the buffer as a byref object
s_in = Appcall.byref("SomeEncryptedBuffer")
# Buffers are always returned byref
# NOTE: SizeOfBuffer is a placeholder; set it to the output buffer size first.
s_out = Appcall.buffer(" ", SizeOfBuffer)
# Call the debuggee
Appcall.decrypt_buffer(s_in, s_out, SizeOfBuffer)
# Print the result
print("decrypted=", s_out.value)

# Appcall.proto() binds a named debuggee function to an explicit C prototype.
loadlib = Appcall.proto("kernel32_LoadLibraryA", "int __stdcall loadlib(const char *fn);")
hmod = loadlib("dll_to_inject.dll")

getlasterror = Appcall.proto("kernel32_GetLastError", "DWORD __stdcall GetLastError();")
print("lasterror=", getlasterror())

getcmdline = Appcall.proto("kernel32_GetCommandLineA", "const char *__stdcall getcmdline();")
print("command line:", getcmdline())

Cross References

# print the source address of every cross reference to `ea`
for xref in idautils.XrefsTo(ea):
    print(hex(xref.frm))

# shorthand: collect the source addresses into a list
[xref.frm for xref in idautils.XrefsTo(start_ea)]

Basic Block Traversal

fn = 0x4800
# FC_PREDS is passed so that block.preds() can be used below.
f_blocks = idaapi.FlowChart(idaapi.get_func(fn), flags=idaapi.FC_PREDS)
for block in f_blocks:
    print(hex(block.start_ea))

    # successor blocks of the current block
    for succ in block.succs():
        print(hex(succ.start_ea))

    # predecessor blocks of the current block
    for pred in block.preds():
        print(hex(pred.start_ea))

Debug Memory Read/Write

def patch_dbg_mem(addr, data):
    """Write `data` into debuggee memory at `addr`, one byte per call."""
    for offset, value in enumerate(data):
        idc.patch_dbg_byte(addr + offset, value)

def read_dbg_mem(addr, size):
    """Read `size` bytes of debuggee memory at `addr`; return them as bytes."""
    values = [idc.read_dbg_byte(addr + offset) for offset in range(size)]
    return bytes(values)

Read std::string (64-bit)

def dbg_read_cppstr_64(objectAddr):
    """Read the text of a std::string object living in debuggee memory.

    The first qword of the object is taken as the data pointer; bytes are
    then read from that pointer until a zero byte is found.
    """
    data_ptr = idc.read_dbg_qword(objectAddr)
    chars = []
    while True:
        value = idc.read_dbg_byte(data_ptr + len(chars))
        if value == 0:
            return ''.join(chars)
        chars.append(chr(value))

Read C String (64-bit)

def dbg_read_cstr_64(objectAddr):
    """Read a NUL-terminated string that starts at `objectAddr` in the debuggee."""
    chars = []
    while True:
        value = idc.read_dbg_byte(objectAddr + len(chars))
        if value == 0:
            return ''.join(chars)
        chars.append(chr(value))

Parse GNU C++ std::map

import idautils
import idaapi
import idc

def parse_gnu_map_header(address):
    """Return the qword stored at offset 0x10 of the map object (used as the root node pointer)."""
    return idc.read_dbg_qword(address + 0x10)

def parse_gnu_map_node(address):
    """Return (left, right, data) for the tree node at `address`.

    left/right are the qwords at +0x10 / +0x18; data is the address of the
    payload at +0x20 (it is not dereferenced here).
    """
    left_child = idc.read_dbg_qword(address + 0x10)
    right_child = idc.read_dbg_qword(address + 0x18)
    return left_child, right_child, address + 0x20

def parse_gnu_map_travel(address):
    """Collect the payload address of every node in a GNU std::map.

    Args:
        address: address of the std::map object in debuggee memory.

    Returns:
        A list of payload addresses, in the order nodes are popped from the
        work list (each node is followed by its right subtree before its
        left one). An empty list is returned for an empty map.
    """
    root = parse_gnu_map_header(address)
    if not root:
        # Empty map (null root) or unreadable memory: nothing to walk.
        return []

    result = []
    worklist = [root]
    while worklist:
        addr = worklist.pop()
        left, right, data = parse_gnu_map_node(addr)
        # Truthiness also skips a failed read (None), not only a null pointer.
        if left:
            worklist.append(left)
        if right:
            worklist.append(right)
        result.append(data)
    return result

# example: the argument is the address of a std::map object in the debuggee
elements = parse_gnu_map_travel(0x0000557518073EB0)
for elem in elements:
    print(hex(elem))  # payload address of one node (node address + 0x20)

Read XMM Register (Debug)

def read_xmm_reg(name):
    """Return the current value of a debugger register as an unsigned int.

    Args:
        name: register name, e.g. "xmm0".

    Returns:
        All register bytes interpreted as one little-endian integer. For an
        8-byte value this equals what the former ``struct.unpack('Q', ...)``
        produced; wider registers no longer fail on a length mismatch.

    Raises:
        RuntimeError: if the register value cannot be read.
    """
    rv = idaapi.regval_t()
    if not idaapi.get_reg_val(name, rv):
        raise RuntimeError("Failed to read register %s" % name)
    return int.from_bytes(rv.bytes(), "little")

Step Over and Wait for Debug Event

# Single-step (stepping over calls) until step_over() reports failure.
while ida_dbg.step_over():
    # Block until the debugger reports the next event (-1: no timeout).
    ida_dbg.wait_for_next_event(ida_dbg.WFNE_ANY, -1)
    rip = idc.get_reg_value("rip")
    # .....

Iterate Instructions in a Function

# print the address of every item belonging to the function at 0x401000
for item_ea in idautils.FuncItems(0x401000):
    print(hex(item_ea))

Get Function Callees (Instruction-Based)

def ida_get_callees(func_addr: int) -> list:
    """Return the operand-0 target of every direct call in a function.

    Instructions are scanned from `func_addr` up to the end of the function
    that contains it.

    Args:
        func_addr: an address inside the function (normally its start).

    Returns:
        Target values of call instructions whose first operand is an
        address (near, far or memory operand). Calls through a register or
        a computed expression are skipped, because their operand value is
        not an address. Returns an empty list when `func_addr` is not
        inside any function.
    """
    pfn = idaapi.get_func(func_addr)
    if pfn is None:
        return []

    address_operands = (idc.o_near, idc.o_far, idc.o_mem)
    callees = []
    for head in idautils.Heads(func_addr, pfn.end_ea):
        if not idaapi.is_call_insn(head):
            continue
        if idc.get_operand_type(head, 0) not in address_operands:
            continue
        callees.append(idc.get_operand_value(head, 0))
    return callees

Double / Complex Number Memory Operations

def float_to_double_bytearray(value):
    """Return the 8-byte C `double` representation of `value` as a bytearray.

    The bytes are in the host's native byte order.
    """
    # Imported here because this snippet has no import header of its own.
    import ctypes

    return bytearray(ctypes.c_double(value))

def set_pos(x, y): # complex<double, double>
    """Overwrite two consecutive doubles located at rbp - 0x260 in the debuggee.

    `x` is written at the base address and `y` eight bytes after it.
    """
    frame_base = idc.get_reg_value("rbp")
    first_addr = frame_base - 0x260
    second_addr = first_addr + 8

    for target, component in ((first_addr, x), (second_addr, y)):
        patch_dbg_mem(target, float_to_double_bytearray(component))

set_pos(5.0, 6.0)

Import Table

Enumerate Import Table

import ida_nalt

def imp_cb(ea, name, ordinal):
    """Print one imported symbol; returning True keeps the enumeration going."""
    if name:
        print("%08x: %s (ordinal #%d)" % (ea, name, ordinal))
    else:
        print("%08x: ordinal #%d" % (ea, ordinal))
    return True

nimps = ida_nalt.get_import_module_qty()

print("Found %d import(s)..." % nimps)

for module_index in range(nimps):
    module_name = ida_nalt.get_import_module_name(module_index)
    if not module_name:
        print("Failed to get import module name for #%d" % module_index)
        module_name = "<unnamed>"

    print("Walking imports for module %s" % module_name)
    ida_nalt.enum_import_names(module_index, imp_cb)

print("All done...")

Check if Address is an Import Function

def ida_is_import_function(addr: int) -> bool:
    """Return True if `addr` is the address of an entry in the import table.

    Every import module is enumerated until an entry with a matching address
    is found; the scan stops at the first match.
    """
    is_find = False

    def imp_cb(ea, name, ordinal):
        nonlocal is_find
        if ea == addr:
            is_find = True
            return False  # stop enumerating this module
        return True

    for i in range(ida_nalt.get_import_module_qty()):
        ida_nalt.enum_import_names(i, imp_cb)
        if is_find:
            # No need to walk the remaining modules.
            break

    return is_find

Enumerate Import Addresses

def ida_enum_import_addr() -> list:
    """Return the address of every entry of every import module.

    The return annotation is the builtin ``list`` because ``typing.List``
    is not imported in this snippet.
    """
    import_addrs = []

    def imp_cb(ea, name, ordinal):
        import_addrs.append(ea)
        return True  # keep enumerating

    for i in range(ida_nalt.get_import_module_qty()):
        ida_nalt.enum_import_names(i, imp_cb)
    return import_addrs

Type Information

Struct Member Traversal

def extract_struct_members(type_name):
    """Describe every member of the named struct type.

    Returns a list of dicts with keys "offset" (member offset divided by 8,
    i.e. converted from bits), "size" and "type" (printed type). When the
    type cannot be resolved, a message is printed and the list is empty.
    """
    tif = ida_typeinf.tinfo_t()
    if not tif.get_named_type(None, type_name):
        print(f"Unable to get {type_name} type info.")
        return []

    return [
        {
            "offset": udm.offset // 8,  # udm.offset is a bit offset
            "size": udm.type.get_size(),
            "type": udm.type._print(),
        }
        for udm in tif.iter_struct()
    ]

extract_struct_members("sqlite3_vfs")

Enumerate All Types

# print the name of every type known to the type library returned by get_idati()
type_library = ida_typeinf.get_idati()
for name in type_library.get_type_names():
    print(name)

List All Struct Types

def list_struct_types():
    """Return the names of all named types that resolve to a struct."""
    struct_names = []
    type_library = ida_typeinf.get_idati()
    for candidate in type_library.get_type_names():
        tinfo = ida_typeinf.tinfo_t()
        if not tinfo.get_named_type(None, candidate):
            continue
        if tinfo.is_struct():
            struct_names.append(candidate)
    return struct_names

Hex-Rays Decompiler API

Decompile a Function

# verified: IDA 9.0
# func_addr must be set by the caller to the address of the function to decompile
dec = ida_hexrays.decompile(func_addr)
# dec is an object, str(dec) converts to text
print(str(dec))

Print Microcode at Different Maturity Levels

def print_microcode(func_ea, maturity=None):
    """Generate and print the microcode of a function.

    Args:
        func_ea: an address inside the function.
        maturity: microcode maturity level to generate; defaults to
            ``ida_hexrays.MMAT_GLBOPT3`` when omitted. Levels:
              MMAT_ZERO          microcode does not exist
              MMAT_GENERATED     generated microcode
              MMAT_PREOPTIMIZED  preoptimized pass is complete
              MMAT_LOCOPT        local optimization of each basic block is
                                 complete; control flow graph is ready too
              MMAT_CALLS         detected call arguments
              MMAT_GLBOPT1       performed the first pass of global optimization
              MMAT_GLBOPT2       most global optimization passes are done
              MMAT_GLBOPT3       completed all global optimization; microcode
                                 is fixed now
              MMAT_LVARS         allocated local variables

    Prints a diagnostic and returns early when there is no function at
    `func_ea` or when microcode generation fails.
    """
    if maturity is None:
        maturity = ida_hexrays.MMAT_GLBOPT3

    pfn = idaapi.get_func(func_ea)
    if pfn is None:
        print("No function at 0x%x" % func_ea)
        return

    hf = ida_hexrays.hexrays_failure_t()
    rng = ida_hexrays.mba_ranges_t(pfn)
    mba = ida_hexrays.gen_microcode(rng, hf, None,
                ida_hexrays.DECOMP_WARNINGS, maturity)
    if not mba:
        # hf carries the failure address and description.
        print("0x%x: %s" % (hf.errea, hf.desc()))
        return

    vp = ida_hexrays.vd_printer_t()
    mba._print(vp)
print_microcode(0x1229)

Custom Instruction to User-Defined Call

class udc_exit_t(ida_hexrays.udc_filter_t):
    """Filter that presents `svc <code>` instructions as a call to `name`."""

    def __init__(self, code, name):
        super().__init__()
        prototype = "int __usercall %s@<R0>(int status@<R1>);" % name
        if not self.init(prototype):
            raise Exception("Couldn't initialize udc_exit_t instance")
        self.code = code
        self.installed = False

    def match(self, cdg):
        # Match only ARM svc instructions whose first operand equals our code.
        insn = cdg.insn
        return insn.itype == ida_allins.ARM_svc and insn.Op1.value == self.code

    def install(self):
        ida_hexrays.install_microcode_filter(self, True)
        self.installed = True

    def uninstall(self):
        ida_hexrays.install_microcode_filter(self, False)
        self.installed = False

    def toggle_install(self):
        # Flip between the installed and uninstalled state.
        action = self.uninstall if self.installed else self.install
        action()

udc_exit = udc_exit_t(0x900001, "svc_exit")
udc_exit.toggle_install()

Hexrays_Hooks

class MicrocodeCallback(ida_hexrays.Hexrays_Hooks):
    """Hexrays_Hooks subclass that prints a message from its microcode() callback."""

    def microcode(self, mba: ida_hexrays.mba_t) -> "int":
        print("microcode generated.")
        return 0

r = MicrocodeCallback()
r.hook()

Obfuscation Helpers

OLLVM - Set Breakpoints on Real Blocks

Set breakpoints on all real block entry addresses. Real blocks are identified by finding predecessors of the OLLVM dispatcher merge point.

Note: treating every flow-graph predecessor of the merge point as a real block is a heuristic and may not be fully accurate. Use IDA breakpoint groups for batch management.

fn = 0x401F60
ollvm_tail = 0x405D4B # OLLVM real block merge point
f_blocks = idaapi.FlowChart(idaapi.get_func(fn), flags=idaapi.FC_PREDS)
for candidate in f_blocks:
    for successor in candidate.succs():
        if successor.start_ea != ollvm_tail:
            continue
        # This block jumps to the merge point: report it and break on its entry.
        print(hex(candidate.start_ea))
        idc.add_bpt(candidate.start_ea)

Batch Add Breakpoints

def brkall(l):
    """Add a breakpoint at every address in `l`."""
    for bpt_ea in l:
        idc.add_bpt(bpt_ea)

Firmware Helpers

Search x86 Function Prologues and Create Functions

# verified: IDA 9.0
def make_x86_func():
    """Create a function at every occurrence of the byte pattern "55 8B".

    Each hit is undefined, converted to code and then turned into a function.
    """
    for prologue_ea in find_bytes_list("55 8B"):
        idc.del_items(prologue_ea)
        idc.create_insn(prologue_ea)
        ida_funcs.add_func(prologue_ea)

Basic Block Utilities

Get Basic Block Size

# verified: IDA 9.0
def get_bb_size(bbaddr):
    """Return the byte size of the basic block that starts at `bbaddr`.

    Raises Exception("Not found") when no block of the containing function
    starts exactly at `bbaddr`.
    """
    flow = idaapi.FlowChart(idaapi.get_func(bbaddr), flags=idaapi.FC_PREDS)
    for bb in flow:
        if bb.start_ea != bbaddr:
            continue
        return bb.end_ea - bb.start_ea
    raise Exception("Not found")

Get Basic Block by Address

def ida_get_bb(ea):
    """Return the basic block whose [start_ea, end_ea) range contains `ea`, or None."""
    flow = idaapi.FlowChart(idaapi.get_func(ea), flags=idaapi.FC_PREDS)
    for bb in flow:
        if bb.start_ea <= ea < bb.end_ea:
            return bb
    return None

Instruction Utilities

Search Next Instruction by Keyword

# verified: IDA 9.0
def search_next_insn(addr, insnkey, max_search=0x100):
    """Find the next item after `addr` whose disassembly contains `insnkey`.

    Args:
        addr: address to start from; the search begins at the following head.
        insnkey: substring to look for in the disassembly text.
        max_search: maximum number of heads to examine.

    Returns:
        The address of the first match, or None when nothing matches within
        `max_search` heads or the end of the database is reached.
    """
    for _ in range(max_search):
        addr = idc.next_head(addr)
        if addr == idc.BADADDR:
            # Ran off the end of the database; further steps cannot match.
            break
        if insnkey in idc.GetDisasm(addr):
            return addr
    return None

# example
# search_next_insn(addr, 'movdqa')

Undefine a Range (U key equivalent)

# verified: IDA 9.0
def undefine_range(start, end):
    """Undefine every address in [start, end), one del_items() call per address."""
    ea = start
    while ea < end:
        idc.del_items(ea)
        ea += 1
# example
# undefine_range(func_start, func_end)

Search Disassembly Text

# verified: IDA 9.0
def search_text_all(text):
    """Return the address of every downward text-search hit for `text`.

    The search starts at address 0 and resumes from the head following
    each hit until the search returns BADADDR.
    """
    import idaapi
    import idc

    hits = []
    cursor = 0
    while True:
        found = idaapi.find_text(ustr=text, x=0, y=0,
            sflag=idaapi.SEARCH_DOWN, start_ea=cursor)
        if found == idc.BADADDR:
            return hits
        hits.append(found)
        cursor = idc.next_head(found)
# example
import idc  # search_text_all only imports idc inside its own scope

for x in search_text_all('movdqa'):
    print(idc.GetDisasm(x))

NOP Function

import idaapi
import idautils
import idc

def nop_func(addr_func, arch='arm'):
    """Overwrite every instruction of a function with NOPs in the database.

    Args:
        addr_func: an address inside the function to patch.
        arch: NOP encoding to use. 'x86' is the one-byte 0x90. 'arm' and
            'arm64' both select the 4-byte AArch64 NOP (0xD503201F); 'arm'
            is kept for backward compatibility with existing callers.

    Each decoded instruction is filled with the NOP pattern repeated (and
    truncated) to the instruction's length. Patching stops at the first
    instruction that fails to decode.
    """
    # Imported locally: the import header of this snippet omits these modules.
    import ida_funcs
    import ida_ua

    aarch64_nop = [0x1F, 0x20, 0x03, 0xD5]  # 0xD503201F, little-endian
    nop_encodings = {
        'x86': [0x90],
        'arm': aarch64_nop,
        'arm64': aarch64_nop,
    }

    func = ida_funcs.get_func(addr_func)
    if not func:
        print("Function not found!")
        return

    start = func.start_ea
    end = func.end_ea

    print(f"Nopping function at: 0x{start:x} - 0x{end:x}")

    nop_bytes = nop_encodings.get(arch)
    if nop_bytes is None:
        print(f"Unsupported architecture: {arch}")
        return
    nop_len = len(nop_bytes)

    ea = start
    while ea < end:
        insn = ida_ua.insn_t()
        length = ida_ua.decode_insn(insn, ea)
        if length <= 0:
            print(f"Failed to decode instruction at: 0x{ea:x}")
            break

        # Repeat the NOP pattern across the whole instruction.
        for offset in range(length):
            idc.patch_byte(ea + offset, nop_bytes[offset % nop_len])

        ea += length

    print("Nopping complete.")

# example
nop_func(0x401000, 'arm')

IDALib (Headless IDA, IDA 9.0+)

IDALib allows running IDAPython analysis scripts without opening the IDA GUI.

Installation

# NOTE(review): the path is presumably relative to the IDA installation directory — confirm
cd idalib/python
# install the Python package found in the current directory
pip install .
# run the activation script located in the same directory
python py-activate-idalib.py

Basic Usage

import idapro # must be the first import
import idautils
import idc

# open idb/binary file; the module imported above is `idapro`, not `ida`
idapro.open_database("samples/patch.so", True)

# enumerate functions
for func in idautils.Functions():
    func_name = idc.get_func_name(func)
    print("Function Name: {}, Address: {}".format(func_name, hex(func)))

# close and save idb
idapro.close_database(save=True)

Batch Decompile to JSON

Usage: decompile.py <input_file_elf> <output_file_json>

decompile.py:

import idapro

import ida_hexrays
import idautils
import idc

import os
import sys
import json

def _decompile_internal():
    """Decompile every function of the currently open database.

    Returns:
        A list of dicts with keys 'name', 'address' (hex string) and
        'decompiled' (pseudocode text). Functions that fail to decompile
        are reported and skipped instead of aborting the whole run.
    """
    result = []
    for func in idautils.Functions():
        func_name = idc.get_func_name(func)
        print("Function Name: {}, Address: {}".format(func_name, hex(func)))
        try:
            dec_obj = ida_hexrays.decompile(func)
        except ida_hexrays.DecompilationFailure as err:
            # decompile() raises on failure; keep going with the next function.
            print("Failed to decompile {}: {}".format(func_name, err))
            continue
        if dec_obj is None:
            continue
        result.append({
            'name': func_name,
            'address': hex(func),
            'decompiled': str(dec_obj),
        })
    return result

def decomple_export(file, out_file):
    """Decompile `file` with idalib and write the result to `out_file` as JSON.

    Args:
        file: path of the binary or database to open.
        out_file: path of the JSON file to create.

    Raises:
        RuntimeError: if the database cannot be opened.
    """
    # The module imported by this script is `idapro`.
    status = idapro.open_database(file, True)
    if status != 0:
        raise RuntimeError("Failed to open {} (error {})".format(file, status))
    try:
        r = _decompile_internal()
    finally:
        # Always release the database, even when decompilation raised.
        idapro.close_database(save=False)

    # Serialize first so a failure does not leave an empty output file behind.
    payload = json.dumps(r, indent=4)
    with open(out_file, "w") as fp:
        fp.write(payload)

if __name__ == "__main__":
    # Expect exactly two arguments: the input binary and the output JSON path.
    if len(sys.argv) == 3:
        decomple_export(sys.argv[1], sys.argv[2])
    else:
        print("Usage: {} <input_file_elf> <output_file_json>".format(sys.argv[0]))
        sys.exit(1)

Multiprocess Batch Decompile

import os
import time
from multiprocessing import Pool

# Batch settings; edit the paths before running.
args = {
    "NUM_WORKERS": 8,  # size of the multiprocessing Pool
    "INPUT_DIR": "/Users/ctf/idek2024/baby2/baby",  # every entry of this directory is decompiled
    "OUTPUT_DIR": "/Users/ctf/idek2024/baby2/decompiled",  # one <name>.json per input is written here
    "NUM_MAX_RETRY": 3  # attempts per file before it is reported as failed
}

def decomple_one(file, out_file):
    """Run decompile.py on one file, retrying until the output file exists.

    Args:
        file: path of the input binary.
        out_file: path of the JSON file decompile.py should produce.

    Returns:
        None on success, or a "Failed to decompile ..." message after
        args["NUM_MAX_RETRY"] unsuccessful attempts (at least one attempt is
        always made). One second is slept between attempts.
    """
    # Local import: subprocess is not part of this snippet's import header.
    import subprocess

    attempts = max(1, args["NUM_MAX_RETRY"])
    for attempt in range(attempts):
        if attempt:
            time.sleep(1)
        # An argument list, not a shell string, so paths containing spaces
        # or shell metacharacters are passed through unchanged.
        subprocess.run(["python3", "decompile.py", file, out_file], check=False)
        # Success is judged by the output file existing, not by the exit code.
        if os.path.exists(out_file):
            return None
    return "Failed to decompile {}".format(file)

if __name__ == "__main__":
    input_dir = args["INPUT_DIR"]
    output_dir = args["OUTPUT_DIR"]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One output JSON per directory entry, named after the input file.
    files = [os.path.join(input_dir, entry) for entry in os.listdir(input_dir)]
    out_files = [
        os.path.join(output_dir, os.path.basename(path) + ".json")
        for path in files
    ]
    with Pool(args["NUM_WORKERS"]) as pool:
        outcomes = pool.starmap(decomple_one, zip(files, out_files))
        # decomple_one returns None on success and a message on failure.
        for message in outcomes:
            if message is not None:
                print(message)
Weekly Installs
161
GitHub Stars
797
First Seen
Today