clu2's notes: Notes on Dyninst

Official Dyninst website

Dyninst runtime binary patch

If a function is instrumented at a point, say the entry, then Dyninst inserts a trampoline. To see what happens, consider __open in libc. Before Dyninst binary patching, the original code would look like this:

(gdb) disas/r __open
Dump of assembler code for function open:

   0x32697bc450 <+0>:    83 3d c9 c9 17 00 00   cmpl   $0x0,0x17c9c9(%rip)    # 0x3269938e20 <__libc_multiple_threads>
   0x32697bc457 <+7>:    75 12                  jne    0x32697bc46b <open+27>
   0x32697bc459 <+0>:    48 c7 c0 02 00 00 00   mov    $0x2,%rax
   0x32697bc460 <+7>:    0f 05                  syscall
   .....

and after Dyninst binary patching (to see this, create a binary executable which calls sleep, run it under Dyninst, and then attach gdb to the running process with gdb -p <PID>) the patched code would look like this:

(gdb) disas/r __open
Dump of assembler code for function open:

   0x32697bc450 <+0>:    e9 93 4d 94 96       jmpq   0x32001011e8
   0x32697bc455 <+5>:    cc                   int3
   0x32697bc456 <+6>:    cc                   int3
   0x32697bc457 <+7>:    cc                   int3
   0x32697bc458 <+8>:    cc                   int3
   0x32697bc459 <+0>:    48 c7 c0 02 00 00 00 mov    $0x2,%rax
   0x32697bc460 <+7>:    0f 05                syscall
   .....

So the first 9 bytes are rewritten as a jump to 32001011e8 followed by a run of int3 (breakpoint) instructions. The new jump instruction only needs 5 bytes, but since Dyninst always rewrites whole basic blocks, it overwrites everything up to and including the branch that ends the block (here, the jne at offset +7).

If we look at the process's memory map (use the pmap command), we can see the memory region which contains the address 32001011e8 is

...
0000003200000000   1024K rwx--  /dev/zero
0000003200100000      4K rwx--  /dev/zero
0000003200101000   1024K rwx--  /dev/zero
...

What about the code at 32001011e8? gdb will refuse to disassemble it since there is no symbol name associated with that address, but there is another command that works on raw addresses:

(gdb) x/100i 0x32001011e8
   0x32001011e8: lea    -0x80(%rsp),%rsp
   0x32001011ed: pushfq
   0x32001011ee: push   %rax
   0x32001011ef: push   %rbx
   0x32001011f0: push   %r10
   0x32001011f2: push   %r11
   0x32001011f4: push   %r8
   0x32001011f6: push   %r9
   0x32001011f8: push   %rcx
   0x32001011f9: push   %rdx
   0x32001011fa: push   %rsi
   0x32001011fb: push   %rdi
   0x32001011fc: movabs $0x2a95769ec0,%r10
   0x3200101206: mov    (%r10),%ebx
   0x3200101209: test   %rbx,%rbx
   0x320010120c: je     0x3200101261
   0x3200101212: movabs $0x0,%rbx
   0x320010121c: mov    %ebx,(%r10)
   0x320010121f: push   %r10
   0x3200101221: push   %r12
   0x3200101223: push   %r13
   0x3200101225: push   %r14
   0x3200101227: push   %r15
   0x3200101229: movabs $0x32001000a0,%rdi
   0x3200101233: mov    0x28(%rsp),%rsi
   0x3200101238: movabs $0x0,%rax
   0x3200101242: callq  0x3269748570 <printf>
   0x3200101247: mov    %rax,%rbx
   0x320010124a: pop    %r15
   0x320010124c: pop    %r14
   0x320010124e: pop    %r13
   0x3200101250: pop    %r12
   0x3200101252: pop    %r10
   0x3200101254: movabs $0x1,%rbx
   0x320010125e: mov    %ebx,(%r10)
   0x3200101261: pop    %rdi
   0x3200101262: pop    %rsi
   0x3200101263: pop    %rdx
   0x3200101264: pop    %rcx
   0x3200101265: pop    %r9
   0x3200101267: pop    %r8
   0x3200101269: pop    %r11
   0x320010126b: pop    %r10
   0x320010126d: pop    %rbx
   0x320010126e: pop    %rax
   0x320010126f: popfq
   0x3200101270: lea    0x80(%rsp),%rsp
   0x3200101278: cmpl   $0x0,0x69837ba1(%rip)        # 0x3269938e20 <__libc_multiple_threads>
   0x320010127f: jne    0x32697bc46b <open+27>
   0x3200101285: jmpq   0x32697bc459 <__open_nocancel>
   0x320010128a: ud2a

In the above example, the instrumentation code we inserted at the entry of __open is just a printf of the first argument passed to __open, like the following:

   // Format string that becomes printf's first argument.
   BPatch_constExpr arg0 ("open file %s\n");
   BPatch_paramExpr arg1 (0);                  // grab the first argument to __open
   vector< BPatch_snippet * > printf_args;
   printf_args.push_back( &arg0 );
   printf_args.push_back( &arg1 );             // and pass it as the second argument to printf

   // Snippet representing the call printf("open file %s\n", <first argument of __open>).
   BPatch_funcCallExpr callPrintf( *bp_printf, printf_args);

   // Insert the call snippet at the entry of __open; the insertion is
   // bracketed by beginInsertionSet()/finalizeInsertionSet().
   proc->beginInsertionSet();
   proc->insertSnippet(callPrintf, *funcEntry); // funcEntry is the entry of __open
   proc->finalizeInsertionSet(true);

The instructions at addresses 3200101278 and 320010127f perform the same task as the original instructions at addresses 32697bc450 and 32697bc457, which the patch overwrote. Finally, the jump at 3200101285 bounces back to 32697bc459, the first unmodified instruction.

A complete example

The following code example (tested with Dyninst version 6.1) will instrument __open at its entry point and call printf to print the first argument to __open:

#include <stdio.h>
#include <fcntl.h>

#include "BPatch.h"
#include "BPatch_process.h"
#include "BPatch_image.h"
#include "BPatch_module.h"
#include "BPatch_function.h"
#include "BPatch_Vector.h"
#include "BPatch_thread.h"

// The BPatch object through which main() creates the mutatee process.
BPatch bpatch;

int main(int argc, const char *argv[]) {
   char buf[512];
   int i;

   if (argc < 2) {
      fprintf(stderr, "Usage: %s prog_filename\n", argv[0]);
      return 1;
   }
   BPatch_process *proc = bpatch.processCreate( argv[1] , argv+1 );
   BPatch_image*  image = proc->getImage();
   BPatch_module*  libc = image->findModule("libc.so", true);
   if (!libc) {
      fprintf(stderr, "image->findModule('libc.so') failure\n");
      return -1;
   }

   vector< BPatch_function* > __openFunc, __printfFunc;
   libc->findFunction("__open", __openFunc);
   if (__openFunc.empty()) {
      fprintf(stderr, "cannot find __open function\n");
      return -1;
   }
   libc->findFunction("printf", __printfFunc);
   if (__printfFunc.empty()) {
      fprintf(stderr, "cannot find printf function\n");
      return -1;
   }

   BPatch_function * bp_open   = __openFunc[0];
   BPatch_function * bp_printf = __printfFunc[0];
   BPatch_Vector<BPatch_point *> *funcEntry;

   funcEntry = bp_open->findPoint(BPatch_locEntry);
   if (funcEntry->empty()) {
      fprintf(stderr, "cannot find entry of __open function\n");
      return -1;
   }

   BPatch_constExpr arg0 ("open file %s\n");
   BPatch_paramExpr arg1 (0);
   vector< BPatch_snippet * > printf_args;
   printf_args.push_back( &arg0 );
   printf_args.push_back( &arg1 );

   BPatch_funcCallExpr callPrintf( *bp_printf, printf_args);

   proc->beginInsertionSet();
   proc->insertSnippet(callPrintf, *funcEntry);
   proc->finalizeInsertionSet(true);
   proc->continueExecution();
   return 0;
}

Dyninst runtime verbose/debugging mode

Set the following environmental variables (or use the global variable in the code) to any non-zero value to enable Dyninst verbose mode:

(as of Dyninst version 6.1)

Debugging info	Environmental variable	Global variable
Startup	`DYNINST_DEBUG_STARTUP` `DYNINST_DEBUG_PARSE` `DYNINST_DEBUG_DWARF`	`dyn_debug_startup` `dyn_debug_parsing` `dyn_debug_dwarf`
Instrumentation (binary patch, relocation, register allocation, abstract syntax tree ...)	`DYNINST_DEBUG_INST` `DYNINST_DEBUG_BPATCH` `DYNINST_DEBUG_RELOC` `DYNINST_DEBUG_REGALLOC` `DYNINST_DEBUG_CATCHUP` `DYNINST_DEBUG_AST`	`dyn_debug_inst` `dyn_debug_bpatch` `dyn_debug_reloc` `dyn_debug_regalloc` `dyn_debug_catchup` `dyn_debug_ast`
Mutatee handling (inferior RPC, stack walk, dynamic unwinding, process write)	`DYNINST_DEBUG_INFRPC` `DYNINST_DEBUG_INFMALLOC` `DYNINST_DEBUG_STACKWALK` `DYNINST_DEBUG_DYN_UNW` `DYNINST_DEBUG_WRITE`	`dyn_debug_infrpc` `dyn_debug_infmalloc` `dyn_debug_stackwalk` `dyn_debug_dyn_unw` `dyn_debug_write` `dyn_debug_write_filename`
Process control	`DYNINST_DEBUG_FORKEXEC` `DYNINST_DEBUG_PROCCONTROL` `DYNINST_DEBUG_RTLIB`	`dyn_debug_forkexec` `dyn_debug_proccontrol` `dyn_debug_rtlib`
Dyninst internal threads (including call-backs)	`DYNINST_DEBUG_THREAD` `DYNINST_DEBUG_MUTEX` `DYNINST_DEBUG_MAILBOX`	`dyn_debug_thread` `dyn_debug_mutex` `dyn_debug_mailbox`
Miscellaneous (signal, debugger interface, etc)	`DYNINST_DEBUG_SIGNAL` `DYNINST_DEBUG_DBI` `DYNINST_DEBUG_ASYNC` `DYNINST_DEBUG_LIVENESS` `DYNINST_DEBUG_CRASH` `DYNINST_DEBUG_STACKANALYSIS`	`dyn_debug_signal` `dyn_debug_dyn_dbi` `dyn_debug_async` `dyn_debug_liveness` `dyn_debug_crash` `dyn_debug_stackanalysis`

SymtabAPI runtime verbose/debugging mode

Set the following environmental variables (or use the global variable in the code) to any non-zero value to enable SymtabAPI verbose mode:

Environmental variable	Global variable
`SYMTAB_DEBUG_PARSING`	`sym_debug_parsing`
`SYMTAB_DEBUG_AGG`	`sym_debug_aggregate`
`SYMTAB_DEBUG_CREATE`	`sym_debug_create`
`SYMTAB_DEBUG_OBJECT`	`sym_debug_object`
`SYMTAB_DEBUG_TYPES`	`sym_debug_types`