A Tcl_Obj Command Machine Code Generator

George Peter Staplin Nov 2, 2007 - I needed a break from working on NexTk, to avoid going insane. I decided it would be fun to try to generate some simple Tcl_Obj commands at runtime, without invoking gcc. This project was a success -- I'm still sane :) And the commands work too!


/*
 A Tcl_Obj Command Machine Code Generator
 
 By George Peter Staplin
 
 gcc -shared codeproc.c -I/usr/local/include /usr/local/lib/libtclstub8.5.a -o codeproc.so
 
 gcc -Wall -shared codeproc.c -I/usr/local/include /usr/local/lib/libtclstub8.5.a -o codeproc.so 
 
 */
 
#include <tcl.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
 
struct code {
   int localoffset;
   unsigned char *start;
   unsigned char *pc;
   size_t length;
};
 
static struct code *current_code = NULL;
 
#define OBJ_CMD_ARGS ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]
#define defcmd(func,name) \
  Tcl_CreateObjCommand (interp, name, func, (ClientData) NULL, (Tcl_CmdDeleteProc *) NULL)
 
 
static void
dump_eax (struct code *code) {
   static char *fmt = "dump: 0x%x\n";
   uintptr_t fun = (uintptr_t)printf;
   uintptr_t fun2 = (uintptr_t)abort;
   unsigned char docall[] = {0xff, 0xd0};
 
   /*
    7f:   50                      push   %eax
    80:   8b 44 24 08             mov    0x8(%esp),%eax
    84:   50                      push   %eax
    85:   ff 75 0c                pushl  0xc(%ebp)
    88:   b8 dd ee ff 00          mov    $0xffeedd,%eax
    8d:   ff d0                   call   *%eax
   */
   
   *(code->pc) = 0x50; code->pc++; /*pushl*/
 
   *(code->pc) = 0xb8; code->pc++; /*mov*/
   memcpy (code->pc, &fmt, sizeof (char *));
   code->pc += sizeof (char *);
 
   *(code->pc) = 0x50; code->pc++; /*push*/
 
   *(code->pc) = 0xb8; code->pc++; /*mov*/
   memcpy (code->pc, &fun, sizeof fun);
   code->pc += sizeof fun;
   
   memcpy (code->pc, docall, sizeof docall);
   code->pc += sizeof docall;
 
   /*now abort()*/
   *(code->pc) = 0xb8; code->pc++; /*mov*/
   memcpy (code->pc, &fun2, sizeof fun2);
   code->pc+= sizeof fun2;
 
   memcpy (code->pc, docall, sizeof docall);
   code->pc += sizeof docall;
}
 
 
static int 
long_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   int i;
 
   for (i = 1; i < objc; ++i) {
     Tcl_Obj *obj = Tcl_NewIntObj (code->localoffset);
     Tcl_IncrRefCount (obj);
     if (NULL == Tcl_ObjSetVar2 (interp, objv[i], NULL, obj, TCL_LEAVE_ERR_MSG)) {
       Tcl_DecrRefCount (obj);
       return TCL_ERROR;
     }
     Tcl_DecrRefCount (obj);
     code->localoffset += sizeof (long);
   }
 
   return TCL_OK;
}


static int
pointer_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   int i;
 
   for (i = 1; i < objc; ++i) {
     Tcl_Obj *obj = Tcl_NewIntObj (code->localoffset);
     Tcl_IncrRefCount (obj);
     if (NULL == Tcl_ObjSetVar2 (interp, objv[i], NULL, obj, TCL_LEAVE_ERR_MSG)) {
       Tcl_DecrRefCount (obj);
       return TCL_ERROR;
     }
     Tcl_DecrRefCount (obj);
     code->localoffset += sizeof (void *);
   }
 
   return TCL_OK;
}
 
 
static int
get_long_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   uintptr_t fun = (uintptr_t)Tcl_GetLongFromObj;
   int objoffset, longoffset;
   unsigned char leal[] = {0x8d, 0x44, 0x24, /*modify*/ 0x00};
   unsigned char movl[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00};
   unsigned char pushinterp[] = {0xff, 0x75, 0xc};
 
   if (3 != objc) {
     Tcl_WrongNumArgs (interp, 1, objv, "obj-local long-local");
     return TCL_ERROR;
   }
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &objoffset))
     return TCL_ERROR;
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &longoffset))
     return TCL_ERROR;
   
   /*
     leal long-local,%eax
     pushl %eax
     movl  obj-local,%eax
     pushl %eax
     pushl 12(%ebp)
     
     7b:   8d 44 24 04             lea    0x4(%esp),%eax
     7f:   50                      push   %eax
     80:   8b 44 24 08             mov    0x8(%esp),%eax
     84:   50                      push   %eax
     85:   ff 75 0c                pushl  0xc(%ebp)
     88:   b8 dd ee ff 00          mov    $0xffeedd,%eax
     8d:   ff d0                   call   *%eax
     8f:   83 c4 0c                add    $0xc,%esp
   */
 
   leal[3] = longoffset;
   memcpy (code->pc, leal, sizeof leal);
   code->pc += sizeof leal;
   *(code->pc) = 0x50; code->pc++; /*pushl*/
 
   printf ("longoffset %d\n", longoffset);
 
   movl[3] = objoffset + 4;
   memcpy (code->pc, movl, sizeof movl);
   code->pc += sizeof movl;
   *(code->pc) = 0x50; code->pc++; /*pushl*/
 
   printf ("objoffset %d\n", objoffset);
 
   memcpy (code->pc, pushinterp, sizeof pushinterp);
   code->pc += sizeof pushinterp;
 
   *(code->pc) = 0xb8; code->pc++; /*movl (literal)*/
   memcpy (code->pc, &fun, sizeof fun);
   code->pc += sizeof fun;
 
   *(code->pc) = 0xff; code->pc++; /*call fun*/
   *(code->pc) = 0xd0; code->pc++;
 
   *(code->pc) = 0x83; code->pc++; /*cleanup after the pushes*/
   *(code->pc) = 0xc4; code->pc++;
   *(code->pc) = 12; code->pc++;
 
   return TCL_OK;
}

 
static void
code_free (struct code *code) {
   munmap (code->start, code->length);
   free (code);
}

 
static int
code_init (Tcl_Interp *interp, struct code **code) {
   struct code *r;
   size_t s;
 
   r = malloc (sizeof *r);
   if (NULL == r) {
     const char *err = Tcl_PosixError (interp);
     Tcl_SetResult (interp, "unable to malloc", TCL_STATIC);
     Tcl_AddErrorInfo (interp, err);
     return TCL_ERROR;
   }
   
   s = sysconf (_SC_PAGESIZE);
 
   r->pc = r->start = mmap (NULL, s, PROT_EXEC | PROT_READ | PROT_WRITE, 
                                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (((void *) -1) == r->pc) {
     const char *err = Tcl_PosixError (interp);
     Tcl_SetResult (interp, "unable to mmap", TCL_STATIC);
     Tcl_AddErrorInfo (interp, err);
     free (r);
     return TCL_ERROR;
   }
 
   r->localoffset = sizeof (void *);
   r->length = s;
 
   *code = r;
 
   return TCL_OK;
}

 
static void
enter_function (struct code *code) {
   *(code->pc) = 0x55; code->pc++; /* push %ebp */
   *(code->pc) = 0x89; code->pc++; 
   *(code->pc) = 0xe5; code->pc++; /* mov %esp,%ebp */
}
 

static void
delete_compiled (ClientData cdata) {
   struct code *code = (void *)cdata;
   code_free (code);
}

 
static int 
compile_cmd (OBJ_CMD_ARGS) {
   struct code *code;
   int (*fun) (OBJ_CMD_ARGS);
 
   if (TCL_OK != code_init (interp, &code))
     return TCL_ERROR;
 
   current_code = code;
 
   fun = (Tcl_ObjCmdProc *) code->start;
 
   printf ("code->start %p\n", (void *)code->start);
 
   enter_function (code);
   
   if (TCL_ERROR == Tcl_EvalObjEx (interp, objv[2], TCL_EVAL_DIRECT)) {
     code_free (code);
     return TCL_ERROR;
   }
 
   Tcl_CreateObjCommand (interp, Tcl_GetString (objv[1]), fun, (ClientData)code, delete_compiled);
 
   return TCL_OK;
}


static int
begin_code_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
 
   /*
     83 ec 08                sub    $0x8,%esp
   */
   *code->pc = 0x83; code->pc++;
   *code->pc = 0xec; code->pc++;
   *code->pc = code->localoffset; code->pc++;
 
 
   return TCL_OK;
}


static int
leave_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   /*
      c9                      leave  
      c3                      ret 
   */
 
   *(code->pc) = 0xc9; code->pc++;
   *(code->pc) = 0xc3; code->pc++;
 
   return TCL_OK;
 }
 
 static int
 result_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   char *s;
   int r;
   /*
     b8 00 00 00 00          mov    $0x0,%eax
   */
   if (2 != objc) {
     Tcl_WrongNumArgs (interp, 1, objv, "result");
     return TCL_ERROR;
   }
 
   s = Tcl_GetString (objv[1]);
   if (!strcmp ("ok", s)) {
     r = TCL_OK;
   } else if (!strcmp ("error", s)) {
     r = TCL_ERROR;
   }
   
   *(code->pc) = 0xb8; code->pc++;
   memcpy (code->pc, &r, 4);
   code->pc += 4;
 
   return TCL_OK;
}


static int
get_arg_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   int argvoffset, localoffset;
   unsigned char m[] = {
     0x8b, 0x4d, 0x14,
     0x83, 0xc1, /*argvoffset*/ 0x00,
     0x8b, 0x01,
     0x89, 0x44, 0x24, /*localoffset*/ 0x00
   };
   
   if (3 != objc) {
     Tcl_WrongNumArgs (interp, 1, objv, "argv-offset local");
     return TCL_ERROR;
   }
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &argvoffset))
     return TCL_ERROR;
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &localoffset))
     return TCL_ERROR;
   
   /*
   7b:   8b 4d 14                mov    0x14(%ebp),%ecx
   7e:   83 c1 04                add    $0x4,%ecx
   81:   8b 01                   mov    (%ecx),%eax
   83:   89 44 24 04             mov    %eax,0x4(%esp)
 
      (0x4 is objv[1])
      (0x8 is objv[2])
   */
 
   m[5] = argvoffset * 4;
   m[11] = localoffset;
 
   memcpy (code->pc, m, sizeof m);
   code->pc += sizeof m;
 
   return TCL_OK;
}


static int
long_plus_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
 
   unsigned char movleax[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00};
   unsigned char movlecx[] = {0x8b, 0x4c, 0x24, /*modify*/ 0x00};
   unsigned char addl[] = {0x01, 0xc8};
   unsigned char movlresult[] = {0x89, 0x44, 0x24, /*modify*/ 0x00};
   int a, b, r; /* these are local variable offsets */
 
   /*
   7b:   8b 44 24 04             mov    0x4(%esp),%eax
   7f:   8b 4c 24 08             mov    0x8(%esp),%ecx
   83:   01 c8                   add    %ecx,%eax
   85:   89 44 24 12             mov    %eax,0x12(%esp)
   */
 
   if (4 != objc) {
     Tcl_WrongNumArgs (interp, 1, objv, "a b result");
     return TCL_ERROR;
   }
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &a))
     return TCL_ERROR;
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &b))
     return TCL_ERROR;
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[3], &r))
     return TCL_ERROR;
   
   movleax[3] = a;
   memcpy (code->pc, movleax, sizeof movleax);
   code->pc += sizeof movleax;
 
   movlecx[3] = b;
   memcpy (code->pc, movlecx, sizeof movlecx);
   code->pc += sizeof movlecx;
 
   memcpy (code->pc, addl, sizeof addl);
   code->pc += sizeof addl;
 
   movlresult[3] = r;
   memcpy (code->pc, movlresult, sizeof movlresult);
   code->pc += sizeof movlresult;
   
   return TCL_OK;
}


static int
long_obj_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   uintptr_t fun = (uintptr_t)Tcl_NewLongObj;
   unsigned char docall[] = {0xff, 0xd0};
   unsigned char movlocal[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00};
   unsigned char movlresult[] = {0x89, 0x44, 0x24, /*modify*/ 0x00};
   int longoffset, objoffset;
 
   /*
    7f:   50                      push   %eax
    80:   8b 44 24 08             mov    0x8(%esp),%eax
    84:   50                      push   %eax
    85:   ff 75 0c                pushl  0xc(%ebp)
    88:   b8 dd ee ff 00          mov    $0xffeedd,%eax
    8d:   ff d0                   call   *%eax
   */
 
   if (3 != objc) {
     Tcl_WrongNumArgs (interp, 1, objv, "longoffset objoffset");
     return TCL_ERROR;    
   }
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &longoffset))
     return TCL_ERROR;
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &objoffset))
     return TCL_ERROR;
 
   movlocal[3] = longoffset;
   memcpy (code->pc, movlocal, sizeof movlocal);
   code->pc += sizeof movlocal;
 
   *(code->pc) = 0x50; code->pc++; /*push*/
 
   *(code->pc) = 0xb8; code->pc++; /*mov*/
   memcpy (code->pc, &fun, sizeof fun);
   code->pc += sizeof fun;
 
   memcpy (code->pc, docall, sizeof docall);
   code->pc += sizeof docall;
   
   *(code->pc) = 0x83; code->pc++; /*cleanup after the push*/
   *(code->pc) = 0xc4; code->pc++;
   *(code->pc) = 4; code->pc++;
 
   movlresult[3] = objoffset;
   memcpy (code->pc, movlresult, sizeof movlresult);
   code->pc += sizeof movlresult;
 
   return TCL_OK; 
}


static int
set_result_cmd (OBJ_CMD_ARGS) {
   struct code *code = current_code;
   uintptr_t fun = (uintptr_t)Tcl_SetObjResult;
   unsigned char docall[] = {0xff, 0xd0};
   unsigned char movlocal[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00};
   unsigned char pushinterp[] = {0xff, 0x75, 0xc};
   int objoffset;
 
   if (2 != objc) {
     Tcl_WrongNumArgs (interp, 1, objv, "objoffset");
     return TCL_ERROR;
   }
 
   if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &objoffset))
     return TCL_ERROR;
  
   movlocal[3] = objoffset;
   memcpy (code->pc, movlocal, sizeof movlocal);
   code->pc += sizeof movlocal;
 
   *(code->pc) = 0x50; code->pc++; /*push*/
 
   /*push the interp*/
   memcpy (code->pc, pushinterp, sizeof pushinterp);
   code->pc += sizeof pushinterp;
   
   *(code->pc) = 0xb8; code->pc++; /*mov*/
   memcpy (code->pc, &fun, sizeof fun);
   code->pc += sizeof fun;
 
   memcpy (code->pc, docall, sizeof docall);
   code->pc += sizeof docall;
 
   /*cleanup the stack*/
   *(code->pc) = 0x83; code->pc++; /*cleanup after the push*/
   *(code->pc) = 0xc4; code->pc++;
   *(code->pc) = 4; code->pc++;
 
   return TCL_OK;
}


int Codeproc_Init (Tcl_Interp *interp) {
   if (NULL == Tcl_InitStubs (interp, TCL_VERSION, 0))
     return TCL_ERROR;
   
   if (TCL_ERROR == Tcl_PkgProvide (interp, "codeproc", "1.0"))
     return TCL_ERROR;
 
   defcmd (compile_cmd, "compile");
   defcmd (long_cmd, "long");
   defcmd (pointer_cmd, "pointer");
   defcmd (get_long_cmd, "get-long");
   defcmd (begin_code_cmd, "begin-code");
   defcmd (leave_cmd, "leave");
   defcmd (result_cmd, "result");
   defcmd (get_arg_cmd, "get-arg");
   defcmd (long_plus_cmd, "long+");
   defcmd (long_obj_cmd, "long-obj");
   defcmd (set_result_cmd, "set-result");
 
   return TCL_OK;
 }


/*
  TEST CODE
  proc container {} {
   compile testadd {
    long a b c
    begin-code
    result ok
    leave
   }
   puts "$a $b $c"
  }
 
 
 load ./codeproc.so
 
 proc container {} {
   compile testadd {
       pointer aobj bobj robj
       long a b r
       begin-code
       get-arg 1 $aobj  
       get-long $aobj $a
       get-arg 2 $bobj
       get-long $bobj $b
       long+ $a $b $r
       long-obj $r $robj
       set-result $robj
       result ok
       leave
   }
   puts $aobj
 }
 
 container
 puts RESULT:[testadd 2 3]
 
 */