Version 7 of bz2:compress and bz2:decompress

Updated 2010-02-06 10:21:23 by newacct

GPS: libbzip2 provides easy compression and decompression using the same algorithm as bzip2. For a project I was working on, I needed the ability to compress header files that contained large files converted to hex. The little extension below is what I used. It cannot load normal .bz2 files, because it uses its own simple format: the uncompressed data length as decimal text, a NUL byte, and then the compressed data (uncompressedDataLength\0data). Still, it's quite useful.

It could be used to compress large amounts of data before sending over a network, and a lot of other things.

Use it however you want (just don't sue me).


Code

  #include <limits.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include "bzip2-1.0.2/bzlib.h"
  #include "tcl.h"


  /* Parameter list shared by the command procedures below; written directly after a function name to form its full signature. */
  #define OBJ_CMD_ARGS (ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[])

  /*
   * bz2:compress dataToCompress
   *
   * Compresses the byte-array representation of objv[1] with libbzip2 and
   * sets the interpreter result to a byte array laid out as:
   *
   *   <uncompressed length as decimal text> '\0' <bzip2 compressed data>
   *
   * Returns TCL_OK on success. Returns TCL_ERROR (with a message in the
   * interpreter result) on a wrong argument count, when libbzip2 reports
   * a failure, or when the output would not fit in an int-sized byte array.
   */
  int BZ2CompressCmd OBJ_CMD_ARGS {
    Tcl_Obj *result;
    unsigned char *inputData;
    int inputLength;
    char *tmp;
    unsigned int tmpSize;
    unsigned int compSize;
    /* 1 - 9; 9 gives the best compression but uses the most runtime memory*/
    int blockSize = 9;
    /*1 - 4; 4 gives the most diagnostic info*/
    int verbosity = 0;
    /*30 is suggested; see docs for bzip2 for full info*/
    int workFactor = 30;
    int headerOffset;

    if (objc != 2) {
      Tcl_WrongNumArgs (interp, 1, objv, "dataToCompress");
      return TCL_ERROR;
    }

    inputData = Tcl_GetByteArrayFromObj (objv[1], &inputLength);

    /*
     * The bzip2 documentation says inputLength + 1% + 600 bytes; a further
     * 40 bytes are reserved for the length header. The sum is computed in
     * unsigned arithmetic so that an input close to INT_MAX cannot cause
     * signed overflow.
     */
    tmpSize = (unsigned int) inputLength / 100 + (unsigned int) inputLength + 600 + 40;

    tmp = ckalloc (tmpSize);

    /*
     * Write the length header. sprintf returns the number of characters
     * written, excluding the terminating NUL, so + 1 skips past the NUL.
     * The buffer is always at least 640 bytes, so the header cannot overrun.
     */
    headerOffset = sprintf (tmp, "%d", inputLength) + 1;

    /* Only the space after the header is available to the compressor. */
    compSize = tmpSize - (unsigned int) headerOffset;

    if (BZ2_bzBuffToBuffCompress (tmp + headerOffset, &compSize, (char *) inputData,
      (unsigned int) inputLength, blockSize, verbosity, workFactor
    ) != BZ_OK) {
      ckfree (tmp);
      Tcl_SetResult (interp, "compression failed", TCL_STATIC);
      return TCL_ERROR;
    }

    /* Tcl_NewByteArrayObj takes an int length; refuse anything larger. */
    if (compSize > (unsigned int) INT_MAX - (unsigned int) headerOffset) {
      ckfree (tmp);
      Tcl_SetResult (interp, "compressed data too large", TCL_STATIC);
      return TCL_ERROR;
    }

    /*The BZ2 function sets compSize to the compressed size, but it doesn't know about our header.*/
    result = Tcl_NewByteArrayObj ((unsigned char *) tmp, (int) (compSize + (unsigned int) headerOffset));

    ckfree (tmp);

    Tcl_SetObjResult (interp, result);

    return TCL_OK;
  }

  /*
   * bz2:decompress dataToDecompress
   *
   * Expects the byte-array representation of objv[1] to be laid out as:
   *
   *   <uncompressed length as decimal digits> '\0' <bzip2 compressed data>
   *
   * Decompresses the payload with libbzip2 and sets the interpreter result
   * to a byte array holding the decompressed bytes.
   *
   * Returns TCL_OK on success. Returns TCL_ERROR (with a message in the
   * interpreter result) on a wrong argument count, a missing or malformed
   * length header, or when libbzip2 reports a failure.
   */
  int BZ2DecompressCmd OBJ_CMD_ARGS {
    int compDataLength;
    char *compData;
    char *headerEnd = NULL;
    char *p;
    int headerOffset;
    unsigned int decompLength = 0;
    char *decompData;
    int verbosity = 0;
    Tcl_Obj *result;

    if (objc != 2) {
      Tcl_WrongNumArgs (interp, 1, objv, "dataToDecompress");
      return TCL_ERROR;
    }

    compData = (char *) Tcl_GetByteArrayFromObj (objv[1], &compDataLength);

    /*
     * The input is an arbitrary byte array, not a C string, so the NUL
     * ending the header must be searched for within the array bounds.
     */
    if (compDataLength > 0) {
      headerEnd = memchr (compData, '\0', (size_t) compDataLength);
    }

    /* Reject input with no NUL at all or with an empty length field. */
    if (headerEnd == NULL || headerEnd == compData) {
      Tcl_SetResult (interp, "invalid compressed data: missing length header", TCL_STATIC);
      return TCL_ERROR;
    }

    /*
     * Parse the length by hand: only decimal digits are accepted, and the
     * value is capped at INT_MAX because Tcl_NewByteArrayObj takes an int.
     */
    for (p = compData; p < headerEnd; p++) {
      unsigned int digit;

      if (*p < '0' || *p > '9') {
        Tcl_SetResult (interp, "invalid compressed data: bad length header", TCL_STATIC);
        return TCL_ERROR;
      }

      digit = (unsigned int) (*p - '0');

      if (decompLength > ((unsigned int) INT_MAX - digit) / 10) {
        Tcl_SetResult (interp, "invalid compressed data: bad length header", TCL_STATIC);
        return TCL_ERROR;
      }

      decompLength = decompLength * 10 + digit;
    }

    /*+ 1 for NUL*/
    headerOffset = (int) (headerEnd - compData) + 1;

    /* Allocate at least one byte so a stored length of 0 never requests a zero-sized block. */
    decompData = ckalloc (decompLength > 0 ? decompLength : 1);

    if (BZ2_bzBuffToBuffDecompress (decompData, &decompLength, compData + headerOffset,
      (unsigned int) (compDataLength - headerOffset), 0, verbosity
    ) != BZ_OK) {
      ckfree (decompData);
      Tcl_SetResult (interp, "decompression failed", TCL_STATIC);
      return TCL_ERROR;
    }

    /* decompLength now holds the number of bytes actually written by libbzip2. */
    result = Tcl_NewByteArrayObj ((unsigned char *) decompData, (int) decompLength);

    ckfree (decompData);

    Tcl_SetObjResult (interp, result);

    return TCL_OK;
  }


  /*
   * Registers the bz2:compress and bz2:decompress commands in the given
   * interpreter, with no client data and no delete callback.
   * Always returns TCL_OK.
   */
  int Bz2_Init (Tcl_Interp *interp) {
    Tcl_CreateObjCommand (interp, "bz2:compress", BZ2CompressCmd,
      (ClientData) NULL, (Tcl_CmdDeleteProc *) NULL);
    Tcl_CreateObjCommand (interp, "bz2:decompress", BZ2DecompressCmd,
      (ClientData) NULL, (Tcl_CmdDeleteProc *) NULL);

    return TCL_OK;
  }

  /* The signature helper macro is not needed past this point. */
  #undef OBJ_CMD_ARGS

Usage

 set compData [bz2:compress $hugeString]

 set decompData [bz2:decompress $compData]

Simple, eh?