Updated 2011-08-20 20:45:40 by Jorge

Richard Suchenwirth 2007-02-05 - The cross-tabulation code at crosstab works like it should, but I very soon didn't like it - the main proc is too big and has puts sprinkled over the place. So I rewrote it in more functional programming terms by adding some clean functions operating on matrixes (implemented as lists of row lists), lists, resp. strings. The code hasn't become shorter from that, but better organized, and the functions could be re-used in other situations
 set usage {
    usage: crosstab.tcl infile ?-c x,y? ?-sep '\t'? -w 12
    Reads the infile (or stdin if infile is "-") for cases
    Prints on stdout a cross-tabulation for the specified columns.
    -c x,y : columns to use (1...), default is 1,2
    -sep x : column separator, default is ";"
    -w   n : output column width, default is 12
 if {[llength $argv] == 0} {puts stderr $usage; exit 1}

 proc main argv {
     getopt argv -sep sep  ";"
     set sep [subst $sep] ;#-- e.g. for \t
     getopt argv -c   cols 1,2
     getopt argv -w   w    12

     set infile [lindex $argv 0]
     if {$infile eq "-"} {
         set f stdin
     } elseif {[file exists $infile]} {
         set f [open $infile]
     } else {puts stderr "no such file $infile\n$::usage"; exit 1}

     foreach {x y z} [split $cols ,] break
     incr x -1; incr y -1
     if {$z ne ""} {incr z -1}
     while {[gets $f line] >= 0} {
         set fields [split $line $sep]
         set key [lindex $fields $x],[lindex $fields $y],
         if {$z ne ""} {append key [lindex $fields $z]}
         inc N($key)
     if {$f ne "stdout"} {close $f}

     set xs [get_values N 0]
     set ys [get_values N 1]
     if {$z ne ""} {
         set zs [get_values N 2]
     } else {set zs {{}}}
     set totals [lrepeat [llength $ys] [lrepeat [llength $xs] 0]]
     set ylabels [map [list format %-${w}s] [concat $ys Total]]
     foreach zv $zs {
         array unset a
         foreach i [array names N *,$zv] {
             foreach {p1 p2} [split $i ,] break
             set a($p1,$p2) $N($i)
         set m [marray a $xs $ys]
         set totals [mexpr $totals + $m]
         set m [mlabel [msums $m] [concat $xs Total] $ylabels]
         puts $zv\n[mformat $m $w 1]\n
     if {$zs ne "{}"} {
         set m [mlabel [msums $totals] [concat $xs Total] $ylabels]
         puts "Grand Total:\n[mformat $m $w 1]\n"

#------------------------ additional list functions
 proc lrepeat {n args} { #-- built-in from 8.5
    set res {}
    for {set i 0} {$i<$n} {incr i} {eval lappend res $args}
    set res

#-- Map a function to a list, returning the results
 proc map {script list} {
    set res {}
    foreach i $list {lappend res [eval [linsert $script end $i]]}
    set res

#-- Sum of a list
 proc lsum list {
    set res 0
    foreach i $list {set res [expr {$res+$i}]}
    set res

#-- Apply a binary operator element-wise to two matrixes, giving a third
 proc mexpr {mat1 op mat2} {
    set res {}
    foreach row1 $mat1 row2 $mat2 row "" {
        foreach col1 $row1 col2 $row2 {
            lappend row [expr {$col1} $op {$col2}]
        lappend res $row
    set res

#-- Create a matrix from an array with (x,y) keys
 proc marray {_arr cols rows} {
    upvar 1 $_arr arr
    set res {}
    foreach row $rows {
        set outrow {}
        foreach col $cols {lappend outrow [get arr($col,$row)]}
        lappend res $outrow
    set res

#-- Compute row and columns sums, and add them to a matrix (at right resp. bottom
 proc msums matrix {
    set ncol -1
    set ncols {}
    foreach i [lindex $matrix 0] {
        set [incr ncol] 0
        lappend ncols $ncol
    set res {}
    foreach row $matrix {
        foreach cell $row ncol $ncols {
            set $ncol  [expr {[set $ncol]+$cell}]
        lappend res [lappend row [lsum $row]]
    set colsums {}
    foreach i [lindex $matrix 0] ncol $ncols {
        lappend colsums [set $ncol]
    lappend res [lappend colsums [lsum $colsums]]

#-- turn a matrix into a formatted multiline string
 proc mformat {matrix {w 12} {underline 0}} {
    set res ""
    foreach row $matrix line "" {
        foreach cell $row {append line [format %${w}s $cell]}
        lappend res $line
    if $underline {
        set length [string length [lindex $res 0]]
        set res [linsert $res 1 [string repeat - $length]]
    join $res \n

#-- Add column and row labels to a matrix
 proc mlabel {matrix collabels {rowlabels {}}} {
    #-- Add column and row labels to a matrix
    set res [list [linsert $collabels 0 {}]]
    foreach row $matrix label $rowlabels {
        lappend res [linsert $row 0 $label]
    set res

#-- Retrieve a variable's value, if present, else 0
 proc get _var {
     upvar 1 $_var var
     if {[info exists var]} {set var} else {return 0}

#-- Get the values of (x,y,...) array keys by position
 proc get_values {_arr pos} {
     upvar 1 $_arr arr
     set values {}
     foreach i [array names arr] {
         lappend values [lindex [split $i ,] $pos]
     lsort -unique $values

#-- See getopt for discussion
 proc getopt {_argv name {_var ""} {default ""}} {
     upvar 1 $_argv argv $_var var
     set pos [lsearch -regexp $argv ^$name]
     if {$pos>=0} {
         set to $pos
         if {$_var ne ""} {
             set var [lindex $argv [incr to]]
         set argv [lreplace $argv $pos $to]
         return 1
     } else {
         if {[llength [info level 0]] == 5} {set var $default}
         return 0

#-- auto-initializing increment (standard in 8.5)
 proc inc {_var {amount 1}} {
     upvar 1 $_var var
     if ![info exists var] {set var 0}
     incr var $amount

 main $argv

Category Statistics