set usage {
usage: crosstab.tcl infile ?-c x,y? ?-sep '\t'? -w 12
Reads the infile (or stdin if infile is "-") for cases
Prints on stdout a cross-tabulation for the specified columns.
-c x,y : columns to use (1...), default is 1,2
-sep x : column separator, default is ";"
-w n : output column width, default is 12
}
if {[llength $argv] == 0} {puts stderr $usage; exit 1}
proc main argv {
getopt argv -sep sep ";"
set sep [subst $sep] ;#-- e.g. for \t
getopt argv -c cols 1,2
getopt argv -w w 12
set infile [lindex $argv 0]
if {$infile eq "-"} {
set f stdin
} elseif {[file exists $infile]} {
set f [open $infile]
} else {puts stderr "no such file $infile\n$::usage"; exit 1}
foreach {x y z} [split $cols ,] break
incr x -1; incr y -1
if {$z ne ""} {incr z -1}
while {[gets $f line] >= 0} {
set fields [split $line $sep]
set key [lindex $fields $x],[lindex $fields $y],
if {$z ne ""} {append key [lindex $fields $z]}
inc N($key)
}
if {$f ne "stdout"} {close $f}
set xs [get_values N 0]
set ys [get_values N 1]
if {$z ne ""} {
set zs [get_values N 2]
} else {set zs {{}}}
set totals [lrepeat [llength $ys] [lrepeat [llength $xs] 0]]
set ylabels [map [list format %-${w}s] [concat $ys Total]]
foreach zv $zs {
array unset a
foreach i [array names N *,$zv] {
foreach {p1 p2} [split $i ,] break
set a($p1,$p2) $N($i)
}
set m [marray a $xs $ys]
set totals [mexpr $totals + $m]
set m [mlabel [msums $m] [concat $xs Total] $ylabels]
puts $zv\n[mformat $m $w 1]\n
}
if {$zs ne "{}"} {
set m [mlabel [msums $totals] [concat $xs Total] $ylabels]
puts "Grand Total:\n[mformat $m $w 1]\n"
}
}#------------------------ additional list functions proc lrepeat {n args} { #-- built-in from 8.5
set res {}
for {set i 0} {$i<$n} {incr i} {eval lappend res $args}
set res
}#-- Map a function to a list, returning the results proc map {script list} {
set res {}
foreach i $list {lappend res [eval [linsert $script end $i]]}
set res
}#-- Sum of a list proc lsum list {
set res 0
foreach i $list {set res [expr {$res+$i}]}
set res
}#-- Apply a binary operator element-wise to two matrixes, giving a third proc mexpr {mat1 op mat2} {
set res {}
foreach row1 $mat1 row2 $mat2 row "" {
foreach col1 $row1 col2 $row2 {
lappend row [expr {$col1} $op {$col2}]
}
lappend res $row
}
set res
}#-- Create a matrix from an array with (x,y) keys proc marray {_arr cols rows} {
upvar 1 $_arr arr
set res {}
foreach row $rows {
set outrow {}
foreach col $cols {lappend outrow [get arr($col,$row)]}
lappend res $outrow
}
set res
}#-- Compute row and columns sums, and add them to a matrix (at right resp. bottom proc msums matrix {
set ncol -1
set ncols {}
foreach i [lindex $matrix 0] {
set [incr ncol] 0
lappend ncols $ncol
}
set res {}
foreach row $matrix {
foreach cell $row ncol $ncols {
set $ncol [expr {[set $ncol]+$cell}]
}
lappend res [lappend row [lsum $row]]
}
set colsums {}
foreach i [lindex $matrix 0] ncol $ncols {
lappend colsums [set $ncol]
}
lappend res [lappend colsums [lsum $colsums]]
}#-- turn a matrix into a formatted multiline string proc mformat {matrix {w 12} {underline 0}} {
set res ""
foreach row $matrix line "" {
foreach cell $row {append line [format %${w}s $cell]}
lappend res $line
}
if $underline {
set length [string length [lindex $res 0]]
set res [linsert $res 1 [string repeat - $length]]
}
join $res \n
}#-- Add column and row labels to a matrix proc mlabel {matrix collabels {rowlabels {}}} {
#-- Add column and row labels to a matrix
set res [list [linsert $collabels 0 {}]]
foreach row $matrix label $rowlabels {
lappend res [linsert $row 0 $label]
}
set res
}#-- Retrieve a variable's value, if present, else 0 proc get _var {
upvar 1 $_var var
if {[info exists var]} {set var} else {return 0}
}#-- Get the values of (x,y,...) array keys by position proc get_values {_arr pos} {
upvar 1 $_arr arr
set values {}
foreach i [array names arr] {
lappend values [lindex [split $i ,] $pos]
}
lsort -unique $values
}#-- See getopt for discussion proc getopt {_argv name {_var ""} {default ""}} {
upvar 1 $_argv argv $_var var
set pos [lsearch -regexp $argv ^$name]
if {$pos>=0} {
set to $pos
if {$_var ne ""} {
set var [lindex $argv [incr to]]
}
set argv [lreplace $argv $pos $to]
return 1
} else {
if {[llength [info level 0]] == 5} {set var $default}
return 0
}
}#-- auto-initializing increment (standard in 8.5) proc inc {_var {amount 1}} {
upvar 1 $_var var
if ![info exists var] {set var 0}
incr var $amount
}
main $argvCategory Statistics
