John;M;soccer Jane;F;tennis Tom;M;football Dick;M;soccer Harry;M;tennis Mary;F;baseball Jeff;M;baseball Jane;F;tenniscan yield the following tabulation:
$ crosstab.tcl test.csv -c 2,3 1\2 F M Total ------------------------------------------------ baseball 1 1 2 football 0 1 1 soccer 0 2 2 tennis 2 1 3 Total 3 5 8which might make the point that tennis is more popular with females than with males, etc.A later addition is that you can also specify a third column as "z axis", where for all values of z a table like the above is produced. The script is also a demonstration of the set usage ...; proc main ...; ...; main pattern that I usually follow.
#!/usr/bin/env tclsh
set usage {$Id: 17641,v 1.7 2007-02-05 19:00:22 jcw Exp $
usage: crosstab.tcl infile ?-c x,y? ?-sep '\t'? -w 12
Reads the infile (or stdin if infile is "-") for cases
Prints on stdout a cross-tabulation for the specified columns.
-c x,y : columns to use (1...), default is 1,2
-sep x : column separator, default is ";"
-w n : output column width, default is 12
}
if {[llength $argv] == 0} {puts stderr $usage; exit 1}
proc main argv {
getopt argv -sep sep ";"
getopt argv -c cols 1,2
getopt argv -w w 12
set infile [lindex $argv 0]
if {$infile eq "-"} {
set f stdin
} elseif {[file exists $infile]} {
set f [open $infile]
} else {puts stderr "no such file $infile\n$::usage"; exit 1}
foreach {x y z} [split $cols ,] break
incr x -1; incr y -1
if {$z ne ""} {incr z -1}
while {[gets $f line] >= 0} {
set fields [split $line $sep]
set key [lindex $fields $x],[lindex $fields $y]
if {$z ne ""} {append key ,[lindex $fields $z]}
inc N($key)
}
if {$f ne "stdout"} {close $f}
set xs [get_values N 0]
set ys [get_values N 1]
if {$z ne ""} {set zs [get_values N 2]} else {set zs {{}}}
foreach zv $zs {
puts $zv
set header [format %-${w}s $x\\$y]
foreach xv [concat $xs Total] {append header [format %${w}s $xv]}
puts $header
puts [string repeat - [string length $header]]
foreach yv [concat $ys Total] {
set line [format %-${w}s $yv]
set sum 0
foreach xv $xs {
set key $xv,$yv
if {$z ne ""} {append key ,$zv}
set n [get N($key)]
append line [format %${w}d $n]
incr sum $n
set key $xv,Total
if {$z ne ""} {append key ,$zv}
inc N($key) $n
}
append line [format %${w}d $sum]
if {$yv eq "Total"} {puts ""}
puts $line
}
puts ""
}
}#-- retrieve a variable value, if existing, else return 0 proc get _var {
upvar 1 $_var var
if {[info exists var]} {set var} else {return 0}
}#-- enumerate values used in array keys at one position (comma-separated) proc get_values {_arr pos} {
upvar 1 $_arr arr
set values {}
foreach i [array names arr] {
lappend values [lindex [split $i ,] $pos]
}
lsort -unique $values
}#-- option retriever, see getopt page proc getopt {_argv name {_var ""} {default ""}} {
upvar 1 $_argv argv $_var var
set pos [lsearch -regexp $argv ^$name]
if {$pos>=0} {
set to $pos
if {$_var ne ""} {
set var [lindex $argv [incr to]]
}
set argv [lreplace $argv $pos $to]
return 1
} else {
if {[llength [info level 0]] == 5} {set var $default}
return 0
}
}#-- From Tcl 8.5, incr will auto-initialize, so this workaround will no longer be needed proc inc {_var {amount 1}} {
upvar 1 $_var var
if ![info exists var] {set var 0}
incr var $amount
}
main $argvSee also crosstab again for a more functional rewrite.Category Example - Arts and crafts of Tcl-Tk programming - Category Statistics
