Here is a pure Tcl script that builds up the history of the chatroom. See also Larry Virden's ksh chatroom snaphost history. Note that Larry's script requires (ksh + lynx + sed + diff + egrep).
Notes:
A first version, which sometimes stutters and is slower, is at Tcl chatroom snaphost history; I left it there so that I can reconstruct my mistakes ... This version has the following improvements:
#! /usr/local/bin/tclsh # Author: Miguel Sofer < mailto:[email protected] > # Date: Sept. 23, 2001 # Version: 0.4 # Adapted from Larry W. Virden's program (ksh+lynx+sed+diff+egrep) # Purpose: to scrape tcl'ers chat log pages and accumulate them # NOTE: This script is going to record private messages and memos to your # id, so you should be careful to review the information before making it # public. ############################################################# # USAGE ############################################################# # # First define the variables URL and chatPath in this section. # # Then call this program giving a time (in seconds) between checks, # it will run forever and update the history file periodically. # # # Set URL to your chat information # set URL {http://mini.net/cgi-bin/chat.cgi?action=chat&name=miguel&password=I_WONT_TELL_YOU&updatefrequency=45&color=800000&new_msg_on_top=0&ls=&pause=#end} # To get this URL, use your web browser to visit the # chat room, and then check the URL information for the dialog frame. # WARNING: The URL contains your chat password - so this file needs to be # protected appropriately. # # Set the directory for history files histFile_date.htm # set chatPath /SCRAPE ############################################################# set len [llength $argv] if {$len != 1} { if {$len} { error "USAGE: $argv0 seconds_between_snapshots" } else { set argv 180 } } set time [expr {1000 * [lindex $argv 0]}] package require http proc repeatRecord {time} { recordData after $time repeatRecord $time } # # Build a regexp to clean up; this is very dependent on the # particular formatting of the chat program # The cleaned-up data has to start with a "real data" line # # match headers, up to <BODY ...> tag set re1 {^.*<BO[^>]*>} # match entered/left lines set re2 {<B>[^ :]* h[^R]*R>\n} # match trailers, after <A ...> tag set re3 {<A NAME.*} set re "${re1}|${re2}|${re3}" # # getData gets and cleans up the data from the chat. # proc getChat {} { set token {} if {[catch {set token [::http::geturl $::URL -timeout 30000]}]\ || ([::http::status $token] != "ok")\ || ([::http::ncode $token] != "200")} { ::http::cleanup $token return {} } regsub -all $::re [::http::data $token] {} data ::http::cleanup $token set data } # # the workhorse: record the data # variable oldData {} proc recordData {} { variable oldData set data [getChat] set dataLen [string length $data] if {$dataLen} { set new $data } else { set new "<B>******* [clock format [clock seconds]]: ... no connection</B>\n<BR>\n" } set date [clock format [clock seconds] -format %y%m%d] set histFile [file join $::chatPath histfile_$date.htm] # # If there is no history file, start one with the current data # if {![file isfile $histFile]} { set hist [open $histFile w] puts $hist {<HTML><HEAD><STYLE TYPE="text/css"> </STYLE></HEAD><BODY BGColor=#ffffff>} puts $hist "<B>******* [clock format [clock seconds]]: START RECORDING</B>\n<BR>\n<BR>\n" puts -nonewline $hist $new close $hist set oldData $data return } # # If old data is not in memory, get it from the history file; # only get as many bytes as you need # if {![string length $oldData]} { set hist [open $histFile] set oldData [read $hist] set oldData [string range $oldData end-[string length $data] end] close $hist } # # Record the new messages # set hist [open $histFile a] if {$dataLen} { set pos [findOverlap $oldData $data] if {$pos} { puts -nonewline $hist [string range $data $pos end] } else { puts $hist "<B>....... possibly missing data</B>\n<BR>\n" puts -nonewline $hist $data } set oldData $data } else { puts $hist $new } close $hist } # # This proc finds the new messages; specialized from # http://mini.net/tcl/2184.html # proc findOverlap {str1 str2} { set first2 [string range $str2 0 [string first "\n" $str2]] set firstLen [string length $first2] while {1} { set index [string first $first2 $str1] if {$index == -1} { return 0 } set str1 [string range $str1 $index end] set len [string length $str1] if {[string equal -length $len $str1 $str2]} { return $len } set str1 [string range $str1 $firstLen end] } } repeatRecord $time vwait forever