Version 1 of RSS Monitor

Updated 2004-03-26 23:12:34

This is a small program I wrote to monitor changes in the RSS feeds of various websites and email those changes to me at work. I currently am running it as a cron job on debian linux with Tcl 8.4.6. BDK


The first file, rss.tcl, is a very minimal package to read rss feeds. Yes, I know such a package already exists, but it helped me get into the xml package.

 #!/usr/local/bin/tclsh
 package require Tcl 8.4
 package require struct 2.0
 package require xml 2.6
 package require snit 0.9

 package provide rss 1.0

 namespace eval ::rss {
    variable parser
    variable parserStack
    variable channelObject
    variable itemObject

    variable currentCmds
    array set currentCmds \
        [list \
             elementStart [list [namespace current]::XML.StartRSS] \
             elementEnd [list [namespace current]::XML.EndRSS] \
             characterData {} \
            ]
 }

 proc ::rss::Parser.NewState {elementStart elementEnd characterData} {
    variable parser
    variable parserStack
    variable currentCmds
    variable channel

    $parserStack push \
        [list \
             $currentCmds(elementStart) \
             $currentCmds(elementEnd) \
             $currentCmds(characterData) \
             ]

    set currentCmds(elementStart) $elementStart
    set currentCmds(elementEnd) $elementEnd
    set currentCmds(characterData) $characterData

    return
 }

 proc ::rss::Parser.PreviousState {} {
    variable parser
    variable parserStack
    variable currentCmds

    foreach {currentCmds(elementStart) currentCmds(elementEnd) currentCmds(characterData)} [$parserStack pop] {break}

    return
 }

 proc ::rss::Wrapper.ElementStart {name attlist args} {
    variable currentCmds

    if {$currentCmds(elementStart)!={}} {
        set code [catch {uplevel \#0 $currentCmds(elementStart) [list $name $attlist] $args} result]
        return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result
    }
 }

 proc ::rss::Wrapper.ElementEnd {name args} {
    variable currentCmds

    if {$currentCmds(elementEnd)!={}} {
        set code [catch {uplevel \#0 $currentCmds(elementEnd) [list $name] $args} result]
        return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result
    }
 }

 proc ::rss::Wrapper.CharacterData {data} {
    variable currentCmds

    if {$currentCmds(characterData)!={}} {
        set code [catch {uplevel \#0 $currentCmds(characterData) [list $data]} result]
        return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result
    }
 }

 proc ::rss::parse {data} {
    variable parser
    variable parserStack
    variable channel

    set parser [::xml::parser ]
    set parserStack [::struct::stack]
    set channel [Channel %AUTO%]

    $parser configure \
        -elementstartcommand [namespace current]::Wrapper.ElementStart \
        -elementendcommand [namespace current]::Wrapper.ElementEnd \
        -characterdatacommand [namespace current]::Wrapper.CharacterData

    $parser parse $data

    $parser free
    $parserStack destroy

    return $channel
 }

 proc ::rss::XML.StartRSS {name attlist args} {
    variable channel

    switch -- $name {
        channel {
            Parser.NewState \
                [list [namespace current]::XML.Channel $channel] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
        item {
            set obj [Item %AUTO%]
            $channel AddItem $obj

            Parser.NewState \
                [list [namespace current]::XML.Item $obj] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
    }

    return
 }

 proc ::rss::XML.EndRSS {name args} {
    return
 }

 proc ::rss::XML.Channel {obj name attlist args} {
    switch -- $name {
        title {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable title]]
        }
        link {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable link]]
        }
        description {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable description]]
        }
        item {
            set item [Item %AUTO%]
            $obj AddItem $item

            Parser.NewState \
                [list [namespace current]::XML.Item $item] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
        default {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                {}
        }
    }

    return
 }

 proc ::rss::XML.Item {obj name attlist args} {
    switch -- $name {
        title {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable title]]
        }
        link {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable link]]
        }
        description {
            Parser.NewState \
                [list [namespace current]::XML.ElementStart] \
                [list [namespace current]::XML.ElementEnd] \
                [list [namespace current]::XML.CharacterData [$obj GetVariable description]]
        }
        default {
             Parser.NewState \
                 [list [namespace current]::XML.ElementStart] \
                 [list [namespace current]::XML.ElementEnd] \
                 {}
        }
    }

    return
 }

 proc ::rss::XML.ElementStart {name attlist args} {
    Parser.NewState \
        [list [namespace current]::XML.ElementStart] \
        [list [namespace current]::XML.ElementEnd] \
        {}

    return
 }

 proc ::rss::XML.ElementEnd {name args} {
    Parser.PreviousState

    return
 }

 proc ::rss::XML.CharacterData {var data} {
    upvar \#0 $var myVar

    append myVar $data

    return
 }

 ::snit::type ::rss::Channel {
    variable title {}
    variable link {}
    variable description {}
    variable items {}

    destructor {
        foreach item $items {
            $item destroy
        }

        return
    }

    method title {} {
        return $title
    }

    method link {} {
        return $link
    }

    method description {} {
        return $description
    }

    method items {} {
        return $items
    }

    method GetVariable {var} {
        return [varname $var]
    }

    method AddItem {item} {
        lappend items $item
    }
 }

 ::snit::type ::rss::Item {
    variable title {}
    variable link {}
    variable pubDate {}
    variable description {}

    method title {} {
        return $title
    }

    method link {} {
        return $link
    }

    method pubDate {} {
        return $pubDate
    }

    method description {} {
        return $description
    }

    method GetVariable {var} {
        return [varname $var]
    }
 }

This is the main body of the program, rss_monitor.tcl

 #!/usr/local/bin/tclsh
 lappend auto_path .

 package require Tcl 8.4
 package require http 2.4
 package require mime 1.3
 package require smtp 1.3
 package require md5 1.4
 package require rss 1.0

 #Reads the options file.
 proc loadOptions {file} {
    #Initialize the interpreter which executes the contents
    #of the options file.
    set interp [interp create -safe]
    $interp eval [list namespace delete ::]
    $interp alias email loadOptions.email

    #Read the options file.
    set inFile [open $file r]
    $interp eval [read $inFile]
    close $inFile

    interp delete $interp

    return
 }

 proc loadOptions.email {email data} {
    #Initialize the interpeter which executes the contents of the
    #data variable.
    set interp [interp create -safe]
    $interp eval [list namespace delete ::]
    $interp alias rss loadOptions.rss $email

    $interp eval $data

    interp delete $interp

    return
 }

 proc loadOptions.rss {email url} {
    #Store the url in the global options array.
    if {![info exists ::options(email,$url)]} {
        lappend ::options(url) $url
    }
    lappend ::options(email,$url) $email

    return
 }

 #Loads the MD5 hash records.
 proc loadHashes {file} {
    #Create the interpeter.
    set interp [interp create -safe]
    $interp eval [list namespace delete ::]
    $interp alias hash loadHashes.hash

    #Open the file and read in the data.
    set inFile [open $file r]
    $interp eval [read $inFile]
    close $inFile

    interp delete $interp

    return
 }

 proc loadHashes.hash {url hash} {
    #Store the hash in the global options array.
    set ::options(hash,$url) $hash

    return
 }

 #Saves the hashes for all of the url's.
 proc saveHashes {file} {
    set outFile [open $file w]

    foreach url $::options(url) {
        if {[info exists ::options(hash,$url)]} {
            puts $outFile [list hash $url $::options(hash,$url)]
        }
    }

    close $outFile
 }

 #Do the work of parsing RSS feeds, generating and comparing hashes, sending messages.
 proc generateMessages {} {
    foreach url $::options(url) {
        #Retrieve the current hash for the URL.
        if {[info exists ::options(hash,$url)]} {
            set hash $::options(hash,$url)
        } else {
            set hash {}
        }

        #Retrieve the URL.
        set token [::http::geturl $url -timeout 2000]
        if {[::http::status $token] != "ok"} {
            puts "Could not retrieve data for $url"
            ::http::cleanup $token
            continue
        }
        set data [::http::data $token]
        ::http::cleanup $token

        #Parse the RSS feed.
        set channel [::rss::parse $data]

        set msgBody {}

        #Generate the msg body.
        append msgBody "[$channel title] ([$channel link])\n"
        append msgBody "[$channel description]\n\n"
        foreach item [$channel items] {
            append msgBody "----------------------------------------------------------------\n"
            append msgBody "[$item title] ([$item link])\n"
            append msgBody "\n"
            append msgBody "[$item description]\n"
            append msgBody "----------------------------------------------------------------\n"
            append msgBody "\n"
        }

        #Generate the new hash.
        #I thought it would be more efficient to generate the hash prior to parsing and
        #creating the message, but some sites change the comments within their XML data
        #to reflect when the feed was generated, like sourforge.net for example,
        #so this is a quick and simple fix.
        set newHash [::md5::md5 $msgBody]

        #If the hashes do not match, then the site has changed, 
        #so send out the messages.
        if {$newHash != $hash} {

            #Create the MIME message.
            set mime [::mime::initialize -canonical text/plan -string $msgBody]
            ::mime::setheader $mime Subject "[$channel title] has been updated"

            foreach email $::options(email,$url) {
                ::smtp::sendmessage $mime \
                    -recipients $email \
                    -originator "[email protected]"
            }

            #Destroy (deallocate) the MIME message.
            ::mime::finalize $mime

            #Destroy (deallocate) parsed data.
            $channel destroy
        }

        #Store the new hash in the options array.
        set ::options(hash,$url) $newHash
    }
 }

 #The main program.

 #Locations of the configuration files.
 set configFile "rss_config"
 set hashFile "rss_hashes"

 loadOptions $configFile

 if {[file exists $hashFile]} {
    loadHashes $hashFile
 }

 generateMessages

 saveHashes $hashFile

And finally, and example configuration file, rss_config

 email [email protected] {
        #Tcl'ers Wiki
        rss http://mini.net/tcl/rss.xml

        #Slashdot
        rss http://slashdot.org/index.rss

        #Sourceforge
        rss http://sourceforge.net/export/rss2_sfnews.php?feed
 }

 email [email protected] {
        #Sourceforge
        rss http://sourceforge.net/export/rss2_sfnews.php?feed          
 }

[ Category Internet ]