[WJR] - There was a discussion on the [TclHttpd] mailing list about processing [RSS]. Here's a variant I came up with using [tDOM] and [The Tcler's Wiki] RSS feed (it should work with any RSS feed):
[Doc_Dynamic]
[
package require http
package require tdom
html::set token [http::geturl http://mini.net/tcl/rss.xml]
html::set rss_xml [http::data $token]
html::set doc [dom parse $rss_xml]
html::set root [$doc documentElement]
html::set stories [$root selectNodes /rss/channel/item]
]
[html::head {RSS Example}]
[html::bodyTag]
[html::h1 {RSS Example}]
[html::foreach story $stories {
-
[[$story selectNodes title/text()] nodeValue]
([[$story selectNodes pubDate/text()] nodeValue])
- [[$story selectNodes description/text()] nodeValue]
}]
[html::end]
[tDOM] and [TclHttpd] makes this pretty simple!
----
[DG] -- That little starter script got me going. I was messing with [TclXML] at first, but [tDOM] is a whole lot easier to use. See it in action [http://www.pobox.com/~davygrvy/news.tml]
# tmlrss.tcl --
#
# Process RSS (0.91, 0.92, 0.93, 1.0, 2.0) newsfeeds into
# 4.01 HTML. For use in .tml templates with tclhttpd.
#
# http://www.xml.com/pub/a/2002/12/18/dive-into-xml.html
# http://blogs.law.harvard.edu/tech/rss
# http://www.tdom.org/
#
# Instructions:
# 1) place this file in your tml library directory so
# the server sources it automatically at startup.
# 2) Call TmlRss_GenHTML with the url of the RSS feed
# from your .tml file (set to dynamic generation).
# This function returns the formatted HTML of the feed.
#
# By David Gravereaux
#
package provide tmlrss 0.2
package require httpd
package require http
package require html
package require uri
package require htmlparse
if {[catch {package require tdom} err]} {
eval [subst {proc TmlRss_GenHTML {uri {attr {}}} {
return ""}
}]
return -code error $err
}
namespace eval tmlrss {
variable rss_xml_cache
array set rss_xml_cache {}
}
proc TmlRss_GenHTML {uri {attr {}}} {
append html "\n"
if {[catch {
set doc [tmlrss::GetDOM $uri]
append html [tmlrss::genTitleBlock $doc]
append html [tmlrss::genContent $doc]
} err]} {
append html "$uri | [tmlrss::fixUri $err] |
"
}
append html "
\n"
catch {$doc delete}
return $html
}
proc tmlrss::GetDOM {uri} {
# Is it cached locally?
if {[isCachedXMLExpired $uri]} {
set doc [fetchXML $uri]
} else {
set doc [dom parse -baseurl [uriBase $uri] $rss_xml_cache($uri)]
}
return $doc
}
# returns the DOM object of the RSS feed.
proc tmlrss::fetchXML {uri} {
variable rss_xml_cache
set token [http::geturl $uri]
if {[http::status $token] != "ok" || [http::ncode $token] != 200} {
set err [http::code $token]
http::cleanup $token
return -code error $err
}
set xml [http::data $token]
upvar #0 $token state
array set meta $state(meta)
http::cleanup $token
set doc [dom parse -baseurl [uriBase $uri] $xml]
# Append a download time to the DOM as a comment.
set comment [$doc createComment \
"downloaded: [clock format [clock seconds] -format {%a, %e %b %Y %T GMT} -gmt 1]"]
[$doc documentElement] appendChild $comment
# Append when the server considers it expired to the DOM.
catch {
set comment [$doc createComment "expires: $meta(Expires)"]
[$doc documentElement] appendChild $comment
}
# Save it in the cache.
set rss_xml_cache($uri) [$doc asXML -indent 4]
return $doc
}
proc tmlrss::isCachedXMLExpired {uri} {
variable rss_xml_cache
#TODO: make this work
if {[info exist rss_xml_cache($uri)]} {
set xml $rss_xml_cache($uri)
set doc [dom parse -baseurl [uriBase $uri] $xml]
# TODO: Is it past the TTL (if supported)?
# just return yes, for now.
if {1} {
$doc delete
unset rss_xml_cache($uri)
return true
}
return false
} else {
return true
}
}
proc tmlrss::uriBase {uri} {
array set info [uri::split $uri]
set info(path) [file dirname $info(path)]
return [eval uri::join [array get info]]
}
proc tmlrss::getRSSVersion {doc} {
set root [$doc documentElement]
switch [$root nodeName] {
rss {
if {[$root hasAttribute version]} {
return [$root getAttribute version]
}
# Best guess as most stuff is optional...
return 0.92
}
rdf:RDF {
return 1.0
}
default {
return 0
}
}
}
proc tmlrss::genTitleBlock {doc} {
set root [$doc documentElement]
append html "\n\n"
switch [getRSSVersion $doc] {
0.91 - 0.92 - 0.93 - 2.0 {
set titleXpath {/rss/channel/title/text()}
set linkXpath {/rss/channel/link/text()}
set imgNodeXpath {/rss/channel/image/title}
set imgTitleXpath {/rss/channel/image/title/text()}
set imgLinkXpath {/rss/channel/image/url/text()}
set imgWidthXpath {/rss/channel/image/width/text()}
set imgHeightXpath {/rss/channel/image/height/text()}
}
1.0 {
set titleXpath {/rdf:RDF/*[local-name()='channel']/*[local-name()='title']/text()}
set linkXpath {/rdf:RDF/*[local-name()='channel']/*[local-name()='link']/text()}
set imgNodeXpath {/rdf:RDF/*[local-name()='image']}
set imgTitleXpath {/rdf:RDF/*[local-name()='image']/*[local-name()='title']/text()}
set imgLinkXpath {/rdf:RDF/*[local-name()='image']/*[local-name()='url']/text()}
set imgWidthXpath {/rdf:RDF/*[local-name()='image']/*[local-name()='width']/text()}
set imgHeightXpath {/rdf:RDF/*[local-name()='image']/*[local-name()='height']/text()}
}
}
append html "[nodeTxt $root $titleXpath] | \n"
if {[$root selectNode $imgNodeXpath] != ""} {
append html " | \n"
}
append html "
\n\n"
return $html
}
proc tmlrss::genContent {doc} {
set root [$doc documentElement]
append html "\n"
switch [getRSSVersion $doc] {
0.91 - 0.92 - 0.93 - 2.0 {
set storiesXpath {/rss/channel/item}
set titleXpath {title/text()}
set linkXpath {link/text()}
set pubDateXpath {pubDate/text()}
set descXpath {description/text()}
}
1.0 {
set storiesXpath {/rdf:RDF/*[local-name()='item']}
set titleXpath {*[local-name()='title']/text()}
set linkXpath {*[local-name()='link']/text()}
set pubDateXpath {*[local-name()='pubDate']/text()}
set descXpath {*[local-name()='description']/text()}
}
}
set stories [$root selectNodes $storiesXpath]
append html [html::foreach story $stories {
[nodeTxt $story $descXpath]
|
}]
append html "\n"
return $html
}
proc tmlrss::fixUri {uri} {
return [string map { & & < < > > } $uri]
}
proc tmlrss::fixTxt {txt} {
return [string map { & & } $txt]
}
proc tmlrss::nodeTxt {node xpath} {
if {[$node selectNode $xpath] != ""} {
# make sure we turn all entities into chars and only
# if there is a lonely &, quote it back to an entity.
return [fixTxt [::htmlparse::mapEscapes [[$node selectNode $xpath] nodeValue]]]
} else {
return ""
}
}
proc tmlrss::ShutDown {} {
}
Httpd_RegisterShutdown tmlrss::ShutDown
A .tml file to call it would look something like this:
[Doc_Dynamic]
SNews, you lose!
[html::foreach rss_source {
http://www.npr.org/rss/rss.php?topicId=2
http://rss.news.yahoo.com/rss/highestrated
http://mini.net/tcl/rss.xml
http://sourceforge.net/export/rss2_projsummary.php?group_id=10894
http://sourceforge.net/export/rss2_projnews.php?group_id=10894
http://sourceforge.net/export/rss2_projdocs.php?group_id=10894
} {[TmlRss_GenHTML $rss_source "border=1"]}]
[WJR] - Nice, works great on my system! You should consider making this one of TclHttpd's sample apps (a number of apps come with the distribution in the sampleapp subdir).
[DG] ''Thanks..'' It's almost done. I had some problems with RSS v1.0, but just got them fixed. XML namespaces are no fun.
----
[Category TclHttpd]