if 0 {[Richard Suchenwirth] 2005-02-09 - As a companion piece to [RS's RSS], this midnight fun project script works without [Tk] - specify a [RSS] URL on the command line, and get on stdout a generated [HTML] page with the headlines linked locally to compacted versions of the pages they refer to, so the extension of the RSS is contained in a single file (about 23...110 KB size). My use case is that I want to "reap" fresh news to download to the [iPaq] for offline reading, but avoiding fancy frames, ads, links that I couldn't follow. Usage example: * C:\_Ricci\sep>tclsh rss2html.tcl > t.html http://news.bbc.co.uk/rss/newsonline_world_edition/front_page/rss091.xml Result at [http://mini.net/files/bbc.html] - Sample screenshot: [http://mini.net/files/rss2html.jpg] Again: no warranties at all, but I'm basically happy with the output of this script on my few test cases (Spiegel online, Tagesspiegel, BBC...) - feel free to add criticisms or improvements :) A tiny [bash] script called ''feedme'' reaps all feeds that I want, on demand, and ActiveSync takes care to transfer the HTML pages to the little thing: tclsh x:/tcl/rss2html.tcl > Spiegel.htm www.spiegel.de/schlagzeilen/rss/0,5291,,00.xml tclsh x:/tcl/rss2html.tcl > Tagesspiegel.htm www.tagesspiegel.de/feed/index.xml tclsh x:/tcl/rss2html.tcl > BBC.htm news.bbc.co.uk/rss/newsonline_world_edition/front_page/rss091.xml } set usage { usage: rss2html.tcl rss_url > htmlfile } package require http package require uri proc main argv { if {[llength $argv] != 1} {puts stderr $::usage; exit} set rss [lindex $argv 0] ;# other arguments ignored for now set content [readRSS $rss] set n 0 puts "
From: $rss-
# NOTE(review): the comment line below preserves, verbatim, the fragment that
# opened this line in the source. It looks like the surviving tail of main and
# of the proc that builds the RSS content (presumably readRSS); the rest of
# those bodies was lost when the page markup was stripped. It cannot be
# reconstructed from here and must be restored from the original script.
# - -$content\n} - - - - \n"} } } set res }

# html2txt --
#   Split an HTML string into alternating "tag" / "text after the tag" items,
#   drop every pair whose text part contains "src=", and replace a small set
#   of HTML entities in the result.
#
# Arguments:
#   html   the markup to process
#
# Results:
#   The string representation of the list {tag text tag text ...} after
#   entity replacement (string map is applied to the whole list at once).
proc html2txt {html} {
    set res {}
    # One match = one tag, optional blanks, then the text up to the next tag.
    set re {(<[^>]+>) *([^<>]*)}
    foreach {all tag content} [regexp -all -inline $re $html] {
        if {![regexp src= $content]} {
            lappend res $tag $content
        }
    }
    # NOTE(review): the source table arrived with its entity names already
    # decoded and its replacements mis-encoded, so the names below are a
    # reconstruction - confirm against the original script. \uXXXX escapes
    # are expanded when Tcl parses this list, which keeps the script
    # independent of the encoding it is saved in.
    string map {
        &Uuml;  \u00DC
        &szlig; \u00DF
        &auml;  \u00E4
        &ouml;  \u00F6
        &uuml;  \u00FC
        &#039;  '
        &#39;   '
        &#228;  \u00E4
        &#246;  \u00F6
        &#252;  \u00FC
        &#223;  \u00DF
        &quot;  \"
    } $res
}

# despace --
#   Collapse every run of whitespace to a single blank and trim both ends.
proc despace string {string trim [regsub -all {\s+} $string " "]}

#-- courtesy KPV's http://wiki.tcl.tk/11831
# geturl_followRedirects --
#   Call http::geturl and follow 301/302/303/307 redirects.
#
# Arguments:
#   url    the address to fetch
#   args   extra options, passed unchanged to every http::geturl call
#
# Results:
#   The http token of the last request made: the first non-redirect
#   response, a redirect response that carries no Location header, or the
#   response reached when the redirect limit is hit. The caller owns that
#   token; tokens of intermediate hops are cleaned up here.
proc geturl_followRedirects {url args} {
    set maxRedirects 10 ;# guards against redirect loops
    for {set hops 0} {1} {incr hops} {
        set token [eval [list http::geturl $url] $args]
        if {![string match {30[1237]} [::http::ncode $token]]} {return $token}
        if {$hops >= $maxRedirects} {return $token}

        # Header names are case-insensitive, so do not rely on "Location".
        set location ""
        foreach {name value} [set ${token}(meta)] {
            if {[string equal -nocase $name Location]} {
                set location $value
                break
            }
        }
        if {$location == ""} {
            return $token
        }

        # Resolve against the URL just requested, not the very first one,
        # and start from empty arrays so no key survives from an earlier hop.
        array unset base
        array set base [::uri::split $url]
        array unset target
        array set target [::uri::split $location $base(scheme)]
        http::cleanup $token ;# intermediate response is no longer needed

        if {$target(host) == ""} {
            set target(host) $base(host)
            if {[info exists base(port)]} {
                set target(port) $base(port)
            }
        }
        # problem w/ relative versus absolute paths
        set url [eval [list ::uri::join] [array get target]]
    }
}

main $argv

if 0 { ---- [Category Internet] | [Arts and crafts of Tcl-Tk programming] }