Updated 2008-10-21 01:31:06 by rmax

by Reinhard Max

This little script sends its command line arguments as a query to the online dictionary at http://dict.leo.org and writes the parsed result to stdout. It uses Tcl's http package and the htmlparse package from Tcllib.

The scraper part (everything inside the ::dict.leo.org namespace) could also be included from other frontends. Its [query] proc takes a list of words to search for, and returns a list of English/German pairs that matched the query.

rmax - 2004-11-10: Updated it to recognize the new HTML format of the site, and changed it to use http://pda.leo.org, because that version has less fluff and fewer ads around the real data, which would be cut away anyway. Thanks to Synox for pointing out that the old version wasn't working anymore.
 package require http
 package require htmlparse
 namespace eval ::dict.leo.org {
     # Scraper state shared between [query] and the [parse] callback:
     #   td        - array collecting the text of the current table row,
     #               indexed by the running cell number
     #   table     - flat result list: english german english german ...
     #   tdcounter - number of td tags seen so far in the current row
     variable td
     variable table ""
     variable tdcounter 0

     # parse --
     #   Callback for ::htmlparse::parse; it is invoked once per HTML tag.
     #
     # Arguments:
     #   tag     - name of the tag
     #   close   - "/" for a closing tag, empty for an opening tag
     #   options - attributes of the tag (unused)
     #   body    - text that follows the tag up to the next tag
     #
     # Side effects:
     #   Collects cell text in td and, at the end of each table row,
     #   appends the trimmed contents of cells 2 and 3 to table.
     proc parse {tag close options body} {
         variable td
         variable table
         variable tdcounter
         # Fold case so that upper-, lower- and mixed-case tags are
         # all recognized the same way.
         switch -- [string tolower $close$tag] {
             /tr {
                 # End of a row: keep it only if both columns were filled.
                 if {[info exists td(2)] && [info exists td(3)]} {
                     lappend table [string trim $td(2)] [string trim $td(3)]
                 }
                 set tdcounter 0
                 array unset td
             }
             td {
                 # NOTE(review): text that directly follows the td tag is
                 # not collected here; only text after nested tags is.
                 # Confirm this is intended.
                 incr tdcounter
             }
             default {
                 set item [::htmlparse::mapEscapes $body]
                 if {[string length $item]} {
                     append td($tdcounter) $item
                 }
             }
         }
     }

     # query --
     #   Sends a search request to the dictionary and scrapes the result.
     #
     # Arguments:
     #   query - the word or words to look up
     #
     # Results:
     #   A flat list of alternating english and german entries; empty if
     #   the response contains no result line.
     proc query {query} {
         variable td
         variable table
         variable tdcounter
         set url http://pda.leo.org
         set query [::http::formatQuery search $query]
         set tok [::http::geturl $url -query $query]
         # Copy the page out of the token so it can be released right away.
         set html [::http::data $tok]
         ::http::cleanup $tok

         # Start from a clean state so earlier queries cannot leak in.
         set table [list]
         set tdcounter 0
         array unset td

         # The result table sits on the single line that carries both
         # column headings; parse only that line, and only if it exists.
         foreach line [split $html "\n"] {
             if {[string match "*ENGLISCH*DEUTSCH*" $line]} {
                 ::htmlparse::parse -cmd ::dict.leo.org::parse $line
                 break
             }
         }
         return $table
     }
 }
 proc max {a b} {
     # Return the larger of the two arguments; on a tie the second wins.
     if {$a > $b} {
         return [expr {$a}]
     }
     return [expr {$b}]
 }
 proc main {argv} {
     # Look up the command line words and print the matches to stdout
     # as a two-column table, English on the left and German on the right.
     #
     # Arguments:
     #   argv - list of words to search for; joined with spaces into one query
     set table [dict.leo.org::query [join $argv]]

     # The column width must cover the headings as well as the data,
     # otherwise short result sets leave the header row misaligned.
     set max 0
     foreach c [linsert $table 0 " English" " Deutsch"] {
         set max [max $max [string length $c]]
     }
     set sep [string repeat - $max]
     set table [linsert $table 0 " English" " Deutsch" $sep $sep]
     foreach {c1 c2} $table {
         puts [format "%-*s  %-*s" $max $c1 $max $c2]
     }
     puts ""
 }
 # Script entry point: run the lookup for the command line arguments.
 main $argv

RS: Proud owners of a firewall might have to add a line like
    http::config -proxyhost proxy -proxyport 80

at the very top of proc query. In my case that was what it took to actually get through the firewall.
Category Internet Web scraping Using Tcl to write WWW client side applications