Basic GeoIP with Tcl

Olly - 26 Jul 2006 - The GeoIP API has now been ported to TCL and contributed to maxmind : http://www.maxmind.com/app/tcl

David Zolli - 24 Sep 2004 - This is basic GeoIP (see http://www.maxmind.com for details) that return country code for a given IP adress.

 ################################################################################
 #
 # Basic GeoIP for tcl.
 #
 # Copyright © 2004 - David Zolli - http://www.zolli.fr
 #
 # This script is under NOL : https://wiki.tcl-lang.org/nol
 #
 # Version 1.0 - 24 Sep 2004
 #
 ################################################################################
    
 # This needs Maxmind CVS database available here :
 # http://www.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip
 proc GeoIP  { IP {cvsfile GeoIPCountryWhois.csv} } {
     foreach "a b c d" [split $IP .] {
         set V [expr {$a<<24 | $b<<16 | $c<<8 | $d}]
     }
     set fin [open $cvsfile r]
     while {![eof $fin]} {
         foreach "1 2 3 4 5 6" [split [gets $fin] ,] {
             foreach "a b c d" [split [lindex $1 0] .] {
                 set min [expr {$a<<24 | $b<<16 | $c<<8 | $d}]
             }
             foreach "a b c d" [split [lindex $2 0] .] {
                 set max [expr {$a<<24 | $b<<16 | $c<<8 | $d}]
             }
             if { $V >= $min && $V <= $max } {
                 close $fin
                 return "[lindex $5 0] ([lindex $6 0])"
             }
         }
     }
     close $fin
     return "?? (unknow)"
 }

rmax - 24 Sep 2004. This variant uses a modified file with fixed line length and makes a binary search over it. A conversion proc for the csv file is also included.

 proc convert {} {
    set fd [open GeoIPCountryWhois.csv]
    set data [split [read -nonewline $fd] \n]
    close $fd
    foreach line [string map {\" ""} $data] {
        foreach {a b c d e f} [split $line ,] break
        puts [format "%ld % %s" [expr {$d - $c + 1}] $c $d $e ]

    }
 }

 proc main {ip} {
    foreach {a b c d} [split $ip .] break
    set find [expr {$a<<24 | $b<<16 | $c<<8 | $d}]
    set fd [open bar.ssv]
    set size [file size bar.ssv]
    set lines [expr {$size/21}]
    set a 0; set b $lines; set found 0
    while {!$found && $a != $b} {
        set point [expr {($b+$a)/2}]
        seek $fd [expr {$point * 21}]
        foreach {start end country} [gets $fd] break
        if "$find >= 0x$start" {
            if "$find <= 0x$end" {
                puts $ip $country
                set found 1
            } else {
                set a $point
            }
        } else {
            set b $point
        }
    }
 }

 foreach ip $argv {
    puts [time {main $ip}]
 }

SS 24Sep2004. This version tries to do a binary search directly on the original CSV format. The parsing is a bit tricky, it's not unlikely it may contain errors, but appears to work from some simple test. Some cleanup should be possible, and some part seems to be written more in C than in Tcl...

 # Copyright (C) 2004 Salvatore Sanfilippo
 # Under the same license as Tcl/Tk 8.4
 
 namespace eval geoip {}
 set ::geoip::filename "GeoIPCountryWhois.csv"
 set ::geoip::fd -1
 
 proc ::geoip::geoip {ipstr} {
     # Open the file only the first time.
     if {$::geoip::fd == -1} {
         set ::geoip::fd [open $::geoip::filename]
         file stat $::geoip::filename statbuf
         set ::geoip::len $statbuf(size)
     }
     # Convert the IP address into a number
     foreach {a b c d} [split $ipstr .] break
     set ip [expr {(wide($a)<<24)+($b<<16)+($c<<8)+$d}]
     # Binary search
     set start 0
     set end [expr {$::geoip::len -1}]
     while 1 {
         #puts "RANGE: $start - $end"
         set half [expr {int($start+(($end-$start)/2))}]
         set buf {}
         # Seek the start/end of the line
         if {($half-120) < 0} {
             append buf [string repeat "\n" [expr {120-$half}]]
             seek $::geoip::fd 0
         } else {
             seek $::geoip::fd [expr {$half-120}]
         }
         append buf [read $::geoip::fd 241]
         #puts "BUF: $buf"
         if {[string length $buf] < 121} {
             set linestart [expr {[string length $buf]-1}]
         } else {
             set linestart 120
         }
         set lineend $linestart
         # Go forward if we seek a newline as first character. We want
         # to be in the middle of the line.
         while {[string index $buf $linestart] eq "\n"} {
             incr linestart -1
             incr lineend -1
         }
         # Seek the line start/end
         while {[string index $buf $linestart] ne "\n" &&
                [string index $buf $linestart] ne {}} {
                incr linestart -1
         }
         while {[string index $buf $lineend] ne "\n" &&
                [string index $buf $lineend] ne {}} {
                incr lineend 1
         }
         # Get the line
         set line [string range $buf $linestart $lineend]
         foreach {_ _ rangestart rangeend code country} [split $line ,] break
         foreach var {rangestart rangeend code country} {
             set $var [string range [string trim [set $var] "\r\n "] 1 end-1]
         }
         #puts "LINE: $line"
         #puts "FOUND: ($rangestart,$rangeend) - $ip"
         # Trivial binary search
         if {$ip >= $rangestart && $ip <= $rangeend} {
             return [list $code $country]
         }
         if {$ip > $rangestart} {
             set start [expr {$half+1}]
         } else {
             set end [expr {$half-1}]
         }
         if {abs($start-$end)<5} {
             return {}
         }
     }
 }
 
 puts [time {geoip::geoip [lindex $argv 0]} 100]
 puts [geoip::geoip [lindex $argv 0]]

You can see this implementation working at http://wiki.hping.org/index.cgi?op=recentchanges where the wiki's recent changes IP addresses are used to display the country of the editor. It's mainly useful to check for spam because the IP addresses are almost always from China :(

Kroc Now http://wfr.tcl.tk uses this too to prevent spam.

jcw - Here's an obfuscated way to convert dotted IP's to numeric IP's:

  proc ipAsInt {ip} {
    expr "((([string map {. {)*256+}} $ip]"
  }

Here's another variant which trades space for speed. It generates a reasonably compact binary form and stores it in a file (once), it reads that form into a string and binary searches it.

I should add something to fetch and unpack the .zip file from the URL if it doesn't exist, but it would overly obfuscate the code.

The generated string is about 380Kb in length, which isn't *too* bad. - CMcC 20041026

    # geoip.tcl
    # a small package to convert the geoip database into a compact binary form
    # and provide a search to map ip address to country code.
    
    package require fileutil
    package require csv
    
    namespace eval geoip {
        variable dbbin "geoip.bin"        ;# generated binary file
        variable dbsrc "GeoIPCountryWhois.csv" ;# source file from
        # http://www.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip
    
        variable verbose 0
    }

    # readdb - read the binary db into a local string variable
    proc geoip::readdb {} {
        variable dbbin
        variable db
    
        set fd [open $dbbin]
        fconfigure $fd -translation binary -encoding binary
        set db [read $fd [file size $dbbin]]
        close $fd
    
        if {([string length $db] / 6) * 6 != [string length $db]} {
            error "string length must be divisible by 6"
        }
    }

    # csv2bin - generate a binary db from the source db
    proc geoip::csv2bin {} {
        variable dbsrc
        variable dbbin
    
        set fd [open $dbbin w]
        fconfigure $fd -translation binary -encoding binary
    
        fileutil::foreachLine line $dbsrc {
            foreach {fromIP toIP from to cc country} [::csv::split $line] break
            puts -nonewline $fd [binary format Ia2 $from $cc]
        }
    
        close $fd
    }

    # int2ip - utility to generate dot-quad form from integer
    proc geoip::int2quad {i} {
        binary scan [binary format I $i] c4 ip
        set result {}
        foreach el $ip {
            lappend result [expr {($el + 256) % 256}]
        }
        return [join $result .]
    }

    # find - return the country code for a given IP address (in quad form)
    proc geoip::find {ip} {
        variable db
        set ip [expr "((([string map {. {)*256+}} $ip]"] ;# danke jcw
        set ip [binary format I $ip]
        for {
            set probe [expr {[expr [string length $db] / 6] / 2}]; set range [expr {$probe / 2}]
        } {
           $range > 0
       } {
            set range [expr {$range / 2}]
        } {
            set cp [expr {$probe * 6}]
            set pip [string range $db $cp [expr {$cp+4}]]
            if {$ip > $pip} {
                #puts "$ip > $pip at $probe"
                incr probe $range
            } elseif {$ip < $pip} {
                #puts "$ip < $pip at $probe"
                incr probe -$range
            } else {
                #puts "$ip == $pip at $probe"
                return [string range $db [expr {($probe * 6) + 4}] 2]
            }
        }
        #puts "Dropped out with $ip and $pip at $probe"
        return [string range $db [expr {($probe * 6) + 4}] [expr {($probe * 6) + 5}]]
    }

    if {[info proc Stderr] eq {}} {
        # little routine to conform with tclhttpd's stderr requirements
        proc Stderr {txt} {
            puts stderr $txt
        }
    }

    # create the db and read it in
    namespace eval geoip {
        if {![file exists $dbbin]} {
            if {$verbose} {
                Stderr "Creating Geoip binary db $dbbin"
            }
            csv2bin
        } else {
            if {$verbose} {
                Stderr "Using Geoip binary db: $dbbin"
            }
        }
    
        # fetch the database
        readdb
    }

    # test script
    if {[info script] eq $argv0} {
        foreach ip {
            209.17.179.230
            64.71.168.43
            144.136.123.45
            210.73.87.103
        } {
            set time [time {set cc [geoip::find $ip]} 100]
            puts "$cc - $time"
        }
    }