Updated 2011-05-10 19:58:39 by dkf

Chi Hung Chan 16 Nov 2004

Motivation:

  • inspired by Download Accelerator Plus (www.speedbit.com)
  • get my hands dirty on Tcl Thread
  • frustrated with wget on UNIX for big file download

Tested on

  • Tcl 8.3.5, Tcl-thread 2.5.2 on Solaris 8 SPARC
  • Tcl 8.4.7, Tcl-thread 2.5.2 on Solaris 10 x86

Some benchmark results:


#! /usr/local/bin/tclsh
 
if { $argc < 1 || $argc > 2 } {
     puts stderr "Usage: $argv0 <url> \[#threads\]"
     puts stderr "       default #threads is 4"
     exit 1
}
 
proc comma {num {sep ,}} {
     while {[regsub {^([-+]?\d+)(\d\d\d)} $num "\\1$sep\\2" num]} {}
     return $num
}
 
proc now {} {
     return [clock format [clock seconds] -format {%H:%M:%S}]
}
 
proc lremove { l v } {
     foreach i $v {
         set ind [lsearch $l $i]
         if { $ind == -1 } { continue }
         set indm1 [expr {$ind-1}]
         set indp1 [expr {$ind+1}]
         set l [concat [lrange $l 0 $indm1] [lrange $l $indp1 end]]
     }
     return $l
}
 
proc urlSize { url } {
     global validate
 
     if { [info exists validate] == 0 } {
         set validate [http::geturl $url -validate 1]
     }
     set code [http::ncode $validate]
     if { $code != 200 } {
         puts stderr "Error. http return code=$code"
         exit 2
     }
     set size [set ${validate}(totalsize)]
 
     return $size
}
 
proc urlType { url } {
     global validate
 
     if { [info exists validate] == 0 } {
         set validate [http::geturl $url -validate 1]
     }
     return [set ${validate}(type)]
}
 
proc isAcceptRanges { url } {
     global validate
 
     if { [info exists validate] == 0 } {
         set validate [http::geturl $url -validate 1]
     }
     array set www [set ${validate}(meta)]
     if { [info exists www(Accept-Ranges)] == 1 } {
         return 1
     } else {
         return 0
     }
}
 
#
# get basename of url
#
proc urlBasename { url } {
     array set www [uri::split $url]
     set fname [lindex [split $www(path) /] end]
     if { [string length $fname] == 0 } {
         set fname {index.html}
     }
     return $fname
}
 
#
# work out the byte range
#
proc byteRanges { size nthreads } {
     set step [expr $size/$nthreads]
     set p0 -1
     set p1 -1
     set br {}
     for { set i 0 } { $i < $nthreads } { incr i } {
         set p0 [expr $p1 + 1]
         if { $i == [expr {$nthreads-1}] } {
             set p1 $size
         } else {
             set p1 [expr $p0 + $step]
             }
         lappend br $p0
         lappend br $p1
         set p0 $p1
     }
     return $br
}
 
#
# fix up nthreads
# if server does not support accept-range, nthreads=1
# if '#nthreads' file exists, get from there
#
proc fixNthreads { url nthreads } {
     set rc $nthreads
 
     # if server cannot support byte range, nthreads=1
     if { [isAcceptRanges $url] == 0 } {
         set rc 1
         return $rc
     }
 
     # in resume mode, nthreads now and previous has to tally
     set fname [urlBasename $url]
     set ntFilename ".${fname}#nthreads"
     if { [file exists $ntFilename] } {
         set fp [open $ntFilename r]
         set rc [read $fp]
         close $fp
     } else {
         set fp [open $ntFilename w]
         puts $fp $nthreads
         close $fp
         set rc $nthreads
     }
     return $rc
}

# MAIN PROGRAM STARTS HERE

package require Thread
package require http
package require uri

set url [lindex $argv 0]
set nthreads 4
if { $argc == 2 } {
     set nthreads [lindex $argv 1]
}
tsv::set dap url $url
tsv::set dap t0 [clock seconds]
 
puts "--[now]-- $url"
puts "\t=> [urlBasename $url]"
 
#
# if resume is needed, set resumeSize to sum of file size
#
set resume [glob -nocomplain [format {.%s-*} [urlBasename $url]]]
if { [llength $resume] > 0 } {
     set rs 0
     foreach i $resume {
         incr rs [file size $i]
     }
     tsv::set dap resumeSize $rs
} else {
     tsv::set dap resumeSize 0
}
 
set nthreads [fixNthreads $url $nthreads]
 
#
# create and initialise thread pool
#
puts -nonewline "Setting up thread pool of $nthreads threads ... "
set tpool [tpool::create -minworkers $nthreads -maxworkers $nthreads \
         -idletime 20 -initcmd {
     package require http
     package require uri
 
     proc dl { seq p0 p1 } {
         set url [tsv::get dap url]
         array set www [uri::split $url]
         set fname [lindex [split $www(path) /] end]
         set fname [format {.%s-%d} $fname $seq]
 
         # resume
         if { [file exists $fname] == 1 } {
             set size [file size $fname]
             if { $size >= [expr $p1-$p0+1] } {
                 return
             }
             set p0 [expr $p0+$size]
         }
 
         set fpi [open $fname a]
         fconfigure $fpi -translation binary
         set s [http::geturl $url -channel $fpi -binary 1 \
             -progress httpProgress \
             -headers [list Range bytes=$p0-$p1]]
         close $fpi
     }
     proc httpProgress { token total current } {
         upvar #0 $token state
 
         tsv::set dap thread[thread::id] $current
 
         # calculate
         set max [tsv::get dap size]
         set sum [tsv::get dap resumeSize]
         foreach t [thread::names] {
             if { $t == 1 } { continue }
             incr sum [tsv::get dap thread$t]
         }
 
         # progress status 
         set t0 [tsv::get dap t0]
         set size [tsv::get dap size]
         set percent [expr {100*$sum/$max}]
         set elapse [expr [clock seconds] - $t0]
         set kbps [expr {$sum*8.0/(1024.0*$elapse)}]
         set eta [expr [clock seconds]-$t0]
         set etam [expr $eta/60]
         set etas [expr $eta-$etam*60]
         set status [format {%3d%%[%-51s] %6.2fKbps  ETA %02d:%02d} \
             $percent \
             "[string repeat = [expr $percent/2]]>" \
             $kbps \
             $etam \
             $etas]
         puts -nonewline "$status\r"
         flush stdout
     }
}]
puts "Done"
 
#
# submit job to thread pool, work out the byte range for each thread
#
puts -nonewline "Submitting jobs to all threads ... "
set joblist {}
set seq 1
set size [urlSize $url]
tsv::set dap size $size
foreach { p0 p1 } [byteRanges $size $nthreads] {
     lappend joblist [tpool::post $tpool [list dl $seq $p0 $p1]] 
     incr seq 
}
puts "Done"
 
puts "Length: [comma $size] \[[urlType $url]\]"
 
#
# monitor thread pool til completion
#
while 1 {
     set f [tpool::wait $tpool $joblist]
     set joblist [lremove $joblist $f]
     if { [llength $joblist] == 0 } { break }
     after 100
}
 
#
# consolidation
#
puts "\n"
puts -nonewline "Download completed. Consolidating ... "
set fnameo [urlBasename $url]
set fpo [open $fnameo w]
fconfigure $fpo -translation binary
for { set seq 1 } { $seq <= $nthreads } { incr seq } {
     set fnamei [format {.%s-%d} $fnameo $seq]
     set fpi [open $fnamei r]
     fconfigure $fpi -translation binary
     fcopy $fpi $fpo -size [file size $fnamei]
     close $fpi
}
close $fpo
puts "Done"
 
#
# cleanup
#
foreach i [glob -nocomplain ".${fnameo}*"] {
     file delete -force $i
}
 
puts ""
puts "--[now]-- $fnameo ([file size $fnameo]/$size)"