** Summary **

timeentry.kforce.com doesn't provide an API, so this web-scraping script was born

** Code **

======
#! /bin/env tclsh

package require tls
package require http
package require tdom
package require sha256

# form --
#   Parse the HTML body of a finished http request and collect every
#   <input> element into a dict.
# Arguments:
#   token   http token returned by http::geturl
# Returns:
#   dict with keys
#     data    the raw response body
#     html    the tDOM document command created by the parse (not deleted here)
#     fields  dict mapping each input's name (or, failing that, its id)
#             to its value attribute ("" when the attribute is absent)
proc form token {
    set data [http::data $token]
    dom parse -html $data html
    $html documentElement element
    $element normalize
    set fields [dict create]
    foreach input [$element getElementsByTagName input] {
        # Forget the previous input's name: an input with neither a name nor
        # an id must be skipped, not stored under the preceding input's key.
        unset -nocomplain name
        if {[$input hasAttribute name]} {
            set name [$input getAttribute name]
        } elseif {[$input hasAttribute id]} {
            set name [$input getAttribute id]
        }
        puts stderr "input: [$input asList]"
        # Second argument is the default returned when the attribute is missing.
        set value [$input getAttribute value ""]
        if {[info exists name]} {
            dict set fields $name $value
        }
    }
    return [dict create data $data html $html fields $fields]
}

# cookies --
#   Extract the cookies set by a response.
# Arguments:
#   t   http token returned by http::geturl
# Returns:
#   list of "name=value" strings, one per Set-Cookie header; cookie
#   attributes (everything after the first ";") are dropped, and cookies
#   whose value is empty are skipped.
proc cookies {t} {
    set cookies [list]
    foreach {k v} [http::meta $t] {
        if {[string tolower $k] ne "set-cookie"} continue
        set pair [lindex [split $v \;] 0]
        if {[lindex [split $pair =] 1] eq ""} continue
        lappend cookies $pair
    }
    return $cookies
}

# formatCookies --
#   Build the header list for http::geturl -headers from a list of
#   "name=value" cookie strings.
proc formatCookies cookies {
    return [list Cookie [join $cookies "; "]]
}

# timecard_id --
#   Pull the timecard ID out of the href attribute of a link node.
# Arguments:
#   node   tDOM node carrying an href attribute
# Returns:
#   the value of the ID query parameter; raises an error when the href
#   does not match the expected Timecard.aspx link shape.
proc timecard_id node {
    set href [$node getAttribute href]
    if {![regexp {/TimeEntry\.Web/Shared/Timecard.aspx\?ID=([^&]+)&} $href -> id]} {
        return -code error "no timecard ID found in link: $href"
    }
    return $id
}

# clean --
#   Return a copy of the dict in which every value that is exactly a
#   single non-breaking space (U+00A0) is replaced by the empty string.
proc clean dict {
    # dict for iterates over the value passed in, so updating the local
    # variable inside the loop is safe.
    dict for {key val} $dict {
        if {$val eq "\xa0"} {
            dict set dict $key {}
        }
    }
    return $dict
}

# search1 .. search3 --
#   Each locates the element whose text contains $pattern ($pattern is
#   referenced from inside the XPath expression), walks to a nearby node
#   and returns the text found under table/tr/td there.  They differ only
#   in how they navigate from the label to the value.

# search1: three levels up from the label, then the first following sibling.
proc search1 {node pattern} {
    set node [$node selectNodes {//*[contains(text(),$pattern)]/../../../following-sibling::*[1]/table/tr/td}]
    return [$node text]
}

# search2: three levels up from the label, then the second next sibling.
proc search2 {node pattern} {
    set node [$node selectNodes {//*[contains(text(),$pattern)]/../../..}]
    set node [[[$node nextSibling node] nextSibling node] selectNodes {table/tr/td}]
    return [$node text]
}

# search3: four levels up from the label, then previous sibling ->
# first child -> two next siblings.
# NOTE(review): the line breaks in this proc were reconstructed; the chained
# sibling navigation is assumed to be live code (it mirrors search4) - confirm.
proc search3 {node pattern} {
    set node [$node selectNodes {//*[contains(text(),$pattern)]/../../../..}]
    #$node parentNode node
    # Each call stores its result in the variable "node" and returns it,
    # so after the chain "node" holds the last sibling reached.
    [[[$node previousSibling node] firstChild node] nextSibling node] nextSibling node
    set node [$node selectNodes {table/tr/td}]
    return [$node asText]
}
# search4 --
#   Find the element whose text contains $pattern, go four levels up,
#   then next sibling -> first child -> two next siblings, and return the
#   text found under table/tr/td there.  Each navigation call stores its
#   result back into the variable "node".
proc search4 {node pattern} {
    set node [$node selectNodes {//*[contains(text(),$pattern)]}]
    $node parentNode node
    $node parentNode node
    $node parentNode node
    $node parentNode node
    #$node parentNode node
    $node nextSibling node
    $node firstChild node
    $node nextSibling node
    $node nextSibling node
    set node [$node selectNodes {table/tr/td}]
    return [$node asText]
}

# search5 --
#   Return the text of the first sibling following the parent of the
#   element whose text contains $pattern.
proc search5 {node pattern} {
    set node [$node selectNodes {//*[contains(text(),$pattern)]/../following-sibling::*[1]}]
    return [$node asText]
}

# search6 --
#   Find the element whose text contains $pattern, go four levels up,
#   move two siblings forward and return that node's text.
proc search6 {node pattern} {
    set node [$node selectNodes {//*[contains(text(),$pattern)]/../../../..}]
    $node nextSibling node
    $node nextSibling node
    return [$node asText]
}

# hours --
#   Read three consecutive rows starting at the grandparent of the
#   element whose text contains $pattern.
#   NOTE(review): the original carried an unused list naming the rows
#   "Hours", "Regular IC" and "Daily Total"; presumably that is what the
#   three rows hold - confirm against a real timecard.
# Arguments:
#   node      tDOM node to search from
#   pattern   text identifying the first row
# Returns:
#   list of three rows; each row is a list with one entry per child node,
#   each entry being the list of text values found one level below it.
# Raises:
#   an error when no element matches $pattern.
proc hours {node pattern} {
    set node [$node selectNodes {//*[contains(text(),$pattern)]/../..}]
    if {$node eq {}} {
        # "raise" is a Tk command and does not exist in tclsh;
        # return -code error delivers the intended message.
        return -code error "Hours should not be parsed on a timecard with no hours"
    }
    set table [list]
    for {set i 0} {$i<3} {incr i} {
        set row [list]
        foreach child [$node childNodes] {
            set values [list]
            foreach text [$child selectNodes */text()] {
                lappend values [$text asText]
            }
            lappend row $values
        }
        lappend table $row
        # advance to the next row; the result is stored back into "node"
        $node nextSibling node
    }
    return $table
}

# ---- main script: log in -------------------------------------------------

http::register https 443 ::tls::socket
http::config -useragent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:17.0) Gecko/20100101 Firefox/17.0"

puts stderr "enter username: "
gets stdin username
puts stderr "enter password: "
gets stdin password
set creds [dict create {ctl00$ContentPlaceHolder1$txtUserName} $username {ctl00$ContentPlaceHolder1$txtPassword} $password]

# Fetch the login page to obtain the initial cookies and the form's
# existing input fields.
set token [http::geturl https://timeentry.kforce.com/TimeEntry.Web/Login.aspx]
set cookies [cookies $token]
puts stderr "\ncookies1: $cookies"
set fields [dict get [form $token] fields]
http::cleanup $token

# Extra inputs sent with the login form in addition to the scraped fields.
set other_inputs {
    {ctl00$ContentPlaceHolder1$ImageButton2.x} 0
    {ctl00$ContentPlaceHolder1$ImageButton2.y} 0
    ctl00_RoleRadMenu_ClientState {}
}
set login [dict merge $fields $other_inputs $creds]
# Debug output only: mask the password so it does not end up in logs.
puts stderr [dict replace $login {ctl00$ContentPlaceHolder1$txtPassword} ********]
# mergeCookies --
#   Combine two lists of "name=value" cookie strings.  A cookie in $new
#   replaces a cookie of the same name in $old; order of first appearance
#   is kept.  (The cookie lists are plain lists, not dicts, so dict merge
#   cannot be applied to them directly.)
proc mergeCookies {old new} {
    set jar [dict create]
    foreach cookie [concat $old $new] {
        dict set jar [lindex [split $cookie =] 0] $cookie
    }
    return [dict values $jar]
}

# ---- submit the login form -------------------------------------------------
set token [http::geturl https://timeentry.kforce.com/TimeEntry.Web/Login.aspx \
    -headers [formatCookies $cookies] \
    -query [http::formatQuery {*}$login]]
set cookies [mergeCookies $cookies [cookies $token]]
puts stderr "\ncookies2: $cookies"
http::cleanup $token

# ---- fetch the timecard history page ---------------------------------------
set token [http::geturl https://timeentry.kforce.com/TimeEntry.Web/Consultant/TimecardHistory.aspx \
    -headers [formatCookies $cookies]]
set cookies [mergeCookies $cookies [cookies $token]]
puts stderr "\ncookies3: $cookies"
set fields [dict get [form $token] fields]
http::cleanup $token

# Post the page back with the grid's page size set to 50.
set newfields {
    __EVENTARGUMENT {FireCommand:ctl00$ContentPlaceHolder1$grdTimecardHistory$ctl00;PageSize;50}
    __EVENTTARGET {ctl00$ContentPlaceHolder1$grdTimecardHistory}
    ctl00$ContentPlaceHolder1$grdTimecardHistory$ctl00$ctl03$ctl01$PageSizeComboBox {50}
    ctl00_ContentPlaceHolder1_grdTimecardHistory_ctl00_ctl03_ctl01_PageSizeComboBox_ClientState {{"logEntries":[],"value":"50","text":"50","enabled":true}}
    ctl00_ContentPlaceHolder1_historyDetailView_ClientState {}
}
set fields [dict merge $fields $newfields]
puts stderr "\ncookies4: $cookies"
set token [http::geturl https://timeentry.kforce.com/TimeEntry.Web/Consultant/TimecardHistory.aspx \
    -headers [formatCookies $cookies] \
    -query [http::formatQuery {*}$fields]]
set data [http::data $token]
http::cleanup $token

dom parse -html $data html
$html documentElement root
$root normalize

# ---- read one summary dict per row of the history grid ---------------------
set timecard_history [$root getElementById ctl00_ContentPlaceHolder1_grdTimecardHistory_ctl00]
set timecard_history [$timecard_history selectNodes tbody]
set timecard_history [$timecard_history selectNodes tr]

set columns {company assignment status notes}
set timecards [list]
set timecard [dict create]
foreach row $timecard_history {
    # first cell holds the link carrying the timecard ID; the remaining
    # cells are read in the order given by $columns
    set fields [lassign [$row selectNodes td] id]
    dict set timecard id [timecard_id [$id selectNodes a]]
    for {set i 0} {$i<[llength $columns]} {incr i} {
        dict set timecard [lindex $columns $i] [[lindex $fields $i] text]
    }
    set timecard [clean $timecard]
    # temporary, for testing: guard the lappend with
    #   if {[dict get $timecard id] == 972874} {...}
    lappend timecards $timecard
}
# All row data has been copied out; release the parsed history document.
$html delete

#https://timeentry.kforce.com/TimeEntry.Web/Shared/Timecard.aspx?ID=[dict get $timecard id]&Mode=Edit

# ---- fetch the printable view of every timecard and add its details --------
# $timecards[set timecards [list]] iterates over the current list while
# resetting the variable, so the loop can refill it with the enriched dicts.
foreach timecard $timecards[set timecards [list]] {
    set token [http::geturl \
        https://timeentry.kforce.com/TimeEntry.Web/Shared/PrintTimecard.aspx?TimecardID=[dict get $timecard id]&ReportFormat=html \
        -headers [formatCookies $cookies]]
    set data [http::data $token]
    http::cleanup $token
    dom parse -html $data html
    $html documentElement root
    $root normalize
    dict set timecard consultant [search1 $root "Consultant Name:"]
    dict set timecard employee_id [search2 $root "Employee ID:"]
    dict set timecard client [search1 $root "Client:"]
    dict set timecard week [search2 $root "Week Ending:"]
    dict set timecard assignment [search3 $root "Assignment ID:"]
    dict set timecard fax [search2 $root "Fax Number:"]
    dict set timecard location [search4 $root "Client Location:"]
    dict set timecard title [search2 $root "Job Title:"]
    dict set timecard cost_center [search5 $root "Cost Center:"]
    dict set timecard purchase_order [search5 $root "Purchase Order:"]
    # the hours table is only parsed for timecards whose status is Complete
    if {[dict get $timecard status] eq "Complete"} {
        dict set timecard hours [hours $root "Hours"]
    }
    dict set timecard notes [search6 $root "Special Timecard Notes"]
    set timecard [clean $timecard]
    lappend timecards $timecard
    # Everything needed is now plain strings; free this timecard's document
    # so documents do not pile up across iterations.
    $html delete
    #set val [$root selectNodes {//*[contains(text(),"key:")]}]
    #puts "path1: [$val toXPath]"
    #set val [$root selectNodes {//*[contains(text(),"val")]}]
    #puts "path2: [$val toXPath]"
}

# ---- write each timecard to a file named after the SHA-256 of its content --
foreach timecard $timecards {
    set sig [::sha2::sha256 -hex $timecard]
    set chan [open $sig w]
    puts $chan $timecard
    close $chan
}
======