getfiles cached

JOB - 2016-07-12 20:22:35

Purpose:

  • Search for files in a given directory matching a specified pattern.
  • The procedure recursively traverses the tree structure and as well creates a cache file, which is stored in the given root directory.
  • The cache file holds all the file references which can then be used for another search (using the same search pattern).
# -----------------------------------------------------------------------------
# getfiles.tcl ---
# -----------------------------------------------------------------------------
# (c) 2016, Johann Oberdorfer - Engineering Support | CAD | Software
#     johann.oberdorfer [at] googlemail.com
#     www.johann-oberdorfer.eu
# -----------------------------------------------------------------------------
# This source file is distributed under the BSD license.
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#   See the BSD License for more details.
# -----------------------------------------------------------------------------
# Credits:
#   The code is heavily based on https://wiki.tcl-lang.org/19762 - [AQI] rglob procedure
#
# Purpose:
#   Search for files in a given directory matching a specified pattern.
#
#   The procedure recursively traverses the tree structure and creates
#   a chache file, which is stored in the given root directory.
#   The cache file holds all the file references which can then be used
#   for another search (using the same search pattern).
#
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Revision history:
#   June, 16: J.Oberdorfer, initial release
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------

package provide getfiles 0.1


namespace eval getfiles {

        namespace export \
                set_excluded_names \
                set_cachefilename \
                get_cachefilename \
                delete_cachfile \
                getfiles_cached
                

        variable cache_file_name
        variable excluded_dirnames

        set cache_file_name ".getfile.cache"
        
        set excluded_dirnames {
                        "tmp"
                        "Archiv" "Backup"
        }

        proc set_excluded_names {names_list} {
                variable excluded_dirnames
                foreach name $names_list {
                        if { [lsearch $excluded_dirnames $name] == -1} {
                                lappend excluded_dirnames $name
                        }
                }
        }

        proc set_cachefilename {fname} {
                variable cache_file_name
                set cache_file_name $fname
        }
                
        proc get_cachefilename {} {
                variable cache_file_name
                return $cache_file_name
        }
        
        proc GetFiles { dir pattern searchcmd } {
                variable excluded_dirnames
    
                set file_list {}

                # fix the directory name...
                set basedir [string trimright [file join [file normalize $dir] { }]]

                # search in the current directory for matching files...
                foreach fname [glob -nocomplain -type {f r} -path $basedir $pattern] {

                        # evaluate command in parent namespace:
                        if {$searchcmd != ""} {
                                catch {uplevel $searchcmd $fname}
                        }
                        lappend file_list $fname
                }
        
                # now search for any sub direcories in the current directory...
                foreach dir_name [glob -nocomplain -type {d r} -path $basedir "*"] {

                        set is_valid_dir 1
                        foreach item $excluded_dirnames {
                                if { [string first [string tolower $item] [string tolower $dir_name]] != -1 } {
                                        set is_valid_dir 0
                                        break
                                }
                        }
                        
                        if {$is_valid_dir == 1} {
                                # recusive call ...
                                set subdir_list [GetFiles $dir_name $pattern $searchcmd]

                                if { [llength $subdir_list] > 0 } {
                                        foreach fname $subdir_list {
                                                lappend file_list $fname
                                        }
                                }
                        }
                }

                return $file_list
        }

        proc ReadCacheFile {cache_file} {
                set rlist {}
                set fp [open $cache_file "r"]

                while { ![eof $fp] } {
                        gets $fp item
                        if { [set str [string trim $item]] != "" } {
                                lappend rlist $str
                        }
                }
                close $fp
                return $rlist
        }

        proc delete_cachfile { root_dir } {
                variable cache_file_name
                set cache_file [file join $root_dir $cache_file_name]

                if { [file exists $cache_file] } {
                        if { ![file writable $cache_file] } {
                                tk_messageBox \
                                        -title "Error while attempting to delete cache file." \
                                        -icon "warning" \
                                        -message "Unable to remove cache file: $msg" \
                                        -type ok
                        } else {
                                # delete previous cache file...
                                file delete -force $cache_file
                        }
                }
        }

        proc getfiles_cached { root_dir pattern cachefile_created {searchcmd ""} } {
                upvar $cachefile_created file_created
                variable cache_file_name

                set file_list {}
                set file_created 0
                set cache_file [file join $root_dir $cache_file_name]

                if { ![file exists $cache_file] ||
                          [llength [set file_list [ReadCacheFile $cache_file]]] == 0 } {

                        # read files...
                        set file_list [GetFiles $root_dir $pattern $searchcmd]

                        # and initially write cache file...
                        if { ![catch {set ofile [open $cache_file "w"]} msg] } {

                                foreach f $file_list {
                                        puts -nonewline $ofile "$f\n"
                                }

                                close $ofile
                                set file_created 1

                        } else {
                                tk_messageBox \
                                        -title "Error while attempting to write cache file." \
                                        -icon "warning" \
                                        -message "Unable to write cache file: $msg" \
                                        -type ok
                                set file_created 99
                        }
                }

                return $file_list
        }
}

Demo Code:

lappend auto_path [file join [file dirname [info script]]]

package require Tk
catch {console show}

package require getfiles

# testing the code...

proc SearchCommand {args} {
        set fname [lindex $args 0]
        puts $fname
        update
}

set root_dir "Z:/projects/whatever"
set pattern  "*.pdf*"

set cachefile_created 0


# force cache file to be re-created !
# -----------------------------------
getfiles::delete_cachfile $root_dir
# -----------------------------------

getfiles::set_excluded_names {
        "Archiv" "Backup" "tmp"
}

set rlist [getfiles::getfiles_cached \
                                $root_dir $pattern cachefile_created \
                                SearchCommand]

if {$cachefile_created} {
        puts "*** Cache file has been created:"
        puts "      [file join $root_dir [getfiles::get_cachefilename]]"
}


# try to find ".pdf"
# ------------------
set t0 [clock milliseconds]
set file_list {}
set part_num "find_something"

foreach f $rlist {
        if { [file extension $f] == ".pdf" && [string first $part_num $f] != -1 } {
                lappend file_list $f
        }
}

# print result
puts "Search Result:"

switch -- [llength $file_list] {
        0 { puts "No PDF available matching: $pattern" }
        1 { puts "--> [lindex $file_list 0]" }
        default {
                puts "More than one CATParts found, please choose the associated model:"
                foreach f $file_list {
                        puts $f
                }
        }
}

puts "*** [expr ( [clock milliseconds] - $t0 ) / 1000.0] sec"

# try to find another pdf (cached)
# --------------------------------
set t0 [clock milliseconds]
set file_list {}
set part_num "find_something_else_cached"

foreach f $rlist {
        if { [file extension $f] == ".pdf" && [string first $part_num $f] != -1} {
                lappend file_list $f
        }
}

# print result:
switch -- [llength $file_list] {
        0 { puts "No CATDrawing available matching: $pattern" }
        1 { puts "--> [lindex $file_list 0]" }
        default {
                puts "More than one CATDrawing found, please choose the associated model:"
                foreach f $file_list {
                        puts $f
                }
        }
}

puts "*** [expr ( [clock milliseconds] - $t0 ) / 1000.0] sec"