Matthias Hoffmann - Tcl-Code-Snippets - misc - globx

Remark: old sourcecodes moved to the end of the page. Newest code at the top.


pkgIndex.tcl

# 04.07.2014
package ifneeded Globx 0.1 [list source [file join $dir globx.tcl]]
package ifneeded globx 0.1 [list source [file join $dir globx.tcl]]

globx.tcl

#****h* Library/globx.tcl
#
#  NAME
#
#     globx.tcl - list files or folders of a folder tree
#     v0.10, 07.07.2014
#
#  AUTHOR
#
#     M.Hoffmann, © 2004-2014
#
#  PORTABILITY
#
#     Tcl. Tested by the author on Windows only.
#
#  USAGE
#
#  -- package require globx
#     To get all matching files in the given folder and subfolders:
#  -- set files [globx startDir patterns]
#     or
#     proc callback fileName {...}
#  -- set numFiles [globx startDir patterns callback]
#     To get all subfolders of given folder (including the folder itself):
#  -- set dirs [globx2 startDir]
#     or
#     proc callback dirName {...}
#  -- set numDirs [globx2 startDir callback]
#
#  NOTES
#
#  -- works non-recursiv
#  -- if using a callback, the order in which the cb is called is undetermined
#  -- proc looks in *all* subfolders - only files are matched against pattern
#  -- hidden files or folders are included
#  -- use * instead of *.* to get the same results as with Windows commands (dir /s)
#
#  TODO
#
#  -- Namespace
#  -- maybe using 8.6`s lmap, and coroutine to avoid blocking a gui while reading large dirs
#
#  HISTORY
#
#     v0.01 06.02.2004 - first documented, usable version
#     v0.02 21.10.2004 - wiki fix (suppress . and ..)
#     v0.03 09.12.2004 - new globx2 to only list folders
#     v0.04 21.07.2006 - multiple patterns made possible
#     v0.05 14.06.2009 - globx2 returns the given folder itself, too
#     v0.06 26.08.2011 - catch{} to prevent some dubios runtime errors
#     v0.10 07.07.2014 - optimizations - approx. 10% better performance.
#                      - bugfix: globx2 with callback did not return given folder.
#                      - removed update; a GUI program could use the callback method
#                        and, if required, call update from within the cb. Or switch
#                        back to the old behaviour and call 'globxSetUpdate update' once.
#                      - configurable update command (see above).
#                      - additional package name all lowercase.
#                      - callback could break the loop by returning a break.
#
#  SOURCE

package require Tcl 8.5
package provide Globx 0.1
package provide globx 0.1

proc globxUpdate {} {
}

# Sets a command, which is then called repeatedly during processing via globxUpdate.
#  By default, globxUpdate does nothing. To achieve same behaviour as with versions 
#  prior to 0.1, call 'globxSetUpdate update' once before anything else.
#
proc globxSetUpdate {script} {
     proc globxUpdate {} $script
}

# Returns all files wich match given search-mask(s) in the given directory and below,
#  hidden or not hidden. ALL subdirectories (*) are visited, hidden or not, without
#  a recursive proc call.
#
proc globx {startDir {search *} {cb ""}} {
     set dirStack [list [file normalize $startDir]]
     set files [list]
     set fc 0
     while {[llength $dirStack]} {
           set newStack [list]
           foreach dir $dirStack {
                   set filesHere [list]
                   set dirsHere  [list]
                   # temporary var's only because eventually using CallBack
                   catch {
                      lappend filesHere {*}[glob -noc -typ f          -dir $dir -- {*}$search]
                      lappend filesHere {*}[glob -noc -typ {f hidden} -dir $dir -- {*}$search]
                   }
                   if {[string equal $cb ""]} {
                      lappend files {*}$filesHere; # cumulation
                   } else {
                      # call back early, not at the end
                      foreach f $filesHere {
                              incr fc
                              set rc [catch {uplevel [list $cb $f]} errMsg opts]
                              if {$rc == 3} {
                                 return $fc; # TCL_BREAK means: stop here, but don't propagate
                              } elseif {$rc != 0} {
                                 # propagate the error
                                 return -options $opts $errMsg
                              }
                      }
                   }
                   catch {
                      lappend dirsHere {*}[glob -noc -typ d          -dir $dir -- *]
                      lappend dirsHere {*}[glob -noc -typ {d hidden} -dir $dir -- *]
                   }
                   #  note: lmap possible in 8.6
                   # start Wikipatch v0.02 ---
                   foreach newDir $dirsHere {
                           set theDir [file tail $newDir]
                           if {[string equal $theDir "." ] || [string equal $theDir ".."]} {
                              # Don't push this, otherwise entering an endless
                              # loop (on UNIX, at least)
                           } else {
                              lappend newStack $newDir
                           }
                   }
                   # end Wikipatch ---
           }
           set dirStack $newStack
           globxUpdate
     }
     if {[string equal $cb ""]} {
        return [lsort $files]
     } else {
        return $fc
     }
}

# Returns the name of the given dir and all of it's subdirectories (direct or
#  indirect). No wildcard selection possible - proc is for reading the
#  whole folder names of the tree starting at the given point.
#
proc globx2 {startDir {cb ""}} {
     set startDir [file normalize $startDir]
     set dirStack [list $startDir]
     set dirs [list]
     set dc 0
     # bugfix. Up until v0.06, startDir still not returned if using a callback
     if {[file isdirectory $startDir]} {
        incr dc
        if {[string equal $cb ""]} {
           set dirs $dirStack; # until v0.04, startDir wasn't returned at all...
        } else {
           uplevel [list $cb $startDir]
        }
        while {[llength $dirStack]} {
              set newStack [list]
              foreach dir $dirStack {
                      set dirsHere [list]
                      catch {
                         lappend dirsHere {*}[glob -noc -typ d          -dir $dir -- *]
                         lappend dirsHere {*}[glob -noc -typ {d hidden} -dir $dir -- *]
                      }
                      foreach newDir $dirsHere {
                              set theDir [file tail $newDir]
                              if {[string equal $theDir "." ] || [string equal $theDir ".."]} {
                                 # Don't push this, otherwise entering an endless
                                 # loop (on UNIX, at least)
                                 # v0.1: don't process . and .. at all
                              } else {
                                 lappend newStack $newDir
                                 if {[string equal $cb ""]} {
                                    lappend dirs $newDir; # cumulation
                                 } else {
                                    # call back early, not at the end
                                    incr dc
                                    set rc [catch {uplevel [list $cb $newDir]} errMsg opts]
                                    if {$rc == 3} {
                                       return $dc; # TCL_BREAK means: stop here, but don't propagate
                                    } elseif {$rc != 0} {
                                       # propagate the error
                                       return -options $opts $errMsg
                                    }
                                 }
                              }
                      }
              }
              set dirStack $newStack
              globxUpdate
        }
     }
     if {[string equal $cb ""]} {
        return [lsort $dirs]
     } else {
        return $dc
     }
}

#*******************************************************************************

Version history

  • 2009/06/14: globx2 minimally returns the name of the requested folder itself (before, only subfolders where returned, if some exist) -- Att: incompatibility!
  • 2006/07/26: exposed glob's possibility of specifying more than one search mask to avoid double calls to globx. Backward compatible.
 package provide Globx 0.05

 proc globx {startDir {search *} {cb ""}} {
      set dirStack [list [file normalize $startDir]]
      set files {}
      set fc    0
      while {[llength $dirStack]} {
            set newStack {}
            foreach dir $dirStack {
                    # temporary var's only because eventually using CallBack
                    set c [list glob -noc -typ f          -dir $dir --]; eval lappend c $search; set fn [eval $c]
                    set c [list glob -noc -typ {f hidden} -dir $dir --]; eval lappend c $search; set fh [eval $c]
                    if {[string equal $cb ""]} {
                       eval lappend files $fn $fh
                    } else {
                       foreach f [concat $fn $fh] {
                               incr fc
                               uplevel [list $cb $f]
                       }
                    }
                    set dn [glob -noc -typ d          -dir $dir *]
                    set dh [glob -noc -typ {d hidden} -dir $dir *]
                    # eval lappend newStack $dn $dh; # v0.01
                    # Wikipatch Start v0.02 ---
                    foreach newDir [concat $dn $dh] {
                            set theDir [file tail $newDir]
                            if {[string equal $theDir "." ] || \
                                [string equal $theDir ".."]} {
                               # Don't push this, otherwise entering an endless
                               # loop (on UNIX, at least)
                            } else {
                               lappend newStack $newDir
                            }
                    }
                    # Wikipatch Ende ---
            }
            set dirStack $newStack
            update; # keep Background alive
      }
      if {[string equal $cb ""]} {
         return [lsort $files]
      } else {
         return $fc
      }
 }

 # Die Anwendung von Wildcards hier wäre zwar möglich, aber erst 
 # auf UNTERSTER EBENE sinnvoll bzw. wäre ganz am Ende
 # eine Filterung des Gesamtpfads mittels string match besser.

 proc globx2 {startDir {cb ""}} {
      set dirStack [list [file normalize $startDir]]
      set dirs $dirStack; # bis v0.04: {} (ACHTUNG: potentielle Inkompatibilität!)
      set dc   0
      while {[llength $dirStack]} {
            set newStack {}
            foreach dir $dirStack {
                    set dn [glob -noc -typ d          -dir $dir -- *]
                    set dh [glob -noc -typ {d hidden} -dir $dir -- *]
                    if {[string equal $cb ""]} {
                       eval lappend dirs $dn $dh
                    } else {
                       foreach d [concat $dn $dh] {
                               incr dc
                               uplevel [list $cb $d]
                       }
                    }
                    foreach newDir [concat $dn $dh] {
                            set theDir [file tail $newDir]
                            if {[string equal $theDir "." ] || \
                                [string equal $theDir ".."]} {
                               # Don't push this, otherwise entering an endless
                               # loop (on UNIX, at least)
                            } else {
                               lappend newStack $newDir
                            }
                    }
            }
            set dirStack $newStack
            update
      }
      if {[string equal $cb ""]} {
         return [lsort $dirs]
      } else {
         return $dc
      }
 }

 #*******************************************************************************

Examples:

Without a callback, directly returning the filenames as a list:

 puts [globx c:/winnt]
 puts [globx c:/winnt *.dll]

Returning the filenames unsorted name-by-name via callback:

 proc callback file {
      puts $file
 }

 puts [globx c:/winnt * callback]; # will return the number of files read

This is to save memory!

Specifying search masks (v0.04)

 puts [globx c:/winnt {*.dll *.sys *.exe}]

ECS: I had to include some lines to test for "." and ".." otherwise the routine loops.

MH: On my platform (W2k, Tcl 8.4.6), the original routine does not loop; the glob command never returns '..' and '.'. Which platform did you test the code on?

ECS: Debian Linux: Linux babylon 2.4.26-ow2 #1 Fri Jul 9 15:19:06 BRT 2004 i686 GNU/Linux TCL is 8.4.7 (samething happens with 8.4.6). In any case it is better to be safe than sorry :-)

MHo Version 0.6:

package provide Globx 0.06

proc globx {startDir {search *} {cb ""}} {
     set dirStack [list [file normalize $startDir]]
     set files {}
     set fc    0
     while {[llength $dirStack]} {
           set newStack {}
           foreach dir $dirStack {
                   # temporary var's only because eventually using CallBack
                   catch {
                      set c [list glob -noc -typ f          -dir $dir --]; eval lappend c $search; set fn [eval $c]
                      set c [list glob -noc -typ {f hidden} -dir $dir --]; eval lappend c $search; set fh [eval $c]
                      if {[string equal $cb ""]} {
                         eval lappend files $fn $fh
                      } else {
                         foreach f [concat $fn $fh] {
                                 incr fc
                                 uplevel [list $cb $f]
                         }
                      }
                   }
                   catch {
                      set dn [glob -noc -typ d          -dir $dir *]
                      set dh [glob -noc -typ {d hidden} -dir $dir *]
                      # eval lappend newStack $dn $dh; # v0.01
                      # Wikipatch Start v0.02 ---
                      foreach newDir [concat $dn $dh] {
                              set theDir [file tail $newDir]
                              if {[string equal $theDir "." ] || \
                                  [string equal $theDir ".."]} {
                                 # Don't push this, otherwise entering an endless
                                 # loop (on UNIX, at least)
                              } else {
                                 lappend newStack $newDir
                              }
                      }
                      # Wikipatch Ende ---
                   }
           }
           set dirStack $newStack
           update; # keep Background alive
     }
     if {[string equal $cb ""]} {
        return [lsort $files]
     } else {
        return $fc
     }
}

# Die Anwendung von Wildcards hier wäre zwar möglich, aber erst auf UNTERSTER
# EBENE sinnvoll bzw. wäre ganz am Ende eine Filterung des Gesamtpfads mittels
# string match besser.

proc globx2 {startDir {cb ""}} {
     set dirStack [list [file normalize $startDir]]
     set dirs $dirStack; # bis v0.04: {} (ACHTUNG: potentielle Inkompatibilität!)
     set dc   0
     while {[llength $dirStack]} {
           set newStack {}
           foreach dir $dirStack {
                   catch {
                      set dn [glob -noc -typ d          -dir $dir -- *]
                      set dh [glob -noc -typ {d hidden} -dir $dir -- *]
                      if {[string equal $cb ""]} {
                         eval lappend dirs $dn $dh
                      } else {
                         foreach d [concat $dn $dh] {
                                 incr dc
                                 uplevel [list $cb $d]
                         }
                      }
                      foreach newDir [concat $dn $dh] {
                              set theDir [file tail $newDir]
                              if {[string equal $theDir "." ] || \
                                  [string equal $theDir ".."]} {
                                 # Don't push this, otherwise entering an endless
                                 # loop (on UNIX, at least)
                              } else {
                                 lappend newStack $newDir
                              }
                      }
                   }
           }
           set dirStack $newStack
           update
     }
     if {[string equal $cb ""]} {
        return [lsort $dirs]
     } else {
        return $dc
     }
}

#*******************************************************************************