purse

CMCc: Purse provides simple-minded array persistence using variable traces and files.

# purse makes tcl arrays purse-istant
package provide Purse 0.2
 
namespace eval ::purse {}

# param - sets a purse's parameters
# dir - directory in which purses are stored (default [pwd])
proc ::purse::param {var val {control purse}} {
    variable $control
    upvar 0 $control cont

    if {![info exists cont]} {
        #::purse::purse $control $control
        set cont() [dict create dir [pwd]]
    }
    
    if {$var == "dir"} {
        file mkdir $val
    }
    dict set cont() $var $val
}

# Restart from current state on disk
proc ::purse::restart { {glob ::*} {control purse}} {
    variable $control
    upvar 0 $control cont

    init $control

    set arrays [glob -directory [dict get $cont() dir] \
                    -nocomplain \
                    -tails [string map [dict get $cont() todisk] $glob]]
    foreach array $arrays {
        set array [fromdisk $array]
        set nspace [namespace qualifiers $array]
        if { $nspace ne "" } {
            namespace eval $nspace {}
        }
        r $control $array $array 
    }
    return $arrays
}

# Initialise purse storage in control variable
proc ::purse::init { {control purse} } {
    global tcl_platform
    variable $control
    upvar 0 $control cont

    if {![info exists cont]} {
        #::purse::purse $control $control
        set cont() [dict create]
    }

    # Make sure we have a "dir" parameter, this will be the directory
    # where we will be persisting arrays.
    if {![dict exists $cont() dir]} {
        param dir [pwd] $control
    }

    # Make sure we know how to map unallowed characters onto
    # characters that can be used for file names.
    if { ![dict exists $cont() todisk] } {
        if { $tcl_platform(platform) eq "windows" } {
            param todisk [list ":" "¨"] $control
        } else {
            param todisk {} $control
        }
    }
    
    # The opposite...
    if { ![dict exists $cont() fromdisk] } {
        if { $tcl_platform(platform) eq "windows" } {
            param fromdisk [list "¨" ":"] $control
        } else {
            param fromdisk {} $control
        }
    }
}


# Return full path to where to store an array on disk.
proc ::purse::todisk { name {control purse}} {
    variable $control
    upvar 0 $control cont
    init $control
    return  [file join [dict get $cont() dir] \
                 [string map [dict get $cont() todisk] $name]]
}

# Return name of array, given its name on disk.
proc ::purse::fromdisk { name {control purse}} {
    variable $control
    upvar 0 $control cont
    init $control
    return [string map [dict get $cont() fromdisk] $name]
}

# purse - purses an array
proc ::purse::purse {array {control purse}} {
    #puts stderr "purse $array $control"
    #set array [namespace which -variable $array]
    upvar $array arr
    variable $control
    upvar 0 $control cont
    
    init $control;   # Initialise storage.
    
    trace add variable arr read  [list ::purse::r $control $array]        ;# one shot load file
    trace add variable arr array [list ::purse::r $control $array]        ;# one shot load file
    trace add variable arr write [list ::purse::w $control $array]
    trace add variable arr unset [list ::purse::u $control $array]
    
    # register a single purse flush at exit
    set pfx ::exit_purse_[string map [list ":" ""] $control]_
    if { [info commands ${pfx}*] eq "" } {
        set newex ${pfx}[expr rand()]
        rename ::exit $newex
        proc ::exit {} [subst {
            #puts stderr "flush $control"
            ::purse::flush * $control
            $newex
        }]
    }
}

# initializes pursed array - one shot
proc ::purse::r {control name array args} {
    upvar $array arr
    variable $control
    upvar 0 $control cont
    
    trace remove variable arr read  [list ::purse::r $control $name]        ;# one shot load file
    trace remove variable arr array [list ::purse::r $control $name]        ;# one shot load file
    trace remove variable arr write [list ::purse::w $control $name]
    trace remove variable arr unset [list ::purse::u $control $name]
    
    set file [todisk $name $control]
    
    if {[file exists $file]} {
        # if the purse exists, load its contents to array
        set fd [open $file r+]
        while {![eof $fd]} {
            array set arr [gets $fd]
        }
        close $fd
        
        set cont($name) [open $file w]
        fconfigure $cont($name) -buffering line
        puts $cont($name) [array get arr]
    } else {
        # brand new purse - create the file
        set cont($name) [open $file w]
        fconfigure $cont($name) -buffering line
    }
    
    # we no longer need a read trace
    trace add variable arr write [list ::purse::w $control $name]
    trace add variable arr unset [list ::purse::u $control $name]
}

# trace unset - writes an element to purse
proc ::purse::w {control name array el op} {
    #puts stderr "write $control $name $array $el $op"
    upvar $array arr
    variable $control
    upvar 0 $control cont
    
    if {![info exists cont($name)]} {
        r $control $name arr
    }
    
    set fd $cont($name)
    array set junk [list $el $arr($el)]
    puts $fd [array get junk]
}

# trace unset - unsets an element in a pursed array
proc ::purse::u {control name array el op} {
    #puts stderr "unset $control $name $array $el $op"
    upvar $array arr
    variable $control
    upvar 0 $control cont
    
    if {![info exists cont($name)]} {
        r $control $name arr
        if {$el != "" && [info exists arr($el)]} {
            unset arr($el)        ;# we have recreated the element - recurse
        }
        return
    }
    
    set file [todisk $name $control]
    if {$el == ""} {
        # removing the entire array - destroy the purse
        if {[file exists $file]} {
            file remove $file
        }
    } else {
        # removing an element - flush the purse
        close $cont($name)
        set cont($name) [open $file w]
        fconfigure $cont($name) -buffering line
        puts $cont($name) [array get arr]
    }
}

# flush arrays matching glob
proc ::purse::flush {{glob *} {control purse}} {
    variable $control
    upvar 0 $control cont
    
    foreach {array fd} [array get cont $glob] {
        if {$array == ""} continue
        upvar #0 $array arr
        
        catch {close $cont($array)}
        
        set file [todisk $array $control]
        set cont($array) [open $file w]
        fconfigure $cont($array) -buffering line
        puts $cont($array) [array get arr]
        ::flush $cont($array)
    }
}
namespace export -clear purse

Now for some simple tests

 if {[info script] == $argv0} {
    purse::param dir [file join [pwd] .purse] ;# set the dir for purses
 
    purse::purse x        ;# purse the array x
    puts "initial: [array get x]"
    set x(1) [clock scan now]
    set x(2) [clock scan now]
    unset x(2)
    puts "subsequent: [array get x]"
     
    exit        ;# flushes the purse'd arrays
 }

Note that by the nature of the implementation, [info exists] will report 0 on every element of the array until the array is loaded. We load lazily (although that would be easy to change) so it would make sense to perform an [array size] or similar to provoke loading, if you need to test existence on an element.

EF I modified slightly the original code so that:

  • It resists better to failures (abrupt end of the process) by forcing line buffering, which, in effect will write changes to disk as soon as they happen.
  • Removed the numerous exit functions, the previous code would create one exit function that would flush ALL pursed array for each purse array that would have been created. Instead, there is only one exit function that flushes all arrays for each known control purse, which is I believe was intended in the first place.
  • Changed the order of the flush procedure so that it better respect the rest of the API (i.e. the control purse is placed at the end).
  • Added a restart procedure to force catching up with global arrays on restart (thus avoiding the lazy loading if this was explicitly wanted).
  • Make sure we can fully qualified array names on disk on all platforms. On Windows, the `:` character is forbidden, so we'll replace it by some other character when creating filenames. This isn't perfect, but avoid the creation of an index file.

EF Yet more changes to keep low on resources. As this has shifted much from the initial codebase, I've moved things to purse NG.