#======================================================================= # # Package: DGA.tcl # # Purpose: A tcl package that implements a number of algorithms on top of # the basic Tcl ::struct::graph package. package provide DGA 1.0 #======================================================================= # NAMESPACE ::DGA:: namespace eval DGA { #=================================================================== # Export the functions that the user should be calling namespace export shortest_path min_span_tree predecessor_list namespace export pl_node pl_arc pl_dist namespace export topological_sort outonly_nodes is_DAG # The graph package is needed for obvious reasons so we include # the struct package that contains it package require struct # Define any variables used by the package variable topo_sort {} #------------------------------------------------------------------- # update_node # # g - The graph # n - The node # u - Used start 0 Unused, 1 Found, 2 Completely used # d - The distance from the starting node # p - The predecessor node proc update_node { g n d p u } { # Set this nodes predecessor $g node set $n -key predecessor $p # The distance to the source node $g node set $n -key distance $d # Set the used flag if { [expr $u > -1] } { $g node set $n -key "used" $u } } #------------------------------------------------------------------- # init_graph # Algorithms such as Dijkstra'a and Prim's need to flag the nodes # and arcs of the graph and to record additional information # against the nodes and arcs. This function initializes these # values for a graph proc init_graph { g } { # Setup. Update each node with the keyed values that will hold # the following information for each node: # The distance from the source to the node - infinite # The predecessor of the node - nothing # The nodes status 0 - Not found, 1 - Found, 2 - Finished foreach node [$g nodes] { # This node doesn't have a predecessor and the distance to the # source node is undefined and the node is unused update_node $g $node -1 {} 0 } # Initialize the arcs in the graph. Make sure they all # have a weight associated with them. Give a default # weight of 1 if no weight is found. Set a flag to show # if the arc has been used in the algorithm foreach arc [$g arcs] { $g arc set $arc -key used 0 if { [catch { $g arc get $arc -key weight }] } { $g arc set $arc -key weight 1.0 } } } #------------------------------------------------------------------- # min_arc # Within the graph zero, one or many nodes will be in state 1, # i.e., the node has been found and still has outward arcs that # have not been used. Find the arc that has the minimum weight. # As a side effect if a node has no unused outward arcs left # then mark the node as used. If no arc can be found then # return an empty list. # g - The graph to search proc min_arc { g } { # Initialise the working variables used by this proc # The arc from the source node set tArc {} # The weight of the arc set tArcMin -1 # Find the shortest arc out of the nodes that have been found. # Use this arc if it provides a shorter route to the target or # if it discovers a new target node foreach node [$g nodes -key used -value 1] { # The hasArc counter is used to determine when a node has # no unused arcs left. set hasArc 0 # Look at each arc that goes out of the current node foreach arc [$g arcs -out $node -key used -value 0] { # Set hasArc as this node has at least one unused arc set hasArc 1 # Get the weight of this arc. If it is less than the # current value of tArcMin then it will be used set arcWeight [$g arc get $arc -key weight] if { [expr $tArcMin == -1] || [expr $tArcMin > $arcWeight] } { # Store the source node and arc names set tArc $arc set tArcMin $arcWeight } } # If a node doesn't have any unused arcs left then # mark the node as completely used if { [expr $hasArc == 0] } { $g node set $node -key used 2 } } # Now mark this arc as having been used if { $tArc != {} } { $g arc set $tArc -key used 1 } return $tArc } #=================================================================== # predecessor_list # After an algorithm such as: # shortest_path # min_span_tree # has been run on the graph it will contain a predecessor list, # that is for each node that was found an arc will be held under # the key 'predecessor' that defines the arc and node that this # node was reached from. This function returns this list as a # Tcl list with a structure. The pl_ functions can be used to # access this list # g - The graph proc predecessor_list { g } { set idxList {} set detList {} foreach node [$g nodes] { # The distance node is from the source set nDist [$g node get $node -key distance] # The arc that links node to its predecessor set pArc [$g node get $node -key predecessor] # The node that precedes this node, if there is one if { $pArc == {} } { set pNode {} } else { set pNode [$g arc source $pArc] } lappend idxList $node lappend detList [list $node $pNode $pArc $nDist] } return [list [lsort $idxList] [lsort $detList]] } #=================================================================== # pl_node # pl_arc # pl_dist # Three helper functions that make it easier to get # details about a node's predecessor. # pl - The predecessor list as returned by predecessor_list # n - The node proc pl_node { pl n } { set idx [lsearch -sorted [lindex $pl 0] $n] return [lindex [lindex [lindex $pl 1] $idx] 1] } proc pl_arc { pl n } { set idx [lsearch -sorted [lindex $pl 0] $n] return [lindex [lindex [lindex $pl 1] $idx] 2] } proc pl_dist { pl n } { set idx [lsearch -sorted [lindex $pl 0] $n] return [lindex [lindex [lindex $pl 1] $idx] 3] } #=================================================================== # shortest_path # Using Dijkstra's algorithm find a shortest path from the node # n to every other node that is reachable from n. # # g - The graph to be processed # n - The starting node proc shortest_path { g n } { # Setup. Update the nodes and arcs with the required # flags etc. init_graph $g # Mark the starting node as being used and as having a distance # of zero from itself. update_node $g $n 0.0 {} 1 # Now start looking for nodes we don't know about by exploring out # from the starting node. while {1} { # Find the arc with minimum length that goes from # a found node set tArc [min_arc $g] # Test for the end of the graph, we have no unused nodes # or arcs left. This does not imply that all of the nodes # in the graph have been discovered, some nodes may not be # reachable from the given starting node. These can be # identified as they have a distance of -1. if { $tArc == {} } { break } # Get the distance from the first node of # this node set sDist [$g node get [$g arc source $tArc] -key distance] # Get the name of the target node set tNode [$g arc target $tArc] # Get the weight of the arc joining the source and # target nodes. set weight [$g arc get $tArc -key weight] # Get the distance from the source node of the # target node. This will be -1 if we have never # seen this node before. set tDist [$g node get $tNode -key distance] # Does this arc build a short path to the target node? if { [expr $tDist == -1] } { # Yes - it must do, this is the first time # the target node has been encountered. # Save the distance from the source node and the # arc that leads to the predecessor and mark the # node as found, it will now be considered next time # we look for the shortest arc. update_node $g $tNode [expr $sDist + $weight] $tArc 1 } else { # Does this arc provide a cheaper way to get to # the already discovered node? Calculate the distance # based on the new arc's weight and compare it with the stored # distance for the node. set ttDist [expr $sDist + $weight] if { [expr $ttDist < $tDist] } { # Update the node to use the new arc as its # predecessor update_node $g $tNode $ttDist $tArc -1 } } } } #=================================================================== # min_span_tree # Using Prims's algorithm find a minimum spanning tree starting # at the node n and reaching to every other node that is reachable # from n. # # g - The graph to be processed # n - The starting node proc min_span_tree { g n } { # Setup. Update the nodes and arcs with the required # flags etc. init_graph $g # Mark the starting node as being used and as having a distance # of zero from itself. update_node $g $n 0.0 {} 1 # Now start looking for nodes we don't know about by exploring out # from the starting node. while {1} { # Find the arc with minimum length that goes from # a found node set tArc [min_arc $g] # Test for the end of the graph, we have no unused nodes # or arcs left. This does not imply that all of the nodes # in the graph have been discovered, some nodes may not be # reachable from the given starting node. These can be # identified as they have a distance of -1. if { $tArc == {} } { break } # Get the distance from the first node of # this node set sDist [$g node get [$g arc source $tArc] -key distance] # Get the name of the target node. set tNode [$g arc target $tArc] # Get the weight of the arc joining the source and # target nodes. set weight [$g arc get $tArc -key weight] # Get the distance from the source node of the # target node. This will be -1 if we have never # seen this node before. set tDist [$g node get $tNode -key distance] # Does this arc discover a new target node? if { [expr $tDist == -1] } { # Yes it does # Save the distance from the source node and the # arc that leads to the predecessor and mark the # node as found. It will now be considered next time # we look for the shortest arc. update_node $g $tNode [expr $sDist + $weight] $tArc 1 } } } #=================================================================== # outonly_nodes # An 'out only' node is one that has no inward arcs. That is the # node can have no predecessor as no no arc has it as its target # This function finds all such nodes with the graph. # # g - The graph proc outonly_nodes { g } { set result {} foreach node [$g nodes] { if { [$g node degree -in $node] == 0 } { lappend result $node } } return $result } #------------------------------------------------------------------- # make_toplevel_node # When checking for DAGness and/or performing a topological sort # a single top-level node to start the process from is required. # This function transforms the graph so that it has that single # top node. The name of this new node is returned. proc make_toplevel_node { g } { # Find toplevel node(s) set oonList [outonly_nodes $g] # If there are no out-only nodes then we can't sort the # graph as it must have at least one cycle. if { [llength $oonList] == 0 } { error "$g is not a DAG. Graph contains one or more cycles" } # If there are more than one out-only nodes a new parent node that # provides a single starting point for the sort must be created above # these nodes. To make life simple we do this for one node as well. set startNode [$g node insert] # Connect the startNode to the original out-only nodes foreach node $oonList { $g arc insert $startNode $node } return $startNode } #------------------------------------------------------------------- # topo_node # This routine is called by the topological_sort when a node has # been completely explored. The routine has two functions. proc topo_node { dir g n } { variable topo_sort lappend topo_sort $n } #=================================================================== # topological_sort # Perform a topological sort on the graph. This will return a # list of nodes giving an ordering on the nodes such that all # arcs go from left to right. Only an acyclic graph can have a # topological sort, because a directed cycle must eventually # return home to the source of the cycle. However, every DAG # has at least one topological sort. # # g - The graph to sort proc topological_sort { g } { variable topo_sort {} # Make a toplevel (parentless) node to start the sort from set startNode [make_toplevel_node $g] # Setup. Update the nodes and arcs with the required # flags etc. init_graph $g # Walk the graph, nodes are added to the topo_sort list # in the order that they are marked as completely explored. # The topological sort is the reverse of this order. # NB - This code won't detect any cycles in the data so it's # up to the user to determine that the graph is a DAG # The is_DAG function can be used for this $g walk $startNode -order post -type dfs -command topo_node # Remove the start node we created from the graph, all of the # arcs will be removed as well. $g node delete $startNode # Reverse the list and discard the node that # was added to the graph by this routine. set result {} for { set i [expr [llength $topo_sort] - 2] } {$i >= 0} {incr i -1} { lappend result [lindex $topo_sort $i] } return $result } #=================================================================== # dag_dfs # Recursive Depth First Search of the graph. If a node is # discovered that has the used flag set to 1, then it has been # found by a cycle through one of its children and the graph # is not a DAG. A stack-based implementation of this function # would be better. It would not risk blowing up if the recursion # level got too big. # # g - The graph # n - The next node to check proc dag_dfs { g n } { # We have found a new node, mark it as such. $g node set $n -key used 1 # Process each of the arcs out of the node. foreach arc [$g arcs -out $n] { # Find out the details of the target node. set tNode [$g arc target $arc] set used [$g node get $tNode -key used] # If the node has been discovered but not completed then # this is a back edge and the graph contains a cycle. if { $used == 1 } { error "$g is not a DAG. Graph contains one or more cycles" } dag_dfs $g $tNode } # We have completely used this node. $g node set $n -key used 2 } #=================================================================== # is_DAG # Determine if the graph is a DAG, that is that it contains no # cycles. If it isn't a DAG then an error is thrown! # # g - The graph proc is_DAG { g } { # Make a toplevel (parentless) node to start the sort from set startNode [make_toplevel_node $g] # Setup. Update the nodes and arcs with the required # flags etc. init_graph $g # Use the simple recursive definition of depth-first search # to search through the tree for back edges. set result [dag_dfs $g $startNode] # Remove the start node we created from the graph. All of the # arcs will be removed as well. $g node delete $startNode return $result } }