GeneOntol

DESCRIPTION

A snit for working with the Gene Ontology framework [L1 ]. Candidate for including into the coming biotcl package.


#  Author        : Dr. Detlef Groth, MPIMG Berlin
#  Created By    : Dr. Detlef Groth
#  Created       : 2005-02-21
#  Last Modified : <050222.0600>
#
#  Description        
package require snit 0.97
package require oomk
snit::type GeneOntol {
    # public options
    option -obofile -default "" -configuremethod SetObofile
    option -mkfile ""
    # private variables
    variable go
    variable db
    # views
    variable pvNames
    variable pvParents
    variable pvAltids
    variable pvObsoletes
    variable GETTREE
    # `GeneOntol gon -obofile obofile ?-mkfile metakitfile?' --
    #            constructor for the GeneOntol type
    #  Arguments:
    #           `-obofile filename' the Gene Ontology obofile
    #           `?-mkfile?' the metakit databasefile, defaults to obofile.mk
    # -----------------------------------------------
    constructor {args} {
        $self configurelist $args
        if {$options(-mkfile) eq ""} {
            set options(-mkfile) $options(-obofile).mk
        }
        $self Init
    }   
    destructor {
         if {[info exists db]} {
             $db close
         }
    }
    # public methods (are lowercase)

    # `gon' isAlt --
    #          info if an given GO-id is an alternative id 
    #  Arguments:
    #          `id' a GO-id
    #  Returns: 
    #          true if id is an alternative GO-id or false if it is not
    # ------------------------------------------------------------ 
    method isAlt {id} {
        [$pvAltids select -exact altid $id -count 1] as pSel
        if {[$pSel size] > 0} { 
            return true 
        } else { 
            return false 
        }
    }
    # `gon' alt2id --
    #          getting an the right GO-id for an alternative one
    #  Arguments:
    #          `id' a GO id
    #  Returns: 
    #          returns the right GO-id for an alternative one
    # ------------------------------------------------------------ 
    method alt2id {id} {
        [$pvAltids select -exact altid $id -count 1] as pSel
        if {[$pSel size] > 0} { 
            return [$pSel get 0 id]
        } else { 
            return "" 
        }
    }
    # `gon' isObsolete --
    #          info if an GO-id is an older obsolete GO-id
    #  Arguments:
    #          `id' a GO id
    #  Returns: 
    #          true if id is an obsolete GO-id, false if it its not
    # ------------------------------------------------------------ 
    method isObsolete {id} {
        [$pvObsoletes select -exact id $id -count 1] as pSel
        if {[$pSel size] > 0} { 
            return true 
        } else { 
            return false 
        }
    }
    # `gon' parents --
    #          getting a list of parents for a certain GO-id
    #  Arguments:
    #          `id' a GO-id
    #  Returns: 
    #          a list of lists with the keys parent, relation and id foreach listitem
    # ------------------------------------------------------------ 
    method parents {id} {
        set parents [list]
        [$pvParents select -exact id $id] as pSel
        $pSel loop c {
            lappend parents [array get c]
        }
        return $parents
    };
    # `gon' children --
    #          getting a list of children for a certain GO-id
    #  Arguments:
    #          `id' a GO-id
    #  Returns: 
    #          a list of lists with the keys parent, relation and id foreach listitem
    # ------------------------------------------------------------ 
    method children {id} {
        set children [list]
        if {[$self isAlt $id]} {
            set id [$self alt id]
        }
        [$pvParents select -exact parent $id] as pSel
        $pSel loop c {
            lappend children [array get c]
        }
        return $children
    }
    # `gon' name --
    #          getting the name for a GO-id
    #  Arguments:
    #          `id' a GO-id
    #  Returns: 
    #          the name of the GO-id, or an empty string if no valid GO-id
    # ------------------------------------------------------------ 
    method name {id} {
        if {[$self isAlt $id]} {
            set id [$self alt $id]
        }
        [$pvNames select -count 1 -exact id $id] as pSel
        if {[$pSel size] > 0} { 
            return [$pSel get 0 name]
        } else {
            return ""
        }
    }
    # `gon' id --
    #          getting the id for a GO-term
    #  Arguments:
    #          `name' a GO-term
    #  Returns: 
    #          the GO-id for the given term
    # ------------------------------------------------------------ 
    method id {name} {
        [$pvNames select -count 1 -exact name $name] as pSel
        if {[$pSel size] > 0} { 
            return [$pSel get 0 id]
        } else {
            return ""
        }
    }
    # `gon' like --
    #          performs a glob style pattern search against all GO-terms
    #  Arguments:
    #          `pattern' a glob style pattern
    #  Returns: 
    #          a list of lists with the keys id,name foreach matching term
    # ------------------------------------------------------------ 
    method like {pattern} {
        set res [list]
        [$pvNames select -glob name $pattern] as pSel
        $pSel loop c {
            lappend res [array get c]
        }
        return $res
    } 
    # `gon' getTree --
    #          method to get all GO-ids forming a full topdown tree for a certain
    #          GO-id
    #  Arguments:
    #          `id' a GO-id
    #  Returns: 
    #          a list of GO-ids, which constructs the full tree
    # ------------------------------------------------------------ 
    method getTree {id {stack 0}} {
        if {$stack == 0} { array unset GETTREE }
        incr stack
        set GETTREE($id) 1
        if {![lsearch [list GO:0003674 GO:0005575 GO:0008150] $id] >= 0} {
            foreach par [$self parents $id] {
                array set c $par
                $self getTree $c(parent) $stack
            }
        }
        return [array names GETTREE]
    }
    method getDefinition {id} {
        error "method getDefinition not yet implemented"
        # use an index for the obofile
        # (index done inside Init)
        # jump via seek inside obofile
        # extract the definition
    }
    method getInfo {} {
        error "method getInfo not yet implemented"
        # inside the mkfile during Init
        # collect number of MF,BP,CC terms
        # ua
    }
    # private methods (are uppercase)
    method SetObofile {option value} {
        if {$options($option) ne ""} {
            error "option $option can only be set a object creation time"
        }
        set options($option) $value
        #$self Init
    }
    method Init {} {
        if {[info exists db]} {
            $db close
        }
        if {[file exists "$options(-mkfile)"]} {
            puts stderr filexists
            set db [mkstorage %AUTO% $options(-mkfile)]
            [$db view names] as pvNames
            [$db view parents] as pvParents
            [$db view altids] as pvAltids
            [$db view obsoletes] as pvObsoletes
            return
        }
        puts stderr indexcreation
        set prog [Progress %AUTO% -file $options(-obofile)]
        set db [mkstorage %AUTO% $options(-mkfile)]
        $db layout names {id name}
        $db layout altids {id altid}
        $db layout parents {id relation parent}
        $db layout obsoletes {id}
        [$db view names] as pvNames
        [$db view parents] as pvParents
        [$db view altids] as pvAltids
        [$db view obsoletes] as pvObsoletes
        set x 0
        set id ""
        if [catch {open $options(-obofile) r} infh] {
            error "Cannot open $options(-obofile) : $infh"
        } 
        while {[gets $infh line] >= 0} {
            if {[incr x] % 500 == 0} { $prog progress [tell $infh] }
            if {[regexp {^id: *(GO:[0-9]{7})} $line -> id]} {
                #puts stderr $id
            } elseif {[regexp {^alt_id: *(GO:[0-9]{7})} $line -> alt]} {
                $pvAltids append id $id altid $alt
            } elseif {[regexp {^name: *(.+)} $line -> go(name,$id)]} {
                $pvNames append id $id name $go(name,$id)
            } elseif {[regexp {^(is_a:|relationship: part_of) *(GO:[0-9]{7})} $line -> kind goid]} {
                regsub {relationship: } $kind "" kind
                $pvParents append id $id relation $kind parent $goid
            } elseif {[regexp {^is_obsolete: *true} $line ]} {
                $pvObsoletes append id $id
            }
        }
        $db commit
    }
}

# test
proc test {} {
    source [file join [file dirname [info script]] Progress.tcl]
    set datadir e:/links/project/goblet/data/
   
    foreach version {2004-05-01 2004-09-01 2005-02-01} {
        set t [clock seconds]
        set gon [GeneOntol %AUTO% -obofile $datadir/goblet-databases/gene_ontology.obo.$version]
        puts stderr "Loading version $version in [expr [clock seconds] - $t] seconds"
        puts [$gon parents GO:0003720]
        puts [$gon children GO:0003674]
        puts [$gon name GO:0003674]
        foreach go {GO:0003674 GO:0003720} {
            puts "Children of $go :"
            foreach child [$gon children $go] {
                array set c $child
                set name [$gon name $c(id)]
                puts " $c(id) $go $name [$gon id $name]"
            }
            puts stderr "[expr [clock seconds] - $t] seconds"
        }
        foreach go [$gon like membrane*] {
            puts stderr " pattern $go [$gon name $go]"
        }
      
       puts stderr [$gon name GO:0003674]
       puts stderr [$gon id [$gon name GO:0003674]]
       puts stderr [$gon children GO:0003674]
       puts stderr [$gon name GO:0048201]
       puts stderr [$gon id [$gon name GO:0048201]]
       puts stderr [$gon parents GO:0048201]
       puts stderr "obsolet: GO:0000211 [$gon name GO:0000211]"
       puts stderr [$gon parents GO:0000211]
       puts stderr "obsolet-children: GO:0000211"
       puts stderr [$gon children GO:0000211]
       puts stderr "is_alt GO:0004752"
       puts stderr [$gon name GO:0004752]
       puts stderr [$gon parents GO:0004752]
       puts stderr "is_alt children GO:0004752"
       puts stderr [$gon children GO:0004752]
       puts stderr "[expr [clock seconds] - $t] seconds"
       puts stderr "A tree for GO:0048201"
       puts stderr [$gon getTree GO:0048201]
    }
}