laydi/workflows/geneontology.py

import logging
import re

import networkx

class GOTerm:
    """A single Gene Ontology term, stored as a mapping of obo attributes.

    Attributes named in GOTerm.lists may be given several times for one
    term and are kept as lists. Attributes named in GOTerm.scalars hold a
    single value and are None until they are set.
    """

    def __init__(self):
        # Backing store: attribute name -> value.
        self.d = {}

        ## Create empty lists for all list values
        for name in GOTerm.lists:
            self.d[name] = []
        ## Every scalar value gets its own key, initially None
        for name in GOTerm.scalars:
            self.d[name] = None

    def __getitem__(self, key):
        """Return the value stored under key, or None if key is not set."""
        return self.d.get(key)

    def __setitem__(self, key, value):
        """Store value under key, replacing any previous value."""
        self.d[key] = value


# Attribute names whose values are collected in a list.
GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
                'narrow_synonym', 'related_synonym', 'relationship',
                'subset', 'synonym', 'xref_analog', 'xref_unknown']
# Attribute names that hold a single value.
GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
                  'comment', 'is_obsolete']


class GeneOntology(networkx.XDiGraph):
    """Directed graph of GO terms with a lookup table from term id to term."""

    def __init__(self):
        networkx.XDiGraph.__init__(self)
        # Maps the 'id' attribute of every added term to the term itself.
        self.by_id = {}

    def add_term(self, term):
        """Adds a term as a node and indexes it under its 'id' attribute.

        @param term: An object supporting term['id'] lookup (e.g. a GOTerm).
        """
        self.add_node(term)
        self.by_id[term['id']] = term

    def link_ontology(self, linkattr, obsolete=False):
        """Adds an edge for every entry in the linkattr list of every node.

        Each entry is looked up in by_id, so it must be exactly the id of
        a term that has been added; otherwise a KeyError is raised.

        @param linkattr: Name of the list attribute to follow on each node.
            It is also passed as the third argument of add_edge.
        @param obsolete: Currently unused.
        """
        for node in self.nodes():
            for link in node[linkattr]:
                self.add_edge(node, self.by_id[link], linkattr)

    def get_bp(self):
        """Returns the term with id GO:0008150.

        NOTE(review): presumably the biological_process root -- confirm
        against the loaded obo file.
        """
        return self.by_id['GO:0008150']

    def get_cc(self):
        """Returns the term with id GO:0005575.

        NOTE(review): presumably the cellular_component root -- confirm
        against the loaded obo file.
        """
        # by_id is keyed by the bare id value, without any 'id: ' prefix.
        return self.by_id['GO:0005575']

    def get_mf(self):
        """Returns the term with id GO:0003674.

        NOTE(review): presumably the molecular_function root -- confirm
        against the loaded obo file.
        """
        return self.by_id['GO:0003674']


def _split_obo_line(line):
    """Splits a line from an obo file in its three constituent parts.
    
    @param line: A string containing a line from an obo file. The line must
        either be a section definition field with a section name in brackets
        or a line of the form: keyword: value ! comment
    @returns: A tuple of four strings conaining the section, key, value and 
        description defined in the string. If the section part is None, all
        the other fields are strings and if section is a string, all the other
        fields are None.
    """
    attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
    s = line.strip()    
    if s == "":
        return (None, None, None, None)
    elif s.startswith('[') and s.endswith(']'):
        return (s[1:-1], None, None, None)
    else:
        m = attrib_re.match(s)
        if m:
            key, value, comment = [x.strip() for x in m.groups()]        
            return (None, key, value, comment)
        else:
            raise Exception('Unparsable line: %s' % line)


def _add_term_attribute(term, key, value, comment):
    """Stores one parsed key/value pair on a term.

    Keys listed in GOTerm.scalars overwrite the stored value; keys listed
    in GOTerm.lists are appended to the stored list.

    @param term: The term to update, indexed with term[key].
    @param key: The attribute name.
    @param value: The attribute value.
    @param comment: The trailing comment of the line; not used.
    @raises Exception: If key is in neither GOTerm.scalars nor GOTerm.lists.
    """
    if key not in GOTerm.scalars:
        if key not in GOTerm.lists:
            raise Exception('Unknown key %s: %s' % (key, value))
        term[key].append(value)
        return

    term[key] = value
   

def read_gene_ontology(fd):
    """Reads the Gene Ontology from an obo file.

    Only [Term] sections are loaded. Attribute lines that belong to any
    other section, or that come before the first section header, are
    skipped and reported at debug level through the logging module.

    @param fd: An open file object to the obo file.
    @returns: A GeneOntology with one GOTerm added per [Term] section.
    @raises Exception: If a line cannot be parsed or a term uses an
        attribute key that is not known to GOTerm.
    """
    log = logging.getLogger(__name__)
    go = GeneOntology()
    term = None

    for line in fd:
        section, key, value, comment = _split_obo_line(line)
        if section is None and key is None:
            # Blank line: nothing to do.
            continue

        if section:
            # A section header finishes the term being built, if any.
            if term is not None:
                go.add_term(term)

            if section == 'Term':
                term = GOTerm()
            else:
                term = None
                log.debug("ignoring section: %s", section)
        elif term is not None:
            _add_term_attribute(term, key, value, comment)
        else:
            log.debug("no term: ignoring: %s", line)

    # The last term has no following section header to finish it.
    if term is not None:
        go.add_term(term)

    return go

def read_default_go():
    """Reads the Gene Ontology from the default obo file location.

    @returns: The result of read_gene_ontology for the file
        /usr/share/gene-ontology/gene_ontology.obo
    """
    f = open("/usr/share/gene-ontology/gene_ontology.obo")
    try:
        return read_gene_ontology(f)
    finally:
        # Close the file even when parsing raises.
        f.close()
GO workflow. Does not work. 2006-10-22 17:31:36 +02:00			`import networkx`
			`import re`

			`class GOTerm:`
			`def __init__(self):`
			`self.d = {}`

			`## Create empty lists for all list values`
			`for l in GOTerm.lists:`
			`self.d[l] = []`
			`for s in GOTerm.scalars:`
			`self.d[l] = None`

			`def __getitem__(self, key):`
			`if self.d.has_key(key):`
			`return self.d[key]`
			`return None`

			`def __setitem__(self, key, value):`
			`self.d[key] = value`

			`GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',`
			`'narrow_synonym', 'related_synonym', 'relationship',`
			`'subset', 'synonym', 'xref_analog', 'xref_unknown']`
			`GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',`
			`'comment', 'is_obsolete']`


			`class GeneOntology(networkx.XDiGraph):`
			`def __init__(self):`
			`networkx.XDiGraph.__init__(self)`
			`self.by_id = {}`

			`def add_term(self, term):`
			`self.add_node(term)`
			`self.by_id[term['id']] = term`

			`def link_ontology(self, linkattr, obsolete=False):`
			`for node in self.nodes():`
			`for link in node[linkattr]:`
			`self.add_edge(node, self.by_id[link], linkattr)`

			`def get_bp(self):`
			`return self.by_id['GO:0008150']`

			`def get_cc(self):`
			`return self.by_id['id: GO:0005575']`

			`def get_mf(self):`
			`return self.by_id['GO:0003674']`


			`def _split_obo_line(line):`
			`"""Splits a line from an obo file in its three constituent parts.`

			`@param line: A string containing a line from an obo file. The line must`
			`either be a section definition field with a section name in brackets`
			`or a line of the form: keyword: value ! comment`
			`@returns: A tuple of four strings conaining the section, key, value and`
			`description defined in the string. If the section part is None, all`
			`the other fields are strings and if section is a string, all the other`
			`fields are None.`
			`"""`
			`attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')`
			`s = line.strip()`
			`if s == "":`
			`return (None, None, None, None)`
			`elif s.startswith('[') and s.endswith(']'):`
			`return (s[1:-1], None, None, None)`
			`else:`
			`m = attrib_re.match(s)`
			`if m:`
			`key, value, comment = [x.strip() for x in m.groups()]`
			`return (None, key, value, comment)`
			`else:`
			`raise Exception('Unparsable line: %s' % line)`


			`def _add_term_attribute(term, key, value, comment):`
			`if key in GOTerm.scalars:`
			`term[key] = value`
			`elif key in GOTerm.lists:`
			`term[key].append(value)`
			`else:`
			`raise Exception('Unknown key %s: %s' % (key, value))`


			`def read_gene_ontology(fd):`
			`"""Reads the Gene Ontology from an obo file.`

			`@param fd: An open file object to the obo file.`
			`"""`
			`go = GeneOntology()`
			`term = None`
			`section = None`

			`line = fd.readline()`
			`while line:`
			`s, k, v, c = _split_obo_line(line)`
			`if s == None and k == None:`
			`pass`
			`elif s:`
			`if term:`
			`go.add_term(term)`

			`section = s`
			`if s == 'Term':`
			`term = GOTerm()`
			`print "[Term]"`
			`else:`
			`term = None`
			`print "ignoring: %s" %s`
			`else:`
			`if term:`
			`_add_term_attribute(term, k, v, c)`
			`# print " %s: %s" % (k, v)`
			`else:`
			`print "no term: ignoring: %s" %line`
			`print '.',`
			`line = fd.readline()`

			`if term:`
			`go.add_term(term)`

			`return go`

			`def read_default_go():`
			`f = open("/usr/share/gene-ontology/gene_ontology.obo")`
			`go = read_gene_ontology(f)`

			`f.close()`
			`return go`