133 lines
3.7 KiB
Python
133 lines
3.7 KiB
Python
|
import networkx
|
||
|
import re
|
||
|
|
||
|
class GOTerm:
    """A single Gene Ontology term, stored as a dictionary of attributes.

    Attribute names listed in ``GOTerm.lists`` start out as empty lists and
    those listed in ``GOTerm.scalars`` start out as ``None``.  Both name
    lists are attached to the class after its definition, so they must be
    set before the first instance is created.
    """

    def __init__(self):
        self.d = {}

        # Create empty lists for all list values.
        for name in GOTerm.lists:
            self.d[name] = []
        # Scalar values start out unset.  Each scalar gets its own key so
        # that no list entry created above is overwritten.
        for name in GOTerm.scalars:
            self.d[name] = None

    def __getitem__(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        return self.d.get(key)

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``."""
        self.d[key] = value
|
||
|
|
||
|
# Attribute keys whose values are accumulated in a list on each term.
GOTerm.lists = [
    'is_a',
    'alt_id',
    'exact_synonym',
    'broad_synonym',
    'narrow_synonym',
    'related_synonym',
    'relationship',
    'subset',
    'synonym',
    'xref_analog',
    'xref_unknown',
]
# Attribute keys that hold a single value on each term.
GOTerm.scalars = [
    'name',
    'id',
    'namespace',
    'def',
    'is_transitive',
    'comment',
    'is_obsolete',
]
|
||
|
|
||
|
|
||
|
class GeneOntology(networkx.XDiGraph):
    """Directed graph of GO terms with a lookup table from term id to term."""

    def __init__(self):
        networkx.XDiGraph.__init__(self)
        # Maps each term's 'id' attribute to the term object itself.
        self.by_id = {}

    def add_term(self, term):
        """Add ``term`` as a graph node and index it under ``term['id']``."""
        self.add_node(term)
        self.by_id[term['id']] = term

    def link_ontology(self, linkattr, obsolete=False):
        """Add an edge for every id listed under ``linkattr`` on every node.

        @param linkattr: Name of a list-valued term attribute (e.g. 'is_a')
            whose entries are ids of other terms.  It is also passed to
            add_edge as the third argument.
        @param obsolete: Not used by this method; kept so existing callers
            that pass it keep working.
        @raise KeyError: If a listed id has not been added with add_term.
        """
        for node in self.nodes():
            for link in node[linkattr]:
                self.add_edge(node, self.by_id[link], linkattr)

    def get_bp(self):
        """Return the term with id GO:0008150 (biological_process)."""
        return self.by_id['GO:0008150']

    def get_cc(self):
        """Return the term with id GO:0005575 (cellular_component)."""
        # by_id is keyed by the bare term id, without any 'id: ' prefix.
        return self.by_id['GO:0005575']

    def get_mf(self):
        """Return the term with id GO:0003674 (molecular_function)."""
        return self.by_id['GO:0003674']
|
||
|
|
||
|
|
||
|
def _split_obo_line(line):
|
||
|
"""Splits a line from an obo file in its three constituent parts.
|
||
|
|
||
|
@param line: A string containing a line from an obo file. The line must
|
||
|
either be a section definition field with a section name in brackets
|
||
|
or a line of the form: keyword: value ! comment
|
||
|
@returns: A tuple of four strings conaining the section, key, value and
|
||
|
description defined in the string. If the section part is None, all
|
||
|
the other fields are strings and if section is a string, all the other
|
||
|
fields are None.
|
||
|
"""
|
||
|
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
|
||
|
s = line.strip()
|
||
|
if s == "":
|
||
|
return (None, None, None, None)
|
||
|
elif s.startswith('[') and s.endswith(']'):
|
||
|
return (s[1:-1], None, None, None)
|
||
|
else:
|
||
|
m = attrib_re.match(s)
|
||
|
if m:
|
||
|
key, value, comment = [x.strip() for x in m.groups()]
|
||
|
return (None, key, value, comment)
|
||
|
else:
|
||
|
raise Exception('Unparsable line: %s' % line)
|
||
|
|
||
|
|
||
|
def _add_term_attribute(term, key, value, comment):
    """Store one parsed ``key: value`` pair on ``term``.

    A key listed in GOTerm.scalars replaces the stored value; a key listed
    in GOTerm.lists has ``value`` appended to its list.  ``comment`` is
    accepted but not used.

    @raise Exception: If ``key`` is in neither GOTerm.scalars nor
      GOTerm.lists.
    """
    if key in GOTerm.scalars:
        term[key] = value
        return
    if key in GOTerm.lists:
        term[key].append(value)
        return
    raise Exception('Unknown key %s: %s' % (key, value))
|
||
|
|
||
|
|
||
|
def read_gene_ontology(fd):
    """Reads the Gene Ontology from an obo file.

    Only ``[Term]`` sections are turned into terms.  Keyword lines that
    appear before the first section or inside any other kind of section are
    skipped.  Diagnostics about skipped input are sent to the ``logging``
    module at DEBUG level rather than written to standard output.

    @param fd: An open file object (or any iterable of lines) for the obo
      file.
    @returns: A GeneOntology containing one GOTerm per ``[Term]`` section.
      No edges are added here.
    """
    import logging  # imported locally so the module's import block is untouched
    log = logging.getLogger(__name__)

    go = GeneOntology()
    term = None

    for line in fd:
        section, key, value, comment = _split_obo_line(line)
        if section is None and key is None:
            # Nothing to record on this line.
            continue

        if section is not None:
            # A new section header ends the term collected so far.
            if term is not None:
                go.add_term(term)
            if section == 'Term':
                term = GOTerm()
                log.debug("[Term]")
            else:
                term = None
                log.debug("ignoring: %s", section)
        elif term is not None:
            _add_term_attribute(term, key, value, comment)
        else:
            log.debug("no term: ignoring: %s", line)

    # The last term has no following section header to flush it.
    if term is not None:
        go.add_term(term)

    return go
|
||
|
|
||
|
def read_default_go(path="/usr/share/gene-ontology/gene_ontology.obo"):
    """Reads the Gene Ontology from an obo file on disk.

    @param path: Location of the obo file.  Defaults to
      /usr/share/gene-ontology/gene_ontology.obo.
    @returns: The value returned by read_gene_ontology for that file.
    """
    f = open(path)
    # try/finally closes the file even if parsing raises.
    try:
        return read_gene_ontology(f)
    finally:
        f.close()
|