GO workflow. Does not work.
This commit is contained in:
132
workflows/geneontology.py
Normal file
132
workflows/geneontology.py
Normal file
@@ -0,0 +1,132 @@
|
||||
import networkx
|
||||
import re
|
||||
|
||||
class GOTerm(object):
    """A single ontology term, stored as a dictionary of tag values.

    Values are read and written with item syntax (``term['name']``).
    Tags named in ``GOTerm.lists`` hold a list of values; tags named in
    ``GOTerm.scalars`` hold a single value, or None until one is assigned.
    """

    # Tags whose values are accumulated in a list.
    lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
             'narrow_synonym', 'related_synonym', 'relationship',
             'subset', 'synonym', 'xref_analog', 'xref_unknown']

    # Tags that hold exactly one value.
    scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
               'comment', 'is_obsolete']

    def __init__(self):
        """Create a term with empty lists and unset (None) scalars."""
        self.d = {}

        # Each instance needs its own list objects; sharing one list between
        # terms would make an append on one term visible on all of them.
        for key in GOTerm.lists:
            self.d[key] = []
        for key in GOTerm.scalars:
            self.d[key] = None

    def __getitem__(self, key):
        """Return the value stored under key, or None if key is unknown."""
        return self.d.get(key)

    def __setitem__(self, key, value):
        """Store value under key, replacing any previous value."""
        self.d[key] = value
|
||||
|
||||
|
||||
class GeneOntology(networkx.XDiGraph):
    """Directed graph of ontology terms with an index from term id to term."""

    def __init__(self):
        networkx.XDiGraph.__init__(self)
        # Maps term['id'] to the term object for every term passed to
        # add_term().
        self.by_id = {}

    def add_term(self, term):
        """Add term as a graph node and index it under term['id']."""
        self.add_node(term)
        self.by_id[term['id']] = term

    def link_ontology(self, linkattr, obsolete=False):
        """Add an edge from each node to every term listed in node[linkattr].

        @param linkattr: Name of the list attribute holding the ids of the
        target terms (for example 'is_a'). It is also stored as the data
        of each edge that is created.
        @param obsolete: Currently unused.
        @raise KeyError: If a listed id was never registered with add_term().
        """
        for node in self.nodes():
            for link in node[linkattr]:
                self.add_edge(node, self.by_id[link], linkattr)

    def get_bp(self):
        """Return the term with id GO:0008150 (biological_process root)."""
        return self.by_id['GO:0008150']

    def get_cc(self):
        """Return the term with id GO:0005575 (cellular_component root)."""
        # Terms are indexed by their bare id, so the key must not carry the
        # 'id: ' tag prefix of the obo line.
        return self.by_id['GO:0005575']

    def get_mf(self):
        """Return the term with id GO:0003674 (molecular_function root)."""
        return self.by_id['GO:0003674']
|
||||
|
||||
|
||||
def _split_obo_line(line):
|
||||
"""Splits a line from an obo file in its three constituent parts.
|
||||
|
||||
@param line: A string containing a line from an obo file. The line must
|
||||
either be a section definition field with a section name in brackets
|
||||
or a line of the form: keyword: value ! comment
|
||||
@returns: A tuple of four strings conaining the section, key, value and
|
||||
description defined in the string. If the section part is None, all
|
||||
the other fields are strings and if section is a string, all the other
|
||||
fields are None.
|
||||
"""
|
||||
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
|
||||
s = line.strip()
|
||||
if s == "":
|
||||
return (None, None, None, None)
|
||||
elif s.startswith('[') and s.endswith(']'):
|
||||
return (s[1:-1], None, None, None)
|
||||
else:
|
||||
m = attrib_re.match(s)
|
||||
if m:
|
||||
key, value, comment = [x.strip() for x in m.groups()]
|
||||
return (None, key, value, comment)
|
||||
else:
|
||||
raise Exception('Unparsable line: %s' % line)
|
||||
|
||||
|
||||
def _add_term_attribute(term, key, value, comment):
    """Record one key/value pair on a term.

    A key listed in GOTerm.scalars replaces the term's current value for
    that key; a key listed in GOTerm.lists has the value appended to the
    term's list for that key.

    @param term: The term to update (indexable by key).
    @param key: The attribute name.
    @param value: The attribute value.
    @param comment: Accepted but not used.
    @raise Exception: If key is in neither GOTerm.scalars nor GOTerm.lists.
    """
    is_scalar = key in GOTerm.scalars
    if is_scalar:
        term[key] = value
        return

    if key not in GOTerm.lists:
        raise Exception('Unknown key %s: %s' % (key, value))

    term[key].append(value)
|
||||
|
||||
|
||||
def read_gene_ontology(fd):
    """Reads the Gene Ontology from an obo file.

    Every [Term] section becomes a GOTerm that is added to the returned
    ontology. Sections with any other name, and key/value lines that appear
    outside a [Term] section, are skipped. Skipped items are reported at
    DEBUG level on this module's logger rather than printed to stdout.

    @param fd: An open file object to the obo file.
    @returns: A GeneOntology containing all terms read from fd.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    import logging

    log = logging.getLogger(__name__)
    go = GeneOntology()
    term = None  # the [Term] currently being filled in, if any

    for line in fd:
        section, key, value, comment = _split_obo_line(line)

        if section is not None:
            # A new section header ends the term collected so far.
            if term is not None:
                go.add_term(term)

            if section == 'Term':
                term = GOTerm()
            else:
                term = None
                log.debug("ignoring: %s", section)
        elif key is not None:
            if term is not None:
                _add_term_attribute(term, key, value, comment)
            else:
                log.debug("no term: ignoring: %s", line)
        # Otherwise the line held no data (all fields None): nothing to do.

    # The last term has no following section header to flush it.
    if term is not None:
        go.add_term(term)

    return go
|
||||
|
||||
def read_default_go():
    """Reads the Gene Ontology from the default obo file location.

    @returns: The result of read_gene_ontology() for
    /usr/share/gene-ontology/gene_ontology.obo.
    """
    # The with-block closes the file even when parsing raises.
    with open("/usr/share/gene-ontology/gene_ontology.obo") as f:
        return read_gene_ontology(f)
|
Reference in New Issue
Block a user