Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/workflows/geneontology.py

133 lines
3.7 KiB
Python
Raw Normal View History

2006-10-22 17:31:36 +02:00
import networkx
import re
class GOTerm:
    """A single ontology term, stored as a dictionary of attributes.

    Attributes are accessed with item syntax (``term['id']``).  Every key
    named in ``GOTerm.lists`` starts out as an empty list and every key
    named in ``GOTerm.scalars`` starts out as ``None``.
    """

    # Attributes that are accumulated into a list, one entry per occurrence.
    lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
             'narrow_synonym', 'related_synonym', 'relationship',
             'subset', 'synonym', 'xref_analog', 'xref_unknown']

    # Attributes that hold a single value.
    scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
               'comment', 'is_obsolete']

    def __init__(self):
        """Create a term with empty lists and unset (None) scalars."""
        self.d = {}
        # Each instance gets its own fresh list objects.
        for key in GOTerm.lists:
            self.d[key] = []
        # Use the scalar loop's own variable here; reusing the variable of
        # the list loop would overwrite the last list attribute with None
        # and leave every scalar key missing.
        for key in GOTerm.scalars:
            self.d[key] = None

    def __getitem__(self, key):
        """Return the value stored under key, or None if key is unknown."""
        return self.d.get(key)

    def __setitem__(self, key, value):
        """Store value under key, replacing any previous value."""
        self.d[key] = value
class GeneOntology(networkx.XDiGraph):
    """Directed graph whose nodes are GOTerm objects, with lookup by id.

    ``by_id`` maps the value of each term's ``'id'`` attribute to the term
    object itself.
    """

    def __init__(self):
        """Create an empty ontology graph."""
        networkx.XDiGraph.__init__(self)
        self.by_id = {}

    def add_term(self, term):
        """Add term as a node and register it in by_id under term['id']."""
        self.add_node(term)
        self.by_id[term['id']] = term

    def link_ontology(self, linkattr, obsolete=False):
        """Add an edge for every reference stored under linkattr.

        For each node, every value in ``node[linkattr]`` is looked up in
        ``by_id`` and an edge from the node to the referenced term is added,
        with linkattr passed as the third argument of ``add_edge``.

        @param linkattr: Name of a list attribute of the terms, e.g. 'is_a'.
        @param obsolete: Accepted but not used by this method.
        @raises KeyError: If a referenced value is not a key of by_id.
        """
        for node in self.nodes():
            for link in node[linkattr]:
                self.add_edge(node, self.by_id[link], linkattr)

    def get_bp(self):
        """Return the term with id GO:0008150 (biological_process root)."""
        return self.by_id['GO:0008150']

    def get_cc(self):
        """Return the term with id GO:0005575 (cellular_component root)."""
        # Terms are registered under their bare id, so the key must not
        # carry an 'id: ' prefix.
        return self.by_id['GO:0005575']

    def get_mf(self):
        """Return the term with id GO:0003674 (molecular_function root)."""
        return self.by_id['GO:0003674']
def _split_obo_line(line):
"""Splits a line from an obo file in its three constituent parts.
@param line: A string containing a line from an obo file. The line must
either be a section definition field with a section name in brackets
or a line of the form: keyword: value ! comment
@returns: A tuple of four strings conaining the section, key, value and
description defined in the string. If the section part is None, all
the other fields are strings and if section is a string, all the other
fields are None.
"""
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
s = line.strip()
if s == "":
return (None, None, None, None)
elif s.startswith('[') and s.endswith(']'):
return (s[1:-1], None, None, None)
else:
m = attrib_re.match(s)
if m:
key, value, comment = [x.strip() for x in m.groups()]
return (None, key, value, comment)
else:
raise Exception('Unparsable line: %s' % line)
def _add_term_attribute(term, key, value, comment):
    """Stores one parsed attribute on a term.

    @param term: The term object to update; accessed with item syntax.
    @param key: The attribute name.  Names in GOTerm.scalars overwrite the
        stored value, names in GOTerm.lists append to the stored list.
    @param value: The attribute value to store.
    @param comment: The trailing comment of the line; not used here.
    @raises Exception: If key is in neither GOTerm.scalars nor GOTerm.lists.
    """
    # Scalars are checked first, exactly as before.
    if key in GOTerm.scalars:
        term[key] = value
        return

    if key in GOTerm.lists:
        term[key].append(value)
        return

    raise Exception('Unknown key %s: %s' % (key, value))
def read_gene_ontology(fd):
    """Reads the Gene Ontology from an obo file.

    Every [Term] section becomes a GOTerm that is added to the returned
    ontology.  Sections with any other name are skipped, as are attribute
    lines that appear outside a [Term] section.  Only nodes are added;
    no edges are created here (see GeneOntology.link_ontology).

    Diagnostics are emitted at DEBUG level through the logging module
    rather than written to standard output.

    @param fd: An open file object to the obo file.
    @returns: A GeneOntology containing all terms read from fd.
    """
    import logging
    log = logging.getLogger(__name__)

    go = GeneOntology()
    term = None  # the [Term] currently being filled in, if any

    line = fd.readline()
    while line:
        s, k, v, c = _split_obo_line(line)
        if s is None and k is None:
            # Line without content: nothing to do.
            pass
        elif s:
            # A new section starts, so the previous term is complete.
            if term:
                go.add_term(term)
            if s == 'Term':
                term = GOTerm()
                log.debug("[Term]")
            else:
                term = None
                log.debug("ignoring: %s", s)
        else:
            if term:
                _add_term_attribute(term, k, v, c)
            else:
                log.debug("no term: ignoring: %s", line)
        line = fd.readline()

    # The last term is not followed by a section header; add it here.
    if term:
        go.add_term(term)
    return go
def read_default_go(path="/usr/share/gene-ontology/gene_ontology.obo"):
    """Reads the Gene Ontology from an obo file on disk.

    @param path: Location of the obo file.  Defaults to
        /usr/share/gene-ontology/gene_ontology.obo.
    @returns: The ontology returned by read_gene_ontology for that file.
    """
    f = open(path)
    # try/finally closes the file even when parsing raises.
    try:
        return read_gene_ontology(f)
    finally:
        f.close()