GO workflow. Does not work.

This commit is contained in:
Einar Ryeng 2006-10-22 15:31:36 +00:00
parent de4a65e7b3
commit 5491f5ce04
2 changed files with 217 additions and 0 deletions

132
workflows/geneontology.py Normal file
View File

@ -0,0 +1,132 @@
import networkx
import re
class GOTerm:
def __init__(self):
self.d = {}
## Create empty lists for all list values
for l in GOTerm.lists:
self.d[l] = []
for s in GOTerm.scalars:
self.d[l] = None
def __getitem__(self, key):
if self.d.has_key(key):
return self.d[key]
return None
def __setitem__(self, key, value):
self.d[key] = value
GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
'narrow_synonym', 'related_synonym', 'relationship',
'subset', 'synonym', 'xref_analog', 'xref_unknown']
GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
'comment', 'is_obsolete']
class GeneOntology(networkx.XDiGraph):
def __init__(self):
networkx.XDiGraph.__init__(self)
self.by_id = {}
def add_term(self, term):
self.add_node(term)
self.by_id[term['id']] = term
def link_ontology(self, linkattr, obsolete=False):
for node in self.nodes():
for link in node[linkattr]:
self.add_edge(node, self.by_id[link], linkattr)
def get_bp(self):
return self.by_id['GO:0008150']
def get_cc(self):
return self.by_id['id: GO:0005575']
def get_mf(self):
return self.by_id['GO:0003674']
def _split_obo_line(line):
"""Splits a line from an obo file in its three constituent parts.
@param line: A string containing a line from an obo file. The line must
either be a section definition field with a section name in brackets
or a line of the form: keyword: value ! comment
@returns: A tuple of four strings conaining the section, key, value and
description defined in the string. If the section part is None, all
the other fields are strings and if section is a string, all the other
fields are None.
"""
attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
s = line.strip()
if s == "":
return (None, None, None, None)
elif s.startswith('[') and s.endswith(']'):
return (s[1:-1], None, None, None)
else:
m = attrib_re.match(s)
if m:
key, value, comment = [x.strip() for x in m.groups()]
return (None, key, value, comment)
else:
raise Exception('Unparsable line: %s' % line)
def _add_term_attribute(term, key, value, comment):
if key in GOTerm.scalars:
term[key] = value
elif key in GOTerm.lists:
term[key].append(value)
else:
raise Exception('Unknown key %s: %s' % (key, value))
def read_gene_ontology(fd):
"""Reads the Gene Ontology from an obo file.
@param fd: An open file object to the obo file.
"""
go = GeneOntology()
term = None
section = None
line = fd.readline()
while line:
s, k, v, c = _split_obo_line(line)
if s == None and k == None:
pass
elif s:
if term:
go.add_term(term)
section = s
if s == 'Term':
term = GOTerm()
print "[Term]"
else:
term = None
print "ignoring: %s" %s
else:
if term:
_add_term_attribute(term, k, v, c)
# print " %s: %s" % (k, v)
else:
print "no term: ignoring: %s" %line
print '.',
line = fd.readline()
if term:
go.add_term(term)
return go
def read_default_go():
f = open("/usr/share/gene-ontology/gene_ontology.obo")
go = read_gene_ontology(f)
f.close()
return go

85
workflows/go_workflow.py Normal file
View File

@ -0,0 +1,85 @@
import gtk
from fluents import dataset, logger, plots, workflow, fluents
#import geneontology
#import gostat
from scipy import array, randn, log, ones
import networkx
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
self.set_label('GO Term')
class GeneOntologyTree (gtk.HPaned):
def __init__(self):
gtk.HPaned.__init__(self)
self._tree_view = gtk.TreeView()
self._desc_view = GoTermView()
self.add1(self._tree_view)
self.add2(self._desc_view)
self.show_all()
class GoWorkflow (workflow.Workflow):
name = 'Gene Ontology'
ident = 'go'
description = 'Gene Ontology Workflow. For tree distance measures based '\
+ 'on the GO tree.'
def __init__(self, app):
workflow.Workflow.__init__(self, app)
load = workflow.Stage('load', 'Load GO Annotations')
load.add_function(LoadGOFunction())
load.add_function(LoadAnnotationsFunction())
self.add_stage(load)
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
browser = GeneOntologyTree()
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def load_file(self, filename):
f = open(filename)
self.annotations = Annotations('genes', 'go-terms')
logger.log('notice', 'Loading annotation file: %s' % filename)
for line in f.readlines():
val = line.split(' \t')
if len(val) > 1:
val = [v.strip() for v in val]
retval.add_annotations('genes', val[0],
'go-terms', set(val[1:]))
def on_response(self, dialog, response):
if response == gtk.RESPONSE_OK:
logger.log('notice', 'Reading file: %s' % dialog.get_filename())
self.load_file(dialog.get_filename())
def run(self):
btns = ('Open', gtk.RESPONSE_OK, \
'Cancel', gtk.RESPONSE_CANCEL)
dialog = gtk.FileChooserDialog('Open GO Annotation File',
buttons=btns)
dialog.connect('response', self.on_response)
dialog.run()
dialog.destroy()
return [self.annotations]