diff --git a/workflows/geneontology.py b/workflows/geneontology.py
new file mode 100644
index 0000000..05475cb
--- /dev/null
+++ b/workflows/geneontology.py
@@ -0,0 +1,174 @@
+import networkx
+import re
+
+class GOTerm:
+    """A single Gene Ontology term, stored as a dict of obo attributes.
+
+    Every key in GOTerm.lists starts out as an empty list and every key
+    in GOTerm.scalars starts out as None.  Looking up a key that was never
+    set returns None instead of raising KeyError.
+    """
+
+    def __init__(self):
+        self.d = {}
+
+        ## Create empty lists for all list values
+        for key in GOTerm.lists:
+            self.d[key] = []
+        ## Scalar values start out as None
+        for key in GOTerm.scalars:
+            self.d[key] = None
+
+    def __getitem__(self, key):
+        """Return the value stored under key, or None if there is none."""
+        return self.d.get(key)
+
+    def __setitem__(self, key, value):
+        """Store value under key."""
+        self.d[key] = value
+
+GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym',
+                'narrow_synonym', 'related_synonym', 'relationship',
+                'subset', 'synonym', 'xref_analog', 'xref_unknown']
+GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive',
+                  'comment', 'is_obsolete']
+
+
+class GeneOntology(networkx.XDiGraph):
+    """Directed graph of GOTerm nodes, indexed by term id in by_id."""
+
+    def __init__(self):
+        networkx.XDiGraph.__init__(self)
+        self.by_id = {}
+
+    def add_term(self, term):
+        """Add term as a node and index it in by_id under term['id']."""
+        self.add_node(term)
+        self.by_id[term['id']] = term
+
+    def link_ontology(self, linkattr, obsolete=False):
+        """Add an edge from each node to every term listed under linkattr.
+
+        @param linkattr: Name of a term attribute holding a list of term
+            ids, for example 'is_a'. It is also passed to add_edge as the
+            third argument.
+        @param obsolete: Currently unused.
+        """
+        for node in self.nodes():
+            for link in node[linkattr]:
+                self.add_edge(node, self.by_id[link], linkattr)
+
+    def get_bp(self):
+        """Return the term with id GO:0008150."""
+        return self.by_id['GO:0008150']
+
+    def get_cc(self):
+        """Return the term with id GO:0005575."""
+        ## by_id is keyed by the bare id, without an 'id: ' prefix
+        return self.by_id['GO:0005575']
+
+    def get_mf(self):
+        """Return the term with id GO:0003674."""
+        return self.by_id['GO:0003674']
+
+
+## Matches lines of the form: keyword: value ! comment
+_attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)')
+
+
+def _split_obo_line(line):
+    """Splits a line from an obo file into its four constituent parts.
+
+    @param line: A string containing a line from an obo file. The line must
+        either be blank, a section definition with a section name in
+        brackets, or a line of the form: keyword: value ! comment
+    @returns: A tuple of four values: the section, key, value and comment
+        found in the line. For a section line only the section is a string
+        and the rest are None; for a keyword line the section is None and
+        the rest are strings; for a blank line all four are None.
+    @raises ValueError: If the line has none of the forms above.
+    """
+    s = line.strip()
+    if s == "":
+        return (None, None, None, None)
+    if s.startswith('[') and s.endswith(']'):
+        return (s[1:-1], None, None, None)
+
+    m = _attrib_re.match(s)
+    if m is None:
+        raise ValueError('Unparsable line: %s' % line)
+    key, value, comment = [x.strip() for x in m.groups()]
+    return (None, key, value, comment)
+
+
+def _add_term_attribute(term, key, value, comment):
+    """Stores one parsed key/value pair on a term.
+
+    @param term: The GOTerm to update.
+    @param key: The attribute name, from GOTerm.scalars or GOTerm.lists.
+    @param value: The attribute value. It replaces a scalar attribute and
+        is appended to a list attribute.
+    @param comment: The comment part of the line. It is not stored.
+    @raises ValueError: If key is not a known attribute name.
+    """
+    if key in GOTerm.scalars:
+        term[key] = value
+    elif key in GOTerm.lists:
+        term[key].append(value)
+    else:
+        raise ValueError('Unknown key %s: %s' % (key, value))
+
+
+def read_gene_ontology(fd):
+    """Reads the Gene Ontology from an obo file.
+
+    Only [Term] sections are read. Other sections, and attribute lines
+    outside any [Term] section, are reported on stdout and skipped.
+
+    @param fd: An open file object to the obo file.
+    @returns: A GeneOntology with one GOTerm node per [Term] section. No
+        edges are added.
+    """
+    go = GeneOntology()
+    term = None
+
+    for line in iter(fd.readline, ''):
+        s, k, v, c = _split_obo_line(line)
+        if s is None and k is None:
+            ## Blank line
+            pass
+        elif s is not None:
+            ## A new section begins: store the term read so far, if any
+            if term is not None:
+                go.add_term(term)
+
+            if s == 'Term':
+                term = GOTerm()
+                print "[Term]"
+            else:
+                term = None
+                print "ignoring: %s" %s
+        else:
+            if term is not None:
+                _add_term_attribute(term, k, v, c)
+            else:
+                print "no term: ignoring: %s" %line
+        print '.',
+
+    if term is not None:
+        go.add_term(term)
+
+    return go
+
+def read_default_go():
+    """Reads the Gene Ontology from the system-wide obo file.
+
+    @returns: The GeneOntology read from
+        /usr/share/gene-ontology/gene_ontology.obo.
+    """
+    f = open("/usr/share/gene-ontology/gene_ontology.obo")
+    try:
+        return read_gene_ontology(f)
+    finally:
+        ## Close the file even when parsing fails
+        f.close()
diff --git a/workflows/go_workflow.py b/workflows/go_workflow.py
new file mode 100644
index 0000000..37c9f8a
--- /dev/null
+++ b/workflows/go_workflow.py
@@ -0,0 +1,115 @@
+import gtk
+from fluents import dataset, logger, plots, workflow, fluents
+#import geneontology
+#import gostat
+from scipy import array, randn, log, ones
+import networkx
+
+class GoTermView (gtk.Frame):
+    """Frame with the label 'GO Term'."""
+
+    def __init__(self):
+        gtk.Frame.__init__(self)
+        self.set_label('GO Term')
+
+
+class GeneOntologyTree (gtk.HPaned):
+    """Horizontal pane holding a tree view and a GoTermView."""
+
+    def __init__(self):
+        gtk.HPaned.__init__(self)
+
+        self._tree_view = gtk.TreeView()
+        self._desc_view = GoTermView()
+
+        self.add1(self._tree_view)
+        self.add2(self._desc_view)
+        self.show_all()
+
+class GoWorkflow (workflow.Workflow):
+    """Workflow with one 'load' stage holding the two load functions."""
+
+    name = 'Gene Ontology'
+    ident = 'go'
+    description = 'Gene Ontology Workflow. For tree distance measures based '\
+        + 'on the GO tree.'
+
+    def __init__(self, app):
+        workflow.Workflow.__init__(self, app)
+
+        load = workflow.Stage('load', 'Load GO Annotations')
+        load.add_function(LoadGOFunction())
+        load.add_function(LoadAnnotationsFunction())
+        self.add_stage(load)
+
+
+class LoadGOFunction(workflow.Function):
+    """Workflow function that opens a Gene Ontology browser page."""
+
+    def __init__(self):
+        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
+
+    def run(self):
+        """Append a GeneOntologyTree page to fluents.app['bottom_notebook']."""
+        browser = GeneOntologyTree()
+        label = gtk.Label('_Gene Ontology')
+        label.set_use_underline(True)
+        fluents.app['bottom_notebook'].append_page(browser, label)
+
+class LoadAnnotationsFunction(workflow.Function):
+    """Workflow function that loads GO annotations from a file."""
+
+    def __init__(self):
+        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
+        self.annotations = None
+
+    def load_file(self, filename):
+        """Read the annotation file into self.annotations.
+
+        Each line is split into fields. The first field is added as a
+        'genes' identifier and the set of remaining fields as its
+        'go-terms'. Lines with fewer than two fields are skipped.
+
+        @param filename: Path of the annotation file to read.
+        """
+        f = open(filename)
+        try:
+            ## NOTE(review): Annotations is not imported in this file;
+            ## confirm which module is meant to provide it.
+            self.annotations = Annotations('genes', 'go-terms')
+            logger.log('notice', 'Loading annotation file: %s' % filename)
+
+            for line in f:
+                ## NOTE(review): this splits on a space followed by a tab;
+                ## confirm that a plain tab was not intended.
+                val = line.split(' \t')
+
+                if len(val) > 1:
+                    val = [v.strip() for v in val]
+                    ## Collect into self.annotations, which run() returns
+                    self.annotations.add_annotations('genes', val[0],
+                                                     'go-terms', set(val[1:]))
+        finally:
+            f.close()
+
+    def on_response(self, dialog, response):
+        """Load the file chosen in dialog if the response is RESPONSE_OK."""
+        if response == gtk.RESPONSE_OK:
+            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
+            self.load_file(dialog.get_filename())
+
+    def run(self):
+        """Ask for an annotation file with a file chooser and load it.
+
+        @returns: A list holding self.annotations, which is None as long
+            as no file has been loaded.
+        """
+        btns = ('Open', gtk.RESPONSE_OK,
+                'Cancel', gtk.RESPONSE_CANCEL)
+        dialog = gtk.FileChooserDialog('Open GO Annotation File',
+                                       buttons=btns)
+        dialog.connect('response', self.on_response)
+        dialog.run()
+        dialog.destroy()
+        return [self.annotations]
+