def link_relationships(self):
    """Add one typed edge per 'relationship' entry found on the nodes.

    Every entry is read as ``<link type> <term id>``.  The term id is
    resolved through ``self.by_id`` and an edge is added from that term
    to the node holding the entry, with the link type as edge data.

    Nodes without a 'relationship' key are skipped.  Only the first two
    whitespace-separated tokens of an entry are used, so repeated spaces
    or trailing text after the term id are tolerated.
    NOTE(review): presumably the entries follow the OBO ``relationship``
    tag layout -- confirm against the parser that fills the nodes.

    Raises:
        ValueError: if an entry has fewer than two tokens.
        KeyError: if the term id is unknown to ``self.by_id`` (assuming
            ``by_id`` is a plain dict).
    """
    for node in self.nodes():
        # Keep the try body minimal: only the lookup may legitimately miss.
        try:
            links = node['relationship']
        except KeyError:
            continue

        for link in links:
            # split() without arguments collapses runs of whitespace and
            # drops leading/trailing blanks, so no extra strip() is needed.
            tokens = link.split()
            if len(tokens) < 2:
                raise ValueError(
                    "malformed relationship entry: %r" % (link,))
            link_type, term = tokens[0], tokens[1]
            self.add_edge(self.by_id[term], node, link_type)
class NumericDict(dict):
    """Dictionary whose missing keys read as ``0.0``.

    Reading an absent key returns ``0.0`` without storing it, so
    ``d[key] += value`` works on keys that have not been set yet.
    Stored values are returned unchanged.
    """

    def __init__(self, *args, **kwargs):
        """Create the mapping; arguments are passed straight to ``dict``."""
        dict.__init__(self, *args, **kwargs)

    def __getitem__(self, key):
        """Return the stored value for *key*, or ``0.0`` if it is absent.

        Only a missing key is treated as zero; any other failure, such as
        an unhashable key, propagates to the caller.
        """
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return 0.0