From e4d7f8e5a29389e9bd6a52cbda7db6a0d152a03b Mon Sep 17 00:00:00 2001 From: einarr Date: Thu, 11 Jan 2007 22:01:31 +0000 Subject: [PATCH] Resnik distances between GO terms work now. --- workflows/geneontology.py | 25 +++++++++++++++++++++++++ workflows/go_workflow.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/workflows/geneontology.py b/workflows/geneontology.py index 5a40e41..79419b8 100644 --- a/workflows/geneontology.py +++ b/workflows/geneontology.py @@ -60,6 +60,31 @@ class GeneOntology(networkx.XDiGraph): """Returns the root node of the molecular_function tree""" return self.by_id['GO:0003674'] + def subsumer(self, t1, t2): + if t1 == t2: +# print "t1 == t2" + return t1 + + go_undir = self.to_undirected() +# print go_undir.nodes() + path = networkx.shortest_path(go_undir, t1, t2) + if not path: + print "Woah, path not found." + return None + + if path == [1]: + print "This shouldn't happen" + return t1 + +# print t1['id'], t2['id'], path +# print "path:", path + for t in path: + if networkx.shortest_path(self, t, t1) and \ + networkx.shortest_path(self, t, t2): +# print " ", t1, t2, t + return t + + print "GeneOntology.subsumer: should not reach this point" def _split_obo_line(line): """Splits a line from an obo file in its three constituent parts. diff --git a/workflows/go_workflow.py b/workflows/go_workflow.py index 258109b..3352ca7 100644 --- a/workflows/go_workflow.py +++ b/workflows/go_workflow.py @@ -118,6 +118,7 @@ class GoWorkflow (workflow.Workflow): self.add_stage(load) go = workflow.Stage('go', 'Gene Ontology') + go.add_function(SelectGoTermsFunction(self)) go.add_function(GoDistanceFunction()) self.add_stage(go) @@ -269,11 +270,36 @@ class NumericDict(dict): retval = 0.0 return retval + +class SelectGoTermsFunction(workflow.Function): + def __init__(self, wf): + workflow.Function.__init__(self, 'go-select', 'Select GO Terms') + self.wf = wf + + def run(self): + self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704'])) + + class GoDistanceFunction(workflow.Function): def __init__(self): workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix') self.options = GoDistanceOptions() + def resnik_distance_matrix(self, selection, ic): + size = len(selection['go-terms']) + m = zeros((size, size)) + # Create resnik distance matrix + ids = list(selection['go-terms']) + for i, t1 in enumerate(ids): + for j, t2 in enumerate(ids): + term1 = go.by_id[t1] + term2 = go.by_id[t2] + subsumer = go.subsumer(term1, term2) + print "%s - %s - %s" % (t1, subsumer['id'], t2) + m[i, j] = ic[subsumer['id']] - ic[t1] + ic[subsumer['id']] - ic[t2] + ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik') + return ds + def run(self, x, selection): global evidence, go self.options = self.show_gui(self.options) @@ -288,22 +314,21 @@ class GoDistanceFunction(workflow.Function): annotations = NumericDict() ic = NumericDict() + # Insert annotations into dict + for i, v in enumerate(evidence.get_identifiers('go-terms')): + annotations[v] = ann_count_matrix[i] + # Accumulate annotations for term in reversed(networkx.topological_sort(go)): for parent in go.in_neighbors(term): annotations[parent['id']] += annotations[term['id']] - print "%s -> %s (%s)" % (term['id'], parent['id'], annotations[parent['id']]) +# print "%s -> %s (%s)" % (term['id'], parent['id'], annotations[parent['id']]) # Create information content dictionary for i, v in enumerate(evidence.get_identifiers('go-terms')): - annotations[v] = ann_count_matrix[i] ic[v] = -log(ann_count_matrix[i] / total_ann) -# # Create resnik distance matrix -# for t1 in selection['go-terms']: -# for t2 in selection['go-terms']: - - + return [self.resnik_distance_matrix(selection, ic)] def show_gui(self, options, edit=True): dialog = GoDistanceDialog()