Resnik distances between GO terms work now.
This commit is contained in:
parent
4b1d16ea78
commit
e4d7f8e5a2
|
@ -60,6 +60,31 @@ class GeneOntology(networkx.XDiGraph):
|
||||||
"""Returns the root node of the molecular_function tree"""
|
"""Returns the root node of the molecular_function tree"""
|
||||||
return self.by_id['GO:0003674']
|
return self.by_id['GO:0003674']
|
||||||
|
|
||||||
|
def subsumer(self, t1, t2):
    """Return a term on the path between t1 and t2 that reaches both.

    The shortest path between the two terms is computed on an undirected
    copy of this graph.  The path is then walked from t1 towards t2 and
    the first term from which both t1 and t2 can be reached along the
    directed edges of this graph is returned.

    Returns t1 when t1 == t2.  Returns None (after printing a diagnostic)
    when no undirected path is found or when no term on the path reaches
    both t1 and t2.
    """
    if t1 == t2:
        return t1

    # Edge direction is ignored for the path search so that the path can
    # pass through a term both t1 and t2 are connected to.
    go_undir = self.to_undirected()
    path = networkx.shortest_path(go_undir, t1, t2)
    if not path:
        # A falsy result is treated as "no path between the terms".
        print("Woah, path not found.")
        return None

    # A single-node path means both ends are the same term, which the
    # equality check above should already have handled.
    if len(path) == 1:
        print("This shouldn't happen")
        return t1

    for t in path:
        # Both lookups use the directed graph: t qualifies only when
        # t1 and t2 are each reachable from it.
        if networkx.shortest_path(self, t, t1) and \
                networkx.shortest_path(self, t, t2):
            return t

    print("GeneOntology.subsumer: should not reach this point")
    return None
|
||||||
|
|
||||||
def _split_obo_line(line):
|
def _split_obo_line(line):
|
||||||
"""Splits a line from an obo file in its three constituent parts.
|
"""Splits a line from an obo file in its three constituent parts.
|
||||||
|
|
|
@ -118,6 +118,7 @@ class GoWorkflow (workflow.Workflow):
|
||||||
self.add_stage(load)
|
self.add_stage(load)
|
||||||
|
|
||||||
go = workflow.Stage('go', 'Gene Ontology')
|
go = workflow.Stage('go', 'Gene Ontology')
|
||||||
|
go.add_function(SelectGoTermsFunction(self))
|
||||||
go.add_function(GoDistanceFunction())
|
go.add_function(GoDistanceFunction())
|
||||||
self.add_stage(go)
|
self.add_stage(go)
|
||||||
|
|
||||||
|
@ -269,11 +270,36 @@ class NumericDict(dict):
|
||||||
retval = 0.0
|
retval = 0.0
|
||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|
||||||
|
class SelectGoTermsFunction(workflow.Function):
    """Workflow function that selects a fixed set of GO terms.

    Registered under the id 'go-select' with the label 'Select GO Terms'.
    """

    def __init__(self, wf):
        """Keep a reference to the owning workflow `wf`.

        The workflow's project is used by run() to store the selection.
        """
        workflow.Function.__init__(self, 'go-select', 'Select GO Terms')
        self.wf = wf

    def run(self):
        """Set the project's 'go-terms' selection to three fixed GO ids."""
        # A new set is built on every call, so the project never receives
        # an object shared with a previous run.
        term_ids = set(['GO:0007582', 'GO:0008150', 'GO:0051704'])
        self.wf.project.set_selection('go-terms', term_ids)
|
||||||
|
|
||||||
|
|
||||||
class GoDistanceFunction(workflow.Function):
|
class GoDistanceFunction(workflow.Function):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
|
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
|
||||||
self.options = GoDistanceOptions()
|
self.options = GoDistanceOptions()
|
||||||
|
|
||||||
|
def resnik_distance_matrix(self, selection, ic):
    """Build the pairwise 'Resnik' dataset for the selected GO terms.

    selection -- mapping whose 'go-terms' entry holds the GO term ids
    ic        -- mapping from GO term id to a numeric value (presumably
                 the information content computed by the caller)

    For every ordered pair of selected ids the subsuming term is looked
    up through the module-level `go` graph and the matrix cell is set to
    ic[subsumer] - ic[t1] + ic[subsumer] - ic[t2].  One line is printed
    per pair.  Returns a dataset.Dataset named 'Resnik' whose two
    dimensions ('go-terms' and '_go-terms') are both labelled with the
    selected ids.
    """
    n_terms = len(selection['go-terms'])
    m = zeros((n_terms, n_terms))
    ids = list(selection['go-terms'])

    for row, id1 in enumerate(ids):
        # The row term is the same for the whole inner loop.
        term1 = go.by_id[id1]
        for col, id2 in enumerate(ids):
            term2 = go.by_id[id2]
            common = go.subsumer(term1, term2)
            common_id = common['id']
            print("%s - %s - %s" % (id1, common_id, id2))
            # NOTE(review): this equals 2*ic[subsumer] - ic[t1] - ic[t2];
            # confirm that this sign is the intended one for a distance.
            m[row, col] = ic[common_id] - ic[id1] + ic[common_id] - ic[id2]

    dimensions = (('go-terms', ids), ('_go-terms', ids))
    return dataset.Dataset(m, dimensions, 'Resnik')
|
||||||
|
|
||||||
def run(self, x, selection):
|
def run(self, x, selection):
|
||||||
global evidence, go
|
global evidence, go
|
||||||
self.options = self.show_gui(self.options)
|
self.options = self.show_gui(self.options)
|
||||||
|
@ -288,22 +314,21 @@ class GoDistanceFunction(workflow.Function):
|
||||||
annotations = NumericDict()
|
annotations = NumericDict()
|
||||||
ic = NumericDict()
|
ic = NumericDict()
|
||||||
|
|
||||||
|
# Insert annotations into dict
|
||||||
|
for i, v in enumerate(evidence.get_identifiers('go-terms')):
|
||||||
|
annotations[v] = ann_count_matrix[i]
|
||||||
|
|
||||||
# Accumulate annotations
|
# Accumulate annotations
|
||||||
for term in reversed(networkx.topological_sort(go)):
|
for term in reversed(networkx.topological_sort(go)):
|
||||||
for parent in go.in_neighbors(term):
|
for parent in go.in_neighbors(term):
|
||||||
annotations[parent['id']] += annotations[term['id']]
|
annotations[parent['id']] += annotations[term['id']]
|
||||||
print "%s -> %s (%s)" % (term['id'], parent['id'], annotations[parent['id']])
|
# print "%s -> %s (%s)" % (term['id'], parent['id'], annotations[parent['id']])
|
||||||
|
|
||||||
# Create information content dictionary
|
# Create information content dictionary
|
||||||
for i, v in enumerate(evidence.get_identifiers('go-terms')):
|
for i, v in enumerate(evidence.get_identifiers('go-terms')):
|
||||||
annotations[v] = ann_count_matrix[i]
|
|
||||||
ic[v] = -log(ann_count_matrix[i] / total_ann)
|
ic[v] = -log(ann_count_matrix[i] / total_ann)
|
||||||
|
|
||||||
# # Create resnik distance matrix
|
return [self.resnik_distance_matrix(selection, ic)]
|
||||||
# for t1 in selection['go-terms']:
|
|
||||||
# for t2 in selection['go-terms']:
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def show_gui(self, options, edit=True):
|
def show_gui(self, options, edit=True):
|
||||||
dialog = GoDistanceDialog()
|
dialog = GoDistanceDialog()
|
||||||
|
|
Reference in New Issue