Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Create Resnik distances and save them as ftsv.

This commit is contained in:
Einar Ryeng 2007-01-12 00:00:56 +00:00
parent 08b7d8dd85
commit ae3aa7e4aa
2 changed files with 21 additions and 12 deletions

View File

@ -32,6 +32,7 @@ class GeneOntology(networkx.XDiGraph):
def __init__(self): def __init__(self):
networkx.XDiGraph.__init__(self) networkx.XDiGraph.__init__(self)
self.by_id = {} self.by_id = {}
self.undirected = None
def add_term(self, term): def add_term(self, term):
self.add_node(term) self.add_node(term)
@ -62,12 +63,12 @@ class GeneOntology(networkx.XDiGraph):
def subsumer(self, t1, t2): def subsumer(self, t1, t2):
if t1 == t2: if t1 == t2:
# print "t1 == t2"
return t1 return t1
go_undir = self.to_undirected() if self.undirected == None:
# print go_undir.nodes() self.undirected = self.to_undirected()
path = networkx.shortest_path(go_undir, t1, t2)
path = networkx.shortest_path(self.undirected, t1, t2)
if not path: if not path:
print "Woah, path not found." print "Woah, path not found."
return None return None
@ -76,12 +77,9 @@ class GeneOntology(networkx.XDiGraph):
print "This shouldn't happen" print "This shouldn't happen"
return t1 return t1
# print t1['id'], t2['id'], path
# print "path:", path
for t in path: for t in path:
if networkx.shortest_path(self, t, t1) and \ if networkx.shortest_path(self, t, t1) and \
networkx.shortest_path(self, t, t2): networkx.shortest_path(self, t, t2):
# print " ", t1, t2, t
return t return t
print "GeneOntology.subsumer: should not reach this point" print "GeneOntology.subsumer: should not reach this point"

View File

@ -120,6 +120,7 @@ class GoWorkflow (workflow.Workflow):
go = workflow.Stage('go', 'Gene Ontology') go = workflow.Stage('go', 'Gene Ontology')
go.add_function(SelectGoTermsFunction(self)) go.add_function(SelectGoTermsFunction(self))
go.add_function(GoDistanceFunction()) go.add_function(GoDistanceFunction())
go.add_function(SaveDistancesFunction())
self.add_stage(go) self.add_stage(go)
@ -277,7 +278,7 @@ class SelectGoTermsFunction(workflow.Function):
self.wf = wf self.wf = wf
def run(self): def run(self):
self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704'])) self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704', 'GO:0044419']))
class GoDistanceFunction(workflow.Function): class GoDistanceFunction(workflow.Function):
@ -296,7 +297,7 @@ class GoDistanceFunction(workflow.Function):
term2 = go.by_id[t2] term2 = go.by_id[t2]
subsumer = go.subsumer(term1, term2) subsumer = go.subsumer(term1, term2)
print "%s - %s - %s" % (t1, subsumer['id'], t2) print "%s - %s - %s" % (t1, subsumer['id'], t2)
m[i, j] = ic[subsumer['id']] - ic[t1] + ic[subsumer['id']] - ic[t2] m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']]
ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik') ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik')
return ds return ds
@ -322,11 +323,10 @@ class GoDistanceFunction(workflow.Function):
for term in reversed(networkx.topological_sort(go)): for term in reversed(networkx.topological_sort(go)):
for parent in go.in_neighbors(term): for parent in go.in_neighbors(term):
annotations[parent['id']] += annotations[term['id']] annotations[parent['id']] += annotations[term['id']]
# print "%s -> %s (%s)" % (term['id'], parent['id'], annotations[parent['id']])
# Create information content dictionary # Create information content dictionary
for i, v in enumerate(evidence.get_identifiers('go-terms')): for term, count in annotations.items():
ic[v] = -log(ann_count_matrix[i] / total_ann) ic[term] = -log(count / total_ann)
return [self.resnik_distance_matrix(selection, ic)] return [self.resnik_distance_matrix(selection, ic)]
@ -343,6 +343,17 @@ class GoDistanceFunction(workflow.Function):
return options return options
class SaveDistancesFunction(workflow.Function):
    """Workflow function that writes a dataset to disk in ftsv format.

    The output path defaults to the location that was previously
    hard-coded in run(); pass `filename` to write somewhere else.
    """

    def __init__(self, filename='/home/einarr/data/output.ftsv'):
        workflow.Function.__init__(self, 'save-matrix', 'Save Matrix')
        # Destination file; opened for writing each time run() is called.
        self.filename = filename

    def run(self, ds):
        """Write `ds` to self.filename using dataset.write_ftsv.

        `ds` is handed to dataset.write_ftsv unchanged -- presumably a
        dataset.Dataset such as the Resnik distance matrix; confirm
        against the caller.
        """
        fd = open(self.filename, 'w')
        try:
            dataset.write_ftsv(fd, ds)
        finally:
            # Close the handle even when writing fails, so it is not leaked.
            fd.close()
class Options(dict): class Options(dict):
def __init__(self): def __init__(self):
dict.__init__(self) dict.__init__(self)