Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

Updated go-workflow.

This commit is contained in:
Einar Ryeng 2007-01-11 18:57:45 +00:00
parent 3340b1b7ac
commit 4b1d16ea78
2 changed files with 55 additions and 7 deletions

View File

@ -42,6 +42,12 @@ class GeneOntology(networkx.XDiGraph):
for link in node[linkattr]: for link in node[linkattr]:
self.add_edge(self.by_id[link], node, linkattr) self.add_edge(self.by_id[link], node, linkattr)
def link_relationships(self):
    """Add one edge per entry in every node's 'relationship' list.

    Each entry is expected to hold two whitespace-separated fields:
    a link type followed by a term id.  The term id is looked up in
    self.by_id and an edge is added from that term to the node, with
    the link type passed as the third argument to add_edge.

    Raises ValueError if an entry does not consist of exactly two
    fields, and KeyError if the term id is not present in self.by_id.
    """
    for node in self.nodes():
        for link in node['relationship']:
            # split() without a separator collapses runs of whitespace
            # and ignores leading/trailing whitespace, so repeated or
            # trailing blanks no longer break the two-field unpacking.
            link_type, term = link.split()
            self.add_edge(self.by_id[term], node, link_type)
def get_bp(self): def get_bp(self):
"""Returns the root node of the biological_process tree""" """Returns the root node of the biological_process tree"""
return self.by_id['GO:0008150'] return self.by_id['GO:0008150']
@ -114,13 +120,13 @@ def read_gene_ontology(fd):
# print "[Term]" # print "[Term]"
else: else:
term = None term = None
print "ignoring: %s" %s #print "ignoring: %s" %s
else: else:
if term: if term:
_add_term_attribute(term, k, v, c) _add_term_attribute(term, k, v, c)
# print " %s: %s" % (k, v) # print " %s: %s" % (k, v)
else: # else:
print "no term: ignoring: %s" %line # print "no term: ignoring: %s" %line
# print '.', # print '.',
line = fd.readline() line = fd.readline()
@ -133,6 +139,7 @@ def read_default_go():
f = open("/usr/share/gene-ontology/gene_ontology.obo") f = open("/usr/share/gene-ontology/gene_ontology.obo")
go = read_gene_ontology(f) go = read_gene_ontology(f)
go.link_ontology('is_a') go.link_ontology('is_a')
go.link_relationships()
f.close() f.close()
return go return go

View File

@ -26,6 +26,7 @@ DISTANCE_METRICS = [('resnik', 'Resnik'),
GO_DATA_DIR = '/home/einarr/data' GO_DATA_DIR = '/home/einarr/data'
evidence = None evidence = None
go = None
class GoTermView (gtk.Frame): class GoTermView (gtk.Frame):
@ -137,10 +138,13 @@ class LoadGOFunction(workflow.Function):
class LoadTextDatasetFunction(workflow.Function): class LoadTextDatasetFunction(workflow.Function):
def __init__(self): def __init__(self):
workflow.Function.__init__(self, 'load-text-ds', 'Load text dataset') workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')
def run(self): def run(self):
f = open('/home/einarr/data/goa-condensed.ftsv') f = open('/home/einarr/data/goa-condensed.ftsv')
return [dataset.read_ftsv(f)] global evidence
evidence = dataset.read_ftsv(f)
return [evidence]
class LoadAnnotationsFunction(workflow.Function): class LoadAnnotationsFunction(workflow.Function):
@ -253,16 +257,53 @@ class GoDistanceDialog(gtk.Dialog):
self._metric_frame.set_sensitive(editable) self._metric_frame.set_sensitive(editable)
class NumericDict(dict):
    """Dictionary in which looking up a missing key yields 0.0.

    Only subscript access (d[key]) is affected; a missing key is not
    inserted by the lookup.  This lets callers accumulate with
    ``d[key] += value`` without initialising the entry first.
    """

    def __init__(self):
        dict.__init__(self)

    def __getitem__(self, key):
        """Return the stored value for key, or 0.0 if key is absent."""
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            # Only a missing key maps to the default; other errors
            # (e.g. an unhashable key) are allowed to propagate.
            return 0.0
class GoDistanceFunction(workflow.Function): class GoDistanceFunction(workflow.Function):
def __init__(self): def __init__(self):
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix') workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
self.options = GoDistanceOptions() self.options = GoDistanceOptions()
def run(self, selection): def run(self, x, selection):
global evidence, go
self.options = self.show_gui(self.options) self.options = self.show_gui(self.options)
if not selection.has_key('go-terms') or len(selection['go-terms']) == 0: if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
logger.log('warning', 'No GO terms selected. Cannot make distance matrix.') logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
ev_indices = evidence.get_indices('evidence', codes)
ann_count_matrix = evidence._array[:, ev_indices].sum(1)
total_ann = ann_count_matrix.sum(0)
annotations = NumericDict()
ic = NumericDict()
# Accumulate annotations
for term in reversed(networkx.topological_sort(go)):
for parent in go.in_neighbors(term):
annotations[parent['id']] += annotations[term['id']]
print "%s -> %s (%s)" % (term['id'], parent['id'], annotations[parent['id']])
# Create information content dictionary
for i, v in enumerate(evidence.get_identifiers('go-terms')):
annotations[v] = ann_count_matrix[i]
ic[v] = -log(ann_count_matrix[i] / total_ann)
# # Create resnik distance matrix
# for t1 in selection['go-terms']:
# for t2 in selection['go-terms']:
def show_gui(self, options, edit=True): def show_gui(self, options, edit=True):
dialog = GoDistanceDialog() dialog = GoDistanceDialog()