Updated go-workflow.
This commit is contained in:
parent
3340b1b7ac
commit
4b1d16ea78
|
@ -42,6 +42,12 @@ class GeneOntology(networkx.XDiGraph):
|
||||||
for link in node[linkattr]:
|
for link in node[linkattr]:
|
||||||
self.add_edge(self.by_id[link], node, linkattr)
|
self.add_edge(self.by_id[link], node, linkattr)
|
||||||
|
|
||||||
|
def link_relationships(self):
    """Add an edge for every entry in each node's 'relationship' list.

    Each entry is expected to hold exactly two whitespace-separated
    tokens: a relationship type followed by a term id.  For every entry
    an edge is added from the node stored under that id in self.by_id to
    the current node, passing the relationship type as the third
    argument of add_edge.

    Raises ValueError if an entry does not consist of exactly two tokens,
    and KeyError if the term id is not present in self.by_id.
    """
    for node in self.nodes():
        for link in node['relationship']:
            # split() without a separator collapses runs of whitespace and
            # drops leading/trailing blanks, so stray spaces no longer
            # yield empty tokens that break the two-way unpacking.
            link_type, term = link.split()
            self.add_edge(self.by_id[term], node, link_type)
|
||||||
|
|
||||||
def get_bp(self):
|
def get_bp(self):
|
||||||
"""Returns the root node of the biological_process tree"""
|
"""Returns the root node of the biological_process tree"""
|
||||||
return self.by_id['GO:0008150']
|
return self.by_id['GO:0008150']
|
||||||
|
@ -114,13 +120,13 @@ def read_gene_ontology(fd):
|
||||||
# print "[Term]"
|
# print "[Term]"
|
||||||
else:
|
else:
|
||||||
term = None
|
term = None
|
||||||
print "ignoring: %s" %s
|
#print "ignoring: %s" %s
|
||||||
else:
|
else:
|
||||||
if term:
|
if term:
|
||||||
_add_term_attribute(term, k, v, c)
|
_add_term_attribute(term, k, v, c)
|
||||||
# print " %s: %s" % (k, v)
|
# print " %s: %s" % (k, v)
|
||||||
else:
|
# else:
|
||||||
print "no term: ignoring: %s" %line
|
# print "no term: ignoring: %s" %line
|
||||||
# print '.',
|
# print '.',
|
||||||
line = fd.readline()
|
line = fd.readline()
|
||||||
|
|
||||||
|
@ -133,6 +139,7 @@ def read_default_go():
|
||||||
f = open("/usr/share/gene-ontology/gene_ontology.obo")
|
f = open("/usr/share/gene-ontology/gene_ontology.obo")
|
||||||
go = read_gene_ontology(f)
|
go = read_gene_ontology(f)
|
||||||
go.link_ontology('is_a')
|
go.link_ontology('is_a')
|
||||||
|
go.link_relationships()
|
||||||
f.close()
|
f.close()
|
||||||
return go
|
return go
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,7 @@ DISTANCE_METRICS = [('resnik', 'Resnik'),
|
||||||
GO_DATA_DIR = '/home/einarr/data'
|
GO_DATA_DIR = '/home/einarr/data'
|
||||||
|
|
||||||
evidence = None
|
evidence = None
|
||||||
|
go = None
|
||||||
|
|
||||||
class GoTermView (gtk.Frame):
|
class GoTermView (gtk.Frame):
|
||||||
|
|
||||||
|
@ -137,10 +138,13 @@ class LoadGOFunction(workflow.Function):
|
||||||
class LoadTextDatasetFunction(workflow.Function):
|
class LoadTextDatasetFunction(workflow.Function):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
workflow.Function.__init__(self, 'load-text-ds', 'Load text dataset')
|
workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
f = open('/home/einarr/data/goa-condensed.ftsv')
|
f = open('/home/einarr/data/goa-condensed.ftsv')
|
||||||
return [dataset.read_ftsv(f)]
|
global evidence
|
||||||
|
evidence = dataset.read_ftsv(f)
|
||||||
|
return [evidence]
|
||||||
|
|
||||||
|
|
||||||
class LoadAnnotationsFunction(workflow.Function):
|
class LoadAnnotationsFunction(workflow.Function):
|
||||||
|
@ -253,16 +257,53 @@ class GoDistanceDialog(gtk.Dialog):
|
||||||
self._metric_frame.set_sensitive(editable)
|
self._metric_frame.set_sensitive(editable)
|
||||||
|
|
||||||
|
|
||||||
|
class NumericDict(dict):
    """A dict whose lookups of missing keys yield 0.0 instead of failing.

    Reading an absent key returns 0.0 and does not insert the key, so
    `d[key] += value` works on keys that have not been set yet.  All
    other dict behaviour, including the constructor arguments, is
    inherited unchanged.
    """

    def __getitem__(self, key):
        """Return the value stored under key, or 0.0 if key is absent."""
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            # Only a missing key maps to the numeric default; other errors
            # (e.g. an unhashable key) propagate instead of being hidden.
            return 0.0
|
||||||
|
|
||||||
class GoDistanceFunction(workflow.Function):
|
class GoDistanceFunction(workflow.Function):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
|
workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
|
||||||
self.options = GoDistanceOptions()
|
self.options = GoDistanceOptions()
|
||||||
|
|
||||||
def run(self, selection):
|
def run(self, x, selection):
|
||||||
|
global evidence, go
|
||||||
self.options = self.show_gui(self.options)
|
self.options = self.show_gui(self.options)
|
||||||
if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
|
if not selection.has_key('go-terms') or len(selection['go-terms']) == 0:
|
||||||
logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
|
logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
|
||||||
|
|
||||||
|
codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
|
||||||
|
ev_indices = evidence.get_indices('evidence', codes)
|
||||||
|
ann_count_matrix = evidence._array[:, ev_indices].sum(1)
|
||||||
|
total_ann = ann_count_matrix.sum(0)
|
||||||
|
|
||||||
|
annotations = NumericDict()
|
||||||
|
ic = NumericDict()
|
||||||
|
|
||||||
|
# Accumulate annotations
|
||||||
|
for term in reversed(networkx.topological_sort(go)):
|
||||||
|
for parent in go.in_neighbors(term):
|
||||||
|
annotations[parent['id']] += annotations[term['id']]
|
||||||
|
print "%s -> %s (%s)" % (term['id'], parent['id'], annotations[parent['id']])
|
||||||
|
|
||||||
|
# Create information content dictionary
|
||||||
|
for i, v in enumerate(evidence.get_identifiers('go-terms')):
|
||||||
|
annotations[v] = ann_count_matrix[i]
|
||||||
|
ic[v] = -log(ann_count_matrix[i] / total_ann)
|
||||||
|
|
||||||
|
# # Create resnik distance matrix
|
||||||
|
# for t1 in selection['go-terms']:
|
||||||
|
# for t2 in selection['go-terms']:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def show_gui(self, options, edit=True):
|
def show_gui(self, options, edit=True):
|
||||||
dialog = GoDistanceDialog()
|
dialog = GoDistanceDialog()
|
||||||
|
|
Reference in New Issue