2006-10-22 17:31:36 +02:00
|
|
|
import gtk
|
2008-12-05 23:07:56 +01:00
|
|
|
from laydi import dataset, logger, plots, workflow, laydi, project
|
|
|
|
from laydi.lib import blmfuncs
|
2006-10-23 01:19:54 +02:00
|
|
|
import geneontology
|
2006-10-22 17:31:36 +02:00
|
|
|
#import gostat
|
2006-12-15 13:53:27 +01:00
|
|
|
from scipy import array, randn, log, ones, zeros
|
2006-10-22 17:31:36 +02:00
|
|
|
import networkx
|
2007-01-10 18:35:58 +01:00
|
|
|
import re
|
2006-10-22 17:31:36 +02:00
|
|
|
|
2006-10-26 18:51:42 +02:00
|
|
|
# Gene Ontology evidence codes as (code, description) pairs.  The code is
# used as an option key and as an identifier along the 'evidence' dimension
# of the evidence dataset; the description is only used for GUI labels.
EVIDENCE_CODES = [
    ('IMP', 'Inferred from mutant phenotype'),
    ('IGI', 'Inferred from genetic interaction'),
    ('IPI', 'Inferred from physical interaction'),
    ('ISS', 'Inferred from sequence or structure similarity'),
    ('IDA', 'Inferred from direct assay'),
    # Label corrected: the GO name of this code is "Inferred from
    # Expression Pattern", not "Inferred on expression pattern".
    ('IEP', 'Inferred from expression pattern'),
    ('IEA', 'Inferred from electronic annotation'),
    ('TAS', 'Traceable author statement'),
    ('NAS', 'Non-traceable author statement'),
    ('ND', 'No biological data available'),
    ('RCA', 'Inferred from reviewed computational analysis'),
    ('IC', 'Inferred by curator'),
]
|
|
|
|
|
|
|
|
# Available term distance metrics as (option value, GUI label) pairs.
DISTANCE_METRICS = [
    ('resnik', 'Resnik'),
    ('jiang', 'Jiang & Conrath'),
    ('fussimeg', 'FuSSiMeG'),
]
|
|
|
|
|
2006-12-15 13:53:27 +01:00
|
|
|
# Directory from which the GO annotation data files are read.
GO_DATA_DIR = '/home/einarr/data'

# Module-wide state shared between the workflow functions below.  Both stay
# None until the corresponding load function has been run:
#   evidence -- dataset with dimensions 'go-terms' x 'evidence'
#   go       -- the ontology graph returned by geneontology.read_default_go()
evidence = go = None
|
2006-12-15 13:53:27 +01:00
|
|
|
|
2006-10-22 17:31:36 +02:00
|
|
|
class GoTermView (gtk.Frame):
    """Frame that displays the name and definition of one GO term.

    The frame label shows the term id.  Until a term is assigned with
    set_go_term() the frame shows the placeholder label 'GO Term' and
    empty fields.
    """

    def __init__(self):
        gtk.Frame.__init__(self)

        layout = gtk.Table(2, 2, False)
        self._table = layout

        # Row 0: caption and (wrapping) label holding the term name.
        name_caption = gtk.Label('Name:')
        name_caption.set_alignment(0, 0)
        self._name = gtk.Label('')
        self._name.set_line_wrap(True)
        self._name.set_alignment(0, 0)
        layout.attach(name_caption, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
        layout.attach(self._name, 1, 2, 0, 1,
                      gtk.FILL | gtk.EXPAND, gtk.FILL, 5, 5)

        # Row 1: caption and scrollable, word-wrapped text view holding the
        # term definition.  self._def is the text buffer behind the view.
        def_caption = gtk.Label('Def:')
        def_caption.set_alignment(0.0, 0.0)
        self._def = gtk.TextBuffer()
        def_view = gtk.TextView(self._def)
        def_view.set_wrap_mode(gtk.WRAP_WORD)
        def_scroller = gtk.ScrolledWindow()
        def_scroller.add(def_view)
        layout.attach(def_caption, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        layout.attach(def_scroller, 1, 2, 1, 2,
                      gtk.FILL | gtk.EXPAND, gtk.FILL | gtk.EXPAND, 5, 5)

        self.add(layout)
        self.set_go_term(None)

    def set_go_term(self, term):
        """Show `term`, or clear the view when `term` is false/None.

        `term` is indexed with the keys 'id', 'name' and 'def'.
        """
        if not term:
            self.set_label('GO Term')
            self._name.set_text('')
            self._def.set_text('')
            return

        self.set_label(term['id'])
        self._name.set_text(term['name'])
        self._def.set_text(term['def'])
|
2006-10-23 01:19:54 +02:00
|
|
|
|
2006-10-22 17:31:36 +02:00
|
|
|
|
|
|
|
class GeneOntologyTree (gtk.HPaned):
    """Gene ontology browser: a term tree next to a term description.

    The first pane holds a scrollable gtk.TreeView over the tree store
    returned by geneontology.get_go_treestore(); the second pane holds a
    GoTermView describing the row under the cursor.

    Tree model columns as used by this class:
        0 -- GO id (text)
        1 -- term name (text)
        2 -- selected flag shown in the toggle column
        3 -- the term object handed to GoTermView / the context menu
    """

    def __init__(self, network):
        """Build the browser for the ontology graph `network`."""
        gtk.HPaned.__init__(self)

        treemodel = geneontology.get_go_treestore(network)
        self._treemodel = treemodel
        self._tree_view = gtk.TreeView(treemodel)

        # Ids of the terms whose toggle is currently checked.
        self._selected_terms = set()

        # Fixed height mode requires all columns to use fixed sizing,
        # which _insert_fixed_column() takes care of.
        self._tree_view.set_fixed_height_mode(True)

        # Set up context menu
        self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
        self._tree_view.connect('popup_menu', self._popup_menu)
        self._tree_view.connect('button_press_event', self._on_button_press)

        # Column 0: GO id
        id_column = gtk.TreeViewColumn('GO ID', gtk.CellRendererText(), text=0)
        self._insert_fixed_column(id_column, 200, 0)

        # Column 1: selection toggle
        toggle = gtk.CellRendererToggle()
        toggle.set_property('activatable', True)
        toggle.connect('toggled', self._toggle_selected)
        toggle.set_active(True)
        toggle.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        toggle_column = gtk.TreeViewColumn('T', toggle, active=2)
        self._insert_fixed_column(toggle_column, 20, 1)

        # Column 2: term name
        name_column = gtk.TreeViewColumn('Name', gtk.CellRendererText(), text=1)
        self._insert_fixed_column(name_column, 200, 2)

        self._desc_view = GoTermView()
        self._tree_view.connect('cursor-changed', self._on_cursor_changed)

        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(self._tree_view)
        self.add1(scrolled_window)
        self.add2(self._desc_view)
        self.show_all()

    def _insert_fixed_column(self, column, width, position):
        """Insert `column` at `position` as a resizable fixed-width column."""
        column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        column.set_fixed_width(width)
        column.set_resizable(True)
        self._tree_view.insert_column(column, position)

    def _on_cursor_changed(self, tree):
        """Show the term under the cursor in the description view."""
        path, _column = self._tree_view.get_cursor()
        if path is None:
            # No row has the cursor; get_iter() would fail on None.
            self._desc_view.set_go_term(None)
            return

        current = self._treemodel.get_iter(path)
        term = self._treemodel.get_value(current, 3)
        self._desc_view.set_go_term(term)

    ##
    ## GTK Callback functions
    ##
    def _popup_menu(self, *rest):
        """Keyboard-triggered ('popup_menu' signal) context menu."""
        # The menu is stored as self._context_menu; the former reference to
        # self.menu raised AttributeError.
        self._context_menu.popup(None, None, None, 0, 0)

    def _on_button_press(self, widget, event):
        """Track the clicked term and open the context menu on right click."""
        hit = widget.get_path_at_pos(int(event.x), int(event.y))

        if hit:
            # hit[0] is the tree path of the row under the pointer.
            row_iter = self._treemodel.get_iter(hit[0])
            term = self._treemodel.get_value(row_iter, 3)
        else:
            row_iter = None
            term = None

        self._context_menu.set_current_term(term, row_iter)

        if event.button == 3:
            self._context_menu.popup(None, None, None, event.button, event.time)

    def _toggle_selected(self, renderer, path):
        """Flip the selected flag of the row at `path`."""
        row_iter = self._treemodel.get_iter(path)

        was_selected = self._treemodel.get_value(row_iter, 2)
        term_id = self._treemodel.get_value(row_iter, 0)

        self._treemodel.set_value(row_iter, 2, not was_selected)

        if was_selected:
            # discard(): a row may be flagged in the model without ever
            # having been added to the set, which made remove() raise.
            self._selected_terms.discard(term_id)
        else:
            self._selected_terms.add(term_id)
|
|
|
|
|
2007-01-18 16:45:48 +01:00
|
|
|
|
|
|
|
class GoTermContextMenu (gtk.Menu):
    """Context menu for GO terms in the gene ontology browser"""

    def __init__(self, treemodel, treeview):
        """Create the menu for rows of `treemodel` shown in `treeview`."""
        # Initialise the GTK base class before touching the instance.
        gtk.Menu.__init__(self)

        self._treemodel = treemodel
        self._treeview = treeview
        # Term object and tree iter the menu acts on; both are None when
        # the menu was opened away from any row (see set_current_term).
        self._current_term = None
        self._current_iter = None

        # Populate menu
        self._expand_item = self._add_item('Expand', self._on_expand_subtree)
        self._collapse_item = self._add_item('Collapse',
                                             self._on_collapse_subtree)
        self._select_subtree_item = self._add_item('Select subtree',
                                                   self._on_select_subtree)

    def _add_item(self, label, handler):
        """Append a visible menu item whose 'activate' signal calls `handler`."""
        item = gtk.MenuItem(label)
        item.connect('activate', handler, self._treemodel, self._treeview)
        self.append(item)
        item.show()
        return item

    def set_current_term(self, term, it):
        """Remember the term object and tree iter the menu applies to."""
        self._current_term = term
        self._current_iter = it

    def _on_expand_subtree(self, item, treemodel, treeview):
        """Expand the current row and all of its descendants."""
        if self._current_iter is None:
            return
        path = treemodel.get_path(self._current_iter)
        treeview.expand_row(path, True)

    def _on_collapse_subtree(self, item, treemodel, treeview):
        """Collapse the current row."""
        if self._current_iter is None:
            return
        treeview.collapse_row(treemodel.get_path(self._current_iter))

    def _on_select_subtree(self, item, treemodel, treeview):
        """Select the current term together with every term below it."""
        term = self._current_term
        if term is None:
            # Menu was opened over empty space; nothing to select.
            return

        logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
                   (term['id'], term['name']))
        # NOTE(review): SelectGoTermsFunction reaches the same traversal as
        # networkx.paths.bfs -- confirm which spelling the installed
        # networkx version provides.
        ids = [x['id'] for x in networkx.bfs(go, term)]
        project.project.set_selection('go-terms', set(ids))
|
|
|
|
|
|
|
|
|
2006-10-22 17:31:36 +02:00
|
|
|
class GoWorkflow (workflow.Workflow):
    """Workflow with three stages: loading GO data, GO term selection and
    distance computation, and bilinear analysis (PCA)."""

    name = 'Gene Ontology'
    ident = 'go'
    description = ('Gene Ontology Workflow. For tree distance measures based '
                   'on the GO tree.')

    def __init__(self, app):
        workflow.Workflow.__init__(self, app)

        # Stage 1: read the ontology and the annotation/evidence data.
        load_stage = workflow.Stage('load', 'Load GO Annotations')
        for function in (LoadGOFunction(),
                         LoadAnnotationsFunction(),
                         LoadTextDatasetFunction()):
            load_stage.add_function(function)
        self.add_stage(load_stage)

        # Stage 2: select terms, compute and save the distance matrix.
        # (Named go_stage so it cannot be confused with the module-level
        # `go` ontology graph.)
        go_stage = workflow.Stage('go', 'Gene Ontology')
        for function in (SelectGoTermsFunction(self),
                         GoDistanceFunction(),
                         SaveDistancesFunction()):
            go_stage.add_function(function)
        self.add_stage(go_stage)

        # Stage 3: bilinear analysis.
        blm_stage = workflow.Stage('blm', 'Bilinear Analysis')
        blm_stage.add_function(blmfuncs.PCA())
        self.add_stage(blm_stage)
|
|
|
|
|
2006-10-22 17:31:36 +02:00
|
|
|
|
|
|
|
class LoadGOFunction(workflow.Function):
    """Workflow function that loads the ontology and opens a browser tab."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')

    def run(self):
        """Read the default ontology into the module-level `go` and append
        a GeneOntologyTree page to the application's bottom notebook."""
        global go
        go = geneontology.read_default_go()

        # The underscore marks the mnemonic character of the tab label.
        tab_label = gtk.Label('_Gene Ontology')
        tab_label.set_use_underline(True)

        laydi.app['bottom_notebook'].append_page(GeneOntologyTree(go),
                                                 tab_label)
|
2006-10-22 17:31:36 +02:00
|
|
|
|
2006-10-26 18:51:42 +02:00
|
|
|
|
2007-01-08 22:13:22 +01:00
|
|
|
class LoadTextDatasetFunction(workflow.Function):
    """Workflow function that loads the GO evidence dataset from an ftsv
    file in GO_DATA_DIR."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence')

    def run(self):
        """Read 'goa-condensed.ftsv' and publish it as the module-level
        `evidence` dataset.

        Returns a one-element list holding the dataset.
        """
        global evidence
        # Built from GO_DATA_DIR (same directory as before) so that the
        # data location is defined in a single place.
        f = open(GO_DATA_DIR + '/goa-condensed.ftsv')
        try:
            evidence = dataset.read_ftsv(f)
        finally:
            # The handle used to be left open.
            f.close()
        return [evidence]
|
|
|
|
|
2007-01-08 22:13:22 +01:00
|
|
|
|
2006-10-22 17:31:36 +02:00
|
|
|
class LoadAnnotationsFunction(workflow.Function):
    """Workflow function that parses the whitespace-separated
    'goa-condensed' file into a 'go-terms' x 'evidence' dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def run(self):
        """Parse the annotation file and publish it as `evidence`.

        The first line of the file lists the evidence codes; every
        following line holds a GO term id followed by one number per
        evidence code.  Blank lines are ignored.

        Returns a one-element list holding the new dataset.
        """
        global evidence

        f = open(GO_DATA_DIR + '/goa-condensed')
        try:
            ev_codes = f.readline().split()
            lines = f.readlines()
        finally:
            # The handle used to be left open.
            f.close()

        # Drop blank lines up front: they have no term id (values[0] used
        # to raise IndexError) and must not get a row in the matrix.
        rows = [values for values in (l.split() for l in lines) if values]

        go_terms = [values[0] for values in rows]
        m = zeros((len(rows), len(ev_codes)))
        for i, values in enumerate(rows):
            for j, v in enumerate(values[1:]):
                m[i, j] = float(v)

        d = dataset.Dataset(m,
                            [['go-terms', go_terms], ['evidence', ev_codes]],
                            name='GO evidence')

        evidence = d
        return [d]
|
|
|
|
|
2006-10-26 18:51:42 +02:00
|
|
|
|
|
|
|
class EvidenceCodeFrame(gtk.Frame):
    """Frame with one check button per entry of EVIDENCE_CODES."""

    def __init__(self):
        gtk.Frame.__init__(self, 'Evidence Codes')

        # Maps evidence code -> its gtk.CheckButton.
        self._ec_buttons = {}

        # NOTE(review): the first positional argument of gtk.VBox is
        # `homogeneous`, not a child count -- confirm this is intended.
        button_box = gtk.VBox(len(EVIDENCE_CODES))
        for code, desc in EVIDENCE_CODES:
            button = gtk.CheckButton('%s (%s)' % (code, desc))
            button_box.add(button)
            self._ec_buttons[code] = button
        self.add(button_box)

    def set_options(self, options):
        """Set every check button from the boolean stored under its code."""
        for code, _desc in EVIDENCE_CODES:
            button = self._ec_buttons[code]
            button.set_active(options[code])

    def update_options(self, options):
        """Store the state of every check button in `options`; returns it."""
        for code, _desc in EVIDENCE_CODES:
            button = self._ec_buttons[code]
            options[code] = button.get_active()
        return options
|
|
|
|
|
|
|
|
|
|
|
|
class DistanceMetricFrame(gtk.Frame):
    """Frame with one radio button per entry of DISTANCE_METRICS."""

    def __init__(self):
        gtk.Frame.__init__(self, 'Distance Metrics')

        # Maps metric code -> its gtk.RadioButton.
        self._metric_buttons = {}

        button_box = gtk.VBox()
        group_member = None
        for code, text in DISTANCE_METRICS:
            # Passing the previously created button puts the new button in
            # the same radio group; the first button gets None.
            button = gtk.RadioButton(group_member, '%s' % text)
            button_box.add(button)
            self._metric_buttons[code] = button
            group_member = button
        self.add(button_box)

    def set_options(self, options):
        """Activate the radio button named by options['metric']."""
        chosen = options['metric']
        self._metric_buttons[chosen].set_active(True)

    def update_options(self, options):
        """Store the code of the first active radio button under
        options['metric']; `options` is returned in either case."""
        for code, _text in DISTANCE_METRICS:
            if self._metric_buttons[code].get_active():
                options['metric'] = code
                break
        return options
|
|
|
|
|
|
|
|
|
|
|
|
class GoDistanceDialog(gtk.Dialog):
    """Modal OK/Cancel dialog combining an EvidenceCodeFrame and a
    DistanceMetricFrame."""

    def __init__(self):
        dialog_flags = gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT
        dialog_buttons = (gtk.STOCK_OK, gtk.RESPONSE_OK,
                          gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL)
        gtk.Dialog.__init__(self, 'GO term distance matrix',
                            None, dialog_flags, dialog_buttons)

        self._ec_frame = EvidenceCodeFrame()
        self._metric_frame = DistanceMetricFrame()
        for frame in (self._ec_frame, self._metric_frame):
            self.vbox.add(frame)

    def run(self):
        """Show the dialog contents, run the dialog, return its response."""
        self.vbox.show_all()
        return gtk.Dialog.run(self)

    def set_options(self, options):
        """Load `options` into both frames."""
        for frame in (self._ec_frame, self._metric_frame):
            frame.set_options(options)

    def update_options(self, options):
        """Write the state of both frames into `options` and return it."""
        for frame in (self._ec_frame, self._metric_frame):
            frame.update_options(options)
        return options

    def set_editable(self, editable):
        """Make both frames sensitive (editable) or insensitive."""
        for frame in (self._ec_frame, self._metric_frame):
            frame.set_sensitive(editable)
|
|
|
|
|
|
|
|
|
2007-01-11 19:57:45 +01:00
|
|
|
class NumericDict(dict):
    """Dictionary whose item lookup yields 0.0 for missing keys.

    Reading a missing key does not insert it, but augmented assignment
    works on unseen keys: `d[k] += x` stores x under k.  Only `d[key]`
    is affected; `get()`, `in` etc. behave as for a plain dict.
    """

    def __init__(self):
        dict.__init__(self)

    def __getitem__(self, key):
        """Return the value stored under `key`, or 0.0 if there is none."""
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            # Only a missing key maps to the default.  Other errors (e.g.
            # an unhashable key) used to be swallowed by a bare except and
            # are now reported to the caller.
            return 0.0
|
|
|
|
|
2007-01-11 23:01:31 +01:00
|
|
|
|
|
|
|
class SelectGoTermsFunction(workflow.Function):
    """Workflow function that selects the first GO terms found by a
    breadth-first traversal starting at go.get_bp()."""

    def __init__(self, wf, max_terms=100):
        """
        wf        -- the workflow whose project receives the selection
        max_terms -- upper bound on the number of terms taken from the
                     traversal (None selects all of them)
        """
        workflow.Function.__init__(self, 'go-select', 'Select GO Terms')
        self.wf = wf
        self.max_terms = max_terms

    def run(self, ds):
        """Set the project's 'go-terms' selection; `ds` is not used."""
        if go is None:
            # The ontology is only available after LoadGOFunction has run.
            logger.log('warning',
                       'Gene ontology not loaded. Cannot select GO terms.')
            return

        terms = [x['id'] for x in networkx.paths.bfs(go, go.get_bp())]
        self.wf.project.set_selection('go-terms',
                                      set(terms[:self.max_terms]))
|
2007-01-11 23:01:31 +01:00
|
|
|
|
|
|
|
|
2006-10-26 18:51:42 +02:00
|
|
|
class GoDistanceFunction(workflow.Function):
    """Workflow function computing a distance matrix between the selected
    GO terms from annotation-based information content.

    NOTE(review): options['metric'] can be chosen in the dialog, but run()
    always uses resnik_distance_matrix().
    """

    # Distance assigned to term pairs without a common subsumer.
    NO_SUBSUMER_DISTANCE = 1000

    def __init__(self):
        workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix')
        self.options = GoDistanceOptions()

    def resnik_distance_matrix(self, selection, ic):
        """Return a square 'go-terms' x '_go-terms' dataset of distances.

        selection -- mapping whose 'go-terms' entry holds the term ids
        ic        -- mapping from term id to information content

        The distance of a pair is ic[t1] + ic[t2] - 2 * ic[subsumer], or
        NO_SUBSUMER_DISTANCE when go.subsumer() finds no subsumer.
        NOTE(review): this expression is the form usually given for the
        Jiang & Conrath distance -- confirm the intended naming.
        """
        ids = list(selection['go-terms'])
        # Resolve each id once instead of once per matrix cell.
        terms = [go.by_id[term_id] for term_id in ids]

        size = len(ids)
        m = zeros((size, size))
        for i, t1 in enumerate(ids):
            for j, t2 in enumerate(ids):
                subsumer = go.subsumer(terms[i], terms[j])
                if subsumer is None:
                    m[i, j] = self.NO_SUBSUMER_DISTANCE
                else:
                    m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']]

        return dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)),
                               'Resnik')

    def _information_content(self, codes):
        """Return a NumericDict mapping term id to -log(count / total) for
        the evidence codes in `codes`, or None if the total count is zero."""
        ev_indices = evidence.get_indices('evidence', codes)
        ann_count_matrix = evidence._array[:, ev_indices].sum(1)
        total_ann = ann_count_matrix.sum(0)
        if total_ann == 0:
            return None

        # Insert annotations into dict
        annotations = NumericDict()
        for i, v in enumerate(evidence.get_identifiers('go-terms')):
            annotations[v] = ann_count_matrix[i]

        # Accumulate annotations: walking the reversed topological order
        # adds each term's count to its in-neighbours (its parents).
        for term in reversed(networkx.topological_sort(go)):
            for parent in go.in_neighbors(term):
                annotations[parent['id']] += annotations[term['id']]

        # Create information content dictionary
        ic = NumericDict()
        for term_id, count in annotations.items():
            ic[term_id] = -log(count / total_ann)
        return ic

    def run(self, x, selection):
        """Ask for options, then compute the distance matrix.

        x         -- not used
        selection -- mapping whose 'go-terms' entry holds the selected ids

        Returns a one-element list with the distance dataset, or None
        (after logging a warning) when nothing can be computed.
        """
        self.options = self.show_gui(self.options)

        if 'go-terms' not in selection or len(selection['go-terms']) == 0:
            logger.log('warning', 'No GO terms selected. Cannot make distance matrix.')
            # Previously execution carried on after the warning.
            return None

        if go is None or evidence is None:
            logger.log('warning', 'GO data not loaded. Cannot make distance matrix.')
            return None

        codes = [c for c, d in EVIDENCE_CODES if self.options[c]]
        ic = self._information_content(codes)
        if ic is None:
            logger.log('warning', 'No annotations for the chosen evidence codes. '
                       'Cannot make distance matrix.')
            return None

        return [self.resnik_distance_matrix(selection, ic)]

    def show_gui(self, options, edit=True):
        """Show the options dialog for `options`.

        Returns `options` updated from the dialog when it is confirmed
        with OK, and `options` unchanged otherwise.  `edit` controls
        whether the dialog's frames are sensitive.
        """
        dialog = GoDistanceDialog()
        try:
            # Operate on the `options` argument; it used to be ignored in
            # favour of self.options.
            dialog.set_options(options)
            dialog.show_all()
            dialog.set_editable(edit)
            response = dialog.run()
            dialog.hide()
            if response == gtk.RESPONSE_OK:
                return dialog.update_options(options)
            return options
        finally:
            # The dialog used to be hidden but never destroyed.
            dialog.destroy()
|
|
|
|
|
|
|
|
|
2007-01-12 01:00:56 +01:00
|
|
|
class SaveDistancesFunction(workflow.Function):
    """Workflow function that writes a dataset to 'output.ftsv' in
    GO_DATA_DIR."""

    def __init__(self):
        workflow.Function.__init__(self, 'save-matrix', 'Save Matrix')

    def run(self, ds):
        """Write `ds` in ftsv format, overwriting any existing file."""
        # Built from GO_DATA_DIR (same directory as before) so that the
        # data location is defined in a single place.
        filename = GO_DATA_DIR + '/output.ftsv'
        fd = open(filename, 'w')
        try:
            dataset.write_ftsv(fd, ds)
        finally:
            # Close the file even when the writer raises.
            fd.close()
|
|
|
|
|
|
|
|
|
2006-10-26 18:51:42 +02:00
|
|
|
class Options(dict):
    """Base class for option sets: a plain dict that starts out empty."""

    def __init__(self):
        super(Options, self).__init__()
|
|
|
|
|
|
|
|
|
|
|
|
class GoDistanceOptions(Options):
    """Default options for the GO distance function: every evidence code
    enabled and the metric set to 'fussimeg'."""

    def __init__(self):
        Options.__init__(self)
        # One boolean per evidence code, all switched on.
        self.update([(code, True) for code, _desc in EVIDENCE_CODES])
        self['metric'] = 'fussimeg'
|
|
|
|
|