2007-03-14 22:08:56 +01:00
|
|
|
|
|
|
|
import gtk
|
2007-08-02 12:20:33 +02:00
|
|
|
from fluents import dataset, logger, plots, workflow, fluents, project, view, main
|
2007-03-14 22:08:56 +01:00
|
|
|
import geneontology
|
2007-07-23 19:02:28 +02:00
|
|
|
from matplotlib.nxutils import points_inside_poly
|
|
|
|
import matplotlib
|
2007-07-03 18:25:38 +02:00
|
|
|
#from scipy import array, randn, log, ones, zeros
|
|
|
|
from scipy import *
|
2007-07-23 19:02:28 +02:00
|
|
|
from numpy import matlib
|
2007-03-14 22:08:56 +01:00
|
|
|
import networkx
|
|
|
|
import re
|
2007-07-23 19:02:28 +02:00
|
|
|
import rpy
|
2007-03-14 22:08:56 +01:00
|
|
|
|
|
|
|
# Gene Ontology evidence codes as (code, description) pairs.
EVIDENCE_CODES = [
    ('IMP', 'Inferred from mutant phenotype'),
    ('IGI', 'Inferred from genetic interaction'),
    ('IPI', 'Inferred from physical interaction'),
    ('ISS', 'Inferred from sequence or structure similarity'),
    ('IDA', 'Inferred from direct assay'),
    ('IEP', 'Inferred on expression pattern'),
    ('IEA', 'Inferred from electronic annotation'),
    ('TAS', 'Traceable author statement'),
    ('NAS', 'Non-traceable author statement'),
    ('ND', 'No biological data available'),
    ('RCA', 'Inferred from reviewed computational analysis'),
    ('IC', 'Inferred by curator'),
]

# Available GO term distance metrics as (key, display name) pairs.
DISTANCE_METRICS = [
    ('resnik', 'Resnik'),
    ('jiang', 'Jiang & Conrath'),
    ('fussimeg', 'FuSSiMeG'),
]

# Directory that holds the GO data files read by this module.
GO_DATA_DIR = '/home/einarr/data'

# Module-level state: the evidence dataset and the GO network, both unset
# until the corresponding load function has been run.
evidence = None
go = None
|
|
|
|
|
|
|
|
class GoTermView (gtk.Frame):
    """Frame that displays one GO term: id (frame label), name, 'is a'
    parents (one button per parent) and definition text."""

    def __init__(self):
        gtk.Frame.__init__(self)

        grid = gtk.Table(2, 3, False)
        self._table = self._tab = grid

        # Row 0: term name.
        name_caption = gtk.Label('Name:')
        name_caption.set_alignment(0, 0)
        self._name = gtk.Label('')
        self._name.set_line_wrap(True)
        self._name.set_alignment(0, 0)
        grid.attach(name_caption, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
        grid.attach(self._name, 1, 2, 0, 1, gtk.FILL | gtk.EXPAND, gtk.FILL, 5, 5)

        # Row 1: 'is a' parents; the box is replaced on every term change.
        isa_caption = gtk.Label('Is a:')
        self._isa_parents = gtk.HBox()
        grid.attach(isa_caption, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        grid.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)

        # Row 2: definition in a scrollable, word-wrapped text view.
        self._def = gtk.TextBuffer()
        definition_view = gtk.TextView(self._def)
        definition_view.set_wrap_mode(gtk.WRAP_WORD)
        scroller = gtk.ScrolledWindow()
        scroller.add(definition_view)
        def_caption = gtk.Label('Def:')
        def_caption.set_alignment(0.0, 0.0)
        grid.attach(def_caption, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5)
        grid.attach(scroller, 1, 2, 2, 3,
                    gtk.FILL | gtk.EXPAND, gtk.FILL | gtk.EXPAND, 5, 5)

        self.add(grid)
        self.set_go_term(None)

    def set_go_term(self, term):
        """Show the given term, or reset the view when term is false.

        term is indexed with the keys 'id', 'name', 'def' and 'is_a'.
        """
        if not term:
            self.set_label('GO Term')
            self._name.set_text('')
            self._def.set_text('')
            self._new_isa_box()
            self._attach_isa_box()
            return

        self.set_label(term['id'])
        self._name.set_text(term['name'])
        self._def.set_text(term['def'])

        parents_box = self._new_isa_box()
        for parent in term['is_a']:
            button = gtk.Button(parent)
            button.show()
            parents_box.add(button)
        parents_box.show()
        self._attach_isa_box()

    def _new_isa_box(self):
        """Detach the current parents box and install a new, empty one.

        The new box is stored and returned but not yet attached to the table.
        """
        self._tab.remove(self._isa_parents)
        self._isa_parents = gtk.HBox()
        return self._isa_parents

    def _attach_isa_box(self):
        """Attach the current parents box to its table cell (column 1, row 1)."""
        self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
|
2007-03-14 22:08:56 +01:00
|
|
|
|
2007-07-26 14:35:59 +02:00
|
|
|
|
2007-03-14 22:08:56 +01:00
|
|
|
class GeneOntologyTree (gtk.HPaned):
    """Gene Ontology browser: a tree of GO terms in the first pane and a
    GoTermView describing the term under the cursor in the second.

    The tree model comes from geneontology.get_go_treestore(network). This
    class reads column 0 as the GO id text, column 1 as the name text,
    column 2 as the 'selected' toggle state and column 3 as the term object.
    """

    def __init__(self, network):
        gtk.HPaned.__init__(self)
        self.set_position(400)

        treemodel = geneontology.get_go_treestore(network)
        self._treemodel = treemodel
        self._tree_view = gtk.TreeView(treemodel)

        # GO ids whose toggle has been switched on through the tree view.
        self._selected_terms = set()

        self._tree_view.set_fixed_height_mode(True)

        # Set up context menu
        self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
        self._tree_view.connect('popup_menu', self._popup_menu)
        self._tree_view.connect('button_press_event', self._on_button_press)

        self._add_fixed_column('GO ID', gtk.CellRendererText(), 200, 0, text=0)

        toggle = gtk.CellRendererToggle()
        toggle.set_property('activatable', True)
        toggle.connect('toggled', self._toggle_selected)
        toggle.set_active(True)
        toggle.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        self._add_fixed_column('T', toggle, 20, 1, active=2)

        self._add_fixed_column('Name', gtk.CellRendererText(), 200, 2, text=1)

        self._desc_view = GoTermView()
        self._tree_view.connect('cursor-changed', self._on_cursor_changed)

        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(self._tree_view)
        self.add1(scrolled_window)
        self.add2(self._desc_view)
        self.show_all()

    def _add_fixed_column(self, title, renderer, width, position, **attributes):
        """Insert a resizable, fixed-width column at the given position.

        attributes maps renderer properties to model columns (e.g. text=0).
        """
        column = gtk.TreeViewColumn(title, renderer, **attributes)
        column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        column.set_fixed_width(width)
        column.set_resizable(True)
        self._tree_view.insert_column(column, position)

    def _term_at(self, path):
        """Return (term, tree iter) for a model path, or (None, None) when
        path is None."""
        if path is None:
            return None, None
        tree_iter = self._treemodel.get_iter(path)
        return self._treemodel.get_value(tree_iter, 3), tree_iter

    def _on_cursor_changed(self, tree):
        """Show the term under the cursor in the description view."""
        path, col = self._tree_view.get_cursor()
        # get_cursor() yields no path when there is no cursor row; the
        # description view is reset in that case instead of raising.
        term, _tree_iter = self._term_at(path)
        self._desc_view.set_go_term(term)

    ##
    ## GTK Callback functions
    ##
    def _popup_menu(self, *rest):
        """Keyboard-triggered context menu for the row under the cursor."""
        # The original referenced a non-existent self.menu attribute here.
        path, col = self._tree_view.get_cursor()
        term, tree_iter = self._term_at(path)
        self._context_menu.set_current_term(term, tree_iter)
        self._context_menu.popup(None, None, None, 0, 0)
        return True

    def _on_button_press(self, widget, event):
        """Remember the clicked term and pop up the context menu on button 3."""
        hit = widget.get_path_at_pos(int(event.x), int(event.y))
        if hit:
            term, tree_iter = self._term_at(hit[0])
        else:
            term, tree_iter = None, None

        self._context_menu.set_current_term(term, tree_iter)

        if event.button == 3:
            self._context_menu.popup(None, None, None, event.button, event.time)

    def _toggle_selected(self, renderer, path):
        """Flip the 'selected' flag of a row and mirror it in _selected_terms."""
        tree_iter = self._treemodel.get_iter(path)

        was_selected = self._treemodel.get_value(tree_iter, 2)
        term_id = self._treemodel.get_value(tree_iter, 0)

        self._treemodel.set_value(tree_iter, 2, not was_selected)

        if was_selected:
            # discard: a row may already be flagged in the model without
            # having been recorded in the set.
            self._selected_terms.discard(term_id)
        else:
            self._selected_terms.add(term_id)
|
|
|
|
|
|
|
|
|
|
|
|
class GoTermContextMenu (gtk.Menu):
    """Context menu for GO terms in the gene ontology browser"""

    def __init__(self, treemodel, treeview):
        self._treemodel = treemodel
        self._treeview = treeview
        self._current_term = None
        self._current_iter = None

        gtk.Menu.__init__(self)

        # Populate menu
        self._expand_item = self._add_item('Expand', self._on_expand_subtree)
        self._collapse_item = self._add_item('Collapse',
                                             self._on_collapse_subtree)
        self._select_subtree_item = self._add_item('Select subtree',
                                                   self._on_select_subtree)

    def _add_item(self, label, handler):
        """Append a visible menu item whose 'activate' signal calls handler
        with the tree model and tree view as extra arguments."""
        item = gtk.MenuItem(label)
        item.connect('activate', handler, self._treemodel, self._treeview)
        self.append(item)
        item.show()
        return item

    def set_current_term(self, term, it):
        """Set the term and tree iter that the menu actions operate on.

        Both may be None when no row is current.
        """
        self._current_term = term
        self._current_iter = it

    def _on_expand_subtree(self, item, treemodel, treeview):
        """Expand the current row and all its descendants."""
        if self._current_iter is None:
            return
        path = treemodel.get_path(self._current_iter)
        treeview.expand_row(path, True)

    def _on_collapse_subtree(self, item, treemodel, treeview):
        """Collapse the current row."""
        if self._current_iter is None:
            return
        treeview.collapse_row(treemodel.get_path(self._current_iter))

    def _on_select_subtree(self, item, treemodel, treeview):
        """Set the project's 'go-terms' selection to the ids of the terms
        that networkx.bfs returns for the current term in the global GO
        network."""
        term = self._current_term
        if term is None:
            logger.log('warning', 'No GO term to select a subtree from.')
            return
        if go is None:
            logger.log('warning', 'Gene Ontology is not loaded.')
            return

        logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
                   (term['id'], term['name']))
        ids = [x['id'] for x in networkx.bfs(go, term)]
        project.project.set_selection('go-terms', set(ids))
|
|
|
|
|
|
|
|
|
|
|
|
class LoadGOFunction(workflow.Function):
    """Workflow function that loads the default Gene Ontology and adds a
    browser page for it to the application's bottom notebook."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')

    def run(self):
        """Load the ontology into the module-level `go`; do nothing when it
        is already set."""
        global go

        if not go:
            go = geneontology.read_default_go()
            browser = GeneOntologyTree(go)
            tab_label = gtk.Label('_Gene Ontology')
            tab_label.set_use_underline(True)
            fluents.app['bottom_notebook'].append_page(browser, tab_label)
|
|
|
|
|
2007-06-22 17:37:22 +02:00
|
|
|
class LoadAnnotationsFunction(workflow.Function):
    """Workflow function that reads the condensed GO annotation table from
    GO_DATA_DIR and exposes it as a dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def run(self):
        """Read 'goa-condensed' and return it as [Dataset].

        The first line of the file is split on whitespace into evidence
        codes (the column identifiers). Every following non-blank line is a
        GO term id followed by one numeric value per column. The dataset is
        also stored in the module-level `evidence`.
        """
        global evidence

        infile = open(GO_DATA_DIR + '/goa-condensed')
        try:
            ev_codes = infile.readline().split()
            rows = [line.split() for line in infile]
        finally:
            # Always release the file handle, also when reading fails.
            infile.close()

        # Blank lines (e.g. a trailing newline) carry no term and are skipped.
        rows = [values for values in rows if values]

        go_terms = [values[0] for values in rows]
        m = zeros((len(rows), len(ev_codes)))
        for i, values in enumerate(rows):
            for j, v in enumerate(values[1:]):
                m[i, j] = float(v)

        d = dataset.Dataset(m,
                            [['go-terms', go_terms], ['evidence', ev_codes]],
                            name='GO evidence')

        evidence = d
        return [d]
|
|
|
|
|
|
|
|
|
|
|
|
class GOWeightDialog(gtk.Dialog):
    """Modal OK/Cancel dialog with two spin buttons: a similarity threshold
    and a rank threshold."""

    def __init__(self):
        buttons = (gtk.STOCK_OK, gtk.RESPONSE_OK,
                   gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL)
        flags = gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT
        gtk.Dialog.__init__(self, 'GO Gene List Influence', None, flags, buttons)

        table = gtk.Table(2, 2)
        sim_lbl, sim_spin = self._add_spin_row(table, 0, 'Similarity threshold: ')
        rank_lbl, rank_spin = self._add_spin_row(table, 1, 'Rank threshold: ')

        for widget in (sim_lbl, sim_spin, rank_lbl, rank_spin, table):
            widget.show()

        self.vbox.add(table)
        self._sim_spin = sim_spin
        self._rank_spin = rank_spin

    def _add_spin_row(self, table, row, caption):
        """Attach a caption label and a two-decimal spin button (range 0-10,
        step 0.1) to the given table row; return (label, spin button)."""
        label = gtk.Label(caption)
        table.attach(label, 0, 1, row, row + 1)
        spin = gtk.SpinButton(gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0), 0.0, 2)
        table.attach(spin, 1, 2, row, row + 1)
        return label, spin

    def set_options(self, options):
        """Load both spin buttons from the options mapping."""
        self._sim_spin.set_value(options['similarity_threshold'])
        self._rank_spin.set_value(options['rank_threshold'])

    def set_editable(self, editable):
        """Make both spin buttons sensitive or insensitive."""
        for spin in (self._sim_spin, self._rank_spin):
            spin.set_sensitive(editable)

    def update_options(self, options):
        """Write the current spin button values back into options."""
        options['similarity_threshold'] = self._sim_spin.get_value()
        options['rank_threshold'] = self._rank_spin.get_value()
|
|
|
|
|
|
|
|
|
2007-07-05 20:36:59 +02:00
|
|
|
class DistanceToSelectionFunction(workflow.Function):
|
|
|
|
def __init__(self):
|
|
|
|
workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
|
|
|
|
self.options = DistanceToSelectionOptions()
|
|
|
|
|
|
|
|
def run(self, similarities, selection):
|
|
|
|
self.show_gui(similarities, self.options)
|
|
|
|
|
|
|
|
retval = []
|
|
|
|
|
|
|
|
dims = similarities.get_dim_name()
|
|
|
|
if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
|
|
|
|
logger.log('warning', 'Are you sure this is a similarity matrix?')
|
|
|
|
|
|
|
|
dim = dims[0]
|
|
|
|
print "dim", dim
|
|
|
|
|
|
|
|
print "selection", selection[dim]
|
|
|
|
print "indices", similarities.get_indices(dim, selection[dim])
|
|
|
|
indices = similarities.get_indices(dim, selection[dim])
|
|
|
|
m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
|
|
|
|
retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
|
|
|
|
("_dummy", '0')]))
|
|
|
|
|
|
|
|
return retval
|
|
|
|
|
|
|
|
def show_gui(self, similarities, options, edit=True):
|
|
|
|
dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
|
|
|
|
response = dialog.run()
|
|
|
|
dialog.hide()
|
|
|
|
if response == gtk.RESPONSE_OK:
|
|
|
|
dialog.set_output()
|
|
|
|
return dialog.get_options()
|
|
|
|
else:
|
|
|
|
return options
|
|
|
|
|
2007-06-22 17:37:22 +02:00
|
|
|
class GOWeightFunction(workflow.Function):
    """Workflow function 'GO Influence': asks for thresholds and validates
    that a gene list and a similarity dataset share exactly one dimension.

    The actual influence calculation is not implemented; run() returns None.
    """

    def __init__(self):
        # NOTE(review): the identifier 'load-go-ann' is the same one used by
        # LoadAnnotationsFunction; this looks like a copy-paste and may need
        # a unique id. Left unchanged because callers may depend on it.
        workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
        self.options = GOWeightOptions()

    def run(self, genelist, similarity):
        """Show the options dialog and check the inputs.

        Logs an error and returns when the two datasets do not share exactly
        one dimension (the original fell through and raised IndexError when
        there was none).
        """
        ## Show dialog box
        self.show_gui(self.options)

        ## assure that data is "correct", i.e., that we can perform
        ## the desired operations.
        common_dims = genelist.common_dims(similarity)
        if len(common_dims) == 0:
            logger.log('error', 'No common dimension in the selected datasets.')
            return
        if len(common_dims) > 1:
            logger.log('error', "More than one common dimension in the " +
                       "selected datasets. Don't know what to do.")
            return

        gene_dim = common_dims[0]
        logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)

        ## Do the calculations.
        # TODO: not implemented.

    def show_gui(self, options, edit=True):
        """Run the threshold dialog.

        Returns self.options, updated from the dialog, when the user
        confirms; otherwise returns the passed-in options unchanged.
        """
        dialog = GOWeightDialog()
        dialog.set_options(self.options)
        dialog.show_all()
        dialog.set_editable(edit)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            # update_options() mutates in place and returns nothing, so
            # return the options object itself rather than its result.
            dialog.update_options(self.options)
            return self.options
        return options
|
|
|
|
|
|
|
|
|
2007-07-05 20:36:59 +02:00
|
|
|
class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
    """Options dialog used by DistanceToSelectionFunction."""

    def __init__(self, data, options):
        # Third argument of workflow.OptionsDialog; presumably the names of
        # the input datasets -- confirm against workflow.OptionsDialog.
        names = ['X']
        workflow.OptionsDialog.__init__(self, data, options, names)
|
|
|
|
|
|
|
|
|
2007-06-28 23:48:13 +02:00
|
|
|
class TTestOptionsDialog(workflow.OptionsDialog):
    """Options dialog for the t-test with an extra 'Limit' notebook page
    holding a spin button (range 0.0-1.0, step 0.01)."""

    def __init__(self, data, options):
        names = ['X', 'Categories']
        workflow.OptionsDialog.__init__(self, data, options, names)

        page = gtk.VBox()
        caption = gtk.Label("Limit")
        limit_spin = gtk.SpinButton(
            gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0), 0.0, 2)

        children = (caption, limit_spin)
        for child in children:
            child.show()
        for child in children:
            page.add(child)
        page.show()

        # Position -1 appends the page after the existing ones.
        self.nb.insert_page(page, gtk.Label("Limit"), -1)
|
|
|
|
|
|
|
|
|
2007-06-28 23:48:13 +02:00
|
|
|
class TTestFunction(workflow.Function):
    """Workflow function that runs an independent two-sample t-test between
    two groups of rows and returns the requested datasets and plots."""

    def __init__(self):
        workflow.Function.__init__(self, 't-test', 't-test')
        self.options = TTestOptions()

    def run(self, x, categories):
        """Run the t-test and return a list of datasets and plots.

        Rows of x are split into two groups by the non-zero entries of
        columns 0 and 2 of categories (labelled non-smokers and current
        smokers in the original code). What is returned depends on
        self.options['out_data'] and self.options['out_plots'].
        """
        # Imported explicitly: the module only has `from scipy import *`,
        # which is not guaranteed to bind the `stats` subpackage.
        from scipy import stats

        self.show_gui(x, categories)

        retval = []
        m = x.asarray()
        c = categories.asarray()

        # Nonsmokers and current smokers
        ns = m.take(nonzero(c[:, 0]), 0)[0]
        cs = m.take(nonzero(c[:, 2]), 0)[0]

        tscores = stats.ttest_ind(ns, cs)

        logger.log('debug', 'Out data: %s' % (self.options['out_data'],))

        tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
                                           ('_t', ['0'])],
                              name='t-values')
        if 't-value' in self.options['out_data']:
            retval.append(tds)

        # The p-value dataset is always built because both plots use it.
        pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
                                           ('_p', ['0'])],
                              name='p-values')
        if 'p-value' in self.options['out_data']:
            retval.append(pds)

        if ProbabilityHistogramPlot in self.options['out_plots']:
            retval.append(ProbabilityHistogramPlot(pds))

        if VolcanoPlot in self.options['out_plots']:
            # Fold change: ratio of the per-column group means.
            fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
            fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
                                        ('_dummy', ['0'])],
                                   name="Fold change")

            retval.append(VolcanoPlot(fcds, pds, 'gene_id'))

        return retval

    def show_gui(self, x, categories):
        """Run the options dialog.

        Returns the dialog's options on OK and self.options otherwise (the
        original returned an undefined name here and raised NameError when
        the dialog was cancelled).
        """
        dialog = TTestOptionsDialog([x, categories], self.options)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            dialog.set_output()
            return dialog.get_options()
        return self.options
|
2007-06-22 17:37:22 +02:00
|
|
|
|
|
|
|
|
2007-08-02 12:20:33 +02:00
|
|
|
class SetICFunction(workflow.Function):
    """Workflow function that registers a dataset as the current
    information content (IC) on main.workflow."""

    def __init__(self):
        workflow.Function.__init__(self, 'set-ic', 'Set IC')

    def run(self, ds):
        """Store ds as main.workflow.current_ic when it has a 'go-terms'
        dimension; log a warning otherwise. Returns None."""
        if 'go-terms' not in ds.get_dim_name():
            logger.log('warning', 'Cannot use this dataset as IC on the go-terms dimension')
            return
        main.workflow.current_ic = ds
        return
|
|
|
|
|
2007-07-23 19:02:28 +02:00
|
|
|
class PlotDagFunction(workflow.Function):
    """Workflow function that builds a DAG plot for the selected GO terms."""

    def __init__(self):
        workflow.Function.__init__(self, 'go-dag', 'Build DAG')

    def run(self, selection):
        """Return [DagPlot] for the graph connecting the selected
        'go-terms'."""
        terms = list(selection['go-terms'])
        g = self.get_network(terms)
        # NOTE(review): this graph dataset is constructed but not returned
        # or used further in this method.
        node_ids = [('go-terms', g.nodes()), ('_go-terms', g.nodes())]
        ds = dataset.GraphDataset(networkx.adj_matrix(g), node_ids, name="DAG")
        return [DagPlot(g)]

    def get_network(self, terms, subtree='bp'):
        """Returns a DAG connecting the given terms by including their parents
        up to the level needed to connect them.  The subtree parameter is one of
            mf - molecular function
            bp - biological process
            cc - cellular component"""
        # Name of the R parents object for each subtree key.
        parents_by_subtree = {'mf': 'GOMFPARENTS',
                              'bp': 'GOBPPARENTS',
                              'cc': 'GOCCPARENTS'}

        rpy.r.library("GOstats")

        if subtree not in parents_by_subtree:
            raise Exception("Unknown subtree. Use mf, bp or cc.")
        subtree_r = getattr(rpy.r, parents_by_subtree[subtree])

        r_graph = rpy.r.GOGraph(terms, subtree_r)
        edges = rpy.r.edges(r_graph)

        # The R edge list maps each child to its parents; the networkx
        # graph gets one edge per pair, directed from parent to child.
        dag = networkx.DiGraph()
        for child, parents in edges.items():
            for parent in parents.keys():
                dag.add_edge(parent, child)

        return dag
|
|
|
|
|
|
|
|
|
2007-06-28 23:48:13 +02:00
|
|
|
class TTestOptions(workflow.Options):
    """Options for TTestFunction: the plots and datasets it can produce and
    the datasets produced by default.

    Entries of 'all_plots' and 'all_data' are (key, label, flag) triples;
    presumably the flag is the default on/off state -- confirm against
    workflow.Options.
    """

    def __init__(self):
        workflow.Options.__init__(self)
        # The volcano plot was labelled 'Histogram' (copied from the entry
        # above), which made the two plots indistinguishable by label.
        self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
                             (VolcanoPlot, 'Volcano plot', True)]
        self['all_data'] = [('t-value', 't-values', True),
                            ('p-value', 'Probabilities', True),
                            ('categories', 'Categories', False)]
        self['out_data'] = ['t-value', 'p-value']
|
|
|
|
|
2007-06-28 23:48:13 +02:00
|
|
|
|
2007-07-05 20:36:59 +02:00
|
|
|
class DistanceToSelectionOptions(workflow.Options):
    """Options for DistanceToSelectionFunction; declares the single
    'mindist' dataset entry."""

    def __init__(self):
        workflow.Options.__init__(self)
        mindist_entry = ('mindist', 'Minimum distance', True)
        self['all_data'] = [mindist_entry]
|
|
|
|
|
|
|
|
|
2007-06-28 23:48:13 +02:00
|
|
|
class GOWeightOptions(workflow.Options):
    """Options for GOWeightFunction; both thresholds start at 0.0."""

    def __init__(self):
        workflow.Options.__init__(self)
        for key in ('similarity_threshold', 'rank_threshold'):
            self[key] = 0.0
|
|
|
|
|
2007-07-03 18:25:38 +02:00
|
|
|
class ProbabilityHistogramPlot(plots.HistogramPlot):
    """Histogram plot named "Confidence" that uses 50 bins."""

    def __init__(self, ds):
        settings = {'name': "Confidence", 'bins': 50}
        plots.HistogramPlot.__init__(self, ds, **settings)
|
|
|
|
|
2007-07-03 18:25:38 +02:00
|
|
|
|
2007-07-05 20:36:59 +02:00
|
|
|
class VolcanoPlot(plots.ScatterPlot):
    """Scatter plot of a fold-change dataset against a p-value dataset.

    fold_ds is read along the '_dummy' dimension and p_ds along '_p', both
    at identifier '0'; dim names the dimension passed to ScatterPlot as the
    common one (callers in this file pass 'gene_id').
    """

    def __init__(self, fold_ds, p_ds, dim, **kw):
        # dim was previously ignored in favour of a hard-coded 'gene_id'.
        plots.ScatterPlot.__init__(self, fold_ds, p_ds, dim, '_dummy',
                                   '0', '0',
                                   name="Volcano plot",
                                   sel_dim_2='_p', **kw)
|
2007-07-03 18:25:38 +02:00
|
|
|
|
2007-07-30 16:15:23 +02:00
|
|
|
|
2007-07-23 19:02:28 +02:00
|
|
|
class DagPlot(plots.Plot):
    """Plot of a directed graph of GO terms with selectable nodes.

    Nodes are drawn as a scatter collection and edges with networkx. A
    second, transparent scatter collection is used to outline the selected
    nodes. Two toolbar spin buttons filter the nodes by information content
    (IC) taken from main.workflow.current_ic.
    """

    def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
                 with_labels=False, name='DAG Plot'):
        """Draw graph.

        pos optionally maps each node to an (x, y) pair; when it is empty
        or None the layout is computed with _calc_pos().
        """
        plots.Plot.__init__(self, name)
        self._add_ic_spin_buttons()
        self.nodes = graph.nodes()
        self.graph = graph
        self._pos = pos
        self._nodesize = nodesize
        self._nodecolor = nodecolor
        self._with_labels = with_labels
        # Every node is drawn until an IC threshold hides some of them, so
        # every node starts out selectable (an empty set here made the
        # rectangle selection return nothing).
        self.visible = set(self.nodes)

        self.current_dim = dim

        if not self._pos:
            self._pos = self._calc_pos(graph)
        self._xy = asarray([self._pos[node] for node in self.nodes])
        self.xaxis_data = self._xy[:, 0]
        self.yaxis_data = self._xy[:, 1]

        # Initial draw
        self.default_props = {'nodesize': 50,
                              'nodecolor': 'blue',
                              'edge_color': 'gray',
                              'edge_color_selected': 'red'}
        self.node_collection = None
        self.edge_collection = None
        self.node_labels = None
        lw = zeros(self.xaxis_data.shape)
        self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data,
                                                 s=self._nodesize,
                                                 c=self._nodecolor,
                                                 linewidth=lw,
                                                 zorder=3)
        self._mappable = self.node_collection

        # selected_nodes is a transparent collection drawn above the nodes;
        # only its outline width changes with the selection. It is needed
        # to get the selected nodes 'on top', as zorder cannot be set per
        # node.
        self.selected_nodes = self.axes.scatter(self.xaxis_data,
                                                self.yaxis_data,
                                                s=self._nodesize,
                                                c=self._nodecolor,
                                                edgecolor='r',
                                                linewidth=lw,
                                                zorder=4,
                                                alpha=0)

        edge_color = self.default_props['edge_color']
        self.edge_collection = networkx.draw_networkx_edges(self.graph,
                                                            self._pos,
                                                            ax=self.axes,
                                                            edge_color=edge_color)
        # edge color rgba-arrays: one row per edge in the default color.
        converter = plots.ColorConverter()
        self._edge_color_rgba = matlib.repmat(converter.to_rgba(edge_color),
                                              self.graph.number_of_edges(), 1)
        self._edge_color_selected = converter.to_rgba(
            self.default_props['edge_color_selected'])
        if self._with_labels:
            self.node_labels = networkx.draw_networkx_labels(self.graph,
                                                             self._pos,
                                                             ax=self.axes)

        # remove axes, frame and grid
        self.axes.set_xticks([])
        self.axes.set_yticks([])
        self.axes.grid(False)
        self.axes.set_frame_on(False)
        self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    def _make_ic_spin_button(self):
        """Return a spin button (range 0-100, value 1) that is connected to
        _on_ic_value_changed."""
        spin = gtk.SpinButton()
        spin.set_range(0, 100)
        spin.set_value(1)
        spin.set_increments(1, 3)
        # Connected after set_value() so the initial value fires no callback.
        spin.connect('value-changed', self._on_ic_value_changed)
        return spin

    def _add_ic_spin_buttons(self):
        """Adds spin buttons to the toolbar for selecting minimum and maximum
        threshold values on information content."""
        sb_min = self._make_ic_spin_button()
        self._ic_sb_min = sb_min

        sb_max = self._make_ic_spin_button()
        self._ic_sb_max = sb_max

        label = gtk.Label(" < IC < ")
        hbox = gtk.HBox()
        hbox.pack_start(sb_min)
        hbox.pack_start(label)
        hbox.pack_start(sb_max)
        ti = gtk.ToolItem()
        ti.set_expand(False)
        ti.add(hbox)
        for widget in (sb_min, sb_max, label, hbox, ti):
            widget.show()
        self._toolbar.insert(ti, -1)
        ti.set_tooltip(self._toolbar.tooltips, "Set information content threshold")

    def _calc_pos(self, graph):
        """Calculates position for graph nodes using 'dot' layout.

        The 'GO:' prefix is removed from the node names before the layout
        and put back on every returned key except "all".
        """
        gv_graph = networkx.DiGraph()
        for start, end in graph.edges():
            gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', ''))

        pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot")
        pos = {}
        for k, v in pos_gv.items():
            if k != "all":
                pos["GO:%s" % k] = v
            else:
                pos[k] = v
        return pos

    def set_ic_threshold(self, ic_min, ic_max):
        """Sets Information Content min and max to the given values.
        Updates the plot accordingly to show only values that have an
        information content within the boundaries. Other values are
        also excluded from being selected from the plot.

        Does nothing except log a warning when no IC dataset has been set
        on main.workflow.

        @param ic_min Do not show nodes with IC below this value.
        @param ic_max Do not show nodes with IC above this value.
        """
        ic = getattr(main.workflow, 'current_ic', None)
        # Identity test: `== None` would invoke the dataset's own comparison.
        if ic is None:
            logger.log('warning', 'No information content dataset is set.')
            return

        icnodes = ic.existing_identifiers('go-terms', self.nodes)
        icindices = ic.get_indices('go-terms', icnodes)
        a = ravel(ic.asarray()[icindices])
        ic_good = set(array(icnodes)[(a > ic_min) & (a < ic_max)])

        # Nodes outside the interval get size 0, which hides them. Kept
        # nodes use the configured node size so that they still match the
        # selection outline drawn with the same size.
        sizes = zeros(len(self.nodes))
        visible = set()
        for i, n in enumerate(self.nodes):
            if n in ic_good:
                sizes[i] = self._nodesize
                visible.add(n)
        self.visible = visible

        self.node_collection._sizes = sizes
        self.canvas.draw()

    def _on_ic_value_changed(self, sb):
        """Callback on spin button value changes."""
        ic_min = self._ic_sb_min.get_value()
        ic_max = self._ic_sb_max.get_value()
        self.set_ic_threshold(ic_min, ic_max)

    def rectangle_select_callback(self, x1, y1, x2, y2, key):
        """Select the visible nodes strictly inside the given rectangle."""
        ydata = self.yaxis_data
        xdata = self.xaxis_data

        # Normalize the corners so that (x1, y1) is the lower-left one.
        if x1 > x2:
            x1, x2 = x2, x1
        if y1 > y2:
            y1, y2 = y2, y1

        index = nonzero((xdata > x1) & (xdata < x2) & (ydata > y1) & (ydata < y2))[0]
        ids = self.visible.intersection([self.nodes[i] for i in index])
        ids = self.update_selection(ids, key)
        self.selection_listener(self.current_dim, ids)

    def lasso_select_callback(self, verts, key=None):
        """Select the visible nodes inside the polygon given by verts."""
        xys = c_[self.xaxis_data[:, newaxis], self.yaxis_data[:, newaxis]]
        index = nonzero(points_inside_poly(xys, verts))[0]
        # Hidden nodes are excluded, as in rectangle_select_callback.
        ids = [self.nodes[i] for i in index if self.nodes[i] in self.visible]
        ids = self.update_selection(ids, key)
        self.selection_listener(self.current_dim, ids)

    def set_current_selection(self, selection):
        """Outline the selected nodes and recolor the edges whose two end
        nodes are both selected."""
        linewidth = zeros(self.xaxis_data.shape)
        edge_color_rgba = self._edge_color_rgba.copy()
        idents = selection[self.current_dim]
        index = [i for i, node in enumerate(self.nodes) if node in idents]
        if len(index) > 0:
            linewidth[index] = 2
            edge_index = [i for i, edge in enumerate(self.graph.edges())
                          if (edge[0] in idents and edge[1] in idents)]
            if len(edge_index) > 0:
                for i in edge_index:
                    edge_color_rgba[i, :] = self._edge_color_selected
                # NOTE(review): kept from the original; it sets `_A` on the
                # plot itself, not on a collection -- confirm the intent.
                self._A = None

        # The edge collection is None when the graph was drawn without edges.
        if self.edge_collection is not None:
            self.edge_collection._colors = edge_color_rgba
        self.selected_nodes.set_linewidth(linewidth)
        self.canvas.draw()

    def is_mappable_with(self, obj):
        """Returns True if obj is a dataset that has this plot's current
        dimension, False otherwise.
        """
        return (isinstance(obj, fluents.dataset.Dataset) and
                self.current_dim in obj.get_dim_name())

    def _update_color_from_dataset(self, ds):
        """Updates the facecolors from a dataset.

        A 2-d dataset is reduced to one value per row. Infinite values are
        replaced by the finite extremes, and nodes that are missing from
        the dataset get the value -1.

        Raises ValueError when the dataset array is not 2-dimensional.
        """
        data = ds.asarray()

        # only support for 2d-arrays:
        try:
            m, n = data.shape
        except ValueError:
            raise ValueError("No support for more than 2 dimensions.")

        # is dataset a vector or matrix?
        if n != 1:
            if isinstance(ds, fluents.dataset.CategoryDataset):
                # we have a category dataset: weight each column by its
                # index, so a row is coded by the index of its category.
                vec = dot(data, diag(arange(n))).sum(1)
            else:
                vec = data.sum(1)
        else:
            vec = data.ravel()

        indices = ds.get_indices(self.current_dim, self.nodes)
        nodes = ds.existing_identifiers(self.current_dim, self.nodes)

        v = vec.take(indices, 0)
        # Color limits come from the finite values of the whole vector.
        vec_min = vec[vec > -inf].min()
        vec_max = vec[vec < inf].max()
        v[v == inf] = vec_max
        v[v == -inf] = vec_min

        d = dict(zip(nodes, list(v)))

        map_vec = zeros(len(self.nodes))
        for i, node in enumerate(self.nodes):
            map_vec[i] = d.get(node, -1)

        # update facecolors
        self.node_collection.set_array(map_vec)
        self.node_collection.set_clim(vec_min, vec_max)
        self.node_collection.update_scalarmappable()  # sets facecolors from array
        self.canvas.draw()
|
|
|
|
|