Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/workflows/gobrowser.py

489 lines
16 KiB
Python
Raw Normal View History

2007-03-14 22:08:56 +01:00
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project
import geneontology
#from scipy import array, randn, log, ones, zeros
from scipy import *
2007-03-14 22:08:56 +01:00
import networkx
import re
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
go = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 2, False)
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
self._selected_terms = set()
self._tree_view.set_fixed_height_mode(True)
# Set up context menu
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
self._tree_view.connect('popup_menu', self._popup_menu)
self._tree_view.connect('button_press_event', self._on_button_press)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_fixed_width(200)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererToggle()
renderer.set_property('activatable', True)
renderer.connect('toggled', self._toggle_selected)
renderer.set_active(True)
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
go_column = gtk.TreeViewColumn('T', renderer, active=2)
go_column.set_fixed_width(20)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 1)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
go_column.set_fixed_width(200)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 2)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 3)
self._desc_view.set_go_term(term)
##
## GTK Callback functions
##
def _popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def _on_button_press(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
iter = None
if path:
iter = self._treemodel.get_iter(path[0])
obj = self._treemodel.get_value(iter, 3)
else:
obj = None
self._context_menu.set_current_term(obj, iter)
if event.button == 3:
self._context_menu.popup(None, None, None, event.button, event.time)
def _toggle_selected(self, renderer, path):
iter = self._treemodel.get_iter(path)
selected = self._treemodel.get_value(iter, 2)
id = self._treemodel.get_value(iter, 0)
self._treemodel.set_value(iter, 2, not selected)
if selected:
self._selected_terms.remove(id)
else:
self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
"""Context menu for GO terms in the gene ontology browser"""
def __init__(self, treemodel, treeview):
self._treemodel = treemodel
self._treeview = treeview
self._current_term = None
self._current_iter = None
gtk.Menu.__init__(self)
# Popuplate tree
self._expand_item = i = gtk.MenuItem('Expand')
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
self.append(i)
i.show()
self._collapse_item = i = gtk.MenuItem('Collapse')
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
self.append(i)
i.show()
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
i.connect('activate', self._on_select_subtree, treemodel, treeview)
self.append(i)
i.show()
def set_current_term(self, term, it):
self._current_term = term
self._current_iter = it
def _on_expand_subtree(self, item, treemodel, treeview):
path = treemodel.get_path(self._current_iter)
treeview.expand_row(path, True)
def _on_collapse_subtree(self, item, treemodel, treeview):
treeview.collapse_row(treemodel.get_path(self._current_iter))
def _on_select_subtree(self, item, treemodel, treeview):
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
(self._current_term['id'], self._current_term['name']))
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
project.project.set_selection('go-terms', set(ids))
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
if go:
return
2007-03-14 22:08:56 +01:00
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class GOWeightDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO Gene List Influence',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
table = gtk.Table(2, 2)
sim_lbl = gtk.Label('Similarity threshold: ')
table.attach(sim_lbl, 0, 1, 0, 1)
adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
table.attach(sim_spin, 1, 2, 0, 1)
rank_lbl = gtk.Label('Rank threshold: ')
table.attach(rank_lbl, 0, 1, 1, 2)
rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
table.attach(rank_spin, 1, 2, 1, 2)
sim_lbl.show()
sim_spin.show()
rank_lbl.show()
rank_spin.show()
table.show()
self.vbox.add(table)
self._sim_spin = sim_spin
self._rank_spin = rank_spin
def set_options(self, options):
self._sim_spin.set_value(options['similarity_threshold'])
self._rank_spin.set_value(options['rank_threshold'])
def set_editable(self, editable):
self._sim_spin.set_sensitive(editable)
self._rank_spin.set_sensitive(editable)
def update_options(self, options):
options['similarity_threshold'] = self._sim_spin.get_value()
options['rank_threshold'] = self._rank_spin.get_value()
2007-07-05 20:36:59 +02:00
class DistanceToSelectionFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
self.options = DistanceToSelectionOptions()
def run(self, similarities, selection):
self.show_gui(similarities, self.options)
retval = []
dims = similarities.get_dim_name()
if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
logger.log('warning', 'Are you sure this is a similarity matrix?')
dim = dims[0]
print "dim", dim
print "selection", selection[dim]
print "indices", similarities.get_indices(dim, selection[dim])
indices = similarities.get_indices(dim, selection[dim])
m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
("_dummy", '0')]))
return retval
def show_gui(self, similarities, options, edit=True):
dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class GOWeightFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
self.options = GOWeightOptions()
def run(self, genelist, similarity):
## Show dialog box
self.show_gui(self.options)
## assure that data is "correct", i.e., that we can perform
## the desired operations.
common_dims = genelist.common_dims(similarity)
if len(common_dims) == 0:
logger.log('error', 'No common dimension in the selected datasets.')
elif len(common_dims) > 1:
logger.log('error', "More than one common dimension in the " +
"selected datasets. Don't know what to do.")
gene_dim = common_dims[0]
logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)
## Do the calculations.
d = {}
def show_gui(self, options, edit=True):
dialog = GOWeightDialog()
dialog.set_options(self.options)
dialog.show_all()
dialog.set_editable(edit)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
return dialog.update_options(self.options)
else:
return options
2007-07-05 20:36:59 +02:00
class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options, ['X'])
class TTestOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options,
['X', 'Categories'])
vb = gtk.VBox()
l = gtk.Label("Limit")
adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
sb = gtk.SpinButton(adj, 0.0, 2)
l.show()
sb.show()
vb.add(l)
vb.add(sb)
vb.show()
self.nb.insert_page(vb, gtk.Label("Limit"), -1)
class TTestFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 't-test', 't-test')
self.options = TTestOptions()
def run(self, x, categories):
self.show_gui(x, categories)
retval = []
m = x.asarray()
c = categories.asarray()
# Nonsmokers and current smokers
ns = m.take(nonzero(c[:,0]), 0)[0]
cs = m.take(nonzero(c[:,2]), 0)[0]
tscores = stats.ttest_ind(ns, cs)
print "Out data:", self.options['out_data']
tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
2007-07-05 20:36:59 +02:00
('_t', ['0'])],
name='t-values')
if 't-value' in self.options['out_data']:
retval.append(tds)
pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
2007-07-05 20:36:59 +02:00
('_p', ['0'])],
name='p-values')
if 'p-value' in self.options['out_data']:
retval.append(pds)
if ProbabilityHistogramPlot in self.options['out_plots']:
retval.append(ProbabilityHistogramPlot(pds))
2007-07-05 20:36:59 +02:00
if VolcanoPlot in self.options['out_plots']:
fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
('_dummy', ['0'])],
name="Fold change")
retval.append(VolcanoPlot(fcds, pds, 'gene_id'))
return retval
def show_gui(self, x, categories):
dialog = TTestOptionsDialog([x, categories], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class TTestOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
2007-07-05 20:36:59 +02:00
self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
(VolcanoPlot, 'Histogram', True)]
self['all_data'] = [('t-value', 't-values', True),
('p-value', 'Probabilities', True),
('categories', 'Categories', False)]
self['out_data'] = ['t-value', 'p-value']
2007-07-05 20:36:59 +02:00
class DistanceToSelectionOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['all_data'] = [('mindist', 'Minimum distance', True)]
class GOWeightOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['similarity_threshold'] = 0.0
self['rank_threshold'] = 0.0
class ProbabilityHistogramPlot(plots.HistogramPlot):
def __init__(self, ds):
2007-07-05 20:36:59 +02:00
plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)
2007-07-05 20:36:59 +02:00
class VolcanoPlot(plots.ScatterPlot):
def __init__(self, fold_ds, p_ds, dim, **kw):
plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy',
'0', '0',
name="Volcano plot",
sel_dim_2='_p', **kw)