This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/workflows/gobrowser.py

720 lines
26 KiB
Python
Raw Normal View History

2007-03-14 22:08:56 +01:00
import gtk
from fluents import dataset, logger, plots, workflow, fluents, project
import geneontology
from matplotlib.nxutils import points_inside_poly
import matplotlib
#from scipy import array, randn, log, ones, zeros
from scipy import *
from numpy import matlib
2007-03-14 22:08:56 +01:00
import networkx
import re
import rpy
2007-03-14 22:08:56 +01:00
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
go = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 3, False)
2007-03-14 22:08:56 +01:00
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._isa_parents = gtk.HBox()
isa_parents_label = gtk.Label('Is a:')
tab.attach(isa_parents_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
2007-03-14 22:08:56 +01:00
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 2, 3, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
2007-03-14 22:08:56 +01:00
self._tab = tab
2007-03-14 22:08:56 +01:00
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
self._tab.remove(self._isa_parents)
self._isa_parents = gtk.HBox()
for p in term['is_a']:
btn = gtk.Button(p)
btn.show()
self._isa_parents.add(btn)
self._isa_parents.show()
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
2007-03-14 22:08:56 +01:00
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
self._tab.remove(self._isa_parents)
self._isa_parents = gtk.HBox()
self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
2007-03-14 22:08:56 +01:00
2007-03-14 22:08:56 +01:00
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
self.set_position(400)
2007-03-14 22:08:56 +01:00
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
self._selected_terms = set()
self._tree_view.set_fixed_height_mode(True)
# Set up context menu
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
self._tree_view.connect('popup_menu', self._popup_menu)
self._tree_view.connect('button_press_event', self._on_button_press)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_fixed_width(200)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererToggle()
renderer.set_property('activatable', True)
renderer.connect('toggled', self._toggle_selected)
renderer.set_active(True)
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
go_column = gtk.TreeViewColumn('T', renderer, active=2)
go_column.set_fixed_width(20)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 1)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
go_column.set_fixed_width(200)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 2)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 3)
self._desc_view.set_go_term(term)
##
## GTK Callback functions
##
def _popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def _on_button_press(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
iter = None
if path:
iter = self._treemodel.get_iter(path[0])
obj = self._treemodel.get_value(iter, 3)
else:
obj = None
self._context_menu.set_current_term(obj, iter)
if event.button == 3:
self._context_menu.popup(None, None, None, event.button, event.time)
def _toggle_selected(self, renderer, path):
iter = self._treemodel.get_iter(path)
selected = self._treemodel.get_value(iter, 2)
id = self._treemodel.get_value(iter, 0)
self._treemodel.set_value(iter, 2, not selected)
if selected:
self._selected_terms.remove(id)
else:
self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
"""Context menu for GO terms in the gene ontology browser"""
def __init__(self, treemodel, treeview):
self._treemodel = treemodel
self._treeview = treeview
self._current_term = None
self._current_iter = None
gtk.Menu.__init__(self)
# Popuplate tree
self._expand_item = i = gtk.MenuItem('Expand')
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
self.append(i)
i.show()
self._collapse_item = i = gtk.MenuItem('Collapse')
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
self.append(i)
i.show()
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
i.connect('activate', self._on_select_subtree, treemodel, treeview)
self.append(i)
i.show()
def set_current_term(self, term, it):
self._current_term = term
self._current_iter = it
def _on_expand_subtree(self, item, treemodel, treeview):
path = treemodel.get_path(self._current_iter)
treeview.expand_row(path, True)
def _on_collapse_subtree(self, item, treemodel, treeview):
treeview.collapse_row(treemodel.get_path(self._current_iter))
def _on_select_subtree(self, item, treemodel, treeview):
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
(self._current_term['id'], self._current_term['name']))
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
project.project.set_selection('go-terms', set(ids))
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
if go:
return
2007-03-14 22:08:56 +01:00
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class GOWeightDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO Gene List Influence',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
table = gtk.Table(2, 2)
sim_lbl = gtk.Label('Similarity threshold: ')
table.attach(sim_lbl, 0, 1, 0, 1)
adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
table.attach(sim_spin, 1, 2, 0, 1)
rank_lbl = gtk.Label('Rank threshold: ')
table.attach(rank_lbl, 0, 1, 1, 2)
rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
table.attach(rank_spin, 1, 2, 1, 2)
sim_lbl.show()
sim_spin.show()
rank_lbl.show()
rank_spin.show()
table.show()
self.vbox.add(table)
self._sim_spin = sim_spin
self._rank_spin = rank_spin
def set_options(self, options):
self._sim_spin.set_value(options['similarity_threshold'])
self._rank_spin.set_value(options['rank_threshold'])
def set_editable(self, editable):
self._sim_spin.set_sensitive(editable)
self._rank_spin.set_sensitive(editable)
def update_options(self, options):
options['similarity_threshold'] = self._sim_spin.get_value()
options['rank_threshold'] = self._rank_spin.get_value()
2007-07-05 20:36:59 +02:00
class DistanceToSelectionFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
self.options = DistanceToSelectionOptions()
def run(self, similarities, selection):
self.show_gui(similarities, self.options)
retval = []
dims = similarities.get_dim_name()
if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
logger.log('warning', 'Are you sure this is a similarity matrix?')
dim = dims[0]
print "dim", dim
print "selection", selection[dim]
print "indices", similarities.get_indices(dim, selection[dim])
indices = similarities.get_indices(dim, selection[dim])
m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
("_dummy", '0')]))
return retval
def show_gui(self, similarities, options, edit=True):
dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class GOWeightFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
self.options = GOWeightOptions()
def run(self, genelist, similarity):
## Show dialog box
self.show_gui(self.options)
## assure that data is "correct", i.e., that we can perform
## the desired operations.
common_dims = genelist.common_dims(similarity)
if len(common_dims) == 0:
logger.log('error', 'No common dimension in the selected datasets.')
elif len(common_dims) > 1:
logger.log('error', "More than one common dimension in the " +
"selected datasets. Don't know what to do.")
gene_dim = common_dims[0]
logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)
## Do the calculations.
d = {}
def show_gui(self, options, edit=True):
dialog = GOWeightDialog()
dialog.set_options(self.options)
dialog.show_all()
dialog.set_editable(edit)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
return dialog.update_options(self.options)
else:
return options
2007-07-05 20:36:59 +02:00
class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options, ['X'])
class TTestOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options,
['X', 'Categories'])
vb = gtk.VBox()
l = gtk.Label("Limit")
adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
sb = gtk.SpinButton(adj, 0.0, 2)
l.show()
sb.show()
vb.add(l)
vb.add(sb)
vb.show()
self.nb.insert_page(vb, gtk.Label("Limit"), -1)
class TTestFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 't-test', 't-test')
self.options = TTestOptions()
def run(self, x, categories):
self.show_gui(x, categories)
retval = []
m = x.asarray()
c = categories.asarray()
# Nonsmokers and current smokers
ns = m.take(nonzero(c[:,0]), 0)[0]
cs = m.take(nonzero(c[:,2]), 0)[0]
tscores = stats.ttest_ind(ns, cs)
print "Out data:", self.options['out_data']
tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
2007-07-05 20:36:59 +02:00
('_t', ['0'])],
name='t-values')
if 't-value' in self.options['out_data']:
retval.append(tds)
pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
2007-07-05 20:36:59 +02:00
('_p', ['0'])],
name='p-values')
if 'p-value' in self.options['out_data']:
retval.append(pds)
if ProbabilityHistogramPlot in self.options['out_plots']:
retval.append(ProbabilityHistogramPlot(pds))
2007-07-05 20:36:59 +02:00
if VolcanoPlot in self.options['out_plots']:
fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
('_dummy', ['0'])],
name="Fold change")
retval.append(VolcanoPlot(fcds, pds, 'gene_id'))
return retval
def show_gui(self, x, categories):
dialog = TTestOptionsDialog([x, categories], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class PlotDagFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'go-dag', 'Build DAG')
def run(self, selection):
g = self.get_network(list(selection['go-terms']))
ds = dataset.GraphDataset(networkx.adj_matrix(g),
[('go-terms', g.nodes()), ('_go-terms', g.nodes())],
name="DAG")
return [DagPlot(g)]
def get_network(self, terms, subtree='bp'):
"""Returns a DAG connecting the given terms by including their parents
up to the level needed to connect them. The subtree parameter is one of
mf - molecular function
bp - biological process
cc - cellular component"""
rpy.r.library("GOstats")
if subtree == 'mf':
subtree_r = rpy.r.GOMFPARENTS
elif subtree == 'bp':
subtree_r = rpy.r.GOBPPARENTS
elif subtree == 'cc':
subtree_r = rpy.r.GOCCPARENTS
else:
raise Exception("Unknown subtree. Use mf, bp or cc.")
g = rpy.r.GOGraph(terms, subtree_r)
edges = rpy.r.edges(g)
nxgraph = networkx.DiGraph()
for child, d in edges.items():
for parent in d.keys():
nxgraph.add_edge(parent, child)
return nxgraph
class TTestOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
2007-07-05 20:36:59 +02:00
self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
(VolcanoPlot, 'Histogram', True)]
self['all_data'] = [('t-value', 't-values', True),
('p-value', 'Probabilities', True),
('categories', 'Categories', False)]
self['out_data'] = ['t-value', 'p-value']
2007-07-05 20:36:59 +02:00
class DistanceToSelectionOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['all_data'] = [('mindist', 'Minimum distance', True)]
class GOWeightOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['similarity_threshold'] = 0.0
self['rank_threshold'] = 0.0
class ProbabilityHistogramPlot(plots.HistogramPlot):
def __init__(self, ds):
2007-07-05 20:36:59 +02:00
plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)
2007-07-05 20:36:59 +02:00
class VolcanoPlot(plots.ScatterPlot):
def __init__(self, fold_ds, p_ds, dim, **kw):
plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy',
'0', '0',
name="Volcano plot",
sel_dim_2='_p', **kw)
class DagPlot(plots.Plot):
def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
with_labels=False, name='DAG Plot'):
plots.Plot.__init__(self, name)
self.nodes = graph.nodes()
self.graph = graph
self._pos = pos
self._nodesize = nodesize
self._nodecolor = nodecolor
self._with_labels = with_labels
self.current_dim = dim
if not self._pos:
self._pos = self._calc_pos(graph)
self._xy = asarray([self._pos[node] for node in self.nodes])
self.xaxis_data = self._xy[:,0]
self.yaxis_data = self._xy[:,1]
# Initial draw
self.default_props = {'nodesize' : 50,
'nodecolor' : 'blue',
'edge_color' : 'gray',
'edge_color_selected' : 'red'}
self.node_collection = None
self.edge_collection = None
self.node_labels = None
lw = zeros(self.xaxis_data.shape)
self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data,
s=self._nodesize,
c=self._nodecolor,
linewidth=lw,
zorder=3)
self._mappable = self.node_collection
# selected nodes is a transparent graph that adjust node-edge visibility
# according to the current selection needed to get get the selected
# nodes 'on top' as zorder may not be defined individually
self.selected_nodes = self.axes.scatter(self.xaxis_data,
self.yaxis_data,
s=self._nodesize,
c=self._nodecolor,
edgecolor='r',
linewidth=lw,
zorder=4,
alpha=0)
edge_color = self.default_props['edge_color']
self.edge_collection = networkx.draw_networkx_edges(self.graph,
self._pos,
ax=self.axes,
edge_color=edge_color)
# edge color rgba-arrays
self._edge_color_rgba = matlib.repmat(plots.ColorConverter().to_rgba(edge_color),
self.graph.number_of_edges(),1)
self._edge_color_selected = plots.ColorConverter().to_rgba(self.default_props['edge_color_selected'])
if self._with_labels:
self.node_labels = networkx.draw_networkx_labels(self.graph,
self._pos,
ax=self.axes)
# remove axes, frame and grid
self.axes.set_xticks([])
self.axes.set_yticks([])
self.axes.grid(False)
self.axes.set_frame_on(False)
self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
def _calc_pos(self, graph):
"""Calculates position for graph nodes."""
gv_graph = networkx.DiGraph()
for start, end in graph.edges():
gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', ''))
pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot")
pos = {}
for k, v in pos_gv.items():
if k != "all":
pos["GO:%s" % k] = v
else:
pos[k] = v
return pos
def rectangle_select_callback(self, x1, y1, x2, y2, key):
ydata = self.yaxis_data
xdata = self.xaxis_data
# find indices of selected area
if x1>x2:
x1, x2 = x2, x1
if y1>y2:
y1, y2 = y2, y1
assert x1<=x2
assert y1<=y2
index = nonzero((xdata>x1) & (xdata<x2) & (ydata>y1) & (ydata<y2))[0]
ids = [self.nodes[i] for i in index]
ids = self.update_selection(ids, key)
self.selection_listener(self.current_dim, ids)
def lasso_select_callback(self, verts, key=None):
xys = c_[self.xaxis_data[:,newaxis], self.yaxis_data[:,newaxis]]
index = nonzero(points_inside_poly(xys, verts))[0]
ids = [self.nodes[i] for i in index]
ids = self.update_selection(ids, key)
self.selection_listener(self.current_dim, ids)
def set_current_selection(self, selection):
linewidth = zeros(self.xaxis_data.shape)
edge_color_rgba = self._edge_color_rgba.copy()
index = [i for i in range(len(self.nodes)) if self.nodes[i] in selection[self.current_dim]]
if len(index) > 0:
linewidth[index] = 2
idents = selection[self.current_dim]
edge_index = [i for i,edge in enumerate(self.graph.edges()) if (edge[0] in idents and edge[1] in idents)]
if len(edge_index)>0:
for i in edge_index:
edge_color_rgba[i,:] = self._edge_color_selected
self._A = None
self.edge_collection._colors = edge_color_rgba
self.selected_nodes.set_linewidth(linewidth)
self.canvas.draw()
def is_mappable_with(self, obj):
"""Returns True if dataset/selection is mappable with this plot.
"""
if isinstance(obj, fluents.dataset.Dataset):
if self.current_dim in obj.get_dim_name():
return True
return False
def _update_color_from_dataset(self, ds):
"""Updates the facecolors from a dataset.
"""
array = ds.asarray()
#only support for 2d-arrays:
try:
m, n = array.shape
except:
raise ValueError, "No support for more than 2 dimensions."
# is dataset a vector or matrix?
if not n==1:
# we have a category dataset
if isinstance(ds, fluents.dataset.CategoryDataset):
vec = dot(array, diag(arange(n))).sum(1)
else:
vec = array.sum(1)
else:
vec = array.ravel()
indices = ds.get_indices(self.current_dim, self.nodes)
nodes = ds.existing_identifiers(self.current_dim, self.nodes)
v = vec.take(indices, 0)
d = dict(zip(nodes, list(v)))
map_vec = zeros(len(self.nodes))
for i, n in enumerate(self.nodes):
map_vec[i] = d.get(n, -1)
# update facecolors
self.node_collection.set_array(map_vec)
self.node_collection.set_clim(map_vec.min(), map_vec.max())
self.node_collection.update_scalarmappable() #sets facecolors from array
self.canvas.draw()