Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/workflows/gobrowser.py

659 lines
23 KiB
Python

import gtk
from fluents import dataset, logger, plots, workflow, fluents, project
import geneontology
from matplotlib.nxutils import points_inside_poly
import matplotlib
#from scipy import array, randn, log, ones, zeros
from scipy import *
from numpy import matlib
import networkx
import re
import rpy
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
('IGI', 'Inferred from genetic interaction'),
('IPI', 'Inferred from physical interaction'),
('ISS', 'Inferred from sequence or structure similarity'),
('IDA', 'Inferred from direct assay'),
('IEP', 'Inferred on expression pattern'),
('IEA', 'Inferred from electronic annotation'),
('TAS', 'Traceable author statement'),
('NAS', 'Non-traceable author statement'),
('ND', 'No biological data available'),
('RCA', 'Inferred from reviewed computational analysis'),
('IC', 'Inferred by curator')]
DISTANCE_METRICS = [('resnik', 'Resnik'),
('jiang', 'Jiang & Conrath'),
('fussimeg', 'FuSSiMeG')]
GO_DATA_DIR = '/home/einarr/data'
evidence = None
go = None
class GoTermView (gtk.Frame):
def __init__(self):
gtk.Frame.__init__(self)
tab = gtk.Table(2, 2, False)
self._table = tab
self._name = gtk.Label('')
self._name.set_line_wrap(True)
self._name.set_alignment(0, 0)
name_label = gtk.Label('Name:')
name_label.set_alignment(0, 0)
tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)
self._def = gtk.TextBuffer()
textview = gtk.TextView(self._def)
textview.set_wrap_mode(gtk.WRAP_WORD)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(textview)
def_label = gtk.Label('Def:')
def_label.set_alignment(0.0, 0.0)
tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)
self.add(tab)
self.set_go_term(None)
def set_go_term(self, term):
if term:
self.set_label(term['id'])
self._name.set_text(term['name'])
self._def.set_text(term['def'])
else:
self.set_label('GO Term')
self._name.set_text('')
self._def.set_text('')
class GeneOntologyTree (gtk.HPaned):
def __init__(self, network):
gtk.HPaned.__init__(self)
treemodel = geneontology.get_go_treestore(network)
self._treemodel = treemodel
self._tree_view = gtk.TreeView(treemodel)
self._selected_terms = set()
self._tree_view.set_fixed_height_mode(True)
# Set up context menu
self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
self._tree_view.connect('popup_menu', self._popup_menu)
self._tree_view.connect('button_press_event', self._on_button_press)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_fixed_width(200)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 0)
renderer = gtk.CellRendererToggle()
renderer.set_property('activatable', True)
renderer.connect('toggled', self._toggle_selected)
renderer.set_active(True)
renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
go_column = gtk.TreeViewColumn('T', renderer, active=2)
go_column.set_fixed_width(20)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 1)
renderer = gtk.CellRendererText()
go_column = gtk.TreeViewColumn('Name', renderer, text=1)
go_column.set_fixed_width(200)
go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
go_column.set_resizable(True)
self._tree_view.insert_column(go_column, 2)
self._desc_view = GoTermView()
self._tree_view.connect('cursor-changed', self._on_cursor_changed)
scrolled_window = gtk.ScrolledWindow()
scrolled_window.add(self._tree_view)
self.add1(scrolled_window)
self.add2(self._desc_view)
self.show_all()
def _on_cursor_changed(self, tree):
path, col = self._tree_view.get_cursor()
current = self._treemodel.get_iter(path)
term = self._treemodel.get_value(current, 3)
self._desc_view.set_go_term(term)
##
## GTK Callback functions
##
def _popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def _on_button_press(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
iter = None
if path:
iter = self._treemodel.get_iter(path[0])
obj = self._treemodel.get_value(iter, 3)
else:
obj = None
self._context_menu.set_current_term(obj, iter)
if event.button == 3:
self._context_menu.popup(None, None, None, event.button, event.time)
def _toggle_selected(self, renderer, path):
iter = self._treemodel.get_iter(path)
selected = self._treemodel.get_value(iter, 2)
id = self._treemodel.get_value(iter, 0)
self._treemodel.set_value(iter, 2, not selected)
if selected:
self._selected_terms.remove(id)
else:
self._selected_terms.add(id)
class GoTermContextMenu (gtk.Menu):
"""Context menu for GO terms in the gene ontology browser"""
def __init__(self, treemodel, treeview):
self._treemodel = treemodel
self._treeview = treeview
self._current_term = None
self._current_iter = None
gtk.Menu.__init__(self)
# Popuplate tree
self._expand_item = i = gtk.MenuItem('Expand')
i.connect('activate', self._on_expand_subtree, treemodel, treeview)
self.append(i)
i.show()
self._collapse_item = i = gtk.MenuItem('Collapse')
i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
self.append(i)
i.show()
self._select_subtree_item = i = gtk.MenuItem('Select subtree')
i.connect('activate', self._on_select_subtree, treemodel, treeview)
self.append(i)
i.show()
def set_current_term(self, term, it):
self._current_term = term
self._current_iter = it
def _on_expand_subtree(self, item, treemodel, treeview):
path = treemodel.get_path(self._current_iter)
treeview.expand_row(path, True)
def _on_collapse_subtree(self, item, treemodel, treeview):
treeview.collapse_row(treemodel.get_path(self._current_iter))
def _on_select_subtree(self, item, treemodel, treeview):
logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
(self._current_term['id'], self._current_term['name']))
ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
project.project.set_selection('go-terms', set(ids))
class LoadGOFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')
def run(self):
global go
if go:
return
go = geneontology.read_default_go()
browser = GeneOntologyTree(go)
label = gtk.Label('_Gene Ontology')
label.set_use_underline(True)
fluents.app['bottom_notebook'].append_page(browser, label)
class LoadAnnotationsFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
self.annotations = None
def run(self):
global evidence
f = open(GO_DATA_DIR + '/goa-condensed')
ev_codes = f.readline().split()
go_terms = []
lines = f.readlines()
m = zeros((len(lines), len(ev_codes)))
for i, l in enumerate(lines):
values = l.split()
go_terms.append(values[0])
for j, v in enumerate(values[1:]):
m[i,j] = float(v.strip())
d = dataset.Dataset(m,
[['go-terms', go_terms], ['evidence', ev_codes]],
name='GO evidence')
evidence = d
return [d]
class GOWeightDialog(gtk.Dialog):
def __init__(self):
gtk.Dialog.__init__(self, 'GO Gene List Influence',
None,
gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
table = gtk.Table(2, 2)
sim_lbl = gtk.Label('Similarity threshold: ')
table.attach(sim_lbl, 0, 1, 0, 1)
adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
table.attach(sim_spin, 1, 2, 0, 1)
rank_lbl = gtk.Label('Rank threshold: ')
table.attach(rank_lbl, 0, 1, 1, 2)
rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
table.attach(rank_spin, 1, 2, 1, 2)
sim_lbl.show()
sim_spin.show()
rank_lbl.show()
rank_spin.show()
table.show()
self.vbox.add(table)
self._sim_spin = sim_spin
self._rank_spin = rank_spin
def set_options(self, options):
self._sim_spin.set_value(options['similarity_threshold'])
self._rank_spin.set_value(options['rank_threshold'])
def set_editable(self, editable):
self._sim_spin.set_sensitive(editable)
self._rank_spin.set_sensitive(editable)
def update_options(self, options):
options['similarity_threshold'] = self._sim_spin.get_value()
options['rank_threshold'] = self._rank_spin.get_value()
class DistanceToSelectionFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
self.options = DistanceToSelectionOptions()
def run(self, similarities, selection):
self.show_gui(similarities, self.options)
retval = []
dims = similarities.get_dim_name()
if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
logger.log('warning', 'Are you sure this is a similarity matrix?')
dim = dims[0]
print "dim", dim
print "selection", selection[dim]
print "indices", similarities.get_indices(dim, selection[dim])
indices = similarities.get_indices(dim, selection[dim])
m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
("_dummy", '0')]))
return retval
def show_gui(self, similarities, options, edit=True):
dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class GOWeightFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
self.options = GOWeightOptions()
def run(self, genelist, similarity):
## Show dialog box
self.show_gui(self.options)
## assure that data is "correct", i.e., that we can perform
## the desired operations.
common_dims = genelist.common_dims(similarity)
if len(common_dims) == 0:
logger.log('error', 'No common dimension in the selected datasets.')
elif len(common_dims) > 1:
logger.log('error', "More than one common dimension in the " +
"selected datasets. Don't know what to do.")
gene_dim = common_dims[0]
logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)
## Do the calculations.
d = {}
def show_gui(self, options, edit=True):
dialog = GOWeightDialog()
dialog.set_options(self.options)
dialog.show_all()
dialog.set_editable(edit)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
return dialog.update_options(self.options)
else:
return options
class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options, ['X'])
class TTestOptionsDialog(workflow.OptionsDialog):
def __init__(self, data, options):
workflow.OptionsDialog.__init__(self, data, options,
['X', 'Categories'])
vb = gtk.VBox()
l = gtk.Label("Limit")
adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
sb = gtk.SpinButton(adj, 0.0, 2)
l.show()
sb.show()
vb.add(l)
vb.add(sb)
vb.show()
self.nb.insert_page(vb, gtk.Label("Limit"), -1)
class TTestFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 't-test', 't-test')
self.options = TTestOptions()
def run(self, x, categories):
self.show_gui(x, categories)
retval = []
m = x.asarray()
c = categories.asarray()
# Nonsmokers and current smokers
ns = m.take(nonzero(c[:,0]), 0)[0]
cs = m.take(nonzero(c[:,2]), 0)[0]
tscores = stats.ttest_ind(ns, cs)
print "Out data:", self.options['out_data']
tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
('_t', ['0'])],
name='t-values')
if 't-value' in self.options['out_data']:
retval.append(tds)
pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
('_p', ['0'])],
name='p-values')
if 'p-value' in self.options['out_data']:
retval.append(pds)
if ProbabilityHistogramPlot in self.options['out_plots']:
retval.append(ProbabilityHistogramPlot(pds))
if VolcanoPlot in self.options['out_plots']:
fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
('_dummy', ['0'])],
name="Fold change")
retval.append(VolcanoPlot(fcds, pds, 'gene_id'))
return retval
def show_gui(self, x, categories):
dialog = TTestOptionsDialog([x, categories], self.options)
response = dialog.run()
dialog.hide()
if response == gtk.RESPONSE_OK:
dialog.set_output()
return dialog.get_options()
else:
return options
class PlotDagFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'go-dag', 'Build DAG')
def run(self, selection):
g = self.get_network(list(selection['go-terms']))
# print g.edges()
ds = dataset.GraphDataset(networkx.adj_matrix(g),
[('go-terms', g.nodes()), ('_go-terms', g.nodes())],
name="DAG")
return [DagPlot(g)]
def get_network(self, terms, subtree='bp'):
"""Returns a DAG connecting the given terms by including their parents
up to the level needed to connect them. The subtree parameter is one of
mf - molecular function
bp - biological process
cc - cellular component"""
rpy.r.library("GOstats")
if subtree == 'mf':
subtree_r = rpy.r.GOMFPARENTS
elif subtree == 'bp':
subtree_r = rpy.r.GOBPPARENTS
elif subtree == 'cc':
subtree_r = rpy.r.GOCCPARENTS
else:
raise Exception("Unknown subtree. Use mf, bp or cc.")
g = rpy.r.GOGraph(terms, subtree_r)
edges = rpy.r.edges(g)
nxgraph = networkx.DiGraph()
for child, d in edges.items():
for parent in d.keys():
nxgraph.add_edge(parent, child)
return nxgraph
class TTestOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
(VolcanoPlot, 'Histogram', True)]
self['all_data'] = [('t-value', 't-values', True),
('p-value', 'Probabilities', True),
('categories', 'Categories', False)]
self['out_data'] = ['t-value', 'p-value']
class DistanceToSelectionOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['all_data'] = [('mindist', 'Minimum distance', True)]
class GOWeightOptions(workflow.Options):
def __init__(self):
workflow.Options.__init__(self)
self['similarity_threshold'] = 0.0
self['rank_threshold'] = 0.0
class ProbabilityHistogramPlot(plots.HistogramPlot):
def __init__(self, ds):
plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)
class VolcanoPlot(plots.ScatterPlot):
def __init__(self, fold_ds, p_ds, dim, **kw):
plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy',
'0', '0',
name="Volcano plot",
sel_dim_2='_p', **kw)
class DagPlot(plots.Plot):
def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
with_labels=False, name='DAG Plot'):
plots.Plot.__init__(self, name)
self.nodes = graph.nodes()
self.graph = graph
self._pos = pos
self._nodesize = nodesize
self._nodecolor = nodecolor
self._with_labels = with_labels
self.current_dim = dim
if not self._pos:
self._pos = self._calc_pos(graph)
self._xy = asarray([self._pos[node] for node in self.nodes])
self.xaxis_data = self._xy[:,0]
self.yaxis_data = self._xy[:,1]
# Initial draw
self.default_props = {'nodesize' : 50,
'nodecolor' : 'blue',
'edge_color' : 'gray',
'edge_color_selected' : 'red'}
self.node_collection = None
self.edge_collection = None
self.node_labels = None
lw = zeros(self.xaxis_data.shape)
self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data,
s=self._nodesize,
c=self._nodecolor,
linewidth=lw,
zorder=3)
self._mappable = self.node_collection
# selected nodes is a transparent graph that adjust node-edge visibility
# according to the current selection needed to get get the selected
# nodes 'on top' as zorder may not be defined individually
self.selected_nodes = self.axes.scatter(self.xaxis_data,
self.yaxis_data,
s=self._nodesize,
c=self._nodecolor,
edgecolor='r',
linewidth=lw,
zorder=4,
alpha=0)
edge_color = self.default_props['edge_color']
self.edge_collection = networkx.draw_networkx_edges(self.graph,
self._pos,
ax=self.axes,
edge_color=edge_color)
# edge color rgba-arrays
self._edge_color_rgba = matlib.repmat(plots.ColorConverter().to_rgba(edge_color),
self.graph.number_of_edges(),1)
self._edge_color_selected = plots.ColorConverter().to_rgba(self.default_props['edge_color_selected'])
if self._with_labels:
self.node_labels = networkx.draw_networkx_labels(self.graph,
self._pos,
ax=self.axes)
# remove axes, frame and grid
self.axes.set_xticks([])
self.axes.set_yticks([])
self.axes.grid(False)
self.axes.set_frame_on(False)
self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
def _calc_pos(self, graph):
"""Calculates position for graph nodes."""
gv_graph = networkx.DiGraph()
for start, end in graph.edges():
gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', ''))
pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot")
pos = {}
for k, v in pos_gv.items():
if k != "all":
pos["GO:%s" % k] = v
else:
pos[k] = v
return pos
def rectangle_select_callback(self, x1, y1, x2, y2, key):
ydata = self.yaxis_data
xdata = self.xaxis_data
# find indices of selected area
if x1>x2:
x1, x2 = x2, x1
if y1>y2:
y1, y2 = y2, y1
assert x1<=x2
assert y1<=y2
index = nonzero((xdata>x1) & (xdata<x2) & (ydata>y1) & (ydata<y2))[0]
ids = [self.nodes[i] for i in index]
ids = self.update_selection(ids, key)
self.selection_listener(self.current_dim, ids)
def lasso_select_callback(self, verts, key=None):
xys = c_[self.xaxis_data[:,newaxis], self.yaxis_data[:,newaxis]]
index = nonzero(points_inside_poly(xys, verts))[0]
ids = [self.nodes[i] for i in index]
ids = self.update_selection(ids, key)
self.selection_listener(self.current_dim, ids)
def set_current_selection(self, selection):
linewidth = zeros(self.xaxis_data.shape)
edge_color_rgba = self._edge_color_rgba.copy()
index = [i for i in range(len(self.nodes)) if self.nodes[i] in selection[self.current_dim]]
if len(index) > 0:
linewidth[index] = 2
idents = selection[self.current_dim]
edge_index = [i for i,edge in enumerate(self.graph.edges()) if (edge[0] in idents and edge[1] in idents)]
if len(edge_index)>0:
for i in edge_index:
edge_color_rgba[i,:] = self._edge_color_selected
self._A = None
self.edge_collection._colors = edge_color_rgba
self.selected_nodes.set_linewidth(linewidth)
self.canvas.draw()