"""Gene Ontology (GO) workflow for fluents.

Provides a GO browser widget, workflow functions for loading the ontology and
its annotations, simple statistics (t-test, distance to selection) and a DAG
plot of GO terms.
"""

import os

import gtk
from fluents import dataset, logger, plots, workflow, fluents, project, view, main
import geneontology
from matplotlib.nxutils import points_inside_poly
import matplotlib
#from scipy import array, randn, log, ones, zeros
from scipy import *
from scipy import stats
from numpy import matlib
import networkx
import re
import rpy

EVIDENCE_CODES = [('IMP', 'Inferred from mutant phenotype'),
                  ('IGI', 'Inferred from genetic interaction'),
                  ('IPI', 'Inferred from physical interaction'),
                  ('ISS', 'Inferred from sequence or structure similarity'),
                  ('IDA', 'Inferred from direct assay'),
                  ('IEP', 'Inferred on expression pattern'),
                  ('IEA', 'Inferred from electronic annotation'),
                  ('TAS', 'Traceable author statement'),
                  ('NAS', 'Non-traceable author statement'),
                  ('ND', 'No biological data available'),
                  ('RCA', 'Inferred from reviewed computational analysis'),
                  ('IC', 'Inferred by curator')]

DISTANCE_METRICS = [('resnik', 'Resnik'),
                    ('jiang', 'Jiang & Conrath'),
                    ('fussimeg', 'FuSSiMeG')]

# Directory holding the GO data files.  The historical default is kept, but
# can be overridden through the environment so the module works on other hosts.
GO_DATA_DIR = os.environ.get('GO_DATA_DIR', '/home/einarr/data')

# Module-level state shared between the workflow functions below.
evidence = None  # dataset produced by LoadAnnotationsFunction
go = None        # ontology graph produced by LoadGOFunction


class GoTermView(gtk.Frame):
    """Frame displaying id, name, 'is a' parents and definition of a GO term."""

    def __init__(self):
        gtk.Frame.__init__(self)
        tab = gtk.Table(2, 3, False)
        self._table = tab

        # Row 0: term name
        self._name = gtk.Label('')
        self._name.set_line_wrap(True)
        self._name.set_alignment(0, 0)
        name_label = gtk.Label('Name:')
        name_label.set_alignment(0, 0)
        tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(self._name, 1, 2, 0, 1, gtk.FILL | gtk.EXPAND, gtk.FILL, 5, 5)

        # Row 1: one button per 'is a' parent (rebuilt by set_go_term)
        self._isa_parents = gtk.HBox()
        isa_parents_label = gtk.Label('Is a:')
        tab.attach(isa_parents_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)

        # Row 2: definition text
        self._def = gtk.TextBuffer()
        textview = gtk.TextView(self._def)
        textview.set_wrap_mode(gtk.WRAP_WORD)
        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(textview)
        def_label = gtk.Label('Def:')
        def_label.set_alignment(0.0, 0.0)
        tab.attach(def_label, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(scrolled_window, 1, 2, 2, 3,
                   gtk.FILL | gtk.EXPAND, gtk.FILL | gtk.EXPAND, 5, 5)

        self._tab = tab
        self.add(tab)
        self.set_go_term(None)

    def set_go_term(self, term):
        """Show the given term, or clear the view when term is None/empty.

        @param term Mapping with the keys 'id', 'name', 'def' and 'is_a'.
        """
        if term:
            label, name, definition = term['id'], term['name'], term['def']
            parents = term['is_a']
        else:
            label, name, definition, parents = 'GO Term', '', '', []

        self.set_label(label)
        self._name.set_text(name)
        self._def.set_text(definition)

        # The parent buttons are rebuilt from scratch for every term.
        self._tab.remove(self._isa_parents)
        self._isa_parents = gtk.HBox()
        for parent in parents:
            btn = gtk.Button(parent)
            btn.show()
            self._isa_parents.add(btn)
        if term:
            self._isa_parents.show()
        self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5)


class GeneOntologyTree(gtk.HPaned):
    """GO browser: a tree of terms on the left, term details on the right."""

    def __init__(self, network):
        gtk.HPaned.__init__(self)
        self.set_position(400)

        treemodel = geneontology.get_go_treestore(network)
        self._treemodel = treemodel
        self._tree_view = gtk.TreeView(treemodel)
        self._selected_terms = set()
        self._tree_view.set_fixed_height_mode(True)

        # Set up context menu
        self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
        self._tree_view.connect('popup_menu', self._popup_menu)
        self._tree_view.connect('button_press_event', self._on_button_press)

        # Column 0: GO id (model column 0)
        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_fixed_width(200)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 0)

        # Column 1: selection toggle (model column 2)
        renderer = gtk.CellRendererToggle()
        renderer.set_property('activatable', True)
        renderer.connect('toggled', self._toggle_selected)
        renderer.set_active(True)
        renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        go_column = gtk.TreeViewColumn('T', renderer, active=2)
        go_column.set_fixed_width(20)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 1)

        # Column 2: term name (model column 1)
        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('Name', renderer, text=1)
        go_column.set_fixed_width(200)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 2)

        self._desc_view = GoTermView()
        self._tree_view.connect('cursor-changed', self._on_cursor_changed)

        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(self._tree_view)
        self.add1(scrolled_window)
        self.add2(self._desc_view)
        self.show_all()

    ##
    ## GTK Callback functions
    ##
    def _on_cursor_changed(self, tree):
        """Show the term under the cursor in the description view."""
        path, col = self._tree_view.get_cursor()
        if path is None:
            # No row has the cursor; get_iter(None) would raise.
            self._desc_view.set_go_term(None)
            return
        current = self._treemodel.get_iter(path)
        term = self._treemodel.get_value(current, 3)
        self._desc_view.set_go_term(term)

    def _popup_menu(self, *rest):
        """Keyboard-triggered context menu."""
        # The attribute is _context_menu; the original referenced the
        # non-existent self.menu and raised AttributeError.
        self._context_menu.popup(None, None, None, 0, 0)

    def _on_button_press(self, widget, event):
        """Track the term under the pointer; open the menu on right click."""
        hit = widget.get_path_at_pos(int(event.x), int(event.y))
        tree_iter = None
        obj = None
        if hit:
            tree_iter = self._treemodel.get_iter(hit[0])
            obj = self._treemodel.get_value(tree_iter, 3)
        self._context_menu.set_current_term(obj, tree_iter)

        if event.button == 3:
            self._context_menu.popup(None, None, None, event.button, event.time)

    def _toggle_selected(self, renderer, path):
        """Flip the selection flag of a row and mirror it in _selected_terms."""
        tree_iter = self._treemodel.get_iter(path)
        selected = self._treemodel.get_value(tree_iter, 2)
        term_id = self._treemodel.get_value(tree_iter, 0)

        self._treemodel.set_value(tree_iter, 2, not selected)

        if selected:
            # discard: the flag in the model may predate this set
            self._selected_terms.discard(term_id)
        else:
            self._selected_terms.add(term_id)


class GoTermContextMenu(gtk.Menu):
    """Context menu for GO terms in the gene ontology browser"""

    def __init__(self, treemodel, treeview):
        self._treemodel = treemodel
        self._treeview = treeview
        self._current_term = None
        self._current_iter = None

        gtk.Menu.__init__(self)

        # Populate menu
        self._expand_item = self._add_item(
            'Expand', self._on_expand_subtree, treemodel, treeview)
        self._collapse_item = self._add_item(
            'Collapse', self._on_collapse_subtree, treemodel, treeview)
        self._select_subtree_item = self._add_item(
            'Select subtree', self._on_select_subtree, treemodel, treeview)

    def _add_item(self, label, handler, treemodel, treeview):
        """Create, connect, append and show one menu item; return it."""
        item = gtk.MenuItem(label)
        item.connect('activate', handler, treemodel, treeview)
        self.append(item)
        item.show()
        return item

    def set_current_term(self, term, it):
        """Remember the term and tree iter the menu actions operate on."""
        self._current_term = term
        self._current_iter = it

    def _on_expand_subtree(self, item, treemodel, treeview):
        if self._current_iter is None:
            return  # menu opened over empty space
        path = treemodel.get_path(self._current_iter)
        treeview.expand_row(path, True)

    def _on_collapse_subtree(self, item, treemodel, treeview):
        if self._current_iter is None:
            return  # menu opened over empty space
        treeview.collapse_row(treemodel.get_path(self._current_iter))

    def _on_select_subtree(self, item, treemodel, treeview):
        """Select the current term and everything reachable from it in go."""
        if self._current_term is None:
            return  # menu opened over empty space
        logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
                   (self._current_term['id'], self._current_term['name']))
        ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
        project.project.set_selection('go-terms', set(ids))


class LoadGOFunction(workflow.Function):
    """Loads the default ontology once and adds a browser tab for it."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')

    def run(self):
        global go
        if go is not None:
            return  # already loaded; do not add a second browser page

        go = geneontology.read_default_go()
        browser = GeneOntologyTree(go)
        label = gtk.Label('_Gene Ontology')
        label.set_use_underline(True)
        fluents.app['bottom_notebook'].append_page(browser, label)


class LoadAnnotationsFunction(workflow.Function):
    """Reads GO_DATA_DIR/goa-condensed into a go-terms x evidence dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def run(self):
        """Parse the annotation file.

        The first line holds whitespace separated evidence codes; every other
        line is a GO term followed by one number per evidence code.

        @return List with the single 'GO evidence' dataset.
        """
        global evidence
        f = open(GO_DATA_DIR + '/goa-condensed')
        try:
            ev_codes = f.readline().split()
            rows = [line.split() for line in f]
        finally:
            f.close()

        # Blank lines (e.g. a trailing newline) carry no term; skip them.
        rows = [values for values in rows if values]

        go_terms = []
        m = zeros((len(rows), len(ev_codes)))
        for i, values in enumerate(rows):
            go_terms.append(values[0])
            for j, v in enumerate(values[1:]):
                m[i, j] = float(v)

        d = dataset.Dataset(m,
                            [['go-terms', go_terms], ['evidence', ev_codes]],
                            name='GO evidence')
        evidence = d
        return [d]


class GOWeightDialog(gtk.Dialog):
    """Modal dialog editing the similarity and rank thresholds."""

    def __init__(self):
        gtk.Dialog.__init__(self, 'GO Gene List Influence',
                            None,
                            gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
                            (gtk.STOCK_OK, gtk.RESPONSE_OK,
                             gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))

        table = gtk.Table(2, 2)

        sim_lbl = gtk.Label('Similarity threshold: ')
        table.attach(sim_lbl, 0, 1, 0, 1)
        adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
        sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
        table.attach(sim_spin, 1, 2, 0, 1)

        rank_lbl = gtk.Label('Rank threshold: ')
        table.attach(rank_lbl, 0, 1, 1, 2)
        rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
        rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
        table.attach(rank_spin, 1, 2, 1, 2)

        for widget in (sim_lbl, sim_spin, rank_lbl, rank_spin, table):
            widget.show()

        self.vbox.add(table)
        self._sim_spin = sim_spin
        self._rank_spin = rank_spin

    def set_options(self, options):
        """Load the spin buttons from the options mapping."""
        self._sim_spin.set_value(options['similarity_threshold'])
        self._rank_spin.set_value(options['rank_threshold'])

    def set_editable(self, editable):
        """Enable or disable both spin buttons."""
        self._sim_spin.set_sensitive(editable)
        self._rank_spin.set_sensitive(editable)

    def update_options(self, options):
        """Write the spin button values back into options (in place)."""
        options['similarity_threshold'] = self._sim_spin.get_value()
        options['rank_threshold'] = self._rank_spin.get_value()


class DistanceToSelectionFunction(workflow.Function):
    """For each row of a similarity matrix, the maximum over the selection."""

    def __init__(self):
        workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
        self.options = DistanceToSelectionOptions()

    def run(self, similarities, selection):
        """Compute the row-wise maximum over the selected columns.

        @param similarities Dataset whose two dimensions are expected to be
                            named 'x' and '_x'.
        @param selection Mapping from dimension name to selected identifiers.
        @return List with one dataset, or an empty list if nothing in the
                selection is present in the first dimension.
        """
        self.show_gui(similarities, self.options)

        dims = similarities.get_dim_name()
        if dims[0] != "_%s" % dims[1] and dims[1] != "_%s" % dims[0]:
            logger.log('warning', 'Are you sure this is a similarity matrix?')

        dim = dims[0]
        indices = similarities.get_indices(dim, selection[dim])
        logger.log('debug', 'dist-to-sel: dim=%s selection=%s indices=%s'
                   % (dim, selection[dim], indices))

        if len(indices) == 0:
            # A maximum over zero columns is undefined.
            logger.log('warning', 'Selection is empty in dimension: %s' % dim)
            return []

        m = similarities.asarray().take(indices, 1).max(1)
        return [dataset.Dataset(m, [(dim, similarities[dim]),
                                    ("_dummy", ['0'])])]

    def show_gui(self, similarities, options, edit=True):
        """Run the options dialog; return the resulting options."""
        dialog = DistanceToSelectionOptionsDialog([similarities], options)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            dialog.set_output()
            return dialog.get_options()
        return options


class GOWeightFunction(workflow.Function):
    """Weighting of a gene list by GO similarity (calculation unfinished)."""

    def __init__(self):
        # NOTE(review): the id 'load-go-ann' is also used by
        # LoadAnnotationsFunction; this looks like a copy/paste slip.  Left
        # unchanged because the id may be referenced outside this file.
        workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
        self.options = GOWeightOptions()

    def run(self, genelist, similarity):
        """Validate the inputs; the actual calculation is not implemented.

        @return None.
        """
        ## Show dialog box
        self.show_gui(self.options)

        ## assure that data is "correct", i.e., that we can perform
        ## the desired operations.
        common_dims = genelist.common_dims(similarity)
        if len(common_dims) == 0:
            logger.log('error', 'No common dimension in the selected datasets.')
            return None  # indexing common_dims[0] below would raise
        elif len(common_dims) > 1:
            logger.log('error', "More than one common dimension in the " +
                       "selected datasets. Don't know what to do.")
            return None

        gene_dim = common_dims[0]
        logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)

        ## Do the calculations.
        # TODO: not implemented in the original either.
        return None

    def show_gui(self, options, edit=True):
        """Run the threshold dialog; return the (possibly updated) options."""
        dialog = GOWeightDialog()
        dialog.set_options(options)
        dialog.show_all()
        dialog.set_editable(edit)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            # update_options mutates in place and returns None, so return the
            # mapping itself rather than the call's result.
            dialog.update_options(options)
        return options


class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
    """Options dialog with a single 'X' input."""

    def __init__(self, data, options):
        workflow.OptionsDialog.__init__(self, data, options, ['X'])


class TTestOptionsDialog(workflow.OptionsDialog):
    """Options dialog for the t-test with an extra 'Limit' page."""

    def __init__(self, data, options):
        workflow.OptionsDialog.__init__(self, data, options,
                                        ['X', 'Categories'])

        vb = gtk.VBox()
        l = gtk.Label("Limit")
        adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
        sb = gtk.SpinButton(adj, 0.0, 2)
        l.show()
        sb.show()
        vb.add(l)
        vb.add(sb)
        vb.show()
        self.nb.insert_page(vb, gtk.Label("Limit"), -1)


class TTestFunction(workflow.Function):
    """Two-sample t-test between the groups in category columns 0 and 2."""

    def __init__(self):
        workflow.Function.__init__(self, 't-test', 't-test')
        self.options = TTestOptions()

    def run(self, x, categories):
        """Run the test and return the datasets/plots enabled in the options."""
        self.show_gui(x, categories)

        retval = []
        m = x.asarray()
        c = categories.asarray()

        # Nonsmokers and current smokers
        ns = m.take(nonzero(c[:, 0]), 0)[0]
        cs = m.take(nonzero(c[:, 2]), 0)[0]

        tscores = stats.ttest_ind(ns, cs)

        logger.log('debug', 'Out data: %s' % (self.options['out_data'],))
        tds = dataset.Dataset(tscores[0],
                              [('gene_id', x['gene_id']), ('_t', ['0'])],
                              name='t-values')
        if 't-value' in self.options['out_data']:
            retval.append(tds)

        pds = dataset.Dataset(tscores[1],
                              [('gene_id', x['gene_id']), ('_p', ['0'])],
                              name='p-values')
        if 'p-value' in self.options['out_data']:
            retval.append(pds)

        if ProbabilityHistogramPlot in self.options['out_plots']:
            retval.append(ProbabilityHistogramPlot(pds))

        if VolcanoPlot in self.options['out_plots']:
            # Fold change: ratio of the per-gene group means.
            fc = ns.mean(0) / cs.mean(0)
            fcds = dataset.Dataset(fc,
                                   [('gene_id', x['gene_id']),
                                    ('_dummy', ['0'])],
                                   name="Fold change")
            retval.append(VolcanoPlot(fcds, pds, 'gene_id'))

        return retval

    def show_gui(self, x, categories):
        """Run the options dialog; return the resulting options."""
        dialog = TTestOptionsDialog([x, categories], self.options)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            dialog.set_output()
            return dialog.get_options()
        # The original returned the undefined name `options` here.
        return self.options


class SetICFunction(workflow.Function):
    """Stores a dataset as the current information content on the workflow."""

    def __init__(self):
        workflow.Function.__init__(self, 'set-ic', 'Set IC')

    def run(self, ds):
        if 'go-terms' in ds.get_dim_name():
            main.workflow.current_ic = ds
        else:
            logger.log('warning', 'Cannot use this dataset as IC on the go-terms dimension')
        return


class PlotDagFunction(workflow.Function):
    """Builds a DAG plot connecting the currently selected GO terms."""

    def __init__(self):
        workflow.Function.__init__(self, 'go-dag', 'Build DAG')

    def run(self, selection):
        # The original also built a GraphDataset here but never used it.
        g = self.get_network(list(selection['go-terms']))
        return [DagPlot(g)]

    def get_network(self, terms, subtree='bp'):
        """Returns a DAG connecting the given terms by including their parents
        up to the level needed to connect them. The subtree parameter is one of
            mf - molecular function
            bp - biological process
            cc - cellular component"""

        rpy.r.library("GOstats")

        parent_tables = {'mf': 'GOMFPARENTS',
                         'bp': 'GOBPPARENTS',
                         'cc': 'GOCCPARENTS'}
        if subtree not in parent_tables:
            # ValueError is a subclass of the Exception raised originally.
            raise ValueError("Unknown subtree. Use mf, bp or cc.")
        subtree_r = getattr(rpy.r, parent_tables[subtree])

        g = rpy.r.GOGraph(terms, subtree_r)
        edges = rpy.r.edges(g)

        nxgraph = networkx.DiGraph()
        for child, d in edges.items():
            for parent in d.keys():
                nxgraph.add_edge(parent, child)

        return nxgraph


class TTestOptions(workflow.Options):
    """Default outputs of the t-test function."""

    def __init__(self):
        workflow.Options.__init__(self)
        self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
                             (VolcanoPlot, 'Volcano plot', True)]
        self['all_data'] = [('t-value', 't-values', True),
                            ('p-value', 'Probabilities', True),
                            ('categories', 'Categories', False)]
        self['out_data'] = ['t-value', 'p-value']


class DistanceToSelectionOptions(workflow.Options):
    """Default outputs of the distance-to-selection function."""

    def __init__(self):
        workflow.Options.__init__(self)
        self['all_data'] = [('mindist', 'Minimum distance', True)]


class GOWeightOptions(workflow.Options):
    """Thresholds edited by GOWeightDialog."""

    def __init__(self):
        workflow.Options.__init__(self)
        self['similarity_threshold'] = 0.0
        self['rank_threshold'] = 0.0


class ProbabilityHistogramPlot(plots.HistogramPlot):
    """50-bin histogram named "Confidence"."""

    def __init__(self, ds):
        plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)


class VolcanoPlot(plots.ScatterPlot):
    """Scatter plot of fold change against p-value."""

    def __init__(self, fold_ds, p_ds, dim, **kw):
        # `dim` was accepted but ignored in favour of a hard-coded 'gene_id';
        # the only caller in this file passes 'gene_id'.
        plots.ScatterPlot.__init__(self, fold_ds, p_ds, dim, '_dummy',
                                   '0', '0',
                                   name="Volcano plot",
                                   sel_dim_2='_p', **kw)


class DagPlot(plots.Plot):
    """Plot of a GO DAG; nodes can be coloured, thresholded and selected."""

    def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40,
                 with_labels=False, name='DAG Plot'):

        plots.Plot.__init__(self, name)
        self._add_ic_spin_buttons()

        self.nodes = graph.nodes()
        self.graph = graph
        self._pos = pos
        self._nodesize = nodesize
        self._nodecolor = nodecolor
        self._with_labels = with_labels
        self.visible = set()

        self.current_dim = dim

        if not self._pos:
            self._pos = self._calc_pos(graph)
        self._xy = asarray([self._pos[node] for node in self.nodes])
        self.xaxis_data = self._xy[:, 0]
        self.yaxis_data = self._xy[:, 1]

        # Initial draw
        self.default_props = {'nodesize': 50,
                              'nodecolor': 'blue',
                              'edge_color': 'gray',
                              'edge_color_selected': 'red'}
        self.node_collection = None
        self.edge_collection = None
        self.node_labels = None
        lw = zeros(self.xaxis_data.shape)
        self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data,
                                                 s=self._nodesize,
                                                 c=self._nodecolor,
                                                 linewidth=lw,
                                                 zorder=3)
        self._mappable = self.node_collection

        # selected nodes is a transparent graph that adjust node-edge visibility
        # according to the current selection needed to get get the selected
        # nodes 'on top' as zorder may not be defined individually
        self.selected_nodes = self.axes.scatter(self.xaxis_data,
                                                self.yaxis_data,
                                                s=self._nodesize,
                                                c=self._nodecolor,
                                                edgecolor='r',
                                                linewidth=lw,
                                                zorder=4,
                                                alpha=0)

        edge_color = self.default_props['edge_color']
        self.edge_collection = networkx.draw_networkx_edges(self.graph,
                                                            self._pos,
                                                            ax=self.axes,
                                                            edge_color=edge_color)
        # edge color rgba-arrays
        self._edge_color_rgba = matlib.repmat(
            plots.ColorConverter().to_rgba(edge_color),
            self.graph.number_of_edges(), 1)
        self._edge_color_selected = plots.ColorConverter().to_rgba(
            self.default_props['edge_color_selected'])
        if self._with_labels:
            self.node_labels = networkx.draw_networkx_labels(self.graph,
                                                             self._pos,
                                                             ax=self.axes)

        # remove axes, frame and grid
        self.axes.set_xticks([])
        self.axes.set_yticks([])
        self.axes.grid(False)
        self.axes.set_frame_on(False)
        self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

    def _add_ic_spin_buttons(self):
        """Adds spin buttons to the toolbar for selecting minimum and maximum
        threshold values on information content."""
        sb_min = gtk.SpinButton()
        sb_min.set_range(0, 100)
        sb_min.set_value(1)
        sb_min.set_increments(1, 3)
        sb_min.connect('value-changed', self._on_ic_value_changed)
        self._ic_sb_min = sb_min

        sb_max = gtk.SpinButton()
        sb_max.set_range(0, 100)
        sb_max.set_value(1)
        sb_max.set_increments(1, 3)
        sb_max.connect('value-changed', self._on_ic_value_changed)
        self._ic_sb_max = sb_max

        label = gtk.Label(" < IC < ")
        hbox = gtk.HBox()
        hbox.pack_start(sb_min)
        hbox.pack_start(label)
        hbox.pack_start(sb_max)
        ti = gtk.ToolItem()
        ti.set_expand(False)
        ti.add(hbox)
        for widget in (sb_min, sb_max, label, hbox, ti):
            widget.show()
        self._toolbar.insert(ti, -1)
        ti.set_tooltip(self._toolbar.tooltips, "Set information content threshold")

    def _calc_pos(self, graph):
        """Calculates position for graph nodes using 'dot' layout."""
        # The 'GO:' prefix is stripped before layout and restored afterwards.
        gv_graph = networkx.DiGraph()
        for start, end in graph.edges():
            gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', ''))

        pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot")
        pos = {}
        for k, v in pos_gv.items():
            if k != "all":
                pos["GO:%s" % k] = v
            else:
                pos[k] = v
        return pos

    def set_ic_threshold(self, ic_min, ic_max):
        """Sets Information Content min and max to the given values.
        Updates the plot accordingly to show only values that have an
        information content within the boundaries. Other values are also
        excluded from being selected from the plot.

        @param ic_min Do not show nodes with IC below this value.
        @param ic_max Do not show nodes with IC above this value.
        """
        ic = getattr(main.workflow, 'current_ic', None)
        if ic is None:
            logger.log('warning', 'no ic set')
            return

        icnodes = ic.existing_identifiers('go-terms', self.nodes)
        icindices = ic.get_indices('go-terms', icnodes)
        a = ravel(ic.asarray()[icindices])
        ic_good = set(array(icnodes)[(a > ic_min) & (a < ic_max)])

        # NOTE(review): the source text after "(a" on this line was lost (it
        # looks like everything between a '<' and the next '>' was stripped).
        # The rest of this method is a reconstruction based on the docstring
        # and on self.visible -- confirm against version control.
        sizes = zeros(len(self.nodes))
        visible = set()
        for i, node in enumerate(self.nodes):
            if node in ic_good:
                sizes[i] = self._nodesize
                visible.add(node)
        self.visible = visible
        self.node_collection._sizes = sizes
        self.canvas.draw()

    def _on_ic_value_changed(self, sb):
        """Spin button callback: re-apply the IC threshold.

        NOTE(review): this method was lost from the source; it is required by
        the connect() calls in _add_ic_spin_buttons.  Reconstructed -- confirm.

        @param sb Spin button that changed. Unused.
        """
        self.set_ic_threshold(self._ic_sb_min.get_value(),
                              self._ic_sb_max.get_value())

    def rectangle_select_callback(self, x1, y1, x2, y2, key):
        """Select the visible nodes inside the given rectangle.

        NOTE(review): the method header, the xdata/ydata assignments and
        everything after the index computation were lost from the source.
        Name, signature and the update_selection/selection_listener calls
        are assumed from the fluents plot API -- confirm.
        """
        xdata = self.xaxis_data
        ydata = self.yaxis_data

        # find indices of selected area
        if x1 > x2:
            x1, x2 = x2, x1
        if y1 > y2:
            y1, y2 = y2, y1
        assert x1 <= x2
        assert y1 <= y2

        index = nonzero((xdata > x1) & (xdata < x2) &
                        (ydata > y1) & (ydata < y2))[0]
        ids = [self.nodes[i] for i in index]
        if self.visible:
            # Nodes hidden by the IC threshold must not be selectable.
            ids = [i for i in ids if i in self.visible]
        ids = self.update_selection(ids, key)
        self.selection_listener(self.current_dim, ids)

    def set_current_selection(self, selection):
        """Highlight the selected nodes and the edges between them.

        NOTE(review): the method header and the first statements (up to the
        `len(index) > 0` test) were lost from the source and are
        reconstructed -- confirm.
        """
        linewidth = zeros(self.xaxis_data.shape)
        edge_color_rgba = self._edge_color_rgba.copy()
        idents = selection[self.current_dim]
        index = [i for i, node in enumerate(self.nodes) if node in idents]

        if len(index) > 0:
            linewidth[index] = 2
            edge_index = [i for i, edge in enumerate(self.graph.edges())
                          if (edge[0] in idents and edge[1] in idents)]
            if len(edge_index) > 0:
                for i in edge_index:
                    edge_color_rgba[i, :] = self._edge_color_selected
                # NOTE(review): set on the plot, as in the source; possibly
                # meant for the edge collection -- confirm.
                self._A = None

        self.edge_collection._colors = edge_color_rgba
        self.selected_nodes.set_linewidth(linewidth)
        self.canvas.draw()

    def is_mappable_with(self, obj):
        """Returns True if dataset/selection is mappable with this plot.
        """
        if isinstance(obj, fluents.dataset.Dataset):
            if self.current_dim in obj.get_dim_name():
                return True
        return False

    def _update_color_from_dataset(self, ds):
        """Updates the facecolors from a dataset.
        """
        data = ds.asarray()

        # only support for 2d-arrays:
        try:
            m, n = data.shape
        except ValueError:
            raise ValueError("No support for more than 2 dimensions.")

        # is dataset a vector or matrix?
        if n != 1:
            # we have a category dataset
            if isinstance(ds, fluents.dataset.CategoryDataset):
                vec = dot(data, diag(arange(n))).sum(1)
            else:
                vec = data.sum(1)
        else:
            vec = data.ravel()

        indices = ds.get_indices(self.current_dim, self.nodes)
        nodes = ds.existing_identifiers(self.current_dim, self.nodes)

        v = vec.take(indices, 0)
        # Clamp infinities to the finite range so the colour scale stays usable.
        vec_min = min(vec[vec > -inf])
        vec_max = max(vec[vec < inf])
        v[v == inf] = vec_max
        v[v == -inf] = vec_min

        d = dict(zip(nodes, list(v)))

        # Nodes absent from the dataset get the value -1.
        map_vec = zeros(len(self.nodes))
        for i, node in enumerate(self.nodes):
            map_vec[i] = d.get(node, -1)

        # update facecolors
        self.node_collection.set_array(map_vec)
        self.node_collection.set_clim(vec_min, vec_max)
        self.node_collection.update_scalarmappable()  # sets facecolors from array
        self.canvas.draw()