import gtk
from fluents import dataset, logger, plots, workflow, fluents, project
import geneontology
#from scipy import array, randn, log, ones, zeros
from scipy import *
import networkx
import re

# (code, description) pairs for the Gene Ontology evidence codes.
EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'),
                ('IGI', 'Inferred from genetic interaction'),
                ('IPI', 'Inferred from physical interaction'),
                ('ISS', 'Inferred from sequence or structure similarity'),
                ('IDA', 'Inferred from direct assay'),
                ('IEP', 'Inferred on expression pattern'),
                ('IEA', 'Inferred from electronic annotation'),
                ('TAS', 'Traceable author statement'),
                ('NAS', 'Non-traceable author statement'),
                ('ND', 'No biological data available'),
                ('RCA', 'Inferred from reviewed computational analysis'),
                ('IC', 'Inferred by curator')]

# (key, display name) pairs for the available distance metrics.
DISTANCE_METRICS = [('resnik', 'Resnik'),
                    ('jiang', 'Jiang & Conrath'),
                    ('fussimeg', 'FuSSiMeG')]

# Directory the annotation file ('goa-condensed') is read from.
GO_DATA_DIR = '/home/einarr/data'

# Module-level state filled in by LoadAnnotationsFunction / LoadGOFunction.
evidence = None
go = None


class GoTermView (gtk.Frame):
    """Frame showing the id, name and definition of a single GO term."""

    def __init__(self):
        gtk.Frame.__init__(self)
        tab = gtk.Table(2, 2, False)
        self._table = tab

        # Row 0: term name.
        self._name = gtk.Label('')
        self._name.set_line_wrap(True)
        self._name.set_alignment(0, 0)
        name_label = gtk.Label('Name:')
        name_label.set_alignment(0, 0)
        tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5)

        # Row 1: term definition in a scrollable, word-wrapped text view.
        self._def = gtk.TextBuffer()
        textview = gtk.TextView(self._def)
        textview.set_wrap_mode(gtk.WRAP_WORD)
        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(textview)
        def_label = gtk.Label('Def:')
        def_label.set_alignment(0.0, 0.0)
        tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5)
        tab.attach(scrolled_window, 1, 2, 1, 2,
                   gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5)

        self.add(tab)
        self.set_go_term(None)

    def set_go_term(self, term):
        """Display `term` (read via the keys 'id', 'name' and 'def').

        A false value (e.g. None) resets the view to its empty state.
        """
        if term:
            self.set_label(term['id'])
            self._name.set_text(term['name'])
            self._def.set_text(term['def'])
        else:
            self.set_label('GO Term')
            self._name.set_text('')
            self._def.set_text('')


class GeneOntologyTree (gtk.HPaned):
    """Paned widget with a GO tree view on one side and a GoTermView
    describing the term under the cursor on the other.

    The tree model comes from geneontology.get_go_treestore(network); this
    class reads column 0 as the GO id, column 1 as the name, column 2 as the
    toggle state and column 3 as the term object.
    """

    def __init__(self, network):
        gtk.HPaned.__init__(self)

        treemodel = geneontology.get_go_treestore(network)
        self._treemodel = treemodel
        self._tree_view = gtk.TreeView(treemodel)
        self._selected_terms = set()

        self._tree_view.set_fixed_height_mode(True)

        # Set up context menu
        self._context_menu = GoTermContextMenu(treemodel, self._tree_view)
        self._tree_view.connect('popup_menu', self._popup_menu)
        self._tree_view.connect('button_press_event', self._on_button_press)

        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('GO ID', renderer, text=0)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_fixed_width(200)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 0)

        renderer = gtk.CellRendererToggle()
        renderer.set_property('activatable', True)
        renderer.connect('toggled', self._toggle_selected)
        renderer.set_active(True)
        renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
        go_column = gtk.TreeViewColumn('T', renderer, active=2)
        go_column.set_fixed_width(20)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 1)

        renderer = gtk.CellRendererText()
        go_column = gtk.TreeViewColumn('Name', renderer, text=1)
        go_column.set_fixed_width(200)
        go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED)
        go_column.set_resizable(True)
        self._tree_view.insert_column(go_column, 2)

        self._desc_view = GoTermView()

        self._tree_view.connect('cursor-changed', self._on_cursor_changed)

        scrolled_window = gtk.ScrolledWindow()
        scrolled_window.add(self._tree_view)
        self.add1(scrolled_window)
        self.add2(self._desc_view)
        self.show_all()

    def _on_cursor_changed(self, tree):
        """Show the term at the tree cursor in the description view."""
        path, col = self._tree_view.get_cursor()
        if path is None:
            # No row has the cursor; get_iter() cannot take a None path.
            self._desc_view.set_go_term(None)
            return
        current = self._treemodel.get_iter(path)
        term = self._treemodel.get_value(current, 3)
        self._desc_view.set_go_term(term)

    ##
    ## GTK Callback functions
    ##
    def _popup_menu(self, *rest):
        """Handler for the 'popup_menu' signal: pop up the context menu."""
        # The menu is stored as _context_menu; there is no `menu` attribute.
        self._context_menu.popup(None, None, None, 0, 0)
        return True

    def _on_button_press(self, widget, event):
        """Point the context menu at the clicked row; pop it up on button 3."""
        path = widget.get_path_at_pos(int(event.x), int(event.y))
        row_iter = None
        if path:
            row_iter = self._treemodel.get_iter(path[0])
            obj = self._treemodel.get_value(row_iter, 3)
        else:
            obj = None
        self._context_menu.set_current_term(obj, row_iter)

        if event.button == 3:
            self._context_menu.popup(None, None, None, event.button, event.time)

    def _toggle_selected(self, renderer, path):
        """Flip the toggle column of the row at `path` and keep
        self._selected_terms in step with it."""
        row_iter = self._treemodel.get_iter(path)
        selected = self._treemodel.get_value(row_iter, 2)
        term_id = self._treemodel.get_value(row_iter, 0)

        self._treemodel.set_value(row_iter, 2, not selected)

        if selected:
            # discard(): a row that was already checked in the model may
            # never have been added to the set.
            self._selected_terms.discard(term_id)
        else:
            self._selected_terms.add(term_id)


class GoTermContextMenu (gtk.Menu):
    """Context menu for GO terms in the gene ontology browser"""

    def __init__(self, treemodel, treeview):
        self._treemodel = treemodel
        self._treeview = treeview
        self._current_term = None
        self._current_iter = None

        gtk.Menu.__init__(self)

        # Populate the menu
        self._expand_item = i = gtk.MenuItem('Expand')
        i.connect('activate', self._on_expand_subtree, treemodel, treeview)
        self.append(i)
        i.show()

        self._collapse_item = i = gtk.MenuItem('Collapse')
        i.connect('activate', self._on_collapse_subtree, treemodel, treeview)
        self.append(i)
        i.show()

        self._select_subtree_item = i = gtk.MenuItem('Select subtree')
        i.connect('activate', self._on_select_subtree, treemodel, treeview)
        self.append(i)
        i.show()

    def set_current_term(self, term, it):
        """Remember the term and tree iter the menu items will act on.

        Both may be None when the menu is opened over empty space.
        """
        self._current_term = term
        self._current_iter = it

    def _on_expand_subtree(self, item, treemodel, treeview):
        """Expand the current row and all of its descendants."""
        if self._current_iter is None:
            return
        path = treemodel.get_path(self._current_iter)
        treeview.expand_row(path, True)

    def _on_collapse_subtree(self, item, treemodel, treeview):
        """Collapse the current row."""
        if self._current_iter is None:
            return
        treeview.collapse_row(treemodel.get_path(self._current_iter))

    def _on_select_subtree(self, item, treemodel, treeview):
        """Set the project's 'go-terms' selection to the ids of the terms
        yielded by networkx.bfs() over the global `go`, starting at the
        current term."""
        if self._current_term is None:
            return
        logger.log('notice', 'Selecting subtree from GO id: %s (%s)' %
                   (self._current_term['id'], self._current_term['name']))
        ids = [x['id'] for x in networkx.bfs(go, self._current_term)]
        project.project.set_selection('go-terms', set(ids))


class LoadGOFunction(workflow.Function):
    """Workflow function that loads the ontology and adds a browser tab."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology')

    def run(self):
        """Load the default GO into the global `go` (once) and append a
        GeneOntologyTree page to the application's bottom notebook."""
        global go
        if go:
            # Already loaded; do not add a second browser page.
            return

        go = geneontology.read_default_go()
        browser = GeneOntologyTree(go)
        label = gtk.Label('_Gene Ontology')
        label.set_use_underline(True)
        fluents.app['bottom_notebook'].append_page(browser, label)


class LoadAnnotationsFunction(workflow.Function):
    """Workflow function that reads the condensed annotation matrix."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        self.annotations = None

    def run(self):
        """Read GO_DATA_DIR/goa-condensed into a Dataset.

        The first line is split on whitespace into the evidence codes; on
        each following line the first field is a GO term and the remaining
        fields are parsed as floats. The dataset is also stored in the
        global `evidence`.

        Returns a one-element list holding the dataset.
        """
        global evidence
        f = open(GO_DATA_DIR + '/goa-condensed')
        try:
            ev_codes = f.readline().split()
            # Skip blank lines; they have no GO term in field 0.
            lines = [l for l in f.readlines() if l.strip()]
        finally:
            # Always release the file handle, even if reading fails.
            f.close()

        go_terms = []
        m = zeros((len(lines), len(ev_codes)))

        for i, l in enumerate(lines):
            values = l.split()
            go_terms.append(values[0])
            for j, v in enumerate(values[1:]):
                m[i,j] = float(v.strip())

        d = dataset.Dataset(m,
                            [['go-terms', go_terms], ['evidence', ev_codes]],
                            name='GO evidence')
        evidence = d
        return [d]


class GOWeightDialog(gtk.Dialog):
    """Modal dialog with spin buttons for the similarity and rank
    thresholds."""

    def __init__(self):
        gtk.Dialog.__init__(self, 'GO Gene List Influence',
                            None,
                            gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT,
                            (gtk.STOCK_OK, gtk.RESPONSE_OK,
                             gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))

        table = gtk.Table(2, 2)

        sim_lbl = gtk.Label('Similarity threshold: ')
        table.attach(sim_lbl, 0, 1, 0, 1)
        adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
        sim_spin = gtk.SpinButton(adjustment, 0.0, 2)
        table.attach(sim_spin, 1, 2, 0, 1)

        rank_lbl = gtk.Label('Rank threshold: ')
        table.attach(rank_lbl, 0, 1, 1, 2)
        rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0)
        rank_spin = gtk.SpinButton(rank_adj, 0.0, 2)
        table.attach(rank_spin, 1, 2, 1, 2)

        sim_lbl.show()
        sim_spin.show()
        rank_lbl.show()
        rank_spin.show()

        table.show()
        self.vbox.add(table)
        self._sim_spin = sim_spin
        self._rank_spin = rank_spin

    def set_options(self, options):
        """Load both spin buttons from `options`."""
        self._sim_spin.set_value(options['similarity_threshold'])
        self._rank_spin.set_value(options['rank_threshold'])

    def set_editable(self, editable):
        """Enable or disable both spin buttons."""
        self._sim_spin.set_sensitive(editable)
        self._rank_spin.set_sensitive(editable)

    def update_options(self, options):
        """Write the spin button values into `options` and return it."""
        options['similarity_threshold'] = self._sim_spin.get_value()
        options['rank_threshold'] = self._rank_spin.get_value()
        return options


class DistanceToSelectionFunction(workflow.Function):
    """Workflow function reducing a similarity matrix against a selection."""

    def __init__(self):
        workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection')
        self.options = DistanceToSelectionOptions()

    def run(self, similarities, selection):
        """Return a list with one dataset: for each row of `similarities`,
        the maximum over the columns picked out by `selection`.

        The first dimension name of `similarities` is used to look up both
        the selection and the column indices.
        """
        self.show_gui(similarities, self.options)

        retval = []

        dims = similarities.get_dim_name()
        if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]:
            logger.log('warning', 'Are you sure this is a similarity matrix?')

        dim = dims[0]
        # Look the indices up once and reuse them for the debug output.
        indices = similarities.get_indices(dim, selection[dim])
        # Single-argument print() emits the same text under Python 2.
        print("dim %s" % (dim,))
        print("selection %s" % (selection[dim],))
        print("indices %s" % (indices,))

        m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1))
        retval.append(dataset.Dataset(m, [(dim, similarities[dim]),
                                          ("_dummy", '0')]))

        return retval

    def show_gui(self, similarities, options, edit=True):
        """Run the options dialog.

        Returns the dialog's options on OK, otherwise `options` unchanged.
        """
        dialog = DistanceToSelectionOptionsDialog([similarities], self.options)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            dialog.set_output()
            return dialog.get_options()
        else:
            return options


class GOWeightFunction(workflow.Function):
    """Workflow function for GO influence on a gene list (unfinished: run()
    validates its inputs but performs no calculation yet)."""

    def __init__(self):
        # NOTE(review): 'load-go-ann' is also the id used by
        # LoadAnnotationsFunction -- looks like a copy/paste; confirm
        # whether workflow ids must be unique before changing it.
        workflow.Function.__init__(self, 'load-go-ann', 'GO Influence')
        self.options = GOWeightOptions()

    def run(self, genelist, similarity):
        """Show the options dialog and check that `genelist` and
        `similarity` share a dimension. Returns None."""
        ## Show dialog box
        self.show_gui(self.options)

        ## assure that data is "correct", i.e., that we can perform
        ## the desired operations.
        common_dims = genelist.common_dims(similarity)
        if len(common_dims) == 0:
            logger.log('error', 'No common dimension in the selected datasets.')
            # Nothing to work with; indexing common_dims[0] would fail.
            return
        elif len(common_dims) > 1:
            logger.log('error', "More than one common dimension in the " +
                       "selected datasets. Don't know what to do.")
        gene_dim = common_dims[0]
        logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim)

        ## Do the calculations.
        d = {}

    def show_gui(self, options, edit=True):
        """Run the threshold dialog.

        On OK, self.options is updated from the dialog and returned;
        otherwise `options` is returned unchanged.
        """
        dialog = GOWeightDialog()
        dialog.set_options(self.options)
        dialog.show_all()
        dialog.set_editable(edit)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            return dialog.update_options(self.options)
        else:
            return options


class DistanceToSelectionOptionsDialog(workflow.OptionsDialog):
    """Options dialog with a single 'X' input."""

    def __init__(self, data, options):
        workflow.OptionsDialog.__init__(self, data, options, ['X'])


class TTestOptionsDialog(workflow.OptionsDialog):
    """Options dialog for the t-test with an extra 'Limit' notebook page."""

    def __init__(self, data, options):
        workflow.OptionsDialog.__init__(self, data, options,
                                        ['X', 'Categories'])

        vb = gtk.VBox()
        l = gtk.Label("Limit")
        adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0)
        sb = gtk.SpinButton(adj, 0.0, 2)
        l.show()
        sb.show()

        vb.add(l)
        vb.add(sb)
        vb.show()
        self.nb.insert_page(vb, gtk.Label("Limit"), -1)


class TTestFunction(workflow.Function):
    """Workflow function running stats.ttest_ind between two row groups."""

    def __init__(self):
        workflow.Function.__init__(self, 't-test', 't-test')
        self.options = TTestOptions()

    def run(self, x, categories):
        """Run the t-test and return the outputs enabled in self.options.

        Rows of `x` are split by the non-zero entries of columns 0 and 2 of
        `categories`. Returns a list of datasets and plots chosen by the
        'out_data' and 'out_plots' options.
        """
        self.show_gui(x, categories)

        retval = []
        m = x.asarray()
        c = categories.asarray()

        # Nonsmokers and current smokers
        ns = m.take(nonzero(c[:,0]), 0)[0]
        cs = m.take(nonzero(c[:,2]), 0)[0]

        tscores = stats.ttest_ind(ns, cs)

        # Single-argument print() emits the same text under Python 2.
        print("Out data: %s" % (self.options['out_data'],))
        tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']),
                                           ('_t', ['0'])],
                              name='t-values')
        if 't-value' in self.options['out_data']:
            retval.append(tds)

        pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']),
                                           ('_p', ['0'])],
                              name='p-values')
        if 'p-value' in self.options['out_data']:
            retval.append(pds)

        if ProbabilityHistogramPlot in self.options['out_plots']:
            retval.append(ProbabilityHistogramPlot(pds))

        if VolcanoPlot in self.options['out_plots']:
            fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs)
            fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']),
                                        ('_dummy', ['0'])],
                                   name="Fold change")

            retval.append(VolcanoPlot(fcds, pds, 'gene_id'))

        return retval

    def show_gui(self, x, categories):
        """Run the t-test options dialog.

        Returns the dialog's options on OK, otherwise the current
        self.options.
        """
        dialog = TTestOptionsDialog([x, categories], self.options)
        response = dialog.run()
        dialog.hide()
        if response == gtk.RESPONSE_OK:
            dialog.set_output()
            return dialog.get_options()
        else:
            # There is no local `options` here; fall back to the current ones.
            return self.options


class TTestOptions(workflow.Options):
    """Default options for TTestFunction."""

    def __init__(self):
        workflow.Options.__init__(self)
        # NOTE(review): both plot entries carry the label 'Histogram';
        # the VolcanoPlot one looks like a copy/paste -- confirm.
        self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True),
                             (VolcanoPlot, 'Histogram', True)]
        self['all_data'] = [('t-value', 't-values', True),
                            ('p-value', 'Probabilities', True),
                            ('categories', 'Categories', False)]
        self['out_data'] = ['t-value', 'p-value']


class DistanceToSelectionOptions(workflow.Options):
    """Default options for DistanceToSelectionFunction."""

    def __init__(self):
        workflow.Options.__init__(self)
        self['all_data'] = [('mindist', 'Minimum distance', True)]


class GOWeightOptions(workflow.Options):
    """Default options for GOWeightFunction (both thresholds start at 0.0)."""

    def __init__(self):
        workflow.Options.__init__(self)
        self['similarity_threshold'] = 0.0
        self['rank_threshold'] = 0.0


class ProbabilityHistogramPlot(plots.HistogramPlot):
    """50-bin histogram plot named "Confidence"."""

    def __init__(self, ds):
        plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50)


class VolcanoPlot(plots.ScatterPlot):
    """Scatter plot of a fold-change dataset against a p-value dataset."""

    def __init__(self, fold_ds, p_ds, dim, **kw):
        # NOTE(review): `dim` is accepted but not used; the dimension
        # names below are hard-coded.
        plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy',
                                   '0', '0',
                                   name="Volcano plot",
                                   sel_dim_2='_p', **kw)