From f8e0cfdb8b7891d03824aaa42d010820879abcc1 Mon Sep 17 00:00:00 2001 From: flatberg Date: Mon, 3 Sep 2007 15:50:16 +0000 Subject: [PATCH] removed unused workflows --- workflows/geneontology.py | 261 ------------- workflows/go_workflow.py | 486 ----------------------- workflows/gobrowser.py | 777 ------------------------------------- workflows/smokers.py | 445 --------------------- workflows/test_workflow.py | 15 + 5 files changed, 15 insertions(+), 1969 deletions(-) delete mode 100644 workflows/geneontology.py delete mode 100644 workflows/go_workflow.py delete mode 100644 workflows/gobrowser.py delete mode 100644 workflows/smokers.py diff --git a/workflows/geneontology.py b/workflows/geneontology.py deleted file mode 100644 index c6bb411..0000000 --- a/workflows/geneontology.py +++ /dev/null @@ -1,261 +0,0 @@ -import gobject -import gtk -import networkx -import re - -class GOTerm: - def __init__(self): - self.d = {} - - ## Create empty lists for all list values - for l in GOTerm.lists: - self.d[l] = [] - for s in GOTerm.scalars: - self.d[l] = None - - def __getitem__(self, key): - if self.d.has_key(key): - return self.d[key] - return None - - def __setitem__(self, key, value): - self.d[key] = value - -GOTerm.lists = ['is_a', 'alt_id', 'exact_synonym', 'broad_synonym', - 'narrow_synonym', 'related_synonym', 'relationship', - 'subset', 'synonym', 'xref_analog', 'xref_unknown'] -GOTerm.scalars = ['name', 'id', 'namespace', 'def', 'is_transitive', - 'comment', 'is_obsolete'] - - -class GeneOntology(networkx.XDiGraph): - def __init__(self): - networkx.XDiGraph.__init__(self) - self.by_id = {} - self.undirected = None - - def add_term(self, term): - self.add_node(term) - self.by_id[term['id']] = term - - def link_ontology(self, linkattr, obsolete=False): - for node in self.nodes(): - for link in node[linkattr]: - self.add_edge(self.by_id[link], node, linkattr) - - def link_relationships(self): - for node in self.nodes(): - for link in node['relationship']: - link_type, term = link.split(' ') - self.add_edge(self.by_id[term.strip()], node, link_type.strip()) - - def get_bp(self): - """Returns the root node of the biological_process tree""" - return self.by_id['GO:0008150'] - - def get_cc(self): - """Returns the root node of the cellular_component tree""" - return self.by_id['id: GO:0005575'] - - def get_mf(self): - """Returns the root node of the molecular_function tree""" - return self.by_id['GO:0003674'] - - def _subsumer(self, t1, t2, heap): - while heap != []: - t = heap[0] - heap = heap[1:] - - p1 = networkx.shortest_path(self, t, t1) - p2 = networkx.shortest_path(self, t, t2) - if p1 and p2: - return t - - heap += self.in_neighbors(t) - return None - - def subsumer(self, t1, t2): - if t1 == t2: - return t1 - if networkx.shortest_path(self, t1, t2): - return t1 - elif networkx.shortest_path(self, t2, t1): - return t2 - return self._subsumer(t1, t2, self.in_neighbors(t1)) - - def old_subsumer(self, t1, t2): - if t1 == t2: - return t1 - - if self.undirected == None: - self.undirected = self.to_undirected() - - path = networkx.shortest_path(self.undirected, t1, t2) - if not path: - print "Woah, path not found." - return None - - if path == [1]: - print "This shouldn't happen" - return t1 - - for t in path: - if networkx.shortest_path(self, t, t1) and \ - networkx.shortest_path(self, t, t2): - return t - - print "GeneOntology.subsumer: should not reach this point" - print "path is now: %s" % path - print "ids are: %s " % [x['id'] for x in path] - -def _split_obo_line(line): - """Splits a line from an obo file in its three constituent parts. - - @param line: A string containing a line from an obo file. The line must - either be a section definition field with a section name in brackets - or a line of the form: keyword: value ! comment - @returns: A tuple of four strings conaining the section, key, value and - description defined in the string. If the section part is None, all - the other fields are strings and if section is a string, all the other - fields are None. - """ - attrib_re = re.compile(r'^\s*([\w-]+)\s*:\s*([^!]*)!?(.*$)') - s = line.strip() - if s == "": - return (None, None, None, None) - elif s.startswith('[') and s.endswith(']'): - return (s[1:-1], None, None, None) - else: - m = attrib_re.match(s) - if m: - key, value, comment = [x.strip() for x in m.groups()] - return (None, key, value, comment) - else: - raise Exception('Unparsable line: %s' % line) - - -def _add_term_attribute(term, key, value, comment): - if key in GOTerm.scalars: - term[key] = value - elif key in GOTerm.lists: - term[key].append(value) - else: - raise Exception('Unknown key %s: %s' % (key, value)) - - -def read_gene_ontology(fd): - """Reads the Gene Ontology from an obo file. - - @param fd: An open file object to the obo file. - """ - go = GeneOntology() - term = None - section = None - - line = fd.readline() - while line: - s, k, v, c = _split_obo_line(line) - if s == None and k == None: - pass - elif s: - if term: - go.add_term(term) - - section = s - if s == 'Term': - term = GOTerm() - else: - term = None - print "ignoring: %s" %s - else: - if term: - _add_term_attribute(term, k, v, c) - line = fd.readline() - - if term: - go.add_term(term) - - return go - - -def pickle_gene_ontology(go, fn): - fd = open(fn, 'wb') - pickle.dump(go, fd) - fd.close() - -def load_pickled_ontology(fn): - fd = open(fn, 'rb') - go = pickle.load(fd) - fd.close() - return go - -def read_default_go(): - f = open("/usr/share/gene-ontology/gene_ontology.obo") - go = read_gene_ontology(f) - go.link_ontology('is_a') - go.link_relationships() - f.close() - return go - -def _add_subgraphs(treestore, ontology, parent, nodes): - for n in nodes: - i = treestore.insert(parent, 0, (n['id'], n['name'], False, n)) - _add_subgraphs(treestore, ontology, i, ontology.successors(n)) - -def get_go_treestore(ontology): - ts = gtk.TreeStore(gobject.TYPE_STRING, ## ID - gobject.TYPE_STRING, ## Name - gobject.TYPE_BOOLEAN, ## Selected - gobject.TYPE_PYOBJECT) ## Node - _add_subgraphs(ts, ontology, None, [ontology.get_bp()]) - return ts - -class NetworkTreeModel(gtk.GenericTreeModel): - - def __init__(self, network, root): - gtk.GenericTreeModel.__init__(self) - self._network = network - self._root = root - - def on_get_flags(self): - return 0 - - def on_get_n_columns(self): - return 1 - - def on_get_column_type(self, index): - if index==0: - return gobject.TYPE_STRING - - def on_get_iter(self, path): - node = self._root - for p in path[1:]: - children = self._network.predecessors(node) - node = children[p] - return node - - def on_get_path(self, rowref): - pass - - def on_get_value(self, rowref, column): - print 'get_value' - return rowref['id'] - - def on_iter_next(self, rowref): - pass - - def on_iter_children(self, parent): - pass - - def on_iter_has_child(self, rowref): - pass - - def on_iter_n_children(self, rowref): - pass - - def on_iter_nth_child(self, parent, n): - pass - - def on_iter_parent(self, child): - pass - diff --git a/workflows/go_workflow.py b/workflows/go_workflow.py deleted file mode 100644 index 9276c16..0000000 --- a/workflows/go_workflow.py +++ /dev/null @@ -1,486 +0,0 @@ -import gtk -from fluents import dataset, logger, plots, workflow, fluents, project -from fluents.lib import blmfuncs -import geneontology -#import gostat -from scipy import array, randn, log, ones, zeros -import networkx -import re - -EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'), - ('IGI', 'Inferred from genetic interaction'), - ('IPI', 'Inferred from physical interaction'), - ('ISS', 'Inferred from sequence or structure similarity'), - ('IDA', 'Inferred from direct assay'), - ('IEP', 'Inferred on expression pattern'), - ('IEA', 'Inferred from electronic annotation'), - ('TAS', 'Traceable author statement'), - ('NAS', 'Non-traceable author statement'), - ('ND', 'No biological data available'), - ('RCA', 'Inferred from reviewed computational analysis'), - ('IC', 'Inferred by curator')] - -DISTANCE_METRICS = [('resnik', 'Resnik'), - ('jiang', 'Jiang & Conrath'), - ('fussimeg', 'FuSSiMeG')] - -GO_DATA_DIR = '/home/einarr/data' - -evidence = None -go = None - -class GoTermView (gtk.Frame): - - def __init__(self): - gtk.Frame.__init__(self) - tab = gtk.Table(2, 2, False) - self._table = tab - - self._name = gtk.Label('') - self._name.set_line_wrap(True) - self._name.set_alignment(0, 0) - name_label = gtk.Label('Name:') - name_label.set_alignment(0, 0) - tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5) - tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5) - - self._def = gtk.TextBuffer() - textview = gtk.TextView(self._def) - textview.set_wrap_mode(gtk.WRAP_WORD) - scrolled_window = gtk.ScrolledWindow() - scrolled_window.add(textview) - def_label = gtk.Label('Def:') - def_label.set_alignment(0.0, 0.0) - tab.attach(def_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5) - tab.attach(scrolled_window, 1, 2, 1, 2, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5) - - self.add(tab) - self.set_go_term(None) - - def set_go_term(self, term): - if term: - self.set_label(term['id']) - self._name.set_text(term['name']) - self._def.set_text(term['def']) - else: - self.set_label('GO Term') - self._name.set_text('') - self._def.set_text('') - - -class GeneOntologyTree (gtk.HPaned): - - def __init__(self, network): - gtk.HPaned.__init__(self) - - treemodel = geneontology.get_go_treestore(network) - self._treemodel = treemodel - self._tree_view = gtk.TreeView(treemodel) - - self._selected_terms = set() - - self._tree_view.set_fixed_height_mode(True) - - # Set up context menu - self._context_menu = GoTermContextMenu(treemodel, self._tree_view) - self._tree_view.connect('popup_menu', self._popup_menu) - self._tree_view.connect('button_press_event', self._on_button_press) - - renderer = gtk.CellRendererText() - go_column = gtk.TreeViewColumn('GO ID', renderer, text=0) - go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) - go_column.set_fixed_width(200) - go_column.set_resizable(True) - self._tree_view.insert_column(go_column, 0) - - renderer = gtk.CellRendererToggle() - renderer.set_property('activatable', True) - renderer.connect('toggled', self._toggle_selected) - renderer.set_active(True) - renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE) - go_column = gtk.TreeViewColumn('T', renderer, active=2) - go_column.set_fixed_width(20) - go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) - go_column.set_resizable(True) - self._tree_view.insert_column(go_column, 1) - - renderer = gtk.CellRendererText() - go_column = gtk.TreeViewColumn('Name', renderer, text=1) - go_column.set_fixed_width(200) - go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) - go_column.set_resizable(True) - self._tree_view.insert_column(go_column, 2) - - self._desc_view = GoTermView() - - self._tree_view.connect('cursor-changed', self._on_cursor_changed) - - scrolled_window = gtk.ScrolledWindow() - scrolled_window.add(self._tree_view) - self.add1(scrolled_window) - self.add2(self._desc_view) - self.show_all() - - def _on_cursor_changed(self, tree): - path, col = self._tree_view.get_cursor() - current = self._treemodel.get_iter(path) - term = self._treemodel.get_value(current, 3) - self._desc_view.set_go_term(term) - - - ## - ## GTK Callback functions - ## - def _popup_menu(self, *rest): - self.menu.popup(None, None, None, 0, 0) - - def _on_button_press(self, widget, event): - path = widget.get_path_at_pos(int(event.x), int(event.y)) - iter = None - - if path: - iter = self._treemodel.get_iter(path[0]) - obj = self._treemodel.get_value(iter, 3) - else: - obj = None - - self._context_menu.set_current_term(obj, iter) - - if event.button == 3: - self._context_menu.popup(None, None, None, event.button, event.time) - - def _toggle_selected(self, renderer, path): - iter = self._treemodel.get_iter(path) - - selected = self._treemodel.get_value(iter, 2) - id = self._treemodel.get_value(iter, 0) - - self._treemodel.set_value(iter, 2, not selected) - - if selected: - self._selected_terms.remove(id) - else: - self._selected_terms.add(id) - - -class GoTermContextMenu (gtk.Menu): - """Context menu for GO terms in the gene ontology browser""" - - def __init__(self, treemodel, treeview): - self._treemodel = treemodel - self._treeview = treeview - self._current_term = None - self._current_iter = None - - gtk.Menu.__init__(self) - - # Popuplate tree - self._expand_item = i = gtk.MenuItem('Expand') - i.connect('activate', self._on_expand_subtree, treemodel, treeview) - self.append(i) - i.show() - - self._collapse_item = i = gtk.MenuItem('Collapse') - i.connect('activate', self._on_collapse_subtree, treemodel, treeview) - self.append(i) - i.show() - - self._select_subtree_item = i = gtk.MenuItem('Select subtree') - i.connect('activate', self._on_select_subtree, treemodel, treeview) - self.append(i) - i.show() - - def set_current_term(self, term, it): - self._current_term = term - self._current_iter = it - - def _on_expand_subtree(self, item, treemodel, treeview): - path = treemodel.get_path(self._current_iter) - treeview.expand_row(path, True) - - def _on_collapse_subtree(self, item, treemodel, treeview): - treeview.collapse_row(treemodel.get_path(self._current_iter)) - - def _on_select_subtree(self, item, treemodel, treeview): - logger.log('notice', 'Selecting subtree from GO id: %s (%s)' % - (self._current_term['id'], self._current_term['name'])) - ids = [x['id'] for x in networkx.bfs(go, self._current_term)] - project.project.set_selection('go-terms', set(ids)) - - -class GoWorkflow (workflow.Workflow): - - name = 'Gene Ontology' - ident = 'go' - description = 'Gene Ontology Workflow. For tree distance measures based '\ - + 'on the GO tree.' - - def __init__(self, app): - workflow.Workflow.__init__(self, app) - - load = workflow.Stage('load', 'Load GO Annotations') - load.add_function(LoadGOFunction()) - load.add_function(LoadAnnotationsFunction()) - load.add_function(LoadTextDatasetFunction()) - self.add_stage(load) - - go = workflow.Stage('go', 'Gene Ontology') - go.add_function(SelectGoTermsFunction(self)) - go.add_function(GoDistanceFunction()) - go.add_function(SaveDistancesFunction()) - self.add_stage(go) - - blm = workflow.Stage('blm', 'Bilinear Analysis') - blm.add_function(blmfuncs.PCA()) - self.add_stage(blm) - - -class LoadGOFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology') - - def run(self): - global go - go = geneontology.read_default_go() - browser = GeneOntologyTree(go) - label = gtk.Label('_Gene Ontology') - label.set_use_underline(True) - fluents.app['bottom_notebook'].append_page(browser, label) - - -class LoadTextDatasetFunction(workflow.Function): - - def __init__(self): - workflow.Function.__init__(self, 'load-text-ds', 'Load GO Evidence') - - def run(self): - f = open('/home/einarr/data/goa-condensed.ftsv') - global evidence - evidence = dataset.read_ftsv(f) - return [evidence] - - -class LoadAnnotationsFunction(workflow.Function): - - def __init__(self): - workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations') - self.annotations = None - - def run(self): - global evidence - f = open(GO_DATA_DIR + '/goa-condensed') - ev_codes = f.readline().split() - go_terms = [] - - lines = f.readlines() - m = zeros((len(lines), len(ev_codes))) - - for i, l in enumerate(lines): - values = l.split() - go_terms.append(values[0]) - for j, v in enumerate(values[1:]): - m[i,j] = float(v.strip()) - - d = dataset.Dataset(m, - [['go-terms', go_terms], ['evidence', ev_codes]], - name='GO evidence') - - evidence = d - return [d] - - -class EvidenceCodeFrame(gtk.Frame): - def __init__(self): - gtk.Frame.__init__(self, 'Evidence Codes') - - self._ec_buttons = {} - - vbox = gtk.VBox(len(EVIDENCE_CODES)) - for code, desc in EVIDENCE_CODES: - btn = gtk.CheckButton('%s (%s)' % (code, desc)) - self._ec_buttons[code] = btn - vbox.add(btn) - self.add(vbox) - - def set_options(self, options): - for code, desc in EVIDENCE_CODES: - self._ec_buttons[code].set_active(options[code]) - - def update_options(self, options): - for code, desc in EVIDENCE_CODES: - options[code] = self._ec_buttons[code].get_active() - return options - - -class DistanceMetricFrame(gtk.Frame): - def __init__(self): - gtk.Frame.__init__(self, 'Distance Metrics') - - self._metric_buttons = {} - - vbox = gtk.VBox() - prev = None - for code, text in DISTANCE_METRICS: - btn = gtk.RadioButton(prev, '%s' % text) - self._metric_buttons[code] = btn - vbox.add(btn) - prev = btn - self.add(vbox) - - def set_options(self, options): - self._metric_buttons[options['metric']].set_active(True) - - def update_options(self, options): - for code, text in DISTANCE_METRICS: - if self._metric_buttons[code].get_active(): - options['metric'] = code - return options - return options - - -class GoDistanceDialog(gtk.Dialog): - - def __init__(self): - gtk.Dialog.__init__(self, 'GO term distance matrix', - None, - gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT, - (gtk.STOCK_OK, gtk.RESPONSE_OK, - gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL)) - - self._ec_frame = EvidenceCodeFrame() - self._metric_frame = DistanceMetricFrame() - self.vbox.add(self._ec_frame) - self.vbox.add(self._metric_frame) - - def run(self): - self.vbox.show_all() - return gtk.Dialog.run(self) - - def set_options(self, options): - self._ec_frame.set_options(options) - self._metric_frame.set_options(options) - - def update_options(self, options): - self._ec_frame.update_options(options) - self._metric_frame.update_options(options) - return options - - def set_editable(self, editable): - self._ec_frame.set_sensitive(editable) - self._metric_frame.set_sensitive(editable) - - -class NumericDict(dict): - def __init__(self): - dict.__init__(self) - - def __getitem__(self, key): - retval = 0 - try: - retval = dict.__getitem__(self, key) - except: - retval = 0.0 - return retval - - -class SelectGoTermsFunction(workflow.Function): - def __init__(self, wf): - workflow.Function.__init__(self, 'go-select', 'Select GO Terms') - self.wf = wf - - def run(self, ds): - terms = [x['id'] for x in networkx.paths.bfs(go, go.get_bp())] - - self.wf.project.set_selection('go-terms', set(terms[:100])) -# self.wf.project.set_selection('go-terms', set(['GO:0007582', 'GO:0008150', 'GO:0051704', 'GO:0044419'])) - - -class GoDistanceFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'go-dist', 'GO term distance matrix') - self.options = GoDistanceOptions() - - def resnik_distance_matrix(self, selection, ic): - size = len(selection['go-terms']) - m = zeros((size, size)) - # Create resnik distance matrix - ids = list(selection['go-terms']) - for i, t1 in enumerate(ids): - for j, t2 in enumerate(ids): - term1 = go.by_id[t1] - term2 = go.by_id[t2] - subsumer = go.subsumer(term1, term2) - - if subsumer == None: - m[i, j] = 1000 - else: -# print "%s - %s - %s" % (t1, subsumer['id'], t2) - m[i, j] = ic[t1] + ic[t2] - 2.0 * ic[subsumer['id']] - ds = dataset.Dataset(m, (('go-terms', ids), ('_go-terms', ids)), 'Resnik') - return ds - - def run(self, x, selection): - global evidence, go - self.options = self.show_gui(self.options) - if not selection.has_key('go-terms') or len(selection['go-terms']) == 0: - logger.log('warning', 'No GO terms selected. Cannot make distance matrix.') - - codes = [c for c, d in EVIDENCE_CODES if self.options[c]] - ev_indices = evidence.get_indices('evidence', codes) - ann_count_matrix = evidence._array[:, ev_indices].sum(1) - total_ann = ann_count_matrix.sum(0) - - annotations = NumericDict() - ic = NumericDict() - - # Insert annotations into dict - for i, v in enumerate(evidence.get_identifiers('go-terms')): - annotations[v] = ann_count_matrix[i] - - # Accumulate annotations - for term in reversed(networkx.topological_sort(go)): - for parent in go.in_neighbors(term): - annotations[parent['id']] += annotations[term['id']] - - # Create information content dictionary - for term, count in annotations.items(): - ic[term] = -log(count / total_ann) - - return [self.resnik_distance_matrix(selection, ic)] - - def show_gui(self, options, edit=True): - dialog = GoDistanceDialog() - dialog.set_options(self.options) - dialog.show_all() - dialog.set_editable(edit) - response = dialog.run() - dialog.hide() - if response == gtk.RESPONSE_OK: - return dialog.update_options(self.options) - else: - return options - - -class SaveDistancesFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'save-matrix', 'Save Matrix') - - def run(self, ds): - filename = '/home/einarr/data/output.ftsv' - fd = open(filename, 'w') - dataset.write_ftsv(fd, ds) - fd.close() - - -class Options(dict): - def __init__(self): - dict.__init__(self) - - -class GoDistanceOptions(Options): - def __init__(self): - Options.__init__(self) - for code, desc in EVIDENCE_CODES: - self[code] = True - self['metric'] = 'fussimeg' - diff --git a/workflows/gobrowser.py b/workflows/gobrowser.py deleted file mode 100644 index 4530da1..0000000 --- a/workflows/gobrowser.py +++ /dev/null @@ -1,777 +0,0 @@ - -import gtk -from fluents import dataset, logger, plots, workflow, fluents, project, view, main -import geneontology -from matplotlib.nxutils import points_inside_poly -import matplotlib -#from scipy import array, randn, log, ones, zeros -from scipy import * -from numpy import matlib -import networkx -import re -import rpy - -EVIDENCE_CODES=[('IMP', 'Inferred from mutant phenotype'), - ('IGI', 'Inferred from genetic interaction'), - ('IPI', 'Inferred from physical interaction'), - ('ISS', 'Inferred from sequence or structure similarity'), - ('IDA', 'Inferred from direct assay'), - ('IEP', 'Inferred on expression pattern'), - ('IEA', 'Inferred from electronic annotation'), - ('TAS', 'Traceable author statement'), - ('NAS', 'Non-traceable author statement'), - ('ND', 'No biological data available'), - ('RCA', 'Inferred from reviewed computational analysis'), - ('IC', 'Inferred by curator')] - -DISTANCE_METRICS = [('resnik', 'Resnik'), - ('jiang', 'Jiang & Conrath'), - ('fussimeg', 'FuSSiMeG')] - -GO_DATA_DIR = '/home/einarr/data' - -evidence = None -go = None - -class GoTermView (gtk.Frame): - - def __init__(self): - gtk.Frame.__init__(self) - tab = gtk.Table(2, 3, False) - self._table = tab - - self._name = gtk.Label('') - self._name.set_line_wrap(True) - self._name.set_alignment(0, 0) - name_label = gtk.Label('Name:') - name_label.set_alignment(0, 0) - tab.attach(name_label, 0, 1, 0, 1, gtk.FILL, gtk.FILL, 5, 5) - tab.attach(self._name, 1, 2, 0, 1, gtk.FILL|gtk.EXPAND, gtk.FILL, 5, 5) - - self._isa_parents = gtk.HBox() - isa_parents_label = gtk.Label('Is a:') - tab.attach(isa_parents_label, 0, 1, 1, 2, gtk.FILL, gtk.FILL, 5, 5) - tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5) - - self._def = gtk.TextBuffer() - textview = gtk.TextView(self._def) - textview.set_wrap_mode(gtk.WRAP_WORD) - scrolled_window = gtk.ScrolledWindow() - scrolled_window.add(textview) - def_label = gtk.Label('Def:') - def_label.set_alignment(0.0, 0.0) - tab.attach(def_label, 0, 1, 2, 3, gtk.FILL, gtk.FILL, 5, 5) - tab.attach(scrolled_window, 1, 2, 2, 3, gtk.FILL|gtk.EXPAND, gtk.FILL|gtk.EXPAND, 5, 5) - - self._tab = tab - self.add(tab) - self.set_go_term(None) - - def set_go_term(self, term): - if term: - self.set_label(term['id']) - self._name.set_text(term['name']) - self._def.set_text(term['def']) - self._tab.remove(self._isa_parents) - self._isa_parents = gtk.HBox() - for p in term['is_a']: - btn = gtk.Button(p) - btn.show() - self._isa_parents.add(btn) - self._isa_parents.show() - self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5) - else: - self.set_label('GO Term') - self._name.set_text('') - self._def.set_text('') - self._tab.remove(self._isa_parents) - self._isa_parents = gtk.HBox() - self._tab.attach(self._isa_parents, 1, 2, 1, 2, gtk.FILL, gtk.FILL, 5, 5) - - -class GeneOntologyTree (gtk.HPaned): - - def __init__(self, network): - gtk.HPaned.__init__(self) - self.set_position(400) - - treemodel = geneontology.get_go_treestore(network) - self._treemodel = treemodel - self._tree_view = gtk.TreeView(treemodel) - - self._selected_terms = set() - - self._tree_view.set_fixed_height_mode(True) - - # Set up context menu - self._context_menu = GoTermContextMenu(treemodel, self._tree_view) - self._tree_view.connect('popup_menu', self._popup_menu) - self._tree_view.connect('button_press_event', self._on_button_press) - - renderer = gtk.CellRendererText() - go_column = gtk.TreeViewColumn('GO ID', renderer, text=0) - go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) - go_column.set_fixed_width(200) - go_column.set_resizable(True) - self._tree_view.insert_column(go_column, 0) - - renderer = gtk.CellRendererToggle() - renderer.set_property('activatable', True) - renderer.connect('toggled', self._toggle_selected) - renderer.set_active(True) - renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE) - go_column = gtk.TreeViewColumn('T', renderer, active=2) - go_column.set_fixed_width(20) - go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) - go_column.set_resizable(True) - self._tree_view.insert_column(go_column, 1) - - renderer = gtk.CellRendererText() - go_column = gtk.TreeViewColumn('Name', renderer, text=1) - go_column.set_fixed_width(200) - go_column.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) - go_column.set_resizable(True) - self._tree_view.insert_column(go_column, 2) - - self._desc_view = GoTermView() - - self._tree_view.connect('cursor-changed', self._on_cursor_changed) - - scrolled_window = gtk.ScrolledWindow() - scrolled_window.add(self._tree_view) - self.add1(scrolled_window) - self.add2(self._desc_view) - self.show_all() - - def _on_cursor_changed(self, tree): - path, col = self._tree_view.get_cursor() - current = self._treemodel.get_iter(path) - term = self._treemodel.get_value(current, 3) - self._desc_view.set_go_term(term) - - - ## - ## GTK Callback functions - ## - def _popup_menu(self, *rest): - self.menu.popup(None, None, None, 0, 0) - - def _on_button_press(self, widget, event): - path = widget.get_path_at_pos(int(event.x), int(event.y)) - iter = None - - if path: - iter = self._treemodel.get_iter(path[0]) - obj = self._treemodel.get_value(iter, 3) - else: - obj = None - - self._context_menu.set_current_term(obj, iter) - - if event.button == 3: - self._context_menu.popup(None, None, None, event.button, event.time) - - def _toggle_selected(self, renderer, path): - iter = self._treemodel.get_iter(path) - - selected = self._treemodel.get_value(iter, 2) - id = self._treemodel.get_value(iter, 0) - - self._treemodel.set_value(iter, 2, not selected) - - if selected: - self._selected_terms.remove(id) - else: - self._selected_terms.add(id) - - -class GoTermContextMenu (gtk.Menu): - """Context menu for GO terms in the gene ontology browser""" - - def __init__(self, treemodel, treeview): - self._treemodel = treemodel - self._treeview = treeview - self._current_term = None - self._current_iter = None - - gtk.Menu.__init__(self) - - # Popuplate tree - self._expand_item = i = gtk.MenuItem('Expand') - i.connect('activate', self._on_expand_subtree, treemodel, treeview) - self.append(i) - i.show() - - self._collapse_item = i = gtk.MenuItem('Collapse') - i.connect('activate', self._on_collapse_subtree, treemodel, treeview) - self.append(i) - i.show() - - self._select_subtree_item = i = gtk.MenuItem('Select subtree') - i.connect('activate', self._on_select_subtree, treemodel, treeview) - self.append(i) - i.show() - - def set_current_term(self, term, it): - self._current_term = term - self._current_iter = it - - def _on_expand_subtree(self, item, treemodel, treeview): - path = treemodel.get_path(self._current_iter) - treeview.expand_row(path, True) - - def _on_collapse_subtree(self, item, treemodel, treeview): - treeview.collapse_row(treemodel.get_path(self._current_iter)) - - def _on_select_subtree(self, item, treemodel, treeview): - logger.log('notice', 'Selecting subtree from GO id: %s (%s)' % - (self._current_term['id'], self._current_term['name'])) - ids = [x['id'] for x in networkx.bfs(go, self._current_term)] - project.project.set_selection('go-terms', set(ids)) - - -class LoadGOFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'load-go', 'Load Gene Ontology') - - def run(self): - global go - if go: - return - - go = geneontology.read_default_go() - browser = GeneOntologyTree(go) - label = gtk.Label('_Gene Ontology') - label.set_use_underline(True) - fluents.app['bottom_notebook'].append_page(browser, label) - -class LoadAnnotationsFunction(workflow.Function): - - def __init__(self): - workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations') - self.annotations = None - - def run(self): - global evidence - f = open(GO_DATA_DIR + '/goa-condensed') - ev_codes = f.readline().split() - go_terms = [] - - lines = f.readlines() - m = zeros((len(lines), len(ev_codes))) - - for i, l in enumerate(lines): - values = l.split() - go_terms.append(values[0]) - for j, v in enumerate(values[1:]): - m[i,j] = float(v.strip()) - - d = dataset.Dataset(m, - [['go-terms', go_terms], ['evidence', ev_codes]], - name='GO evidence') - - evidence = d - return [d] - - -class GOWeightDialog(gtk.Dialog): - def __init__(self): - gtk.Dialog.__init__(self, 'GO Gene List Influence', - None, - gtk.DIALOG_MODAL | gtk.DIALOG_DESTROY_WITH_PARENT, - (gtk.STOCK_OK, gtk.RESPONSE_OK, - gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL)) - - table = gtk.Table(2, 2) - - sim_lbl = gtk.Label('Similarity threshold: ') - table.attach(sim_lbl, 0, 1, 0, 1) - adjustment = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0) - sim_spin = gtk.SpinButton(adjustment, 0.0, 2) - table.attach(sim_spin, 1, 2, 0, 1) - - rank_lbl = gtk.Label('Rank threshold: ') - table.attach(rank_lbl, 0, 1, 1, 2) - rank_adj = gtk.Adjustment(0, 0, 10, 0.1, 1.0, 1.0) - rank_spin = gtk.SpinButton(rank_adj, 0.0, 2) - table.attach(rank_spin, 1, 2, 1, 2) - - sim_lbl.show() - sim_spin.show() - rank_lbl.show() - rank_spin.show() - - table.show() - self.vbox.add(table) - self._sim_spin = sim_spin - self._rank_spin = rank_spin - - def set_options(self, options): - self._sim_spin.set_value(options['similarity_threshold']) - self._rank_spin.set_value(options['rank_threshold']) - - def set_editable(self, editable): - self._sim_spin.set_sensitive(editable) - self._rank_spin.set_sensitive(editable) - - def update_options(self, options): - options['similarity_threshold'] = self._sim_spin.get_value() - options['rank_threshold'] = self._rank_spin.get_value() - - -class DistanceToSelectionFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'dist-to-sel', 'Dist. to Selection') - self.options = DistanceToSelectionOptions() - - def run(self, similarities, selection): - self.show_gui(similarities, self.options) - - retval = [] - - dims = similarities.get_dim_name() - if dims[0] != "_%s" %dims[1] and dims[1] != "_%s" %dims[0]: - logger.log('warning', 'Are you sure this is a similarity matrix?') - - dim = dims[0] - print "dim", dim - - print "selection", selection[dim] - print "indices", similarities.get_indices(dim, selection[dim]) - indices = similarities.get_indices(dim, selection[dim]) - m = apply_along_axis(max, 1, similarities.asarray().take(indices, 1)) - retval.append(dataset.Dataset(m, [(dim, similarities[dim]), - ("_dummy", '0')])) - - return retval - - def show_gui(self, similarities, options, edit=True): - dialog = DistanceToSelectionOptionsDialog([similarities], self.options) - response = dialog.run() - dialog.hide() - if response == gtk.RESPONSE_OK: - dialog.set_output() - return dialog.get_options() - else: - return options - -class GOWeightFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'load-go-ann', 'GO Influence') - self.options = GOWeightOptions() - - def run(self, genelist, similarity): - ## Show dialog box - self.show_gui(self.options) - - ## assure that data is "correct", i.e., that we can perform - ## the desired operations. - common_dims = genelist.common_dims(similarity) - if len(common_dims) == 0: - logger.log('error', 'No common dimension in the selected datasets.') - elif len(common_dims) > 1: - logger.log('error', "More than one common dimension in the " + - "selected datasets. Don't know what to do.") - gene_dim = common_dims[0] - logger.log('debug', 'Assuming genes are in dimension: %s' % gene_dim) - - ## Do the calculations. - d = {} - - - def show_gui(self, options, edit=True): - dialog = GOWeightDialog() - dialog.set_options(self.options) - dialog.show_all() - dialog.set_editable(edit) - response = dialog.run() - dialog.hide() - if response == gtk.RESPONSE_OK: - return dialog.update_options(self.options) - else: - return options - - -class DistanceToSelectionOptionsDialog(workflow.OptionsDialog): - def __init__(self, data, options): - workflow.OptionsDialog.__init__(self, data, options, ['X']) - - -class TTestOptionsDialog(workflow.OptionsDialog): - - def __init__(self, data, options): - workflow.OptionsDialog.__init__(self, data, options, - ['X', 'Categories']) - - vb = gtk.VBox() - l = gtk.Label("Limit") - adj = gtk.Adjustment(0, 0.0, 1.0, 0.01, 1.0, 1.0) - sb = gtk.SpinButton(adj, 0.0, 2) - l.show() - sb.show() - - vb.add(l) - vb.add(sb) - vb.show() - self.nb.insert_page(vb, gtk.Label("Limit"), -1) - - -class TTestFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 't-test', 't-test') - self.options = TTestOptions() - - def run(self, x, categories): - self.show_gui(x, categories) - - retval = [] - m = x.asarray() - c = categories.asarray() - - # Nonsmokers and current smokers - ns = m.take(nonzero(c[:,0]), 0)[0] - cs = m.take(nonzero(c[:,2]), 0)[0] - - tscores = stats.ttest_ind(ns, cs) - - print "Out data:", self.options['out_data'] - tds = dataset.Dataset(tscores[0], [('gene_id', x['gene_id']), - ('_t', ['0'])], - name='t-values') - if 't-value' in self.options['out_data']: - retval.append(tds) - - pds = dataset.Dataset(tscores[1], [('gene_id', x['gene_id']), - ('_p', ['0'])], - name='p-values') - if 'p-value' in self.options['out_data']: - retval.append(pds) - - if ProbabilityHistogramPlot in self.options['out_plots']: - retval.append(ProbabilityHistogramPlot(pds)) - - if VolcanoPlot in self.options['out_plots']: - fc = apply_along_axis(mean, 0, ns) / apply_along_axis(mean, 0, cs) - fcds = dataset.Dataset(fc, [('gene_id', x['gene_id']), - ('_dummy', ['0'])], - name="Fold change") - - retval.append(VolcanoPlot(fcds, pds, 'gene_id')) - - return retval - - def show_gui(self, x, categories): - dialog = TTestOptionsDialog([x, categories], self.options) - response = dialog.run() - dialog.hide() - if response == gtk.RESPONSE_OK: - dialog.set_output() - return dialog.get_options() - else: - return options - - -class SetICFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'set-ic', 'Set IC') - - def run(self, ds): - if 'go-terms' in ds.get_dim_name(): - main.workflow.current_ic = ds - else: - logger.log('warning', 'Cannot use this dataset as IC on the go-terms dimension') - return - -class PlotDagFunction(workflow.Function): - def __init__(self): - workflow.Function.__init__(self, 'go-dag', 'Build DAG') - - def run(self, selection): - g = self.get_network(list(selection['go-terms'])) - ds = dataset.GraphDataset(networkx.adj_matrix(g), - [('go-terms', g.nodes()), ('_go-terms', g.nodes())], - name="DAG") - return [ThresholdDagPlot(g)] - - def get_network(self, terms, subtree='bp'): - """Returns a DAG connecting the given terms by including their parents - up to the level needed to connect them. The subtree parameter is one of - mf - molecular function - bp - biological process - cc - cellular component""" - - rpy.r.library("GOstats") - - if subtree == 'mf': - subtree_r = rpy.r.GOMFPARENTS - elif subtree == 'bp': - subtree_r = rpy.r.GOBPPARENTS - elif subtree == 'cc': - subtree_r = rpy.r.GOCCPARENTS - else: - raise Exception("Unknown subtree. Use mf, bp or cc.") - - g = rpy.r.GOGraph(terms, subtree_r) - edges = rpy.r.edges(g) - - nxgraph = networkx.DiGraph() - for child, d in edges.items(): - for parent in d.keys(): - nxgraph.add_edge(parent, child) - - return nxgraph - - -class TTestOptions(workflow.Options): - - def __init__(self): - workflow.Options.__init__(self) - self['all_plots'] = [(ProbabilityHistogramPlot, 'Histogram', True), - (VolcanoPlot, 'Histogram', True)] - self['all_data'] = [('t-value', 't-values', True), - ('p-value', 'Probabilities', True), - ('categories', 'Categories', False)] - self['out_data'] = ['t-value', 'p-value'] - - -class DistanceToSelectionOptions(workflow.Options): - def __init__(self): - workflow.Options.__init__(self) - self['all_data'] = [('mindist', 'Minimum distance', True)] - - -class GOWeightOptions(workflow.Options): - def __init__(self): - workflow.Options.__init__(self) - self['similarity_threshold'] = 0.0 - self['rank_threshold'] = 0.0 - -class ProbabilityHistogramPlot(plots.HistogramPlot): - def __init__(self, ds): - plots.HistogramPlot.__init__(self, ds, name="Confidence", bins=50) - - -class VolcanoPlot(plots.ScatterPlot): - def __init__(self, fold_ds, p_ds, dim, **kw): - plots.ScatterPlot.__init__(self, fold_ds, p_ds, 'gene_id', '_dummy', - '0', '0', - name="Volcano plot", - sel_dim_2='_p', **kw) - - -class DagPlot(plots.Plot): - def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40, - with_labels=False, name='DAG Plot'): - - plots.Plot.__init__(self, name) - self.nodes = graph.nodes() - self._map_ids = self.nodes - self.graph = graph - self._pos = pos - self._cmap = matplotlib.cm.summer - self._nodesize = nodesize - self._nodecolor = nodecolor - self._with_labels = with_labels - self.visible = set() - - self.current_dim = dim - - if not self._pos: - self._pos = self._calc_pos(graph) - self._xy = asarray([self._pos[node] for node in self.nodes]) - self.xaxis_data = self._xy[:,0] - self.yaxis_data = self._xy[:,1] - - # Initial draw - self.default_props = {'nodesize' : 50, - 'nodecolor' : 'blue', - 'edge_color' : 'gray', - 'edge_color_selected' : 'red'} - self.node_collection = None - self.edge_collection = None - self.node_labels = None - lw = zeros(self.xaxis_data.shape) - self.node_collection = self.axes.scatter(self.xaxis_data, self.yaxis_data, - s=self._nodesize, - c=self._nodecolor, - linewidth=lw, - zorder=3) - self._mappable = self.node_collection - self._mappable.set_cmap(self._cmap) - - # selected nodes is a transparent graph that adjust node-edge visibility - # according to the current selection needed to get get the selected - # nodes 'on top' as zorder may not be defined individually - self.selected_nodes = self.axes.scatter(self.xaxis_data, - self.yaxis_data, - s=self._nodesize, - c=self._nodecolor, - edgecolor='r', - linewidth=lw, - zorder=4, - alpha=0) - - edge_color = self.default_props['edge_color'] - self.edge_collection = networkx.draw_networkx_edges(self.graph, - self._pos, - ax=self.axes, - edge_color=edge_color) - # edge color rgba-arrays - self._edge_color_rgba = matlib.repmat(plots.ColorConverter().to_rgba(edge_color), - self.graph.number_of_edges(),1) - self._edge_color_selected = plots.ColorConverter().to_rgba(self.default_props['edge_color_selected']) - if self._with_labels: - self.node_labels = networkx.draw_networkx_labels(self.graph, - self._pos, - ax=self.axes) - - # remove axes, frame and grid - self.axes.set_xticks([]) - self.axes.set_yticks([]) - self.axes.grid(False) - self.axes.set_frame_on(False) - self.fig.subplots_adjust(left=0, right=1, bottom=0, top=1) - - def _calc_pos(self, graph): - """Calculates position for graph nodes using 'dot' layout.""" - gv_graph = networkx.DiGraph() - for start, end in graph.edges(): - gv_graph.add_edge(start.replace('GO:', ''), end.replace('GO:', '')) - - pos_gv = networkx.pygraphviz_layout(gv_graph, prog="dot") - pos = {} - for k, v in pos_gv.items(): - if k != "all": - pos["GO:%s" % k] = v - else: - pos[k] = v - return pos - - def points_in_rect(self, x1, y1, x2, y2, key): - ydata = self.yaxis_data - xdata = self.xaxis_data - - # find indices of selected area - if x1>x2: - x1, x2 = x2, x1 - if y1>y2: - y1, y2 = y2, y1 - assert x1<=x2 - assert y1<=y2 - - index = nonzero((xdata>x1) & (xdatay1) & (ydata 0: - linewidth[index] = 2 - idents = selection[self.current_dim] - edge_index = [i for i,edge in enumerate(self.graph.edges()) if (edge[0] in idents and edge[1] in idents)] - if len(edge_index)>0: - for i in edge_index: - edge_color_rgba[i,:] = self._edge_color_selected - self._A = None - - self.edge_collection._colors = edge_color_rgba - self.selected_nodes.set_linewidth(linewidth) - self.canvas.draw() - - def is_mappable_with(self, obj): - """Returns True if dataset/selection is mappable with this plot. - """ - if isinstance(obj, fluents.dataset.Dataset): - if self.current_dim in obj.get_dim_name(): - return True - return False - - def _update_color_from_dataset(self, ds): - """Updates the facecolors from a dataset. - """ - - array = ds.asarray() - - #only support for 2d-arrays: - try: - m, n = array.shape - except: - raise ValueError, "No support for more than 2 dimensions." - # is dataset a vector or matrix? - if not n==1: - # we have a category dataset - if isinstance(ds, fluents.dataset.CategoryDataset): - vec = dot(array, diag(arange(n))).sum(1) - else: - vec = array.sum(1) - else: - vec = array.ravel() - - indices = ds.get_indices(self.current_dim, self.nodes) - nodes = ds.existing_identifiers(self.current_dim, self.nodes) - - v = vec.take(indices, 0) - vec_min = min(vec[vec > -inf]) - vec_max = max(vec[vec < inf]) - v[v==inf] = vec_max - v[v==-inf] = vec_min - - d = dict(zip(nodes, list(v))) - - map_vec = zeros(len(self.nodes)) - for i, n in enumerate(self.nodes): - map_vec[i] = d.get(n, vec_min) - - # update facecolors - self.node_collection.set_array(map_vec) - self.node_collection.set_clim(vec_min, vec_max) - self.node_collection.update_scalarmappable() #sets facecolors from array - - self.canvas.draw() - - -class ThresholdDagPlot(DagPlot, plots.PlotThresholder): - def __init__(self, graph, dim='go-terms', pos=None, nodecolor='b', nodesize=40, - with_labels=False, name='DAG Plot'): - DagPlot.__init__(self, graph, dim='go-terms', pos=None, - nodecolor='b', nodesize=40, - with_labels=False, name='DAG Plot') - plots.PlotThresholder.__init__(self, "IC") - - def rectangle_select_callback(self, x1, y1, x2, y2, key): - ids = self.points_in_rect(x1, y1, x2, y2, key) - ids = self.visible.intersection(ids) - ids = self.update_selection(ids, key) - - self.selection_listener(self.current_dim, ids) - - def _update_color_from_dataset(self, ds): - DagPlot._update_color_from_dataset(self, ds) - self.set_threshold_dataset(ds) - a = ds.asarray() - a_max = max(a[a-inf]) - self._sb_min.set_range(a_min-0.1, a_max+0.1) - self._sb_min.set_value(a_min-0.1) - self._sb_max.set_range(a_min-0.1, a_max+0.1) - self._sb_max.set_value(a_max+0.1) - - diff --git a/workflows/smokers.py b/workflows/smokers.py deleted file mode 100644 index 995268a..0000000 --- a/workflows/smokers.py +++ /dev/null @@ -1,445 +0,0 @@ -import sys,os -import webbrowser - -from fluents import logger, plots,workflow,dataset,main -from fluents.lib import blmfuncs,nx_utils,validation,engines,cx_stats,cx_utils -import gobrowser, geneontology -import scipy -import networkx as nx - - -class SmallTestWorkflow(workflow.Workflow): - name = 'Smokers' - ident = 'smokers' - description = 'A small test workflow for gene expression analysis.' - - def __init__(self): - workflow.Workflow.__init__(self) - - # DATA IMPORT - load = workflow.Stage('load', 'Data') - load.add_function(DatasetLoadFunctionSmokerSmall()) - load.add_function(DatasetLoadFunctionSmokerMedium()) - load.add_function(DatasetLoadFunctionSmokerFull()) - load.add_function(DatasetLoadFunctionSmokerGO()) - #load.add_function(DatasetLoadFunctionCYCLE()) - self.add_stage(load) - - # NETWORK PREPROCESSING - #net = workflow.Stage('net', 'Network integration') - #net.add_function(DiffKernelFunction()) - #net.add_function(ModKernelFunction()) - #net.add_function(RandDiffKernelFunction()) - #self.add_stage(net) - - # BLM's - model = workflow.Stage('models', 'Models') - model.add_function(blmfuncs.PCA()) - model.add_function(blmfuncs.PLS()) - model.add_function(blmfuncs.LPLS()) - model.add_function(SAM()) - - #model.add_function(bioconFuncs.SAM(app)) - self.add_stage(model) - - query = workflow.Stage('query', 'Gene Query') - query.add_function(NCBIQuery()) - query.add_function(KEGGQuery()) - self.add_stage(query) - - # Gene Ontology - go = workflow.Stage('go', 'Gene Ontology') - go.add_function(gobrowser.LoadGOFunction()) - go.add_function(gobrowser.SetICFunction()) - # go.add_function(gobrowser.GOWeightFunction()) - # go.add_function(gobrowser.DistanceToSelectionFunction()) - # go.add_function(gobrowser.TTestFunction()) - go.add_function(gobrowser.PlotDagFunction()) - go.add_function(GoEnrichment()) - go.add_function(GoEnrichmentCond()) - self.add_stage(go) - - # EXTRA PLOTS - #plt = workflow.Stage('net', 'Network') - #plt.add_function(nx_analyser.KeggNetworkAnalyser()) - #self.add_stage(plt) - - logger.log('debug', 'Small test workflow is now active') - - -class DatasetLoadFunctionSmokerSmall(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_small', 'Smoker (Small)') - - def run(self): - path = 'data/smokers-small/' - files = os.listdir(path) - out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) - return out - - -class DatasetLoadFunctionSmokerMedium(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_medium', 'Smoker (Medium)') - - def run(self): - path = 'data/smokers-medium/' - files = os.listdir(path) - out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) - return out - - -class DatasetLoadFunctionSmokerFull(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_full', 'Smoker (Full)') - - def run(self): - path = 'data/smokers-full/' - files = os.listdir(path) - out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) - return out - -class DatasetLoadFunctionSmokerGO(workflow.Function): - """Loader for all ftsv files of smokers small datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_go', 'Smoker (GO)') - - def run(self): - path = 'data/smokers-go/' - files = os.listdir(path) - out = [] - for fname in files: - if fname.endswith('.ftsv'): - input_file = open(os.path.join(path, fname)) - out.append(dataset.read_ftsv(input_file)) - return out - -class DatasetLoadFunctionCYCLE(workflow.Function): - """Loader for pickled CYCLE datasets.""" - def __init__(self): - workflow.Function.__init__(self, 'load_data', 'Cycle') - - def run(self): - filename='fluents/data/CYCLE' - if filename: - return dataset.from_file(filename) - - -##### WORKFLOW SPECIFIC FUNCTIONS ###### -class SAM(workflow.Function): - def __init__(self, id='sam', name='SAM'): - workflow.Function.__init__(self, id, name) - - def run(self, x, y): - - n_iter = 50 #B - alpha = 0.01 #cut off on qvals - - ############### - - # Main function call - - # setup prelimenaries - import rpy - rpy.r.library("siggenes") - rpy.r.library("multtest") - - cl = scipy.dot(y.asarray(), scipy.diag(scipy.arange(y.shape[1]))).sum(1) - data = x.asarray().T - sam = rpy.r.sam(data, cl=cl, B=n_iter, var_equal=False,med=False,s0=scipy.nan,rand=scipy.nan) - qvals = scipy.asarray(rpy.r.slot(sam, "p.value")) - pvals = scipy.asarray(rpy.r.slot(sam, "q.value")) - - sam_index = (qvals