import gtk
import logger
from annotations import Annotations
from workflow import *
import plots
import dataset
#import geneontology
#import gostat
import rpy
from scipy import array,randn
import cPickle


class EinarsWorkflow (Workflow):
    """Workflow made of five stages: load, preprocess, go, regression, save.

    Each stage is populated with the functions defined in this module, plus
    a few plain ``Function`` entries ('load', 'rma', 'pls') that carry only
    an id and a label here.
    """

    def __init__(self, app):
        """Build all stages and register them on the workflow.

        app -- passed straight through to ``Workflow.__init__``.
        """
        Workflow.__init__(self, app)
        self.name = 'Einar\'s Workflow'

        # Stage 1: getting data into the program.
        load = Stage('load', 'Load Data')
        load.add_function(Function('load', 'Load Microarrays'))
        load.add_function(CelFileImportFunction())
        load.add_function(TestDataFunction())
        load.add_function(DatasetLoadFunction())
        self.add_stage(load)

        # Stage 2: preprocessing.
        preproc = Stage('preprocess', 'Preprocessing')
        preproc.add_function(Function('rma', 'RMA'))
        self.add_stage(preproc)

        # Stage 3: gene ontology annotations and distances.
        go = Stage('go', 'Gene Ontology Data')
        go.add_function(LoadAnnotationsFunction())
        go.add_function(GODistanceFunction())
        self.add_stage(go)

        # Stage 4: regression.
        regression = Stage('regression', 'Regression')
        regression.add_function(Function('pls', 'PLS'))
        self.add_stage(regression)

        # Stage 5: writing data back to disk.
        save = Stage('save', 'Save Data')
        save.add_function(DatasetSaveFunction())
        self.add_stage(save)

        logger.log('debug', '\tEinar\'s workflow is now active')


class LoadAnnotationsFunction(Function):
    """Ask the user for an annotation file and load it into an Annotations object."""

    def __init__(self):
        Function.__init__(self, 'load-go-ann', 'Load Annotations')
        # Result of the most recent successful load_file() call, or None.
        self.annotations = None

    def load_file(self, filename):
        """Parse *filename* and store the result in ``self.annotations``.

        Every line that splits into more than one field contributes one
        entry: the first field is used as the 'genes' id and the remaining
        fields form the set of 'go-terms'.  Lines with a single field are
        skipped.  Errors from ``open`` propagate to the caller.
        """
        f = open(filename)
        try:
            self.annotations = Annotations('genes', 'go-terms')
            logger.log('notice', 'Loading annotation file: %s' % filename)

            for line in f:
                # NOTE(review): fields are split on a space followed by a
                # tab -- confirm this matches the annotation file format.
                val = line.split(' \t')
                if len(val) > 1:
                    val = [v.strip() for v in val]
                    # The original code called an undefined name (`retval`)
                    # here, which raised NameError; the annotations belong
                    # in the object created above.
                    self.annotations.add_annotations('genes', val[0],
                                                     'go-terms', set(val[1:]))
        finally:
            # Always release the file handle, even if parsing fails.
            f.close()

    def on_response(self, dialog, response):
        """Handler for the dialog's 'response' signal; loads the file on OK."""
        if response == gtk.RESPONSE_OK:
            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
            self.load_file(dialog.get_filename())

    def run(self, data):
        """Show a file chooser and return ``[self.annotations]``.

        *data* is not used.  If no file is loaded during this call the list
        holds whatever ``self.annotations`` already was (None initially).
        """
        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        try:
            dialog.run()
        finally:
            # Destroy the dialog even when loading raised an exception.
            dialog.destroy()
        return [self.annotations]
class GODistanceFunction(Function):
    """Workflow function that derives a gene distance array from annotations."""

    def __init__(self):
        # NOTE(review): the id is spelled 'go_diatance'; it is left as-is
        # because it is a runtime identifier other code may refer to.
        Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        """Return an array built from the number of gene ids in *data*.

        Returns None when *data* is not an ``Annotations`` instance.
        """
        logger.log('debug', 'datatype: %s' % type(data))
        if not isinstance(data, Annotations):
            return None

        logger.log('debug', 'dimensions: %s' % data.dimensions)

        genes = data.get_ids('genes')
        # NOTE(review): array((n, n)) creates a two-element vector [n, n],
        # not an n-by-n matrix.  An n-by-n distance matrix was presumably
        # intended -- confirm before relying on this result.
        gene_distances = array((len(genes), len(genes)))

        return gene_distances


class TestDataFunction(Function):
    """Workflow function that produces a random dataset and a sine plot."""

    def __init__(self):
        Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self, data):
        """Return ``[Dataset, SinePlot]``; *data* is not used.

        The dataset wraps a 20 x 30 ``randn`` matrix with dimensions named
        'rows' and 'cols', each given an empty id list.
        """
        logger.log('notice', 'Injecting foo test data')
        x = randn(20,30)
        axis_0 = ['rows',[]]
        axis_1 = ['cols',[]]
        X = dataset.Dataset(x,[axis_0,axis_1])
        return [X, plots.SinePlot(None)]


def _add_file_filters(chooser, name, pattern):
    """Add a filter *name* matching *pattern*, then an "All Files" filter."""
    specific = gtk.FileFilter()
    specific.set_name(name)
    specific.add_pattern(pattern)

    everything = gtk.FileFilter()
    everything.set_name("All Files (*.*)")
    everything.add_pattern("*")

    chooser.add_filter(specific)
    chooser.add_filter(everything)


def _escape_for_r(text):
    """Escape backslashes and double quotes so *text* fits in an R "..." literal."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


class DatasetLoadFunction(Function):
    """Loader for previously pickled Datasets."""

    def __init__(self):
        Function.__init__(self, 'load_data', 'Load Pickled Dataset')

    def run(self, data):
        """Let the user pick a pickle file and return ``[unpickled object]``.

        *data* is not used.  Returns None when the dialog is not confirmed.
        """
        # The title used to read "Select cel files...", copied from the CEL
        # importer; this dialog selects a pickle file.
        chooser = gtk.FileChooserDialog(title="Select pickled data file...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        _add_file_filters(chooser,
                          "Python pickled data files (*.pkl)",
                          "*.[pP][kK][lL]")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                # SECURITY: unpickling can execute arbitrary code; only open
                # files that come from a trusted source.
                # Binary mode is required for pickle data (the save function
                # writes protocol 2, which is a binary format).
                infile = open(chooser.get_filename(), "rb")
                try:
                    return [cPickle.load(infile)]
                finally:
                    infile.close()
        finally:
            chooser.destroy()
        return None


class DatasetSaveFunction(Function):
    """QND way to save data to file for later import to this program."""

    def __init__(self):
        Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self, data):
        """Pickle *data* (protocol 2) to a file chosen by the user.

        Logs a notice and returns immediately when *data* is falsy.  The
        suggested file name is ``data.get_name() + ".pkl"``.  Always returns
        None.
        """
        if not data:
            logger.log("notice", "No data to save.")
            return

        chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_SAVE, gtk.RESPONSE_OK))
        _add_file_filters(chooser,
                          "Python pickled data files (*.pkl)",
                          "*.[pP][kK][lL]")
        chooser.set_current_name(data.get_name() + ".pkl")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                # Protocol 2 is binary, so the file must be opened in "wb";
                # text mode can corrupt the stream on some platforms.
                outfile = open(chooser.get_filename(), "wb")
                try:
                    cPickle.dump(data, outfile, protocol=2)
                finally:
                    # Close explicitly so the data is flushed to disk.
                    outfile.close()
                logger.log("notice", "Saved data to %r." % chooser.get_filename())
        finally:
            chooser.destroy()


class CelFileImportFunction(Function):
    """Loads AffyMetrix .CEL-files into matrix."""

    def __init__(self):
        Function.__init__(self, 'cel_import', 'Import Affy')

    def run(self, data):
        """Read user-selected CEL files through R's ``affy`` library.

        *data* is not used.  Returns ``[dataset.Dataset]`` built from the
        expression matrix with dimensions 'ids' (R row names) and
        'filename' (R column names), or None when the dialog is not
        confirmed or no data was returned.
        """
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        chooser.set_select_multiple(True)
        _add_file_filters(chooser, "Cel Files (*.cel)", "*.[cC][eE][lL]")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                rpy.r.library("affy")

                # File names are embedded in an R string literal, so
                # backslashes (e.g. Windows paths) and double quotes must be
                # escaped or the R command is malformed.
                filenames = [_escape_for_r(fn) for fn in chooser.get_filenames()]

                silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
                silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(filenames))

                m = rpy.r('m <- E@exprs')

                vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
                rownames = vector_eval('rownames(m)')
                colnames = vector_eval('colnames(m)')

                # We should be nice and clean up after ourselves
                rpy.r.rm(["E", "m"])

                # Test for presence explicitly: the truth value of a
                # multi-element array is ambiguous (or an error) for array
                # types, and an all-zero matrix is still valid data.
                if m is not None and len(m) > 0:
                    return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), "AffyMatrix Data")]
                else:
                    logger.log("notice", "No data loaded from importer.")
        finally:
            chooser.destroy()
        return None