# Provenance: workflows/ma_workflow.py, introduced by commit
# fe0cf736f584775b162bf2ad383fef05755cc25c ("Created affy workflow."),
# author tangstad, Thu, 27 Apr 2006 12:15:30 +0000.
"""Affy workflow: load, explore (PCA) and save microarray datasets."""

import gtk
from system import dataset, logger, plots, workflow
from scipy import randn
import cPickle


def _build_file_chooser(title, action, accept_stock, filter_name, filter_pattern):
    """Create a file chooser with one specific filter plus an "All Files" filter.

    title          -- window title of the dialog.
    action         -- gtk.FILE_CHOOSER_ACTION_* constant.
    accept_stock   -- stock id of the button that answers gtk.RESPONSE_OK.
    filter_name    -- display name of the specific file filter.
    filter_pattern -- glob pattern of the specific file filter.

    The caller owns the returned dialog and must destroy() it.
    """
    chooser = gtk.FileChooserDialog(title=title, parent=None,
                                    action=action,
                                    buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                             accept_stock, gtk.RESPONSE_OK))

    specific_filter = gtk.FileFilter()
    specific_filter.set_name(filter_name)
    specific_filter.add_pattern(filter_pattern)

    all_filter = gtk.FileFilter()
    all_filter.set_name("All Files (*.*)")
    all_filter.add_pattern("*")

    # The specific filter is added first, as in the original dialogs.
    chooser.add_filter(specific_filter)
    chooser.add_filter(all_filter)
    return chooser


def _r_string_literal(text):
    """Return text as a double-quoted R string literal.

    Backslashes and double quotes are escaped so that paths such as
    C:\\data\\new.cel or names containing quotes cannot change the meaning
    of the R command they are spliced into.
    """
    return '"%s"' % text.replace('\\', '\\\\').replace('"', '\\"')


class AffyWorkflow (workflow.Workflow):
    """Workflow with three stages: load data, explorative analysis, save data."""

    name = 'Affy Workflow'
    ident = 'affy'
    description = 'Affymetrics Workflow. Analysis of Affy-data.'

    def __init__(self, app):
        """Register the stages and their functions on the workflow."""
        workflow.Workflow.__init__(self, app)

        load = workflow.Stage('load', 'Load Data')
        load.add_function(CelFileImportFunction())
        load.add_function(TestDataFunction())
        load.add_function(DatasetLoadFunction())
        self.add_stage(load)

        explore = workflow.Stage('explore', 'Explorative analysis')
        explore.add_function(PCAFunction(self))
        self.add_stage(explore)

        save = workflow.Stage('save', 'Save Data')
        save.add_function(DatasetSaveFunction())
        self.add_stage(save)


class TestDataFunction(workflow.Function):
    """Generates a random dataset for trying out the application."""

    def __init__(self):
        workflow.Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self, data):
        """Return a 20x30 random Dataset and a SinePlot; `data` is ignored."""
        logger.log('notice', 'Injecting foo test data')
        x = randn(20, 30)
        X = dataset.Dataset(x)
        return [X, plots.SinePlot()]


class DatasetLoadFunction(workflow.Function):
    """Loader for previously pickled Datasets."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')

    def run(self, data):
        """Ask for a pickle file and return its content in a one-element list.

        Returns None when the dialog is cancelled; `data` is ignored.
        """
        chooser = _build_file_chooser("Load pickled data...",
                                      gtk.FILE_CHOOSER_ACTION_OPEN,
                                      gtk.STOCK_OPEN,
                                      "Python pickled data files (*.pkl)",
                                      "*.[pP][kK][lL]")
        try:
            if chooser.run() != gtk.RESPONSE_OK:
                return None

            # Pickles are binary data: open in binary mode and always close.
            infile = open(chooser.get_filename(), "rb")
            try:
                # NOTE(security): unpickling can execute arbitrary code;
                # only open files that come from a trusted source.
                return [cPickle.load(infile)]
            finally:
                infile.close()
        finally:
            chooser.destroy()


class DatasetSaveFunction(workflow.Function):
    """QND way to save data to file for later import to this program."""

    def __init__(self):
        workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self, data):
        """Pickle `data` (protocol 2) to a file chosen by the user.

        Does nothing but log a notice when `data` is empty; returns None.
        """
        if not data:
            logger.log("notice", "No data to save.")
            return

        chooser = _build_file_chooser("Save pickled data...",
                                      gtk.FILE_CHOOSER_ACTION_SAVE,
                                      gtk.STOCK_SAVE,
                                      "Python pickled data files (*.pkl)",
                                      "*.[pP][kK][lL]")
        chooser.set_current_name(data.get_name() + ".pkl")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                filename = chooser.get_filename()
                # Protocol 2 is a binary format, so the file must be opened
                # in binary mode; close it explicitly so the data is flushed
                # before success is reported.
                outfile = open(filename, "wb")
                try:
                    cPickle.dump(data, outfile, protocol=2)
                finally:
                    outfile.close()
                logger.log("notice", "Saved data to %r." % filename)
        finally:
            chooser.destroy()


class CelFileImportFunction(workflow.Function):
    """Loads Affymetrix .CEL-files into matrix."""

    def __init__(self):
        workflow.Function.__init__(self, 'cel_import', 'Import Affy')

    def run(self, data):
        """Read the selected CEL files through R and return [Dataset, LinePlot].

        The files are read with ReadAffy() and processed with rma() from the
        R package "affy". Returns None when the dialog is cancelled or no
        data was produced; `data` is ignored.
        """
        # Imported here so the module can be loaded without rpy installed.
        import rpy

        chooser = _build_file_chooser("Select cel files...",
                                      gtk.FILE_CHOOSER_ACTION_OPEN,
                                      gtk.STOCK_OPEN,
                                      "Cel Files (*.cel)",
                                      "*.[cC][eE][lL]")
        chooser.set_select_multiple(True)

        try:
            if chooser.run() != gtk.RESPONSE_OK:
                return None

            filenames = chooser.get_filenames()
            if not filenames:
                # An empty vector must never reach ReadAffy().
                logger.log("notice", "No data loaded from importer.")
                return None

            rpy.r.library("affy")

            # NO_CONVERSION keeps the (large) intermediate R objects in R.
            silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
            r_names = ", ".join([_r_string_literal(name) for name in filenames])
            silent_eval('E <- ReadAffy(filenames=c(%s))' % r_names)
            silent_eval('E <- rma(E)')

            m = rpy.r('m <- E@exprs')

            vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
            rownames = vector_eval('rownames(m)')
            colnames = vector_eval('colnames(m)')

            # We should be nice and clean up after ourselves
            rpy.r.rm(["E", "m"])

            # Test for rows explicitly: the truth value of a multi-element
            # array is ambiguous (NumPy raises ValueError for it).
            if m is not None and len(m) > 0:
                data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)),
                                       name="Affymetrics Data")
                plot = plots.LinePlot(data, "Gene profiles")
                return [data, plot]

            logger.log("notice", "No data loaded from importer.")
            return None
        finally:
            chooser.destroy()


class PCAFunction(workflow.Function):
    """Generic PCA function."""

    def __init__(self, wf):
        workflow.Function.__init__(self, 'pca', 'PCA')
        self._workflow = wf

    def run(self, data):
        """Run R's prcomp() on the transposed matrix of `data`.

        Returns [T, P, loading_plot, score_plot]: T is built from prcomp's
        `x` and P from its `rotation`; both get a "component" dimension
        whose identifiers are "1", "2", ...
        """
        # Imported here so the module can be loaded without rpy installed.
        import rpy

        # The two dimension names of the dataset, in the order it reports them.
        dim_2, dim_1 = data.get_dim_names()

        silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
        rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
        silent_eval("t = prcomp(t(m))")

        # Second entry of dim(t$x) is the number of components (columns).
        n_components = rpy.r("dim(t$x)")[1]
        T_ids = [str(i) for i in range(1, n_components + 1)]

        T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
                                           ("component", T_ids)], name="T")
        P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
                                                  ("component", T_ids)], name="P")

        # cleanup
        rpy.r.rm(["t", "m"])

        # Use the dataset's own dimension names instead of the hard-coded
        # 'ids'/'filename', so the PCA also works on data that did not come
        # from the CEL importer.
        loading_plot = plots.ScatterPlot(P, dim_2, 'component', '1', '2', "Loadings")
        score_plot = plots.ScatterPlot(T, dim_1, 'component', '1', '2', "Scores")

        return [T, P, loading_plot, score_plot]