Created affy workflow.
This commit is contained in:
parent
06805aa5e4
commit
fe0cf736f5
174
workflows/ma_workflow.py
Normal file
174
workflows/ma_workflow.py
Normal file
@ -0,0 +1,174 @@
|
||||
import gtk
|
||||
from system import dataset, logger, plots, workflow
|
||||
from scipy import randn
|
||||
import cPickle
|
||||
|
||||
class AffyWorkflow(workflow.Workflow):
    """Workflow for analysing Affymetrix microarray data.

    Registers three stages, in this order: loading data, explorative
    analysis and saving data.
    """

    name = 'Affy Workflow'
    ident = 'affy'
    description = 'Affymetrics Workflow. Analysis of Affy-data.'

    def __init__(self, app):
        """Initialise the base workflow with `app` and register all stages."""
        workflow.Workflow.__init__(self, app)

        # Each entry: (stage id, stage label, factories for its functions).
        # Factories are called lazily so that every function object is
        # created right before it is added to its stage.
        stage_table = (
            ('load', 'Load Data',
             (CelFileImportFunction, TestDataFunction, DatasetLoadFunction)),
            ('explore', 'Explorative analysis',
             (lambda: PCAFunction(self),)),
            ('save', 'Save Data',
             (DatasetSaveFunction,)),
        )

        for stage_id, stage_label, factories in stage_table:
            stage = workflow.Stage(stage_id, stage_label)
            for make_function in factories:
                stage.add_function(make_function())
            self.add_stage(stage)
|
||||
|
||||
|
||||
class TestDataFunction(workflow.Function):
    """Workflow function that produces a small random dataset for testing."""

    def __init__(self):
        workflow.Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self, data):
        """Return a list with a 20x30 `randn` dataset and a sine plot.

        The incoming `data` argument is not used.
        """
        logger.log('notice', 'Injecting foo test data')
        random_matrix = randn(20, 30)
        return [dataset.Dataset(random_matrix), plots.SinePlot()]
|
||||
|
||||
|
||||
class DatasetLoadFunction(workflow.Function):
    """Loader for previously pickled Datasets."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')

    def run(self, data):
        """Let the user pick a pickle file and load its contents.

        The incoming `data` argument is not used.

        Returns a one-element list holding the unpickled object, or None
        when the user cancels the dialog.
        """
        chooser = gtk.FileChooserDialog(title="Select pickled data file...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                # Binary mode: the files are written with pickle protocol 2,
                # which is a binary format and must not go through newline
                # translation.
                infile = open(chooser.get_filename(), "rb")
                try:
                    # NOTE(security): unpickling can execute arbitrary code;
                    # only open files that come from a trusted source.
                    return [cPickle.load(infile)]
                finally:
                    infile.close()
        finally:
            chooser.destroy()
|
||||
|
||||
|
||||
class DatasetSaveFunction(workflow.Function):
    """QND way to save data to file for later import to this program."""

    def __init__(self):
        workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self, data):
        """Pickle `data` to a file chosen by the user.

        Logs a notice and does nothing when `data` is falsy.  The suggested
        file name is `data.get_name()` with a ".pkl" suffix.  Nothing is
        written when the user cancels the dialog.  Returns None.
        """
        if not data:
            logger.log("notice", "No data to save.")
            return

        chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_SAVE, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)
        chooser.set_current_name(data.get_name() + ".pkl")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                filename = chooser.get_filename()
                # Protocol 2 is a binary format, so the file has to be opened
                # in binary mode to avoid newline translation corrupting it.
                outfile = open(filename, "wb")
                try:
                    cPickle.dump(data, outfile, protocol=2)
                finally:
                    # Close explicitly so the data is flushed to disk before
                    # success is reported.
                    outfile.close()
                logger.log("notice", "Saved data to %r." % filename)
        finally:
            chooser.destroy()
|
||||
|
||||
|
||||
class CelFileImportFunction(workflow.Function):
    """Loads Affymetrix .CEL-files into matrix."""

    def __init__(self):
        workflow.Function.__init__(self, 'cel_import', 'Import Affy')

    def run(self, data):
        """Import user-selected CEL files through R and return the result.

        The selected files are read with ReadAffy and processed with rma
        from the R "affy" library.  The incoming `data` argument is not
        used.

        Returns [dataset, plot] when an expression matrix was produced,
        otherwise None (dialog cancelled or nothing loaded).
        """
        import rpy
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        chooser.set_select_multiple(True)
        cel_filter = gtk.FileFilter()
        cel_filter.set_name("Cel Files (*.cel)")
        cel_filter.add_pattern("*.[cC][eE][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(cel_filter)
        chooser.add_filter(all_filter)

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                rpy.r.library("affy")

                # The file names end up inside R source code, so backslashes
                # and double quotes must be escaped to keep the R string
                # literals intact (e.g. for Windows paths).
                r_literals = ['"%s"' % fn.replace('\\', '\\\\').replace('"', '\\"')
                              for fn in chooser.get_filenames()]

                # NO_CONVERSION keeps the (large) R objects on the R side.
                silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
                silent_eval('E <- ReadAffy(filenames=c(%s))' % ', '.join(r_literals))
                silent_eval('E <- rma(E)')

                m = rpy.r('m <- E@exprs')

                vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
                rownames = vector_eval('rownames(m)')
                colnames = vector_eval('colnames(m)')

                # We should be nice and clean up after ourselves
                rpy.r.rm(["E", "m"])

                # Explicit check: the truth value of a multi-element array is
                # ambiguous and may raise, depending on the array backend.
                if m is not None and len(m) > 0:
                    data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="Affymetrics Data")
                    plot = plots.LinePlot(data, "Gene profiles")
                    return [data, plot]
                else:
                    logger.log("notice", "No data loaded from importer.")
        finally:
            chooser.destroy()
|
||||
|
||||
|
||||
class PCAFunction(workflow.Function):
    """Generic PCA function."""

    def __init__(self, wf):
        workflow.Function.__init__(self, 'pca', 'PCA')
        self._workflow = wf

    def run(self, data):
        """Run R's prcomp on the transposed matrix of `data`.

        Returns [T, P, loading_plot, score_plot], where T is built from the
        prcomp scores (t$x) and P from the prcomp rotation (t$rotation).
        Both datasets get a second dimension called "component" whose
        identifiers are "1", "2", ...
        """
        import rpy

        # NOTE(review): assumes get_dim_names() returns the row dimension
        # first and the column dimension second -- confirm against Dataset.
        dim_2, dim_1 = data.get_dim_names()

        silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
        rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
        silent_eval("t = prcomp(t(m))")

        # One identifier per column of the score matrix.
        n_components = rpy.r("dim(t$x)")[1]
        T_ids = [str(i) for i in range(1, n_components + 1)]

        # prcomp is applied to t(m), so the score rows follow the columns
        # of m (dim_1) and the rotation rows follow the rows of m (dim_2).
        T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
                                           ("component", T_ids)], name="T")
        P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
                                                  ("component", T_ids)], name="P")

        # cleanup
        rpy.r.rm(["t", "m"])

        # Use the dataset's own dimension names rather than hard-coded ones,
        # so the plots also work for data not produced by the CEL importer.
        loading_plot = plots.ScatterPlot(P, dim_2, 'component', '1', '2', "Loadings")
        score_plot = plots.ScatterPlot(T, dim_1, 'component', '1', '2', "Scores")

        return [T, P, loading_plot, score_plot]
|
Reference in New Issue
Block a user