Added matrix-getter to Dataset.
Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data.
This commit is contained in:
parent
cebf801cf6
commit
97e09b39a1
|
@ -160,6 +160,10 @@ class Dataset:
|
|||
"""Returns dataset name"""
|
||||
return self._name
|
||||
|
||||
def get_matrix(self):
|
||||
"""Returns internal numeric matrix for dataset."""
|
||||
return self._array
|
||||
|
||||
def get_all_dims(self):
|
||||
"""Returns all dimensions in project"""
|
||||
return self._all_dims
|
||||
|
|
|
@ -36,6 +36,10 @@ class EinarsWorkflow (Workflow):
|
|||
regression.add_function(Function('pls', 'PLS'))
|
||||
self.add_stage(regression)
|
||||
|
||||
explore = Stage('explore', 'Explorative analysis')
|
||||
explore.add_function(PCAFunction(self))
|
||||
self.add_stage(explore)
|
||||
|
||||
save = Stage('save', 'Save Data')
|
||||
save.add_function(DatasetSaveFunction())
|
||||
self.add_stage(save)
|
||||
|
@ -191,6 +195,7 @@ class CelFileImportFunction(Function):
|
|||
|
||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
|
||||
silent_eval('E <- rma(E)')
|
||||
|
||||
m = rpy.r('m <- E@exprs')
|
||||
|
||||
|
@ -202,8 +207,40 @@ class CelFileImportFunction(Function):
|
|||
rpy.r.rm(["E", "m"])
|
||||
|
||||
if m:
|
||||
return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), "AffyMatrix Data")]
|
||||
return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")]
|
||||
else:
|
||||
logger.log("notice", "No data loaded from importer.")
|
||||
finally:
|
||||
chooser.destroy()
|
||||
|
||||
|
||||
class PCAFunction(Function):
|
||||
"""Generic PCA function."""
|
||||
def __init__(self, workflow):
|
||||
Function.__init__(self, 'pca', 'PCA')
|
||||
self._workflow = workflow
|
||||
|
||||
def run(self, data):
|
||||
import rpy
|
||||
|
||||
dim_2, dim_1 = data.get_dim_names()
|
||||
|
||||
|
||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
|
||||
silent_eval("t = prcomp(t(m))")
|
||||
|
||||
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
|
||||
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
|
||||
("component", T_ids)], name="T")
|
||||
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
|
||||
("component", T_ids)], name="P")
|
||||
|
||||
# cleanup
|
||||
rpy.r.rm(["t", "m"])
|
||||
|
||||
loading_plot1 = plots.ScatterPlot(self._workflow.project,P,'ids','component','1','2')
|
||||
loading_plot2 = plots.ScatterPlot(self._workflow.project,P,'ids','component','3','4')
|
||||
score_plot = plots.ScatterPlot(self._workflow.project,T,'filename','component','1','2')
|
||||
|
||||
return [T, P, loading_plot1, loading_plot2, score_plot]
|
||||
|
|
Reference in New Issue