Added matrix-getter to Dataset.
Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data.
This commit is contained in:
parent
cebf801cf6
commit
97e09b39a1
|
@ -160,6 +160,10 @@ class Dataset:
|
||||||
"""Returns dataset name"""
|
"""Returns dataset name"""
|
||||||
return self._name
|
return self._name
|
||||||
|
|
||||||
|
def get_matrix(self):
|
||||||
|
"""Returns internal numeric matrix for dataset."""
|
||||||
|
return self._array
|
||||||
|
|
||||||
def get_all_dims(self):
|
def get_all_dims(self):
|
||||||
"""Returns all dimensions in project"""
|
"""Returns all dimensions in project"""
|
||||||
return self._all_dims
|
return self._all_dims
|
||||||
|
|
|
@ -36,6 +36,10 @@ class EinarsWorkflow (Workflow):
|
||||||
regression.add_function(Function('pls', 'PLS'))
|
regression.add_function(Function('pls', 'PLS'))
|
||||||
self.add_stage(regression)
|
self.add_stage(regression)
|
||||||
|
|
||||||
|
explore = Stage('explore', 'Explorative analysis')
|
||||||
|
explore.add_function(PCAFunction(self))
|
||||||
|
self.add_stage(explore)
|
||||||
|
|
||||||
save = Stage('save', 'Save Data')
|
save = Stage('save', 'Save Data')
|
||||||
save.add_function(DatasetSaveFunction())
|
save.add_function(DatasetSaveFunction())
|
||||||
self.add_stage(save)
|
self.add_stage(save)
|
||||||
|
@ -191,6 +195,7 @@ class CelFileImportFunction(Function):
|
||||||
|
|
||||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||||
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
|
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
|
||||||
|
silent_eval('E <- rma(E)')
|
||||||
|
|
||||||
m = rpy.r('m <- E@exprs')
|
m = rpy.r('m <- E@exprs')
|
||||||
|
|
||||||
|
@ -202,8 +207,40 @@ class CelFileImportFunction(Function):
|
||||||
rpy.r.rm(["E", "m"])
|
rpy.r.rm(["E", "m"])
|
||||||
|
|
||||||
if m:
|
if m:
|
||||||
return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), "AffyMatrix Data")]
|
return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")]
|
||||||
else:
|
else:
|
||||||
logger.log("notice", "No data loaded from importer.")
|
logger.log("notice", "No data loaded from importer.")
|
||||||
finally:
|
finally:
|
||||||
chooser.destroy()
|
chooser.destroy()
|
||||||
|
|
||||||
|
|
||||||
|
class PCAFunction(Function):
|
||||||
|
"""Generic PCA function."""
|
||||||
|
def __init__(self, workflow):
|
||||||
|
Function.__init__(self, 'pca', 'PCA')
|
||||||
|
self._workflow = workflow
|
||||||
|
|
||||||
|
def run(self, data):
|
||||||
|
import rpy
|
||||||
|
|
||||||
|
dim_2, dim_1 = data.get_dim_names()
|
||||||
|
|
||||||
|
|
||||||
|
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||||
|
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
|
||||||
|
silent_eval("t = prcomp(t(m))")
|
||||||
|
|
||||||
|
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
|
||||||
|
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
|
||||||
|
("component", T_ids)], name="T")
|
||||||
|
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
|
||||||
|
("component", T_ids)], name="P")
|
||||||
|
|
||||||
|
# cleanup
|
||||||
|
rpy.r.rm(["t", "m"])
|
||||||
|
|
||||||
|
loading_plot1 = plots.ScatterPlot(self._workflow.project,P,'ids','component','1','2')
|
||||||
|
loading_plot2 = plots.ScatterPlot(self._workflow.project,P,'ids','component','3','4')
|
||||||
|
score_plot = plots.ScatterPlot(self._workflow.project,T,'filename','component','1','2')
|
||||||
|
|
||||||
|
return [T, P, loading_plot1, loading_plot2, score_plot]
|
||||||
|
|
Reference in New Issue