Added matrix-getter to Dataset.

Added rma-function to preprocessing incorporated in AffyMatrix-importer.
Added PCAFunction to go_workflow for processing loaded affymatrix data.
This commit is contained in:
Truls Alexander Tangstad 2006-04-24 14:07:34 +00:00
parent cebf801cf6
commit 97e09b39a1
2 changed files with 42 additions and 1 deletions

View File

@ -160,6 +160,10 @@ class Dataset:
"""Returns dataset name"""
return self._name
def get_matrix(self):
"""Returns internal numeric matrix for dataset."""
return self._array
def get_all_dims(self):
"""Returns all dimensions in project"""
return self._all_dims

View File

@ -36,6 +36,10 @@ class EinarsWorkflow (Workflow):
regression.add_function(Function('pls', 'PLS'))
self.add_stage(regression)
explore = Stage('explore', 'Explorative analysis')
explore.add_function(PCAFunction(self))
self.add_stage(explore)
save = Stage('save', 'Save Data')
save.add_function(DatasetSaveFunction())
self.add_stage(save)
@ -191,6 +195,7 @@ class CelFileImportFunction(Function):
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
silent_eval('E <- rma(E)')
m = rpy.r('m <- E@exprs')
@ -202,8 +207,40 @@ class CelFileImportFunction(Function):
rpy.r.rm(["E", "m"])
if m:
return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), "AffyMatrix Data")]
return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")]
else:
logger.log("notice", "No data loaded from importer.")
finally:
chooser.destroy()
class PCAFunction(Function):
"""Generic PCA function."""
def __init__(self, workflow):
Function.__init__(self, 'pca', 'PCA')
self._workflow = workflow
def run(self, data):
import rpy
dim_2, dim_1 = data.get_dim_names()
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
silent_eval("t = prcomp(t(m))")
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
("component", T_ids)], name="T")
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
("component", T_ids)], name="P")
# cleanup
rpy.r.rm(["t", "m"])
loading_plot1 = plots.ScatterPlot(self._workflow.project,P,'ids','component','1','2')
loading_plot2 = plots.ScatterPlot(self._workflow.project,P,'ids','component','3','4')
score_plot = plots.ScatterPlot(self._workflow.project,T,'filename','component','1','2')
return [T, P, loading_plot1, loading_plot2, score_plot]