From a2d3a9d5cc1110174537202ea81f419a68130a9f Mon Sep 17 00:00:00 2001 From: tangstad Date: Fri, 28 Apr 2006 11:44:55 +0000 Subject: [PATCH] Made PCA more generic, even enable PCA of existing PCA-results as dimension names are modified to handle dimension name component in input. --- system/plots.py | 6 ++++-- workflows/affy_workflow.py | 25 ++++++++++++++++++------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/system/plots.py b/system/plots.py index 079b346..31f42c2 100644 --- a/system/plots.py +++ b/system/plots.py @@ -392,9 +392,11 @@ class LinePlot(Plot): rows, cols = self._bg_matrix.shape self.ax.imshow(self._bg_matrix, cmap=cm.Greys, extent=(0.5, cols+0.5, self._ymin, self._ymax)) + dim_2, dim_1 = self._dataset.get_dim_names() + if selection: - ids = selection['ids'] # current identifiers - index = [ind for id,ind in self._dataset['ids'].items() if id in ids] #conversion to index + ids = selection[dim_2] # current identifiers + index = [ind for id,ind in self._dataset[dim_2].items() if id in ids] #conversion to index for i in index: line = self._dataset.get_matrix()[i] self.ax.plot(range(1, len(line)+1), line) diff --git a/workflows/affy_workflow.py b/workflows/affy_workflow.py index 445abee..89a4dc4 100644 --- a/workflows/affy_workflow.py +++ b/workflows/affy_workflow.py @@ -3,6 +3,7 @@ from system import dataset, logger, plots, workflow from scipy import randn import cPickle + class AffyWorkflow (workflow.Workflow): name = 'Affy Workflow' @@ -34,7 +35,7 @@ class TestDataFunction(workflow.Function): logger.log('notice', 'Injecting foo test data') x = randn(20,30) X = dataset.Dataset(x) - return [X, plots.SinePlot()] + return [X, plots.LinePlot(X)] class DatasetLoadFunction(workflow.Function): @@ -153,22 +154,32 @@ class PCAFunction(workflow.Function): import rpy dim_2, dim_1 = data.get_dim_names() - silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r) rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix()) silent_eval("t = prcomp(t(m))") + # we make a unique name for component dimension + c = 0 + component_dim = prefix = "component" + while component_dim in data.get_all_dims(): + component_dim = prefix + "_" + str(c) + c += 1 + T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1)) T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)), - ("component", T_ids)], name="T") + (component_dim, T_ids)], + all_dims = data.get_all_dims(), name="T") P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)), - ("component", T_ids)], name="P") - + (component_dim, T_ids)], + all_dims = data.get_all_dims(), name="P") # cleanup rpy.r.rm(["t", "m"]) - loading_plot = plots.ScatterPlot(P,'ids','component','1','2', "Loadings") - score_plot = plots.ScatterPlot(T,'filename','component','1','2', "Scores") + loading_plot = plots.ScatterPlot(P, dim_2, component_dim, '1', '2', + "Loadings") + score_plot = plots.ScatterPlot(T, dim_1,component_dim, '1', '2', + "Scores") return [T, P, loading_plot, score_plot] +