Made PCA more generic: it can now even be run on existing PCA results, because the component dimension is given a unique name when the input already has a dimension named "component".

2006-04-28 11:44:55 +00:00
parent fa3b1182bc
commit a2d3a9d5cc
2 changed files with 22 additions and 9 deletions
--- a/system/plots.py
+++ b/system/plots.py
@@ -392,9 +392,11 @@ class LinePlot(Plot):
        rows, cols = self._bg_matrix.shape
        self.ax.imshow(self._bg_matrix, cmap=cm.Greys, extent=(0.5, cols+0.5, self._ymin, self._ymax))

+        dim_2, dim_1 = self._dataset.get_dim_names()
+
        if selection:
-            ids = selection['ids'] # current identifiers
-            index = [ind for id,ind in self._dataset['ids'].items() if id in ids] #conversion to index
+            ids = selection[dim_2] # current identifiers
+            index = [ind for id,ind in self._dataset[dim_2].items() if id in ids] #conversion to index
            for i in index:
                line = self._dataset.get_matrix()[i]
                self.ax.plot(range(1, len(line)+1), line)
--- a/workflows/affy_workflow.py
+++ b/workflows/affy_workflow.py
@@ -3,6 +3,7 @@ from system import dataset, logger, plots, workflow
 from scipy import randn
 import cPickle

+
 class AffyWorkflow (workflow.Workflow):

    name = 'Affy Workflow'
@@ -34,7 +35,7 @@ class TestDataFunction(workflow.Function):
        logger.log('notice', 'Injecting foo test data')
        x = randn(20,30)
        X = dataset.Dataset(x)
-        return [X, plots.SinePlot()]
+        return [X, plots.LinePlot(X)]


 class DatasetLoadFunction(workflow.Function):
@@ -154,21 +155,31 @@ class PCAFunction(workflow.Function):
        
        dim_2, dim_1 = data.get_dim_names()
    
-    
        silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
        rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
        silent_eval("t = prcomp(t(m))")

+        # we make a unique name for component dimension
+        c = 0
+        component_dim = prefix = "component"
+        while component_dim in data.get_all_dims():
+            component_dim = prefix + "_" + str(c)
+            c += 1
+
        T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
        T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
-                                   ("component", T_ids)], name="T")
+                                           (component_dim, T_ids)],
+                            all_dims = data.get_all_dims(), name="T")
        P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
-                                          ("component", T_ids)], name="P")
-
+                                                  (component_dim, T_ids)],
+                            all_dims = data.get_all_dims(), name="P")
        # cleanup
        rpy.r.rm(["t", "m"])

-        loading_plot = plots.ScatterPlot(P,'ids','component','1','2', "Loadings")
-        score_plot = plots.ScatterPlot(T,'filename','component','1','2', "Scores")
+        loading_plot = plots.ScatterPlot(P, dim_2, component_dim, '1', '2',
+                                         "Loadings")
+        score_plot = plots.ScatterPlot(T, dim_1,component_dim, '1', '2',
+                                       "Scores")
        
        return [T, P, loading_plot, score_plot]
+