Made PCA more generic; it even enables PCA of existing PCA results, because the component dimension is now given a unique name when the input already has a dimension named "component".

This commit is contained in:
Truls Alexander Tangstad 2006-04-28 11:44:55 +00:00
parent fa3b1182bc
commit a2d3a9d5cc
2 changed files with 22 additions and 9 deletions

View File

@ -392,9 +392,11 @@ class LinePlot(Plot):
rows, cols = self._bg_matrix.shape
self.ax.imshow(self._bg_matrix, cmap=cm.Greys, extent=(0.5, cols+0.5, self._ymin, self._ymax))
dim_2, dim_1 = self._dataset.get_dim_names()
if selection:
ids = selection['ids'] # current identifiers
index = [ind for id,ind in self._dataset['ids'].items() if id in ids] #conversion to index
ids = selection[dim_2] # current identifiers
index = [ind for id,ind in self._dataset[dim_2].items() if id in ids] #conversion to index
for i in index:
line = self._dataset.get_matrix()[i]
self.ax.plot(range(1, len(line)+1), line)

View File

@ -3,6 +3,7 @@ from system import dataset, logger, plots, workflow
from scipy import randn
import cPickle
class AffyWorkflow (workflow.Workflow):
name = 'Affy Workflow'
@ -34,7 +35,7 @@ class TestDataFunction(workflow.Function):
logger.log('notice', 'Injecting foo test data')
x = randn(20,30)
X = dataset.Dataset(x)
return [X, plots.SinePlot()]
return [X, plots.LinePlot(X)]
class DatasetLoadFunction(workflow.Function):
@ -153,22 +154,32 @@ class PCAFunction(workflow.Function):
import rpy
dim_2, dim_1 = data.get_dim_names()
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
silent_eval("t = prcomp(t(m))")
# we make a unique name for component dimension
c = 0
component_dim = prefix = "component"
while component_dim in data.get_all_dims():
component_dim = prefix + "_" + str(c)
c += 1
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
("component", T_ids)], name="T")
(component_dim, T_ids)],
all_dims = data.get_all_dims(), name="T")
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
("component", T_ids)], name="P")
(component_dim, T_ids)],
all_dims = data.get_all_dims(), name="P")
# cleanup
rpy.r.rm(["t", "m"])
loading_plot = plots.ScatterPlot(P,'ids','component','1','2', "Loadings")
score_plot = plots.ScatterPlot(T,'filename','component','1','2', "Scores")
loading_plot = plots.ScatterPlot(P, dim_2, component_dim, '1', '2',
"Loadings")
score_plot = plots.ScatterPlot(T, dim_1,component_dim, '1', '2',
"Scores")
return [T, P, loading_plot, score_plot]