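Made PCA more generic: PCA can now even be run on existing PCA results, because the component dimension is given a unique name (e.g. "component_0") when the input dataset already has a dimension named "component", and the plots use the dataset's own dimension names instead of hard-coded ones.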
This commit is contained in:
parent
fa3b1182bc
commit
a2d3a9d5cc
|
@ -392,9 +392,11 @@ class LinePlot(Plot):
|
||||||
rows, cols = self._bg_matrix.shape
|
rows, cols = self._bg_matrix.shape
|
||||||
self.ax.imshow(self._bg_matrix, cmap=cm.Greys, extent=(0.5, cols+0.5, self._ymin, self._ymax))
|
self.ax.imshow(self._bg_matrix, cmap=cm.Greys, extent=(0.5, cols+0.5, self._ymin, self._ymax))
|
||||||
|
|
||||||
|
dim_2, dim_1 = self._dataset.get_dim_names()
|
||||||
|
|
||||||
if selection:
|
if selection:
|
||||||
ids = selection['ids'] # current identifiers
|
ids = selection[dim_2] # current identifiers
|
||||||
index = [ind for id,ind in self._dataset['ids'].items() if id in ids] #conversion to index
|
index = [ind for id,ind in self._dataset[dim_2].items() if id in ids] #conversion to index
|
||||||
for i in index:
|
for i in index:
|
||||||
line = self._dataset.get_matrix()[i]
|
line = self._dataset.get_matrix()[i]
|
||||||
self.ax.plot(range(1, len(line)+1), line)
|
self.ax.plot(range(1, len(line)+1), line)
|
||||||
|
|
|
@ -3,6 +3,7 @@ from system import dataset, logger, plots, workflow
|
||||||
from scipy import randn
|
from scipy import randn
|
||||||
import cPickle
|
import cPickle
|
||||||
|
|
||||||
|
|
||||||
class AffyWorkflow (workflow.Workflow):
|
class AffyWorkflow (workflow.Workflow):
|
||||||
|
|
||||||
name = 'Affy Workflow'
|
name = 'Affy Workflow'
|
||||||
|
@ -34,7 +35,7 @@ class TestDataFunction(workflow.Function):
|
||||||
logger.log('notice', 'Injecting foo test data')
|
logger.log('notice', 'Injecting foo test data')
|
||||||
x = randn(20,30)
|
x = randn(20,30)
|
||||||
X = dataset.Dataset(x)
|
X = dataset.Dataset(x)
|
||||||
return [X, plots.SinePlot()]
|
return [X, plots.LinePlot(X)]
|
||||||
|
|
||||||
|
|
||||||
class DatasetLoadFunction(workflow.Function):
|
class DatasetLoadFunction(workflow.Function):
|
||||||
|
@ -154,21 +155,31 @@ class PCAFunction(workflow.Function):
|
||||||
|
|
||||||
dim_2, dim_1 = data.get_dim_names()
|
dim_2, dim_1 = data.get_dim_names()
|
||||||
|
|
||||||
|
|
||||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||||
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
|
rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
|
||||||
silent_eval("t = prcomp(t(m))")
|
silent_eval("t = prcomp(t(m))")
|
||||||
|
|
||||||
|
# we make a unique name for component dimension
|
||||||
|
c = 0
|
||||||
|
component_dim = prefix = "component"
|
||||||
|
while component_dim in data.get_all_dims():
|
||||||
|
component_dim = prefix + "_" + str(c)
|
||||||
|
c += 1
|
||||||
|
|
||||||
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
|
T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))
|
||||||
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
|
T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
|
||||||
("component", T_ids)], name="T")
|
(component_dim, T_ids)],
|
||||||
|
all_dims = data.get_all_dims(), name="T")
|
||||||
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
|
P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
|
||||||
("component", T_ids)], name="P")
|
(component_dim, T_ids)],
|
||||||
|
all_dims = data.get_all_dims(), name="P")
|
||||||
# cleanup
|
# cleanup
|
||||||
rpy.r.rm(["t", "m"])
|
rpy.r.rm(["t", "m"])
|
||||||
|
|
||||||
loading_plot = plots.ScatterPlot(P,'ids','component','1','2', "Loadings")
|
loading_plot = plots.ScatterPlot(P, dim_2, component_dim, '1', '2',
|
||||||
score_plot = plots.ScatterPlot(T,'filename','component','1','2', "Scores")
|
"Loadings")
|
||||||
|
score_plot = plots.ScatterPlot(T, dim_1,component_dim, '1', '2',
|
||||||
|
"Scores")
|
||||||
|
|
||||||
return [T, P, loading_plot, score_plot]
|
return [T, P, loading_plot, score_plot]
|
||||||
|
|
||||||
|
|
Reference in New Issue