Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

... just lots of stuff

This commit is contained in:
Arnar Flatberg 2007-08-24 09:14:24 +00:00
parent 21b63b17e5
commit 7dbf28f65d
6 changed files with 134 additions and 109 deletions

View File

@ -18,6 +18,39 @@ from packer import Packer
import blmplots import blmplots
class NewStyleModel(Function):
def __init__(self, id='johndoe', name='JohnDoe'):
Function.__init__(self, id, name)
self.name = name
self.options = Options
self.input_data = []
self.parts = {}
self.io_table = {}
self.datasets = []
self.plots = []
def create_dataset(self, param, Dataset=Dataset):
for ds in self.datasets:
if ds.get_name()==param: return ds
if not param in self.parts.keys():
logger.log('notice', 'Parameter: %s not present' %param)
return
if not param in self.io_table.keys():
logger.log('notice', 'Parameter: %s not in defined in io table' %param)
identifiers = self.io_table.get(param)
data = self.parts.get(param)
ds = Dataset(data, identifiers=identifiers, name=param)
self.datasets.append(dataset)
return ds
def create_plot(self, blmplot):
if blmplot.validate_model(self.parts):
plt = blmplot(self.parts)
self.plots.append(plt)
return plt
class Model(Function): class Model(Function):
"""Base class of bilinear models. """Base class of bilinear models.
""" """
@ -68,7 +101,7 @@ class PCA(Model):
logger.log('notice', 'Aopt at first component!') logger.log('notice', 'Aopt at first component!')
def confidence(self, aopt, n_sets, alpha, p_center, def confidence(self, aopt, n_sets, alpha, p_center,
crot, strict, cov_center ): crot, strict, cov_center):
"""Returns a confidence measure for model parameters. """Returns a confidence measure for model parameters.
Based on aopt. Based on aopt.
@ -80,7 +113,7 @@ class PCA(Model):
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets) jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
Pcal = self.model['P'][:,:aopt] Pcal = self.model['P'][:,:aopt]
# add the scale to P # ensure scaled P
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt]) tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
Pcal = Pcal*tnorm Pcal = Pcal*tnorm
tsq = hotelling(jk_segments, Pcal, p_center, tsq = hotelling(jk_segments, Pcal, p_center,
@ -91,32 +124,13 @@ class PCA(Model):
"""Model on optimal number of components. """Model on optimal number of components.
""" """
dat = pca(self.model['E0'], amax, scale, mode) dat = pca(self.model['E0'], amax, scale, mode)
# explained variance
var_x, exp_var_x = variances(self.model['E0'], dat['T'], dat['P'])
dat['var_x'] = var_x
dat['exp_var_x'] = exp_var_x
#fixme###
do_lev_s = False
do_lev_v = False
#####
if do_lev_s:
# sample leverages
tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts
s_lev = leverage(amax, tnorm)
dat['s_lev'] = s_lev
if do_lev_v:
# variable leverages
v_lev = leverage(amax, dat['P'])
dat['v_lev'] = v_lev
self.model.update(dat) self.model.update(dat)
def as_dataset(self, param, dtype='dataset'): def as_dataset(self, param, dtype='dataset'):
"""Return model parameter as Dataset. """Return model parameter as Dataset.
""" """
if not param in self.model.keys(): if not param in self.model.keys():
logger.log('notice', 'Parameter: %s not in model' %param)
return return
DX = self._dataset['X'] #input dataset DX = self._dataset['X'] #input dataset
dim_name_0, dim_name_1 = DX.get_dim_name() dim_name_0, dim_name_1 = DX.get_dim_name()
@ -128,17 +142,18 @@ class PCA(Model):
pc_ids = ['_amax', map(str,range(self._options['amax'])) ] pc_ids = ['_amax', map(str,range(self._options['amax'])) ]
pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ] pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]
zero_dim = ['_doe', ['0']] # null dim, vector (hidden) zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
match_ids = {'E':[ids_0, ids_1], match_ids = {'E' : [ids_0, ids_1],
'E0':[ids_0, ids_1], 'E0' : [ids_0, ids_1],
'P':[ids_1, pc_ids], 'P' : [ids_1, pc_ids],
'T':[ids_0, pc_ids], 'T' : [ids_0, pc_ids],
'W':[ids_1, pc_ids], 'W' : [ids_1, pc_ids],
'p_tsq':[ids_1, zero_dim], 'p_tsq' : [ids_1, zero_dim],
'rmsep':[pc_ids, zero_dim], 'rmsep' : [pc_ids, zero_dim],
'var_leverages':[ids_1, zero_dim], 'var_leverages' : [ids_1, zero_dim],
'sample_leverages':[pc_ids, zero_dim], 'sample_leverages' : [pc_ids, zero_dim],
'exp_var_x': [pc_ids, zero_dim], 'exp_var_x' : [pc_ids, zero_dim],
'var_x': [pc_ids, zero_dim], 'var_x' : [pc_ids, zero_dim],
'eigvals' : [pc_ids, zero_dim]
} }
out = Dataset(self.model[param], match_ids[param], name=param) out = Dataset(self.model[param], match_ids[param], name=param)
@ -256,7 +271,7 @@ class PLS(Model):
def make_model(self, a, b, amax, scale, mode, engine): def make_model(self, a, b, amax, scale, mode, engine):
"""Make model on amax components. """Make model on amax components.
""" """
print "MAking model" print "Making model"
dat = engine(a, b, amax, scale, mode) dat = engine(a, b, amax, scale, mode)
self.model.update(dat) self.model.update(dat)
@ -478,13 +493,6 @@ class LPLS(Model):
self._data['Z'] = c.asarray() self._data['Z'] = c.asarray()
self.validation(options) self.validation(options)
self.make_model(options) self.make_model(options)
print self.model['evx']
evx_str = [str(i)[:3] for i in self.model['evx']]
logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str))
evy_str = [str(i)[:3] for i in self.model['evy']]
logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str))
evz_str = [str(i)[:3] for i in self.model['evz']]
logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str))
if options['calc_conf']: if options['calc_conf']:
self.confidence(options) self.confidence(options)
@ -553,13 +561,15 @@ class PcaOptions(Options):
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True), opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
(blmplots.PcaLoadingPlot, 'Loadings', True), (blmplots.PcaLoadingPlot, 'Loadings', True),
(blmplots.LineViewXc, 'Line view', True), (blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', False) (blmplots.PredictionErrorPlot, 'Residual Error', False),
(blmplots.PcaScreePlot, 'Scree', True)
] ]
opt['out_data'] = ['T','P', 'p_tsq'] opt['out_data'] = ['T','P', 'p_tsq']
opt['out_plots'] = [blmplots.PcaScorePlot, opt['out_plots'] = [blmplots.PcaScorePlot,
blmplots.PcaLoadingPlot, blmplots.PcaLoadingPlot,
blmplots.LineViewXc] blmplots.LineViewXc,
blmplots.PcaScreePlot]
self.update(opt) self.update(opt)
@ -621,7 +631,8 @@ class PlsOptions(Options):
# (class, name, sensitive, ticked) # (class, name, sensitive, ticked)
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
(blmplots.PlsLoadingPlot, 'Loadings', True), (blmplots.PlsXLoadingPlot, 'X-Loadings', True),
(blmplots.PlsYLoadingPlot, 'Y-Loadings', True),
(blmplots.LineViewXc, 'Line view', True), (blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', False), (blmplots.PredictionErrorPlot, 'Residual Error', False),
(blmplots.RMSEPPlot, 'RMSEP', False), (blmplots.RMSEPPlot, 'RMSEP', False),
@ -629,7 +640,7 @@ class PlsOptions(Options):
] ]
opt['out_data'] = ['T','P', 'w_tsq'] opt['out_data'] = ['T','P', 'w_tsq']
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc] opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc]
#opt['out_data'] = None #opt['out_data'] = None
@ -699,11 +710,9 @@ class LplsOptions(Options):
] ]
# (class, name, sensitive, ticked) # (class, name, sensitive, ticked)
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True), opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True),
(blmplots.PlsLoadingPlot, 'Loadings', True), (blmplots.LplsXLoadingPlot, 'Loadings', True),
(blmplots.LineViewXc, 'Line view', True), (blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', False),
(blmplots.RMSEPPlot, 'RMSEP', False),
(blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False), (blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False),
(blmplots.LplsXCorrelationPlot, 'X corr.', True), (blmplots.LplsXCorrelationPlot, 'X corr.', True),
(blmplots.LplsZCorrelationPlot, 'Z corr.', True) (blmplots.LplsZCorrelationPlot, 'Z corr.', True)

View File

@ -8,7 +8,7 @@ fixme:
from matplotlib import cm,patches from matplotlib import cm,patches
import gtk import gtk
import fluents import fluents
from fluents import plots, main from fluents import plots, main,logger
import scipy import scipy
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty
from scipy.stats import corrcoef from scipy.stats import corrcoef
@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot):
"""Set patch sizes.""" """Set patch sizes."""
pass pass
def set_expvar_axlabels(self, param="evx"): def set_expvar_axlabels(self, param=None):
if param == None:
param = self._expvar_param
else:
self._expvar_param = param
if not self.model.model.has_key(param): if not self.model.model.has_key(param):
self.model.model[param] = None self.model.model[param] = None
if self.model.model[param]==None: if self.model.model[param]==None:
logger.log('notice', 'Param: %s not in model' %param)
print self.model.model.keys()
print self.model.model[param]
pass #fixme: do expvar calc here if not present pass #fixme: do expvar calc here if not present
else: else:
expvar = self.model.model[param] expvar = self.model.model[param]
@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot):
self.selection_collection._offsets = xy self.selection_collection._offsets = xy
self.canvas.draw_idle() self.canvas.draw_idle()
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05 pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad) new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
self.axes.set_xlim(new_lims, emit=True) self.axes.set_xlim(new_lims, emit=True)
self.set_expvar_axlabels() self.set_expvar_axlabels()
self.canvas.draw_idle() self.canvas.draw_idle()
@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot):
self.sc._offsets = xy self.sc._offsets = xy
self.selection_collection._offsets = xy self.selection_collection._offsets = xy
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05 pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad) new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
self.axes.set_ylim(new_lims, emit=True) self.axes.set_ylim(new_lims, emit=True)
self.set_expvar_axlabels() self.set_expvar_axlabels()
self.canvas.draw_idle() self.canvas.draw_idle()
@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot):
for indx,txt in self._text_labels.items(): for indx,txt in self._text_labels.items():
if indx in index: if indx in index:
txt.set_visible(True) txt.set_visible(True)
self.canvas.draw() self.canvas.draw_idle()
def hide_labels(self): def hide_labels(self):
for txt in self._text_labels.values(): for txt in self._text_labels.values():
txt.set_visible(False) txt.set_visible(False)
self.canvas.draw() self.canvas.draw_idle()
class PcaScreePlot(plots.BarPlot):
def __init__(self, model):
title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
ds = model.as_dataset('eigvals')
if ds==None:
logger.log('notice', 'Model does not contain eigvals')
plots.BarPlot.__init__(self, ds, name=title)
class PcaScorePlot(BlmScatterPlot): class PcaScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1): def __init__(self, model, absi=0, ordi=1):
title = "Pca scores (%s)" %model._dataset['X'].get_name() title = "Pca scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
self.set_expvar_axlabels(param="expvarx")
class PcaLoadingPlot(BlmScatterPlot): class PcaLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1): def __init__(self, model, absi=0, ordi=1):
@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot):
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T') BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
class PlsLoadingPlot(BlmScatterPlot): class PlsXLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1): def __init__(self, model, absi=0, ordi=1):
title = "Pls loadings (%s)" %model._dataset['X'].get_name() title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq') BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
#self.set_expvar_axlabels(self, param="expvarx") #self.set_expvar_axlabels(self, param="expvarx")
class PlsYLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q')
class PlsCorrelationLoadingPlot(BlmScatterPlot): class PlsCorrelationLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1): def __init__(self, model, absi=0, ordi=1):
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name() title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot):
class InfluencePlot(plots.ScatterPlot): class InfluencePlot(plots.ScatterPlot):
""" Returns a leverage vs resiudal scatter plot.
""" """
""" def __init__(self, model, dim, name="Influence"):
pass if not model.model.has_key('levx'):
logger.log('notice', 'Model has no calculations of leverages')
return
if not model.model.has_key('ssqx'):
logger.log('notice', 'Model has no calculations of residuals')
return
ds1 = model.as_dataset('levx')
ds2 = model.as_dataset('ssqx')
plots.ScatterPlot.__init__(self, ds1, ds2,
id_dim, sel_dim, id_1, id_2,
c=col, s=20, sel_dim_2=sel_dim_2,
name='Load Volcano')
class RMSEPPlot(plots.BarPlot): class RMSEPPlot(plots.BarPlot):

View File

@ -11,7 +11,7 @@ import time
def hotelling(Pcv, P, p_center='med', cov_center='med', def hotelling(Pcv, P, p_center='med', cov_center='med',
alpha=0.3, crot=True, strict=False, metric=None): alpha=0.3, crot=True, strict=False):
"""Returns regularized hotelling T^2. """Returns regularized hotelling T^2.
alpha -- regularisation towards pooled cov estimates alpha -- regularisation towards pooled cov estimates
@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
alpha -- regularisation alpha -- regularisation
crot -- rotate submodels toward full? crot -- rotate submodels toward full?
strict -- only rotate 90 degree ? strict -- only rotate 90 degree ?
metric -- inverse metric matrix (if Pcv and P from metric pca/pls)
""" """
m, n = P.shape m, n = P.shape
if metric==None:
metric = eye(m, dtype='<f8')
P = dot(metric.T, asarray(P))
n_sets, n, amax = Pcv.shape n_sets, n, amax = Pcv.shape
# allocate # allocate
T_sq = empty((n, ),dtype='f') T_sq = empty((n, ),dtype='f')
@ -36,7 +32,6 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
# rotate sub_models to full model # rotate sub_models to full model
if crot: if crot:
for i, Pi in enumerate(Pcv): for i, Pi in enumerate(Pcv):
Pi = dot(metric.T, Pi)
Pcv[i] = procrustes(P, Pi, strict=strict) Pcv[i] = procrustes(P, Pi, strict=strict)
# center of pnull # center of pnull
@ -118,7 +113,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
center=True, center=True,
sim_method='shuffle', sim_method='shuffle',
p_center='med', cov_center='med', p_center='med', cov_center='med',
crot=True, strict=False, metric=None): crot=True, strict=False):
"""Returns qvals for pls model. """Returns qvals for pls model.
@ -133,7 +128,6 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
cov_center -- location estimator for covariance of submodels ['med'] cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models? crot -- bool, use rotations of sub models?
strict -- bool, use stict (rot/flips only) rotations? strict -- bool, use stict (rot/flips only) rotations?
metric -- bool, use row metric?
""" """
m, n = a.shape m, n = a.shape
@ -144,13 +138,12 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
if center: if center:
ac = a - a.mean(0) ac = a - a.mean(0)
bc = b - b.mean(0) bc = b - b.mean(0)
if metric!=None:
ac = dot(ac, metric)
if algo=='bridge': if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast') dat = bridge(ac, bc, aopt, 'loads', 'fast')
else: else:
dat = pls(ac, bc, aopt, 'loads', 'fast') dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True) Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict, alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center) cov_center=cov_center)
@ -162,7 +155,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
dat = bridge(ac, b_shuff, aopt, 'loads','fast') dat = bridge(ac, b_shuff, aopt, 'loads','fast')
else: else:
dat = pls(ac, b_shuff, aopt, 'loads', 'fast') dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric) Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict, alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center) cov_center=cov_center)
@ -205,10 +198,10 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
n_iter=20, algo='pls', n_iter=20, algo='pls',
sim_method='shuffle', sim_method='shuffle',
p_center='med', cov_center='med', p_center='med', cov_center='med',
crot=True, strict=False, metric=None): crot=True, strict=False):
"""Returns qvals for pls model. """Returns qvals for pls model.
Shuffling of variables in X is preprocessed in metric. Shuffling of variables in X.
Null model is 'If I put genes randomly on network' ... if they are sign: Null model is 'If I put genes randomly on network' ... if they are sign:
then this is due to network structure and not covariance with response. then this is due to network structure and not covariance with response.
@ -223,7 +216,6 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
cov_center -- location estimator for covariance of submodels ['med'] cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models? crot -- bool, use rotations of sub models?
strict -- bool, use stict (rot/flips only) rotations? strict -- bool, use stict (rot/flips only) rotations?
metric -- bool, use row metric?
""" """
m, n = a.shape m, n = a.shape
@ -236,13 +228,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
if center==True: if center==True:
ac = a - a.mean(0) ac = a - a.mean(0)
bc = b - b.mean(0) bc = b - b.mean(0)
if metric==None:
metric = eye(n,n)
if algo=='bridge': if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast') dat = bridge(ac, bc, aopt, 'loads', 'fast')
else: else:
dat = pls(ac, bc, aopt, 'loads', 'fast') dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center, tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict, alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center) cov_center=cov_center)
@ -251,13 +242,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
for i, a_shuff in enumerate(Vs): for i, a_shuff in enumerate(Vs):
t1 = time.time() t1 = time.time()
a = a_shuff - a_shuff.mean(0) a = a_shuff - a_shuff.mean(0)
a = dot(a, metric)
if algo=='bridge': if algo=='bridge':
dat = bridge(a, b, aopt, 'loads','fast') dat = bridge(a, b, aopt, 'loads','fast')
else: else:
dat = pls(a, b, aopt, 'loads', 'fast') dat = pls(a, b, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric) Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center, TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict, alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center) cov_center=cov_center)

View File

@ -32,7 +32,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
lev --leverages, ssq -- sum of squares, expvar -- cumulative lev --leverages, ssq -- sum of squares, expvar -- cumulative
explained variance, aopt -- number of components used explained variance, aopt -- number of components used
:OtherParameters: :OtherParam eters:
mode : str mode : str
Amount of info retained, ('fast', 'normal', 'detailed') Amount of info retained, ('fast', 'normal', 'detailed')
center_axis : int center_axis : int
@ -68,7 +68,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
a = a - expand_dims(a.mean(center_axis), center_axis) a = a - expand_dims(a.mean(center_axis), center_axis)
if m>(n+100) or n>(m+100): if m>(n+100) or n>(m+100):
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
print s[:10]
else: else:
u, s, vt = svd(a, 0) u, s, vt = svd(a, 0)
v = vt.T v = vt.T
@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
lev = [] lev = []
for ai in range(aopt): for ai in range(aopt):
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T) E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)]) ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
if scale=='loads': if scale=='loads':
lev.append([((s*T)**2).sum(1), (P**2).sum(1)]) lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
else: else:
@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
# variances # variances
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1] expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt} return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0): def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
""" Principal Component Regression. """ Principal Component Regression.
@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
if i>0: if i>0:
for j in range(0, i, 1): for j in range(0, i, 1):
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis] r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
print vnorm(r)
t = dot(a, r) t = dot(a, r)
tt = vnorm(t)**2 tt = vnorm(t)**2
p = dot(a.T, t)/tt p = dot(a.T, t)/tt
@ -375,14 +374,11 @@ def w_pls(aat, b, aopt):
u = dot(b , q) #y-factor scores u = dot(b , q) #y-factor scores
U[:,i] = u.ravel() U[:,i] = u.ravel()
t = dot(aat, u) t = dot(aat, u)
print "Norm of t: %s" %vnorm(t)
print "s: %s" %s
t = t/vnorm(t) t = t/vnorm(t)
T[:,i] = t.ravel() T[:,i] = t.ravel()
r = dot(aat, t)#score-weights r = dot(aat, t)#score-weights
#r = r/vnorm(r) #r = r/vnorm(r)
print "Norm R: %s" %vnorm(r)
R[:,i] = r.ravel() R[:,i] = r.ravel()
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) ) PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
if i<aopt: if i<aopt:
@ -701,10 +697,9 @@ def esvd(data, amax=None):
pcrange = None pcrange = None
else: else:
pcrange = [n-amax, n] pcrange = [n-amax, n]
print "symm>n"
s, v = symeig(kernel, range=pcrange, overwrite=True) s, v = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1] s = s[::-1].real
v = v[:,::-1] v = v[:,::-1].real
else: else:
u, s, vt = svd(kernel) u, s, vt = svd(kernel)
v = vt.T v = vt.T
@ -718,7 +713,6 @@ def esvd(data, amax=None):
pcrange = None pcrange = None
else: else:
pcrange = [m-amax, m] pcrange = [m-amax, m]
print "sym (m<n)"
s, u = symeig(kernel, range=pcrange, overwrite=True) s, u = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1] s = s[::-1]
u = u[:,::-1] u = u[:,::-1]
@ -726,8 +720,8 @@ def esvd(data, amax=None):
u, s, vt = svd(kernel) u, s, vt = svd(kernel)
s = sqrt(s) s = sqrt(s)
v = dot(data.T, u)/s v = dot(data.T, u)/s
print s[:2] # some use of symeig returns the 0 imaginary part
return u, s, v return u.real, s.real, v.real
def vnorm(x): def vnorm(x):
# assume column arrays (or vectors) # assume column arrays (or vectors)

View File

@ -38,7 +38,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
else: else:
yield aat_in,aat_out,b_in,b_out yield aat_in,aat_out,b_in,b_out
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None): def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
"""Random block crossvalidation """Random block crossvalidation
Leave-one-out is a subset, with n_blocks equals a.shape[-1] Leave-one-out is a subset, with n_blocks equals a.shape[-1]
""" """
@ -61,15 +61,14 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No
mn_b = bcal.mean(0)[newaxis] mn_b = bcal.mean(0)[newaxis]
bcal = bcal - mn_b bcal = bcal - mn_b
btrue = btrue - mn_b btrue = btrue - mn_b
if metric!=None:
acal = dot(acal, metric)
if index_out: if index_out:
yield acal, atrue, bcal, btrue, out yield acal, atrue, bcal, btrue, out
else: else:
yield acal, atrue, bcal, btrue yield acal, atrue, bcal, btrue
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None): def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
"""Returns a generator of crossvalidation sample segments. """Returns a generator of crossvalidation sample segments.
input: input:
@ -97,8 +96,7 @@ def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
mn_a = acal.mean(0)[newaxis] mn_a = acal.mean(0)[newaxis]
acal = acal - mn_a acal = acal - mn_a
atrue = atrue - mn_a atrue = atrue - mn_a
if metric!=None:
acal = dot(acal, metric)
if index_out: if index_out:
yield acal, atrue, out yield acal, atrue, out
else: else:

View File

@ -80,12 +80,12 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
aopt = find_aopt_from_sep(msep) aopt = find_aopt_from_sep(msep)
return sqrt(msep) return sqrt(msep)
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None): def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
k, l = m_shape(Y) k, l = m_shape(Y)
PRESS = zeros((l, amax+1), dtype='<f8') PRESS = zeros((l, amax+1), dtype='<f8')
EE = zeros((amax, k, l), dtype='<f8') EE = zeros((amax, k, l), dtype='<f8')
Yhat = zeros((amax, k, l), dtype='<f8') Yhat = zeros((amax, k, l), dtype='<f8')
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric) V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
for Xin, Xout, Yin, Yout, out in V: for Xin, Xout, Yin, Yout, out in V:
ym = -sum(Yout,0)[newaxis]/Yin.shape[0] ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
Yin = (Yin - ym) Yin = (Yin - ym)
@ -187,7 +187,7 @@ def pca_cv_val(a, amax, n_sets):
return sep, aopt return sep, aopt
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None): def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
""" Returns CV-segments of paramter W for wide X. """ Returns CV-segments of paramter W for wide X.
todo: add support for T,Q and B todo: add support for T,Q and B
@ -196,11 +196,11 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
n_blocks = b.shape[0] n_blocks = b.shape[0]
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d') Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
if use_pack and metric==None: if use_pack:
u, s, inflater = svd(a, full_matrices=0) u, s, inflater = svd(a, full_matrices=0)
a = u*s a = u*s
V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric) V = pls_gen(a, b, n_blocks=n_blocks, center=center)
for nn,(a_in, a_out, b_in, b_out) in enumerate(V): for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
if algo=='pls': if algo=='pls':
dat = pls(a_in, b_in, amax, 'loads', 'fast') dat = pls(a_in, b_in, amax, 'loads', 'fast')
@ -209,14 +209,14 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
dat = bridge(a_in, b_in, amax, 'loads', 'fast') dat = bridge(a_in, b_in, amax, 'loads', 'fast')
W = dat['W'] W = dat['W']
if use_pack and metric==None: if use_pack:
W = dot(inflater.T, W) W = dot(inflater.T, W)
Wcv[nn,:,:] = W[:,:,] Wcv[nn,:,:] = W[:,:,]
return Wcv return Wcv
def pca_jkP(a, aopt, n_blocks=None, metric=None): def pca_jkP(a, aopt, n_blocks=None):
"""Returns loading from PCA on CV-segments. """Returns loading from PCA on CV-segments.
input: input: