... just lots of stuff
This commit is contained in:
parent
21b63b17e5
commit
7dbf28f65d
|
@ -18,6 +18,39 @@ from packer import Packer
|
|||
import blmplots
|
||||
|
||||
|
||||
class NewStyleModel(Function):
    """Holds model parts plus the datasets and plots created from them.

    Attributes set in ``__init__``:
      name       -- the ``name`` argument
      options    -- reference to ``Options`` itself (not an instance)
                    NOTE(review): confirm that the class object is intended
      input_data -- list, initially empty
      parts      -- dict looked up by parameter name in create_dataset
      io_table   -- dict mapping parameter name to dataset identifiers
      datasets   -- list of datasets created so far by create_dataset
      plots      -- list of plots created so far by create_plot
    """

    def __init__(self, id='johndoe', name='JohnDoe'):
        Function.__init__(self, id, name)
        self.name = name
        self.options = Options
        self.input_data = []
        self.parts = {}
        self.io_table = {}
        self.datasets = []
        self.plots = []

    def create_dataset(self, param, Dataset=Dataset):
        """Return the dataset named ``param``, creating it on first request.

        A dataset already stored in ``self.datasets`` whose ``get_name()``
        equals ``param`` is returned unchanged. Otherwise a new ``Dataset``
        is built from ``self.parts[param]``, remembered in ``self.datasets``
        and returned.

        Returns None (after logging a notice) when ``param`` is not a key of
        ``self.parts``.
        """
        for existing in self.datasets:
            if existing.get_name() == param:
                return existing

        if param not in self.parts:
            logger.log('notice', 'Parameter: %s not present' %param)
            return

        if param not in self.io_table:
            # Only a notice: creation continues with identifiers=None.
            # NOTE(review): confirm that falling through here is intended.
            logger.log('notice', 'Parameter: %s not in defined in io table' %param)

        identifiers = self.io_table.get(param)
        data = self.parts.get(param)
        ds = Dataset(data, identifiers=identifiers, name=param)
        # Store the dataset that was just built so the lookup loop above
        # finds it on the next request.
        self.datasets.append(ds)
        return ds

    def create_plot(self, blmplot):
        """Instantiate ``blmplot`` on ``self.parts`` if it accepts them.

        ``blmplot.validate_model(self.parts)`` decides whether the plot is
        built. On success the plot is appended to ``self.plots`` and
        returned; otherwise the method returns None.
        """
        if blmplot.validate_model(self.parts):
            plt = blmplot(self.parts)
            self.plots.append(plt)
            return plt
|
||||
|
||||
|
||||
class Model(Function):
|
||||
"""Base class of bilinear models.
|
||||
"""
|
||||
|
@ -68,7 +101,7 @@ class PCA(Model):
|
|||
logger.log('notice', 'Aopt at first component!')
|
||||
|
||||
def confidence(self, aopt, n_sets, alpha, p_center,
|
||||
crot, strict, cov_center ):
|
||||
crot, strict, cov_center):
|
||||
"""Returns a confidence measure for model parameters.
|
||||
Based on aopt.
|
||||
|
||||
|
@ -80,7 +113,7 @@ class PCA(Model):
|
|||
|
||||
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
|
||||
Pcal = self.model['P'][:,:aopt]
|
||||
# add the scale to P
|
||||
# ensure scaled P
|
||||
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
|
||||
Pcal = Pcal*tnorm
|
||||
tsq = hotelling(jk_segments, Pcal, p_center,
|
||||
|
@ -90,33 +123,14 @@ class PCA(Model):
|
|||
def make_model(self, amax, mode, scale):
    """Fit the PCA model and merge the result into ``self.model``.

    Calls ``pca`` on ``self.model['E0']`` with ``amax``, ``scale`` and
    ``mode`` (in that order) and updates ``self.model`` with whatever
    ``pca`` returns.
    """
    # A single pca() call is enough: anything attached to an earlier result
    # would be thrown away when the result is reassigned before update().
    dat = pca(self.model['E0'], amax, scale, mode)
    self.model.update(dat)
|
||||
|
||||
def as_dataset(self, param, dtype='dataset'):
|
||||
"""Return model parameter as Dataset.
|
||||
"""
|
||||
if not param in self.model.keys():
|
||||
logger.log('notice', 'Parameter: %s not in model' %param)
|
||||
return
|
||||
DX = self._dataset['X'] #input dataset
|
||||
dim_name_0, dim_name_1 = DX.get_dim_name()
|
||||
|
@ -128,17 +142,18 @@ class PCA(Model):
|
|||
pc_ids = ['_amax', map(str,range(self._options['amax'])) ]
|
||||
pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]
|
||||
zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
|
||||
match_ids = {'E':[ids_0, ids_1],
|
||||
'E0':[ids_0, ids_1],
|
||||
'P':[ids_1, pc_ids],
|
||||
'T':[ids_0, pc_ids],
|
||||
'W':[ids_1, pc_ids],
|
||||
'p_tsq':[ids_1, zero_dim],
|
||||
'rmsep':[pc_ids, zero_dim],
|
||||
'var_leverages':[ids_1, zero_dim],
|
||||
'sample_leverages':[pc_ids, zero_dim],
|
||||
'exp_var_x': [pc_ids, zero_dim],
|
||||
'var_x': [pc_ids, zero_dim],
|
||||
match_ids = {'E' : [ids_0, ids_1],
|
||||
'E0' : [ids_0, ids_1],
|
||||
'P' : [ids_1, pc_ids],
|
||||
'T' : [ids_0, pc_ids],
|
||||
'W' : [ids_1, pc_ids],
|
||||
'p_tsq' : [ids_1, zero_dim],
|
||||
'rmsep' : [pc_ids, zero_dim],
|
||||
'var_leverages' : [ids_1, zero_dim],
|
||||
'sample_leverages' : [pc_ids, zero_dim],
|
||||
'exp_var_x' : [pc_ids, zero_dim],
|
||||
'var_x' : [pc_ids, zero_dim],
|
||||
'eigvals' : [pc_ids, zero_dim]
|
||||
}
|
||||
|
||||
out = Dataset(self.model[param], match_ids[param], name=param)
|
||||
|
@ -256,7 +271,7 @@ class PLS(Model):
|
|||
def make_model(self, a, b, amax, scale, mode, engine):
|
||||
"""Make model on amax components.
|
||||
"""
|
||||
print "MAking model"
|
||||
print "Making model"
|
||||
dat = engine(a, b, amax, scale, mode)
|
||||
self.model.update(dat)
|
||||
|
||||
|
@ -478,13 +493,6 @@ class LPLS(Model):
|
|||
self._data['Z'] = c.asarray()
|
||||
self.validation(options)
|
||||
self.make_model(options)
|
||||
print self.model['evx']
|
||||
evx_str = [str(i)[:3] for i in self.model['evx']]
|
||||
logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str))
|
||||
evy_str = [str(i)[:3] for i in self.model['evy']]
|
||||
logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str))
|
||||
evz_str = [str(i)[:3] for i in self.model['evz']]
|
||||
logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str))
|
||||
|
||||
if options['calc_conf']:
|
||||
self.confidence(options)
|
||||
|
@ -553,13 +561,15 @@ class PcaOptions(Options):
|
|||
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
|
||||
(blmplots.PcaLoadingPlot, 'Loadings', True),
|
||||
(blmplots.LineViewXc, 'Line view', True),
|
||||
(blmplots.PredictionErrorPlot, 'Residual Error', False)
|
||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||
(blmplots.PcaScreePlot, 'Scree', True)
|
||||
]
|
||||
|
||||
opt['out_data'] = ['T','P', 'p_tsq']
|
||||
opt['out_plots'] = [blmplots.PcaScorePlot,
|
||||
blmplots.PcaLoadingPlot,
|
||||
blmplots.LineViewXc]
|
||||
blmplots.LineViewXc,
|
||||
blmplots.PcaScreePlot]
|
||||
|
||||
self.update(opt)
|
||||
|
||||
|
@ -621,7 +631,8 @@ class PlsOptions(Options):
|
|||
|
||||
# (class, name, sensitive, ticked)
|
||||
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
||||
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
||||
(blmplots.PlsXLoadingPlot, 'X-Loadings', True),
|
||||
(blmplots.PlsYLoadingPlot, 'Y-Loadings', True),
|
||||
(blmplots.LineViewXc, 'Line view', True),
|
||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||
(blmplots.RMSEPPlot, 'RMSEP', False),
|
||||
|
@ -629,7 +640,7 @@ class PlsOptions(Options):
|
|||
]
|
||||
|
||||
opt['out_data'] = ['T','P', 'w_tsq']
|
||||
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc]
|
||||
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc]
|
||||
|
||||
#opt['out_data'] = None
|
||||
|
||||
|
@ -699,11 +710,9 @@ class LplsOptions(Options):
|
|||
]
|
||||
|
||||
# (class, name, sensitive, ticked)
|
||||
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
||||
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
||||
opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True),
|
||||
(blmplots.LplsXLoadingPlot, 'Loadings', True),
|
||||
(blmplots.LineViewXc, 'Line view', True),
|
||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||
(blmplots.RMSEPPlot, 'RMSEP', False),
|
||||
(blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False),
|
||||
(blmplots.LplsXCorrelationPlot, 'X corr.', True),
|
||||
(blmplots.LplsZCorrelationPlot, 'Z corr.', True)
|
||||
|
|
|
@ -8,7 +8,7 @@ fixme:
|
|||
from matplotlib import cm,patches
|
||||
import gtk
|
||||
import fluents
|
||||
from fluents import plots, main
|
||||
from fluents import plots, main,logger
|
||||
import scipy
|
||||
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty
|
||||
from scipy.stats import corrcoef
|
||||
|
@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot):
|
|||
"""Set patch sizes."""
|
||||
pass
|
||||
|
||||
def set_expvar_axlabels(self, param="evx"):
|
||||
def set_expvar_axlabels(self, param=None):
|
||||
if param == None:
|
||||
param = self._expvar_param
|
||||
else:
|
||||
self._expvar_param = param
|
||||
if not self.model.model.has_key(param):
|
||||
self.model.model[param] = None
|
||||
if self.model.model[param]==None:
|
||||
logger.log('notice', 'Param: %s not in model' %param)
|
||||
print self.model.model.keys()
|
||||
print self.model.model[param]
|
||||
pass #fixme: do expvar calc here if not present
|
||||
else:
|
||||
expvar = self.model.model[param]
|
||||
|
@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot):
|
|||
self.selection_collection._offsets = xy
|
||||
self.canvas.draw_idle()
|
||||
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
|
||||
new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad)
|
||||
new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
|
||||
self.axes.set_xlim(new_lims, emit=True)
|
||||
self.set_expvar_axlabels()
|
||||
self.canvas.draw_idle()
|
||||
|
@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot):
|
|||
self.sc._offsets = xy
|
||||
self.selection_collection._offsets = xy
|
||||
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
|
||||
new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad)
|
||||
new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
|
||||
self.axes.set_ylim(new_lims, emit=True)
|
||||
self.set_expvar_axlabels()
|
||||
self.canvas.draw_idle()
|
||||
|
@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot):
|
|||
for indx,txt in self._text_labels.items():
|
||||
if indx in index:
|
||||
txt.set_visible(True)
|
||||
self.canvas.draw()
|
||||
self.canvas.draw_idle()
|
||||
|
||||
def hide_labels(self):
    """Hide every text label and request a canvas redraw."""
    for txt in self._text_labels.values():
        txt.set_visible(False)
    # One deferred redraw request is sufficient; no synchronous draw() first.
    self.canvas.draw_idle()
|
||||
|
||||
|
||||
class PcaScreePlot(plots.BarPlot):
    """Bar plot of the 'eigvals' parameter of a PCA model."""

    def __init__(self, model):
        """Build the plot from ``model.as_dataset('eigvals')``.

        The title contains the name of the model's 'X' dataset.
        """
        title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
        ds = model.as_dataset('eigvals')
        # Identity test: '== None' would be dispatched to the dataset's own
        # comparison operator.
        if ds is None:
            # Only a notice is logged; construction still proceeds with ds.
            # NOTE(review): confirm BarPlot tolerates a None dataset.
            logger.log('notice', 'Model does not contain eigvals')
        plots.BarPlot.__init__(self, ds, name=title)
|
||||
|
||||
|
||||
class PcaScorePlot(BlmScatterPlot):
    """Scatter plot of the 'T' part of a PCA model."""

    def __init__(self, model, absi=0, ordi=1):
        """Set up the scatter plot and label the axes from 'expvarx'.

        absi, ordi -- passed on unchanged to ``BlmScatterPlot.__init__``
        """
        x_name = model._dataset['X'].get_name()
        BlmScatterPlot.__init__(self, "Pca scores (%s)" % x_name,
                                model, absi, ordi, 'T')
        self.set_expvar_axlabels(param="expvarx")
|
||||
|
||||
class PcaLoadingPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
|
@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot):
|
|||
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
||||
|
||||
|
||||
class PlsLoadingPlot(BlmScatterPlot):
|
||||
class PlsXLoadingPlot(BlmScatterPlot):
    """Scatter plot of the 'P' part of a PLS model, coloured by 'w_tsq'."""

    def __init__(self, model, absi=0, ordi=1):
        """Set up the scatter plot.

        absi, ordi -- passed on unchanged to ``BlmScatterPlot.__init__``
        """
        # The title is built once; it names the model's 'X' dataset.
        title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
        BlmScatterPlot.__init__(self, title, model, absi, ordi,
                                part_name='P', color_by='w_tsq')
        # TODO: explained-variance axis labels are not set for this plot.
|
||||
|
||||
|
||||
|
||||
class PlsYLoadingPlot(BlmScatterPlot):
    """Scatter plot of the 'Q' part of a PLS model."""

    def __init__(self, model, absi=0, ordi=1):
        """Set up the scatter plot; the title names the 'Y' dataset.

        absi, ordi -- passed on unchanged to ``BlmScatterPlot.__init__``
        """
        y_name = model._dataset['Y'].get_name()
        BlmScatterPlot.__init__(self, "Pls y-loadings (%s)" % y_name,
                                model, absi, ordi, part_name='Q')
|
||||
|
||||
|
||||
class PlsCorrelationLoadingPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
|
||||
|
@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot):
|
|||
|
||||
|
||||
class InfluencePlot(plots.ScatterPlot):
    """Leverage vs residual scatter plot.

    Built from the 'levx' and 'ssqx' parameters of the model.
    """

    def __init__(self, model, dim, name="Influence"):
        """Set up the plot from ``model``.

        Logs a notice and returns without initialising the scatter plot when
        the model lacks 'levx' or 'ssqx'.

        name -- plot name handed to ``plots.ScatterPlot.__init__``
        """
        if 'levx' not in model.model:
            logger.log('notice', 'Model has no calculations of leverages')
            return
        if 'ssqx' not in model.model:
            logger.log('notice', 'Model has no calculations of residuals')
            return
        ds1 = model.as_dataset('levx')
        ds2 = model.as_dataset('ssqx')
        # FIXME: id_dim, sel_dim, id_1, id_2, col and sel_dim_2 are not
        # defined anywhere in this method, and the ``dim`` argument is
        # unused; this call raises NameError until they are supplied.
        plots.ScatterPlot.__init__(self, ds1, ds2,
                                   id_dim, sel_dim, id_1, id_2,
                                   c=col, s=20, sel_dim_2=sel_dim_2,
                                   name=name)
|
||||
|
||||
|
||||
class RMSEPPlot(plots.BarPlot):
|
||||
|
|
|
@ -11,7 +11,7 @@ import time
|
|||
|
||||
|
||||
def hotelling(Pcv, P, p_center='med', cov_center='med',
|
||||
alpha=0.3, crot=True, strict=False, metric=None):
|
||||
alpha=0.3, crot=True, strict=False):
|
||||
"""Returns regularized hotelling T^2.
|
||||
|
||||
alpha -- regularisation towards pooled cov estimates
|
||||
|
@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
|
|||
alpha -- regularisation
|
||||
crot -- rotate submodels toward full?
|
||||
strict -- only rotate 90 degree ?
|
||||
metric -- inverse metric matrix (if Pcv and P from metric pca/pls)
|
||||
|
||||
"""
|
||||
m, n = P.shape
|
||||
if metric==None:
|
||||
metric = eye(m, dtype='<f8')
|
||||
P = dot(metric.T, asarray(P))
|
||||
n_sets, n, amax = Pcv.shape
|
||||
# allocate
|
||||
T_sq = empty((n, ),dtype='f')
|
||||
|
@ -36,7 +32,6 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
|
|||
# rotate sub_models to full model
|
||||
if crot:
|
||||
for i, Pi in enumerate(Pcv):
|
||||
Pi = dot(metric.T, Pi)
|
||||
Pcv[i] = procrustes(P, Pi, strict=strict)
|
||||
|
||||
# center of pnull
|
||||
|
@ -118,7 +113,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
|||
center=True,
|
||||
sim_method='shuffle',
|
||||
p_center='med', cov_center='med',
|
||||
crot=True, strict=False, metric=None):
|
||||
crot=True, strict=False):
|
||||
|
||||
"""Returns qvals for pls model.
|
||||
|
||||
|
@ -133,7 +128,6 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
|||
cov_center -- location estimator for covariance of submodels ['med']
|
||||
crot -- bool, use rotations of sub models?
|
||||
strict -- bool, use strict (rot/flips only) rotations?
|
||||
metric -- bool, use row metric?
|
||||
"""
|
||||
|
||||
m, n = a.shape
|
||||
|
@ -144,13 +138,12 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
|||
if center:
|
||||
ac = a - a.mean(0)
|
||||
bc = b - b.mean(0)
|
||||
if metric!=None:
|
||||
ac = dot(ac, metric)
|
||||
|
||||
if algo=='bridge':
|
||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||
else:
|
||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True)
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
|
||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
|
@ -162,7 +155,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
|||
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
|
||||
else:
|
||||
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
|
||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
|
@ -205,10 +198,10 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
|||
n_iter=20, algo='pls',
|
||||
sim_method='shuffle',
|
||||
p_center='med', cov_center='med',
|
||||
crot=True, strict=False, metric=None):
|
||||
crot=True, strict=False):
|
||||
|
||||
"""Returns qvals for pls model.
|
||||
Shuffling of variables in X is preprocessed in metric.
|
||||
Shuffling of variables in X.
|
||||
Null model is 'If I put genes randomly on network' ... if they are sign:
|
||||
then this is due to network structure and not covariance with response.
|
||||
|
||||
|
@ -223,7 +216,6 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
|||
cov_center -- location estimator for covariance of submodels ['med']
|
||||
crot -- bool, use rotations of sub models?
|
||||
strict -- bool, use strict (rot/flips only) rotations?
|
||||
metric -- bool, use row metric?
|
||||
"""
|
||||
|
||||
m, n = a.shape
|
||||
|
@ -236,13 +228,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
|||
if center==True:
|
||||
ac = a - a.mean(0)
|
||||
bc = b - b.mean(0)
|
||||
if metric==None:
|
||||
metric = eye(n,n)
|
||||
|
||||
if algo=='bridge':
|
||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||
else:
|
||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
|
||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
|
@ -251,13 +242,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
|||
for i, a_shuff in enumerate(Vs):
|
||||
t1 = time.time()
|
||||
a = a_shuff - a_shuff.mean(0)
|
||||
a = dot(a, metric)
|
||||
|
||||
if algo=='bridge':
|
||||
dat = bridge(a, b, aopt, 'loads','fast')
|
||||
else:
|
||||
dat = pls(a, b, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
|
||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
|
|
|
@ -32,7 +32,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
|||
lev --leverages, ssq -- sum of squares, expvar -- cumulative
|
||||
explained variance, aopt -- number of components used
|
||||
|
||||
:OtherParameters:
|
||||
:OtherParameters:
|
||||
mode : str
|
||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||
center_axis : int
|
||||
|
@ -68,7 +68,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
|||
a = a - expand_dims(a.mean(center_axis), center_axis)
|
||||
if m>(n+100) or n>(m+100):
|
||||
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
|
||||
print s[:10]
|
||||
else:
|
||||
u, s, vt = svd(a, 0)
|
||||
v = vt.T
|
||||
|
@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
|||
lev = []
|
||||
for ai in range(aopt):
|
||||
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
||||
ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
|
||||
ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
|
||||
if scale=='loads':
|
||||
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
|
||||
else:
|
||||
|
@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
|||
# variances
|
||||
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
|
||||
|
||||
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}
|
||||
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
|
||||
|
||||
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||
""" Principal Component Regression.
|
||||
|
@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
|
|||
if i>0:
|
||||
for j in range(0, i, 1):
|
||||
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
||||
print vnorm(r)
|
||||
|
||||
t = dot(a, r)
|
||||
tt = vnorm(t)**2
|
||||
p = dot(a.T, t)/tt
|
||||
|
@ -375,14 +374,11 @@ def w_pls(aat, b, aopt):
|
|||
u = dot(b , q) #y-factor scores
|
||||
U[:,i] = u.ravel()
|
||||
t = dot(aat, u)
|
||||
print "Norm of t: %s" %vnorm(t)
|
||||
print "s: %s" %s
|
||||
|
||||
t = t/vnorm(t)
|
||||
T[:,i] = t.ravel()
|
||||
r = dot(aat, t)#score-weights
|
||||
#r = r/vnorm(r)
|
||||
print "Norm R: %s" %vnorm(r)
|
||||
R[:,i] = r.ravel()
|
||||
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
|
||||
if i<aopt:
|
||||
|
@ -701,10 +697,9 @@ def esvd(data, amax=None):
|
|||
pcrange = None
|
||||
else:
|
||||
pcrange = [n-amax, n]
|
||||
print "symm>n"
|
||||
s, v = symeig(kernel, range=pcrange, overwrite=True)
|
||||
s = s[::-1]
|
||||
v = v[:,::-1]
|
||||
s = s[::-1].real
|
||||
v = v[:,::-1].real
|
||||
else:
|
||||
u, s, vt = svd(kernel)
|
||||
v = vt.T
|
||||
|
@ -718,7 +713,6 @@ def esvd(data, amax=None):
|
|||
pcrange = None
|
||||
else:
|
||||
pcrange = [m-amax, m]
|
||||
print "sym (m<n)"
|
||||
s, u = symeig(kernel, range=pcrange, overwrite=True)
|
||||
s = s[::-1]
|
||||
u = u[:,::-1]
|
||||
|
@ -726,8 +720,8 @@ def esvd(data, amax=None):
|
|||
u, s, vt = svd(kernel)
|
||||
s = sqrt(s)
|
||||
v = dot(data.T, u)/s
|
||||
print s[:2]
|
||||
return u, s, v
|
||||
# some use of symeig returns the 0 imaginary part
|
||||
return u.real, s.real, v.real
|
||||
|
||||
def vnorm(x):
|
||||
# assume column arrays (or vectors)
|
||||
|
|
|
@ -38,7 +38,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
|
|||
else:
|
||||
yield aat_in,aat_out,b_in,b_out
|
||||
|
||||
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None):
|
||||
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
|
||||
"""Random block crossvalidation
|
||||
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
||||
"""
|
||||
|
@ -61,15 +61,14 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No
|
|||
mn_b = bcal.mean(0)[newaxis]
|
||||
bcal = bcal - mn_b
|
||||
btrue = btrue - mn_b
|
||||
if metric!=None:
|
||||
acal = dot(acal, metric)
|
||||
|
||||
if index_out:
|
||||
yield acal, atrue, bcal, btrue, out
|
||||
else:
|
||||
yield acal, atrue, bcal, btrue
|
||||
|
||||
|
||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
|
||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
|
||||
"""Returns a generator of crossvalidation sample segments.
|
||||
|
||||
input:
|
||||
|
@ -97,8 +96,7 @@ def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
|
|||
mn_a = acal.mean(0)[newaxis]
|
||||
acal = acal - mn_a
|
||||
atrue = atrue - mn_a
|
||||
if metric!=None:
|
||||
acal = dot(acal, metric)
|
||||
|
||||
if index_out:
|
||||
yield acal, atrue, out
|
||||
else:
|
||||
|
|
|
@ -80,12 +80,12 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
|||
aopt = find_aopt_from_sep(msep)
|
||||
return sqrt(msep)
|
||||
|
||||
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||
EE = zeros((amax, k, l), dtype='<f8')
|
||||
Yhat = zeros((amax, k, l), dtype='<f8')
|
||||
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric)
|
||||
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
|
||||
for Xin, Xout, Yin, Yout, out in V:
|
||||
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
||||
Yin = (Yin - ym)
|
||||
|
@ -187,7 +187,7 @@ def pca_cv_val(a, amax, n_sets):
|
|||
|
||||
return sep, aopt
|
||||
|
||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None):
|
||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
|
||||
""" Returns CV-segments of paramter W for wide X.
|
||||
|
||||
todo: add support for T,Q and B
|
||||
|
@ -196,11 +196,11 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
|||
n_blocks = b.shape[0]
|
||||
|
||||
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
|
||||
if use_pack and metric==None:
|
||||
if use_pack:
|
||||
u, s, inflater = svd(a, full_matrices=0)
|
||||
a = u*s
|
||||
|
||||
V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric)
|
||||
V = pls_gen(a, b, n_blocks=n_blocks, center=center)
|
||||
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
|
||||
if algo=='pls':
|
||||
dat = pls(a_in, b_in, amax, 'loads', 'fast')
|
||||
|
@ -209,14 +209,14 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
|||
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
||||
|
||||
W = dat['W']
|
||||
if use_pack and metric==None:
|
||||
if use_pack:
|
||||
W = dot(inflater.T, W)
|
||||
|
||||
Wcv[nn,:,:] = W[:,:,]
|
||||
|
||||
return Wcv
|
||||
|
||||
def pca_jkP(a, aopt, n_blocks=None, metric=None):
|
||||
def pca_jkP(a, aopt, n_blocks=None):
|
||||
"""Returns loading from PCA on CV-segments.
|
||||
|
||||
input:
|
||||
|
|
Reference in New Issue