... just lots of stuff
This commit is contained in:
parent
21b63b17e5
commit
7dbf28f65d
|
@ -18,6 +18,39 @@ from packer import Packer
|
||||||
import blmplots
|
import blmplots
|
||||||
|
|
||||||
|
|
||||||
|
class NewStyleModel(Function):
|
||||||
|
def __init__(self, id='johndoe', name='JohnDoe'):
|
||||||
|
Function.__init__(self, id, name)
|
||||||
|
self.name = name
|
||||||
|
self.options = Options
|
||||||
|
self.input_data = []
|
||||||
|
self.parts = {}
|
||||||
|
self.io_table = {}
|
||||||
|
self.datasets = []
|
||||||
|
self.plots = []
|
||||||
|
|
||||||
|
def create_dataset(self, param, Dataset=Dataset):
|
||||||
|
for ds in self.datasets:
|
||||||
|
if ds.get_name()==param: return ds
|
||||||
|
if not param in self.parts.keys():
|
||||||
|
logger.log('notice', 'Parameter: %s not present' %param)
|
||||||
|
return
|
||||||
|
if not param in self.io_table.keys():
|
||||||
|
logger.log('notice', 'Parameter: %s not in defined in io table' %param)
|
||||||
|
|
||||||
|
identifiers = self.io_table.get(param)
|
||||||
|
data = self.parts.get(param)
|
||||||
|
ds = Dataset(data, identifiers=identifiers, name=param)
|
||||||
|
self.datasets.append(dataset)
|
||||||
|
return ds
|
||||||
|
|
||||||
|
def create_plot(self, blmplot):
|
||||||
|
if blmplot.validate_model(self.parts):
|
||||||
|
plt = blmplot(self.parts)
|
||||||
|
self.plots.append(plt)
|
||||||
|
return plt
|
||||||
|
|
||||||
|
|
||||||
class Model(Function):
|
class Model(Function):
|
||||||
"""Base class of bilinear models.
|
"""Base class of bilinear models.
|
||||||
"""
|
"""
|
||||||
|
@ -68,7 +101,7 @@ class PCA(Model):
|
||||||
logger.log('notice', 'Aopt at first component!')
|
logger.log('notice', 'Aopt at first component!')
|
||||||
|
|
||||||
def confidence(self, aopt, n_sets, alpha, p_center,
|
def confidence(self, aopt, n_sets, alpha, p_center,
|
||||||
crot, strict, cov_center ):
|
crot, strict, cov_center):
|
||||||
"""Returns a confidence measure for model parameters.
|
"""Returns a confidence measure for model parameters.
|
||||||
Based on aopt.
|
Based on aopt.
|
||||||
|
|
||||||
|
@ -80,7 +113,7 @@ class PCA(Model):
|
||||||
|
|
||||||
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
|
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
|
||||||
Pcal = self.model['P'][:,:aopt]
|
Pcal = self.model['P'][:,:aopt]
|
||||||
# add the scale to P
|
# ensure scaled P
|
||||||
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
|
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
|
||||||
Pcal = Pcal*tnorm
|
Pcal = Pcal*tnorm
|
||||||
tsq = hotelling(jk_segments, Pcal, p_center,
|
tsq = hotelling(jk_segments, Pcal, p_center,
|
||||||
|
@ -90,33 +123,14 @@ class PCA(Model):
|
||||||
def make_model(self, amax, mode, scale):
|
def make_model(self, amax, mode, scale):
|
||||||
"""Model on optimal number of components.
|
"""Model on optimal number of components.
|
||||||
"""
|
"""
|
||||||
dat = pca(self.model['E0'], amax, scale, mode)
|
dat = pca(self.model['E0'], amax, scale, mode)
|
||||||
|
|
||||||
# explained variance
|
|
||||||
var_x, exp_var_x = variances(self.model['E0'], dat['T'], dat['P'])
|
|
||||||
dat['var_x'] = var_x
|
|
||||||
dat['exp_var_x'] = exp_var_x
|
|
||||||
|
|
||||||
#fixme###
|
|
||||||
do_lev_s = False
|
|
||||||
do_lev_v = False
|
|
||||||
#####
|
|
||||||
if do_lev_s:
|
|
||||||
# sample leverages
|
|
||||||
tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts
|
|
||||||
s_lev = leverage(amax, tnorm)
|
|
||||||
dat['s_lev'] = s_lev
|
|
||||||
if do_lev_v:
|
|
||||||
# variable leverages
|
|
||||||
v_lev = leverage(amax, dat['P'])
|
|
||||||
dat['v_lev'] = v_lev
|
|
||||||
|
|
||||||
self.model.update(dat)
|
self.model.update(dat)
|
||||||
|
|
||||||
def as_dataset(self, param, dtype='dataset'):
|
def as_dataset(self, param, dtype='dataset'):
|
||||||
"""Return model parameter as Dataset.
|
"""Return model parameter as Dataset.
|
||||||
"""
|
"""
|
||||||
if not param in self.model.keys():
|
if not param in self.model.keys():
|
||||||
|
logger.log('notice', 'Parameter: %s not in model' %param)
|
||||||
return
|
return
|
||||||
DX = self._dataset['X'] #input dataset
|
DX = self._dataset['X'] #input dataset
|
||||||
dim_name_0, dim_name_1 = DX.get_dim_name()
|
dim_name_0, dim_name_1 = DX.get_dim_name()
|
||||||
|
@ -128,17 +142,18 @@ class PCA(Model):
|
||||||
pc_ids = ['_amax', map(str,range(self._options['amax'])) ]
|
pc_ids = ['_amax', map(str,range(self._options['amax'])) ]
|
||||||
pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]
|
pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]
|
||||||
zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
|
zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
|
||||||
match_ids = {'E':[ids_0, ids_1],
|
match_ids = {'E' : [ids_0, ids_1],
|
||||||
'E0':[ids_0, ids_1],
|
'E0' : [ids_0, ids_1],
|
||||||
'P':[ids_1, pc_ids],
|
'P' : [ids_1, pc_ids],
|
||||||
'T':[ids_0, pc_ids],
|
'T' : [ids_0, pc_ids],
|
||||||
'W':[ids_1, pc_ids],
|
'W' : [ids_1, pc_ids],
|
||||||
'p_tsq':[ids_1, zero_dim],
|
'p_tsq' : [ids_1, zero_dim],
|
||||||
'rmsep':[pc_ids, zero_dim],
|
'rmsep' : [pc_ids, zero_dim],
|
||||||
'var_leverages':[ids_1, zero_dim],
|
'var_leverages' : [ids_1, zero_dim],
|
||||||
'sample_leverages':[pc_ids, zero_dim],
|
'sample_leverages' : [pc_ids, zero_dim],
|
||||||
'exp_var_x': [pc_ids, zero_dim],
|
'exp_var_x' : [pc_ids, zero_dim],
|
||||||
'var_x': [pc_ids, zero_dim],
|
'var_x' : [pc_ids, zero_dim],
|
||||||
|
'eigvals' : [pc_ids, zero_dim]
|
||||||
}
|
}
|
||||||
|
|
||||||
out = Dataset(self.model[param], match_ids[param], name=param)
|
out = Dataset(self.model[param], match_ids[param], name=param)
|
||||||
|
@ -256,7 +271,7 @@ class PLS(Model):
|
||||||
def make_model(self, a, b, amax, scale, mode, engine):
|
def make_model(self, a, b, amax, scale, mode, engine):
|
||||||
"""Make model on amax components.
|
"""Make model on amax components.
|
||||||
"""
|
"""
|
||||||
print "MAking model"
|
print "Making model"
|
||||||
dat = engine(a, b, amax, scale, mode)
|
dat = engine(a, b, amax, scale, mode)
|
||||||
self.model.update(dat)
|
self.model.update(dat)
|
||||||
|
|
||||||
|
@ -478,13 +493,6 @@ class LPLS(Model):
|
||||||
self._data['Z'] = c.asarray()
|
self._data['Z'] = c.asarray()
|
||||||
self.validation(options)
|
self.validation(options)
|
||||||
self.make_model(options)
|
self.make_model(options)
|
||||||
print self.model['evx']
|
|
||||||
evx_str = [str(i)[:3] for i in self.model['evx']]
|
|
||||||
logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str))
|
|
||||||
evy_str = [str(i)[:3] for i in self.model['evy']]
|
|
||||||
logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str))
|
|
||||||
evz_str = [str(i)[:3] for i in self.model['evz']]
|
|
||||||
logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str))
|
|
||||||
|
|
||||||
if options['calc_conf']:
|
if options['calc_conf']:
|
||||||
self.confidence(options)
|
self.confidence(options)
|
||||||
|
@ -553,13 +561,15 @@ class PcaOptions(Options):
|
||||||
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
|
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
|
||||||
(blmplots.PcaLoadingPlot, 'Loadings', True),
|
(blmplots.PcaLoadingPlot, 'Loadings', True),
|
||||||
(blmplots.LineViewXc, 'Line view', True),
|
(blmplots.LineViewXc, 'Line view', True),
|
||||||
(blmplots.PredictionErrorPlot, 'Residual Error', False)
|
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||||
|
(blmplots.PcaScreePlot, 'Scree', True)
|
||||||
]
|
]
|
||||||
|
|
||||||
opt['out_data'] = ['T','P', 'p_tsq']
|
opt['out_data'] = ['T','P', 'p_tsq']
|
||||||
opt['out_plots'] = [blmplots.PcaScorePlot,
|
opt['out_plots'] = [blmplots.PcaScorePlot,
|
||||||
blmplots.PcaLoadingPlot,
|
blmplots.PcaLoadingPlot,
|
||||||
blmplots.LineViewXc]
|
blmplots.LineViewXc,
|
||||||
|
blmplots.PcaScreePlot]
|
||||||
|
|
||||||
self.update(opt)
|
self.update(opt)
|
||||||
|
|
||||||
|
@ -621,7 +631,8 @@ class PlsOptions(Options):
|
||||||
|
|
||||||
# (class, name, sensitive, ticked)
|
# (class, name, sensitive, ticked)
|
||||||
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
||||||
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
(blmplots.PlsXLoadingPlot, 'X-Loadings', True),
|
||||||
|
(blmplots.PlsYLoadingPlot, 'Y-Loadings', True),
|
||||||
(blmplots.LineViewXc, 'Line view', True),
|
(blmplots.LineViewXc, 'Line view', True),
|
||||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||||
(blmplots.RMSEPPlot, 'RMSEP', False),
|
(blmplots.RMSEPPlot, 'RMSEP', False),
|
||||||
|
@ -629,7 +640,7 @@ class PlsOptions(Options):
|
||||||
]
|
]
|
||||||
|
|
||||||
opt['out_data'] = ['T','P', 'w_tsq']
|
opt['out_data'] = ['T','P', 'w_tsq']
|
||||||
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc]
|
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc]
|
||||||
|
|
||||||
#opt['out_data'] = None
|
#opt['out_data'] = None
|
||||||
|
|
||||||
|
@ -699,11 +710,9 @@ class LplsOptions(Options):
|
||||||
]
|
]
|
||||||
|
|
||||||
# (class, name, sensitive, ticked)
|
# (class, name, sensitive, ticked)
|
||||||
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True),
|
||||||
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
(blmplots.LplsXLoadingPlot, 'Loadings', True),
|
||||||
(blmplots.LineViewXc, 'Line view', True),
|
(blmplots.LineViewXc, 'Line view', True),
|
||||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
|
||||||
(blmplots.RMSEPPlot, 'RMSEP', False),
|
|
||||||
(blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False),
|
(blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False),
|
||||||
(blmplots.LplsXCorrelationPlot, 'X corr.', True),
|
(blmplots.LplsXCorrelationPlot, 'X corr.', True),
|
||||||
(blmplots.LplsZCorrelationPlot, 'Z corr.', True)
|
(blmplots.LplsZCorrelationPlot, 'Z corr.', True)
|
||||||
|
|
|
@ -8,7 +8,7 @@ fixme:
|
||||||
from matplotlib import cm,patches
|
from matplotlib import cm,patches
|
||||||
import gtk
|
import gtk
|
||||||
import fluents
|
import fluents
|
||||||
from fluents import plots, main
|
from fluents import plots, main,logger
|
||||||
import scipy
|
import scipy
|
||||||
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty
|
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty
|
||||||
from scipy.stats import corrcoef
|
from scipy.stats import corrcoef
|
||||||
|
@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot):
|
||||||
"""Set patch sizes."""
|
"""Set patch sizes."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def set_expvar_axlabels(self, param="evx"):
|
def set_expvar_axlabels(self, param=None):
|
||||||
|
if param == None:
|
||||||
|
param = self._expvar_param
|
||||||
|
else:
|
||||||
|
self._expvar_param = param
|
||||||
if not self.model.model.has_key(param):
|
if not self.model.model.has_key(param):
|
||||||
self.model.model[param] = None
|
self.model.model[param] = None
|
||||||
if self.model.model[param]==None:
|
if self.model.model[param]==None:
|
||||||
|
logger.log('notice', 'Param: %s not in model' %param)
|
||||||
|
print self.model.model.keys()
|
||||||
|
print self.model.model[param]
|
||||||
pass #fixme: do expvar calc here if not present
|
pass #fixme: do expvar calc here if not present
|
||||||
else:
|
else:
|
||||||
expvar = self.model.model[param]
|
expvar = self.model.model[param]
|
||||||
|
@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot):
|
||||||
self.selection_collection._offsets = xy
|
self.selection_collection._offsets = xy
|
||||||
self.canvas.draw_idle()
|
self.canvas.draw_idle()
|
||||||
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
|
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
|
||||||
new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad)
|
new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
|
||||||
self.axes.set_xlim(new_lims, emit=True)
|
self.axes.set_xlim(new_lims, emit=True)
|
||||||
self.set_expvar_axlabels()
|
self.set_expvar_axlabels()
|
||||||
self.canvas.draw_idle()
|
self.canvas.draw_idle()
|
||||||
|
@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot):
|
||||||
self.sc._offsets = xy
|
self.sc._offsets = xy
|
||||||
self.selection_collection._offsets = xy
|
self.selection_collection._offsets = xy
|
||||||
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
|
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
|
||||||
new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad)
|
new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
|
||||||
self.axes.set_ylim(new_lims, emit=True)
|
self.axes.set_ylim(new_lims, emit=True)
|
||||||
self.set_expvar_axlabels()
|
self.set_expvar_axlabels()
|
||||||
self.canvas.draw_idle()
|
self.canvas.draw_idle()
|
||||||
|
@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot):
|
||||||
for indx,txt in self._text_labels.items():
|
for indx,txt in self._text_labels.items():
|
||||||
if indx in index:
|
if indx in index:
|
||||||
txt.set_visible(True)
|
txt.set_visible(True)
|
||||||
self.canvas.draw()
|
self.canvas.draw_idle()
|
||||||
|
|
||||||
def hide_labels(self):
|
def hide_labels(self):
|
||||||
for txt in self._text_labels.values():
|
for txt in self._text_labels.values():
|
||||||
txt.set_visible(False)
|
txt.set_visible(False)
|
||||||
self.canvas.draw()
|
self.canvas.draw_idle()
|
||||||
|
|
||||||
|
|
||||||
|
class PcaScreePlot(plots.BarPlot):
|
||||||
|
def __init__(self, model):
|
||||||
|
title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
|
||||||
|
ds = model.as_dataset('eigvals')
|
||||||
|
if ds==None:
|
||||||
|
logger.log('notice', 'Model does not contain eigvals')
|
||||||
|
plots.BarPlot.__init__(self, ds, name=title)
|
||||||
|
|
||||||
|
|
||||||
class PcaScorePlot(BlmScatterPlot):
|
class PcaScorePlot(BlmScatterPlot):
|
||||||
def __init__(self, model, absi=0, ordi=1):
|
def __init__(self, model, absi=0, ordi=1):
|
||||||
title = "Pca scores (%s)" %model._dataset['X'].get_name()
|
title = "Pca scores (%s)" %model._dataset['X'].get_name()
|
||||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
||||||
|
self.set_expvar_axlabels(param="expvarx")
|
||||||
|
|
||||||
class PcaLoadingPlot(BlmScatterPlot):
|
class PcaLoadingPlot(BlmScatterPlot):
|
||||||
def __init__(self, model, absi=0, ordi=1):
|
def __init__(self, model, absi=0, ordi=1):
|
||||||
|
@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot):
|
||||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
|
||||||
|
|
||||||
|
|
||||||
class PlsLoadingPlot(BlmScatterPlot):
|
class PlsXLoadingPlot(BlmScatterPlot):
|
||||||
def __init__(self, model, absi=0, ordi=1):
|
def __init__(self, model, absi=0, ordi=1):
|
||||||
title = "Pls loadings (%s)" %model._dataset['X'].get_name()
|
title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
|
||||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
|
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
|
||||||
#self.set_expvar_axlabels(self, param="expvarx")
|
#self.set_expvar_axlabels(self, param="expvarx")
|
||||||
|
|
||||||
|
|
||||||
|
class PlsYLoadingPlot(BlmScatterPlot):
|
||||||
|
def __init__(self, model, absi=0, ordi=1):
|
||||||
|
title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name()
|
||||||
|
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q')
|
||||||
|
|
||||||
|
|
||||||
class PlsCorrelationLoadingPlot(BlmScatterPlot):
|
class PlsCorrelationLoadingPlot(BlmScatterPlot):
|
||||||
def __init__(self, model, absi=0, ordi=1):
|
def __init__(self, model, absi=0, ordi=1):
|
||||||
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
|
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
|
||||||
|
@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot):
|
||||||
|
|
||||||
|
|
||||||
class InfluencePlot(plots.ScatterPlot):
|
class InfluencePlot(plots.ScatterPlot):
|
||||||
|
""" Returns a leverage vs resiudal scatter plot.
|
||||||
"""
|
"""
|
||||||
"""
|
def __init__(self, model, dim, name="Influence"):
|
||||||
pass
|
if not model.model.has_key('levx'):
|
||||||
|
logger.log('notice', 'Model has no calculations of leverages')
|
||||||
|
return
|
||||||
|
if not model.model.has_key('ssqx'):
|
||||||
|
logger.log('notice', 'Model has no calculations of residuals')
|
||||||
|
return
|
||||||
|
ds1 = model.as_dataset('levx')
|
||||||
|
ds2 = model.as_dataset('ssqx')
|
||||||
|
plots.ScatterPlot.__init__(self, ds1, ds2,
|
||||||
|
id_dim, sel_dim, id_1, id_2,
|
||||||
|
c=col, s=20, sel_dim_2=sel_dim_2,
|
||||||
|
name='Load Volcano')
|
||||||
|
|
||||||
|
|
||||||
class RMSEPPlot(plots.BarPlot):
|
class RMSEPPlot(plots.BarPlot):
|
||||||
|
|
|
@ -11,7 +11,7 @@ import time
|
||||||
|
|
||||||
|
|
||||||
def hotelling(Pcv, P, p_center='med', cov_center='med',
|
def hotelling(Pcv, P, p_center='med', cov_center='med',
|
||||||
alpha=0.3, crot=True, strict=False, metric=None):
|
alpha=0.3, crot=True, strict=False):
|
||||||
"""Returns regularized hotelling T^2.
|
"""Returns regularized hotelling T^2.
|
||||||
|
|
||||||
alpha -- regularisation towards pooled cov estimates
|
alpha -- regularisation towards pooled cov estimates
|
||||||
|
@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
|
||||||
alpha -- regularisation
|
alpha -- regularisation
|
||||||
crot -- rotate submodels toward full?
|
crot -- rotate submodels toward full?
|
||||||
strict -- only rotate 90 degree ?
|
strict -- only rotate 90 degree ?
|
||||||
metric -- inverse metric matrix (if Pcv and P from metric pca/pls)
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
m, n = P.shape
|
m, n = P.shape
|
||||||
if metric==None:
|
|
||||||
metric = eye(m, dtype='<f8')
|
|
||||||
P = dot(metric.T, asarray(P))
|
|
||||||
n_sets, n, amax = Pcv.shape
|
n_sets, n, amax = Pcv.shape
|
||||||
# allocate
|
# allocate
|
||||||
T_sq = empty((n, ),dtype='f')
|
T_sq = empty((n, ),dtype='f')
|
||||||
|
@ -36,7 +32,6 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
|
||||||
# rotate sub_models to full model
|
# rotate sub_models to full model
|
||||||
if crot:
|
if crot:
|
||||||
for i, Pi in enumerate(Pcv):
|
for i, Pi in enumerate(Pcv):
|
||||||
Pi = dot(metric.T, Pi)
|
|
||||||
Pcv[i] = procrustes(P, Pi, strict=strict)
|
Pcv[i] = procrustes(P, Pi, strict=strict)
|
||||||
|
|
||||||
# center of pnull
|
# center of pnull
|
||||||
|
@ -118,7 +113,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
center=True,
|
center=True,
|
||||||
sim_method='shuffle',
|
sim_method='shuffle',
|
||||||
p_center='med', cov_center='med',
|
p_center='med', cov_center='med',
|
||||||
crot=True, strict=False, metric=None):
|
crot=True, strict=False):
|
||||||
|
|
||||||
"""Returns qvals for pls model.
|
"""Returns qvals for pls model.
|
||||||
|
|
||||||
|
@ -133,7 +128,6 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
cov_center -- location estimator for covariance of submodels ['med']
|
cov_center -- location estimator for covariance of submodels ['med']
|
||||||
crot -- bool, use rotations of sub models?
|
crot -- bool, use rotations of sub models?
|
||||||
strict -- bool, use stict (rot/flips only) rotations?
|
strict -- bool, use stict (rot/flips only) rotations?
|
||||||
metric -- bool, use row metric?
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
|
@ -144,13 +138,12 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
if center:
|
if center:
|
||||||
ac = a - a.mean(0)
|
ac = a - a.mean(0)
|
||||||
bc = b - b.mean(0)
|
bc = b - b.mean(0)
|
||||||
if metric!=None:
|
|
||||||
ac = dot(ac, metric)
|
|
||||||
if algo=='bridge':
|
if algo=='bridge':
|
||||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||||
else:
|
else:
|
||||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True)
|
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
|
||||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
alpha=alpha, crot=crot, strict=strict,
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
cov_center=cov_center)
|
cov_center=cov_center)
|
||||||
|
@ -162,7 +155,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
|
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
|
||||||
else:
|
else:
|
||||||
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
|
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
|
||||||
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
|
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
|
||||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
alpha=alpha, crot=crot, strict=strict,
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
cov_center=cov_center)
|
cov_center=cov_center)
|
||||||
|
@ -205,10 +198,10 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
||||||
n_iter=20, algo='pls',
|
n_iter=20, algo='pls',
|
||||||
sim_method='shuffle',
|
sim_method='shuffle',
|
||||||
p_center='med', cov_center='med',
|
p_center='med', cov_center='med',
|
||||||
crot=True, strict=False, metric=None):
|
crot=True, strict=False):
|
||||||
|
|
||||||
"""Returns qvals for pls model.
|
"""Returns qvals for pls model.
|
||||||
Shuffling of variables in X is preprocessed in metric.
|
Shuffling of variables in X.
|
||||||
Null model is 'If I put genes randomly on network' ... if they are sign:
|
Null model is 'If I put genes randomly on network' ... if they are sign:
|
||||||
then this is due to network structure and not covariance with response.
|
then this is due to network structure and not covariance with response.
|
||||||
|
|
||||||
|
@ -223,7 +216,6 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
||||||
cov_center -- location estimator for covariance of submodels ['med']
|
cov_center -- location estimator for covariance of submodels ['med']
|
||||||
crot -- bool, use rotations of sub models?
|
crot -- bool, use rotations of sub models?
|
||||||
strict -- bool, use stict (rot/flips only) rotations?
|
strict -- bool, use stict (rot/flips only) rotations?
|
||||||
metric -- bool, use row metric?
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
|
@ -236,13 +228,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
||||||
if center==True:
|
if center==True:
|
||||||
ac = a - a.mean(0)
|
ac = a - a.mean(0)
|
||||||
bc = b - b.mean(0)
|
bc = b - b.mean(0)
|
||||||
if metric==None:
|
|
||||||
metric = eye(n,n)
|
|
||||||
if algo=='bridge':
|
if algo=='bridge':
|
||||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||||
else:
|
else:
|
||||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
|
||||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
alpha=alpha, crot=crot, strict=strict,
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
cov_center=cov_center)
|
cov_center=cov_center)
|
||||||
|
@ -251,13 +242,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
||||||
for i, a_shuff in enumerate(Vs):
|
for i, a_shuff in enumerate(Vs):
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
a = a_shuff - a_shuff.mean(0)
|
a = a_shuff - a_shuff.mean(0)
|
||||||
a = dot(a, metric)
|
|
||||||
|
|
||||||
if algo=='bridge':
|
if algo=='bridge':
|
||||||
dat = bridge(a, b, aopt, 'loads','fast')
|
dat = bridge(a, b, aopt, 'loads','fast')
|
||||||
else:
|
else:
|
||||||
dat = pls(a, b, aopt, 'loads', 'fast')
|
dat = pls(a, b, aopt, 'loads', 'fast')
|
||||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
|
||||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
alpha=alpha, crot=crot, strict=strict,
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
cov_center=cov_center)
|
cov_center=cov_center)
|
||||||
|
|
|
@ -32,7 +32,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
||||||
lev --leverages, ssq -- sum of squares, expvar -- cumulative
|
lev --leverages, ssq -- sum of squares, expvar -- cumulative
|
||||||
explained variance, aopt -- number of components used
|
explained variance, aopt -- number of components used
|
||||||
|
|
||||||
:OtherParameters:
|
:OtherParam eters:
|
||||||
mode : str
|
mode : str
|
||||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||||
center_axis : int
|
center_axis : int
|
||||||
|
@ -68,7 +68,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
||||||
a = a - expand_dims(a.mean(center_axis), center_axis)
|
a = a - expand_dims(a.mean(center_axis), center_axis)
|
||||||
if m>(n+100) or n>(m+100):
|
if m>(n+100) or n>(m+100):
|
||||||
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
|
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
|
||||||
print s[:10]
|
|
||||||
else:
|
else:
|
||||||
u, s, vt = svd(a, 0)
|
u, s, vt = svd(a, 0)
|
||||||
v = vt.T
|
v = vt.T
|
||||||
|
@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
||||||
lev = []
|
lev = []
|
||||||
for ai in range(aopt):
|
for ai in range(aopt):
|
||||||
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
||||||
ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
|
ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
|
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
|
||||||
else:
|
else:
|
||||||
|
@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
||||||
# variances
|
# variances
|
||||||
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
|
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
|
||||||
|
|
||||||
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}
|
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
|
||||||
|
|
||||||
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||||
""" Principal Component Regression.
|
""" Principal Component Regression.
|
||||||
|
@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
|
||||||
if i>0:
|
if i>0:
|
||||||
for j in range(0, i, 1):
|
for j in range(0, i, 1):
|
||||||
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
||||||
print vnorm(r)
|
|
||||||
t = dot(a, r)
|
t = dot(a, r)
|
||||||
tt = vnorm(t)**2
|
tt = vnorm(t)**2
|
||||||
p = dot(a.T, t)/tt
|
p = dot(a.T, t)/tt
|
||||||
|
@ -375,14 +374,11 @@ def w_pls(aat, b, aopt):
|
||||||
u = dot(b , q) #y-factor scores
|
u = dot(b , q) #y-factor scores
|
||||||
U[:,i] = u.ravel()
|
U[:,i] = u.ravel()
|
||||||
t = dot(aat, u)
|
t = dot(aat, u)
|
||||||
print "Norm of t: %s" %vnorm(t)
|
|
||||||
print "s: %s" %s
|
|
||||||
|
|
||||||
t = t/vnorm(t)
|
t = t/vnorm(t)
|
||||||
T[:,i] = t.ravel()
|
T[:,i] = t.ravel()
|
||||||
r = dot(aat, t)#score-weights
|
r = dot(aat, t)#score-weights
|
||||||
#r = r/vnorm(r)
|
#r = r/vnorm(r)
|
||||||
print "Norm R: %s" %vnorm(r)
|
|
||||||
R[:,i] = r.ravel()
|
R[:,i] = r.ravel()
|
||||||
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
|
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
|
||||||
if i<aopt:
|
if i<aopt:
|
||||||
|
@ -701,10 +697,9 @@ def esvd(data, amax=None):
|
||||||
pcrange = None
|
pcrange = None
|
||||||
else:
|
else:
|
||||||
pcrange = [n-amax, n]
|
pcrange = [n-amax, n]
|
||||||
print "symm>n"
|
|
||||||
s, v = symeig(kernel, range=pcrange, overwrite=True)
|
s, v = symeig(kernel, range=pcrange, overwrite=True)
|
||||||
s = s[::-1]
|
s = s[::-1].real
|
||||||
v = v[:,::-1]
|
v = v[:,::-1].real
|
||||||
else:
|
else:
|
||||||
u, s, vt = svd(kernel)
|
u, s, vt = svd(kernel)
|
||||||
v = vt.T
|
v = vt.T
|
||||||
|
@ -718,7 +713,6 @@ def esvd(data, amax=None):
|
||||||
pcrange = None
|
pcrange = None
|
||||||
else:
|
else:
|
||||||
pcrange = [m-amax, m]
|
pcrange = [m-amax, m]
|
||||||
print "sym (m<n)"
|
|
||||||
s, u = symeig(kernel, range=pcrange, overwrite=True)
|
s, u = symeig(kernel, range=pcrange, overwrite=True)
|
||||||
s = s[::-1]
|
s = s[::-1]
|
||||||
u = u[:,::-1]
|
u = u[:,::-1]
|
||||||
|
@ -726,8 +720,8 @@ def esvd(data, amax=None):
|
||||||
u, s, vt = svd(kernel)
|
u, s, vt = svd(kernel)
|
||||||
s = sqrt(s)
|
s = sqrt(s)
|
||||||
v = dot(data.T, u)/s
|
v = dot(data.T, u)/s
|
||||||
print s[:2]
|
# some use of symeig returns the 0 imaginary part
|
||||||
return u, s, v
|
return u.real, s.real, v.real
|
||||||
|
|
||||||
def vnorm(x):
|
def vnorm(x):
|
||||||
# assume column arrays (or vectors)
|
# assume column arrays (or vectors)
|
||||||
|
|
|
@ -38,7 +38,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
|
||||||
else:
|
else:
|
||||||
yield aat_in,aat_out,b_in,b_out
|
yield aat_in,aat_out,b_in,b_out
|
||||||
|
|
||||||
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None):
|
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
|
||||||
"""Random block crossvalidation
|
"""Random block crossvalidation
|
||||||
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
||||||
"""
|
"""
|
||||||
|
@ -61,15 +61,14 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No
|
||||||
mn_b = bcal.mean(0)[newaxis]
|
mn_b = bcal.mean(0)[newaxis]
|
||||||
bcal = bcal - mn_b
|
bcal = bcal - mn_b
|
||||||
btrue = btrue - mn_b
|
btrue = btrue - mn_b
|
||||||
if metric!=None:
|
|
||||||
acal = dot(acal, metric)
|
|
||||||
if index_out:
|
if index_out:
|
||||||
yield acal, atrue, bcal, btrue, out
|
yield acal, atrue, bcal, btrue, out
|
||||||
else:
|
else:
|
||||||
yield acal, atrue, bcal, btrue
|
yield acal, atrue, bcal, btrue
|
||||||
|
|
||||||
|
|
||||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
|
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
|
||||||
"""Returns a generator of crossvalidation sample segments.
|
"""Returns a generator of crossvalidation sample segments.
|
||||||
|
|
||||||
input:
|
input:
|
||||||
|
@ -97,8 +96,7 @@ def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
|
||||||
mn_a = acal.mean(0)[newaxis]
|
mn_a = acal.mean(0)[newaxis]
|
||||||
acal = acal - mn_a
|
acal = acal - mn_a
|
||||||
atrue = atrue - mn_a
|
atrue = atrue - mn_a
|
||||||
if metric!=None:
|
|
||||||
acal = dot(acal, metric)
|
|
||||||
if index_out:
|
if index_out:
|
||||||
yield acal, atrue, out
|
yield acal, atrue, out
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -80,12 +80,12 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
aopt = find_aopt_from_sep(msep)
|
aopt = find_aopt_from_sep(msep)
|
||||||
return sqrt(msep)
|
return sqrt(msep)
|
||||||
|
|
||||||
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
|
||||||
k, l = m_shape(Y)
|
k, l = m_shape(Y)
|
||||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||||
EE = zeros((amax, k, l), dtype='<f8')
|
EE = zeros((amax, k, l), dtype='<f8')
|
||||||
Yhat = zeros((amax, k, l), dtype='<f8')
|
Yhat = zeros((amax, k, l), dtype='<f8')
|
||||||
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric)
|
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
|
||||||
for Xin, Xout, Yin, Yout, out in V:
|
for Xin, Xout, Yin, Yout, out in V:
|
||||||
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
||||||
Yin = (Yin - ym)
|
Yin = (Yin - ym)
|
||||||
|
@ -187,7 +187,7 @@ def pca_cv_val(a, amax, n_sets):
|
||||||
|
|
||||||
return sep, aopt
|
return sep, aopt
|
||||||
|
|
||||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None):
|
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
|
||||||
""" Returns CV-segments of paramter W for wide X.
|
""" Returns CV-segments of paramter W for wide X.
|
||||||
|
|
||||||
todo: add support for T,Q and B
|
todo: add support for T,Q and B
|
||||||
|
@ -196,11 +196,11 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
||||||
n_blocks = b.shape[0]
|
n_blocks = b.shape[0]
|
||||||
|
|
||||||
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
|
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
|
||||||
if use_pack and metric==None:
|
if use_pack:
|
||||||
u, s, inflater = svd(a, full_matrices=0)
|
u, s, inflater = svd(a, full_matrices=0)
|
||||||
a = u*s
|
a = u*s
|
||||||
|
|
||||||
V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric)
|
V = pls_gen(a, b, n_blocks=n_blocks, center=center)
|
||||||
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
|
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
|
||||||
if algo=='pls':
|
if algo=='pls':
|
||||||
dat = pls(a_in, b_in, amax, 'loads', 'fast')
|
dat = pls(a_in, b_in, amax, 'loads', 'fast')
|
||||||
|
@ -209,14 +209,14 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
||||||
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
||||||
|
|
||||||
W = dat['W']
|
W = dat['W']
|
||||||
if use_pack and metric==None:
|
if use_pack:
|
||||||
W = dot(inflater.T, W)
|
W = dot(inflater.T, W)
|
||||||
|
|
||||||
Wcv[nn,:,:] = W[:,:,]
|
Wcv[nn,:,:] = W[:,:,]
|
||||||
|
|
||||||
return Wcv
|
return Wcv
|
||||||
|
|
||||||
def pca_jkP(a, aopt, n_blocks=None, metric=None):
|
def pca_jkP(a, aopt, n_blocks=None):
|
||||||
"""Returns loading from PCA on CV-segments.
|
"""Returns loading from PCA on CV-segments.
|
||||||
|
|
||||||
input:
|
input:
|
||||||
|
|
Reference in New Issue