Multiple lib changes
This commit is contained in:
parent
a65d79697f
commit
1c2c2c8895
|
@ -1,7 +1,9 @@
|
||||||
"""This module contains bilinear models(Functions)
|
"""This module contains bilinear models(Functions)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import pygtk
|
||||||
import gtk
|
import gtk
|
||||||
|
import gtk.glade
|
||||||
from fluents.workflow import Function, OptionsDialog, Options
|
from fluents.workflow import Function, OptionsDialog, Options
|
||||||
from fluents.dataset import Dataset
|
from fluents.dataset import Dataset
|
||||||
from fluents import plots, dataset, workflow, logger
|
from fluents import plots, dataset, workflow, logger
|
||||||
|
@ -12,7 +14,7 @@ from cx_utils import mat_center
|
||||||
from validation import *
|
from validation import *
|
||||||
import blmplots
|
import blmplots
|
||||||
import engines
|
import engines
|
||||||
|
import copy
|
||||||
|
|
||||||
class Model(Function):
|
class Model(Function):
|
||||||
"""Base class of bilinear models.
|
"""Base class of bilinear models.
|
||||||
|
@ -39,19 +41,38 @@ class PCA(Model):
|
||||||
Model.__init__(self,id,name)
|
Model.__init__(self,id,name)
|
||||||
self._options = PcaOptions()
|
self._options = PcaOptions()
|
||||||
|
|
||||||
def pre_validation(self, amax, n_sets, val_engine):
|
def validation(self, amax, cv_val_sets, pert_val_sets, cv_val_method, pert_val_method):
|
||||||
"""Model calculations for maximum number of components.
|
"""Model validation and estimate of optimal number of components.
|
||||||
"""
|
"""
|
||||||
rmsep = val_engine(self.model['E0'], amax, n_sets)
|
if self._options['calc_cv']:
|
||||||
self.model['rmsep'] = rmsep
|
if cv_val_method == 'random':
|
||||||
self.model['aopt'] = rmsep.argmin()
|
sep, aopt = pca_cv_val(self.model['E0'], amax, cv_val_sets)
|
||||||
|
self.model['sep'] = sep
|
||||||
|
|
||||||
|
if self._options['calc_pert']:
|
||||||
|
if pert_val_method == 'random_diag':
|
||||||
|
sep, aopt = pca_alter_val(self.model['E0'], amax, pert_val_sets)
|
||||||
|
self.model['sep'] = sep
|
||||||
|
|
||||||
|
if self._options['calc_cv']==False and self._options['calc_pert']==False:
|
||||||
|
self.model['sep'] = None
|
||||||
|
aopt = self._options['amax']
|
||||||
|
|
||||||
|
if self._options['auto_aopt']:
|
||||||
|
logger.log("notice", "Auto aopt: " + str(aopt))
|
||||||
|
self._options['aopt'] = aopt
|
||||||
|
if aopt==1:
|
||||||
|
logger.log('notice', 'Aopt at first component!')
|
||||||
|
|
||||||
def confidence(self, aopt, n_sets, alpha, p_center,
|
def confidence(self, aopt, n_sets, alpha, p_center,
|
||||||
crot, strict, cov_center ):
|
crot, strict, cov_center ):
|
||||||
"""Returns a confidence measure for model parameters.
|
"""Returns a confidence measure for model parameters.
|
||||||
Based on aopt.
|
Based on aopt.
|
||||||
"""
|
"""
|
||||||
aopt = self.model['aopt']
|
if aopt<2:
|
||||||
|
aopt = 2
|
||||||
|
logger.log('notice','Hotellings T2 needs more than 1 comp.\n switching to 2!!')
|
||||||
|
|
||||||
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
|
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
|
||||||
Pcal = self.model['P'][:,:aopt]
|
Pcal = self.model['P'][:,:aopt]
|
||||||
tsq = hotelling(jk_segments, Pcal, p_center,
|
tsq = hotelling(jk_segments, Pcal, p_center,
|
||||||
|
@ -96,8 +117,8 @@ class PCA(Model):
|
||||||
# vars
|
# vars
|
||||||
ids_1 = [dim_name_1, DX.get_identifiers(dim_name_1, sorted=True)]
|
ids_1 = [dim_name_1, DX.get_identifiers(dim_name_1, sorted=True)]
|
||||||
# components (hidden)
|
# components (hidden)
|
||||||
pc_ids = ['_comp_a', map(str,range(self.model['aopt'])) ]
|
pc_ids = ['_amax', map(str,range(self._options['amax'])) ]
|
||||||
pc_ids_opt = ['_comp_o', map(str, range(self.model['aopt'])) ]
|
pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]
|
||||||
zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
|
zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
|
||||||
match_ids = {'E':[ids_0, ids_1],
|
match_ids = {'E':[ids_0, ids_1],
|
||||||
'E0':[ids_0, ids_1],
|
'E0':[ids_0, ids_1],
|
||||||
|
@ -121,8 +142,7 @@ class PCA(Model):
|
||||||
#try:
|
#try:
|
||||||
out.append(plt(self))
|
out.append(plt(self))
|
||||||
#except:
|
#except:
|
||||||
# print plt
|
# logger.log('debug', 'Plot: %s failed') %str(plt)
|
||||||
#logger.log('debug', 'Plot: %s failed') %plt
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def run_o(self, data):
|
def run_o(self, data):
|
||||||
|
@ -130,6 +150,8 @@ class PCA(Model):
|
||||||
"""
|
"""
|
||||||
self.clear()
|
self.clear()
|
||||||
options = self._options
|
options = self._options
|
||||||
|
for item in options.items():
|
||||||
|
print item
|
||||||
self._dataset['X'] = data
|
self._dataset['X'] = data
|
||||||
self._data['X'] = data.asarray().astype('<f8')
|
self._data['X'] = data.asarray().astype('<f8')
|
||||||
if options['center']:
|
if options['center']:
|
||||||
|
@ -138,7 +160,8 @@ class PCA(Model):
|
||||||
else:
|
else:
|
||||||
self.model['E0'] = data.asarray()
|
self.model['E0'] = data.asarray()
|
||||||
|
|
||||||
self.pre_validation(**options.pre_validation_options())
|
self.validation(**options.validation_options())
|
||||||
|
self.model['aopt'] = self._options['aopt']
|
||||||
self.make_model(**options.make_model_options())
|
self.make_model(**options.make_model_options())
|
||||||
if options['calc_conf']:
|
if options['calc_conf']:
|
||||||
self.confidence(**options.confidence_options())
|
self.confidence(**options.confidence_options())
|
||||||
|
@ -159,7 +182,6 @@ class PCA(Model):
|
||||||
if response == gtk.RESPONSE_OK:
|
if response == gtk.RESPONSE_OK:
|
||||||
# set output data and plots
|
# set output data and plots
|
||||||
dialog.set_output()
|
dialog.set_output()
|
||||||
|
|
||||||
#run with current data and options
|
#run with current data and options
|
||||||
return self.run_o(data)
|
return self.run_o(data)
|
||||||
|
|
||||||
|
@ -172,10 +194,10 @@ class PLS(Model):
|
||||||
def pre_validation(self, amax, n_sets, val_engine):
|
def pre_validation(self, amax, n_sets, val_engine):
|
||||||
"""Returns rmsec,rmsep for model.
|
"""Returns rmsec,rmsep for model.
|
||||||
"""
|
"""
|
||||||
rmsep = val_engine(self.model['E0'], self.model['F0'],
|
rmsep, aopt = val_engine(self.model['E0'], self.model['F0'],
|
||||||
amax, n_sets)
|
amax, n_sets)
|
||||||
self.model['rmsep'] = rmsep.mean(0)
|
self.model['rmsep'] = rmsep.mean(0)
|
||||||
self.model['aopt'] = rmsep.mean(0).argmin()
|
self.model['aopt'] = aopt
|
||||||
|
|
||||||
def confidence(self, aopt, n_sets, alpha, p_center,
|
def confidence(self, aopt, n_sets, alpha, p_center,
|
||||||
crot, strict, cov_center ):
|
crot, strict, cov_center ):
|
||||||
|
@ -341,34 +363,39 @@ class PcaOptions(Options):
|
||||||
opt['algo'] = 'pca'
|
opt['algo'] = 'pca'
|
||||||
opt['engine'] = engines.pca
|
opt['engine'] = engines.pca
|
||||||
opt['mode'] = 'normal' # how much info to calculate
|
opt['mode'] = 'normal' # how much info to calculate
|
||||||
opt['lod'] = 'compact' # how much info to store
|
opt['amax'] = 10
|
||||||
opt['amax'] = 5
|
opt['aopt'] = 100
|
||||||
opt['aopt'] = 5
|
opt['auto_aopt'] = False
|
||||||
opt['center'] = True
|
opt['center'] = True
|
||||||
opt['center_mth'] = mat_center
|
opt['center_mth'] = mat_center
|
||||||
opt['scale'] = 'scores'
|
opt['scale'] = 'scores'
|
||||||
opt['calc_conf'] = True
|
|
||||||
opt['n_sets'] = 5
|
|
||||||
|
|
||||||
|
opt['calc_conf'] = False
|
||||||
|
opt['n_sets'] = 5
|
||||||
opt['strict'] = True
|
opt['strict'] = True
|
||||||
opt['p_center'] = 'med'
|
opt['p_center'] = 'med'
|
||||||
opt['alpha'] = .8
|
opt['alpha'] = .8
|
||||||
opt['cov_center'] = 'med'
|
opt['cov_center'] = 'med'
|
||||||
opt['crot'] = True
|
opt['crot'] = True
|
||||||
|
|
||||||
opt['val_engine'] = pca_alter_val
|
opt['calc_cv'] = False
|
||||||
opt['val_n_sets'] = 10
|
opt['calc_pert'] = True
|
||||||
|
opt['pert_val_method'] = 'random_diag'
|
||||||
|
opt['cv_val_method'] = 'random'
|
||||||
|
opt['cv_val_sets'] = 10
|
||||||
|
opt['pert_val_sets'] = 10
|
||||||
|
|
||||||
opt['all_data'] = [('T', 'scores', True),
|
opt['all_data'] = [('T', 'scores', True),
|
||||||
('P', 'loadings', True),
|
('P', 'loadings', True),
|
||||||
('E','residuals', False),
|
('E','residuals', False),
|
||||||
('p_tsq', 't2', False),
|
('p_tsq', 't2', False),
|
||||||
('rmsep', 'root mean square error of prediction', False)
|
('rmsep', 'RMSEP', False)
|
||||||
]
|
]
|
||||||
|
|
||||||
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
|
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
|
||||||
(blmplots.PcaLoadingPlot, 'Loadings', True),
|
(blmplots.PcaLoadingPlot, 'Loadings', True),
|
||||||
(blmplots.LineViewXc, 'Line view', True)
|
(blmplots.LineViewXc, 'Line view', True),
|
||||||
|
(blmplots.PredictionErrorPlot, 'Residual Error', True)
|
||||||
]
|
]
|
||||||
|
|
||||||
opt['out_data'] = ['T','P', 'p_tsq']
|
opt['out_data'] = ['T','P', 'p_tsq']
|
||||||
|
@ -387,9 +414,10 @@ class PcaOptions(Options):
|
||||||
'strict', 'crot', 'cov_center']
|
'strict', 'crot', 'cov_center']
|
||||||
return self._copy_from_list(opt_list)
|
return self._copy_from_list(opt_list)
|
||||||
|
|
||||||
def pre_validation_options(self):
|
def validation_options(self):
|
||||||
"""Options for pre_validation method."""
|
"""Options for the validation method."""
|
||||||
opt_list = ['amax', 'n_sets', 'val_engine']
|
opt_list = ['amax', 'cv_val_sets', 'pert_val_sets',
|
||||||
|
'cv_val_method', 'pert_val_method']
|
||||||
return self._copy_from_list(opt_list)
|
return self._copy_from_list(opt_list)
|
||||||
|
|
||||||
|
|
||||||
|
@ -411,7 +439,7 @@ class PlsOptions(Options):
|
||||||
opt['center'] = True
|
opt['center'] = True
|
||||||
opt['center_mth'] = mat_center
|
opt['center_mth'] = mat_center
|
||||||
opt['scale'] = 'scores'
|
opt['scale'] = 'scores'
|
||||||
opt['calc_conf'] = True
|
opt['calc_conf'] = False
|
||||||
opt['n_sets'] = 10
|
opt['n_sets'] = 10
|
||||||
|
|
||||||
opt['strict'] = True
|
opt['strict'] = True
|
||||||
|
@ -420,13 +448,15 @@ class PlsOptions(Options):
|
||||||
opt['cov_center'] = 'med'
|
opt['cov_center'] = 'med'
|
||||||
opt['crot'] = True
|
opt['crot'] = True
|
||||||
|
|
||||||
|
opt['calc_cv'] = True
|
||||||
|
opt['calc_pert'] = False
|
||||||
opt['val_engine'] = w_pls_cv_val
|
opt['val_engine'] = w_pls_cv_val
|
||||||
|
|
||||||
opt['all_data'] = [('T', 'scores', True),
|
opt['all_data'] = [('T', 'scores', True),
|
||||||
('P', 'loadings', True),
|
('P', 'loadings', True),
|
||||||
('E','residuals', False),
|
('E','residuals', False),
|
||||||
('p_tsq', 't2', False),
|
('p_tsq', 't2', False),
|
||||||
('rmsep', 'root mean square error of prediction', False)
|
('rmsep', 'RMSEP', False)
|
||||||
]
|
]
|
||||||
|
|
||||||
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
||||||
|
@ -468,9 +498,175 @@ class PcaOptionsDialog(OptionsDialog):
|
||||||
def __init__(self, data, options, input_names=['X']):
|
def __init__(self, data, options, input_names=['X']):
|
||||||
OptionsDialog.__init__(self, data, options, input_names)
|
OptionsDialog.__init__(self, data, options, input_names)
|
||||||
|
|
||||||
|
glade_file = "/home/flatberg/Projects/project4/project4.glade"
|
||||||
|
notebook_name = "vbox1"
|
||||||
|
page_name = "Options"
|
||||||
|
self.add_page_from_glade(glade_file, notebook_name, page_name)
|
||||||
|
# connect signals to handlers
|
||||||
|
dic = {"on_amax_value_changed" : self.on_amax_changed,
|
||||||
|
"on_aopt_value_changed" : self.on_aopt_changed,
|
||||||
|
"auto_aopt_toggled" : self.auto_aopt_toggled,
|
||||||
|
"center_toggled" : self.center_toggled,
|
||||||
|
"on_scale_changed" : self.on_scale_changed,
|
||||||
|
"on_val_none" : self.val_toggled,
|
||||||
|
"on_val_cv" : self.cv_toggled,
|
||||||
|
"on_val_pert" : self.pert_toggled,
|
||||||
|
"on_cv_method_changed" : self.on_cv_method_changed,
|
||||||
|
"on_cv_sets_changed" : self.on_cv_sets_changed,
|
||||||
|
"on_pert_sets_changed" : self.on_pert_sets_changed,
|
||||||
|
"on_conf_toggled" : self.on_conf_toggled
|
||||||
|
}
|
||||||
|
|
||||||
|
self.wTree.signal_autoconnect(dic)
|
||||||
|
|
||||||
|
# set/ensure valid default values/ranges
|
||||||
|
amax_sb = self.wTree.get_widget("amax_spinbutton")
|
||||||
|
max_comp = min(data[0].shape) # max num of components
|
||||||
|
if self._options['amax']>max_comp:
|
||||||
|
logger.log('debug', 'amax default too large ... adjusting')
|
||||||
|
self._options['amax'] = max_comp
|
||||||
|
amax_sb.get_adjustment().set_all(self._options['amax'], 1, max_comp, 1, 0, 0)
|
||||||
|
# aopt spin button
|
||||||
|
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||||
|
if self._options['aopt']>self._options['amax']:
|
||||||
|
self._options['aopt'] = self._options['amax'] + 1 - 1
|
||||||
|
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0)
|
||||||
|
|
||||||
|
# scale
|
||||||
|
scale_cb = self.wTree.get_widget("scale_combobox")
|
||||||
|
scale_cb.set_active(0)
|
||||||
|
|
||||||
|
# validation frames
|
||||||
|
if self._options['calc_cv']==False:
|
||||||
|
cv_frame = self.wTree.get_widget("cv_frame")
|
||||||
|
cv_frame.set_sensitive(False)
|
||||||
|
if self._options['calc_pert']==False:
|
||||||
|
pert_frame = self.wTree.get_widget("pert_frame")
|
||||||
|
pert_frame.set_sensitive(False)
|
||||||
|
|
||||||
|
cv = self.wTree.get_widget("cv_method").set_active(0)
|
||||||
|
pm = self.wTree.get_widget("pert_method").set_active(0)
|
||||||
|
|
||||||
|
# confidence
|
||||||
|
if self._options['calc_conf']==True:
|
||||||
|
self.wTree.get_widget("subset_frame").set_sensitive(True)
|
||||||
|
else:
|
||||||
|
self.wTree.get_widget("subset_frame").set_sensitive(False)
|
||||||
|
|
||||||
|
|
||||||
|
def on_amax_changed(self, sb):
|
||||||
|
logger.log("debug", "amax changed: new value: %s" %sb.get_value_as_int())
|
||||||
|
amax = sb.get_value_as_int()
|
||||||
|
# update aopt if needed
|
||||||
|
if amax<self._options['aopt']:
|
||||||
|
self._options['aopt'] = amax
|
||||||
|
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||||
|
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, amax, 1, 0, 0)
|
||||||
|
self._options['amax'] = sb.get_value_as_int()
|
||||||
|
|
||||||
|
def on_aopt_changed(self, sb):
|
||||||
|
aopt = sb.get_value_as_int()
|
||||||
|
self._options['aopt'] = aopt
|
||||||
|
|
||||||
|
def auto_aopt_toggled(self, tb):
|
||||||
|
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['auto_aopt'] = True
|
||||||
|
aopt_sb.set_sensitive(False)
|
||||||
|
else:
|
||||||
|
self._options['auto_aopt'] = False
|
||||||
|
aopt_sb.set_sensitive(True)
|
||||||
|
|
||||||
|
def center_toggled(self, tb):
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['center'] = True
|
||||||
|
else:
|
||||||
|
logger.log("debug", "centering set to False")
|
||||||
|
self._options['center'] = False
|
||||||
|
|
||||||
|
def on_scale_changed(self, cb):
|
||||||
|
scale = cb.get_active_text()
|
||||||
|
if scale=='Scores':
|
||||||
|
self._options['scale'] = 'scores'
|
||||||
|
elif scale=='Loadings':
|
||||||
|
self._options['scale'] = 'loads'
|
||||||
|
else:
|
||||||
|
raise IOError
|
||||||
|
|
||||||
|
def val_toggled(self, tb):
|
||||||
|
"""Callback for validation: None. """
|
||||||
|
cv_frame = self.wTree.get_widget("cv_frame")
|
||||||
|
pert_frame = self.wTree.get_widget("pert_frame")
|
||||||
|
cv_tb = self.wTree.get_widget("cv_toggle")
|
||||||
|
p_tb = self.wTree.get_widget("pert_toggle")
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['calc_cv'] = False
|
||||||
|
self._options['calc_pert'] = False
|
||||||
|
cv_frame.set_sensitive(False)
|
||||||
|
pert_frame.set_sensitive(False)
|
||||||
|
cv_tb.set_sensitive(False)
|
||||||
|
p_tb.set_sensitive(False)
|
||||||
|
else:
|
||||||
|
p_tb.set_sensitive(True)
|
||||||
|
cv_tb.set_sensitive(True)
|
||||||
|
if p_tb.get_active():
|
||||||
|
pert_frame.set_sensitive(True)
|
||||||
|
self._options['calc_pert'] = True
|
||||||
|
if cv_tb.get_active():
|
||||||
|
cv_frame.set_sensitive(True)
|
||||||
|
self._options['calc_cv'] = True
|
||||||
|
|
||||||
|
def cv_toggled(self, tb):
|
||||||
|
cv_frame = self.wTree.get_widget("cv_frame")
|
||||||
|
if tb.get_active():
|
||||||
|
cv_frame.set_sensitive(True)
|
||||||
|
self._options['calc_cv'] = True
|
||||||
|
else:
|
||||||
|
cv_frame.set_sensitive(False)
|
||||||
|
self._options['calc_cv'] = False
|
||||||
|
|
||||||
|
def pert_toggled(self, tb):
|
||||||
|
pert_frame = self.wTree.get_widget("pert_frame")
|
||||||
|
if tb.get_active():
|
||||||
|
pert_frame.set_sensitive(True)
|
||||||
|
self._options['calc_pert'] = True
|
||||||
|
else:
|
||||||
|
pert_frame.set_sensitive(False)
|
||||||
|
self._options['calc_pert'] = False
|
||||||
|
|
||||||
|
|
||||||
|
def on_cv_method_changed(self, cb):
|
||||||
|
method = cb.get_active_text()
|
||||||
|
if method == 'Random':
|
||||||
|
self._options['cv_val_method'] = 'random'
|
||||||
|
|
||||||
|
def on_pert_method_changed(self, cb):
|
||||||
|
method = cb.get_active_text()
|
||||||
|
if method == 'Random diags':
|
||||||
|
self._options['pert_val_method'] = 'random_diag'
|
||||||
|
|
||||||
|
def on_cv_sets_changed(self, sb):
|
||||||
|
val = sb.get_value_as_int()
|
||||||
|
self._options['cv_val_sets'] = val
|
||||||
|
|
||||||
|
def on_pert_sets_changed(self, sb):
|
||||||
|
val = sb.get_value_as_int()
|
||||||
|
self._options['pert_val_sets'] = val
|
||||||
|
|
||||||
|
def on_conf_toggled(self, tb):
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['calc_conf'] = False
|
||||||
|
self.wTree.get_widget("subset_frame").set_sensitive(False)
|
||||||
|
else:
|
||||||
|
self._options['calc_conf'] = True
|
||||||
|
self.wTree.get_widget("subset_frame").set_sensitive(True)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PlsOptionsDialog(OptionsDialog):
|
class PlsOptionsDialog(OptionsDialog):
|
||||||
"""Options dialog for Partial Least Squares Regression.
|
"""Options dialog for Partial Least Squares Regression.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, data, options, input_names=['X', 'Y']):
|
def __init__(self, data, options, input_names=['X', 'Y']):
|
||||||
OptionsDialog.__init__(self, data, options, input_names)
|
OptionsDialog.__init__(self, data, options, input_names)
|
||||||
|
|
|
@ -12,8 +12,9 @@ fixme2:
|
||||||
colorbar, but when adding colors the colorbar should be created.
|
colorbar, but when adding colors the colorbar should be created.
|
||||||
"""
|
"""
|
||||||
from fluents import plots
|
from fluents import plots
|
||||||
from scipy import dot,sum,diag,arange,log,mean,newaxis
|
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt
|
||||||
from matplotlib import cm
|
from matplotlib import cm
|
||||||
|
import pylab as PB
|
||||||
|
|
||||||
class PcaScorePlot(plots.ScatterPlot):
|
class PcaScorePlot(plots.ScatterPlot):
|
||||||
"""PCA Score plot"""
|
"""PCA Score plot"""
|
||||||
|
@ -103,38 +104,40 @@ class PlsLoadingPlot(plots.ScatterPlot):
|
||||||
def set_ordinate(self, n):
|
def set_ordinate(self, n):
|
||||||
self.yaxis_data = self._T[:,n]
|
self.yaxis_data = self._T[:,n]
|
||||||
|
|
||||||
|
|
||||||
class LineViewXc(plots.LineViewPlot):
|
class LineViewXc(plots.LineViewPlot):
|
||||||
"""A line view of centered raw data
|
"""A line view of centered raw data
|
||||||
"""
|
"""
|
||||||
def __init__(self, func_class, name='Profiles'):
|
def __init__(self, model, name='Profiles'):
|
||||||
# copy, center, plot
|
# copy, center, plot
|
||||||
x = func_class._dataset['X'].copy()
|
x = model._dataset['X'].copy()
|
||||||
x._array = x._array - mean(x._array,0)[newaxis]
|
x._array = x._array - mean(x._array,0)[newaxis]
|
||||||
plots.LineViewPlot.__init__(self, x, 1, None, name)
|
plots.LineViewPlot.__init__(self, x, 1, None, name)
|
||||||
|
|
||||||
|
|
||||||
class ParalellCoordinates(plots.Plot):
|
class ParalellCoordinates(plots.Plot):
|
||||||
"""Parallel coordinates for score loads with many components.
|
"""Parallel coordinates for score loads with many components.
|
||||||
"""
|
"""
|
||||||
def __init__(self, model, p='loads'):
|
def __init__(self, model, p='loads'):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class PlsQvalScatter(plots.ScatterPlot):
|
class PlsQvalScatter(plots.ScatterPlot):
|
||||||
"""A volcano-like plot of loads vs qvals
|
"""A volcano-like plot of loads vs qvals
|
||||||
"""
|
"""
|
||||||
def __init__(self, func_class, pc=0):
|
def __init__(self, model, pc=0):
|
||||||
model = func_class.model
|
if not model.model.has_key('w_tsq'):
|
||||||
if not model.has_key('w_tsq'):
|
|
||||||
return
|
return
|
||||||
self._W = model['P']
|
self._W = model.model['P']
|
||||||
dataset_1 = func_class.as_dataset('P')
|
dataset_1 = model.as_dataset('P')
|
||||||
dataset_2 = func_class.as_dataset('w_tsq')
|
dataset_2 = model.as_dataset('w_tsq')
|
||||||
id_dim = dataset_1.get_dim_name(0) #genes
|
id_dim = dataset_1.get_dim_name(0) #genes
|
||||||
sel_dim = dataset_1.get_dim_name(1) #_comp
|
sel_dim = dataset_1.get_dim_name(1) #_comp
|
||||||
sel_dim_2 = dataset_2.get_dim_name(1) #_zero_dim
|
sel_dim_2 = dataset_2.get_dim_name(1) #_zero_dim
|
||||||
id_1, = dataset_1.get_identifiers(sel_dim, [0])
|
id_1, = dataset_1.get_identifiers(sel_dim, [0])
|
||||||
id_2, = dataset_2.get_identifiers(sel_dim_2, [0])
|
id_2, = dataset_2.get_identifiers(sel_dim_2, [0])
|
||||||
if model.has_key('w_tsq'):
|
if model.model.has_key('w_tsq'):
|
||||||
col = model['w_tsq'].ravel()
|
col = model.model['w_tsq'].ravel()
|
||||||
col = normalise(col)
|
col = normalise(col)
|
||||||
else:
|
else:
|
||||||
col = 'g'
|
col = 'g'
|
||||||
|
@ -143,6 +146,33 @@ class PlsQvalScatter(plots.ScatterPlot):
|
||||||
c=col, s=20, sel_dim_2=sel_dim_2,
|
c=col, s=20, sel_dim_2=sel_dim_2,
|
||||||
name='Load Volcano')
|
name='Load Volcano')
|
||||||
|
|
||||||
|
class PredictionErrorPlot(plots.Plot):
|
||||||
|
"""A boxplot of prediction error vs. comp. number.
|
||||||
|
"""
|
||||||
|
def __init__(self, model, name="Pred. Err."):
|
||||||
|
if not model.model.has_key('sep'):
|
||||||
|
logger.log('notice', 'Model has no calculations of sep')
|
||||||
|
return
|
||||||
|
plots.Plot.__init__(self, name)
|
||||||
|
self._frozen = True
|
||||||
|
self.current_dim = 'johndoe'
|
||||||
|
self.ax = self.fig.add_subplot(111)
|
||||||
|
|
||||||
|
# draw
|
||||||
|
sep = model.model['sep']
|
||||||
|
aopt = model.model['aopt']
|
||||||
|
bx_plot_lines = self.ax.boxplot(sqrt(sep))
|
||||||
|
aopt_marker = self.ax.axvline(aopt, linewidth=10,
|
||||||
|
color='r',zorder=0,
|
||||||
|
alpha=.5)
|
||||||
|
|
||||||
|
# add canvas
|
||||||
|
self.add(self.canvas)
|
||||||
|
self.canvas.show()
|
||||||
|
|
||||||
|
def set_current_selection(self, selection):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class InfluencePlot(plots.ScatterPlot):
|
class InfluencePlot(plots.ScatterPlot):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from scipy import apply_along_axis,newaxis,zeros,\
|
from scipy import apply_along_axis,newaxis,zeros,\
|
||||||
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
|
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
|
||||||
trace,zeros_like,sign,sort,real,argsort,rand,array
|
trace,zeros_like,sign,sort,real,argsort,rand,array,\
|
||||||
|
matrix
|
||||||
from scipy.linalg import norm,svd,inv,eig
|
from scipy.linalg import norm,svd,inv,eig
|
||||||
from scipy.stats import median,mean
|
from scipy.stats import median,mean
|
||||||
|
|
||||||
|
@ -106,3 +107,7 @@ def mat_center(X,axis=0,ret_mn=False):
|
||||||
return Xs,mnX
|
return Xs,mnX
|
||||||
else:
|
else:
|
||||||
return Xs
|
return Xs
|
||||||
|
|
||||||
|
def m_shape(array):
|
||||||
|
"""Returns the array shape on the form of a numpy.matrix."""
|
||||||
|
return matrix(array).shape
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
"""Module contains algorithms for (burdensome) calculations.
|
"""Module contains algorithms for (burdensome) calculations.
|
||||||
|
|
||||||
There is no typechecking of any kind here, just focus on speed
|
There is no typechecking of any kind here, just focus on speed
|
||||||
|
@ -7,7 +6,7 @@ There is no typechecking of any kind here, just focus on speed
|
||||||
from scipy.linalg import svd,norm,inv,pinv,qr
|
from scipy.linalg import svd,norm,inv,pinv,qr
|
||||||
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
||||||
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
|
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
|
||||||
rand,sum,cumsum
|
rand,sum,cumsum,matrix
|
||||||
|
|
||||||
def pca(a, aopt, scale='scores', mode='normal'):
|
def pca(a, aopt, scale='scores', mode='normal'):
|
||||||
""" Principal Component Analysis model
|
""" Principal Component Analysis model
|
||||||
|
@ -19,6 +18,7 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
||||||
|
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
u, s, vt = svd(a, full_matrices=0)
|
u, s, vt = svd(a, full_matrices=0)
|
||||||
|
eigvals = (1./m)*s
|
||||||
T = u*s
|
T = u*s
|
||||||
T = T[:,:aopt]
|
T = T[:,:aopt]
|
||||||
P = vt[:aopt,:].T
|
P = vt[:aopt,:].T
|
||||||
|
@ -43,6 +43,29 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
||||||
|
|
||||||
return {'T':T, 'P':P, 'E':E}
|
return {'T':T, 'P':P, 'E':E}
|
||||||
|
|
||||||
|
def pcr(a, b, aopt=2, scale='scores', mode='normal'):
|
||||||
|
"""Returns Principal component regression model."""
|
||||||
|
m, n = a.shape
|
||||||
|
try:
|
||||||
|
k, l = b.shape
|
||||||
|
except:
|
||||||
|
k = b.shape[0]
|
||||||
|
l = 1
|
||||||
|
B = empty((aopt, n, l))
|
||||||
|
U, s, Vt = svd(a, full_matrices=True)
|
||||||
|
T = U*s
|
||||||
|
T = T[:,:aopt]
|
||||||
|
P = Vt[:aopt,:].T
|
||||||
|
Q = dot(dot(inv(dot(T.T, T)), T.T), b).T
|
||||||
|
for i in range(aopt):
|
||||||
|
ti = T[:,:i+1]
|
||||||
|
r = dot(dot(inv(dot(ti.T,ti)), ti.T), b)
|
||||||
|
B[i] = dot(P[:,:i+1], r)
|
||||||
|
E = a - dot(T, P.T)
|
||||||
|
F = b - dot(T, Q.T)
|
||||||
|
|
||||||
|
return {'T':T, 'P':P,'Q': Q, 'B':B, 'E':E, 'F':F}
|
||||||
|
|
||||||
def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
"""Kernel pls for tall/wide matrices.
|
"""Kernel pls for tall/wide matrices.
|
||||||
|
|
||||||
|
@ -51,9 +74,9 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
"""
|
"""
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
if ab!=None:
|
if ab!=None:
|
||||||
mm,l = ab.shape
|
mm, l = m_shape(ab)
|
||||||
else:
|
else:
|
||||||
k,l = b.shape
|
k, l = m_shape(b)
|
||||||
|
|
||||||
W = empty((n, aopt))
|
W = empty((n, aopt))
|
||||||
P = empty((n, aopt))
|
P = empty((n, aopt))
|
||||||
|
@ -66,7 +89,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
ab = dot(a.T, b)
|
ab = dot(a.T, b)
|
||||||
for i in range(aopt):
|
for i in range(aopt):
|
||||||
if ab.shape[1]==1:
|
if ab.shape[1]==1:
|
||||||
w = ab
|
w = ab.reshape(mm, l)
|
||||||
else:
|
else:
|
||||||
u, s, vh = svd(dot(ab.T, ab))
|
u, s, vh = svd(dot(ab.T, ab))
|
||||||
w = dot(ab, u[:,:1])
|
w = dot(ab, u[:,:1])
|
||||||
|
@ -147,12 +170,11 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||||
"""Undeflated Ridged svd(X'Y)
|
"""Undeflated Ridged svd(X'Y)
|
||||||
"""
|
"""
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
k, l = b.shape
|
k, l = m_shape(b)
|
||||||
u, s, vt = svd(b, full_matrices=0)
|
u, s, vt = svd(b, full_matrices=0)
|
||||||
g0 = dot(u*s, u.T)
|
g0 = dot(u*s, u.T)
|
||||||
g = (1 - r)*g0 + r*eye(m)
|
g = (1 - r)*g0 + r*eye(m)
|
||||||
ag = dot(a.T, g)
|
ag = dot(a.T, g)
|
||||||
|
|
||||||
u, s, vt = svd(ag, full_matrices=0)
|
u, s, vt = svd(ag, full_matrices=0)
|
||||||
W = u[:,:aopt]
|
W = u[:,:aopt]
|
||||||
K = vt[:aopt,:].T
|
K = vt[:aopt,:].T
|
||||||
|
@ -167,7 +189,7 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||||
|
|
||||||
U = dot(g0, K) #fixme check this
|
U = dot(g0, K) #fixme check this
|
||||||
Q = dot(b.T, dot(T, inv(dot(T.T, T)) ))
|
Q = dot(b.T, dot(T, inv(dot(T.T, T)) ))
|
||||||
B = zeros((aopt, n, l))
|
B = zeros((aopt, n, l), dtype='f')
|
||||||
for i in range(aopt):
|
for i in range(aopt):
|
||||||
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
|
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
|
||||||
# leverages
|
# leverages
|
||||||
|
@ -198,3 +220,6 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||||
|
|
||||||
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
|
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
|
||||||
|
|
||||||
|
|
||||||
|
def m_shape(array):
|
||||||
|
return matrix(array).shape
|
||||||
|
|
|
@ -418,6 +418,31 @@ def weighted_laplacian(G,with_labels=False):
|
||||||
else:
|
else:
|
||||||
return L
|
return L
|
||||||
|
|
||||||
|
def subnetworks(G, T2):
|
||||||
|
"""Return the highest scoring (T2-test) subgraph of G.
|
||||||
|
|
||||||
|
Use simulated annealing to identify highly scoring subgraphs.
|
||||||
|
|
||||||
|
ref: -- Ideker et al. (Bioinformatics 18, 2002)
|
||||||
|
-- Patil and Nielsen (PNAS 2006)
|
||||||
|
|
||||||
|
"""
|
||||||
|
N = 1000
|
||||||
|
states = [(node, False) for node in G.nodes()]
|
||||||
|
t2_last = 0.0
|
||||||
|
for i in xrange(N):
|
||||||
|
if i==0: #assign random states
|
||||||
|
states = [(state[0], True) for state in states if rand(1)>.5]
|
||||||
|
sub_nodes = [state[0] for state in states if state[1]]
|
||||||
|
Gsub = NX.subgraph(G, sub_nodes)
|
||||||
|
Gsub = NX.connected_components_subgraphs(Gsub)[0]
|
||||||
|
t2 = [T2[node] for node in Gsub]
|
||||||
|
if t2>t2_last:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
p = numpy.exp()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""Below are methods for calculating graph metrics
|
"""Below are methods for calculating graph metrics
|
||||||
|
|
||||||
|
@ -473,7 +498,7 @@ Ke = expm(A) .... expm(-A)?
|
||||||
# 13.09.2206: update for use in numpy
|
# 13.09.2206: update for use in numpy
|
||||||
|
|
||||||
|
|
||||||
def K_expAdj(W, normalised=False, alpha=1.0):
|
def K_expAdj(W, normalised=True, alpha=1.0):
|
||||||
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
|
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
|
||||||
"""
|
"""
|
||||||
W = asarray(W)
|
W = asarray(W)
|
||||||
|
@ -499,7 +524,7 @@ def K_expAdj(W, normalised=False, alpha=1.0):
|
||||||
|
|
||||||
return dot(dot(vr,psigma),vri)
|
return dot(dot(vr,psigma),vri)
|
||||||
|
|
||||||
def K_vonNeumann(W,normalised=False,alpha=1.0):
|
def K_vonNeumann(W, normalised=True, alpha=1.0):
|
||||||
""" The geometric series of path lengths.
|
""" The geometric series of path lengths.
|
||||||
Returns matrix square root of pseudo inverse of the adjacency matrix.
|
Returns matrix square root of pseudo inverse of the adjacency matrix.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -30,22 +30,14 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
|
||||||
b_in = b[inn,:]
|
b_in = b[inn,:]
|
||||||
b_out = b[out,:]
|
b_out = b[out,:]
|
||||||
if center:
|
if center:
|
||||||
# centering projector: I - (1/n)11'
|
aat_in, mn = outerprod_centering(aat_in)
|
||||||
# nin = len(inn)
|
aat_out = aat_out - mn
|
||||||
# Pc = eye(nin) - outer(ones((nin,)),ones((nin,)))/nin
|
|
||||||
# xxt - x( outer(ones((nin,)),ones((nin,)))/nin ) x.T
|
|
||||||
|
|
||||||
# de jong:
|
|
||||||
h = sum(aat_in,0)[ :,newaxis]
|
|
||||||
h = (h - mean(h)/2)/len(inn)
|
|
||||||
mn_a = h + h.T
|
|
||||||
aat_in = aat_in - mn_a
|
|
||||||
if index_out:
|
if index_out:
|
||||||
yield aat_in,aat_out,b_in,b_out,out
|
yield aat_in,aat_out,b_in,b_out,out
|
||||||
else:
|
else:
|
||||||
yield aat_in,aat_out,b_in,b_out
|
yield aat_in,aat_out,b_in,b_out
|
||||||
|
|
||||||
def pls_gen(a,b, n_blocks=None, center=False, index_out=False,axis=0):
|
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None):
|
||||||
"""Random block crossvalidation
|
"""Random block crossvalidation
|
||||||
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
||||||
"""
|
"""
|
||||||
|
@ -56,17 +48,38 @@ def pls_gen(a,b, n_blocks=None, center=False, index_out=False,axis=0):
|
||||||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
|
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
|
||||||
for out in out_ind_sets:
|
for out in out_ind_sets:
|
||||||
inn = [i for i in index if i not in out]
|
inn = [i for i in index if i not in out]
|
||||||
|
acal = a.take(inn, 0)
|
||||||
|
atrue = a.take(out, 0)
|
||||||
|
bcal = b.take(inn, 0)
|
||||||
|
btrue = b.take(out, 0)
|
||||||
if center:
|
if center:
|
||||||
a = a - mean(a,0)[newaxis]
|
mn_a = acal.mean(0)[newaxis]
|
||||||
b = b - mean(b,0)[newaxis]
|
acal = acal - mn_a
|
||||||
|
atrue = atrue - mn_a
|
||||||
|
mn_b = bcal.mean(0)[newaxis]
|
||||||
|
bcal = bcal - mn_b
|
||||||
|
btrue = btrue - mn_b
|
||||||
|
if metric!=None:
|
||||||
|
acal = dot(acal, metric)
|
||||||
if index_out:
|
if index_out:
|
||||||
yield a.take(inn,0),a.take(out,0), b.take(inn,0),b.take(out,0),out
|
yield acal, atrue, bcal, btrue, out
|
||||||
else:
|
else:
|
||||||
yield a.take(inn,0),a.take(out,0), b.take(inn,0),b.take(out,0)
|
yield acal, atrue, bcal, btrue
|
||||||
|
|
||||||
|
|
||||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
|
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
|
||||||
"""PCA random block crossval generator.
|
"""Returns a generator of crossvalidation sample segments.
|
||||||
|
|
||||||
|
input:
|
||||||
|
-- a, data matrix (m x n)
|
||||||
|
-- n_sets, number of segments/subsets to generate.
|
||||||
|
-- center, bool, choice of centering each subset
|
||||||
|
-- index_out, bool, return subset index
|
||||||
|
-- axis, int, which axis to get subset from
|
||||||
|
|
||||||
|
output:
|
||||||
|
-- V, generator with (n_sets) members (subsets)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
m = a.shape[axis]
|
m = a.shape[axis]
|
||||||
index = randperm(m)
|
index = randperm(m)
|
||||||
|
@ -76,14 +89,19 @@ def pca_gen(a,n_sets=None, center=False, index_out=False,axis=0):
|
||||||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
|
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
|
||||||
for out in out_ind_sets:
|
for out in out_ind_sets:
|
||||||
inn = [i for i in index if i not in out]
|
inn = [i for i in index if i not in out]
|
||||||
|
acal = a.take(inn, 0)
|
||||||
|
atrue = a.take(out, 0)
|
||||||
if center:
|
if center:
|
||||||
a = a - mean(a,0)[newaxis]
|
mn_a = acal.mean(0)[newaxis]
|
||||||
|
acal = acal - mn_a
|
||||||
|
atrue = atrue - mn_a
|
||||||
if index_out:
|
if index_out:
|
||||||
yield a.take(inn,0),a.take(out,0),out
|
yield acal, atrue, out
|
||||||
else:
|
else:
|
||||||
yield a.take(inn,0),a.take(out,0)
|
yield acal, atrue
|
||||||
|
|
||||||
def w_pls_gen_jk(a,b,n_sets=None,center=True,index_out=False,axis=0):
|
def w_pls_gen_jk(a, b, n_sets=None, center=True,
|
||||||
|
index_out=False, axis=0):
|
||||||
"""Random block crossvalidation for wide X (m>>n)
|
"""Random block crossvalidation for wide X (m>>n)
|
||||||
Leave-one-out is a subset, with n_sets equals a.shape[-1]
|
Leave-one-out is a subset, with n_sets equals a.shape[-1]
|
||||||
|
|
||||||
|
@ -103,9 +121,8 @@ def w_pls_gen_jk(a,b,n_sets=None,center=True,index_out=False,axis=0):
|
||||||
a_in = a[inn,:]
|
a_in = a[inn,:]
|
||||||
mn_a = 0
|
mn_a = 0
|
||||||
mAB = 0
|
mAB = 0
|
||||||
|
|
||||||
if center:
|
if center:
|
||||||
mn_a = mean(a,0)[newaxis]
|
mn_a = a_in.mean(0)[newaxis]
|
||||||
mAin = dot(-ones((1,nout)), a[out,:])/nin
|
mAin = dot(-ones((1,nout)), a[out,:])/nin
|
||||||
mBin = dot(-ones((1,nout)), b[out,:])/nin
|
mBin = dot(-ones((1,nout)), b[out,:])/nin
|
||||||
mAB = dot(mAin.T, (mBin*nin))
|
mAB = dot(mAin.T, (mBin*nin))
|
||||||
|
@ -113,9 +130,9 @@ def w_pls_gen_jk(a,b,n_sets=None,center=True,index_out=False,axis=0):
|
||||||
a_in = a_in - mn_a
|
a_in = a_in - mn_a
|
||||||
|
|
||||||
if index_out:
|
if index_out:
|
||||||
yield ain,ab, out
|
yield a_in, ab_in, out
|
||||||
else:
|
else:
|
||||||
yield a_in, ab
|
yield a_in, ab_in
|
||||||
|
|
||||||
def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
|
def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
|
||||||
"""Random block shuffling along 1d axis
|
"""Random block shuffling along 1d axis
|
||||||
|
@ -185,3 +202,19 @@ def diag_pert(a, n_sets=10, center=True, index_out=False):
|
||||||
yield a_out, asarray(out)
|
yield a_out, asarray(out)
|
||||||
else:
|
else:
|
||||||
yield a_out
|
yield a_out
|
||||||
|
|
||||||
|
|
||||||
|
def outerprod_centering(aat, ret_mn=True):
    """Double-center a symmetric outer-product matrix.

    input:
      -- aat, square symmetric array (n x n)
         (presumably an X*X' product; confirm with the caller)
      -- ret_mn, bool, also return the column means of the uncentered aat

    output:
      -- aatc, aat minus the (h + h') correction computed below
      -- aat.mean(0), only when ret_mn is True
    """
    n_rows = aat.shape[0]
    # Column sums as an (n x 1) column vector.
    col_sums = aat.sum(0)[:, newaxis]
    # Half of the grand mean is removed here because the term is added
    # twice when the vector is combined with its own transpose.
    half_term = (col_sums - mean(col_sums)/2)/n_rows
    centered = aat - (half_term + half_term.T)
    if not ret_mn:
        return centered
    return centered, aat.mean(0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,24 @@
|
||||||
|
"""This module implements some common validation schemes from pca and pls.
|
||||||
|
"""
|
||||||
from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
|
from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
|
||||||
apply_along_axis,eye, kron
|
apply_along_axis,eye,kron,array,sort
|
||||||
|
from scipy.stats import median
|
||||||
from scipy.linalg import triu,inv,svd,norm
|
from scipy.linalg import triu,inv,svd,norm
|
||||||
|
|
||||||
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
|
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
|
||||||
from engines import w_simpls,pls,bridge,pca
|
from engines import w_simpls,pls,bridge,pca
|
||||||
from pylab import *
|
from cx_utils import m_shape
|
||||||
|
|
||||||
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
"""RMSEP calc for pls with wide X.
|
"""Returns RMSEP and estimated aopt for pls tailored for wide X.
|
||||||
"""
|
"""
|
||||||
k, l = Y.shape
|
k, l = m_shape(Y)
|
||||||
PRESS = zeros((l, amax+1), dtype='f')
|
PRESS = zeros((l, amax+1), dtype='f')
|
||||||
# X,Y are centered
|
# X,Y are centered
|
||||||
if n_blocks==None:
|
if n_blocks==None:
|
||||||
n_blocks = Y.shape[0]
|
n_blocks = Y.shape[0]
|
||||||
V = w_pls_gen(dot(X, X.T), Y, n_blocks=n_blocks, center=True)
|
XXt = dot(X, X.T)
|
||||||
|
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
||||||
for Din, Doi, Yin, Yout in V:
|
for Din, Doi, Yin, Yout in V:
|
||||||
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||||
Yin = Yin - ym
|
Yin = Yin - ym
|
||||||
|
@ -24,7 +28,6 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||||
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
||||||
else:
|
else:
|
||||||
"Other algo-support comming soon"
|
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
#Yhat = empty((amax, k, l),dtype='<f8')
|
#Yhat = empty((amax, k, l),dtype='<f8')
|
||||||
Yhat = []
|
Yhat = []
|
||||||
|
@ -34,13 +37,14 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
E = E + sum(E, 0)/Din.shape[0]
|
E = E + sum(E, 0)/Din.shape[0]
|
||||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||||
#Yhat = Y - dot(That,Q.T)
|
#Yhat = Y - dot(That,Q.T)
|
||||||
return sqrt(PRESS/Y.shape[0])
|
rmsep = sqrt(PRESS/Y.shape[0])
|
||||||
|
aopt = find_aopt_from_sep(rmsep)
|
||||||
|
return rmsep, aopt
|
||||||
|
|
||||||
def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||||
""" Validation results of pls model.
|
""" Validation results of pls model.
|
||||||
"""
|
"""
|
||||||
|
k, l = m_shape(Y)
|
||||||
k, l = Y.shape
|
|
||||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||||
EE = zeros((amax, k, l), dtype='<f8')
|
EE = zeros((amax, k, l), dtype='<f8')
|
||||||
Yhat = zeros((amax, k, l), dtype='<f8')
|
Yhat = zeros((amax, k, l), dtype='<f8')
|
||||||
|
@ -50,6 +54,7 @@ def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||||
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
||||||
Yin = (Yin - ym)
|
Yin = (Yin - ym)
|
||||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||||
|
|
||||||
if algo=='pls':
|
if algo=='pls':
|
||||||
dat = pls(Xin, Yin, amax, mode='normal')
|
dat = pls(Xin, Yin, amax, mode='normal')
|
||||||
elif algo=='bridge':
|
elif algo=='bridge':
|
||||||
|
@ -62,7 +67,9 @@ def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||||
EE[a,out,:] = E
|
EE[a,out,:] = E
|
||||||
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
|
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
|
||||||
|
|
||||||
return sqrt(PRESS/(k-1.)), EE, Yhat
|
rmsep = sqrt(PRESS/(k-1.))
|
||||||
|
aopt = find_aopt_from_sep(rmsep)
|
||||||
|
return rmsep, aopt
|
||||||
|
|
||||||
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
||||||
"""Pca validation by altering elements in X.
|
"""Pca validation by altering elements in X.
|
||||||
|
@ -79,18 +86,27 @@ def pca_alter_val(a, amax, n_sets=10,method='diag'):
|
||||||
EE = a_sub - Xhat.ravel().take(ind)
|
EE = a_sub - Xhat.ravel().take(ind)
|
||||||
tot = (a_sub**2).sum()
|
tot = (a_sub**2).sum()
|
||||||
sep[i,j] = (EE**2).sum()/tot
|
sep[i,j] = (EE**2).sum()/tot
|
||||||
return sqrt(sep.mean(0))
|
sep = sqrt(sep)
|
||||||
#return sep
|
aopt = find_aopt_from_sep(sep)
|
||||||
|
return sep, aopt
|
||||||
|
|
||||||
def pca_cv_val(X, amax, n_sets):
|
def pca_cv_val(a, amax, n_sets):
|
||||||
""" Cross validation of pca using random sets crossval.
|
""" Returns PRESS from cross-validated pca using random segments.
|
||||||
|
|
||||||
|
input:
|
||||||
|
-- a, data matrix (m x n)
|
||||||
|
-- amax, maximum number of components used
|
||||||
|
-- n_sets, number of segments to calculate
|
||||||
|
output:
|
||||||
|
-- sep, (amax x m x n), squared error of prediction (press)
|
||||||
|
-- aopt, guestimated optimal number of components
|
||||||
"""
|
"""
|
||||||
m, n = X.shape
|
m, n = a.shape
|
||||||
xtot = (X**2).sum()
|
|
||||||
V = pca_gen(X, n_sets=7, center=True, index_out=True)
|
|
||||||
E = empty((amax, m, n), dtype='f')
|
E = empty((amax, m, n), dtype='f')
|
||||||
|
xtot = (a**2).sum() # this needs centering
|
||||||
|
V = pca_gen(a, n_sets=7, center=True, index_out=True)
|
||||||
for xi, xout, ind in V:
|
for xi, xout, ind in V:
|
||||||
dat_i = pca(xi, amax, mode='detailed')
|
dat_i = pca(xi, amax, mode='fast')
|
||||||
Pi = dat_i['P']
|
Pi = dat_i['P']
|
||||||
for a in xrange(amax):
|
for a in xrange(amax):
|
||||||
Pia = Pi[:,:a+1]
|
Pia = Pi[:,:a+1]
|
||||||
|
@ -99,7 +115,9 @@ def pca_cv_val(X, amax, n_sets):
|
||||||
sep = []
|
sep = []
|
||||||
for a in xrange(amax):
|
for a in xrange(amax):
|
||||||
sep.append(E[a].sum()/xtot)
|
sep.append(E[a].sum()/xtot)
|
||||||
return sqrt(sep.mean(0))
|
sep = array(sep)
|
||||||
|
aopt = find_aopt_from_sep(sep)
|
||||||
|
return sep, aopt
|
||||||
|
|
||||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
|
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
|
||||||
""" Returns CV-segments of paramter W for wide X.
|
""" Returns CV-segments of parameter W for wide X.
|
||||||
|
@ -128,7 +146,20 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
|
||||||
return WW
|
return WW
|
||||||
|
|
||||||
def pca_jkP(a, aopt, n_blocks=None):
|
def pca_jkP(a, aopt, n_blocks=None):
|
||||||
""" Returns CV-segments of paramter P.
|
"""Returns loading from PCA on CV-segments.
|
||||||
|
|
||||||
|
input:
|
||||||
|
-- a, data matrix (n x m)
|
||||||
|
-- aopt, number of components in model.
|
||||||
|
-- nblocks, number of segments
|
||||||
|
output:
|
||||||
|
-- PP, loadings collected in a three way matrix
|
||||||
|
(n_segments, m, aopt)
|
||||||
|
|
||||||
|
comments:
|
||||||
|
* The loadings are scaled with the (1/samples)*eigenvalues.
|
||||||
|
* Crossvalidation method is currently set to random blocks of samples.
|
||||||
|
|
||||||
todo: add support for T
|
todo: add support for T
|
||||||
fixme: more efficient to add this in validation loop
|
fixme: more efficient to add this in validation loop
|
||||||
"""
|
"""
|
||||||
|
@ -138,8 +169,30 @@ def pca_jkP(a, aopt, n_blocks=None):
|
||||||
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
|
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
|
||||||
V = pca_gen(a, n_sets=n_blocks, center=True)
|
V = pca_gen(a, n_sets=n_blocks, center=True)
|
||||||
for nn,(a_in, a_out) in enumerate(V):
|
for nn,(a_in, a_out) in enumerate(V):
|
||||||
dat = pca(a_in, aopt, mode='fast')
|
dat = pca(a_in, aopt, mode='fast', scale='loads')
|
||||||
P = dat['P']
|
P = dat['P']
|
||||||
PP[nn,:,:] = P
|
PP[nn,:,:] = P
|
||||||
|
|
||||||
return PP
|
return PP
|
||||||
|
|
||||||
|
def find_aopt_from_sep(sep, method='75perc'):
    """Returns an estimate of optimal number of components from rmsecv.

    input:
      -- sep, array of prediction errors; rows are validation segments
         and columns are model sizes. A 1-d array is treated as a single
         segment (one row).
      -- method, 'vanilla' or '75perc'

    output:
      -- aopt, int
         'vanilla': index of the smallest sqrt(column mean), plus one.
         '75perc' : the first column index i >= 1 whose order statistic at
                    position int(0.75*n_rows) is larger than the median of
                    column i-1; the number of columns if no such i exists.

    raises:
      -- ValueError, if method is not one of the names above.

    comments:
      * sep is never modified. Sorting is done on a copy, so callers can
        safely return the same array they passed in.
    """
    # Local import keeps the function self-contained: scipy.stats.median,
    # which older scipy releases provided, is not available any more.
    import numpy as np

    sep = np.atleast_2d(np.asarray(sep))

    if method == 'vanilla':
        # min rmsep
        rmsecv = np.sqrt(sep.mean(0))
        return rmsecv.argmin() + 1

    if method == '75perc':
        prct = .75  # percentile
        ind = int(1.*sep.shape[0]*prct)
        med = np.median(sep, axis=0)
        # np.sort returns a sorted copy; sorting column views in place
        # would scramble the rows of the caller's array.
        prc_75 = np.sort(sep, axis=0)[ind]
        for i in range(1, sep.shape[1]):
            if med[i-1] < prc_75[i]:
                return i
        return len(med)

    raise ValueError("unknown method: %r" % (method,))
|
||||||
|
|
Reference in New Issue