pls options added

This commit is contained in:
Arnar Flatberg 2007-01-31 12:03:11 +00:00
parent 088f180b5d
commit 63be80aa92

View File

@ -1,9 +1,10 @@
"""This module contains bilinear models(Functions)
"""
import os
import pygtk
import gtk
import gtk.glade
import fluents
from fluents.workflow import Function, OptionsDialog, Options
from fluents.dataset import Dataset
from fluents import plots, dataset, workflow, logger
@ -75,6 +76,9 @@ class PCA(Model):
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
Pcal = self.model['P'][:,:aopt]
# add the scale to P
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'])
Pcal = Pcal*tnorm
tsq = hotelling(jk_segments, Pcal, p_center,
cov_center, alpha, crot, strict)
self.model['p_tsq'] = tsq
@ -95,7 +99,7 @@ class PCA(Model):
#####
if do_lev_s:
# sample leverages
tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of T-columns
tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts
s_lev = leverage(amax, tnorm)
dat['s_lev'] = s_lev
if do_lev_v:
@ -191,13 +195,22 @@ class PLS(Model):
Model.__init__(self, id, name)
self._options = PlsOptions()
def pre_validation(self, amax, n_sets, val_engine):
def validation(self, amax, n_sets, cv_val_method):
"""Returns rmsec,rmsep for model.
"""
rmsep, aopt = val_engine(self.model['E0'], self.model['F0'],
amax, n_sets)
self.model['rmsep'] = rmsep.mean(0)
self.model['aopt'] = aopt
m, n = self.model['E0'].shape
if m>n:
val_engine = w_pls_cv_val
else:
val_engine = pls_val
if self._options['calc_cv']==True:
rmsep, aopt = val_engine(self.model['E0'], self.model['F0'],
amax, n_sets)
self.model['rmsep'] = rmsep[:,:-1]
self.model['aopt'] = aopt
else:
self.model['rmsep'] = None
self.model['aopt'] = self._options['aopt']
def confidence(self, aopt, n_sets, alpha, p_center,
crot, strict, cov_center ):
@ -205,21 +218,35 @@ class PLS(Model):
Supported parameters: W
"""
aopt = self.model['aopt']
jk_segments = pls_jkW(self.model['E0'], self.model['F0'],
aopt, n_sets)
Wcal = self.model['W'][:,:aopt]
tsq = hotelling(jk_segments, Wcal, p_center,
alpha, crot, strict, cov_center)
self.model['w_tsq'] = tsq
if self._options['calc_conf']:
jk_segments = pls_jkW(self.model['E0'], self.model['F0'],
aopt, n_sets)
Wcal = self.model['W'][:,:aopt]
# ensure that Wcal is scaled
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
Wcal = Wcal*tnorm
tsq = hotelling(jk_segments, Wcal, p_center,
alpha, crot, strict, cov_center)
self.model['w_tsq'] = tsq
else:
self.model['w_tsq'] = None
def permutation_confidence(self, a, b, aopt, reg, n_iter, algo,
sim_method):
"""Estimates sign. vars by controlling fdr."""
"""Estimates cut off on significant vars by controlling fdr."""
qvals_sorted, qvals = pls_qvals(a, b, aopt=None,
alpha=.4, n_iter=20, algo='pls',
sim_method='shuffle', )
if self._options['calc_qvals']==True:
qvals_sorted, qvals = pls_qvals(a, b,
aopt=None,
alpha=reg,
n_iter=n_iter,
algo='pls',
sim_method=sim_method)
self.model['qval'] = qvals
self.model['qval_sorted'] = qvals_sorted
else:
self.model['qval'] = None
self.model['qval_sorted'] = None
def make_model(self, a, b, amax, scale, mode, engine):
"""Make model on amax components.
@ -243,7 +270,8 @@ class PLS(Model):
# y vars
ids_3 = [dim_name_3, DY.get_identifiers(dim_name_3, sorted=True)]
# components (hidden)
pc_ids = ['_comp', map(str, range(self.model['aopt']))]
pc_ids = ['_comp', map(str, range(self._options['amax']))]
pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))]
zero_dim = ['_doe',['0']] # null dim, vector (hidden)
match_ids = {'E':[ids_0, ids_1],
@ -257,11 +285,11 @@ class PLS(Model):
'qval':[ids_1, zero_dim],
'qval_sorted':[ids_1, zero_dim],
'w_tsq':[ids_1, zero_dim],
'rmsep':[pc_ids, zero_dim],
'rmsep':[ids_3, pc_ids],
}
array = self.model[name]
M = Dataset(array,identifiers=match_ids[name],name=name)
M = Dataset(array, identifiers=match_ids[name], name=name)
return M
def get_out_plots(self, options):
@ -274,19 +302,21 @@ class PLS(Model):
return out
def run_o(self, a, b):
"""Run PLS with present options."""
options = self._options
self._dataset['X'] = a
self._dataset['Y'] = b
self._data['X'] = a.asarray()
self._data['Y'] = b.asarray()
if options['center']:
self.model['E0'] = options['center_mth'](self._data['X'])
self.model['F0'] = options['center_mth'](self._data['Y'])
else:
self.model['E0'] = self._data['X']
self.model['F0'] = self._data['Y']
self.pre_validation(**options.pre_validation_options())
self.validation(**options.validation_options())
self.make_model(self.model['E0'], self.model['F0'],
**options.make_model_options())
# variance captured
@ -307,7 +337,7 @@ class PLS(Model):
return out
def run(self, a, b):
"""Run Pls with option gui.
"""Run PLS with option gui.
"""
dialog = PlsOptionsDialog([a, b], self._options)
dialog.show_all()
@ -317,8 +347,10 @@ class PLS(Model):
if response == gtk.RESPONSE_OK:
# set output data and plots
dialog.set_output()
#run with current data and options
#run with current data and options
for key, val in self._options.items():
print (key, val)
return self.run_o(a, b)
class Packer:
@ -342,7 +374,7 @@ class Packer:
if axis == 1:
self._array = self._array.T
u,s,vt = svd(self._array,full_matrices=0)
u, s, vt = svd(self._array,full_matrices=0)
self._inflater = vt.T
self._packed_data = u*s
return self._packed_data
@ -391,11 +423,12 @@ class PcaOptions(Options):
('p_tsq', 't2', False),
('rmsep', 'RMSEP', False)
]
# (class, name, sensitive, ticked)
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
(blmplots.PcaLoadingPlot, 'Loadings', True),
(blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', True)
(blmplots.PredictionErrorPlot, 'Residual Error', False)
]
opt['out_data'] = ['T','P', 'p_tsq']
@ -405,7 +438,7 @@ class PcaOptions(Options):
def make_model_options(self):
"""Options for make_model method."""
opt_list = ['scale','mode', 'amax']
opt_list = ['scale', 'mode', 'amax']
return self._copy_from_list(opt_list)
def confidence_options(self):
@ -428,29 +461,29 @@ class PlsOptions(Options):
Options.__init__(self)
self._set_default()
def _set_default(self):
def _set_default(self):
opt = {}
opt['algo'] = 'pls'
opt['engine'] = engines.pls
opt['mode'] = 'normal' # how much info to calculate
opt['lod'] = 'compact' # how much info to store
opt['amax'] = 3
opt['aopt'] = 3
opt['amax'] = 10
opt['aopt'] = 10
opt['auto_aopt'] = False
opt['center'] = True
opt['center_mth'] = mat_center
opt['scale'] = 'scores'
opt['calc_conf'] = False
opt['n_sets'] = 10
opt['calc_conf'] = False
opt['n_sets'] = 5
opt['strict'] = True
opt['p_center'] = 'med'
opt['alpha'] = .2
opt['alpha'] = .8
opt['cov_center'] = 'med'
opt['crot'] = True
opt['calc_cv'] = True
opt['calc_pert'] = False
opt['val_engine'] = w_pls_cv_val
opt['calc_cv'] = False
opt['cv_val_method'] = 'random'
opt['cv_val_sets'] = opt['n_sets']
opt['all_data'] = [('T', 'scores', True),
('P', 'loadings', True),
@ -458,19 +491,23 @@ class PlsOptions(Options):
('p_tsq', 't2', False),
('rmsep', 'RMSEP', False)
]
# (class, name, sensitive, ticked)
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
(blmplots.PlsLoadingPlot, 'Loadings', True),
(blmplots.LineViewXc, 'Line view', True)]
opt['out_plots'] = [blmplots.PlsScorePlot,
blmplots.PlsLoadingPlot,
blmplots.LineViewXc]
(blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', False),
(blmplots.RMSEPPlot, 'RMSEP', False)
]
opt['out_data'] = ['T','P', 'p_tsq']
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc]
opt['out_data'] = None
opt['pack'] = False
opt['calc_qvals'] = False
opt['q_pert_mth'] = 'shuffle_vars'
opt['q_pert_method'] = 'shuffle_rows'
opt['q_iter'] = 20
self.update(opt)
@ -485,20 +522,23 @@ class PlsOptions(Options):
opt_list = ['n_sets', 'aopt', 'alpha', 'p_center',
'strict', 'crot', 'cov_center']
return self._copy_from_list(opt_list)
def pre_validation_options(self):
def validation_options(self):
"""Options for pre_validation method."""
opt_list = ['amax', 'n_sets', 'val_engine']
return self._copy_from_list(opt_list)
opt_list = ['amax', 'n_sets', 'cv_val_method']
return self._copy_from_list(opt_list)
def permutation_confidence(self):
opt_list = ['q_pert_method', 'q_iter']
return self._copy_from_list(opt_list)
class PcaOptionsDialog(OptionsDialog):
"""Options dialog for Principal Component Analysis.
"""
def __init__(self, data, options, input_names=['X']):
OptionsDialog.__init__(self, data, options, input_names)
glade_file = os.path.join(fluents.DATADIR, 'pca_options.glade')
#glade_file = os.path.join(fluents.DATADIR, 'pca_options.glade')
glade_file = os.path.join("/home/flatberg/fluents/fluents/", 'pca_options.glade')
notebook_name = "vbox1"
page_name = "Options"
@ -508,19 +548,24 @@ class PcaOptionsDialog(OptionsDialog):
"on_aopt_value_changed" : self.on_aopt_changed,
"auto_aopt_toggled" : self.auto_aopt_toggled,
"center_toggled" : self.center_toggled,
"on_scale_changed" : self.on_scale_changed,
#"on_scale_changed" : self.on_scale_changed,
"on_val_none" : self.val_toggled,
"on_val_cv" : self.cv_toggled,
"on_val_pert" : self.pert_toggled,
"on_cv_method_changed" : self.on_cv_method_changed,
"on_cv_sets_changed" : self.on_cv_sets_changed,
"on_pert_sets_changed" : self.on_pert_sets_changed,
"on_conf_toggled" : self.on_conf_toggled
"on_conf_toggled" : self.on_conf_toggled,
"on_subset_loc_changed" : self.on_subset_loc_changed,
"on_cov_loc_changed" : self.on_cov_loc_changed,
"on_alpha_changed" : self.on_alpha_changed,
"on_rot_changed" : self.on_rot_changed
}
self.wTree.signal_autoconnect(dic)
# set/ensure valid default values/ranges
#
amax_sb = self.wTree.get_widget("amax_spinbutton")
max_comp = min(data[0].shape) # max num of components
if self._options['amax']>max_comp:
@ -534,8 +579,8 @@ class PcaOptionsDialog(OptionsDialog):
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0)
# scale
scale_cb = self.wTree.get_widget("scale_combobox")
scale_cb.set_active(0)
# scale_cb = self.wTree.get_widget("scale_combobox")
# scale_cb.set_active(0)
# validation frames
if self._options['calc_cv']==False:
@ -550,11 +595,22 @@ class PcaOptionsDialog(OptionsDialog):
# confidence
if self._options['calc_conf']==True:
self.wTree.get_widget("subset_frame").set_sensitive(True)
self.wTree.get_widget("subset_expander").set_sensitive(True)
else:
self.wTree.get_widget("subset_frame").set_sensitive(False)
self.wTree.get_widget("subset_expander").set_sensitive(False)
cb = self.wTree.get_widget("subset_loc")
_m = {'med': 0, 'mean': 1, 'full_model': 2}
cb.set_active(_m.get(self._options['p_center']))
cb = self.wTree.get_widget("cov_loc")
_m = {'med': 0, 'mean': 1}
cb.set_active(_m.get(self._options['cov_center']))
hs = self.wTree.get_widget("alpha_scale")
hs.set_value(self._options['alpha'])
def on_amax_changed(self, sb):
logger.log("debug", "amax changed: new value: %s" %sb.get_value_as_int())
amax = sb.get_value_as_int()
@ -585,14 +641,14 @@ class PcaOptionsDialog(OptionsDialog):
logger.log("debug", "centering set to False")
self._options['center'] = False
def on_scale_changed(self, cb):
scale = cb.get_active_text()
if scale=='Scores':
self._options['scale'] = 'scores'
elif scale=='Loadings':
self._options['scale'] = 'loads'
else:
raise IOError
#def on_scale_changed(self, cb):
# scale = cb.get_active_text()
# if scale=='Scores':
# self._options['scale'] = 'scores'
# elif scale=='Loadings':
# self._options['scale'] = 'loads'
# else:
# raise IOError
def val_toggled(self, tb):
"""Callback for validation: None. """
@ -657,17 +713,242 @@ class PcaOptionsDialog(OptionsDialog):
def on_conf_toggled(self, tb):
if tb.get_active():
self._options['calc_conf'] = False
self.wTree.get_widget("subset_frame").set_sensitive(False)
self.wTree.get_widget("subset_expander").set_sensitive(False)
else:
self._options['calc_conf'] = True
self.wTree.get_widget("subset_frame").set_sensitive(True)
self.wTree.get_widget("subset_expander").set_sensitive(True)
def on_subset_loc_changed(self, cb):
method = cb.get_active_text()
if method=='Full model':
self._options['p_center'] = 'full_model'
elif method=='Median':
self._options['p_center'] = 'med'
elif method=='Mean':
self._options['p_center'] = 'mean'
def on_cov_loc_changed(self, cb):
method = cb.get_active_text()
if method=='Median':
self._options['cov_center'] = 'med'
elif method=='Mean':
self._options['cov_center'] = 'mean'
def on_alpha_changed(self, hs):
self._options['alpha'] = hs.get_value()
def on_rot_changed(self, rg):
proc, strict = rg
if proc.get_active():
self._options['crot'] = True
else:
self._options['crot'] = True
self._options['strict'] = True
class PlsOptionsDialog(OptionsDialog):
"""Options dialog for Partial Least Squares Regression.
"""Options dialog for Partial Least squares regression.
"""
def __init__(self, data, options, input_names=['X', 'Y']):
OptionsDialog.__init__(self, data, options, input_names)
#glade_file = os.path.join(fluents.DATADIR, 'pca_options.glade')
glade_file = os.path.join("/home/flatberg/fluents/fluents/", 'pls_options.glade')
notebook_name = "vbox1"
page_name = "Options"
self.add_page_from_glade(glade_file, notebook_name, page_name)
# connect signals to handlers
dic = {"on_amax_value_changed" : self.on_amax_changed,
"on_aopt_value_changed" : self.on_aopt_changed,
"auto_aopt_toggled" : self.auto_aopt_toggled,
"center_toggled" : self.center_toggled,
#"on_scale_changed" : self.on_scale_changed,
"on_val_none" : self.val_toggled,
"on_val_cv" : self.cv_toggled,
"on_cv_method_changed" : self.on_cv_method_changed,
"on_cv_sets_changed" : self.on_cv_sets_changed,
"on_conf_toggled" : self.conf_toggled,
"on_subset_loc_changed" : self.on_subset_loc_changed,
"on_cov_loc_changed" : self.on_cov_loc_changed,
"on_alpha_changed" : self.on_alpha_changed,
"on_rot_changed" : self.on_rot_changed,
"on__toggled" : self.conf_toggled,
"on_qval_changed" : self.on_qval_changed,
"on_iter_changed" : self.on_iter_changed
}
self.wTree.signal_autoconnect(dic)
# set/ensure valid default values/ranges
#
amax_sb = self.wTree.get_widget("amax_spinbutton")
max_comp = min(data[0].shape) # max num of components
if self._options['amax']>max_comp:
logger.log('debug', 'amax default too large ... adjusting')
self._options['amax'] = max_comp
amax_sb.get_adjustment().set_all(self._options['amax'], 1, max_comp, 1, 0, 0)
# aopt spin button
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
if self._options['aopt']>self._options['amax']:
self._options['aopt'] = self._options['amax'] + 1 - 1
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0)
# scale
# scale_cb = self.wTree.get_widget("scale_combobox")
# scale_cb.set_active(0)
# validation frames
if self._options['calc_cv']==False:
cv_frame = self.wTree.get_widget("cv_frame")
cv_frame.set_sensitive(False)
cv = self.wTree.get_widget("cv_method").set_active(0)
# confidence
if self._options['calc_conf']==True:
self.wTree.get_widget("subset_expander").set_sensitive(True)
else:
self.wTree.get_widget("subset_expander").set_sensitive(False)
cb = self.wTree.get_widget("subset_loc")
_m = {'med': 0, 'mean': 1, 'full_model': 2}
cb.set_active(_m.get(self._options['p_center']))
cb = self.wTree.get_widget("cov_loc")
_m = {'med': 0, 'mean': 1}
cb.set_active(_m.get(self._options['cov_center']))
hs = self.wTree.get_widget("alpha_scale")
hs.set_value(self._options['alpha'])
tb = self.wTree.get_widget("qvals")
tb.set_sensitive(True)
def on_amax_changed(self, sb):
logger.log("debug", "amax changed: new value: %s" %sb.get_value_as_int())
amax = sb.get_value_as_int()
# update aopt if needed
if amax<self._options['aopt']:
self._options['aopt'] = amax
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, amax, 1, 0, 0)
self._options['amax'] = sb.get_value_as_int()
def on_aopt_changed(self, sb):
aopt = sb.get_value_as_int()
self._options['aopt'] = aopt
def auto_aopt_toggled(self, tb):
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
if tb.get_active():
self._options['auto_aopt'] = True
aopt_sb.set_sensitive(False)
else:
self._options['auto_aopt'] = False
aopt_sb.set_sensitive(True)
def center_toggled(self, tb):
if tb.get_active():
self._options['center'] = True
else:
logger.log("debug", "centering set to False")
self._options['center'] = False
#def on_scale_changed(self, cb):
# scale = cb.get_active_text()
# if scale=='Scores':
# self._options['scale'] = 'scores'
# elif scale=='Loadings':
# self._options['scale'] = 'loads'
# else:
# raise IOError
def val_toggled(self, tb):
"""Callback for validation: None. """
cv_frame = self.wTree.get_widget("cv_frame")
cv_tb = self.wTree.get_widget("cv_toggle")
if tb.get_active():
self._options['calc_cv'] = False
cv_frame.set_sensitive(False)
cv_tb.set_sensitive(False)
else:
cv_tb.set_sensitive(True)
if cv_tb.get_active():
cv_frame.set_sensitive(True)
self._options['calc_cv'] = True
def cv_toggled(self, tb):
cv_frame = self.wTree.get_widget("cv_frame")
val_tb = self.wTree.get_widget("val_none_toggle")
if tb.get_active():
cv_frame.set_sensitive(True)
self._options['calc_cv'] = True
else:
cv_frame.set_sensitive(False)
self._options['calc_cv'] = False
def on_cv_method_changed(self, cb):
method = cb.get_active_text()
if method == 'Random':
self._options['cv_val_method'] = 'random'
def on_cv_sets_changed(self, sb):
val = sb.get_value_as_int()
self._options['cv_val_sets'] = val
def conf_toggled(self, tb):
if tb.get_active():
self._options['calc_conf'] = False
self.wTree.get_widget("subset_expander").set_sensitive(False)
else:
self._options['calc_conf'] = True
self.wTree.get_widget("subset_expander").set_sensitive(True)
def on_subset_loc_changed(self, cb):
method = cb.get_active_text()
if method=='Full model':
self._options['p_center'] = 'full_model'
elif method=='Median':
self._options['p_center'] = 'med'
elif method=='Mean':
self._options['p_center'] = 'mean'
def on_cov_loc_changed(self, cb):
method = cb.get_active_text()
if method=='Median':
self._options['cov_center'] = 'med'
elif method=='Mean':
self._options['cov_center'] = 'mean'
def on_alpha_changed(self, hs):
self._options['alpha'] = hs.get_value()
def on_rot_changed(self, rg):
proc, strict = rg
if proc.get_active():
self._options['crot'] = True
else:
self._options['crot'] = True
self._options['strict'] = True
def qval_toggled(self, tb):
if tb.get_active():
self._options['calc_qval'] = False
print "Setting not sens"
self.wTree.get_widget("qval_method").set_sensitive(False)
self.wTree.get_widget("q_iter").set_sensitive(False)
else:
self._options['calc_qval'] = True
self.wTree.get_widget("qval_method").set_sensitive(True)
self.wTree.get_widget("q_iter").set_sensitive(True)
def on_iter_changed(self, sb):
self._options['q_iter'] = sb.get_value()
def on_qval_changed(self, cb):
q_method = cb.get_active_text()
if method=='Shuffle rows':
self._options['q_pert_method'] = 'shuffle'