Lib updates
This commit is contained in:
parent
7ea87e646a
commit
a05d0faa0d
@ -10,7 +10,7 @@ from fluents.workflow import Function, OptionsDialog, Options
|
||||
from fluents.dataset import Dataset
|
||||
from fluents import plots, dataset, workflow, logger
|
||||
import scipy
|
||||
from engines import pca, pls
|
||||
from engines import pca, pls, nipals_lpls
|
||||
from cx_stats import leverage, variances, hotelling
|
||||
from cx_utils import mat_center
|
||||
from validation import *
|
||||
@ -238,14 +238,14 @@ class PLS(Model):
|
||||
"""Estimates cut off on significant vars by controlling fdr."""
|
||||
|
||||
if self._options['calc_qvals']==True:
|
||||
qvals_sorted, qvals = pls_qvals(a, b,
|
||||
aopt=None,
|
||||
alpha=reg,
|
||||
n_iter=n_iter,
|
||||
algo='pls',
|
||||
sim_method=sim_method)
|
||||
qvals = pls_qvals(a, b,
|
||||
aopt=None,
|
||||
alpha=reg,
|
||||
n_iter=n_iter,
|
||||
algo='pls',
|
||||
sim_method=sim_method)
|
||||
self.model['qval'] = qvals
|
||||
self.model['qval_sorted'] = qvals_sorted
|
||||
#self.model['qval_sorted'] = qvals_sorted
|
||||
else:
|
||||
self.model['qval'] = None
|
||||
self.model['qval_sorted'] = None
|
||||
@ -276,18 +276,19 @@ class PLS(Model):
|
||||
pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))]
|
||||
zero_dim = ['_doe',['0']] # null dim, vector (hidden)
|
||||
|
||||
match_ids = {'E':[ids_0, ids_1],
|
||||
'P':[ids_1, pc_ids],
|
||||
'T':[ids_0, pc_ids],
|
||||
'W': [ids_1, pc_ids],
|
||||
'R': [ids_1, pc_ids],
|
||||
'Q':[ids_3, pc_ids],
|
||||
'F':[ids_0, ids_3],
|
||||
'B':[ids_1, ids_3],
|
||||
'qval':[ids_1, zero_dim],
|
||||
match_ids = {'E' : [ids_0, ids_1],
|
||||
'P' : [ids_1, pc_ids],
|
||||
'T' : [ids_0, pc_ids],
|
||||
'W' : [ids_1, pc_ids],
|
||||
'R' : [ids_1, pc_ids],
|
||||
'Q' : [ids_3, pc_ids],
|
||||
'F' : [ids_0, ids_3],
|
||||
'B' : [ids_1, ids_3],
|
||||
'qval' : [ids_1, zero_dim],
|
||||
'qval_sorted':[ids_1, zero_dim],
|
||||
'w_tsq':[ids_1, zero_dim],
|
||||
'rmsep':[ids_3, pc_ids],
|
||||
'w_tsq' : [ids_1, zero_dim],
|
||||
'rmsep' : [ids_3, pc_ids],
|
||||
'CP': [ids_1, pc_ids]
|
||||
}
|
||||
|
||||
array = self.model[name]
|
||||
@ -302,7 +303,7 @@ class PLS(Model):
|
||||
#except:
|
||||
# logger.log('debug', 'Plot: %s failed' %plt)
|
||||
return out
|
||||
|
||||
|
||||
def run_o(self, a, b):
|
||||
"""Run PLS with present options."""
|
||||
options = self._options
|
||||
@ -330,6 +331,17 @@ class PLS(Model):
|
||||
self.model['var_y'] = var_y
|
||||
self.model['exp_var_y'] = exp_var_y
|
||||
|
||||
if options['calc_corrloads']:
|
||||
corr_load = scipy.empty_like(self.model['P'].copy())
|
||||
T = self.model['T']
|
||||
X = self._data['X']
|
||||
# For each variable/attribute in original matrix (not meancentered)
|
||||
for i,score in enumerate(T.T):
|
||||
for j, profile in enumerate(X.T):
|
||||
corrs = scipy.corrcoef(score, profile)
|
||||
corr_load[j,i] = corrs[0,1]
|
||||
self.model['CP'] = corr_load
|
||||
|
||||
if options['calc_conf']:
|
||||
self.confidence(**options.confidence_options())
|
||||
|
||||
@ -353,6 +365,141 @@ class PLS(Model):
|
||||
#run with current data and options
|
||||
return self.run_o(a, b)
|
||||
|
||||
class LPLS(Model):
|
||||
def __init__(self, id='lpls', name='LPLS'):
|
||||
Model.__init__(self, id, name)
|
||||
self._options = LplsOptions()
|
||||
|
||||
def validation(self, opt):
|
||||
"""Returns rmsep for lpls model.
|
||||
"""
|
||||
|
||||
if opt['calc_cv']==True:
|
||||
val_engine = opt['val_engine']
|
||||
rmsep, aopt = val_engine(self.model['X'], self.model['Y'],
|
||||
self.model['Z'], opt['amax'], opt['n_sets'], opt['xz_alpha'])
|
||||
self.model['rmsep'] = rmsep
|
||||
self.model['aopt'] = aopt
|
||||
else:
|
||||
self.model['rmsep'] = None
|
||||
self.model['aopt'] = opt['aopt']
|
||||
|
||||
def confidence(self, opt):
|
||||
"""Returns a confidence measure for model parameters
|
||||
Supported parameters: W
|
||||
"""
|
||||
aopt = self.model['aopt']
|
||||
if opt['calc_conf']:
|
||||
Wx, Wz = lpls_jk(self.model['X'], self.model['Y'], self.model['Z'], aopt, n_sets)
|
||||
Wcal = self.model['W'][:,:aopt]
|
||||
Lcal = self.model['L'][:,:aopt]
|
||||
# ensure that Wcal is scaled
|
||||
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
|
||||
Wcal = Wcal*tnorm
|
||||
a,b,c,d,e = opt['p_center'], opt['crot'], opt['alpha'], opt['strict'], opt['cov_center']
|
||||
tsqx = hotelling(Wx, Wcal, a,b,c,d,e)
|
||||
tsqz = hotelling(Wz, Lcal, a,b,c,d,e)
|
||||
self.model['tsqx'] = tsqx
|
||||
self.model['tsqz'] = tsqz
|
||||
else:
|
||||
self.model['tsqx'] = None
|
||||
self.model['tsqz'] = None
|
||||
|
||||
def permutation_confidence(self, opt):
|
||||
"""Estimates cut off on significant vars by controlling fdr.
|
||||
|
||||
"""
|
||||
self.model['qval'] = None
|
||||
self.model['qval_sorted'] = None
|
||||
|
||||
def make_model(self, opt):
|
||||
"""Make model on amax components.
|
||||
"""
|
||||
engine = opt['engine']
|
||||
dat = engine(self._data['X'], self._data['Y'], self._data['Z'],
|
||||
opt['amax'], opt['xz_alpha'], opt['center_mth'],
|
||||
opt['mode'], opt['scale'], False)
|
||||
self.model.update(dat)
|
||||
|
||||
def as_dataset(self, name, dtype='Dataset'):
|
||||
"""Return any model parameter as Dataset
|
||||
No ids matching
|
||||
"""
|
||||
if name not in self.model.keys():
|
||||
return
|
||||
DX, DY, DZ = self._dataset['X'], self._dataset['Y'], self._dataset['Z']
|
||||
dim_name_0, dim_name_1 = DX.get_dim_name()
|
||||
dim_name_2, dim_name_3 = DY.get_dim_name()
|
||||
dim_name_4, dim_name_5 = DZ.get_dim_name()
|
||||
#samples
|
||||
ids_0 = [dim_name_0, DX.get_identifiers(dim_name_0, sorted=True)]
|
||||
# x vars (genes)
|
||||
ids_1 = [dim_name_1, DX.get_identifiers(dim_name_1, sorted=True)]
|
||||
# y vars (sample descriptors)
|
||||
ids_3 = [dim_name_3, DY.get_identifiers(dim_name_3, sorted=True)]
|
||||
#z-vars (variable descriptors)
|
||||
ids_4 = [dim_name_4, DZ.get_identifiers(dim_name_4, sorted=True)]
|
||||
# components (hidden)
|
||||
pc_ids = ['_comp', map(str, range(self._options['amax']))]
|
||||
pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))]
|
||||
zero_dim = ['_doe',['0']] # null dim, vector (hidden)
|
||||
|
||||
match_ids = {'E' : [ids_0, ids_1],
|
||||
'P' : [ids_1, pc_ids],
|
||||
'T' : [ids_0, pc_ids],
|
||||
'W' : [ids_1, pc_ids],
|
||||
'L' : [ids_4, pc_ids],
|
||||
'Q' : [ids_3, pc_ids],
|
||||
'F' : [ids_0, ids_3],
|
||||
'B' : [ids_1, ids_3],
|
||||
'tsqx' : [ids_1, zero_dim],
|
||||
'tsqz' : [ids_4, zero_dim],
|
||||
'K' : [ids_1, pc_ids],
|
||||
'rmsep' : [ids_3, pc_ids]
|
||||
}
|
||||
|
||||
array = self.model[name]
|
||||
M = Dataset(array, identifiers=match_ids[name], name=name)
|
||||
return M
|
||||
|
||||
def get_out_plots(self, options):
|
||||
out=[]
|
||||
for plt in options['out_plots']:
|
||||
out.append(plt(self))
|
||||
return out
|
||||
|
||||
def run(self, a, b, c):
|
||||
"""Run L-PLS with present options."""
|
||||
options = self._options
|
||||
self._dataset['X'] = a
|
||||
self._dataset['Y'] = b
|
||||
self._dataset['Z'] = c
|
||||
self._data['X'] = a.asarray()
|
||||
self._data['Y'] = b.asarray()
|
||||
self._data['Z'] = c.asarray()
|
||||
self.validation(options)
|
||||
self.make_model(options)
|
||||
if options['calc_conf']:
|
||||
self.confidence(options)
|
||||
|
||||
out = [self.as_dataset(p) for p in options['out_data']]
|
||||
for plt in self.get_out_plots(options):
|
||||
out.append(plt)
|
||||
return out
|
||||
|
||||
def run_gui(self, a, b, c):
|
||||
"""Run LPLS with option gui.
|
||||
"""
|
||||
dialog = LPlsOptionsDialog([a, b, c], self._options)
|
||||
dialog.show_all()
|
||||
response = dialog.run()
|
||||
dialog.hide()
|
||||
|
||||
if response == gtk.RESPONSE_OK:
|
||||
# set output data and plots
|
||||
dialog.set_output()
|
||||
#run with current data and options
|
||||
return self.run(a, b, c)
|
||||
|
||||
class PcaOptions(Options):
|
||||
"""Options for Principal Component Analysis.
|
||||
@ -403,7 +550,9 @@ class PcaOptions(Options):
|
||||
]
|
||||
|
||||
opt['out_data'] = ['T','P', 'p_tsq']
|
||||
opt['out_plots'] = [blmplots.PcaScorePlot,blmplots.PcaLoadingPlot,blmplots.LineViewXc]
|
||||
opt['out_plots'] = [blmplots.PcaScorePlot,
|
||||
blmplots.PcaLoadingPlot,
|
||||
blmplots.LineViewXc]
|
||||
|
||||
self.update(opt)
|
||||
|
||||
@ -444,6 +593,7 @@ class PlsOptions(Options):
|
||||
opt['center_mth'] = mat_center
|
||||
opt['scale'] = 'scores'
|
||||
|
||||
opt['calc_corrloads'] = True
|
||||
opt['calc_conf'] = False
|
||||
opt['n_sets'] = 5
|
||||
opt['strict'] = True
|
||||
@ -468,7 +618,8 @@ class PlsOptions(Options):
|
||||
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
||||
(blmplots.LineViewXc, 'Line view', True),
|
||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||
(blmplots.RMSEPPlot, 'RMSEP', False)
|
||||
(blmplots.RMSEPPlot, 'RMSEP', False),
|
||||
(blmplots.PlsCorrelationLoadingPlot, 'Corr. loadings', True)
|
||||
]
|
||||
|
||||
opt['out_data'] = ['T','P', 'p_tsq']
|
||||
@ -494,14 +645,87 @@ class PlsOptions(Options):
|
||||
'strict', 'crot', 'cov_center']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
|
||||
def permutation_confidence(self):
|
||||
opt_list = ['q_pert_method', 'q_iter']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
|
||||
class LplsOptions(Options):
|
||||
"""Options for L-shaped Partial Least Squares Regression.
|
||||
"""
|
||||
def __init__(self):
|
||||
Options.__init__(self)
|
||||
self._set_default()
|
||||
|
||||
def _set_default(self):
|
||||
opt = {}
|
||||
opt['engine'] = nipals_lpls
|
||||
opt['mode'] = 'normal' # how much info to calculate
|
||||
opt['amax'] = 10
|
||||
opt['aopt'] = 9
|
||||
opt['xz_alpha'] = .5
|
||||
opt['auto_aopt'] = False
|
||||
opt['center'] = True
|
||||
opt['center_mth'] = [2, 0, 1]
|
||||
opt['scale'] = 'scores'
|
||||
opt['calc_conf'] = False
|
||||
opt['n_sets'] = 7
|
||||
opt['strict'] = False
|
||||
opt['p_center'] = 'med'
|
||||
opt['alpha'] = .3
|
||||
opt['cov_center'] = 'med'
|
||||
opt['crot'] = True
|
||||
|
||||
opt['calc_cv'] = False
|
||||
opt['cv_val_method'] = 'random'
|
||||
opt['cv_val_sets'] = opt['n_sets']
|
||||
|
||||
opt['all_data'] = [('T', 'scores', True),
|
||||
('Wx', 'X-weights', True),
|
||||
('Wz', 'Z-weights', True),
|
||||
('E','residuals', False),
|
||||
('tsq_x', 't2X', False),
|
||||
('rmsep', 'RMSEP', False)
|
||||
]
|
||||
|
||||
# (class, name, sensitive, ticked)
|
||||
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
||||
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
||||
(blmplots.LineViewXc, 'Line view', True),
|
||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||
(blmplots.RMSEPPlot, 'RMSEP', False),
|
||||
(blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False)
|
||||
]
|
||||
|
||||
opt['out_data'] = ['T','P']
|
||||
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc]
|
||||
|
||||
#opt['out_data'] = None
|
||||
|
||||
opt['pack'] = False
|
||||
opt['calc_qvals'] = False
|
||||
opt['q_pert_method'] = 'shuffle_rows'
|
||||
opt['q_iter'] = 20
|
||||
|
||||
self.update(opt)
|
||||
|
||||
def make_model_options(self):
|
||||
"""Options for make_model method."""
|
||||
opt_list = ['scale','mode', 'amax', 'engine']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
def confidence_options(self):
|
||||
"""Options for confidence method."""
|
||||
opt_list = ['n_sets', 'aopt', 'alpha', 'p_center',
|
||||
'strict', 'crot', 'cov_center']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
def validation_options(self):
|
||||
"""Options for pre_validation method."""
|
||||
opt_list = ['amax', 'n_sets', 'cv_val_method']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
def permutation_confidence(self):
|
||||
opt_list = ['q_pert_method', 'q_iter']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
class PcaOptionsDialog(OptionsDialog):
|
||||
"""Options dialog for Principal Component Analysis.
|
||||
@ -716,6 +940,210 @@ class PcaOptionsDialog(OptionsDialog):
|
||||
self._options['strict'] = True
|
||||
|
||||
|
||||
class LplsOptionsDialog(OptionsDialog):
|
||||
"""Options dialog for L-shaped Partial Least squares regression.
|
||||
"""
|
||||
def __init__(self, data, options, input_names=['X', 'Y', 'Z']):
|
||||
OptionsDialog.__init__(self, data, options, input_names)
|
||||
glade_file = os.path.join(fluents.DATADIR, 'lpls_options.glade')
|
||||
|
||||
notebook_name = "vbox1"
|
||||
page_name = "Options"
|
||||
self.add_page_from_glade(glade_file, notebook_name, page_name)
|
||||
# connect signals to handlers
|
||||
dic = {"on_amax_value_changed" : self.on_amax_changed,
|
||||
"on_aopt_value_changed" : self.on_aopt_changed,
|
||||
"auto_aopt_toggled" : self.auto_aopt_toggled,
|
||||
"center_toggled" : self.center_toggled,
|
||||
#"on_scale_changed" : self.on_scale_changed,
|
||||
"on_val_none" : self.val_toggled,
|
||||
"on_val_cv" : self.cv_toggled,
|
||||
"on_cv_method_changed" : self.on_cv_method_changed,
|
||||
"on_cv_sets_changed" : self.on_cv_sets_changed,
|
||||
"on_conf_toggled" : self.conf_toggled,
|
||||
"on_subset_loc_changed" : self.on_subset_loc_changed,
|
||||
"on_cov_loc_changed" : self.on_cov_loc_changed,
|
||||
"on_alpha_changed" : self.on_alpha_changed,
|
||||
"on_rot_changed" : self.on_rot_changed,
|
||||
"on__toggled" : self.conf_toggled,
|
||||
"on_qval_changed" : self.on_qval_changed,
|
||||
"on_iter_changed" : self.on_iter_changed
|
||||
}
|
||||
|
||||
self.wTree.signal_autoconnect(dic)
|
||||
|
||||
# set/ensure valid default values/ranges
|
||||
#
|
||||
amax_sb = self.wTree.get_widget("amax_spinbutton")
|
||||
max_comp = min(data[0].shape) # max num of components
|
||||
if self._options['amax']>max_comp:
|
||||
logger.log('debug', 'amax default too large ... adjusting')
|
||||
self._options['amax'] = max_comp
|
||||
amax_sb.get_adjustment().set_all(self._options['amax'], 1, max_comp, 1, 0, 0)
|
||||
# aopt spin button
|
||||
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||
if self._options['aopt']>self._options['amax']:
|
||||
self._options['aopt'] = self._options['amax'] + 1 - 1
|
||||
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0)
|
||||
|
||||
# scale
|
||||
# scale_cb = self.wTree.get_widget("scale_combobox")
|
||||
# scale_cb.set_active(0)
|
||||
|
||||
# validation frames
|
||||
if self._options['calc_cv']==False:
|
||||
cv_frame = self.wTree.get_widget("cv_frame")
|
||||
cv_frame.set_sensitive(False)
|
||||
|
||||
cv = self.wTree.get_widget("cv_method").set_active(0)
|
||||
|
||||
# confidence
|
||||
if self._options['calc_conf']==True:
|
||||
self.wTree.get_widget("subset_expander").set_sensitive(True)
|
||||
else:
|
||||
self.wTree.get_widget("subset_expander").set_sensitive(False)
|
||||
|
||||
cb = self.wTree.get_widget("subset_loc")
|
||||
_m = {'med': 0, 'mean': 1, 'full_model': 2}
|
||||
cb.set_active(_m.get(self._options['p_center']))
|
||||
|
||||
cb = self.wTree.get_widget("cov_loc")
|
||||
_m = {'med': 0, 'mean': 1}
|
||||
cb.set_active(_m.get(self._options['cov_center']))
|
||||
|
||||
hs = self.wTree.get_widget("alpha_scale")
|
||||
hs.set_value(self._options['alpha'])
|
||||
|
||||
tb = self.wTree.get_widget("qvals")
|
||||
tb.set_sensitive(True)
|
||||
|
||||
|
||||
def on_amax_changed(self, sb):
|
||||
logger.log("debug", "amax changed: new value: %s" %sb.get_value_as_int())
|
||||
amax = sb.get_value_as_int()
|
||||
# update aopt if needed
|
||||
if amax<self._options['aopt']:
|
||||
self._options['aopt'] = amax
|
||||
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, amax, 1, 0, 0)
|
||||
self._options['amax'] = sb.get_value_as_int()
|
||||
|
||||
def on_aopt_changed(self, sb):
|
||||
aopt = sb.get_value_as_int()
|
||||
self._options['aopt'] = aopt
|
||||
|
||||
def auto_aopt_toggled(self, tb):
|
||||
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||
if tb.get_active():
|
||||
self._options['auto_aopt'] = True
|
||||
aopt_sb.set_sensitive(False)
|
||||
else:
|
||||
self._options['auto_aopt'] = False
|
||||
aopt_sb.set_sensitive(True)
|
||||
|
||||
def center_toggled(self, tb):
|
||||
if tb.get_active():
|
||||
self._options['center'] = True
|
||||
else:
|
||||
logger.log("debug", "centering set to False")
|
||||
self._options['center'] = False
|
||||
|
||||
#def on_scale_changed(self, cb):
|
||||
# scale = cb.get_active_text()
|
||||
# if scale=='Scores':
|
||||
# self._options['scale'] = 'scores'
|
||||
# elif scale=='Loadings':
|
||||
# self._options['scale'] = 'loads'
|
||||
# else:
|
||||
# raise IOError
|
||||
|
||||
def val_toggled(self, tb):
|
||||
"""Callback for validation: None. """
|
||||
cv_frame = self.wTree.get_widget("cv_frame")
|
||||
cv_tb = self.wTree.get_widget("cv_toggle")
|
||||
if tb.get_active():
|
||||
self._options['calc_cv'] = False
|
||||
cv_frame.set_sensitive(False)
|
||||
cv_tb.set_sensitive(False)
|
||||
else:
|
||||
cv_tb.set_sensitive(True)
|
||||
if cv_tb.get_active():
|
||||
cv_frame.set_sensitive(True)
|
||||
self._options['calc_cv'] = True
|
||||
|
||||
def cv_toggled(self, tb):
|
||||
cv_frame = self.wTree.get_widget("cv_frame")
|
||||
val_tb = self.wTree.get_widget("val_none_toggle")
|
||||
if tb.get_active():
|
||||
cv_frame.set_sensitive(True)
|
||||
self._options['calc_cv'] = True
|
||||
else:
|
||||
cv_frame.set_sensitive(False)
|
||||
self._options['calc_cv'] = False
|
||||
|
||||
def on_cv_method_changed(self, cb):
|
||||
method = cb.get_active_text()
|
||||
if method == 'Random':
|
||||
self._options['cv_val_method'] = 'random'
|
||||
|
||||
def on_cv_sets_changed(self, sb):
|
||||
val = sb.get_value_as_int()
|
||||
self._options['cv_val_sets'] = val
|
||||
|
||||
def conf_toggled(self, tb):
|
||||
if tb.get_active():
|
||||
self._options['calc_conf'] = False
|
||||
self.wTree.get_widget("subset_expander").set_sensitive(False)
|
||||
else:
|
||||
self._options['calc_conf'] = True
|
||||
self.wTree.get_widget("subset_expander").set_sensitive(True)
|
||||
|
||||
def on_subset_loc_changed(self, cb):
|
||||
method = cb.get_active_text()
|
||||
if method=='Full model':
|
||||
self._options['p_center'] = 'full_model'
|
||||
elif method=='Median':
|
||||
self._options['p_center'] = 'med'
|
||||
elif method=='Mean':
|
||||
self._options['p_center'] = 'mean'
|
||||
|
||||
def on_cov_loc_changed(self, cb):
|
||||
method = cb.get_active_text()
|
||||
if method=='Median':
|
||||
self._options['cov_center'] = 'med'
|
||||
elif method=='Mean':
|
||||
self._options['cov_center'] = 'mean'
|
||||
|
||||
def on_alpha_changed(self, hs):
|
||||
self._options['alpha'] = hs.get_value()
|
||||
|
||||
def on_rot_changed(self, rg):
|
||||
proc, strict = rg
|
||||
if proc.get_active():
|
||||
self._options['crot'] = True
|
||||
else:
|
||||
self._options['crot'] = True
|
||||
self._options['strict'] = True
|
||||
|
||||
def qval_toggled(self, tb):
|
||||
if tb.get_active():
|
||||
self._options['calc_qval'] = False
|
||||
self.wTree.get_widget("qval_method").set_sensitive(False)
|
||||
self.wTree.get_widget("q_iter").set_sensitive(False)
|
||||
else:
|
||||
self._options['calc_qval'] = True
|
||||
self.wTree.get_widget("qval_method").set_sensitive(True)
|
||||
self.wTree.get_widget("q_iter").set_sensitive(True)
|
||||
|
||||
def on_iter_changed(self, sb):
|
||||
self._options['q_iter'] = sb.get_value()
|
||||
|
||||
def on_qval_changed(self, cb):
|
||||
q_method = cb.get_active_text()
|
||||
if method=='Shuffle rows':
|
||||
self._options['q_pert_method'] = 'shuffle'
|
||||
|
||||
|
||||
class PlsOptionsDialog(OptionsDialog):
|
||||
"""Options dialog for Partial Least squares regression.
|
||||
"""
|
||||
@ -918,5 +1346,3 @@ class PlsOptionsDialog(OptionsDialog):
|
||||
q_method = cb.get_active_text()
|
||||
if method=='Shuffle rows':
|
||||
self._options['q_pert_method'] = 'shuffle'
|
||||
|
||||
|
||||
|
@ -190,7 +190,12 @@ class PlsCorrelationLoadingPlot(BlmScatterPlot):
|
||||
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP')
|
||||
|
||||
|
||||
|
||||
class LplsHypoidCorrelationPlot(BlmScatterPlot):
|
||||
def __init__(self, model, absi=0, ordi=1):
|
||||
title = "Hypoid correlations(%s)" %model._dataset['X'].get_name()
|
||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='W')
|
||||
|
||||
class LineViewXc(plots.LineViewPlot):
|
||||
"""A line view of centered raw data
|
||||
"""
|
||||
@ -214,8 +219,8 @@ class PlsQvalScatter(plots.ScatterPlot):
|
||||
def __init__(self, model, pc=0):
|
||||
if not model.model.has_key('w_tsq'):
|
||||
return None
|
||||
self._W = model.model['P']
|
||||
dataset_1 = model.as_dataset('P')
|
||||
self._W = model.model['W']
|
||||
dataset_1 = model.as_dataset('W')
|
||||
dataset_2 = model.as_dataset('w_tsq')
|
||||
id_dim = dataset_1.get_dim_name(0) #genes
|
||||
sel_dim = dataset_1.get_dim_name(1) #_comp
|
||||
|
@ -115,6 +115,7 @@ def expl_var_y(Y, T, Q):
|
||||
|
||||
def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||
n_iter=20, algo='pls',
|
||||
center=True,
|
||||
sim_method='shuffle',
|
||||
p_center='med', cov_center='med',
|
||||
crot=True, strict=False, metric=None):
|
||||
@ -122,8 +123,98 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||
"""Returns qvals for pls model.
|
||||
|
||||
input:
|
||||
a -- centered data matrix
|
||||
b -- centered data matrix
|
||||
a -- data matrix
|
||||
b -- data matrix
|
||||
aopt -- scalar, opt. number of components
|
||||
alpha -- [0,1] regularisation parameter for T2-test
|
||||
n_iter -- number of permutations
|
||||
sim_method -- permutation method ['shuffle']
|
||||
p_center -- location estimator for sub models ['med']
|
||||
cov_center -- location estimator for covariance of submodels ['med']
|
||||
crot -- bool, use rotations of sub models?
|
||||
strict -- bool, use stict (rot/flips only) rotations?
|
||||
metric -- bool, use row metric?
|
||||
"""
|
||||
|
||||
m, n = a.shape
|
||||
TSQ = zeros((n, n_iter), dtype='d') # (nvars x n_subsets)
|
||||
n_false = zeros((n, n_iter), dtype='d')
|
||||
|
||||
#full model
|
||||
if center:
|
||||
ac = a - a.mean(0)
|
||||
bc = b - b.mean(0)
|
||||
if metric!=None:
|
||||
ac = dot(ac, metric)
|
||||
if algo=='bridge':
|
||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||
else:
|
||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True)
|
||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
t0 = time.time()
|
||||
Vs = shuffle_1d(bc, n_iter, axis=0)
|
||||
for i, b_shuff in enumerate(Vs):
|
||||
t1 = time.time()
|
||||
if algo=='bridge':
|
||||
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
|
||||
else:
|
||||
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
print time.time() - t1
|
||||
sort_index = argsort(tsq_full)[::-1]
|
||||
back_sort_index = sort_index.argsort()
|
||||
print time.time() - t0
|
||||
|
||||
# count false positives
|
||||
tsq_full_sorted = tsq_full.take(sort_index)
|
||||
for i in xrange(n_iter):
|
||||
for j in xrange(n):
|
||||
n_false[j,i] = sum(TSQ[:,i]>=tsq_full[j]) # number of false pos. genes (0-n)
|
||||
false_pos = median(n_false, 1)
|
||||
ll = arange(1, len(false_pos)+1, 1)
|
||||
sort_qval = false_pos.take(sort_index)/ll
|
||||
qval = false_pos/ll.take(back_sort_index)
|
||||
print time.time() - t0
|
||||
#return qval, false_pos, TSQ, tsq_full
|
||||
return qval
|
||||
|
||||
def ensure_strict(C, only_flips=True):
|
||||
"""Ensure that a rotation matrix does only 90 degree rotations.
|
||||
In multiplication with pcs this allows flips and reordering.
|
||||
|
||||
if only_flips is True there will onlt be flips allowed
|
||||
"""
|
||||
Cm = C
|
||||
S = sign(C) # signs
|
||||
if only_flips==True:
|
||||
C = eye(Cm.shape[0])*S
|
||||
return C
|
||||
Cm = zeros_like(C)
|
||||
Cm.putmask(1.,abs(C)>.6)
|
||||
if det(Cm)>1:
|
||||
raise ValueError,"Implement this!"
|
||||
return Cm*S
|
||||
|
||||
def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
||||
n_iter=20, algo='pls',
|
||||
sim_method='shuffle',
|
||||
p_center='med', cov_center='med',
|
||||
crot=True, strict=False, metric=None):
|
||||
|
||||
"""Returns qvals for pls model.
|
||||
Shuffling of variables in X is preprocessed in metric.
|
||||
Null model is 'If I put genes randomly on network' ... if they are sign:
|
||||
then this is due to network structure and not covariance with response.
|
||||
|
||||
input:
|
||||
a -- data matrix
|
||||
b -- data matrix
|
||||
aopt -- scalar, opt. number of components
|
||||
alpha -- [0,1] regularisation parameter for T2-test
|
||||
n_iter -- number of permutations
|
||||
@ -140,25 +231,33 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||
n_false = zeros((n, n_iter), dtype='<f8')
|
||||
|
||||
#full model
|
||||
if metric!=None:
|
||||
a = dot(a, metric)
|
||||
|
||||
# center?
|
||||
if center==True:
|
||||
ac = a - a.mean(0)
|
||||
bc = b - b.mean(0)
|
||||
if metric==None:
|
||||
metric = eye(n,n)
|
||||
if algo=='bridge':
|
||||
dat = bridge(a, b, aopt, 'loads', 'fast')
|
||||
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||
else:
|
||||
dat = pls(a, b, aopt, 'loads', 'fast')
|
||||
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
t0 = time.time()
|
||||
Vs = shuffle_1d(b, n_iter)
|
||||
for i, b_shuff in enumerate(Vs):
|
||||
Vs = shuffle_1d(a, n_iter, 1)
|
||||
for i, a_shuff in enumerate(Vs):
|
||||
t1 = time.time()
|
||||
a = a_shuff - a_shuff.mean(0)
|
||||
a = dot(a, metric)
|
||||
|
||||
if algo=='bridge':
|
||||
dat = bridge(a, b_shuff, aopt, 'loads','fast')
|
||||
dat = bridge(a, b, aopt, 'loads','fast')
|
||||
else:
|
||||
dat = pls(a, b, aopt, 'loads', 'fast')
|
||||
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||
alpha=alpha, crot=crot, strict=strict,
|
||||
cov_center=cov_center)
|
||||
@ -177,24 +276,8 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||
sort_qval = false_pos.take(sort_index)/ll
|
||||
qval = false_pos/ll.take(back_sort_index)
|
||||
print time.time() - t0
|
||||
return qval, false_pos, TSQ, tsq_full
|
||||
|
||||
def ensure_strict(C, only_flips=True):
|
||||
"""Ensure that a rotation matrix does only 90 degree rotations.
|
||||
In multiplication with pcs this allows flips and reordering.
|
||||
|
||||
if only_flips is True there will onlt be flips allowed
|
||||
"""
|
||||
Cm = C
|
||||
S = sign(C) # signs
|
||||
if only_flips==True:
|
||||
C = eye(Cm.shape[0])*S
|
||||
return C
|
||||
Cm = zeros_like(C)
|
||||
Cm.putmask(1.,abs(C)>.6)
|
||||
if det(Cm)>1:
|
||||
raise ValueError,"Implement this!"
|
||||
return Cm*S
|
||||
#return qval, false_pos, TSQ, tsq_full
|
||||
return qval
|
||||
|
||||
def leverage(aopt=1,*args):
|
||||
"""Returns leverages
|
||||
@ -253,3 +336,10 @@ def ssq(E, axis=0, weights=None):
|
||||
raise NotImplementedError, "Higher order modes not supported"
|
||||
return pow(Ew,2).sum(axis)
|
||||
|
||||
|
||||
def vnorm(x):
|
||||
"""Returns the euclidian norm of a vector.
|
||||
|
||||
This is considerably faster than linalg.norm
|
||||
"""
|
||||
return sqrt(dot(x,x.conj()))
|
||||
|
@ -1,23 +1,25 @@
|
||||
from scipy import apply_along_axis,newaxis,zeros,\
|
||||
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
|
||||
trace,zeros_like,sign,sort,real,argsort,rand,array,\
|
||||
matrix
|
||||
matrix,nan
|
||||
from scipy.linalg import norm,svd,inv,eig
|
||||
from scipy.stats import median,mean
|
||||
|
||||
def normalise(a,axis=0,return_scales=False):
|
||||
s = apply_along_axis(norm,axis,a)
|
||||
if axis==0:
|
||||
s = s[newaxis]
|
||||
else:
|
||||
s = s[:,newaxis]
|
||||
|
||||
a_s = a/s
|
||||
if return_scales:
|
||||
return a_s,s
|
||||
return a_s
|
||||
def normalise(a, axis=0, return_scales=False):
|
||||
s = apply_along_axis(norm, axis, a)
|
||||
if axis==0:
|
||||
s = s[newaxis]
|
||||
else:
|
||||
s = s[:,newaxis]
|
||||
|
||||
a_s = a/s
|
||||
|
||||
def sub2ind(shape,i,j):
|
||||
if return_scales:
|
||||
return a_s, s
|
||||
|
||||
return a_s
|
||||
|
||||
def sub2ind(shape, i, j):
|
||||
"""Indices from subscripts. Only support for 2d"""
|
||||
row,col = shape
|
||||
ind = []
|
||||
@ -41,13 +43,13 @@ def sorted_eig(a, b=None,sort_by='sm'):
|
||||
(This is reversed output compared to matlab)
|
||||
|
||||
"""
|
||||
s,v = eig(a,b)
|
||||
s,v = eig(a, b)
|
||||
s = real(s) # dont expect any imaginary part
|
||||
v = real(v)
|
||||
ind = argsort(s)
|
||||
if sort_by=='lm':
|
||||
ind = ind[::-1]
|
||||
v = v.take(ind,1)
|
||||
v = v.take(ind, 1)
|
||||
s = s.take(ind)
|
||||
|
||||
return s,v
|
||||
@ -67,15 +69,15 @@ def str2num(string_number):
|
||||
return num
|
||||
|
||||
def randperm(n):
|
||||
r=rand(n)
|
||||
r = rand(n)
|
||||
dict={}
|
||||
for i in range(n):
|
||||
dict[r[i]]=i
|
||||
r=sort(r)
|
||||
out=zeros(n)
|
||||
dict[r[i]] = i
|
||||
r = sort(r)
|
||||
out = zeros(n)
|
||||
for i in range(n):
|
||||
out[i]=dict[r[i]]
|
||||
return array(out,dtype='i')
|
||||
out[i] = dict[r[i]]
|
||||
return array(out).astype('i')
|
||||
|
||||
def mat_center(X,axis=0,ret_mn=False):
|
||||
"""Mean center matrix along axis.
|
||||
|
@ -3,11 +3,12 @@
|
||||
There is no typechecking of any kind here, just focus on speed
|
||||
"""
|
||||
|
||||
from scipy.linalg import svd,norm,inv,pinv,qr
|
||||
import math
|
||||
from scipy.linalg import svd,inv
|
||||
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
||||
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
|
||||
rand,sum,cumsum,matrix
|
||||
|
||||
|
||||
def pca(a, aopt, scale='scores', mode='normal'):
|
||||
""" Principal Component Analysis model
|
||||
mode:
|
||||
@ -18,17 +19,18 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
||||
|
||||
m, n = a.shape
|
||||
|
||||
if m*10.>n:
|
||||
u, s, vt = esvd(a)
|
||||
if m*3>n:
|
||||
u, s, v = esvd(a)
|
||||
else:
|
||||
u, s, vt = svd(a, full_matrices=0)
|
||||
v = vt.T
|
||||
eigvals = (1./m)*s
|
||||
T = u*s
|
||||
T = T[:,:aopt]
|
||||
P = vt[:aopt,:].T
|
||||
P = v[:,:aopt]
|
||||
|
||||
if scale=='loads':
|
||||
tnorm = apply_along_axis(norm, 0, T)
|
||||
tnorm = apply_along_axis(vnorm, 0, T)
|
||||
T = T/tnorm
|
||||
P = P*tnorm
|
||||
|
||||
@ -47,6 +49,7 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
||||
|
||||
return {'T':T, 'P':P, 'E':E}
|
||||
|
||||
|
||||
def pcr(a, b, aopt=2, scale='scores', mode='normal'):
|
||||
"""Returns Principal component regression model."""
|
||||
m, n = a.shape
|
||||
@ -98,13 +101,13 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||
u, s, vh = svd(dot(ab.T, ab))
|
||||
w = dot(ab, u[:,:1])
|
||||
|
||||
w = w/norm(w)
|
||||
w = w/vnorm(w)
|
||||
r = w.copy()
|
||||
if i>0:
|
||||
for j in range(0,i,1):
|
||||
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
||||
t = dot(a, r)
|
||||
tt = norm(t)**2
|
||||
tt = vnorm(t)**2
|
||||
p = dot(a.T, t)/tt
|
||||
q = dot(r.T, ab).T/tt
|
||||
ab = ab - dot(p, q.T)*tt
|
||||
@ -115,7 +118,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||
|
||||
if mode=='fast' and i==aopt-1:
|
||||
if scale=='loads':
|
||||
tnorm = apply_along_axis(norm, 0, T)
|
||||
tnorm = apply_along_axis(vnorm, 0, T)
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
return {'T':T, 'W':W}
|
||||
@ -134,7 +137,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||
F = b - dot(T[:,:aopt], Q[:,:aopt].T)
|
||||
|
||||
if scale=='loads':
|
||||
tnorm = apply_along_axis(norm, 0, T)
|
||||
tnorm = apply_along_axis(vnorm, 0, T)
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
Q = Q*tnorm
|
||||
@ -159,7 +162,7 @@ def w_simpls(aat, b, aopt):
|
||||
u = dot(b, u[:,:1]) #y-factor scores
|
||||
U[:,i] = u.ravel()
|
||||
t = dot(aat, u)
|
||||
t = t/norm(t)
|
||||
t = t/vnorm(t)
|
||||
T[:,i] = t.ravel()
|
||||
h = dot(aat, t) #score-weights
|
||||
H[:,i] = h.ravel()
|
||||
@ -183,7 +186,7 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||
W = u[:,:aopt]
|
||||
K = vt[:aopt,:].T
|
||||
T = dot(a, W)
|
||||
tnorm = apply_along_axis(norm, 0, T) # norm of T-columns
|
||||
tnorm = apply_along_axis(vnorm, 0, T) # norm of T-columns
|
||||
|
||||
if mode == 'fast':
|
||||
if scale=='loads':
|
||||
@ -196,16 +199,6 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||
B = zeros((aopt, n, l), dtype='f')
|
||||
for i in range(aopt):
|
||||
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
|
||||
# leverages
|
||||
# fixme: probably need an orthogonal basis for row-space leverage
|
||||
# T (scores) are not orthogonal
|
||||
# Using a qr decomp to get an orthonormal basis for row-space
|
||||
#Tq = qr(T)[0]
|
||||
#s_lev,v_lev = leverage(aopt,Tq,W)
|
||||
# explained variance
|
||||
#var_x, exp_var_x = variances(a,T,W)
|
||||
#qnorm = apply_along_axis(norm, 0, Q)
|
||||
#var_y, exp_var_y = variances(b,U,Q/qnorm)
|
||||
|
||||
if mode == 'detailed':
|
||||
E = empty((aopt, m, n))
|
||||
@ -225,10 +218,132 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
|
||||
|
||||
|
||||
|
||||
def nipals_lpls(X, Y, Z, amax, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', scale='scores', verbose=False):
|
||||
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
|
||||
|
||||
(X!Z)->Y
|
||||
:input:
|
||||
X : data matrix (m, n)
|
||||
Y : data matrix (m, l)
|
||||
Z : data matrix (n, o)
|
||||
|
||||
:output:
|
||||
T : X-scores
|
||||
W : X-weights/Z-weights
|
||||
P : X-loadings
|
||||
Q : Y-loadings
|
||||
U : X-Y relation
|
||||
L : Z-scores
|
||||
K : Z-loads
|
||||
B : Regression coefficients X->Y
|
||||
b0: Regression coefficient intercept
|
||||
evx : X-explained variance
|
||||
evy : Y-explained variance
|
||||
evz : Z-explained variance
|
||||
|
||||
:Notes:
|
||||
|
||||
"""
|
||||
if mean_ctr!=None:
|
||||
xctr, yctr, zctr = mean_ctr
|
||||
X, mnX = center(X, xctr)
|
||||
Y, mnY = center(Y, xctr)
|
||||
Z, mnZ = center(Z, zctr)
|
||||
|
||||
varX = pow(X, 2).sum()
|
||||
varY = pow(Y, 2).sum()
|
||||
varZ = pow(Z, 2).sum()
|
||||
|
||||
m, n = X.shape
|
||||
k, l = Y.shape
|
||||
u, o = Z.shape
|
||||
|
||||
# initialize
|
||||
U = empty((k, amax))
|
||||
Q = empty((l, amax))
|
||||
T = empty((m, amax))
|
||||
W = empty((n, amax))
|
||||
P = empty((n, amax))
|
||||
K = empty((o, amax))
|
||||
L = empty((u, amax))
|
||||
var_x = empty((amax,))
|
||||
var_y = empty((amax,))
|
||||
var_z = empty((amax,))
|
||||
|
||||
for a in range(amax):
|
||||
if verbose:
|
||||
print "\n Working on comp. %s" %a
|
||||
u = Y[:,:1]
|
||||
diff = 1
|
||||
MAX_ITER = 100
|
||||
lim = 1e-5
|
||||
niter = 0
|
||||
while (diff>lim and niter<MAX_ITER):
|
||||
niter += 1
|
||||
u1 = u.copy()
|
||||
w = dot(X.T, u)
|
||||
w = w/sqrt(dot(w.T, w))
|
||||
l = dot(Z, w)
|
||||
k = dot(Z.T, l)
|
||||
k = k/sqrt(dot(k.T, k))
|
||||
w = alpha*k + (1-alpha)*w
|
||||
w = w/sqrt(dot(w.T, w))
|
||||
t = dot(X, w)
|
||||
c = dot(Y.T, t)
|
||||
c = c/sqrt(dot(c.T, c))
|
||||
u = dot(Y, c)
|
||||
diff = abs(u1 - u).max()
|
||||
if verbose:
|
||||
print "Converged after %s iterations" %niter
|
||||
tt = dot(t.T, t)
|
||||
p = dot(X.T, t)/tt
|
||||
q = dot(Y.T, t)/tt
|
||||
l = dot(Z, w)
|
||||
|
||||
U[:,a] = u.ravel()
|
||||
W[:,a] = w.ravel()
|
||||
P[:,a] = p.ravel()
|
||||
T[:,a] = t.ravel()
|
||||
Q[:,a] = q.ravel()
|
||||
L[:,a] = l.ravel()
|
||||
K[:,a] = k.ravel()
|
||||
|
||||
X = X - dot(t, p.T)
|
||||
Y = Y - dot(t, q.T)
|
||||
Z = (Z.T - dot(w, l.T)).T
|
||||
|
||||
var_x[a] = pow(X, 2).sum()
|
||||
var_y[a] = pow(Y, 2).sum()
|
||||
var_z[a] = pow(Z, 2).sum()
|
||||
|
||||
B = dot(dot(W, inv(dot(P.T, W))), Q.T)
|
||||
b0 = mnY - dot(mnX, B)
|
||||
|
||||
# variance explained
|
||||
evx = 100.0*(1 - var_x/varX)
|
||||
evy = 100.0*(1 - var_y/varY)
|
||||
evz = 100.0*(1 - var_z/varZ)
|
||||
if scale=='loads':
|
||||
tnorm = apply_along_axis(vnorm, 0, T)
|
||||
T = T/tnorm
|
||||
W = W*tnorm
|
||||
Q = Q*tnorm
|
||||
knorm = apply_along_axis(vnorm, 0, K)
|
||||
L = L*knorm
|
||||
K = K/knorm
|
||||
|
||||
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy, 'evz':evz}
|
||||
|
||||
|
||||
|
||||
|
||||
########### Helper routines #########
|
||||
|
||||
def m_shape(array):
|
||||
return matrix(array).shape
|
||||
|
||||
def esvd(data,economy=1):
|
||||
def esvd(data, economy=1):
|
||||
"""SVD with the option of economy sized calculation
|
||||
Calculate subspaces of X'X or XX' depending on the shape
|
||||
of the matrix.
|
||||
@ -239,17 +354,40 @@ def esvd(data,economy=1):
|
||||
"""
|
||||
m, n = data.shape
|
||||
if m>=n:
|
||||
u, s, vt = svd(dot(data.T, data))
|
||||
data = dot(data.T, data)
|
||||
u, s, vt = svd(data)
|
||||
u = dot(data, vt.T)
|
||||
v = vt.T
|
||||
for i in xrange(n):
|
||||
s[i] = norm(u[:,i])
|
||||
s[i] = vnorm(u[:,i])
|
||||
u[:,i] = u[:,i]/s[i]
|
||||
else:
|
||||
u, s, vt = svd(dot(data, data.T))
|
||||
data = dot(data, data.T)
|
||||
data = (data + data.T)/2.0
|
||||
u, s, vt = svd(data)
|
||||
v = dot(u.T, data)
|
||||
for i in xrange(m):
|
||||
s[i] = norm(v[i,:])
|
||||
s[i] = vnorm(v[i,:])
|
||||
v[i,:] = v[i,:]/s[i]
|
||||
|
||||
return u, s, v
|
||||
return u, s, v.T
|
||||
|
||||
def vnorm(x):
|
||||
# assume column arrays (or vectors)
|
||||
return math.sqrt(dot(x.T, x))
|
||||
|
||||
def center(a, axis):
|
||||
# 0 = col center, 1 = row center, 2 = double center
|
||||
# -1 = nothing
|
||||
if axis==-1:
|
||||
mn = zeros((a.shape[1],))
|
||||
elif axis==0:
|
||||
mn = a.mean(0)
|
||||
elif axis==1:
|
||||
mn = a.mean(1)[:,newaxis]
|
||||
elif axis==2:
|
||||
mn = a.mean(0) + a.mean(1)[:,newaxis] - a.mean()
|
||||
else:
|
||||
raise IOError("input error: axis must be in [-1,0,1,2]")
|
||||
|
||||
return a - mn, mn
|
||||
|
@ -53,6 +53,7 @@ def gene_hypergeo_test(selection, category_dataset):
|
||||
cat_count)
|
||||
|
||||
pvals = scipy.where(cat_count==0, 2, pvals)
|
||||
pvals = scipy.where(scipy.isnan(pvals), 2, pvals)
|
||||
out = {}
|
||||
for i in range(pvals.size):
|
||||
out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i])
|
||||
|
@ -2,7 +2,7 @@ import os,sys
|
||||
from itertools import izip
|
||||
import networkx as NX
|
||||
from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\
|
||||
outer,maximum,sum,diag,real
|
||||
outer,maximum,sum,diag,real,atleast_2d
|
||||
from scipy.linalg import eig,svd,inv,expm,norm
|
||||
from cx_utils import sorted_eig
|
||||
|
||||
@ -378,6 +378,7 @@ Ke = expm(A) .... expm(-A)?
|
||||
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
|
||||
# 13.09.2206: update for use in numpy
|
||||
|
||||
# 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last
|
||||
|
||||
def K_expAdj(W, normalised=True, alpha=1.0):
|
||||
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
|
||||
@ -433,8 +434,8 @@ def K_vonNeumann(W, normalised=True, alpha=1.0):
|
||||
return dot(dot(vr,psigma),vri).astype(t)
|
||||
|
||||
def K_laplacian(W, normalised=True, alpha=1.0):
|
||||
""" This is the matrix square root of the pseudo inverse of L.
|
||||
Also known as th eaverage commute time matrix.
|
||||
""" This is the matrix pseudo inverse of L.
|
||||
Also known as the average commute time matrix.
|
||||
"""
|
||||
W = asarray(W)
|
||||
t = W.dtype.char
|
||||
@ -464,8 +465,7 @@ def K_laplacian(W, normalised=True, alpha=1.0):
|
||||
return K
|
||||
|
||||
|
||||
|
||||
def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5):
|
||||
def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
|
||||
"""Returns diffusion kernel.
|
||||
input:
|
||||
-- W, adj. matrix
|
||||
@ -477,27 +477,45 @@ def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5):
|
||||
t = W.dtype.char
|
||||
if len(W.shape)!=2:
|
||||
raise ValueError, "Non-matrix input to matrix function."
|
||||
m,n = W.shape
|
||||
m, n = W.shape
|
||||
if t in ['F','D']:
|
||||
raise TypeError, "Complex input!"
|
||||
D = diag(sum(W,0))
|
||||
L = D-W
|
||||
D = diag(W.sum(0))
|
||||
L = D - W
|
||||
if normalised==True:
|
||||
T = diag(sqrt(1./(sum(W,0))))
|
||||
L = dot(dot(T,L),T)
|
||||
e,vr = eig(L)
|
||||
T = diag(sqrt(1./W.sum(0)))
|
||||
L = dot(dot(T, L), T)
|
||||
e, vr = eig(L)
|
||||
vri = inv(vr) #inv
|
||||
cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
|
||||
cutoff = 1.*abs(cond*maximum.reduce(e))
|
||||
psigma = eye(m) # if sing vals are 0 exp(0)=1 (unnecessary)
|
||||
psigma = eye(m) # if eigvals are 0 exp(0)=1 (unnecessary)
|
||||
#psigma = zeros((m,n), dtype='<f8')
|
||||
for i in range(len(e)):
|
||||
if abs(e[i]) > cutoff:
|
||||
psigma[i,i] = exp(-beta*e[i])
|
||||
#else:
|
||||
# psigma[i,i] = 0.0
|
||||
K = real(dot(dot(vr, psigma), vri))
|
||||
I = eye(n, dtype='<f8')
|
||||
K = (1. - alpha)*I + alpha*K
|
||||
return K
|
||||
|
||||
def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
|
||||
"""Returns diffusion kernel, using fast pade approximation.
|
||||
input:
|
||||
-- W, adj. matrix
|
||||
-- normalised [True/False]
|
||||
-- beta, [0->), (diffusion degree)
|
||||
"""
|
||||
|
||||
D = diag(W.sum(0))
|
||||
L = D - W
|
||||
if normalised==True:
|
||||
T = diag(sqrt(1./W.sum(0)))
|
||||
L = dot(dot(T, L), T)
|
||||
return expm(-beta*L)
|
||||
|
||||
|
||||
def K_modularity(W,alpha=1.0):
|
||||
""" Returns the matrix square root of Newmans modularity."""
|
||||
@ -530,3 +548,20 @@ def kernel_score(K, W):
|
||||
score = diag(dot(W, dot(K, W)) )
|
||||
tot = sum(score)
|
||||
return score, tot
|
||||
|
||||
|
||||
def modularity_matrix(G, nodelist=None):
|
||||
if not nodelist:
|
||||
nodelist = G.nodes()
|
||||
else:
|
||||
G = NX.subgraph(G, nodelist)
|
||||
|
||||
A = NX.adj_matrix(G, nodelist=nodelist)
|
||||
d = atleast_2d(G.degree(nbunch=nodelist))
|
||||
m = 1.*G.number_of_edges()
|
||||
B = A - A/m
|
||||
return B
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -41,30 +41,31 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No
|
||||
"""Random block crossvalidation
|
||||
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
||||
"""
|
||||
index = randperm(a.shape[axis])
|
||||
#index = randperm(a.shape[axis])
|
||||
index = arange(a.shape[axis])
|
||||
if n_blocks==None:
|
||||
n_blocks = a.shape[axis]
|
||||
n_in_set = ceil(float(a.shape[axis])/n_blocks)
|
||||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
|
||||
for out in out_ind_sets:
|
||||
inn = [i for i in index if i not in out]
|
||||
acal = a.take(inn, 0)
|
||||
atrue = a.take(out, 0)
|
||||
bcal = b.take(inn, 0)
|
||||
btrue = b.take(out, 0)
|
||||
if center:
|
||||
mn_a = acal.mean(0)[newaxis]
|
||||
acal = acal - mn_a
|
||||
atrue = atrue - mn_a
|
||||
mn_b = bcal.mean(0)[newaxis]
|
||||
bcal = bcal - mn_b
|
||||
btrue = btrue - mn_b
|
||||
if metric!=None:
|
||||
acal = dot(acal, metric)
|
||||
if index_out:
|
||||
yield acal, atrue, bcal, btrue, out
|
||||
else:
|
||||
yield acal, atrue, bcal, btrue
|
||||
inn = [i for i in index if i not in out]
|
||||
acal = a.take(inn, 0)
|
||||
atrue = a.take(out, 0)
|
||||
bcal = b.take(inn, 0)
|
||||
btrue = b.take(out, 0)
|
||||
if center:
|
||||
mn_a = acal.mean(0)[newaxis]
|
||||
acal = acal - mn_a
|
||||
atrue = atrue - mn_a
|
||||
mn_b = bcal.mean(0)[newaxis]
|
||||
bcal = bcal - mn_b
|
||||
btrue = btrue - mn_b
|
||||
if metric!=None:
|
||||
acal = dot(acal, metric)
|
||||
if index_out:
|
||||
yield acal, atrue, bcal, btrue, out
|
||||
else:
|
||||
yield acal, atrue, bcal, btrue
|
||||
|
||||
|
||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
|
||||
@ -151,6 +152,7 @@ def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
|
||||
index = arange(m)
|
||||
dummy = map(random.shuffle, array_split(index, blocks))
|
||||
a_out = a.take(index, axis)
|
||||
|
||||
if index_out:
|
||||
yield a_out, index
|
||||
else:
|
||||
@ -164,7 +166,8 @@ def shuffle_1d(a, n_sets, axis=0):
|
||||
m = a.shape[axis]
|
||||
for ii in xrange(n_sets):
|
||||
index = randperm(m)
|
||||
yield a.take(index, axis)
|
||||
a = a.take(index, axis)
|
||||
yield a
|
||||
|
||||
def diag_pert(a, n_sets=10, center=True, index_out=False):
|
||||
"""Alter generator returning sets perturbed with means at diagonals.
|
||||
@ -205,18 +208,17 @@ def diag_pert(a, n_sets=10, center=True, index_out=False):
|
||||
else:
|
||||
yield a_out
|
||||
|
||||
|
||||
|
||||
def outerprod_centering(aat, ret_mn=True):
|
||||
"""Returns mean centered symmetric outerproduct matrix.
|
||||
"""Returns double centered symmetric outerproduct matrix.
|
||||
"""
|
||||
n = aat.shape[0]
|
||||
h = aat.sum(0)[:,newaxis]
|
||||
h = (h - mean(h)/2)/n
|
||||
mn_a = h + h.T
|
||||
h = aat.mean(0)[newaxis]
|
||||
h = h - 0.5*h.mean()
|
||||
mn_a = h + h.T # beauty of broadcasting
|
||||
aatc = aat - mn_a
|
||||
if ret_mn:
|
||||
return aatc, aat.mean(0)
|
||||
return aat - mn_a
|
||||
return aatc, mn_a
|
||||
return aatc
|
||||
|
||||
|
||||
|
||||
|
@ -12,11 +12,47 @@ from cx_utils import m_shape
|
||||
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||
"""Returns rmsep and aopt for pls tailored for wide X.
|
||||
|
||||
The root mean square error of cross validation is calculated
|
||||
based on random block cross-validation. With number of blocks equal to
|
||||
number of samples [default] gives leave-one-out cv.
|
||||
The pls model is based on the simpls algorithm for wide X.
|
||||
|
||||
comments:
|
||||
-- X, Y inputs need to be centered (fixme: check)
|
||||
:Parameters:
|
||||
X : ndarray
|
||||
column centered data matrix of size (samples x variables)
|
||||
Y : ndarray
|
||||
column centered response matrix of size (samples x responses)
|
||||
amax : scalar
|
||||
Maximum number of components
|
||||
n_blocks : scalar
|
||||
Number of blocks in cross validation
|
||||
|
||||
:Returns:
|
||||
rmsep : ndarray
|
||||
Root Mean Square Error of cross-validated Predictions
|
||||
aopt : scalar
|
||||
Guestimate of the optimal number of components
|
||||
|
||||
:SeeAlso:
|
||||
- pls_cv_val : Same output, not optimised for wide X
|
||||
- w_simpls : Simpls algorithm for wide X
|
||||
|
||||
Notes
|
||||
-----
|
||||
Based (cowardly translated) on m-files from the Chemoact toolbox
|
||||
X, Y inputs need to be centered (fixme: check)
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import numpy as n
|
||||
>>> X = n.array([[1., 2., 3.],[]])
|
||||
>>> Y = n.array([[1., 2., 3.],[]])
|
||||
>>> w_pls(X, Y, 1)
|
||||
[4,5,6], 1
|
||||
"""
|
||||
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='f')
|
||||
if n_blocks==None:
|
||||
@ -30,7 +66,7 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||
if algo=='simpls':
|
||||
dat = w_simpls(Din, Yin, amax)
|
||||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
||||
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
@ -40,21 +76,13 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||
E = Yout[:,j][:,newaxis] - TQ
|
||||
E = E + sum(E, 0)/Din.shape[0]
|
||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||
#Yhat = Y - dot(That,Q.T)
|
||||
Yhat = Y - dot(That,Q.T)
|
||||
rmsep = sqrt(PRESS/Y.shape[0])
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
return rmsep, aopt
|
||||
return rmsep, Yhat, aopt
|
||||
|
||||
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||
""" Validation results of pls model.
|
||||
|
||||
|
||||
|
||||
comments:
|
||||
-- X, Y inputs need to be centered (fixme: check)
|
||||
|
||||
|
||||
"""
|
||||
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||
EE = zeros((amax, k, l), dtype='<f8')
|
||||
@ -79,7 +107,30 @@ def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||
|
||||
rmsep = sqrt(PRESS/(k-1.))
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
return rmsep, aopt
|
||||
return rmsep, Yhat, aopt
|
||||
|
||||
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
|
||||
"""Performs crossvalidation to get generalisation error in lpls"""
|
||||
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
|
||||
k, l = Y.shape
|
||||
Yhat = empty((a_max,k,l), 'd')
|
||||
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
|
||||
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
||||
a_max=a_max,
|
||||
alpha=alpha,
|
||||
mean_ctr=[2,0,1],
|
||||
verbose=False)
|
||||
for a in range(a_max):
|
||||
Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
|
||||
Yhat_class = zeros_like(Yhat)
|
||||
for a in range(a_max):
|
||||
for i in range(k):
|
||||
Yhat_class[a,i,argmax(Yhat[a,i,:])]=1.0
|
||||
class_err = 100*((Yhat_class+Y)==2).sum(1)/Y.sum(0).astype('d')
|
||||
sep = (Y - Yhat)**2
|
||||
rmsep = sqrt(sep.mean(1))
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
return rmsep, Yhat, aopt
|
||||
|
||||
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
||||
"""Pca validation by altering elements in X.
|
||||
@ -146,8 +197,7 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
||||
if n_blocks == None:
|
||||
n_blocks = b.shape[0]
|
||||
|
||||
Wcv = empty((n_blocks, a.shape[1], amax), dtype='f')
|
||||
|
||||
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
|
||||
if use_pack and metric==None:
|
||||
u, s, inflater = svd(a, full_matrices=0)
|
||||
a = u*s
|
||||
@ -161,11 +211,10 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
||||
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
||||
|
||||
W = dat['W']
|
||||
|
||||
if use_pack and metric==None:
|
||||
W = dot(inflater.T, W)
|
||||
|
||||
Wcv[nn,:,:] = W
|
||||
Wcv[nn,:,:] = W[:,:,]
|
||||
|
||||
return Wcv
|
||||
|
||||
@ -200,6 +249,29 @@ def pca_jkP(a, aopt, n_blocks=None, metric=None):
|
||||
return PP
|
||||
|
||||
|
||||
def lpls_jk(X, Y, Z, a_max, nsets=None, alpha=.5):
|
||||
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False)
|
||||
m, n = X.shape
|
||||
k, l = Y.shape
|
||||
o, p = Z.shape
|
||||
if nsets==None:
|
||||
nsets = m
|
||||
WWx = empty((nsets, n, a_max), 'd')
|
||||
WWz = empty((nsets, o, a_max), 'd')
|
||||
#WWy = empty((nsets, l, a_max), 'd')
|
||||
for i, (xcal,xi,ycal,yi) in enumerate(cv_iter):
|
||||
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
||||
a_max=a_max,
|
||||
alpha=alpha,
|
||||
mean_ctr=[2,0,1],
|
||||
scale='loads',
|
||||
verbose=False)
|
||||
WWx[i,:,:] = W
|
||||
WWz[i,:,:] = L
|
||||
#WWy[i,:,:] = Q
|
||||
|
||||
return WWx, WWz
|
||||
|
||||
def find_aopt_from_sep(sep, method='75perc'):
|
||||
"""Returns an estimate of optimal number of components from rmsecv.
|
||||
"""
|
||||
|
Reference in New Issue
Block a user