Lib updates
This commit is contained in:
parent
7ea87e646a
commit
a05d0faa0d
|
@ -10,7 +10,7 @@ from fluents.workflow import Function, OptionsDialog, Options
|
||||||
from fluents.dataset import Dataset
|
from fluents.dataset import Dataset
|
||||||
from fluents import plots, dataset, workflow, logger
|
from fluents import plots, dataset, workflow, logger
|
||||||
import scipy
|
import scipy
|
||||||
from engines import pca, pls
|
from engines import pca, pls, nipals_lpls
|
||||||
from cx_stats import leverage, variances, hotelling
|
from cx_stats import leverage, variances, hotelling
|
||||||
from cx_utils import mat_center
|
from cx_utils import mat_center
|
||||||
from validation import *
|
from validation import *
|
||||||
|
@ -238,14 +238,14 @@ class PLS(Model):
|
||||||
"""Estimates cut off on significant vars by controlling fdr."""
|
"""Estimates cut off on significant vars by controlling fdr."""
|
||||||
|
|
||||||
if self._options['calc_qvals']==True:
|
if self._options['calc_qvals']==True:
|
||||||
qvals_sorted, qvals = pls_qvals(a, b,
|
qvals = pls_qvals(a, b,
|
||||||
aopt=None,
|
aopt=None,
|
||||||
alpha=reg,
|
alpha=reg,
|
||||||
n_iter=n_iter,
|
n_iter=n_iter,
|
||||||
algo='pls',
|
algo='pls',
|
||||||
sim_method=sim_method)
|
sim_method=sim_method)
|
||||||
self.model['qval'] = qvals
|
self.model['qval'] = qvals
|
||||||
self.model['qval_sorted'] = qvals_sorted
|
#self.model['qval_sorted'] = qvals_sorted
|
||||||
else:
|
else:
|
||||||
self.model['qval'] = None
|
self.model['qval'] = None
|
||||||
self.model['qval_sorted'] = None
|
self.model['qval_sorted'] = None
|
||||||
|
@ -276,18 +276,19 @@ class PLS(Model):
|
||||||
pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))]
|
pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))]
|
||||||
zero_dim = ['_doe',['0']] # null dim, vector (hidden)
|
zero_dim = ['_doe',['0']] # null dim, vector (hidden)
|
||||||
|
|
||||||
match_ids = {'E':[ids_0, ids_1],
|
match_ids = {'E' : [ids_0, ids_1],
|
||||||
'P':[ids_1, pc_ids],
|
'P' : [ids_1, pc_ids],
|
||||||
'T':[ids_0, pc_ids],
|
'T' : [ids_0, pc_ids],
|
||||||
'W': [ids_1, pc_ids],
|
'W' : [ids_1, pc_ids],
|
||||||
'R': [ids_1, pc_ids],
|
'R' : [ids_1, pc_ids],
|
||||||
'Q':[ids_3, pc_ids],
|
'Q' : [ids_3, pc_ids],
|
||||||
'F':[ids_0, ids_3],
|
'F' : [ids_0, ids_3],
|
||||||
'B':[ids_1, ids_3],
|
'B' : [ids_1, ids_3],
|
||||||
'qval':[ids_1, zero_dim],
|
'qval' : [ids_1, zero_dim],
|
||||||
'qval_sorted':[ids_1, zero_dim],
|
'qval_sorted':[ids_1, zero_dim],
|
||||||
'w_tsq':[ids_1, zero_dim],
|
'w_tsq' : [ids_1, zero_dim],
|
||||||
'rmsep':[ids_3, pc_ids],
|
'rmsep' : [ids_3, pc_ids],
|
||||||
|
'CP': [ids_1, pc_ids]
|
||||||
}
|
}
|
||||||
|
|
||||||
array = self.model[name]
|
array = self.model[name]
|
||||||
|
@ -302,7 +303,7 @@ class PLS(Model):
|
||||||
#except:
|
#except:
|
||||||
# logger.log('debug', 'Plot: %s failed' %plt)
|
# logger.log('debug', 'Plot: %s failed' %plt)
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def run_o(self, a, b):
|
def run_o(self, a, b):
|
||||||
"""Run PLS with present options."""
|
"""Run PLS with present options."""
|
||||||
options = self._options
|
options = self._options
|
||||||
|
@ -330,6 +331,17 @@ class PLS(Model):
|
||||||
self.model['var_y'] = var_y
|
self.model['var_y'] = var_y
|
||||||
self.model['exp_var_y'] = exp_var_y
|
self.model['exp_var_y'] = exp_var_y
|
||||||
|
|
||||||
|
if options['calc_corrloads']:
|
||||||
|
corr_load = scipy.empty_like(self.model['P'].copy())
|
||||||
|
T = self.model['T']
|
||||||
|
X = self._data['X']
|
||||||
|
# For each variable/attribute in original matrix (not meancentered)
|
||||||
|
for i,score in enumerate(T.T):
|
||||||
|
for j, profile in enumerate(X.T):
|
||||||
|
corrs = scipy.corrcoef(score, profile)
|
||||||
|
corr_load[j,i] = corrs[0,1]
|
||||||
|
self.model['CP'] = corr_load
|
||||||
|
|
||||||
if options['calc_conf']:
|
if options['calc_conf']:
|
||||||
self.confidence(**options.confidence_options())
|
self.confidence(**options.confidence_options())
|
||||||
|
|
||||||
|
@ -353,6 +365,141 @@ class PLS(Model):
|
||||||
#run with current data and options
|
#run with current data and options
|
||||||
return self.run_o(a, b)
|
return self.run_o(a, b)
|
||||||
|
|
||||||
|
class LPLS(Model):
|
||||||
|
def __init__(self, id='lpls', name='LPLS'):
|
||||||
|
Model.__init__(self, id, name)
|
||||||
|
self._options = LplsOptions()
|
||||||
|
|
||||||
|
def validation(self, opt):
|
||||||
|
"""Returns rmsep for lpls model.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if opt['calc_cv']==True:
|
||||||
|
val_engine = opt['val_engine']
|
||||||
|
rmsep, aopt = val_engine(self.model['X'], self.model['Y'],
|
||||||
|
self.model['Z'], opt['amax'], opt['n_sets'], opt['xz_alpha'])
|
||||||
|
self.model['rmsep'] = rmsep
|
||||||
|
self.model['aopt'] = aopt
|
||||||
|
else:
|
||||||
|
self.model['rmsep'] = None
|
||||||
|
self.model['aopt'] = opt['aopt']
|
||||||
|
|
||||||
|
def confidence(self, opt):
|
||||||
|
"""Returns a confidence measure for model parameters
|
||||||
|
Supported parameters: W
|
||||||
|
"""
|
||||||
|
aopt = self.model['aopt']
|
||||||
|
if opt['calc_conf']:
|
||||||
|
Wx, Wz = lpls_jk(self.model['X'], self.model['Y'], self.model['Z'], aopt, n_sets)
|
||||||
|
Wcal = self.model['W'][:,:aopt]
|
||||||
|
Lcal = self.model['L'][:,:aopt]
|
||||||
|
# ensure that Wcal is scaled
|
||||||
|
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
|
||||||
|
Wcal = Wcal*tnorm
|
||||||
|
a,b,c,d,e = opt['p_center'], opt['crot'], opt['alpha'], opt['strict'], opt['cov_center']
|
||||||
|
tsqx = hotelling(Wx, Wcal, a,b,c,d,e)
|
||||||
|
tsqz = hotelling(Wz, Lcal, a,b,c,d,e)
|
||||||
|
self.model['tsqx'] = tsqx
|
||||||
|
self.model['tsqz'] = tsqz
|
||||||
|
else:
|
||||||
|
self.model['tsqx'] = None
|
||||||
|
self.model['tsqz'] = None
|
||||||
|
|
||||||
|
def permutation_confidence(self, opt):
|
||||||
|
"""Estimates cut off on significant vars by controlling fdr.
|
||||||
|
|
||||||
|
"""
|
||||||
|
self.model['qval'] = None
|
||||||
|
self.model['qval_sorted'] = None
|
||||||
|
|
||||||
|
def make_model(self, opt):
|
||||||
|
"""Make model on amax components.
|
||||||
|
"""
|
||||||
|
engine = opt['engine']
|
||||||
|
dat = engine(self._data['X'], self._data['Y'], self._data['Z'],
|
||||||
|
opt['amax'], opt['xz_alpha'], opt['center_mth'],
|
||||||
|
opt['mode'], opt['scale'], False)
|
||||||
|
self.model.update(dat)
|
||||||
|
|
||||||
|
def as_dataset(self, name, dtype='Dataset'):
|
||||||
|
"""Return any model parameter as Dataset
|
||||||
|
No ids matching
|
||||||
|
"""
|
||||||
|
if name not in self.model.keys():
|
||||||
|
return
|
||||||
|
DX, DY, DZ = self._dataset['X'], self._dataset['Y'], self._dataset['Z']
|
||||||
|
dim_name_0, dim_name_1 = DX.get_dim_name()
|
||||||
|
dim_name_2, dim_name_3 = DY.get_dim_name()
|
||||||
|
dim_name_4, dim_name_5 = DZ.get_dim_name()
|
||||||
|
#samples
|
||||||
|
ids_0 = [dim_name_0, DX.get_identifiers(dim_name_0, sorted=True)]
|
||||||
|
# x vars (genes)
|
||||||
|
ids_1 = [dim_name_1, DX.get_identifiers(dim_name_1, sorted=True)]
|
||||||
|
# y vars (sample descriptors)
|
||||||
|
ids_3 = [dim_name_3, DY.get_identifiers(dim_name_3, sorted=True)]
|
||||||
|
#z-vars (variable descriptors)
|
||||||
|
ids_4 = [dim_name_4, DZ.get_identifiers(dim_name_4, sorted=True)]
|
||||||
|
# components (hidden)
|
||||||
|
pc_ids = ['_comp', map(str, range(self._options['amax']))]
|
||||||
|
pc_ids_opt = ['_comp', map(str, range(self.model['aopt']))]
|
||||||
|
zero_dim = ['_doe',['0']] # null dim, vector (hidden)
|
||||||
|
|
||||||
|
match_ids = {'E' : [ids_0, ids_1],
|
||||||
|
'P' : [ids_1, pc_ids],
|
||||||
|
'T' : [ids_0, pc_ids],
|
||||||
|
'W' : [ids_1, pc_ids],
|
||||||
|
'L' : [ids_4, pc_ids],
|
||||||
|
'Q' : [ids_3, pc_ids],
|
||||||
|
'F' : [ids_0, ids_3],
|
||||||
|
'B' : [ids_1, ids_3],
|
||||||
|
'tsqx' : [ids_1, zero_dim],
|
||||||
|
'tsqz' : [ids_4, zero_dim],
|
||||||
|
'K' : [ids_1, pc_ids],
|
||||||
|
'rmsep' : [ids_3, pc_ids]
|
||||||
|
}
|
||||||
|
|
||||||
|
array = self.model[name]
|
||||||
|
M = Dataset(array, identifiers=match_ids[name], name=name)
|
||||||
|
return M
|
||||||
|
|
||||||
|
def get_out_plots(self, options):
|
||||||
|
out=[]
|
||||||
|
for plt in options['out_plots']:
|
||||||
|
out.append(plt(self))
|
||||||
|
return out
|
||||||
|
|
||||||
|
def run(self, a, b, c):
|
||||||
|
"""Run L-PLS with present options."""
|
||||||
|
options = self._options
|
||||||
|
self._dataset['X'] = a
|
||||||
|
self._dataset['Y'] = b
|
||||||
|
self._dataset['Z'] = c
|
||||||
|
self._data['X'] = a.asarray()
|
||||||
|
self._data['Y'] = b.asarray()
|
||||||
|
self._data['Z'] = c.asarray()
|
||||||
|
self.validation(options)
|
||||||
|
self.make_model(options)
|
||||||
|
if options['calc_conf']:
|
||||||
|
self.confidence(options)
|
||||||
|
|
||||||
|
out = [self.as_dataset(p) for p in options['out_data']]
|
||||||
|
for plt in self.get_out_plots(options):
|
||||||
|
out.append(plt)
|
||||||
|
return out
|
||||||
|
|
||||||
|
def run_gui(self, a, b, c):
|
||||||
|
"""Run LPLS with option gui.
|
||||||
|
"""
|
||||||
|
dialog = LPlsOptionsDialog([a, b, c], self._options)
|
||||||
|
dialog.show_all()
|
||||||
|
response = dialog.run()
|
||||||
|
dialog.hide()
|
||||||
|
|
||||||
|
if response == gtk.RESPONSE_OK:
|
||||||
|
# set output data and plots
|
||||||
|
dialog.set_output()
|
||||||
|
#run with current data and options
|
||||||
|
return self.run(a, b, c)
|
||||||
|
|
||||||
class PcaOptions(Options):
|
class PcaOptions(Options):
|
||||||
"""Options for Principal Component Analysis.
|
"""Options for Principal Component Analysis.
|
||||||
|
@ -403,7 +550,9 @@ class PcaOptions(Options):
|
||||||
]
|
]
|
||||||
|
|
||||||
opt['out_data'] = ['T','P', 'p_tsq']
|
opt['out_data'] = ['T','P', 'p_tsq']
|
||||||
opt['out_plots'] = [blmplots.PcaScorePlot,blmplots.PcaLoadingPlot,blmplots.LineViewXc]
|
opt['out_plots'] = [blmplots.PcaScorePlot,
|
||||||
|
blmplots.PcaLoadingPlot,
|
||||||
|
blmplots.LineViewXc]
|
||||||
|
|
||||||
self.update(opt)
|
self.update(opt)
|
||||||
|
|
||||||
|
@ -444,6 +593,7 @@ class PlsOptions(Options):
|
||||||
opt['center_mth'] = mat_center
|
opt['center_mth'] = mat_center
|
||||||
opt['scale'] = 'scores'
|
opt['scale'] = 'scores'
|
||||||
|
|
||||||
|
opt['calc_corrloads'] = True
|
||||||
opt['calc_conf'] = False
|
opt['calc_conf'] = False
|
||||||
opt['n_sets'] = 5
|
opt['n_sets'] = 5
|
||||||
opt['strict'] = True
|
opt['strict'] = True
|
||||||
|
@ -468,7 +618,8 @@ class PlsOptions(Options):
|
||||||
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
||||||
(blmplots.LineViewXc, 'Line view', True),
|
(blmplots.LineViewXc, 'Line view', True),
|
||||||
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||||
(blmplots.RMSEPPlot, 'RMSEP', False)
|
(blmplots.RMSEPPlot, 'RMSEP', False),
|
||||||
|
(blmplots.PlsCorrelationLoadingPlot, 'Corr. loadings', True)
|
||||||
]
|
]
|
||||||
|
|
||||||
opt['out_data'] = ['T','P', 'p_tsq']
|
opt['out_data'] = ['T','P', 'p_tsq']
|
||||||
|
@ -494,14 +645,87 @@ class PlsOptions(Options):
|
||||||
'strict', 'crot', 'cov_center']
|
'strict', 'crot', 'cov_center']
|
||||||
return self._copy_from_list(opt_list)
|
return self._copy_from_list(opt_list)
|
||||||
|
|
||||||
|
|
||||||
|
def permutation_confidence(self):
|
||||||
|
opt_list = ['q_pert_method', 'q_iter']
|
||||||
|
return self._copy_from_list(opt_list)
|
||||||
|
|
||||||
|
|
||||||
|
class LplsOptions(Options):
|
||||||
|
"""Options for L-shaped Partial Least Squares Regression.
|
||||||
|
"""
|
||||||
|
def __init__(self):
|
||||||
|
Options.__init__(self)
|
||||||
|
self._set_default()
|
||||||
|
|
||||||
|
def _set_default(self):
|
||||||
|
opt = {}
|
||||||
|
opt['engine'] = nipals_lpls
|
||||||
|
opt['mode'] = 'normal' # how much info to calculate
|
||||||
|
opt['amax'] = 10
|
||||||
|
opt['aopt'] = 9
|
||||||
|
opt['xz_alpha'] = .5
|
||||||
|
opt['auto_aopt'] = False
|
||||||
|
opt['center'] = True
|
||||||
|
opt['center_mth'] = [2, 0, 1]
|
||||||
|
opt['scale'] = 'scores'
|
||||||
|
opt['calc_conf'] = False
|
||||||
|
opt['n_sets'] = 7
|
||||||
|
opt['strict'] = False
|
||||||
|
opt['p_center'] = 'med'
|
||||||
|
opt['alpha'] = .3
|
||||||
|
opt['cov_center'] = 'med'
|
||||||
|
opt['crot'] = True
|
||||||
|
|
||||||
|
opt['calc_cv'] = False
|
||||||
|
opt['cv_val_method'] = 'random'
|
||||||
|
opt['cv_val_sets'] = opt['n_sets']
|
||||||
|
|
||||||
|
opt['all_data'] = [('T', 'scores', True),
|
||||||
|
('Wx', 'X-weights', True),
|
||||||
|
('Wz', 'Z-weights', True),
|
||||||
|
('E','residuals', False),
|
||||||
|
('tsq_x', 't2X', False),
|
||||||
|
('rmsep', 'RMSEP', False)
|
||||||
|
]
|
||||||
|
|
||||||
|
# (class, name, sensitive, ticked)
|
||||||
|
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
||||||
|
(blmplots.PlsLoadingPlot, 'Loadings', True),
|
||||||
|
(blmplots.LineViewXc, 'Line view', True),
|
||||||
|
(blmplots.PredictionErrorPlot, 'Residual Error', False),
|
||||||
|
(blmplots.RMSEPPlot, 'RMSEP', False),
|
||||||
|
(blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False)
|
||||||
|
]
|
||||||
|
|
||||||
|
opt['out_data'] = ['T','P']
|
||||||
|
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc]
|
||||||
|
|
||||||
|
#opt['out_data'] = None
|
||||||
|
|
||||||
|
opt['pack'] = False
|
||||||
|
opt['calc_qvals'] = False
|
||||||
|
opt['q_pert_method'] = 'shuffle_rows'
|
||||||
|
opt['q_iter'] = 20
|
||||||
|
|
||||||
|
self.update(opt)
|
||||||
|
|
||||||
|
def make_model_options(self):
|
||||||
|
"""Options for make_model method."""
|
||||||
|
opt_list = ['scale','mode', 'amax', 'engine']
|
||||||
|
return self._copy_from_list(opt_list)
|
||||||
|
|
||||||
|
def confidence_options(self):
|
||||||
|
"""Options for confidence method."""
|
||||||
|
opt_list = ['n_sets', 'aopt', 'alpha', 'p_center',
|
||||||
|
'strict', 'crot', 'cov_center']
|
||||||
|
return self._copy_from_list(opt_list)
|
||||||
|
|
||||||
def validation_options(self):
|
def validation_options(self):
|
||||||
"""Options for pre_validation method."""
|
"""Options for pre_validation method."""
|
||||||
opt_list = ['amax', 'n_sets', 'cv_val_method']
|
opt_list = ['amax', 'n_sets', 'cv_val_method']
|
||||||
return self._copy_from_list(opt_list)
|
return self._copy_from_list(opt_list)
|
||||||
|
|
||||||
def permutation_confidence(self):
|
|
||||||
opt_list = ['q_pert_method', 'q_iter']
|
|
||||||
return self._copy_from_list(opt_list)
|
|
||||||
|
|
||||||
class PcaOptionsDialog(OptionsDialog):
|
class PcaOptionsDialog(OptionsDialog):
|
||||||
"""Options dialog for Principal Component Analysis.
|
"""Options dialog for Principal Component Analysis.
|
||||||
|
@ -716,6 +940,210 @@ class PcaOptionsDialog(OptionsDialog):
|
||||||
self._options['strict'] = True
|
self._options['strict'] = True
|
||||||
|
|
||||||
|
|
||||||
|
class LplsOptionsDialog(OptionsDialog):
|
||||||
|
"""Options dialog for L-shaped Partial Least squares regression.
|
||||||
|
"""
|
||||||
|
def __init__(self, data, options, input_names=['X', 'Y', 'Z']):
|
||||||
|
OptionsDialog.__init__(self, data, options, input_names)
|
||||||
|
glade_file = os.path.join(fluents.DATADIR, 'lpls_options.glade')
|
||||||
|
|
||||||
|
notebook_name = "vbox1"
|
||||||
|
page_name = "Options"
|
||||||
|
self.add_page_from_glade(glade_file, notebook_name, page_name)
|
||||||
|
# connect signals to handlers
|
||||||
|
dic = {"on_amax_value_changed" : self.on_amax_changed,
|
||||||
|
"on_aopt_value_changed" : self.on_aopt_changed,
|
||||||
|
"auto_aopt_toggled" : self.auto_aopt_toggled,
|
||||||
|
"center_toggled" : self.center_toggled,
|
||||||
|
#"on_scale_changed" : self.on_scale_changed,
|
||||||
|
"on_val_none" : self.val_toggled,
|
||||||
|
"on_val_cv" : self.cv_toggled,
|
||||||
|
"on_cv_method_changed" : self.on_cv_method_changed,
|
||||||
|
"on_cv_sets_changed" : self.on_cv_sets_changed,
|
||||||
|
"on_conf_toggled" : self.conf_toggled,
|
||||||
|
"on_subset_loc_changed" : self.on_subset_loc_changed,
|
||||||
|
"on_cov_loc_changed" : self.on_cov_loc_changed,
|
||||||
|
"on_alpha_changed" : self.on_alpha_changed,
|
||||||
|
"on_rot_changed" : self.on_rot_changed,
|
||||||
|
"on__toggled" : self.conf_toggled,
|
||||||
|
"on_qval_changed" : self.on_qval_changed,
|
||||||
|
"on_iter_changed" : self.on_iter_changed
|
||||||
|
}
|
||||||
|
|
||||||
|
self.wTree.signal_autoconnect(dic)
|
||||||
|
|
||||||
|
# set/ensure valid default values/ranges
|
||||||
|
#
|
||||||
|
amax_sb = self.wTree.get_widget("amax_spinbutton")
|
||||||
|
max_comp = min(data[0].shape) # max num of components
|
||||||
|
if self._options['amax']>max_comp:
|
||||||
|
logger.log('debug', 'amax default too large ... adjusting')
|
||||||
|
self._options['amax'] = max_comp
|
||||||
|
amax_sb.get_adjustment().set_all(self._options['amax'], 1, max_comp, 1, 0, 0)
|
||||||
|
# aopt spin button
|
||||||
|
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||||
|
if self._options['aopt']>self._options['amax']:
|
||||||
|
self._options['aopt'] = self._options['amax'] + 1 - 1
|
||||||
|
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0)
|
||||||
|
|
||||||
|
# scale
|
||||||
|
# scale_cb = self.wTree.get_widget("scale_combobox")
|
||||||
|
# scale_cb.set_active(0)
|
||||||
|
|
||||||
|
# validation frames
|
||||||
|
if self._options['calc_cv']==False:
|
||||||
|
cv_frame = self.wTree.get_widget("cv_frame")
|
||||||
|
cv_frame.set_sensitive(False)
|
||||||
|
|
||||||
|
cv = self.wTree.get_widget("cv_method").set_active(0)
|
||||||
|
|
||||||
|
# confidence
|
||||||
|
if self._options['calc_conf']==True:
|
||||||
|
self.wTree.get_widget("subset_expander").set_sensitive(True)
|
||||||
|
else:
|
||||||
|
self.wTree.get_widget("subset_expander").set_sensitive(False)
|
||||||
|
|
||||||
|
cb = self.wTree.get_widget("subset_loc")
|
||||||
|
_m = {'med': 0, 'mean': 1, 'full_model': 2}
|
||||||
|
cb.set_active(_m.get(self._options['p_center']))
|
||||||
|
|
||||||
|
cb = self.wTree.get_widget("cov_loc")
|
||||||
|
_m = {'med': 0, 'mean': 1}
|
||||||
|
cb.set_active(_m.get(self._options['cov_center']))
|
||||||
|
|
||||||
|
hs = self.wTree.get_widget("alpha_scale")
|
||||||
|
hs.set_value(self._options['alpha'])
|
||||||
|
|
||||||
|
tb = self.wTree.get_widget("qvals")
|
||||||
|
tb.set_sensitive(True)
|
||||||
|
|
||||||
|
|
||||||
|
def on_amax_changed(self, sb):
|
||||||
|
logger.log("debug", "amax changed: new value: %s" %sb.get_value_as_int())
|
||||||
|
amax = sb.get_value_as_int()
|
||||||
|
# update aopt if needed
|
||||||
|
if amax<self._options['aopt']:
|
||||||
|
self._options['aopt'] = amax
|
||||||
|
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||||
|
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, amax, 1, 0, 0)
|
||||||
|
self._options['amax'] = sb.get_value_as_int()
|
||||||
|
|
||||||
|
def on_aopt_changed(self, sb):
|
||||||
|
aopt = sb.get_value_as_int()
|
||||||
|
self._options['aopt'] = aopt
|
||||||
|
|
||||||
|
def auto_aopt_toggled(self, tb):
|
||||||
|
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['auto_aopt'] = True
|
||||||
|
aopt_sb.set_sensitive(False)
|
||||||
|
else:
|
||||||
|
self._options['auto_aopt'] = False
|
||||||
|
aopt_sb.set_sensitive(True)
|
||||||
|
|
||||||
|
def center_toggled(self, tb):
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['center'] = True
|
||||||
|
else:
|
||||||
|
logger.log("debug", "centering set to False")
|
||||||
|
self._options['center'] = False
|
||||||
|
|
||||||
|
#def on_scale_changed(self, cb):
|
||||||
|
# scale = cb.get_active_text()
|
||||||
|
# if scale=='Scores':
|
||||||
|
# self._options['scale'] = 'scores'
|
||||||
|
# elif scale=='Loadings':
|
||||||
|
# self._options['scale'] = 'loads'
|
||||||
|
# else:
|
||||||
|
# raise IOError
|
||||||
|
|
||||||
|
def val_toggled(self, tb):
|
||||||
|
"""Callback for validation: None. """
|
||||||
|
cv_frame = self.wTree.get_widget("cv_frame")
|
||||||
|
cv_tb = self.wTree.get_widget("cv_toggle")
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['calc_cv'] = False
|
||||||
|
cv_frame.set_sensitive(False)
|
||||||
|
cv_tb.set_sensitive(False)
|
||||||
|
else:
|
||||||
|
cv_tb.set_sensitive(True)
|
||||||
|
if cv_tb.get_active():
|
||||||
|
cv_frame.set_sensitive(True)
|
||||||
|
self._options['calc_cv'] = True
|
||||||
|
|
||||||
|
def cv_toggled(self, tb):
|
||||||
|
cv_frame = self.wTree.get_widget("cv_frame")
|
||||||
|
val_tb = self.wTree.get_widget("val_none_toggle")
|
||||||
|
if tb.get_active():
|
||||||
|
cv_frame.set_sensitive(True)
|
||||||
|
self._options['calc_cv'] = True
|
||||||
|
else:
|
||||||
|
cv_frame.set_sensitive(False)
|
||||||
|
self._options['calc_cv'] = False
|
||||||
|
|
||||||
|
def on_cv_method_changed(self, cb):
|
||||||
|
method = cb.get_active_text()
|
||||||
|
if method == 'Random':
|
||||||
|
self._options['cv_val_method'] = 'random'
|
||||||
|
|
||||||
|
def on_cv_sets_changed(self, sb):
|
||||||
|
val = sb.get_value_as_int()
|
||||||
|
self._options['cv_val_sets'] = val
|
||||||
|
|
||||||
|
def conf_toggled(self, tb):
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['calc_conf'] = False
|
||||||
|
self.wTree.get_widget("subset_expander").set_sensitive(False)
|
||||||
|
else:
|
||||||
|
self._options['calc_conf'] = True
|
||||||
|
self.wTree.get_widget("subset_expander").set_sensitive(True)
|
||||||
|
|
||||||
|
def on_subset_loc_changed(self, cb):
|
||||||
|
method = cb.get_active_text()
|
||||||
|
if method=='Full model':
|
||||||
|
self._options['p_center'] = 'full_model'
|
||||||
|
elif method=='Median':
|
||||||
|
self._options['p_center'] = 'med'
|
||||||
|
elif method=='Mean':
|
||||||
|
self._options['p_center'] = 'mean'
|
||||||
|
|
||||||
|
def on_cov_loc_changed(self, cb):
|
||||||
|
method = cb.get_active_text()
|
||||||
|
if method=='Median':
|
||||||
|
self._options['cov_center'] = 'med'
|
||||||
|
elif method=='Mean':
|
||||||
|
self._options['cov_center'] = 'mean'
|
||||||
|
|
||||||
|
def on_alpha_changed(self, hs):
|
||||||
|
self._options['alpha'] = hs.get_value()
|
||||||
|
|
||||||
|
def on_rot_changed(self, rg):
|
||||||
|
proc, strict = rg
|
||||||
|
if proc.get_active():
|
||||||
|
self._options['crot'] = True
|
||||||
|
else:
|
||||||
|
self._options['crot'] = True
|
||||||
|
self._options['strict'] = True
|
||||||
|
|
||||||
|
def qval_toggled(self, tb):
|
||||||
|
if tb.get_active():
|
||||||
|
self._options['calc_qval'] = False
|
||||||
|
self.wTree.get_widget("qval_method").set_sensitive(False)
|
||||||
|
self.wTree.get_widget("q_iter").set_sensitive(False)
|
||||||
|
else:
|
||||||
|
self._options['calc_qval'] = True
|
||||||
|
self.wTree.get_widget("qval_method").set_sensitive(True)
|
||||||
|
self.wTree.get_widget("q_iter").set_sensitive(True)
|
||||||
|
|
||||||
|
def on_iter_changed(self, sb):
|
||||||
|
self._options['q_iter'] = sb.get_value()
|
||||||
|
|
||||||
|
def on_qval_changed(self, cb):
|
||||||
|
q_method = cb.get_active_text()
|
||||||
|
if method=='Shuffle rows':
|
||||||
|
self._options['q_pert_method'] = 'shuffle'
|
||||||
|
|
||||||
|
|
||||||
class PlsOptionsDialog(OptionsDialog):
|
class PlsOptionsDialog(OptionsDialog):
|
||||||
"""Options dialog for Partial Least squares regression.
|
"""Options dialog for Partial Least squares regression.
|
||||||
"""
|
"""
|
||||||
|
@ -918,5 +1346,3 @@ class PlsOptionsDialog(OptionsDialog):
|
||||||
q_method = cb.get_active_text()
|
q_method = cb.get_active_text()
|
||||||
if method=='Shuffle rows':
|
if method=='Shuffle rows':
|
||||||
self._options['q_pert_method'] = 'shuffle'
|
self._options['q_pert_method'] = 'shuffle'
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -190,7 +190,12 @@ class PlsCorrelationLoadingPlot(BlmScatterPlot):
|
||||||
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
|
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
|
||||||
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP')
|
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP')
|
||||||
|
|
||||||
|
|
||||||
|
class LplsHypoidCorrelationPlot(BlmScatterPlot):
|
||||||
|
def __init__(self, model, absi=0, ordi=1):
|
||||||
|
title = "Hypoid correlations(%s)" %model._dataset['X'].get_name()
|
||||||
|
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='W')
|
||||||
|
|
||||||
class LineViewXc(plots.LineViewPlot):
|
class LineViewXc(plots.LineViewPlot):
|
||||||
"""A line view of centered raw data
|
"""A line view of centered raw data
|
||||||
"""
|
"""
|
||||||
|
@ -214,8 +219,8 @@ class PlsQvalScatter(plots.ScatterPlot):
|
||||||
def __init__(self, model, pc=0):
|
def __init__(self, model, pc=0):
|
||||||
if not model.model.has_key('w_tsq'):
|
if not model.model.has_key('w_tsq'):
|
||||||
return None
|
return None
|
||||||
self._W = model.model['P']
|
self._W = model.model['W']
|
||||||
dataset_1 = model.as_dataset('P')
|
dataset_1 = model.as_dataset('W')
|
||||||
dataset_2 = model.as_dataset('w_tsq')
|
dataset_2 = model.as_dataset('w_tsq')
|
||||||
id_dim = dataset_1.get_dim_name(0) #genes
|
id_dim = dataset_1.get_dim_name(0) #genes
|
||||||
sel_dim = dataset_1.get_dim_name(1) #_comp
|
sel_dim = dataset_1.get_dim_name(1) #_comp
|
||||||
|
|
|
@ -115,6 +115,7 @@ def expl_var_y(Y, T, Q):
|
||||||
|
|
||||||
def pls_qvals(a, b, aopt=None, alpha=.3,
|
def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
n_iter=20, algo='pls',
|
n_iter=20, algo='pls',
|
||||||
|
center=True,
|
||||||
sim_method='shuffle',
|
sim_method='shuffle',
|
||||||
p_center='med', cov_center='med',
|
p_center='med', cov_center='med',
|
||||||
crot=True, strict=False, metric=None):
|
crot=True, strict=False, metric=None):
|
||||||
|
@ -122,8 +123,98 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
"""Returns qvals for pls model.
|
"""Returns qvals for pls model.
|
||||||
|
|
||||||
input:
|
input:
|
||||||
a -- centered data matrix
|
a -- data matrix
|
||||||
b -- centered data matrix
|
b -- data matrix
|
||||||
|
aopt -- scalar, opt. number of components
|
||||||
|
alpha -- [0,1] regularisation parameter for T2-test
|
||||||
|
n_iter -- number of permutations
|
||||||
|
sim_method -- permutation method ['shuffle']
|
||||||
|
p_center -- location estimator for sub models ['med']
|
||||||
|
cov_center -- location estimator for covariance of submodels ['med']
|
||||||
|
crot -- bool, use rotations of sub models?
|
||||||
|
strict -- bool, use stict (rot/flips only) rotations?
|
||||||
|
metric -- bool, use row metric?
|
||||||
|
"""
|
||||||
|
|
||||||
|
m, n = a.shape
|
||||||
|
TSQ = zeros((n, n_iter), dtype='d') # (nvars x n_subsets)
|
||||||
|
n_false = zeros((n, n_iter), dtype='d')
|
||||||
|
|
||||||
|
#full model
|
||||||
|
if center:
|
||||||
|
ac = a - a.mean(0)
|
||||||
|
bc = b - b.mean(0)
|
||||||
|
if metric!=None:
|
||||||
|
ac = dot(ac, metric)
|
||||||
|
if algo=='bridge':
|
||||||
|
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||||
|
else:
|
||||||
|
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||||
|
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True)
|
||||||
|
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
|
cov_center=cov_center)
|
||||||
|
t0 = time.time()
|
||||||
|
Vs = shuffle_1d(bc, n_iter, axis=0)
|
||||||
|
for i, b_shuff in enumerate(Vs):
|
||||||
|
t1 = time.time()
|
||||||
|
if algo=='bridge':
|
||||||
|
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
|
||||||
|
else:
|
||||||
|
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
|
||||||
|
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||||
|
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
|
cov_center=cov_center)
|
||||||
|
print time.time() - t1
|
||||||
|
sort_index = argsort(tsq_full)[::-1]
|
||||||
|
back_sort_index = sort_index.argsort()
|
||||||
|
print time.time() - t0
|
||||||
|
|
||||||
|
# count false positives
|
||||||
|
tsq_full_sorted = tsq_full.take(sort_index)
|
||||||
|
for i in xrange(n_iter):
|
||||||
|
for j in xrange(n):
|
||||||
|
n_false[j,i] = sum(TSQ[:,i]>=tsq_full[j]) # number of false pos. genes (0-n)
|
||||||
|
false_pos = median(n_false, 1)
|
||||||
|
ll = arange(1, len(false_pos)+1, 1)
|
||||||
|
sort_qval = false_pos.take(sort_index)/ll
|
||||||
|
qval = false_pos/ll.take(back_sort_index)
|
||||||
|
print time.time() - t0
|
||||||
|
#return qval, false_pos, TSQ, tsq_full
|
||||||
|
return qval
|
||||||
|
|
||||||
|
def ensure_strict(C, only_flips=True):
|
||||||
|
"""Ensure that a rotation matrix does only 90 degree rotations.
|
||||||
|
In multiplication with pcs this allows flips and reordering.
|
||||||
|
|
||||||
|
if only_flips is True there will onlt be flips allowed
|
||||||
|
"""
|
||||||
|
Cm = C
|
||||||
|
S = sign(C) # signs
|
||||||
|
if only_flips==True:
|
||||||
|
C = eye(Cm.shape[0])*S
|
||||||
|
return C
|
||||||
|
Cm = zeros_like(C)
|
||||||
|
Cm.putmask(1.,abs(C)>.6)
|
||||||
|
if det(Cm)>1:
|
||||||
|
raise ValueError,"Implement this!"
|
||||||
|
return Cm*S
|
||||||
|
|
||||||
|
def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
|
||||||
|
n_iter=20, algo='pls',
|
||||||
|
sim_method='shuffle',
|
||||||
|
p_center='med', cov_center='med',
|
||||||
|
crot=True, strict=False, metric=None):
|
||||||
|
|
||||||
|
"""Returns qvals for pls model.
|
||||||
|
Shuffling of variables in X is preprocessed in metric.
|
||||||
|
Null model is 'If I put genes randomly on network' ... if they are sign:
|
||||||
|
then this is due to network structure and not covariance with response.
|
||||||
|
|
||||||
|
input:
|
||||||
|
a -- data matrix
|
||||||
|
b -- data matrix
|
||||||
aopt -- scalar, opt. number of components
|
aopt -- scalar, opt. number of components
|
||||||
alpha -- [0,1] regularisation parameter for T2-test
|
alpha -- [0,1] regularisation parameter for T2-test
|
||||||
n_iter -- number of permutations
|
n_iter -- number of permutations
|
||||||
|
@ -140,25 +231,33 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
n_false = zeros((n, n_iter), dtype='<f8')
|
n_false = zeros((n, n_iter), dtype='<f8')
|
||||||
|
|
||||||
#full model
|
#full model
|
||||||
if metric!=None:
|
|
||||||
a = dot(a, metric)
|
# center?
|
||||||
|
if center==True:
|
||||||
|
ac = a - a.mean(0)
|
||||||
|
bc = b - b.mean(0)
|
||||||
|
if metric==None:
|
||||||
|
metric = eye(n,n)
|
||||||
if algo=='bridge':
|
if algo=='bridge':
|
||||||
dat = bridge(a, b, aopt, 'loads', 'fast')
|
dat = bridge(ac, bc, aopt, 'loads', 'fast')
|
||||||
else:
|
else:
|
||||||
dat = pls(a, b, aopt, 'loads', 'fast')
|
dat = pls(ac, bc, aopt, 'loads', 'fast')
|
||||||
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||||
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
alpha=alpha, crot=crot, strict=strict,
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
cov_center=cov_center)
|
cov_center=cov_center)
|
||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
Vs = shuffle_1d(b, n_iter)
|
Vs = shuffle_1d(a, n_iter, 1)
|
||||||
for i, b_shuff in enumerate(Vs):
|
for i, a_shuff in enumerate(Vs):
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
|
a = a_shuff - a_shuff.mean(0)
|
||||||
|
a = dot(a, metric)
|
||||||
|
|
||||||
if algo=='bridge':
|
if algo=='bridge':
|
||||||
dat = bridge(a, b_shuff, aopt, 'loads','fast')
|
dat = bridge(a, b, aopt, 'loads','fast')
|
||||||
else:
|
else:
|
||||||
dat = pls(a, b, aopt, 'loads', 'fast')
|
dat = pls(a, b, aopt, 'loads', 'fast')
|
||||||
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
|
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
|
||||||
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
|
||||||
alpha=alpha, crot=crot, strict=strict,
|
alpha=alpha, crot=crot, strict=strict,
|
||||||
cov_center=cov_center)
|
cov_center=cov_center)
|
||||||
|
@ -177,24 +276,8 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
|
||||||
sort_qval = false_pos.take(sort_index)/ll
|
sort_qval = false_pos.take(sort_index)/ll
|
||||||
qval = false_pos/ll.take(back_sort_index)
|
qval = false_pos/ll.take(back_sort_index)
|
||||||
print time.time() - t0
|
print time.time() - t0
|
||||||
return qval, false_pos, TSQ, tsq_full
|
#return qval, false_pos, TSQ, tsq_full
|
||||||
|
return qval
|
||||||
def ensure_strict(C, only_flips=True):
|
|
||||||
"""Ensure that a rotation matrix does only 90 degree rotations.
|
|
||||||
In multiplication with pcs this allows flips and reordering.
|
|
||||||
|
|
||||||
if only_flips is True there will onlt be flips allowed
|
|
||||||
"""
|
|
||||||
Cm = C
|
|
||||||
S = sign(C) # signs
|
|
||||||
if only_flips==True:
|
|
||||||
C = eye(Cm.shape[0])*S
|
|
||||||
return C
|
|
||||||
Cm = zeros_like(C)
|
|
||||||
Cm.putmask(1.,abs(C)>.6)
|
|
||||||
if det(Cm)>1:
|
|
||||||
raise ValueError,"Implement this!"
|
|
||||||
return Cm*S
|
|
||||||
|
|
||||||
def leverage(aopt=1,*args):
|
def leverage(aopt=1,*args):
|
||||||
"""Returns leverages
|
"""Returns leverages
|
||||||
|
@ -253,3 +336,10 @@ def ssq(E, axis=0, weights=None):
|
||||||
raise NotImplementedError, "Higher order modes not supported"
|
raise NotImplementedError, "Higher order modes not supported"
|
||||||
return pow(Ew,2).sum(axis)
|
return pow(Ew,2).sum(axis)
|
||||||
|
|
||||||
|
|
||||||
|
def vnorm(x):
|
||||||
|
"""Returns the euclidian norm of a vector.
|
||||||
|
|
||||||
|
This is considerably faster than linalg.norm
|
||||||
|
"""
|
||||||
|
return sqrt(dot(x,x.conj()))
|
||||||
|
|
|
@ -1,23 +1,25 @@
|
||||||
from scipy import apply_along_axis,newaxis,zeros,\
|
from scipy import apply_along_axis,newaxis,zeros,\
|
||||||
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
|
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
|
||||||
trace,zeros_like,sign,sort,real,argsort,rand,array,\
|
trace,zeros_like,sign,sort,real,argsort,rand,array,\
|
||||||
matrix
|
matrix,nan
|
||||||
from scipy.linalg import norm,svd,inv,eig
|
from scipy.linalg import norm,svd,inv,eig
|
||||||
from scipy.stats import median,mean
|
from scipy.stats import median,mean
|
||||||
|
|
||||||
def normalise(a,axis=0,return_scales=False):
|
def normalise(a, axis=0, return_scales=False):
|
||||||
s = apply_along_axis(norm,axis,a)
|
s = apply_along_axis(norm, axis, a)
|
||||||
if axis==0:
|
if axis==0:
|
||||||
s = s[newaxis]
|
s = s[newaxis]
|
||||||
else:
|
else:
|
||||||
s = s[:,newaxis]
|
s = s[:,newaxis]
|
||||||
|
|
||||||
a_s = a/s
|
a_s = a/s
|
||||||
if return_scales:
|
|
||||||
return a_s,s
|
|
||||||
return a_s
|
|
||||||
|
|
||||||
def sub2ind(shape,i,j):
|
if return_scales:
|
||||||
|
return a_s, s
|
||||||
|
|
||||||
|
return a_s
|
||||||
|
|
||||||
|
def sub2ind(shape, i, j):
|
||||||
"""Indices from subscripts. Only support for 2d"""
|
"""Indices from subscripts. Only support for 2d"""
|
||||||
row,col = shape
|
row,col = shape
|
||||||
ind = []
|
ind = []
|
||||||
|
@ -41,13 +43,13 @@ def sorted_eig(a, b=None,sort_by='sm'):
|
||||||
(This is reversed output compared to matlab)
|
(This is reversed output compared to matlab)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
s,v = eig(a,b)
|
s,v = eig(a, b)
|
||||||
s = real(s) # dont expect any imaginary part
|
s = real(s) # dont expect any imaginary part
|
||||||
v = real(v)
|
v = real(v)
|
||||||
ind = argsort(s)
|
ind = argsort(s)
|
||||||
if sort_by=='lm':
|
if sort_by=='lm':
|
||||||
ind = ind[::-1]
|
ind = ind[::-1]
|
||||||
v = v.take(ind,1)
|
v = v.take(ind, 1)
|
||||||
s = s.take(ind)
|
s = s.take(ind)
|
||||||
|
|
||||||
return s,v
|
return s,v
|
||||||
|
@ -67,15 +69,15 @@ def str2num(string_number):
|
||||||
return num
|
return num
|
||||||
|
|
||||||
def randperm(n):
|
def randperm(n):
|
||||||
r=rand(n)
|
r = rand(n)
|
||||||
dict={}
|
dict={}
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
dict[r[i]]=i
|
dict[r[i]] = i
|
||||||
r=sort(r)
|
r = sort(r)
|
||||||
out=zeros(n)
|
out = zeros(n)
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
out[i]=dict[r[i]]
|
out[i] = dict[r[i]]
|
||||||
return array(out,dtype='i')
|
return array(out).astype('i')
|
||||||
|
|
||||||
def mat_center(X,axis=0,ret_mn=False):
|
def mat_center(X,axis=0,ret_mn=False):
|
||||||
"""Mean center matrix along axis.
|
"""Mean center matrix along axis.
|
||||||
|
|
|
@ -3,11 +3,12 @@
|
||||||
There is no typechecking of any kind here, just focus on speed
|
There is no typechecking of any kind here, just focus on speed
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from scipy.linalg import svd,norm,inv,pinv,qr
|
import math
|
||||||
|
from scipy.linalg import svd,inv
|
||||||
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
||||||
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
|
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
|
||||||
rand,sum,cumsum,matrix
|
rand,sum,cumsum,matrix
|
||||||
|
|
||||||
def pca(a, aopt, scale='scores', mode='normal'):
|
def pca(a, aopt, scale='scores', mode='normal'):
|
||||||
""" Principal Component Analysis model
|
""" Principal Component Analysis model
|
||||||
mode:
|
mode:
|
||||||
|
@ -18,17 +19,18 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
||||||
|
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
|
|
||||||
if m*10.>n:
|
if m*3>n:
|
||||||
u, s, vt = esvd(a)
|
u, s, v = esvd(a)
|
||||||
else:
|
else:
|
||||||
u, s, vt = svd(a, full_matrices=0)
|
u, s, vt = svd(a, full_matrices=0)
|
||||||
|
v = vt.T
|
||||||
eigvals = (1./m)*s
|
eigvals = (1./m)*s
|
||||||
T = u*s
|
T = u*s
|
||||||
T = T[:,:aopt]
|
T = T[:,:aopt]
|
||||||
P = vt[:aopt,:].T
|
P = v[:,:aopt]
|
||||||
|
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
tnorm = apply_along_axis(norm, 0, T)
|
tnorm = apply_along_axis(vnorm, 0, T)
|
||||||
T = T/tnorm
|
T = T/tnorm
|
||||||
P = P*tnorm
|
P = P*tnorm
|
||||||
|
|
||||||
|
@ -47,6 +49,7 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
||||||
|
|
||||||
return {'T':T, 'P':P, 'E':E}
|
return {'T':T, 'P':P, 'E':E}
|
||||||
|
|
||||||
|
|
||||||
def pcr(a, b, aopt=2, scale='scores', mode='normal'):
|
def pcr(a, b, aopt=2, scale='scores', mode='normal'):
|
||||||
"""Returns Principal component regression model."""
|
"""Returns Principal component regression model."""
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
|
@ -98,13 +101,13 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
u, s, vh = svd(dot(ab.T, ab))
|
u, s, vh = svd(dot(ab.T, ab))
|
||||||
w = dot(ab, u[:,:1])
|
w = dot(ab, u[:,:1])
|
||||||
|
|
||||||
w = w/norm(w)
|
w = w/vnorm(w)
|
||||||
r = w.copy()
|
r = w.copy()
|
||||||
if i>0:
|
if i>0:
|
||||||
for j in range(0,i,1):
|
for j in range(0,i,1):
|
||||||
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
||||||
t = dot(a, r)
|
t = dot(a, r)
|
||||||
tt = norm(t)**2
|
tt = vnorm(t)**2
|
||||||
p = dot(a.T, t)/tt
|
p = dot(a.T, t)/tt
|
||||||
q = dot(r.T, ab).T/tt
|
q = dot(r.T, ab).T/tt
|
||||||
ab = ab - dot(p, q.T)*tt
|
ab = ab - dot(p, q.T)*tt
|
||||||
|
@ -115,7 +118,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
|
|
||||||
if mode=='fast' and i==aopt-1:
|
if mode=='fast' and i==aopt-1:
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
tnorm = apply_along_axis(norm, 0, T)
|
tnorm = apply_along_axis(vnorm, 0, T)
|
||||||
T = T/tnorm
|
T = T/tnorm
|
||||||
W = W*tnorm
|
W = W*tnorm
|
||||||
return {'T':T, 'W':W}
|
return {'T':T, 'W':W}
|
||||||
|
@ -134,7 +137,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
F = b - dot(T[:,:aopt], Q[:,:aopt].T)
|
F = b - dot(T[:,:aopt], Q[:,:aopt].T)
|
||||||
|
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
tnorm = apply_along_axis(norm, 0, T)
|
tnorm = apply_along_axis(vnorm, 0, T)
|
||||||
T = T/tnorm
|
T = T/tnorm
|
||||||
W = W*tnorm
|
W = W*tnorm
|
||||||
Q = Q*tnorm
|
Q = Q*tnorm
|
||||||
|
@ -159,7 +162,7 @@ def w_simpls(aat, b, aopt):
|
||||||
u = dot(b, u[:,:1]) #y-factor scores
|
u = dot(b, u[:,:1]) #y-factor scores
|
||||||
U[:,i] = u.ravel()
|
U[:,i] = u.ravel()
|
||||||
t = dot(aat, u)
|
t = dot(aat, u)
|
||||||
t = t/norm(t)
|
t = t/vnorm(t)
|
||||||
T[:,i] = t.ravel()
|
T[:,i] = t.ravel()
|
||||||
h = dot(aat, t) #score-weights
|
h = dot(aat, t) #score-weights
|
||||||
H[:,i] = h.ravel()
|
H[:,i] = h.ravel()
|
||||||
|
@ -183,7 +186,7 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||||
W = u[:,:aopt]
|
W = u[:,:aopt]
|
||||||
K = vt[:aopt,:].T
|
K = vt[:aopt,:].T
|
||||||
T = dot(a, W)
|
T = dot(a, W)
|
||||||
tnorm = apply_along_axis(norm, 0, T) # norm of T-columns
|
tnorm = apply_along_axis(vnorm, 0, T) # norm of T-columns
|
||||||
|
|
||||||
if mode == 'fast':
|
if mode == 'fast':
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
|
@ -196,16 +199,6 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||||
B = zeros((aopt, n, l), dtype='f')
|
B = zeros((aopt, n, l), dtype='f')
|
||||||
for i in range(aopt):
|
for i in range(aopt):
|
||||||
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
|
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
|
||||||
# leverages
|
|
||||||
# fixme: probably need an orthogonal basis for row-space leverage
|
|
||||||
# T (scores) are not orthogonal
|
|
||||||
# Using a qr decomp to get an orthonormal basis for row-space
|
|
||||||
#Tq = qr(T)[0]
|
|
||||||
#s_lev,v_lev = leverage(aopt,Tq,W)
|
|
||||||
# explained variance
|
|
||||||
#var_x, exp_var_x = variances(a,T,W)
|
|
||||||
#qnorm = apply_along_axis(norm, 0, Q)
|
|
||||||
#var_y, exp_var_y = variances(b,U,Q/qnorm)
|
|
||||||
|
|
||||||
if mode == 'detailed':
|
if mode == 'detailed':
|
||||||
E = empty((aopt, m, n))
|
E = empty((aopt, m, n))
|
||||||
|
@ -225,10 +218,132 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||||
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
|
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def nipals_lpls(X, Y, Z, amax, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', scale='scores', verbose=False):
|
||||||
|
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
|
||||||
|
|
||||||
|
(X!Z)->Y
|
||||||
|
:input:
|
||||||
|
X : data matrix (m, n)
|
||||||
|
Y : data matrix (m, l)
|
||||||
|
Z : data matrix (n, o)
|
||||||
|
|
||||||
|
:output:
|
||||||
|
T : X-scores
|
||||||
|
W : X-weights/Z-weights
|
||||||
|
P : X-loadings
|
||||||
|
Q : Y-loadings
|
||||||
|
U : X-Y relation
|
||||||
|
L : Z-scores
|
||||||
|
K : Z-loads
|
||||||
|
B : Regression coefficients X->Y
|
||||||
|
b0: Regression coefficient intercept
|
||||||
|
evx : X-explained variance
|
||||||
|
evy : Y-explained variance
|
||||||
|
evz : Z-explained variance
|
||||||
|
|
||||||
|
:Notes:
|
||||||
|
|
||||||
|
"""
|
||||||
|
if mean_ctr!=None:
|
||||||
|
xctr, yctr, zctr = mean_ctr
|
||||||
|
X, mnX = center(X, xctr)
|
||||||
|
Y, mnY = center(Y, xctr)
|
||||||
|
Z, mnZ = center(Z, zctr)
|
||||||
|
|
||||||
|
varX = pow(X, 2).sum()
|
||||||
|
varY = pow(Y, 2).sum()
|
||||||
|
varZ = pow(Z, 2).sum()
|
||||||
|
|
||||||
|
m, n = X.shape
|
||||||
|
k, l = Y.shape
|
||||||
|
u, o = Z.shape
|
||||||
|
|
||||||
|
# initialize
|
||||||
|
U = empty((k, amax))
|
||||||
|
Q = empty((l, amax))
|
||||||
|
T = empty((m, amax))
|
||||||
|
W = empty((n, amax))
|
||||||
|
P = empty((n, amax))
|
||||||
|
K = empty((o, amax))
|
||||||
|
L = empty((u, amax))
|
||||||
|
var_x = empty((amax,))
|
||||||
|
var_y = empty((amax,))
|
||||||
|
var_z = empty((amax,))
|
||||||
|
|
||||||
|
for a in range(amax):
|
||||||
|
if verbose:
|
||||||
|
print "\n Working on comp. %s" %a
|
||||||
|
u = Y[:,:1]
|
||||||
|
diff = 1
|
||||||
|
MAX_ITER = 100
|
||||||
|
lim = 1e-5
|
||||||
|
niter = 0
|
||||||
|
while (diff>lim and niter<MAX_ITER):
|
||||||
|
niter += 1
|
||||||
|
u1 = u.copy()
|
||||||
|
w = dot(X.T, u)
|
||||||
|
w = w/sqrt(dot(w.T, w))
|
||||||
|
l = dot(Z, w)
|
||||||
|
k = dot(Z.T, l)
|
||||||
|
k = k/sqrt(dot(k.T, k))
|
||||||
|
w = alpha*k + (1-alpha)*w
|
||||||
|
w = w/sqrt(dot(w.T, w))
|
||||||
|
t = dot(X, w)
|
||||||
|
c = dot(Y.T, t)
|
||||||
|
c = c/sqrt(dot(c.T, c))
|
||||||
|
u = dot(Y, c)
|
||||||
|
diff = abs(u1 - u).max()
|
||||||
|
if verbose:
|
||||||
|
print "Converged after %s iterations" %niter
|
||||||
|
tt = dot(t.T, t)
|
||||||
|
p = dot(X.T, t)/tt
|
||||||
|
q = dot(Y.T, t)/tt
|
||||||
|
l = dot(Z, w)
|
||||||
|
|
||||||
|
U[:,a] = u.ravel()
|
||||||
|
W[:,a] = w.ravel()
|
||||||
|
P[:,a] = p.ravel()
|
||||||
|
T[:,a] = t.ravel()
|
||||||
|
Q[:,a] = q.ravel()
|
||||||
|
L[:,a] = l.ravel()
|
||||||
|
K[:,a] = k.ravel()
|
||||||
|
|
||||||
|
X = X - dot(t, p.T)
|
||||||
|
Y = Y - dot(t, q.T)
|
||||||
|
Z = (Z.T - dot(w, l.T)).T
|
||||||
|
|
||||||
|
var_x[a] = pow(X, 2).sum()
|
||||||
|
var_y[a] = pow(Y, 2).sum()
|
||||||
|
var_z[a] = pow(Z, 2).sum()
|
||||||
|
|
||||||
|
B = dot(dot(W, inv(dot(P.T, W))), Q.T)
|
||||||
|
b0 = mnY - dot(mnX, B)
|
||||||
|
|
||||||
|
# variance explained
|
||||||
|
evx = 100.0*(1 - var_x/varX)
|
||||||
|
evy = 100.0*(1 - var_y/varY)
|
||||||
|
evz = 100.0*(1 - var_z/varZ)
|
||||||
|
if scale=='loads':
|
||||||
|
tnorm = apply_along_axis(vnorm, 0, T)
|
||||||
|
T = T/tnorm
|
||||||
|
W = W*tnorm
|
||||||
|
Q = Q*tnorm
|
||||||
|
knorm = apply_along_axis(vnorm, 0, K)
|
||||||
|
L = L*knorm
|
||||||
|
K = K/knorm
|
||||||
|
|
||||||
|
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy, 'evz':evz}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
########### Helper routines #########
|
||||||
|
|
||||||
def m_shape(array):
|
def m_shape(array):
|
||||||
return matrix(array).shape
|
return matrix(array).shape
|
||||||
|
|
||||||
def esvd(data,economy=1):
|
def esvd(data, economy=1):
|
||||||
"""SVD with the option of economy sized calculation
|
"""SVD with the option of economy sized calculation
|
||||||
Calculate subspaces of X'X or XX' depending on the shape
|
Calculate subspaces of X'X or XX' depending on the shape
|
||||||
of the matrix.
|
of the matrix.
|
||||||
|
@ -239,17 +354,40 @@ def esvd(data,economy=1):
|
||||||
"""
|
"""
|
||||||
m, n = data.shape
|
m, n = data.shape
|
||||||
if m>=n:
|
if m>=n:
|
||||||
u, s, vt = svd(dot(data.T, data))
|
data = dot(data.T, data)
|
||||||
|
u, s, vt = svd(data)
|
||||||
u = dot(data, vt.T)
|
u = dot(data, vt.T)
|
||||||
v = vt.T
|
v = vt.T
|
||||||
for i in xrange(n):
|
for i in xrange(n):
|
||||||
s[i] = norm(u[:,i])
|
s[i] = vnorm(u[:,i])
|
||||||
u[:,i] = u[:,i]/s[i]
|
u[:,i] = u[:,i]/s[i]
|
||||||
else:
|
else:
|
||||||
u, s, vt = svd(dot(data, data.T))
|
data = dot(data, data.T)
|
||||||
|
data = (data + data.T)/2.0
|
||||||
|
u, s, vt = svd(data)
|
||||||
v = dot(u.T, data)
|
v = dot(u.T, data)
|
||||||
for i in xrange(m):
|
for i in xrange(m):
|
||||||
s[i] = norm(v[i,:])
|
s[i] = vnorm(v[i,:])
|
||||||
v[i,:] = v[i,:]/s[i]
|
v[i,:] = v[i,:]/s[i]
|
||||||
|
|
||||||
return u, s, v
|
return u, s, v.T
|
||||||
|
|
||||||
|
def vnorm(x):
|
||||||
|
# assume column arrays (or vectors)
|
||||||
|
return math.sqrt(dot(x.T, x))
|
||||||
|
|
||||||
|
def center(a, axis):
|
||||||
|
# 0 = col center, 1 = row center, 2 = double center
|
||||||
|
# -1 = nothing
|
||||||
|
if axis==-1:
|
||||||
|
mn = zeros((a.shape[1],))
|
||||||
|
elif axis==0:
|
||||||
|
mn = a.mean(0)
|
||||||
|
elif axis==1:
|
||||||
|
mn = a.mean(1)[:,newaxis]
|
||||||
|
elif axis==2:
|
||||||
|
mn = a.mean(0) + a.mean(1)[:,newaxis] - a.mean()
|
||||||
|
else:
|
||||||
|
raise IOError("input error: axis must be in [-1,0,1,2]")
|
||||||
|
|
||||||
|
return a - mn, mn
|
||||||
|
|
|
@ -53,6 +53,7 @@ def gene_hypergeo_test(selection, category_dataset):
|
||||||
cat_count)
|
cat_count)
|
||||||
|
|
||||||
pvals = scipy.where(cat_count==0, 2, pvals)
|
pvals = scipy.where(cat_count==0, 2, pvals)
|
||||||
|
pvals = scipy.where(scipy.isnan(pvals), 2, pvals)
|
||||||
out = {}
|
out = {}
|
||||||
for i in range(pvals.size):
|
for i in range(pvals.size):
|
||||||
out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i])
|
out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i])
|
||||||
|
|
|
@ -2,7 +2,7 @@ import os,sys
|
||||||
from itertools import izip
|
from itertools import izip
|
||||||
import networkx as NX
|
import networkx as NX
|
||||||
from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\
|
from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\
|
||||||
outer,maximum,sum,diag,real
|
outer,maximum,sum,diag,real,atleast_2d
|
||||||
from scipy.linalg import eig,svd,inv,expm,norm
|
from scipy.linalg import eig,svd,inv,expm,norm
|
||||||
from cx_utils import sorted_eig
|
from cx_utils import sorted_eig
|
||||||
|
|
||||||
|
@ -378,6 +378,7 @@ Ke = expm(A) .... expm(-A)?
|
||||||
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
|
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
|
||||||
# 13.09.2206: update for use in numpy
|
# 13.09.2206: update for use in numpy
|
||||||
|
|
||||||
|
# 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last
|
||||||
|
|
||||||
def K_expAdj(W, normalised=True, alpha=1.0):
|
def K_expAdj(W, normalised=True, alpha=1.0):
|
||||||
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
|
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
|
||||||
|
@ -433,8 +434,8 @@ def K_vonNeumann(W, normalised=True, alpha=1.0):
|
||||||
return dot(dot(vr,psigma),vri).astype(t)
|
return dot(dot(vr,psigma),vri).astype(t)
|
||||||
|
|
||||||
def K_laplacian(W, normalised=True, alpha=1.0):
|
def K_laplacian(W, normalised=True, alpha=1.0):
|
||||||
""" This is the matrix square root of the pseudo inverse of L.
|
""" This is the matrix pseudo inverse of L.
|
||||||
Also known as th eaverage commute time matrix.
|
Also known as the average commute time matrix.
|
||||||
"""
|
"""
|
||||||
W = asarray(W)
|
W = asarray(W)
|
||||||
t = W.dtype.char
|
t = W.dtype.char
|
||||||
|
@ -464,8 +465,7 @@ def K_laplacian(W, normalised=True, alpha=1.0):
|
||||||
return K
|
return K
|
||||||
|
|
||||||
|
|
||||||
|
def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
|
||||||
def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5):
|
|
||||||
"""Returns diffusion kernel.
|
"""Returns diffusion kernel.
|
||||||
input:
|
input:
|
||||||
-- W, adj. matrix
|
-- W, adj. matrix
|
||||||
|
@ -477,27 +477,45 @@ def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5):
|
||||||
t = W.dtype.char
|
t = W.dtype.char
|
||||||
if len(W.shape)!=2:
|
if len(W.shape)!=2:
|
||||||
raise ValueError, "Non-matrix input to matrix function."
|
raise ValueError, "Non-matrix input to matrix function."
|
||||||
m,n = W.shape
|
m, n = W.shape
|
||||||
if t in ['F','D']:
|
if t in ['F','D']:
|
||||||
raise TypeError, "Complex input!"
|
raise TypeError, "Complex input!"
|
||||||
D = diag(sum(W,0))
|
D = diag(W.sum(0))
|
||||||
L = D-W
|
L = D - W
|
||||||
if normalised==True:
|
if normalised==True:
|
||||||
T = diag(sqrt(1./(sum(W,0))))
|
T = diag(sqrt(1./W.sum(0)))
|
||||||
L = dot(dot(T,L),T)
|
L = dot(dot(T, L), T)
|
||||||
e,vr = eig(L)
|
e, vr = eig(L)
|
||||||
vri = inv(vr) #inv
|
vri = inv(vr) #inv
|
||||||
cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
|
cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
|
||||||
cutoff = 1.*abs(cond*maximum.reduce(e))
|
cutoff = 1.*abs(cond*maximum.reduce(e))
|
||||||
psigma = eye(m) # if sing vals are 0 exp(0)=1 (unnecessary)
|
psigma = eye(m) # if eigvals are 0 exp(0)=1 (unnecessary)
|
||||||
#psigma = zeros((m,n), dtype='<f8')
|
#psigma = zeros((m,n), dtype='<f8')
|
||||||
for i in range(len(e)):
|
for i in range(len(e)):
|
||||||
if abs(e[i]) > cutoff:
|
if abs(e[i]) > cutoff:
|
||||||
psigma[i,i] = exp(-beta*e[i])
|
psigma[i,i] = exp(-beta*e[i])
|
||||||
|
#else:
|
||||||
|
# psigma[i,i] = 0.0
|
||||||
K = real(dot(dot(vr, psigma), vri))
|
K = real(dot(dot(vr, psigma), vri))
|
||||||
I = eye(n, dtype='<f8')
|
I = eye(n, dtype='<f8')
|
||||||
K = (1. - alpha)*I + alpha*K
|
K = (1. - alpha)*I + alpha*K
|
||||||
return K
|
return K
|
||||||
|
|
||||||
|
def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
|
||||||
|
"""Returns diffusion kernel, using fast pade approximation.
|
||||||
|
input:
|
||||||
|
-- W, adj. matrix
|
||||||
|
-- normalised [True/False]
|
||||||
|
-- beta, [0->), (diffusion degree)
|
||||||
|
"""
|
||||||
|
|
||||||
|
D = diag(W.sum(0))
|
||||||
|
L = D - W
|
||||||
|
if normalised==True:
|
||||||
|
T = diag(sqrt(1./W.sum(0)))
|
||||||
|
L = dot(dot(T, L), T)
|
||||||
|
return expm(-beta*L)
|
||||||
|
|
||||||
|
|
||||||
def K_modularity(W,alpha=1.0):
|
def K_modularity(W,alpha=1.0):
|
||||||
""" Returns the matrix square root of Newmans modularity."""
|
""" Returns the matrix square root of Newmans modularity."""
|
||||||
|
@ -530,3 +548,20 @@ def kernel_score(K, W):
|
||||||
score = diag(dot(W, dot(K, W)) )
|
score = diag(dot(W, dot(K, W)) )
|
||||||
tot = sum(score)
|
tot = sum(score)
|
||||||
return score, tot
|
return score, tot
|
||||||
|
|
||||||
|
|
||||||
|
def modularity_matrix(G, nodelist=None):
|
||||||
|
if not nodelist:
|
||||||
|
nodelist = G.nodes()
|
||||||
|
else:
|
||||||
|
G = NX.subgraph(G, nodelist)
|
||||||
|
|
||||||
|
A = NX.adj_matrix(G, nodelist=nodelist)
|
||||||
|
d = atleast_2d(G.degree(nbunch=nodelist))
|
||||||
|
m = 1.*G.number_of_edges()
|
||||||
|
B = A - A/m
|
||||||
|
return B
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -41,30 +41,31 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No
|
||||||
"""Random block crossvalidation
|
"""Random block crossvalidation
|
||||||
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
||||||
"""
|
"""
|
||||||
index = randperm(a.shape[axis])
|
#index = randperm(a.shape[axis])
|
||||||
|
index = arange(a.shape[axis])
|
||||||
if n_blocks==None:
|
if n_blocks==None:
|
||||||
n_blocks = a.shape[axis]
|
n_blocks = a.shape[axis]
|
||||||
n_in_set = ceil(float(a.shape[axis])/n_blocks)
|
n_in_set = ceil(float(a.shape[axis])/n_blocks)
|
||||||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
|
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
|
||||||
for out in out_ind_sets:
|
for out in out_ind_sets:
|
||||||
inn = [i for i in index if i not in out]
|
inn = [i for i in index if i not in out]
|
||||||
acal = a.take(inn, 0)
|
acal = a.take(inn, 0)
|
||||||
atrue = a.take(out, 0)
|
atrue = a.take(out, 0)
|
||||||
bcal = b.take(inn, 0)
|
bcal = b.take(inn, 0)
|
||||||
btrue = b.take(out, 0)
|
btrue = b.take(out, 0)
|
||||||
if center:
|
if center:
|
||||||
mn_a = acal.mean(0)[newaxis]
|
mn_a = acal.mean(0)[newaxis]
|
||||||
acal = acal - mn_a
|
acal = acal - mn_a
|
||||||
atrue = atrue - mn_a
|
atrue = atrue - mn_a
|
||||||
mn_b = bcal.mean(0)[newaxis]
|
mn_b = bcal.mean(0)[newaxis]
|
||||||
bcal = bcal - mn_b
|
bcal = bcal - mn_b
|
||||||
btrue = btrue - mn_b
|
btrue = btrue - mn_b
|
||||||
if metric!=None:
|
if metric!=None:
|
||||||
acal = dot(acal, metric)
|
acal = dot(acal, metric)
|
||||||
if index_out:
|
if index_out:
|
||||||
yield acal, atrue, bcal, btrue, out
|
yield acal, atrue, bcal, btrue, out
|
||||||
else:
|
else:
|
||||||
yield acal, atrue, bcal, btrue
|
yield acal, atrue, bcal, btrue
|
||||||
|
|
||||||
|
|
||||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
|
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
|
||||||
|
@ -151,6 +152,7 @@ def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
|
||||||
index = arange(m)
|
index = arange(m)
|
||||||
dummy = map(random.shuffle, array_split(index, blocks))
|
dummy = map(random.shuffle, array_split(index, blocks))
|
||||||
a_out = a.take(index, axis)
|
a_out = a.take(index, axis)
|
||||||
|
|
||||||
if index_out:
|
if index_out:
|
||||||
yield a_out, index
|
yield a_out, index
|
||||||
else:
|
else:
|
||||||
|
@ -164,7 +166,8 @@ def shuffle_1d(a, n_sets, axis=0):
|
||||||
m = a.shape[axis]
|
m = a.shape[axis]
|
||||||
for ii in xrange(n_sets):
|
for ii in xrange(n_sets):
|
||||||
index = randperm(m)
|
index = randperm(m)
|
||||||
yield a.take(index, axis)
|
a = a.take(index, axis)
|
||||||
|
yield a
|
||||||
|
|
||||||
def diag_pert(a, n_sets=10, center=True, index_out=False):
|
def diag_pert(a, n_sets=10, center=True, index_out=False):
|
||||||
"""Alter generator returning sets perturbed with means at diagonals.
|
"""Alter generator returning sets perturbed with means at diagonals.
|
||||||
|
@ -205,18 +208,17 @@ def diag_pert(a, n_sets=10, center=True, index_out=False):
|
||||||
else:
|
else:
|
||||||
yield a_out
|
yield a_out
|
||||||
|
|
||||||
|
|
||||||
def outerprod_centering(aat, ret_mn=True):
|
def outerprod_centering(aat, ret_mn=True):
|
||||||
"""Returns mean centered symmetric outerproduct matrix.
|
"""Returns double centered symmetric outerproduct matrix.
|
||||||
"""
|
"""
|
||||||
n = aat.shape[0]
|
h = aat.mean(0)[newaxis]
|
||||||
h = aat.sum(0)[:,newaxis]
|
h = h - 0.5*h.mean()
|
||||||
h = (h - mean(h)/2)/n
|
mn_a = h + h.T # beauty of broadcasting
|
||||||
mn_a = h + h.T
|
|
||||||
aatc = aat - mn_a
|
aatc = aat - mn_a
|
||||||
if ret_mn:
|
if ret_mn:
|
||||||
return aatc, aat.mean(0)
|
return aatc, mn_a
|
||||||
return aat - mn_a
|
return aatc
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,11 +12,47 @@ from cx_utils import m_shape
|
||||||
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
"""Returns rmsep and aopt for pls tailored for wide X.
|
"""Returns rmsep and aopt for pls tailored for wide X.
|
||||||
|
|
||||||
|
The root mean square error of cross validation is calculated
|
||||||
|
based on random block cross-validation. With number of blocks equal to
|
||||||
|
number of samples [default] gives leave-one-out cv.
|
||||||
|
The pls model is based on the simpls algorithm for wide X.
|
||||||
|
|
||||||
comments:
|
:Parameters:
|
||||||
-- X, Y inputs need to be centered (fixme: check)
|
X : ndarray
|
||||||
|
column centered data matrix of size (samples x variables)
|
||||||
|
Y : ndarray
|
||||||
|
column centered response matrix of size (samples x responses)
|
||||||
|
amax : scalar
|
||||||
|
Maximum number of components
|
||||||
|
n_blocks : scalar
|
||||||
|
Number of blocks in cross validation
|
||||||
|
|
||||||
|
:Returns:
|
||||||
|
rmsep : ndarray
|
||||||
|
Root Mean Square Error of cross-validated Predictions
|
||||||
|
aopt : scalar
|
||||||
|
Guestimate of the optimal number of components
|
||||||
|
|
||||||
|
:SeeAlso:
|
||||||
|
- pls_cv_val : Same output, not optimised for wide X
|
||||||
|
- w_simpls : Simpls algorithm for wide X
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
Based (cowardly translated) on m-files from the Chemoact toolbox
|
||||||
|
X, Y inputs need to be centered (fixme: check)
|
||||||
|
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
|
||||||
|
>>> import numpy as n
|
||||||
|
>>> X = n.array([[1., 2., 3.],[]])
|
||||||
|
>>> Y = n.array([[1., 2., 3.],[]])
|
||||||
|
>>> w_pls(X, Y, 1)
|
||||||
|
[4,5,6], 1
|
||||||
"""
|
"""
|
||||||
|
|
||||||
k, l = m_shape(Y)
|
k, l = m_shape(Y)
|
||||||
PRESS = zeros((l, amax+1), dtype='f')
|
PRESS = zeros((l, amax+1), dtype='f')
|
||||||
if n_blocks==None:
|
if n_blocks==None:
|
||||||
|
@ -30,7 +66,7 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
if algo=='simpls':
|
if algo=='simpls':
|
||||||
dat = w_simpls(Din, Yin, amax)
|
dat = w_simpls(Din, Yin, amax)
|
||||||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||||
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
@ -40,21 +76,13 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
E = Yout[:,j][:,newaxis] - TQ
|
E = Yout[:,j][:,newaxis] - TQ
|
||||||
E = E + sum(E, 0)/Din.shape[0]
|
E = E + sum(E, 0)/Din.shape[0]
|
||||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||||
#Yhat = Y - dot(That,Q.T)
|
Yhat = Y - dot(That,Q.T)
|
||||||
rmsep = sqrt(PRESS/Y.shape[0])
|
rmsep = sqrt(PRESS/Y.shape[0])
|
||||||
aopt = find_aopt_from_sep(rmsep)
|
aopt = find_aopt_from_sep(rmsep)
|
||||||
return rmsep, aopt
|
return rmsep, Yhat, aopt
|
||||||
|
|
||||||
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||||
""" Validation results of pls model.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
comments:
|
|
||||||
-- X, Y inputs need to be centered (fixme: check)
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
k, l = m_shape(Y)
|
k, l = m_shape(Y)
|
||||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||||
EE = zeros((amax, k, l), dtype='<f8')
|
EE = zeros((amax, k, l), dtype='<f8')
|
||||||
|
@ -79,7 +107,30 @@ def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||||
|
|
||||||
rmsep = sqrt(PRESS/(k-1.))
|
rmsep = sqrt(PRESS/(k-1.))
|
||||||
aopt = find_aopt_from_sep(rmsep)
|
aopt = find_aopt_from_sep(rmsep)
|
||||||
return rmsep, aopt
|
return rmsep, Yhat, aopt
|
||||||
|
|
||||||
|
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
|
||||||
|
"""Performs crossvalidation to get generalisation error in lpls"""
|
||||||
|
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
|
||||||
|
k, l = Y.shape
|
||||||
|
Yhat = empty((a_max,k,l), 'd')
|
||||||
|
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
|
||||||
|
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
||||||
|
a_max=a_max,
|
||||||
|
alpha=alpha,
|
||||||
|
mean_ctr=[2,0,1],
|
||||||
|
verbose=False)
|
||||||
|
for a in range(a_max):
|
||||||
|
Yhat[a,ind,:] = b0[a][0][0] + dot(xi, B[a])
|
||||||
|
Yhat_class = zeros_like(Yhat)
|
||||||
|
for a in range(a_max):
|
||||||
|
for i in range(k):
|
||||||
|
Yhat_class[a,i,argmax(Yhat[a,i,:])]=1.0
|
||||||
|
class_err = 100*((Yhat_class+Y)==2).sum(1)/Y.sum(0).astype('d')
|
||||||
|
sep = (Y - Yhat)**2
|
||||||
|
rmsep = sqrt(sep.mean(1))
|
||||||
|
aopt = find_aopt_from_sep(rmsep)
|
||||||
|
return rmsep, Yhat, aopt
|
||||||
|
|
||||||
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
||||||
"""Pca validation by altering elements in X.
|
"""Pca validation by altering elements in X.
|
||||||
|
@ -146,8 +197,7 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
||||||
if n_blocks == None:
|
if n_blocks == None:
|
||||||
n_blocks = b.shape[0]
|
n_blocks = b.shape[0]
|
||||||
|
|
||||||
Wcv = empty((n_blocks, a.shape[1], amax), dtype='f')
|
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
|
||||||
|
|
||||||
if use_pack and metric==None:
|
if use_pack and metric==None:
|
||||||
u, s, inflater = svd(a, full_matrices=0)
|
u, s, inflater = svd(a, full_matrices=0)
|
||||||
a = u*s
|
a = u*s
|
||||||
|
@ -161,11 +211,10 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
|
||||||
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
||||||
|
|
||||||
W = dat['W']
|
W = dat['W']
|
||||||
|
|
||||||
if use_pack and metric==None:
|
if use_pack and metric==None:
|
||||||
W = dot(inflater.T, W)
|
W = dot(inflater.T, W)
|
||||||
|
|
||||||
Wcv[nn,:,:] = W
|
Wcv[nn,:,:] = W[:,:,]
|
||||||
|
|
||||||
return Wcv
|
return Wcv
|
||||||
|
|
||||||
|
@ -200,6 +249,29 @@ def pca_jkP(a, aopt, n_blocks=None, metric=None):
|
||||||
return PP
|
return PP
|
||||||
|
|
||||||
|
|
||||||
|
def lpls_jk(X, Y, Z, a_max, nsets=None, alpha=.5):
|
||||||
|
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False)
|
||||||
|
m, n = X.shape
|
||||||
|
k, l = Y.shape
|
||||||
|
o, p = Z.shape
|
||||||
|
if nsets==None:
|
||||||
|
nsets = m
|
||||||
|
WWx = empty((nsets, n, a_max), 'd')
|
||||||
|
WWz = empty((nsets, o, a_max), 'd')
|
||||||
|
#WWy = empty((nsets, l, a_max), 'd')
|
||||||
|
for i, (xcal,xi,ycal,yi) in enumerate(cv_iter):
|
||||||
|
T, W, P, Q, U, L, K, B, b0, evx, evy, evz = nipals_lpls(xcal,ycal,Z,
|
||||||
|
a_max=a_max,
|
||||||
|
alpha=alpha,
|
||||||
|
mean_ctr=[2,0,1],
|
||||||
|
scale='loads',
|
||||||
|
verbose=False)
|
||||||
|
WWx[i,:,:] = W
|
||||||
|
WWz[i,:,:] = L
|
||||||
|
#WWy[i,:,:] = Q
|
||||||
|
|
||||||
|
return WWx, WWz
|
||||||
|
|
||||||
def find_aopt_from_sep(sep, method='75perc'):
|
def find_aopt_from_sep(sep, method='75perc'):
|
||||||
"""Returns an estimate of optimal number of components from rmsecv.
|
"""Returns an estimate of optimal number of components from rmsecv.
|
||||||
"""
|
"""
|
||||||
|
|
Reference in New Issue