Multiple lib changes
This commit is contained in:
parent
a65d79697f
commit
1c2c2c8895
|
@ -1,7 +1,9 @@
|
|||
"""This module contains bilinear models(Functions)
|
||||
"""
|
||||
|
||||
import pygtk
|
||||
import gtk
|
||||
import gtk.glade
|
||||
from fluents.workflow import Function, OptionsDialog, Options
|
||||
from fluents.dataset import Dataset
|
||||
from fluents import plots, dataset, workflow, logger
|
||||
|
@ -12,7 +14,7 @@ from cx_utils import mat_center
|
|||
from validation import *
|
||||
import blmplots
|
||||
import engines
|
||||
|
||||
import copy
|
||||
|
||||
class Model(Function):
|
||||
"""Base class of bilinear models.
|
||||
|
@ -39,19 +41,38 @@ class PCA(Model):
|
|||
Model.__init__(self,id,name)
|
||||
self._options = PcaOptions()
|
||||
|
||||
def pre_validation(self, amax, n_sets, val_engine):
|
||||
"""Model calculations for maximum number of components.
|
||||
def validation(self, amax, cv_val_sets, pert_val_sets, cv_val_method, pert_val_method):
|
||||
"""Model validation and estimate of optimal numer of components.
|
||||
"""
|
||||
rmsep = val_engine(self.model['E0'], amax, n_sets)
|
||||
self.model['rmsep'] = rmsep
|
||||
self.model['aopt'] = rmsep.argmin()
|
||||
if self._options['calc_cv']:
|
||||
if cv_val_method == 'random':
|
||||
sep, aopt = pca_cv_val(self.model['E0'], amax, cv_val_sets)
|
||||
self.model['sep'] = sep
|
||||
|
||||
if self._options['calc_pert']:
|
||||
if pert_val_method == 'random_diag':
|
||||
sep, aopt = pca_alter_val(self.model['E0'], amax, pert_val_sets)
|
||||
self.model['sep'] = sep
|
||||
|
||||
if self._options['calc_cv']==False and self._options['calc_pert']==False:
|
||||
self.model['sep'] = None
|
||||
aopt = self._options['amax']
|
||||
|
||||
if self._options['auto_aopt']:
|
||||
logger.log("notice", "Auto aopt: " + str(aopt))
|
||||
self._options['aopt'] = aopt
|
||||
if aopt==1:
|
||||
logger.log('notice', 'Aopt at first component!')
|
||||
|
||||
def confidence(self, aopt, n_sets, alpha, p_center,
|
||||
crot, strict, cov_center ):
|
||||
"""Returns a confidence measure for model parameters.
|
||||
Based on aopt.
|
||||
"""
|
||||
aopt = self.model['aopt']
|
||||
if aopt<2:
|
||||
aopt = 2
|
||||
logger.log('notice','Hotellings T2 needs more than 1 comp.\n switching to 2!!')
|
||||
|
||||
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
|
||||
Pcal = self.model['P'][:,:aopt]
|
||||
tsq = hotelling(jk_segments, Pcal, p_center,
|
||||
|
@ -96,8 +117,8 @@ class PCA(Model):
|
|||
# vars
|
||||
ids_1 = [dim_name_1, DX.get_identifiers(dim_name_1, sorted=True)]
|
||||
# components (hidden)
|
||||
pc_ids = ['_comp_a', map(str,range(self.model['aopt'])) ]
|
||||
pc_ids_opt = ['_comp_o', map(str, range(self.model['aopt'])) ]
|
||||
pc_ids = ['_amax', map(str,range(self._options['amax'])) ]
|
||||
pc_ids_opt = ['_aopt', map(str, range(self._options['aopt'])) ]
|
||||
zero_dim = ['_doe', ['0']] # null dim, vector (hidden)
|
||||
match_ids = {'E':[ids_0, ids_1],
|
||||
'E0':[ids_0, ids_1],
|
||||
|
@ -121,8 +142,7 @@ class PCA(Model):
|
|||
#try:
|
||||
out.append(plt(self))
|
||||
#except:
|
||||
# print plt
|
||||
#logger.log('debug', 'Plot: %s failed') %plt
|
||||
# logger.log('debug', 'Plot: %s failed') %str(plt)
|
||||
return out
|
||||
|
||||
def run_o(self, data):
|
||||
|
@ -130,6 +150,8 @@ class PCA(Model):
|
|||
"""
|
||||
self.clear()
|
||||
options = self._options
|
||||
for item in options.items():
|
||||
print item
|
||||
self._dataset['X'] = data
|
||||
self._data['X'] = data.asarray().astype('<f8')
|
||||
if options['center']:
|
||||
|
@ -138,7 +160,8 @@ class PCA(Model):
|
|||
else:
|
||||
self.model['E0'] = data.asarray()
|
||||
|
||||
self.pre_validation(**options.pre_validation_options())
|
||||
self.validation(**options.validation_options())
|
||||
self.model['aopt'] = self._options['aopt']
|
||||
self.make_model(**options.make_model_options())
|
||||
if options['calc_conf']:
|
||||
self.confidence(**options.confidence_options())
|
||||
|
@ -159,7 +182,6 @@ class PCA(Model):
|
|||
if response == gtk.RESPONSE_OK:
|
||||
# set output data and plots
|
||||
dialog.set_output()
|
||||
|
||||
#run with current data and options
|
||||
return self.run_o(data)
|
||||
|
||||
|
@ -172,10 +194,10 @@ class PLS(Model):
|
|||
def pre_validation(self, amax, n_sets, val_engine):
|
||||
"""Returns rmsec,rmsep for model.
|
||||
"""
|
||||
rmsep = val_engine(self.model['E0'], self.model['F0'],
|
||||
rmsep, aopt = val_engine(self.model['E0'], self.model['F0'],
|
||||
amax, n_sets)
|
||||
self.model['rmsep'] = rmsep.mean(0)
|
||||
self.model['aopt'] = rmsep.mean(0).argmin()
|
||||
self.model['aopt'] = aopt
|
||||
|
||||
def confidence(self, aopt, n_sets, alpha, p_center,
|
||||
crot, strict, cov_center ):
|
||||
|
@ -341,34 +363,39 @@ class PcaOptions(Options):
|
|||
opt['algo'] = 'pca'
|
||||
opt['engine'] = engines.pca
|
||||
opt['mode'] = 'normal' # how much info to calculate
|
||||
opt['lod'] = 'compact' # how much info to store
|
||||
opt['amax'] = 5
|
||||
opt['aopt'] = 5
|
||||
opt['amax'] = 10
|
||||
opt['aopt'] = 100
|
||||
opt['auto_aopt'] = False
|
||||
opt['center'] = True
|
||||
opt['center_mth'] = mat_center
|
||||
opt['scale'] = 'scores'
|
||||
opt['calc_conf'] = True
|
||||
opt['n_sets'] = 5
|
||||
|
||||
opt['calc_conf'] = False
|
||||
opt['n_sets'] = 5
|
||||
opt['strict'] = True
|
||||
opt['p_center'] = 'med'
|
||||
opt['alpha'] = .8
|
||||
opt['cov_center'] = 'med'
|
||||
opt['crot'] = True
|
||||
|
||||
opt['val_engine'] = pca_alter_val
|
||||
opt['val_n_sets'] = 10
|
||||
opt['calc_cv'] = False
|
||||
opt['calc_pert'] = True
|
||||
opt['pert_val_method'] = 'random_diag'
|
||||
opt['cv_val_method'] = 'random'
|
||||
opt['cv_val_sets'] = 10
|
||||
opt['pert_val_sets'] = 10
|
||||
|
||||
opt['all_data'] = [('T', 'scores', True),
|
||||
('P', 'loadings', True),
|
||||
('E','residuals', False),
|
||||
('p_tsq', 't2', False),
|
||||
('rmsep', 'root mean square error of prediction', False)
|
||||
('rmsep', 'RMSEP', False)
|
||||
]
|
||||
|
||||
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
|
||||
(blmplots.PcaLoadingPlot, 'Loadings', True),
|
||||
(blmplots.LineViewXc, 'Line view', True)
|
||||
(blmplots.LineViewXc, 'Line view', True),
|
||||
(blmplots.PredictionErrorPlot, 'Residual Error', True)
|
||||
]
|
||||
|
||||
opt['out_data'] = ['T','P', 'p_tsq']
|
||||
|
@ -387,9 +414,10 @@ class PcaOptions(Options):
|
|||
'strict', 'crot', 'cov_center']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
def pre_validation_options(self):
|
||||
def validation_options(self):
|
||||
"""Options for pre_validation method."""
|
||||
opt_list = ['amax', 'n_sets', 'val_engine']
|
||||
opt_list = ['amax', 'cv_val_sets', 'pert_val_sets',
|
||||
'cv_val_method', 'pert_val_method']
|
||||
return self._copy_from_list(opt_list)
|
||||
|
||||
|
||||
|
@ -411,7 +439,7 @@ class PlsOptions(Options):
|
|||
opt['center'] = True
|
||||
opt['center_mth'] = mat_center
|
||||
opt['scale'] = 'scores'
|
||||
opt['calc_conf'] = True
|
||||
opt['calc_conf'] = False
|
||||
opt['n_sets'] = 10
|
||||
|
||||
opt['strict'] = True
|
||||
|
@ -420,13 +448,15 @@ class PlsOptions(Options):
|
|||
opt['cov_center'] = 'med'
|
||||
opt['crot'] = True
|
||||
|
||||
opt['calc_cv'] = True
|
||||
opt['calc_pert'] = False
|
||||
opt['val_engine'] = w_pls_cv_val
|
||||
|
||||
opt['all_data'] = [('T', 'scores', True),
|
||||
('P', 'loadings', True),
|
||||
('E','residuals', False),
|
||||
('p_tsq', 't2', False),
|
||||
('rmsep', 'root mean square error of prediction', False)
|
||||
('rmsep', 'RMSEP', False)
|
||||
]
|
||||
|
||||
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
|
||||
|
@ -468,9 +498,175 @@ class PcaOptionsDialog(OptionsDialog):
|
|||
def __init__(self, data, options, input_names=['X']):
|
||||
OptionsDialog.__init__(self, data, options, input_names)
|
||||
|
||||
glade_file = "/home/flatberg/Projects/project4/project4.glade"
|
||||
notebook_name = "vbox1"
|
||||
page_name = "Options"
|
||||
self.add_page_from_glade(glade_file, notebook_name, page_name)
|
||||
# connect signals to handlers
|
||||
dic = {"on_amax_value_changed" : self.on_amax_changed,
|
||||
"on_aopt_value_changed" : self.on_aopt_changed,
|
||||
"auto_aopt_toggled" : self.auto_aopt_toggled,
|
||||
"center_toggled" : self.center_toggled,
|
||||
"on_scale_changed" : self.on_scale_changed,
|
||||
"on_val_none" : self.val_toggled,
|
||||
"on_val_cv" : self.cv_toggled,
|
||||
"on_val_pert" : self.pert_toggled,
|
||||
"on_cv_method_changed" : self.on_cv_method_changed,
|
||||
"on_cv_sets_changed" : self.on_cv_sets_changed,
|
||||
"on_pert_sets_changed" : self.on_pert_sets_changed,
|
||||
"on_conf_toggled" : self.on_conf_toggled
|
||||
}
|
||||
|
||||
self.wTree.signal_autoconnect(dic)
|
||||
|
||||
# set/ensure valid default values/ranges
|
||||
amax_sb = self.wTree.get_widget("amax_spinbutton")
|
||||
max_comp = min(data[0].shape) # max num of components
|
||||
if self._options['amax']>max_comp:
|
||||
logger.log('debug', 'amax default too large ... adjusting')
|
||||
self._options['amax'] = max_comp
|
||||
amax_sb.get_adjustment().set_all(self._options['amax'], 1, max_comp, 1, 0, 0)
|
||||
# aopt spin button
|
||||
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||
if self._options['aopt']>self._options['amax']:
|
||||
self._options['aopt'] = self._options['amax'] + 1 - 1
|
||||
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, self._options['amax'], 1, 0, 0)
|
||||
|
||||
# scale
|
||||
scale_cb = self.wTree.get_widget("scale_combobox")
|
||||
scale_cb.set_active(0)
|
||||
|
||||
# validation frames
|
||||
if self._options['calc_cv']==False:
|
||||
cv_frame = self.wTree.get_widget("cv_frame")
|
||||
cv_frame.set_sensitive(False)
|
||||
if self._options['calc_pert']==False:
|
||||
pert_frame = self.wTree.get_widget("pert_frame")
|
||||
pert_frame.set_sensitive(False)
|
||||
|
||||
cv = self.wTree.get_widget("cv_method").set_active(0)
|
||||
pm = self.wTree.get_widget("pert_method").set_active(0)
|
||||
|
||||
# confidence
|
||||
if self._options['calc_conf']==True:
|
||||
self.wTree.get_widget("subset_frame").set_sensitive(True)
|
||||
else:
|
||||
self.wTree.get_widget("subset_frame").set_sensitive(False)
|
||||
|
||||
|
||||
def on_amax_changed(self, sb):
|
||||
logger.log("debug", "amax changed: new value: %s" %sb.get_value_as_int())
|
||||
amax = sb.get_value_as_int()
|
||||
# update aopt if needed
|
||||
if amax<self._options['aopt']:
|
||||
self._options['aopt'] = amax
|
||||
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||
aopt_sb.get_adjustment().set_all(self._options['aopt'], 1, amax, 1, 0, 0)
|
||||
self._options['amax'] = sb.get_value_as_int()
|
||||
|
||||
def on_aopt_changed(self, sb):
|
||||
aopt = sb.get_value_as_int()
|
||||
self._options['aopt'] = aopt
|
||||
|
||||
def auto_aopt_toggled(self, tb):
|
||||
aopt_sb = self.wTree.get_widget("aopt_spinbutton")
|
||||
if tb.get_active():
|
||||
self._options['auto_aopt'] = True
|
||||
aopt_sb.set_sensitive(False)
|
||||
else:
|
||||
self._options['auto_aopt'] = False
|
||||
aopt_sb.set_sensitive(True)
|
||||
|
||||
def center_toggled(self, tb):
|
||||
if tb.get_active():
|
||||
self._options['center'] = True
|
||||
else:
|
||||
logger.log("debug", "centering set to False")
|
||||
self._options['center'] = False
|
||||
|
||||
def on_scale_changed(self, cb):
|
||||
scale = cb.get_active_text()
|
||||
if scale=='Scores':
|
||||
self._options['scale'] = 'scores'
|
||||
elif scale=='Loadings':
|
||||
self._options['scale'] = 'loads'
|
||||
else:
|
||||
raise IOError
|
||||
|
||||
def val_toggled(self, tb):
|
||||
"""Callback for validation: None. """
|
||||
cv_frame = self.wTree.get_widget("cv_frame")
|
||||
pert_frame = self.wTree.get_widget("pert_frame")
|
||||
cv_tb = self.wTree.get_widget("cv_toggle")
|
||||
p_tb = self.wTree.get_widget("pert_toggle")
|
||||
if tb.get_active():
|
||||
self._options['calc_cv'] = False
|
||||
self._options['calc_pert'] = False
|
||||
cv_frame.set_sensitive(False)
|
||||
pert_frame.set_sensitive(False)
|
||||
cv_tb.set_sensitive(False)
|
||||
p_tb.set_sensitive(False)
|
||||
else:
|
||||
p_tb.set_sensitive(True)
|
||||
cv_tb.set_sensitive(True)
|
||||
if p_tb.get_active():
|
||||
pert_frame.set_sensitive(True)
|
||||
self._options['calc_pert'] = True
|
||||
if cv_tb.get_active():
|
||||
cv_frame.set_sensitive(True)
|
||||
self._options['calc_cv'] = True
|
||||
|
||||
def cv_toggled(self, tb):
|
||||
cv_frame = self.wTree.get_widget("cv_frame")
|
||||
if tb.get_active():
|
||||
cv_frame.set_sensitive(True)
|
||||
self._options['calc_cv'] = True
|
||||
else:
|
||||
cv_frame.set_sensitive(False)
|
||||
self._options['calc_cv'] = False
|
||||
|
||||
def pert_toggled(self, tb):
|
||||
pert_frame = self.wTree.get_widget("pert_frame")
|
||||
if tb.get_active():
|
||||
pert_frame.set_sensitive(True)
|
||||
self._options['calc_pert'] = True
|
||||
else:
|
||||
pert_frame.set_sensitive(False)
|
||||
self._options['calc_pert'] = False
|
||||
|
||||
|
||||
def on_cv_method_changed(self, cb):
|
||||
method = cb.get_active_text()
|
||||
if method == 'Random':
|
||||
self._options['cv_val_method'] = 'random'
|
||||
|
||||
def on_pert_method_changed(self, cb):
|
||||
method = cb.get_active_text()
|
||||
if method == 'Random diags':
|
||||
self._options['pert_val_method'] = 'random_diag'
|
||||
|
||||
def on_cv_sets_changed(self, sb):
|
||||
val = sb.get_value_as_int()
|
||||
self._options['cv_val_sets'] = val
|
||||
|
||||
def on_pert_sets_changed(self, sb):
|
||||
val = sb.get_value_as_int()
|
||||
self._options['pert_val_sets'] = val
|
||||
|
||||
def on_conf_toggled(self, tb):
|
||||
if tb.get_active():
|
||||
self._options['calc_conf'] = False
|
||||
self.wTree.get_widget("subset_frame").set_sensitive(False)
|
||||
else:
|
||||
self._options['calc_conf'] = True
|
||||
self.wTree.get_widget("subset_frame").set_sensitive(True)
|
||||
|
||||
|
||||
|
||||
|
||||
class PlsOptionsDialog(OptionsDialog):
|
||||
"""Options dialog for Partial Least Squares Regression.
|
||||
"""
|
||||
|
||||
def __init__(self, data, options, input_names=['X', 'Y']):
|
||||
OptionsDialog.__init__(self, data, options, input_names)
|
||||
|
|
|
@ -12,8 +12,9 @@ fixme2:
|
|||
colorbar, but when adding colors the colorbar shoud be created.
|
||||
"""
|
||||
from fluents import plots
|
||||
from scipy import dot,sum,diag,arange,log,mean,newaxis
|
||||
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt
|
||||
from matplotlib import cm
|
||||
import pylab as PB
|
||||
|
||||
class PcaScorePlot(plots.ScatterPlot):
|
||||
"""PCA Score plot"""
|
||||
|
@ -103,38 +104,40 @@ class PlsLoadingPlot(plots.ScatterPlot):
|
|||
def set_ordinate(self, n):
|
||||
self.yaxis_data = self._T[:,n]
|
||||
|
||||
|
||||
class LineViewXc(plots.LineViewPlot):
|
||||
"""A line view of centered raw data
|
||||
"""
|
||||
def __init__(self, func_class, name='Profiles'):
|
||||
def __init__(self, model, name='Profiles'):
|
||||
# copy, center, plot
|
||||
x = func_class._dataset['X'].copy()
|
||||
x = model._dataset['X'].copy()
|
||||
x._array = x._array - mean(x._array,0)[newaxis]
|
||||
plots.LineViewPlot.__init__(self, x, 1, None, name)
|
||||
|
||||
|
||||
class ParalellCoordinates(plots.Plot):
|
||||
"""Parallell coordinates for score loads with many comp.
|
||||
"""
|
||||
def __init__(self, model, p='loads'):
|
||||
pass
|
||||
|
||||
|
||||
class PlsQvalScatter(plots.ScatterPlot):
|
||||
"""A vulcano like plot of loads vs qvals
|
||||
"""
|
||||
def __init__(self, func_class, pc=0):
|
||||
model = func_class.model
|
||||
if not model.has_key('w_tsq'):
|
||||
def __init__(self, model, pc=0):
|
||||
if not model.model.has_key('w_tsq'):
|
||||
return
|
||||
self._W = model['P']
|
||||
dataset_1 = func_class.as_dataset('P')
|
||||
dataset_2 = func_class.as_dataset('w_tsq')
|
||||
self._W = model.model['P']
|
||||
dataset_1 = model.as_dataset('P')
|
||||
dataset_2 = model.as_dataset('w_tsq')
|
||||
id_dim = dataset_1.get_dim_name(0) #genes
|
||||
sel_dim = dataset_1.get_dim_name(1) #_comp
|
||||
sel_dim_2 = dataset_2.get_dim_name(1) #_zero_dim
|
||||
id_1, = dataset_1.get_identifiers(sel_dim, [0])
|
||||
id_2, = dataset_2.get_identifiers(sel_dim_2, [0])
|
||||
if model.has_key('w_tsq'):
|
||||
col = model['w_tsq'].ravel()
|
||||
if model.model.has_key('w_tsq'):
|
||||
col = model.model['w_tsq'].ravel()
|
||||
col = normalise(col)
|
||||
else:
|
||||
col = 'g'
|
||||
|
@ -143,6 +146,33 @@ class PlsQvalScatter(plots.ScatterPlot):
|
|||
c=col, s=20, sel_dim_2=sel_dim_2,
|
||||
name='Load Volcano')
|
||||
|
||||
class PredictionErrorPlot(plots.Plot):
|
||||
"""A boxplot of prediction error vs. comp. number.
|
||||
"""
|
||||
def __init__(self, model, name="Pred. Err."):
|
||||
if not model.model.has_key('sep'):
|
||||
logger.log('notice', 'Model has no calculations of sep')
|
||||
return
|
||||
plots.Plot.__init__(self, name)
|
||||
self._frozen = True
|
||||
self.current_dim = 'johndoe'
|
||||
self.ax = self.fig.add_subplot(111)
|
||||
|
||||
# draw
|
||||
sep = model.model['sep']
|
||||
aopt = model.model['aopt']
|
||||
bx_plot_lines = self.ax.boxplot(sqrt(sep))
|
||||
aopt_marker = self.ax.axvline(aopt, linewidth=10,
|
||||
color='r',zorder=0,
|
||||
alpha=.5)
|
||||
|
||||
# add canvas
|
||||
self.add(self.canvas)
|
||||
self.canvas.show()
|
||||
|
||||
def set_current_selection(self, selection):
|
||||
pass
|
||||
|
||||
|
||||
class InfluencePlot(plots.ScatterPlot):
|
||||
"""
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from scipy import apply_along_axis,newaxis,zeros,\
|
||||
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
|
||||
trace,zeros_like,sign,sort,real,argsort,rand,array
|
||||
trace,zeros_like,sign,sort,real,argsort,rand,array,\
|
||||
matrix
|
||||
from scipy.linalg import norm,svd,inv,eig
|
||||
from scipy.stats import median,mean
|
||||
|
||||
|
@ -106,3 +107,7 @@ def mat_center(X,axis=0,ret_mn=False):
|
|||
return Xs,mnX
|
||||
else:
|
||||
return Xs
|
||||
|
||||
def m_shape(array):
|
||||
"""Returns the array shape on the form of a numpy.matrix."""
|
||||
return matrix(array).shape
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
"""Module contain algorithms for (burdensome) calculations.
|
||||
|
||||
There is no typechecking of any kind here, just focus on speed
|
||||
|
@ -7,7 +6,7 @@ There is no typechecking of any kind here, just focus on speed
|
|||
from scipy.linalg import svd,norm,inv,pinv,qr
|
||||
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
||||
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
|
||||
rand,sum,cumsum
|
||||
rand,sum,cumsum,matrix
|
||||
|
||||
def pca(a, aopt, scale='scores', mode='normal'):
|
||||
""" Principal Component Analysis model
|
||||
|
@ -19,6 +18,7 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
|||
|
||||
m, n = a.shape
|
||||
u, s, vt = svd(a, full_matrices=0)
|
||||
eigvals = (1./m)*s
|
||||
T = u*s
|
||||
T = T[:,:aopt]
|
||||
P = vt[:aopt,:].T
|
||||
|
@ -43,6 +43,29 @@ def pca(a, aopt, scale='scores', mode='normal'):
|
|||
|
||||
return {'T':T, 'P':P, 'E':E}
|
||||
|
||||
def pcr(a, b, aopt=2, scale='scores', mode='normal'):
|
||||
"""Returns Principal component regression model."""
|
||||
m, n = a.shape
|
||||
try:
|
||||
k, l = b.shape
|
||||
except:
|
||||
k = b.shape[0]
|
||||
l = 1
|
||||
B = empty((aopt, n, l))
|
||||
U, s, Vt = svd(a, full_matrices=True)
|
||||
T = U*s
|
||||
T = T[:,:aopt]
|
||||
P = Vt[:aopt,:].T
|
||||
Q = dot(dot(inv(dot(T.T, T)), T.T), b).T
|
||||
for i in range(aopt):
|
||||
ti = T[:,:i+1]
|
||||
r = dot(dot(inv(dot(ti.T,ti)), ti.T), b)
|
||||
B[i] = dot(P[:,:i+1], r)
|
||||
E = a - dot(T, P.T)
|
||||
F = b - dot(T, Q.T)
|
||||
|
||||
return {'T':T, 'P':P,'Q': Q, 'B':B, 'E':E, 'F':F}
|
||||
|
||||
def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||
"""Kernel pls for tall/wide matrices.
|
||||
|
||||
|
@ -51,9 +74,9 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
|||
"""
|
||||
m, n = a.shape
|
||||
if ab!=None:
|
||||
mm,l = ab.shape
|
||||
mm, l = m_shape(ab)
|
||||
else:
|
||||
k,l = b.shape
|
||||
k, l = m_shape(b)
|
||||
|
||||
W = empty((n, aopt))
|
||||
P = empty((n, aopt))
|
||||
|
@ -66,7 +89,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
|||
ab = dot(a.T, b)
|
||||
for i in range(aopt):
|
||||
if ab.shape[1]==1:
|
||||
w = ab
|
||||
w = ab.reshape(mm, l)
|
||||
else:
|
||||
u, s, vh = svd(dot(ab.T, ab))
|
||||
w = dot(ab, u[:,:1])
|
||||
|
@ -147,12 +170,11 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
|||
"""Undeflated Ridged svd(X'Y)
|
||||
"""
|
||||
m, n = a.shape
|
||||
k, l = b.shape
|
||||
k, l = m_shape(b)
|
||||
u, s, vt = svd(b, full_matrices=0)
|
||||
g0 = dot(u*s, u.T)
|
||||
g = (1 - r)*g0 + r*eye(m)
|
||||
ag = dot(a.T, g)
|
||||
|
||||
u, s, vt = svd(ag, full_matrices=0)
|
||||
W = u[:,:aopt]
|
||||
K = vt[:aopt,:].T
|
||||
|
@ -167,7 +189,7 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
|||
|
||||
U = dot(g0, K) #fixme check this
|
||||
Q = dot(b.T, dot(T, inv(dot(T.T, T)) ))
|
||||
B = zeros((aopt, n, l))
|
||||
B = zeros((aopt, n, l), dtype='f')
|
||||
for i in range(aopt):
|
||||
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
|
||||
# leverages
|
||||
|
@ -198,3 +220,6 @@ def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
|||
|
||||
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
|
||||
|
||||
|
||||
def m_shape(array):
|
||||
return matrix(array).shape
|
||||
|
|
|
@ -418,6 +418,31 @@ def weighted_laplacian(G,with_labels=False):
|
|||
else:
|
||||
return L
|
||||
|
||||
def subnetworks(G, T2):
|
||||
"""Return the highest scoring (T2-test) subgraph og G.
|
||||
|
||||
Use simulated annealing to identify highly scoring subgraphs.
|
||||
|
||||
ref: -- Ideker et.al (Bioinformatics 18, 2002)
|
||||
-- Patil and Nielsen (PNAS 2006)
|
||||
|
||||
"""
|
||||
N = 1000
|
||||
states = [(node, False) for node in G.nodes()]
|
||||
t2_last = 0.0
|
||||
for i in xrange(N):
|
||||
if i==0: #assign random states
|
||||
states = [(state[0], True) for state in states if rand(1)>.5]
|
||||
sub_nodes = [state[0] for state in states if state[1]]
|
||||
Gsub = NX.subgraph(G, sub_nodes)
|
||||
Gsub = NX.connected_components_subgraphs(Gsub)[0]
|
||||
t2 = [T2[node] for node in Gsub]
|
||||
if t2>t2_last:
|
||||
pass
|
||||
else:
|
||||
p = numpy.exp()
|
||||
|
||||
|
||||
|
||||
"""Below are methods for calculating graph metrics
|
||||
|
||||
|
@ -473,7 +498,7 @@ Ke = expm(A) .... expm(-A)?
|
|||
# 13.09.2206: update for use in numpy
|
||||
|
||||
|
||||
def K_expAdj(W, normalised=False, alpha=1.0):
|
||||
def K_expAdj(W, normalised=True, alpha=1.0):
|
||||
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
|
||||
"""
|
||||
W = asarray(W)
|
||||
|
@ -499,7 +524,7 @@ def K_expAdj(W, normalised=False, alpha=1.0):
|
|||
|
||||
return dot(dot(vr,psigma),vri)
|
||||
|
||||
def K_vonNeumann(W,normalised=False,alpha=1.0):
|
||||
def K_vonNeumann(W, normalised=True, alpha=1.0):
|
||||
""" The geometric series of path lengths.
|
||||
Returns matrix square root of pseudo inverse of the adjacency matrix.
|
||||
"""
|
||||
|
|
|
@ -30,22 +30,14 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
|
|||
b_in = b[inn,:]
|
||||
b_out = b[out,:]
|
||||
if center:
|
||||
# centering projector: I - (1/n)11'
|
||||
# nin = len(inn)
|
||||
# Pc = eye(nin) - outer(ones((nin,)),ones((nin,)))/nin
|
||||
# xxt - x( outer(ones((nin,)),ones((nin,)))/nin ) x.T
|
||||
|
||||
# de jong:
|
||||
h = sum(aat_in,0)[ :,newaxis]
|
||||
h = (h - mean(h)/2)/len(inn)
|
||||
mn_a = h + h.T
|
||||
aat_in = aat_in - mn_a
|
||||
aat_in, mn = outerprod_centering(aat_in)
|
||||
aat_out = aat_out - mn
|
||||
if index_out:
|
||||
yield aat_in,aat_out,b_in,b_out,out
|
||||
else:
|
||||
yield aat_in,aat_out,b_in,b_out
|
||||
|
||||
def pls_gen(a,b, n_blocks=None, center=False, index_out=False,axis=0):
|
||||
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None):
|
||||
"""Random block crossvalidation
|
||||
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
|
||||
"""
|
||||
|
@ -56,17 +48,38 @@ def pls_gen(a,b, n_blocks=None, center=False, index_out=False,axis=0):
|
|||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
|
||||
for out in out_ind_sets:
|
||||
inn = [i for i in index if i not in out]
|
||||
acal = a.take(inn, 0)
|
||||
atrue = a.take(out, 0)
|
||||
bcal = b.take(inn, 0)
|
||||
btrue = b.take(out, 0)
|
||||
if center:
|
||||
a = a - mean(a,0)[newaxis]
|
||||
b = b - mean(b,0)[newaxis]
|
||||
mn_a = acal.mean(0)[newaxis]
|
||||
acal = acal - mn_a
|
||||
atrue = atrue - mn_a
|
||||
mn_b = bcal.mean(0)[newaxis]
|
||||
bcal = bcal - mn_b
|
||||
btrue = btrue - mn_b
|
||||
if metric!=None:
|
||||
acal = dot(acal, metric)
|
||||
if index_out:
|
||||
yield a.take(inn,0),a.take(out,0), b.take(inn,0),b.take(out,0),out
|
||||
yield acal, atrue, bcal, btrue, out
|
||||
else:
|
||||
yield a.take(inn,0),a.take(out,0), b.take(inn,0),b.take(out,0)
|
||||
yield acal, atrue, bcal, btrue
|
||||
|
||||
|
||||
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
|
||||
"""PCA random block crossval generator.
|
||||
"""Returns a generator of crossvalidation sample segments.
|
||||
|
||||
input:
|
||||
-- a, data matrix (m x n)
|
||||
-- n_sets, number of segments/subsets to generate.
|
||||
-- center, bool, choice of centering each subset
|
||||
-- index_out, bool, return subset index
|
||||
-- axis, int, which axis to get subset from
|
||||
|
||||
ouput:
|
||||
-- V, generator with (n_sets) memebers (subsets)
|
||||
|
||||
"""
|
||||
m = a.shape[axis]
|
||||
index = randperm(m)
|
||||
|
@ -76,14 +89,19 @@ def pca_gen(a,n_sets=None, center=False, index_out=False,axis=0):
|
|||
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
|
||||
for out in out_ind_sets:
|
||||
inn = [i for i in index if i not in out]
|
||||
acal = a.take(inn, 0)
|
||||
atrue = a.take(out, 0)
|
||||
if center:
|
||||
a = a - mean(a,0)[newaxis]
|
||||
mn_a = acal.mean(0)[newaxis]
|
||||
acal = acal - mn_a
|
||||
atrue = atrue - mn_a
|
||||
if index_out:
|
||||
yield a.take(inn,0),a.take(out,0),out
|
||||
yield acal, atrue, out
|
||||
else:
|
||||
yield a.take(inn,0),a.take(out,0)
|
||||
yield acal, atrue
|
||||
|
||||
def w_pls_gen_jk(a,b,n_sets=None,center=True,index_out=False,axis=0):
|
||||
def w_pls_gen_jk(a, b, n_sets=None, center=True,
|
||||
index_out=False, axis=0):
|
||||
"""Random block crossvalidation for wide X (m>>n)
|
||||
Leave-one-out is a subset, with n_sets equals a.shape[-1]
|
||||
|
||||
|
@ -103,9 +121,8 @@ def w_pls_gen_jk(a,b,n_sets=None,center=True,index_out=False,axis=0):
|
|||
a_in = a[inn,:]
|
||||
mn_a = 0
|
||||
mAB = 0
|
||||
|
||||
if center:
|
||||
mn_a = mean(a,0)[newaxis]
|
||||
mn_a = a_in.mean(0)[newaxis]
|
||||
mAin = dot(-ones((1,nout)), a[out,:])/nin
|
||||
mBin = dot(-ones((1,nout)), b[out,:])/nin
|
||||
mAB = dot(mAin.T, (mBin*nin))
|
||||
|
@ -113,9 +130,9 @@ def w_pls_gen_jk(a,b,n_sets=None,center=True,index_out=False,axis=0):
|
|||
a_in = a_in - mn_a
|
||||
|
||||
if index_out:
|
||||
yield ain,ab, out
|
||||
yield a_in, ab_in, out
|
||||
else:
|
||||
yield a_in, ab
|
||||
yield a_in, ab_in
|
||||
|
||||
def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
|
||||
"""Random block shuffling along 1d axis
|
||||
|
@ -185,3 +202,19 @@ def diag_pert(a, n_sets=10, center=True, index_out=False):
|
|||
yield a_out, asarray(out)
|
||||
else:
|
||||
yield a_out
|
||||
|
||||
|
||||
def outerprod_centering(aat, ret_mn=True):
|
||||
"""Returns mean centered symmetric outerproduct matrix.
|
||||
"""
|
||||
n = aat.shape[0]
|
||||
h = aat.sum(0)[:,newaxis]
|
||||
h = (h - mean(h)/2)/n
|
||||
mn_a = h + h.T
|
||||
aatc = aat - mn_a
|
||||
if ret_mn:
|
||||
return aatc, aat.mean(0)
|
||||
return aat - mn_a
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,20 +1,24 @@
|
|||
"""This module implements some common validation schemes from pca and pls.
|
||||
"""
|
||||
from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
|
||||
apply_along_axis,eye, kron
|
||||
apply_along_axis,eye,kron,array,sort
|
||||
from scipy.stats import median
|
||||
from scipy.linalg import triu,inv,svd,norm
|
||||
|
||||
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
|
||||
from engines import w_simpls,pls,bridge,pca
|
||||
from pylab import *
|
||||
from cx_utils import m_shape
|
||||
|
||||
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||
"""RMSEP calc for pls with wide X.
|
||||
"""Returns and RMSEP for pls tailored for wide X.
|
||||
"""
|
||||
k, l = Y.shape
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='f')
|
||||
# X,Y are centered
|
||||
# X,Y are centered0
|
||||
if n_blocks==None:
|
||||
n_blocks = Y.shape[0]
|
||||
V = w_pls_gen(dot(X, X.T), Y, n_blocks=n_blocks, center=True)
|
||||
XXt = dot(X, X.T)
|
||||
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
||||
for Din, Doi, Yin, Yout in V:
|
||||
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||
Yin = Yin - ym
|
||||
|
@ -24,7 +28,6 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
|||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
||||
else:
|
||||
"Other algo-support comming soon"
|
||||
raise NotImplementedError
|
||||
#Yhat = empty((amax, k, l),dtype='<f8')
|
||||
Yhat = []
|
||||
|
@ -34,13 +37,14 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
|||
E = E + sum(E, 0)/Din.shape[0]
|
||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||
#Yhat = Y - dot(That,Q.T)
|
||||
return sqrt(PRESS/Y.shape[0])
|
||||
rmsep = sqrt(PRESS/Y.shape[0])
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
return rmsep, aopt
|
||||
|
||||
def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||
""" Validation results of pls model.
|
||||
"""
|
||||
|
||||
k, l = Y.shape
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||
EE = zeros((amax, k, l), dtype='<f8')
|
||||
Yhat = zeros((amax, k, l), dtype='<f8')
|
||||
|
@ -50,6 +54,7 @@ def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
|||
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
||||
Yin = (Yin - ym)
|
||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||
|
||||
if algo=='pls':
|
||||
dat = pls(Xin, Yin, amax, mode='normal')
|
||||
elif algo=='bridge':
|
||||
|
@ -62,7 +67,9 @@ def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
|||
EE[a,out,:] = E
|
||||
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
|
||||
|
||||
return sqrt(PRESS/(k-1.)), EE, Yhat
|
||||
rmsep = sqrt(PRESS/(k-1.))
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
return rmsep, aopt
|
||||
|
||||
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
||||
"""Pca validation by altering elements in X.
|
||||
|
@ -79,18 +86,27 @@ def pca_alter_val(a, amax, n_sets=10,method='diag'):
|
|||
EE = a_sub - Xhat.ravel().take(ind)
|
||||
tot = (a_sub**2).sum()
|
||||
sep[i,j] = (EE**2).sum()/tot
|
||||
return sqrt(sep.mean(0))
|
||||
#return sep
|
||||
sep = sqrt(sep)
|
||||
aopt = find_aopt_from_sep(sep)
|
||||
return sep, aopt
|
||||
|
||||
def pca_cv_val(X, amax, n_sets):
|
||||
""" Cross validation of pca using random sets crossval.
|
||||
def pca_cv_val(a, amax, n_sets):
|
||||
""" Returns PRESS from cross-validated pca using random segments.
|
||||
|
||||
input:
|
||||
-- a, data matrix (m x n)
|
||||
-- amax, maximum nuber of components used
|
||||
-- n_sets, number of segments to calculate
|
||||
output:
|
||||
-- sep, (amax x m x n), squared error of prediction (press)
|
||||
-- aopt, guestimated optimal number of components
|
||||
"""
|
||||
m, n = X.shape
|
||||
xtot = (X**2).sum()
|
||||
V = pca_gen(X, n_sets=7, center=True, index_out=True)
|
||||
m, n = a.shape
|
||||
E = empty((amax, m, n), dtype='f')
|
||||
xtot = (a**2).sum() # this needs centering
|
||||
V = pca_gen(a, n_sets=7, center=True, index_out=True)
|
||||
for xi, xout, ind in V:
|
||||
dat_i = pca(xi, amax, mode='detailed')
|
||||
dat_i = pca(xi, amax, mode='fast')
|
||||
Pi = dat_i['P']
|
||||
for a in xrange(amax):
|
||||
Pia = Pi[:,:a+1]
|
||||
|
@ -99,7 +115,9 @@ def pca_cv_val(X, amax, n_sets):
|
|||
sep = []
|
||||
for a in xrange(amax):
|
||||
sep.append(E[a].sum()/xtot)
|
||||
return sqrt(sep.mean(0))
|
||||
sep = array(sep)
|
||||
aopt = find_aopt_from_sep(sep)
|
||||
return sep, aopt
|
||||
|
||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
|
||||
""" Returns CV-segments of paramter W for wide X.
|
||||
|
@ -128,7 +146,20 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
|
|||
return WW
|
||||
|
||||
def pca_jkP(a, aopt, n_blocks=None):
|
||||
""" Returns CV-segments of paramter P.
|
||||
"""Returns loading from PCA on CV-segments.
|
||||
|
||||
input:
|
||||
-- a, data matrix (n x m)
|
||||
-- aopt, number of components in model.
|
||||
-- nblocks, number of segments
|
||||
output:
|
||||
-- PP, loadings collected in a three way matrix
|
||||
(n_segments, m, aopt)
|
||||
|
||||
comments:
|
||||
* The loadings are scaled with the (1/samples)*eigenvalues.
|
||||
* Crossvalidation method is currently set to random blocks of samples.
|
||||
|
||||
todo: add support for T
|
||||
fixme: more efficient to add this in validation loop
|
||||
"""
|
||||
|
@ -138,8 +169,30 @@ def pca_jkP(a, aopt, n_blocks=None):
|
|||
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
|
||||
V = pca_gen(a, n_sets=n_blocks, center=True)
|
||||
for nn,(a_in, a_out) in enumerate(V):
|
||||
dat = pca(a_in, aopt, mode='fast')
|
||||
dat = pca(a_in, aopt, mode='fast', scale='loads')
|
||||
P = dat['P']
|
||||
PP[nn,:,:] = P
|
||||
|
||||
return PP
|
||||
|
||||
def find_aopt_from_sep(sep, method='75perc'):
|
||||
"""Returns an estimate of optimal number of components from rmsecv.
|
||||
"""
|
||||
if method=='vanilla':
|
||||
# min rmsep
|
||||
rmsecv = sqrt(sep.mean(0))
|
||||
return rmsecv.argmin() + 1
|
||||
|
||||
elif method=='75perc':
|
||||
prct = .75 #percentile
|
||||
ind = 1.*sep.shape[0]*prct
|
||||
med = median(sep)
|
||||
prc_75 = []
|
||||
for col in sep.T:
|
||||
col.sort()
|
||||
prc_75.append(col[int(ind)])
|
||||
prc_75 = array(prc_75)
|
||||
for i in range(1, sep.shape[1], 1):
|
||||
if med[i-1]<prc_75[i]:
|
||||
return i
|
||||
return len(med)
|
||||
|
|
Reference in New Issue