Multiple lib changes
This commit is contained in:
@@ -1,30 +1,33 @@
|
||||
"""This module implements some common validation schemes from pca and pls.
|
||||
"""
|
||||
from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
|
||||
apply_along_axis,eye, kron
|
||||
apply_along_axis,eye,kron,array,sort
|
||||
from scipy.stats import median
|
||||
from scipy.linalg import triu,inv,svd,norm
|
||||
|
||||
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
|
||||
from engines import w_simpls,pls, bridge,pca
|
||||
from pylab import *
|
||||
from engines import w_simpls,pls,bridge,pca
|
||||
from cx_utils import m_shape
|
||||
|
||||
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||
"""RMSEP calc for pls with wide X.
|
||||
"""Returns and RMSEP for pls tailored for wide X.
|
||||
"""
|
||||
k, l = Y.shape
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='f')
|
||||
# X,Y are centered
|
||||
# X,Y are centered0
|
||||
if n_blocks==None:
|
||||
n_blocks = Y.shape[0]
|
||||
V = w_pls_gen(dot(X, X.T), Y, n_blocks=n_blocks, center=True)
|
||||
XXt = dot(X, X.T)
|
||||
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
||||
for Din, Doi, Yin, Yout in V:
|
||||
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||
Yin = Yin - ym
|
||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||
if algo=='simpls':
|
||||
dat = w_simpls(Din, Yin, amax)
|
||||
Q,U,H = dat['Q'], dat['U'], dat['H']
|
||||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
||||
else:
|
||||
"Other algo-support comming soon"
|
||||
raise NotImplementedError
|
||||
#Yhat = empty((amax, k, l),dtype='<f8')
|
||||
Yhat = []
|
||||
@@ -34,13 +37,14 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||
E = E + sum(E, 0)/Din.shape[0]
|
||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||
#Yhat = Y - dot(That,Q.T)
|
||||
return sqrt(PRESS/Y.shape[0])
|
||||
rmsep = sqrt(PRESS/Y.shape[0])
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
return rmsep, aopt
|
||||
|
||||
def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||
""" Validation results of pls model.
|
||||
"""
|
||||
|
||||
k, l = Y.shape
|
||||
"""
|
||||
k, l = m_shape(Y)
|
||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||
EE = zeros((amax, k, l), dtype='<f8')
|
||||
Yhat = zeros((amax, k, l), dtype='<f8')
|
||||
@@ -50,6 +54,7 @@ def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
||||
Yin = (Yin - ym)
|
||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||
|
||||
if algo=='pls':
|
||||
dat = pls(Xin, Yin, amax, mode='normal')
|
||||
elif algo=='bridge':
|
||||
@@ -62,9 +67,11 @@ def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||
EE[a,out,:] = E
|
||||
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
|
||||
|
||||
return sqrt(PRESS/(k-1.)), EE, Yhat
|
||||
rmsep = sqrt(PRESS/(k-1.))
|
||||
aopt = find_aopt_from_sep(rmsep)
|
||||
return rmsep, aopt
|
||||
|
||||
def pca_alter_val(a, amax, n_sets=10,method='diag'):
|
||||
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
||||
"""Pca validation by altering elements in X.
|
||||
"""
|
||||
# todo: it is just as easy to do jk-estimates here as well
|
||||
@@ -79,18 +86,27 @@ def pca_alter_val(a, amax, n_sets=10,method='diag'):
|
||||
EE = a_sub - Xhat.ravel().take(ind)
|
||||
tot = (a_sub**2).sum()
|
||||
sep[i,j] = (EE**2).sum()/tot
|
||||
return sqrt(sep.mean(0))
|
||||
#return sep
|
||||
sep = sqrt(sep)
|
||||
aopt = find_aopt_from_sep(sep)
|
||||
return sep, aopt
|
||||
|
||||
def pca_cv_val(X, amax, n_sets):
|
||||
""" Cross validation of pca using random sets crossval.
|
||||
def pca_cv_val(a, amax, n_sets):
|
||||
""" Returns PRESS from cross-validated pca using random segments.
|
||||
|
||||
input:
|
||||
-- a, data matrix (m x n)
|
||||
-- amax, maximum number of components used
|
||||
-- n_sets, number of segments to calculate
|
||||
output:
|
||||
-- sep, (amax x m x n), squared error of prediction (press)
|
||||
-- aopt, guestimated optimal number of components
|
||||
"""
|
||||
m, n = X.shape
|
||||
xtot = (X**2).sum()
|
||||
V = pca_gen(X, n_sets=7, center=True, index_out=True)
|
||||
m, n = a.shape
|
||||
E = empty((amax, m, n), dtype='f')
|
||||
for xi,xout,ind in V:
|
||||
dat_i = pca(xi, amax, mode='detailed')
|
||||
xtot = (a**2).sum() # this needs centering
|
||||
V = pca_gen(a, n_sets=7, center=True, index_out=True)
|
||||
for xi, xout, ind in V:
|
||||
dat_i = pca(xi, amax, mode='fast')
|
||||
Pi = dat_i['P']
|
||||
for a in xrange(amax):
|
||||
Pia = Pi[:,:a+1]
|
||||
@@ -99,7 +115,9 @@ def pca_cv_val(X, amax, n_sets):
|
||||
sep = []
|
||||
for a in xrange(amax):
|
||||
sep.append(E[a].sum()/xtot)
|
||||
return sqrt(sep.mean(0))
|
||||
sep = array(sep)
|
||||
aopt = find_aopt_from_sep(sep)
|
||||
return sep, aopt
|
||||
|
||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
|
||||
""" Returns CV-segments of paramter W for wide X.
|
||||
@@ -128,7 +146,20 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True):
|
||||
return WW
|
||||
|
||||
def pca_jkP(a, aopt, n_blocks=None):
|
||||
""" Returns CV-segments of paramter P.
|
||||
"""Returns loading from PCA on CV-segments.
|
||||
|
||||
input:
|
||||
-- a, data matrix (n x m)
|
||||
-- aopt, number of components in model.
|
||||
-- nblocks, number of segments
|
||||
output:
|
||||
-- PP, loadings collected in a three way matrix
|
||||
(n_segments, m, aopt)
|
||||
|
||||
comments:
|
||||
* The loadings are scaled with the (1/samples)*eigenvalues.
|
||||
* Crossvalidation method is currently set to random blocks of samples.
|
||||
|
||||
todo: add support for T
|
||||
fixme: more efficient to add this in validation loop
|
||||
"""
|
||||
@@ -138,8 +169,30 @@ def pca_jkP(a, aopt, n_blocks=None):
|
||||
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
|
||||
V = pca_gen(a, n_sets=n_blocks, center=True)
|
||||
for nn,(a_in, a_out) in enumerate(V):
|
||||
dat = pca(a_in, aopt, mode='fast')
|
||||
dat = pca(a_in, aopt, mode='fast', scale='loads')
|
||||
P = dat['P']
|
||||
PP[nn,:,:] = P
|
||||
|
||||
return PP
|
||||
|
||||
def find_aopt_from_sep(sep, method='75perc'):
    """Returns an estimate of optimal number of components from rmsecv.

    input:
        -- sep, array of prediction errors; statistics are taken down
           axis 0 and the columns are compared against each other.
           A 1-d array is treated as a single row by '75perc'.
        -- method, 'vanilla' or '75perc'

    output:
        -- aopt, the selected column counted from 1 ('vanilla'), or the
           first column index i >= 1 that satisfies the '75perc' rule
           (the number of columns if no column does).
           None is returned for any other method.

    comments:
        * 'vanilla' takes the column with the smallest sqrt(mean).
        * '75perc' returns the first i where the median of column i-1
          is smaller than the 75 percent order statistic of column i.
        * sep itself is never modified.
    """
    if method == 'vanilla':
        # min rmsep
        rmsecv = sqrt(sep.mean(0))
        return rmsecv.argmin() + 1

    elif method == '75perc':
        prct = .75  # percentile
        if sep.ndim == 1:
            # a flat vector of errors is one row with one column per entry
            sep = sep.reshape(1, -1)
        # row of the column-sorted array holding the order statistic;
        # always < sep.shape[0] because prct < 1
        ind = int(1.*sep.shape[0]*prct)
        # axis given explicitly so the result does not depend on the
        # default axis of whichever median() is in scope
        med = median(sep, axis=0)
        # sort() hands back a sorted copy; sorting the columns in place
        # would scramble the array the callers go on to return
        prc_75 = sort(sep, axis=0)[ind]
        for i in range(1, sep.shape[1]):
            if med[i-1] < prc_75[i]:
                return i
        return len(med)
|
||||
|
Reference in New Issue
Block a user