whitespace
This commit is contained in:
parent
3bd21ab089
commit
3d2492578e
|
@ -10,11 +10,15 @@ from engines import w_simpls,pls,bridge,pca
|
||||||
from cx_utils import m_shape
|
from cx_utils import m_shape
|
||||||
|
|
||||||
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
"""Returns and RMSEP for pls tailored for wide X.
|
"""Returns rmsep and aopt for pls tailored for wide X.
|
||||||
|
|
||||||
|
|
||||||
|
comments:
|
||||||
|
-- X, Y inputs need to be centered (fixme: check)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
k, l = m_shape(Y)
|
k, l = m_shape(Y)
|
||||||
PRESS = zeros((l, amax+1), dtype='f')
|
PRESS = zeros((l, amax+1), dtype='f')
|
||||||
# X,Y are centered0
|
|
||||||
if n_blocks==None:
|
if n_blocks==None:
|
||||||
n_blocks = Y.shape[0]
|
n_blocks = Y.shape[0]
|
||||||
XXt = dot(X, X.T)
|
XXt = dot(X, X.T)
|
||||||
|
@ -29,7 +33,7 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
That = dot(Doi, dot(U, inv(triu(dot(H.T,U))) ))
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
#Yhat = empty((amax, k, l),dtype='<f8')
|
|
||||||
Yhat = []
|
Yhat = []
|
||||||
for j in range(l):
|
for j in range(l):
|
||||||
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
|
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
|
||||||
|
@ -41,15 +45,21 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
aopt = find_aopt_from_sep(rmsep)
|
aopt = find_aopt_from_sep(rmsep)
|
||||||
return rmsep, aopt
|
return rmsep, aopt
|
||||||
|
|
||||||
def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||||
""" Validation results of pls model.
|
""" Validation results of pls model.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
comments:
|
||||||
|
-- X, Y inputs need to be centered (fixme: check)
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
k, l = m_shape(Y)
|
k, l = m_shape(Y)
|
||||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||||
EE = zeros((amax, k, l), dtype='<f8')
|
EE = zeros((amax, k, l), dtype='<f8')
|
||||||
Yhat = zeros((amax, k, l), dtype='<f8')
|
Yhat = zeros((amax, k, l), dtype='<f8')
|
||||||
# X,Y are centered
|
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric)
|
||||||
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
|
|
||||||
for Xin, Xout, Yin, Yout, out in V:
|
for Xin, Xout, Yin, Yout, out in V:
|
||||||
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
|
||||||
Yin = (Yin - ym)
|
Yin = (Yin - ym)
|
||||||
|
@ -73,13 +83,19 @@ def pls_val(X, Y, amax=2, n_blocks=10,algo='pls'):
|
||||||
|
|
||||||
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
def pca_alter_val(a, amax, n_sets=10, method='diag'):
|
||||||
"""Pca validation by altering elements in X.
|
"""Pca validation by altering elements in X.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
comments:
|
||||||
|
-- may do all jk estimates in this loop
|
||||||
"""
|
"""
|
||||||
# todo: it is just as easy to do jk-estimates her as well
|
|
||||||
V = diag_pert(a, n_sets, center=True, index_out=True)
|
V = diag_pert(a, n_sets, center=True, index_out=True)
|
||||||
sep = empty((n_sets, amax), dtype='f')
|
sep = empty((n_sets, amax), dtype='f')
|
||||||
for i, (xi, ind) in enumerate(V):
|
for i, (xi, ind) in enumerate(V):
|
||||||
dat_i = pca(xi, amax, mode='detailed')
|
dat_i = pca(xi, amax, mode='detailed')
|
||||||
Ti,Pi = dat_i['T'],dat_i['P']
|
Ti, Pi = dat_i['T'],dat_i['P']
|
||||||
for j in xrange(amax):
|
for j in xrange(amax):
|
||||||
Xhat = dot(Ti[:,:j+1], Pi[:,:j+1].T)
|
Xhat = dot(Ti[:,:j+1], Pi[:,:j+1].T)
|
||||||
a_sub = a.ravel().take(ind)
|
a_sub = a.ravel().take(ind)
|
||||||
|
@ -100,7 +116,9 @@ def pca_cv_val(a, amax, n_sets):
|
||||||
output:
|
output:
|
||||||
-- sep, (amax x m x n), squared error of prediction (press)
|
-- sep, (amax x m x n), squared error of prediction (press)
|
||||||
-- aopt, guesstimated optimal number of components
|
-- aopt, guesstimated optimal number of components
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
E = empty((amax, m, n), dtype='f')
|
E = empty((amax, m, n), dtype='f')
|
||||||
xtot = (a**2).sum() # this needs centering
|
xtot = (a**2).sum() # this needs centering
|
||||||
|
@ -117,11 +135,12 @@ def pca_cv_val(a, amax, n_sets):
|
||||||
sep.append(E[a].sum()/xtot)
|
sep.append(E[a].sum()/xtot)
|
||||||
sep = array(sep)
|
sep = array(sep)
|
||||||
aopt = find_aopt_from_sep(sep)
|
aopt = find_aopt_from_sep(sep)
|
||||||
|
|
||||||
return sep, aopt
|
return sep, aopt
|
||||||
|
|
||||||
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=False, center=True):
|
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None):
|
||||||
""" Returns CV-segments of parameter W for wide X.
|
""" Returns CV-segments of parameter W for wide X.
|
||||||
|
|
||||||
todo: add support for T,Q and B
|
todo: add support for T,Q and B
|
||||||
"""
|
"""
|
||||||
if n_blocks == None:
|
if n_blocks == None:
|
||||||
|
@ -129,26 +148,28 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=False, center=True):
|
||||||
|
|
||||||
Wcv = empty((n_blocks, a.shape[1], amax), dtype='f')
|
Wcv = empty((n_blocks, a.shape[1], amax), dtype='f')
|
||||||
|
|
||||||
if use_pack:
|
if use_pack and metric==None:
|
||||||
u, s, inflater = svd(a, full_matrices=0)
|
u, s, inflater = svd(a, full_matrices=0)
|
||||||
a = u*s
|
a = u*s
|
||||||
|
|
||||||
V = pls_gen(a, b, n_blocks=n_blocks, center=center)
|
V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric)
|
||||||
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
|
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
|
||||||
if algo=='pls':
|
if algo=='pls':
|
||||||
dat = pls(a_in, b_in, amax, 'loads', 'fast')
|
dat = pls(a_in, b_in, amax, 'loads', 'fast')
|
||||||
|
|
||||||
elif algo=='bridge':
|
elif algo=='bridge':
|
||||||
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
|
||||||
|
|
||||||
W = dat['W']
|
W = dat['W']
|
||||||
|
|
||||||
if use_pack:
|
if use_pack and metric==None:
|
||||||
W = dot(inflater.T, W)
|
W = dot(inflater.T, W)
|
||||||
|
|
||||||
Wcv[nn,:,:] = W
|
Wcv[nn,:,:] = W
|
||||||
|
|
||||||
return Wcv
|
return Wcv
|
||||||
|
|
||||||
def pca_jkP(a, aopt, n_blocks=None):
|
def pca_jkP(a, aopt, n_blocks=None, metric=None):
|
||||||
"""Returns loading from PCA on CV-segments.
|
"""Returns loading from PCA on CV-segments.
|
||||||
|
|
||||||
input:
|
input:
|
||||||
|
@ -178,9 +199,11 @@ def pca_jkP(a, aopt, n_blocks=None):
|
||||||
|
|
||||||
return PP
|
return PP
|
||||||
|
|
||||||
|
|
||||||
def find_aopt_from_sep(sep, method='75perc'):
|
def find_aopt_from_sep(sep, method='75perc'):
|
||||||
"""Returns an estimate of optimal number of components from rmsecv.
|
"""Returns an estimate of optimal number of components from rmsecv.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if method=='vanilla':
|
if method=='vanilla':
|
||||||
# min rmsep
|
# min rmsep
|
||||||
rmsecv = sqrt(sep.mean(0))
|
rmsecv = sqrt(sep.mean(0))
|
||||||
|
|
Reference in New Issue