Trying to fix cv_pls
This commit is contained in:
parent
9ccdf97d07
commit
b39e71ca2b
|
@ -13,9 +13,8 @@ try:
|
||||||
from symeig import symeig
|
from symeig import symeig
|
||||||
except:
|
except:
|
||||||
has_sym = False
|
has_sym = False
|
||||||
has_sym=False
|
|
||||||
|
|
||||||
def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
|
||||||
""" Principal Component Analysis.
|
""" Principal Component Analysis.
|
||||||
|
|
||||||
Performs PCA on given matrix and returns results in a dictionary.
|
Performs PCA on given matrix and returns results in a dictionary.
|
||||||
|
@ -26,7 +25,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||||
aopt : int
|
aopt : int
|
||||||
Number of components to use, aopt<=min(samples, variables)
|
Number of components to use, aopt<=min(samples, variables)
|
||||||
|
|
||||||
:Returns:
|
:Returns:
|
||||||
results : dict
|
results : dict
|
||||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||||
lev --leverages, ssq -- sum of squares, expvar -- cumulative
|
lev --leverages, ssq -- sum of squares, expvar -- cumulative
|
||||||
|
@ -42,14 +41,15 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||||
- pcr : other blm
|
- pcr : other blm
|
||||||
- pls : other blm
|
- pls : other blm
|
||||||
- lpls : other blm
|
- lpls : other blm
|
||||||
|
|
||||||
Notes
|
Notes
|
||||||
-----
|
-----
|
||||||
|
|
||||||
Uses kernel speed-up if m>>n or m<<n.
|
Uses kernel speed-up if m>>n or m<<n.
|
||||||
|
|
||||||
If residuals turn rank deficient, a lower number of component than given
|
If residuals turn rank deficient, a lower number of component than given
|
||||||
in input will be used. The number of components used is given in results-dict.
|
in input will be used. The number of components used is given in
|
||||||
|
results-dict.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
@ -57,7 +57,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||||
>>> import scipy,engines
|
>>> import scipy,engines
|
||||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||||
>>> dat=engines.pca(a, 2)
|
>>> dat=engines.pca(a, 2)
|
||||||
>>> dat['expvar']
|
>>> dat['expvarx']
|
||||||
array([0.,99.8561562, 100.])
|
array([0.,99.8561562, 100.])
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -76,7 +76,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||||
aopt = minimum(aopt, eff_rank)
|
aopt = minimum(aopt, eff_rank)
|
||||||
T = u*s
|
T = u*s
|
||||||
s = s[:aopt]
|
s = s[:aopt]
|
||||||
e = e[:aopt]
|
|
||||||
T = T[:,:aopt]
|
T = T[:,:aopt]
|
||||||
P = v[:,:aopt]
|
P = v[:,:aopt]
|
||||||
|
|
||||||
|
@ -91,7 +90,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||||
E = empty((aopt, m, n))
|
E = empty((aopt, m, n))
|
||||||
ssq = []
|
ssq = []
|
||||||
lev = []
|
lev = []
|
||||||
expvarx = empty((aopt, aopt+1))
|
|
||||||
for ai in range(aopt):
|
for ai in range(aopt):
|
||||||
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
||||||
ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
|
ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
|
||||||
|
@ -99,9 +97,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||||
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
|
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
|
||||||
else:
|
else:
|
||||||
lev.append([(T**2).sum(1), ((s*P)**2).sum(1)])
|
lev.append([(T**2).sum(1), ((s*P)**2).sum(1)])
|
||||||
|
|
||||||
expvarx[ai,:] = r_[0, 100*e.cumsum()/e.sum()]
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# residuals
|
# residuals
|
||||||
E = a - dot(T, P.T)
|
E = a - dot(T, P.T)
|
||||||
|
@ -112,9 +107,9 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||||
lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
|
lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
|
||||||
else:
|
else:
|
||||||
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
|
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
|
||||||
# variances
|
# variances
|
||||||
expvarx = r_[0, 100*e.cumsum()/e.sum()]
|
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
|
||||||
|
|
||||||
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}
|
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}
|
||||||
|
|
||||||
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||||
|
@ -130,7 +125,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||||
aopt : int
|
aopt : int
|
||||||
Number of components to use, aopt<=min(samples, variables)
|
Number of components to use, aopt<=min(samples, variables)
|
||||||
|
|
||||||
:Returns:
|
:Returns:
|
||||||
results : dict
|
results : dict
|
||||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||||
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
||||||
|
@ -143,7 +138,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||||
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||||
|
|
||||||
:SeeAlso:
|
:SeeAlso:
|
||||||
- pcr : other blm
|
- pca : other blm
|
||||||
- pls : other blm
|
- pls : other blm
|
||||||
- lpls : other blm
|
- lpls : other blm
|
||||||
|
|
||||||
|
@ -161,8 +156,9 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||||
|
|
||||||
>>> import scipy,engines
|
>>> import scipy,engines
|
||||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||||
>>> dat=engines.pca(a, 2)
|
>>> b=scipy.asarray([[1,1],[2,3]])
|
||||||
>>> dat['expvar']
|
>>> dat=engines.pcr(a, 2)
|
||||||
|
>>> dat['expvarx']
|
||||||
array([0.,99.8561562, 100.])
|
array([0.,99.8561562, 100.])
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
@ -171,12 +167,11 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||||
b = b - expand_dims(b.mean(center_axis), center_axis)
|
b = b - expand_dims(b.mean(center_axis), center_axis)
|
||||||
dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
|
dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
|
||||||
T = dat['T']
|
T = dat['T']
|
||||||
weights = apply_along_axis(vnorm, 0, T)
|
weights = apply_along_axis(vnorm, 0, T)**2
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
# fixme: check weights
|
Q = dot(b.T, T*weights)
|
||||||
Q = dot(b.T, T*weights**2)
|
|
||||||
else:
|
else:
|
||||||
Q = dot(b.T, T/weights**2)
|
Q = dot(b.T, T/weights)
|
||||||
|
|
||||||
if mode=='fast':
|
if mode=='fast':
|
||||||
dat.update({'Q':Q})
|
dat.update({'Q':Q})
|
||||||
|
@ -187,43 +182,96 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||||
F[i,:,:] = b - dot(T[:,:i+1], Q[:,:i+1].T)
|
F[i,:,:] = b - dot(T[:,:i+1], Q[:,:i+1].T)
|
||||||
else:
|
else:
|
||||||
F = b - dot(T, Q.T)
|
F = b - dot(T, Q.T)
|
||||||
#fixme: explained variance in Y + Y-var leverages
|
expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]]
|
||||||
dat.update({'Q':Q, 'F':F})
|
#fixme: Y-var leverages
|
||||||
|
dat.update({'Q':Q, 'F':F, 'expvary':expvary})
|
||||||
return dat
|
return dat
|
||||||
|
|
||||||
def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
def pls(a, b, aopt=2, scale='scores', mode='normal', ax_center=0, ab=None):
|
||||||
"""Partial Least Squares Regression.
|
"""Partial Least Squares Regression.
|
||||||
|
|
||||||
Applies plsr to given matrices and returns results in a dictionary.
|
Performs PLS on given matrix and returns results in a dictionary.
|
||||||
|
|
||||||
|
:Parameters:
|
||||||
|
a : array
|
||||||
|
Data measurement matrix, (samples x variables)
|
||||||
|
b : array
|
||||||
|
Data response matrix, (samples x responses)
|
||||||
|
aopt : int
|
||||||
|
Number of components to use, aopt<=min(samples, variables)
|
||||||
|
|
||||||
|
:Returns:
|
||||||
|
results : dict
|
||||||
|
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||||
|
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
||||||
|
explained variance of descriptors, expvary -- cumulative explained
|
||||||
|
variance of responses, aopt -- number of components used
|
||||||
|
|
||||||
|
:OtherParameters:
|
||||||
|
mode : str
|
||||||
|
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||||
|
center_axis : int
|
||||||
|
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||||
|
|
||||||
|
:SeeAlso:
|
||||||
|
- pca : other blm
|
||||||
|
- pcr : other blm
|
||||||
|
- lpls : other blm
|
||||||
|
|
||||||
|
Notes
|
||||||
|
-----
|
||||||
|
|
||||||
|
Uses kernel speed-up if m>>n or m<<n.
|
||||||
|
|
||||||
|
If residuals turn rank deficient, a lower number of component than given
|
||||||
|
in input will be used. The number of components used is given in results-dict.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
|
||||||
|
>>> import scipy,engines
|
||||||
|
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||||
|
>>> b=scipy.asarray([[1,1],[2,3]])
|
||||||
|
>>> dat=engines.pls(a, b, 2)
|
||||||
|
>>> dat['expvarx']
|
||||||
|
array([0.,99.8561562, 100.])
|
||||||
|
|
||||||
Fast pls for calibration. Only inefficient for many Y-vars.
|
|
||||||
"""
|
"""
|
||||||
m, n = a.shape
|
|
||||||
|
m, n = m_shape(a)
|
||||||
if ab!=None:
|
if ab!=None:
|
||||||
mm, ll = m_shape(ab)
|
mm, l = m_shape(ab)
|
||||||
|
assert(m==mm)
|
||||||
else:
|
else:
|
||||||
k, l = m_shape(b)
|
k, l = m_shape(b)
|
||||||
assert(m==mm)
|
|
||||||
assert(l==ll)
|
|
||||||
W = empty((n, aopt))
|
W = empty((n, aopt))
|
||||||
P = empty((n, aopt))
|
P = empty((n, aopt))
|
||||||
R = empty((n, aopt))
|
R = empty((n, aopt))
|
||||||
Q = empty((l, aopt))
|
Q = empty((l, aopt))
|
||||||
T = empty((m, aopt))
|
T = empty((m, aopt))
|
||||||
B = empty((aopt, n, l))
|
B = empty((aopt, n, l))
|
||||||
|
|
||||||
if ab==None:
|
if ab==None:
|
||||||
ab = dot(a.T, b)
|
ab = dot(a.T, b)
|
||||||
for i in range(aopt):
|
for i in range(aopt):
|
||||||
if ab.shape[1]==1:
|
if ab.shape[1]==1:
|
||||||
w = ab.reshape(n, l)
|
w = ab.reshape(n, l)
|
||||||
|
w = w/vnorm(w)
|
||||||
|
elif n<l:
|
||||||
|
if has_sym:
|
||||||
|
s, u = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
|
||||||
|
else:
|
||||||
|
u, s, vh = svd(dot(ab, ab.T))
|
||||||
|
w = u[:,0]
|
||||||
else:
|
else:
|
||||||
u, s, vh = svd(dot(ab.T, ab))
|
if has_sym:
|
||||||
w = dot(ab, u[:,:1])
|
s, u = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
|
||||||
|
else:
|
||||||
w = w/vnorm(w)
|
u, s, vh = svd(dot(ab.T, ab))
|
||||||
|
w = dot(ab, u)
|
||||||
r = w.copy()
|
r = w.copy()
|
||||||
if i>0: # recursive estimate to
|
if i>0:
|
||||||
for j in range(0, i, 1):
|
for j in range(0, i, 1):
|
||||||
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
||||||
t = dot(a, r)
|
t = dot(a, r)
|
||||||
|
@ -233,8 +281,6 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
ab = ab - dot(p, q.T)*tt
|
ab = ab - dot(p, q.T)*tt
|
||||||
T[:,i] = t.ravel()
|
T[:,i] = t.ravel()
|
||||||
W[:,i] = w.ravel()
|
W[:,i] = w.ravel()
|
||||||
P[:,i] = p.ravel()
|
|
||||||
R[:,i] = r.ravel()
|
|
||||||
|
|
||||||
if mode=='fast' and i==aopt-1:
|
if mode=='fast' and i==aopt-1:
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
|
@ -243,6 +289,8 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||||
W = W*tnorm
|
W = W*tnorm
|
||||||
return {'T':T, 'W':W}
|
return {'T':T, 'W':W}
|
||||||
|
|
||||||
|
P[:,i] = p.ravel()
|
||||||
|
R[:,i] = r.ravel()
|
||||||
Q[:,i] = q.ravel()
|
Q[:,i] = q.ravel()
|
||||||
B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
|
B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
|
||||||
|
|
||||||
|
@ -272,14 +320,14 @@ def w_simpls(aat, b, aopt):
|
||||||
"""
|
"""
|
||||||
bb = b.copy()
|
bb = b.copy()
|
||||||
m, m = aat.shape
|
m, m = aat.shape
|
||||||
U = empty((m, aopt))
|
U = empty((m, aopt)) # W
|
||||||
T = empty((m, aopt))
|
T = empty((m, aopt))
|
||||||
H = empty((m, aopt)) #just like W in simpls
|
H = empty((m, aopt)) # R
|
||||||
PROJ = empty((m, aopt)) #just like R in simpls
|
PROJ = empty((m, aopt)) # P?
|
||||||
|
|
||||||
for i in range(aopt):
|
for i in range(aopt):
|
||||||
u, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
|
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
|
||||||
u = dot(b, u[:,:1]) #y-factor scores
|
u = dot(b, q[:,:1]) #y-factor scores
|
||||||
U[:,i] = u.ravel()
|
U[:,i] = u.ravel()
|
||||||
t = dot(aat, u)
|
t = dot(aat, u)
|
||||||
t = t/vnorm(t)
|
t = t/vnorm(t)
|
||||||
|
@ -293,6 +341,38 @@ def w_simpls(aat, b, aopt):
|
||||||
|
|
||||||
return {'T':T, 'U':U, 'Q':C, 'H':H}
|
return {'T':T, 'U':U, 'Q':C, 'H':H}
|
||||||
|
|
||||||
|
def w_pls(aat, b, aopt):
|
||||||
|
""" Pls for wide matrices.
|
||||||
|
Fast pls for crossval, used in calc rmsep for wide X
|
||||||
|
There is no P or W. T is normalised
|
||||||
|
"""
|
||||||
|
bb = b.copy()
|
||||||
|
m, m = aat.shape
|
||||||
|
U = empty((m, aopt)) # W
|
||||||
|
T = empty((m, aopt))
|
||||||
|
R = empty((m, aopt)) # R
|
||||||
|
PROJ = empty((m, aopt)) # P?
|
||||||
|
|
||||||
|
for i in range(aopt):
|
||||||
|
if has_sym:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
|
||||||
|
q = q[:,:1]
|
||||||
|
u = dot(b , q) #y-factor scores
|
||||||
|
U[:,i] = u.ravel()
|
||||||
|
t = dot(aat, u)
|
||||||
|
t = t/vnorm(t)
|
||||||
|
T[:,i] = t.ravel()
|
||||||
|
r = dot(aat, t) #score-weights
|
||||||
|
R[:,i] = r.ravel()
|
||||||
|
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
|
||||||
|
if i<aopt:
|
||||||
|
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T,b) )
|
||||||
|
C = dot(bb.T, T)
|
||||||
|
|
||||||
|
return {'T':T, 'U':U, 'Q':C, 'H':H}
|
||||||
|
|
||||||
def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
|
||||||
"""Undeflated Ridged svd(X'Y)
|
"""Undeflated Ridged svd(X'Y)
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -16,8 +16,9 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
|
||||||
Returns:
|
Returns:
|
||||||
-- aat_in,aat_out,b_in,b_out,[out]
|
-- aat_in,aat_out,b_in,b_out,[out]
|
||||||
"""
|
"""
|
||||||
m,n = aat.shape
|
m, n = aat.shape
|
||||||
index = randperm(m)
|
index = randperm(m)
|
||||||
|
if n_blocks==None: n_blocks = m
|
||||||
nValuesInBlock = m/n_blocks
|
nValuesInBlock = m/n_blocks
|
||||||
if n_blocks==m:
|
if n_blocks==m:
|
||||||
index = arange(m)
|
index = arange(m)
|
||||||
|
@ -31,7 +32,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
|
||||||
b_out = b[out,:]
|
b_out = b[out,:]
|
||||||
if center:
|
if center:
|
||||||
aat_in, mn = outerprod_centering(aat_in)
|
aat_in, mn = outerprod_centering(aat_in)
|
||||||
aat_out = aat_out - mn
|
b_in = b_in - b_in.mean(0) # b_in + b_out/(b_in.shape[0])
|
||||||
if index_out:
|
if index_out:
|
||||||
yield aat_in,aat_out,b_in,b_out,out
|
yield aat_in,aat_out,b_in,b_out,out
|
||||||
else:
|
else:
|
||||||
|
@ -217,7 +218,7 @@ def outerprod_centering(aat, ret_mn=True):
|
||||||
mn_a = h + h.T # beauty of broadcasting
|
mn_a = h + h.T # beauty of broadcasting
|
||||||
aatc = aat - mn_a
|
aatc = aat - mn_a
|
||||||
if ret_mn:
|
if ret_mn:
|
||||||
return aatc, mn_a
|
return aatc, h
|
||||||
return aatc
|
return aatc
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,6 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
|
||||||
for Din, Doi, Yin, Yout in V:
|
for Din, Doi, Yin, Yout in V:
|
||||||
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||||
Yin = Yin - ym
|
|
||||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||||
if algo=='simpls':
|
if algo=='simpls':
|
||||||
dat = w_simpls(Din, Yin, amax)
|
dat = w_simpls(Din, Yin, amax)
|
||||||
|
@ -74,15 +73,14 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
|
||||||
for j in range(l):
|
for j in range(l):
|
||||||
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
|
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
|
||||||
E = Yout[:,j][:,newaxis] - TQ
|
E = Yout[:,j][:,newaxis] - TQ
|
||||||
E = E + sum(E, 0)/Din.shape[0]
|
E = E + sum(E, 0)/Din.shape[0]
|
||||||
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
|
||||||
Yhat = Y - dot(That,Q.T)
|
#Yhat = Yin - dot(That,Q.T)
|
||||||
rmsep = sqrt(PRESS/Y.shape[0])
|
msep = PRESS/(Y.shape[0])
|
||||||
aopt = find_aopt_from_sep(rmsep)
|
aopt = find_aopt_from_sep(msep)
|
||||||
return rmsep, Yhat, aopt
|
return sqrt(msep)
|
||||||
|
|
||||||
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||||
|
|
||||||
k, l = m_shape(Y)
|
k, l = m_shape(Y)
|
||||||
PRESS = zeros((l, amax+1), dtype='<f8')
|
PRESS = zeros((l, amax+1), dtype='<f8')
|
||||||
EE = zeros((amax, k, l), dtype='<f8')
|
EE = zeros((amax, k, l), dtype='<f8')
|
||||||
|
@ -105,9 +103,10 @@ def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
|
||||||
EE[a,out,:] = E
|
EE[a,out,:] = E
|
||||||
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
|
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
|
||||||
|
|
||||||
rmsep = sqrt(PRESS/(k-1.))
|
#rmsep = sqrt(PRESS/(k-1.))
|
||||||
aopt = find_aopt_from_sep(rmsep)
|
msep = PRESS
|
||||||
return rmsep, Yhat, aopt
|
aopt = find_aopt_from_sep(msep)
|
||||||
|
return msep, Yhat, aopt
|
||||||
|
|
||||||
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
|
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5):
|
||||||
"""Performs crossvalidation to get generalisation error in lpls"""
|
"""Performs crossvalidation to get generalisation error in lpls"""
|
||||||
|
@ -270,7 +269,7 @@ def lpls_jk(X, Y, Z, a_max, nsets=None, alpha=.5):
|
||||||
def find_aopt_from_sep(sep, method='75perc'):
|
def find_aopt_from_sep(sep, method='75perc'):
|
||||||
"""Returns an estimate of optimal number of components from rmsecv.
|
"""Returns an estimate of optimal number of components from rmsecv.
|
||||||
"""
|
"""
|
||||||
|
sep = sep.copy()
|
||||||
if method=='vanilla':
|
if method=='vanilla':
|
||||||
# min rmsep
|
# min rmsep
|
||||||
rmsecv = sqrt(sep.mean(0))
|
rmsecv = sqrt(sep.mean(0))
|
||||||
|
@ -282,7 +281,7 @@ def find_aopt_from_sep(sep, method='75perc'):
|
||||||
med = median(sep)
|
med = median(sep)
|
||||||
prc_75 = []
|
prc_75 = []
|
||||||
for col in sep.T:
|
for col in sep.T:
|
||||||
col.sort()
|
col.sort() #this is inplace -> ruins sep, so we are doing a copy
|
||||||
prc_75.append(col[int(ind)])
|
prc_75.append(col[int(ind)])
|
||||||
prc_75 = array(prc_75)
|
prc_75 = array(prc_75)
|
||||||
for i in range(1, sep.shape[1], 1):
|
for i in range(1, sep.shape[1], 1):
|
||||||
|
|
Reference in New Issue