Mostly clean ups

parent 2951ca4088
commit 4c809674bb
@@ -12,7 +12,7 @@ from numpy.random import shuffle
 from engines import nipals_lpls as lpls
 
 
-def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True):
+def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
     """Performs crossvalidation for generalisation error in lpls.
 
     The L-PLS crossvalidation is estimated just like an ordinary pls
@@ -61,8 +61,8 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
     m, n = X.shape
     k, l = Y.shape
     o, p = Z.shape
-    assert m==k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m,n,k,l)
-    assert n==p, "X (%d,%d) - Z (%d,%d) dim mismatch" %(m,n,o,p)
+    assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l)
+    assert n == p, "X (%d,%d) - Z (%d,%d) dim mismatch" %(m, n, o, p)
     if nsets == None:
         nsets = m
     if nsets > X.shape[0]:
@@ -80,11 +80,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
         if mean_ctr[0] != 1:
             xi = X[val,:] - dat['mnx']
         else:
-            xi = X[val] - X[val].mean(1)[:,newaxis]
+            xi = X[val] - X[cal].mean(1)[:,newaxis]
         if mean_ctr[2] != 1:
             ym = dat['mny']
         else:
-            ym = Y[val].mean(1)[:,newaxis] #???: check this
+            ym = Y[cal].mean(1)[:,newaxis]
         # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
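Note on the two `val` -> `cal` changes above: this is the substantive fix in the hunk. When predicting the held-out rows, the centering offsets must be estimated from the calibration segment rather than from the validation rows themselves, otherwise the held-out data leaks into its own error estimate. A standalone numpy sketch of the principle (the `cal`/`val` arrays are hypothetical, and column means are used here for simple broadcasting, while the hunk itself works on row means):

    import numpy as np

    X = np.random.rand(10, 4)
    cal = np.arange(8)         # hypothetical calibration rows
    val = np.arange(8, 10)     # hypothetical held-out rows

    # leaky: offset computed from the held-out rows themselves
    xi_leaky = X[val] - X[val].mean(0)

    # fixed: offset reused from the calibration rows only
    xi_clean = X[val] - X[cal].mean(0)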
@@ -113,7 +113,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
 def pca_jk(a, aopt, n_blocks=None):
     """Returns jack-knife segements from PCA.
 
-    Parameters:
+    *Parameters*:
 
     a : {array}
         data matrix (n x m)
@@ -122,21 +122,15 @@ def pca_jk(a, aopt, n_blocks=None):
     nsets : {integer}
         number of segments
 
-    Returns:
+    *Returns*:
 
     Pcv : {array}
         Loadings collected in a three way matrix (n_segments, m, aopt)
 
-    Notes:
+    *Notes*:
 
-    - The loadings are scaled with the (1/samples)*eigenvalues.
-
     - Crossvalidation method is currently set to random blocks of samples.
 
-    - todo: add support for T
-
-    - fixme: more efficient to add this in validation loop?
-
     """
     if nsets == None:
         nsets = a.shape[0]
@@ -183,7 +177,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center=True, verbose=False):
     """
     m, n = X.shape
     k, l = Y.shape
-    assert(m==k)
+    assert(m == k)
     if nsets == None:
         nsets = X.shape[0]
     Wcv = empty((nsets, X.shape[1], a_opt), dtype=X.dtype)
@@ -242,9 +236,9 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, mean_ctr=[2,0,2], verbose=F
     m, n = X.shape
     k, l = Y.shape
     o, p = Z.shape
-    assert(m==k)
-    assert(n==p)
-    if nsets==None:
+    assert(m == k)
+    assert(n == p)
+    if nsets == None:
         nsets = m
     WWx = empty((nsets, n, a_opt), 'd')
     WWz = empty((nsets, o, a_opt), 'd')
@@ -279,12 +273,12 @@ def find_aopt_from_sep(sep, method='vanilla'):
         A guess on the optimal number of components
     """
 
-    if method=='vanilla':
+    if method == 'vanilla':
         # min rmsep
         rmsecv = sqrt(sep.mean(0))
         return rmsecv.argmin() + 1
 
-    elif method=='75perc':
+    elif method == '75perc':
         prct = .75 #percentile
         ind = 1.*sep.shape[0]*prct
         med = median(sep)
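In the 'vanilla' branch, `sep` holds squared prediction errors with cross-validation segments along axis 0 and component counts along axis 1; averaging over segments, taking the square root, and adding one to `argmin` turns a 0-based column index into a component count. A minimal sketch with made-up numbers:

    import numpy as np

    # rows: cv segments, columns: models with 1, 2, 3 components
    sep = np.array([[4.0, 1.0, 1.5],
                    [3.6, 0.8, 1.7]])
    rmsecv = np.sqrt(sep.mean(0))   # RMSEP per component count
    print(rmsecv.argmin() + 1)      # -> 2 components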
@@ -305,6 +299,7 @@ def cv(N, K, randomise=True, sequential=False):
     of length ~N/K, *without* replacement.
+
     *Parameters*:
 
     N : {integer}
         Total number of samples
     K : {integer}
@@ -333,7 +328,7 @@ def cv(N, K, randomise=True, sequential=False):
     otherwise interleaved ordering is used.
 
     """
-    if N>K:
+    if N > K:
         raise ValueError, "You cannot divide a list of %d samples into more than %d segments. Yout tried: %s" %(K, K, N)
     index = xrange(N)
     if randomise:
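`cv(N, K)` generates K validation segments of length ~N/K covering range(N) once each, shuffled when `randomise` is set and interleaved otherwise. The body is cut off in this hunk; a rough standalone re-implementation of that contract, for illustration only:

    import random

    def cv_sketch(N, K, randomise=True):
        """Yield K validation-index segments covering range(N) once each."""
        index = list(range(N))
        if randomise:
            random.shuffle(index)
        for k in range(K):
            # interleaved: every K-th index belongs to segment k
            yield [index[i] for i in range(k, N, K)]

    for seg in cv_sketch(6, 3, randomise=False):
        print(seg)   # [0, 3], [1, 4], [2, 5]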

pyblm/engines.py (145 lines changed)
@@ -9,16 +9,17 @@ from math import sqrt as msqrt
 
 from numpy import dot,empty,zeros,apply_along_axis,newaxis,finfo,sqrt,r_,expand_dims,\
      minimum
-from numpy.linalg import inv, svd
+from numpy.linalg import inv,svd
 from scipy.sandbox import arpack
 
+
 def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """ Principal Component Analysis.
 
     PCA is a low rank bilinear aprroximation to a data matrix that sequentially
     extracts orthogonal components of maximum variance.
 
-    Parameters:
+    *Parameters*:
 
     X : {array}
         Data measurement matrix, (samples x variables)
@@ -27,7 +28,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     center_axis : {integer}
         Center along given axis. If neg.: no centering (-inf,..., matrix modes)
 
-    Returns:
+    *Returns*:
 
     T : {array}
         Scores, (samples, components)
@@ -47,7 +48,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     leverage : {array}
         Leverages, (samples,)
 
-    OtherParameters:
+    *OtherParameters*:
 
     scale : {string}, optional
         Where to put the weights [['scores'], 'loadings']
@@ -55,7 +56,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
         Amount of info retained, [['normal'], 'fast', 'detailed']
 
 
-    :SeeAlso:
+    *SeeAlso*:
 
     `center` : Data centering
 
@@ -78,10 +79,12 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """
 
     m, n = X.shape
-    assert(aopt<=min(m,n))
-    if center_axis>=0:
+    min_aopt = min(m, n)
+    if center_axis >= 0:
         X = X - expand_dims(X.mean(center_axis), center_axis)
-    if m>(n+100) or n>(m+100):
+        min_aopt = min_aopt - 1
+    assert(aopt <= min_aopt)
+    if m > (n+100) or n > (m+100):
         u, s, v = esvd(X, aopt)
     else:
         u, s, vt = svd(X, 0)
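The replaced assert is the point of this hunk: centering removes one degree of freedom, so a column-centered matrix has rank at most min(m, n) - 1, and `min_aopt` is decremented accordingly before the requested component count is checked. A quick numpy illustration of the rank drop:

    import numpy as np

    X = np.random.rand(6, 4)
    Xc = X - X.mean(0)                   # column centering, as in pca()

    print(np.linalg.matrix_rank(X))      # typically 4
    print(np.linalg.matrix_rank(Xc))     # typically 3: one component lost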
@@ -92,7 +95,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
 
     # ranktest
     tol = 1e-10
-    eff_rank = sum(s>s[0]*tol)
+    eff_rank = sum(s > s[0]*tol)
     aopt = minimum(aopt, eff_rank)
     T = u*s
     s = s[:aopt]
@@ -124,7 +127,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
         sep = E**2
         ssq = [sep.sum(0), sep.sum(1)]
         # leverages
-        if scale=='loads':
+        if scale == 'loads':
             lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
         else:
             lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
@@ -139,7 +142,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
 
     Performs PCR on given matrix and returns results in a dictionary.
 
-    Parameters:
+    *Parameters*:
 
     a : array
         Data measurement matrix, (samples x variables)
@@ -148,19 +151,19 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
     aopt : int
         Number of components to use, aopt<=min(samples, variables)
 
-    Returns:
+    *Returns*:
 
     results : dict
         keys -- values, T -- scores, P -- loadings, E -- residuals,
         levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
         explained variance, aopt -- number of components used
 
-    OtherParameters:
+    *OtherParameters*:
 
-    mode : str
+    mode : {string}
         Amount of info retained, ('fast', 'normal', 'detailed')
-    center_axis : int
+    center_axis : {integer}
         Center along given axis. If neg.: no centering (-inf,..., matrix modes)
 
     SeeAlso:
 
@@ -194,21 +197,21 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
     except:
         b = atleast_2d(b).T
     k, l = b.shape
-    if center_axis>=0:
+    if center_axis >= 0:
         b = b - expand_dims(b.mean(center_axis), center_axis)
     dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
     T = dat['T']
     weights = apply_along_axis(vnorm, 0, T)**2
-    if scale=='loads':
+    if scale == 'loads':
         Q = dot(b.T, T*weights)
     else:
         Q = dot(b.T, T/weights)
 
-    if mode=='fast':
+    if mode == 'fast':
         dat.update({'Q':Q})
         return dat
 
-    if mode=='detailed':
+    if mode == 'detailed':
         F = empty((aopt, k, l))
         ssqy = []
         for i in range(aopt):
@@ -284,7 +287,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
 
     *SeeAlso*:
 
-    `center` : data centering
+    `center` - data centering
 
     *Notes*
 
@@ -310,14 +313,16 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
     except:
         Y = atleast_2d(Y).T
     k, l = Y.shape
-    assert(m==k)
-    assert(aopt<min(m, n))
-    mnx, mny = 0,0
-    if center_axis>=0:
+    assert(m == k)
+    mnx, mny = 0, 0
+    min_aopt = min(m, n)
+    if center_axis >= 0:
         mnx = expand_dims(X.mean(center_axis), center_axis)
         X = X - mnx
+        min_aopt = min_aopt - 1
         mny = expand_dims(Y.mean(center_axis), center_axis)
         Y = Y - mny
+    assert(aopt > 0 and aopt < min_aopt)
 
     W = empty((n, aopt))
     P = empty((n, aopt))
@@ -329,10 +334,10 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
 
     XY = dot(X.T, Y)
     for i in range(aopt):
-        if XY.shape[1]==1: #pls 1
+        if XY.shape[1] == 1: #pls 1
             w = XY.reshape(n, l)
             w = w/vnorm(w)
-        elif n<l: # more yvars than xvars
+        elif n < l: # more yvars than xvars
             s, w = arpack.eigen_symmetric(dot(XY, XY.T),k=1, tol=1e-10, maxiter=100)
             #w, s, vh = svd(dot(XY, XY.T))
             #w = w[:,:1]
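All branches here compute the same quantity, the dominant left singular vector of XY = X'Y, which is the PLS weight vector: for a single response it is just XY normalized, and when Y is wide it is cheaper to take the top eigenvector of the small symmetric kernel XY.XY'. A numpy sketch of that equivalence (plain eigh stands in for the module's ARPACK call):

    import numpy as np

    n, l = 5, 8                    # more y-variables than x-variables
    XY = np.random.rand(n, l)      # stands in for dot(X.T, Y)

    u_full = np.linalg.svd(XY)[0][:, 0]           # dominant left singular vector
    vals, vecs = np.linalg.eigh(np.dot(XY, XY.T))
    u_kernel = vecs[:, -1]                        # eigh sorts ascending

    print(np.allclose(np.abs(u_full), np.abs(u_kernel)))   # True, up to sign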
@@ -344,7 +349,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
             w = dot(XY, q)
             w = w/vnorm(w)
         r = w.copy()
-        if i>0:
+        if i > 0:
             for j in range(0, i, 1):
                 r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
 
@@ -356,8 +361,8 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
         T[:,i] = t.ravel()
         W[:,i] = w.ravel()
 
-        if mode=='fast' and i==aopt-1:
-            if scale=='loads':
+        if mode == 'fast' and i == (aopt - 1):
+            if scale == 'loads':
                 tnorm = sqrt(tt)
                 T = T/tnorm
                 W = W*tnorm
@@ -371,7 +376,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
     qnorm = apply_along_axis(vnorm, 0, Q)
     tnorm = sqrt(tt)
     pp = (P**2).sum(0)
-    if mode=='detailed':
+    if mode == 'detailed':
         E = empty((aopt, m, n))
         F = empty((aopt, k, l))
         ssqx, ssqy = [], []
@@ -401,7 +406,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
     expvarx = r_[0, 100*tp/(X*X).sum()]
     expvary = r_[0, 100*tq/(Y*Y).sum()]
 
-    if scale=='loads':
+    if scale == 'loads':
         T = T/tnorm
         W = W*tnorm
         Q = Q*tnorm
@@ -495,11 +500,11 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
     m, n = X.shape
     k, l = Y.shape
     u, o = Z.shape
-    max_rank = min(m, n)
-    assert (a_max>0 and a_max<max_rank), "Number of comp error:\
-    tried:%d, max_rank:%d" %(a_max,max_rank)
+    max_rank = min(m, n) + 1
+    assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
+    tried: %d, max_rank: %d" %(a_max, max_rank)
 
-    if mean_ctr!=None:
+    if mean_ctr != None:
         xctr, yctr, zctr = mean_ctr
         X, mnX = center(X, xctr)
         Y, mnY = center(Y, yctr)
@@ -537,7 +542,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
         l = G[:,:1]
         diff = 1
         niter = 0
-        while (diff>LIM and niter<MAX_ITER):
+        while (diff > LIM and niter < MAX_ITER):
             niter += 1
             u1 = u.copy()
             w1 = w.copy()
@@ -562,7 +567,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
             u = dot(F, c)
             diff = dot((u - u1).T, (u - u1))
         if verbose:
-            if niter==MAX_ITER:
+            if niter == MAX_ITER:
                 print "Maximum nunber of iterations reached!"
             print "Iterations: %d " %niter
             print "Error: %.2E" %diff
@@ -606,7 +611,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
     evy = 100.*(1 - var_y/varY)
     evz = 100.*(1 - var_z/varZ)
 
-    if scale=='loads':
+    if scale == 'loads':
         tnorm = apply_along_axis(vnorm, 0, T)
         T = T/tnorm
         W = W*tnorm
@@ -617,6 +622,20 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
 
     return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}
 
+def lpls_predict(model_dict, x, aopt):
+    """Predict lpls reponses from existing model on new data.
+    """
+    try:
+        m, n = x.shape
+    except:
+        x = atleast_2d(x.shape)
+        m, n = x.shape
+
+    if 'B0' in model_dict.keys():
+        y = model_dict['B0'] + dot()
+
+
+
 def vnorm(a):
     """Returns the norm of a vector.
 
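The newly added `lpls_predict` is committed unfinished: `dot()` is called with no arguments, `x = atleast_2d(x.shape)` wraps the shape tuple instead of the data, and the dict returned by `nipals_lpls` has no 'B0' key. A hedged guess at where the function is headed, using only keys `nipals_lpls` actually returns ('B', 'mnx', 'mny'); a sketch, not the author's intended code:

    from numpy import atleast_2d, dot

    def lpls_predict_sketch(model_dict, x, aopt):
        # center new rows with the training means, then apply the
        # regression coefficients for the chosen component count
        x = atleast_2d(x)
        xc = x - model_dict['mnx']
        return model_dict['mny'] + dot(xc, model_dict['B'][aopt - 1])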
@@ -654,28 +673,28 @@ def center(a, axis):
     """
 
     # check if we have a vector
-    is_vec = len(a.shape)==1
+    is_vec = len(a.shape) == 1
     if not is_vec:
-        is_vec = a.shape[0]==1 or a.shape[1]==1
+        is_vec = a.shape[0] == 1 or a.shape[1] == 1
     if is_vec:
-        if axis==2:
+        if axis == 2:
             warnings.warn("Double centering of vecor ignored, using ordinary centering")
-        if axis==-1:
+        if axis == -1:
             mn = 0
         else:
             mn = a.mean()
         return a - mn, mn
     # !!!fixme: use broadcasting
-    if axis==-1:
+    if axis == -1:
         mn = zeros((1,a.shape[1],))
         #mn = tile(mn, (a.shape[0], 1))
-    elif axis==0:
+    elif axis == 0:
         mn = a.mean(0)[newaxis]
         #mn = tile(mn, (a.shape[0], 1))
-    elif axis==1:
+    elif axis == 1:
         mn = a.mean(1)[:,newaxis]
         #mn = tile(mn, (1, a.shape[1]))
-    elif axis==2:
+    elif axis == 2:
         mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
         return a - mn , a.mean(0)[newaxis]
     else:
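For axis == 2 the offset is the classical double-centering term, row means plus column means minus the grand mean, which leaves every row and column of the result with zero mean. A numpy check:

    import numpy as np

    a = np.random.rand(5, 3)
    mn = a.mean(0)[np.newaxis] + a.mean(1)[:, np.newaxis] - a.mean()
    ac = a - mn

    print(np.allclose(ac.mean(0), 0))   # column means vanish
    print(np.allclose(ac.mean(1), 0))   # row means vanish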
@@ -702,11 +721,11 @@ def _scale(a, axis):
         Scaling vector/matrix
     """
 
-    if axis==-1:
+    if axis == -1:
         sc = zeros((a.shape[1],))
-    elif axis==0:
+    elif axis == 0:
         sc = a.std(0)
-    elif axis==1:
+    elif axis == 1:
         sc = a.std(1)[:,newaxis]
     else:
         raise IOError("input error: axis must be in [-1,0,1]")
@@ -714,19 +733,19 @@ def _scale(a, axis):
     return a - sc, sc
 
 def esvd(data, a_max=None):
-    """ SVD with kernel calculation
+    """SVD with kernel calculation.
 
     Calculate subspaces of X'X or XX' depending on the shape
     of the matrix.
 
-    Parameters:
+    *Parameters*:
 
     data : {array}
         Data matrix
     a_max : {integer}
         Number of components to extract
 
-    Returns:
+    *Returns*:
 
     u : {array}
         Right hand eigenvectors
@@ -735,20 +754,20 @@ def esvd(data, a_max=None):
     v : {array}
         Left hand eigenvectors
 
-    notes:
+    *Notes*:
 
-    Uses Anoldi iterations (ARPACK)
+    Uses Anoldi iterations for the symmetric eigendecomp (ARPACK)
 
     """
 
     m, n = data.shape
-    if m>=n:
+    if m >= n:
         kernel = dot(data.T, data)
 
-        if a_max==None:
+        if a_max == None:
             a_max = n - 1
-        s, v = arpack.eigen_symmetric(kernel,k=a_max, which='LM',
-                                      maxiter=200,tol=1e-5)
+        s, v = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
+                                      maxiter=200, tol=1e-5)
         s = s[::-1]
         v = v[:,::-1]
         #u, s, vt = svd(kernel)
@@ -757,10 +776,10 @@ def esvd(data, a_max=None):
         u = dot(data, v)/s
     else:
         kernel = dot(data, data.T)
-        if a_max==None:
+        if a_max == None:
             a_max = m -1
-        s, u = arpack.eigen_symmetric(kernel,k=a_max, which='LM',
-                                      maxiter=200,tol=1e-5)
+        s, u = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
+                                      maxiter=200, tol=1e-5)
         s = s[::-1]
         u = u[:,::-1]
         #u, s, vt = svd(kernel)
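The shape test in `esvd` is the usual kernel trick: for a tall matrix (m >= n) the eigenvectors of the small n x n kernel X'X are the right singular vectors, and the left ones are recovered as u = Xv/s with s the singular values (square roots of the kernel eigenvalues), so the expensive decomposition runs in the smaller dimension. A numpy sketch of the identity (eigh in place of the module's ARPACK call, with an explicit sqrt that the elided part of the function is assumed to perform):

    import numpy as np

    X = np.random.rand(20, 4)                 # tall: m >= n

    vals, v = np.linalg.eigh(np.dot(X.T, X))  # small n x n eigenproblem
    vals, v = vals[::-1], v[:, ::-1]          # descending order
    s = np.sqrt(vals)                         # singular values
    u = np.dot(X, v) / s                      # recover left singular vectors

    u_ref, s_ref, vt_ref = np.linalg.svd(X, full_matrices=False)
    print(np.allclose(s, s_ref))                     # True
    print(np.allclose(np.abs(u), np.abs(u_ref)))     # True, up to sign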