Mostly clean ups
This commit is contained in:
parent 2951ca4088
commit 4c809674bb
@@ -12,7 +12,7 @@ from numpy.random import shuffle
 from engines import nipals_lpls as lpls
 
 
-def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True):
+def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
     """Performs crossvalidation for generalisation error in lpls.
 
     The L-PLS crossvalidation is estimated just like an ordinary pls
@@ -80,11 +80,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
         if mean_ctr[0] != 1:
             xi = X[val,:] - dat['mnx']
         else:
-            xi = X[val] - X[val].mean(1)[:,newaxis]
+            xi = X[val] - X[cal].mean(1)[:,newaxis]
         if mean_ctr[2] != 1:
             ym = dat['mny']
         else:
-            ym = Y[val].mean(1)[:,newaxis] #???: check this
+            ym = Y[cal].mean(1)[:,newaxis]
         # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
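
The two val -> cal fixes above matter for unbiased error estimates: held-out rows must be centered with means estimated from the calibration rows only, otherwise validation information leaks into the prediction. A minimal standalone sketch of the pattern, using column (axis-0) means so the shapes broadcast for any split; the cal/val index arrays are hypothetical:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((10, 5))
cal = np.arange(7)       # calibration (training) rows
val = np.arange(7, 10)   # held-out rows

# leaky: centering the held-out block with its own mean
xi_leaky = X[val] - X[val].mean(0)

# fixed: center the held-out rows with the calibration mean only
xi = X[val] - X[cal].mean(0)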
@@ -113,7 +113,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
 def pca_jk(a, aopt, n_blocks=None):
     """Returns jack-knife segments from PCA.
 
-    Parameters:
+    *Parameters*:
 
     a : {array}
         data matrix (n x m)
@@ -122,21 +122,15 @@ def pca_jk(a, aopt, n_blocks=None):
     nsets : {integer}
         number of segments
 
-    Returns:
+    *Returns*:
 
     Pcv : {array}
         Loadings collected in a three way matrix (n_segments, m, aopt)
 
-    Notes:
-
-    - The loadings are scaled with the (1/samples)*eigenvalues.
+    *Notes*:
 
     - Crossvalidation method is currently set to random blocks of samples.
 
     - todo: add support for T
 
     - fixme: more efficient to add this in validation loop?
 
     """
     if nsets == None:
         nsets = a.shape[0]
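
pca_jk re-estimates the loadings with one block of samples held out at a time, as the docstring above describes. A minimal sketch of that scheme, using plain numpy SVD in place of the module's own pca engine and sequential rather than randomised blocks:

import numpy as np

def pca_jk_sketch(a, aopt, nsets=None):
    n = a.shape[0]
    if nsets is None:
        nsets = n                       # leave-one-out by default
    Pcv = np.empty((nsets, a.shape[1], aopt))
    for i, seg in enumerate(np.array_split(np.arange(n), nsets)):
        keep = np.setdiff1d(np.arange(n), seg)
        ac = a[keep] - a[keep].mean(0)  # center the retained samples
        u, s, vt = np.linalg.svd(ac, full_matrices=False)
        Pcv[i] = vt[:aopt].T            # loadings for this segment
    return Pcv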
@@ -305,6 +299,7 @@ def cv(N, K, randomise=True, sequential=False):
     of length ~N/K, *without* replacement.
 
     *Parameters*:
+
     N : {integer}
         Total number of samples
     K : {integer}
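
For reference, a generator with the behaviour this docstring promises (K non-overlapping validation index blocks of length ~N/K, optionally shuffled) can be sketched as:

import numpy as np

def cv_sketch(N, K, randomise=True):
    idx = np.arange(N)
    if randomise:
        np.random.shuffle(idx)
    for val in np.array_split(idx, K):  # ~N/K indices per block
        yield val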
@@ -12,13 +12,14 @@ minimum
 from numpy.linalg import inv,svd
 from scipy.sandbox import arpack
 
 
 def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """ Principal Component Analysis.
 
     PCA is a low rank bilinear approximation to a data matrix that sequentially
     extracts orthogonal components of maximum variance.
 
-    Parameters:
+    *Parameters*:
+
     X : {array}
         Data measurement matrix, (samples x variables)
@@ -27,7 +28,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     center_axis : {integer}
         Center along given axis. If neg.: no centering (-inf,..., matrix modes)
 
-    Returns:
+    *Returns*:
 
     T : {array}
         Scores, (samples, components)
@@ -47,7 +48,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     leverage : {array}
         Leverages, (samples,)
 
-    OtherParameters:
+    *OtherParameters*:
 
     scale : {string}, optional
         Where to put the weights [['scores'], 'loadings']
@@ -55,7 +56,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
         Amount of info retained, [['normal'], 'fast', 'detailed']
 
 
-    :SeeAlso:
+    *SeeAlso*:
 
     `center` : Data centering
 
@@ -78,9 +79,11 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """
 
     m, n = X.shape
-    assert(aopt<=min(m,n))
+    min_aopt = min(m, n)
     if center_axis >= 0:
         X = X - expand_dims(X.mean(center_axis), center_axis)
+        min_aopt = min_aopt - 1
+    assert(aopt <= min_aopt)
     if m > (n+100) or n > (m+100):
         u, s, v = esvd(X, aopt)
     else:
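
The new min_aopt bookkeeping encodes a rank argument: centering X along axis 0 makes its rows sum to the zero vector, so for m <= n at most min(m, n) - 1 components remain (for m > n the tightened bound is merely conservative). A quick numpy check:

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((5, 8))
print(np.linalg.matrix_rank(X))              # 5 = min(m, n)
print(np.linalg.matrix_rank(X - X.mean(0)))  # 4: centering costs one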
@@ -139,7 +142,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
 
     Performs PCR on given matrix and returns results in a dictionary.
 
-    Parameters:
+    *Parameters*:
 
     a : array
         Data measurement matrix, (samples x variables)
@@ -148,18 +151,18 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
     aopt : int
         Number of components to use, aopt<=min(samples, variables)
 
-    Returns:
+    *Returns*:
 
     results : dict
         keys -- values, T -- scores, P -- loadings, E -- residuals,
         levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
        explained variance, aopt -- number of components used
 
-    OtherParameters:
+    *OtherParameters*:
 
-    mode : str
+    mode : {string}
         Amount of info retained, ('fast', 'normal', 'detailed')
-    center_axis : int
+    center_axis : {integer}
         Center along given axis. If neg.: no centering (-inf,..., matrix modes)
 
     SeeAlso:
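
Going by the dictionary layout documented above, a typical pcr call looks like the following; the shapes are made up and the snippet assumes this module is importable:

import numpy as np

a = np.random.randn(20, 50)  # measurements (samples x variables)
b = np.random.randn(20, 3)   # responses
res = pcr(a, b, aopt=5)
T, P = res['T'], res['P']    # scores and loadings
print(res['expvarx'])        # cumulative explained variance in X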
@@ -284,7 +287,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
 
     *SeeAlso*:
 
-        `center` : data centering
+        `center` - data centering
 
     *Notes*
 
@@ -311,13 +314,15 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
         Y = atleast_2d(Y).T
     k, l = Y.shape
     assert(m == k)
-    assert(aopt<min(m, n))
     mnx, mny = 0, 0
+    min_aopt = min(m, n)
     if center_axis >= 0:
         mnx = expand_dims(X.mean(center_axis), center_axis)
         X = X - mnx
+        min_aopt = min_aopt - 1
         mny = expand_dims(Y.mean(center_axis), center_axis)
         Y = Y - mny
+    assert(aopt > 0 and aopt < min_aopt)
 
     W = empty((n, aopt))
     P = empty((n, aopt))
@@ -356,7 +361,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
         T[:,i] = t.ravel()
         W[:,i] = w.ravel()
 
-        if mode=='fast' and i==aopt-1:
+        if mode == 'fast' and i == (aopt - 1):
             if scale == 'loads':
                 tnorm = sqrt(tt)
                 T = T/tnorm
@@ -495,7 +500,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
     m, n = X.shape
     k, l = Y.shape
     u, o = Z.shape
-    max_rank = min(m, n)
+    max_rank = min(m, n) + 1
     assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
     tried: %d, max_rank: %d" %(a_max, max_rank)
 
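
The + 1 relaxes the component bound by exactly one: the old strict a_max < min(m, n) rejected a model using all min(m, n) components, while the new bound admits it:

m, n = 20, 5
a_max = 5
assert a_max > 0 and a_max < min(m, n) + 1  # passes now
# the old check, a_max < min(m, n), would raise for a_max == 5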
@@ -617,6 +622,20 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
 
     return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}
 
+def lpls_predict(model_dict, x, aopt):
+    """Predict lpls responses from existing model on new data.
+    """
+    try:
+        m, n = x.shape
+    except:
+        x = atleast_2d(x)
+        m, n = x.shape
+
+    if 'B0' in model_dict.keys():
+        y = model_dict['B0'] + dot()
+
+
+
 def vnorm(a):
     """Returns the norm of a vector.
 
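
The added lpls_predict is committed in an unfinished state: the dot() call has no arguments and nothing is returned. Judging from how lpls_val applies the coefficients (ym + dot(xi, dat['B'][a])) and from the keys nipals_lpls returns, a completed version might look like the sketch below; the centering choice and B indexing are assumptions, not the author's code, and the module-level numpy names (atleast_2d, dot) are taken as given:

def lpls_predict_sketch(model_dict, x, aopt):
    """Predict responses for new rows x (hypothetical completion)."""
    x = atleast_2d(x)
    xi = x - model_dict['mnx']  # center with the stored model mean
    # B[aopt - 1] holds the coefficients of an aopt-component model
    return model_dict['mny'] + dot(xi, model_dict['B'][aopt - 1])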
@@ -714,19 +733,19 @@ def _scale(a, axis):
     return a - sc, sc
 
 def esvd(data, a_max=None):
-    """ SVD with kernel calculation
+    """SVD with kernel calculation.
 
     Calculate subspaces of X'X or XX' depending on the shape
     of the matrix.
 
-    Parameters:
+    *Parameters*:
 
     data : {array}
         Data matrix
     a_max : {integer}
         Number of components to extract
 
-    Returns:
+    *Returns*:
 
     u : {array}
         Right hand eigenvectors
@@ -735,9 +754,9 @@ def esvd(data, a_max=None):
     v : {array}
         Left hand eigenvectors
 
-    notes:
+    *Notes*:
 
-    Uses Arnoldi iterations (ARPACK)
+    Uses Arnoldi iterations for the symmetric eigendecomp (ARPACK)
 
     """
 