Mostly clean ups

Arnar Flatberg 2007-11-27 15:05:19 +00:00
parent 2951ca4088
commit 4c809674bb
2 changed files with 98 additions and 84 deletions

View File

@@ -12,7 +12,7 @@ from numpy.random import shuffle
 from engines import nipals_lpls as lpls

-def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True):
+def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
     """Performs crossvalidation for generalisation error in lpls.

     The L-PLS crossvalidation is estimated just like an ordinary pls
@@ -80,11 +80,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
         if mean_ctr[0] != 1:
             xi = X[val,:] - dat['mnx']
         else:
-            xi = X[val] - X[val].mean(1)[:,newaxis]
+            xi = X[val] - X[cal].mean(1)[:,newaxis]
         if mean_ctr[2] != 1:
             ym = dat['mny']
         else:
-            ym = Y[val].mean(1)[:,newaxis] #???: check this
+            ym = Y[cal].mean(1)[:,newaxis]
         # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
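The two `val` -> `cal` substitutions are the substantive change in this hunk, and they retire the old `#???: check this` marker: the held-out predictions now reuse centering statistics from the calibration segment instead of recomputing them from the held-out rows themselves. A minimal axis-0 illustration of that principle, with hypothetical names (the hunk itself centers along axis 1):

import numpy as np

X = np.random.rand(10, 5)
val = np.arange(3)                       # held-out rows
cal = np.setdiff1d(np.arange(10), val)   # calibration rows

mnx = X[cal].mean(0)   # means estimated on the calibration set only
xi = X[val] - mnx      # center held-out rows with calibration statistics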
@@ -113,7 +113,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
 def pca_jk(a, aopt, n_blocks=None):
     """Returns jack-knife segements from PCA.

-    Parameters:
+    *Parameters*:
         a : {array}
             data matrix (n x m)
@@ -122,21 +122,15 @@ def pca_jk(a, aopt, n_blocks=None):
         nsets : {integer}
             number of segments

-    Returns:
+    *Returns*:
         Pcv : {array}
             Loadings collected in a three way matrix (n_segments, m, aopt)

-    Notes:
-        - The loadings are scaled with the (1/samples)*eigenvalues.
+    *Notes*:
         - Crossvalidation method is currently set to random blocks of samples.
-        - todo: add support for T
-        - fixme: more efficient to add this in validation loop?
     """
     if nsets == None:
         nsets = a.shape[0]
@@ -305,6 +299,7 @@ def cv(N, K, randomise=True, sequential=False):
     of length ~N/K, *without* replacement.

     *Parameters*:
         N : {integer}
             Total number of samples
         K : {integer}
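For orientation, `cv(N, K)` yields calibration/validation index pairs in which each validation segment holds roughly N/K samples and no sample is held out twice. A stand-in sketch of that behavior, not the module's implementation:

import numpy as np

def cv_sketch(N, K, randomise=True):
    """Yield (calibration, validation) index pairs for K-fold CV."""
    idx = np.arange(N)
    if randomise:
        np.random.shuffle(idx)
    for fold in np.array_split(idx, K):
        yield np.setdiff1d(idx, fold), fold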

View File

@@ -12,13 +12,14 @@ minimum
 from numpy.linalg import inv,svd
 from scipy.sandbox import arpack

 def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """ Principal Component Analysis.

     PCA is a low rank bilinear aprroximation to a data matrix that sequentially
     extracts orthogonal components of maximum variance.

-    Parameters:
+    *Parameters*:
         X : {array}
             Data measurement matrix, (samples x variables)
@@ -27,7 +28,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
         center_axis : {integer}
             Center along given axis. If neg.: no centering (-inf,..., matrix modes)

-    Returns:
+    *Returns*:
         T : {array}
             Scores, (samples, components)
@@ -47,7 +48,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
         leverage : {array}
             Leverages, (samples,)

-    OtherParameters:
+    *OtherParameters*:
         scale : {string}, optional
             Where to put the weights [['scores'], 'loadings']
@@ -55,7 +56,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
             Amount of info retained, [['normal'], 'fast', 'detailed']

-    :SeeAlso:
+    *SeeAlso*:
         `center` : Data centering
@@ -78,9 +79,11 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
     """
     m, n = X.shape
-    assert(aopt<=min(m,n))
+    min_aopt = min(m, n)
     if center_axis >= 0:
         X = X - expand_dims(X.mean(center_axis), center_axis)
+        min_aopt = min_aopt - 1
+    assert(aopt <= min_aopt)
     if m > (n+100) or n > (m+100):
         u, s, v = esvd(X, aopt)
     else:
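The relocated assert is the point of this hunk: centering removes one degree of freedom, so a centered matrix supports at most min(m, n) - 1 components, and the old check `aopt <= min(m, n)` was too permissive whenever centering was on. A quick numpy check of the rank argument:

import numpy as np

X = np.random.rand(5, 8)
print(np.linalg.matrix_rank(X))    # 5
Xc = X - X.mean(0)                 # column centering
print(np.linalg.matrix_rank(Xc))   # 4: the centered rows sum to zero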
@@ -139,7 +142,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
     Performs PCR on given matrix and returns results in a dictionary.

-    Parameters:
+    *Parameters*:
         a : array
             Data measurement matrix, (samples x variables)
@@ -148,18 +151,18 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
         aopt : int
             Number of components to use, aopt<=min(samples, variables)

-    Returns:
+    *Returns*:
         results : dict
             keys -- values, T -- scores, P -- loadings, E -- residuals,
             levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
             explained variance, aopt -- number of components used

-    OtherParameters:
-        mode : str
+    *OtherParameters*:
+        mode : {string}
             Amount of info retained, ('fast', 'normal', 'detailed')
-        center_axis : int
+        center_axis : {integer}
             Center along given axis. If neg.: no centering (-inf,..., matrix modes)

     SeeAlso:
@@ -284,7 +287,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
     *SeeAlso*:

-        `center` : data centering
+        `center` - data centering

     *Notes*
@@ -311,13 +314,15 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
         Y = atleast_2d(Y).T
     k, l = Y.shape
     assert(m == k)
-    assert(aopt<min(m, n))
     mnx, mny = 0, 0
+    min_aopt = min(m, n)
     if center_axis >= 0:
         mnx = expand_dims(X.mean(center_axis), center_axis)
         X = X - mnx
+        min_aopt = min_aopt - 1
         mny = expand_dims(Y.mean(center_axis), center_axis)
         Y = Y - mny
+    assert(aopt > 0 and aopt < min_aopt)

     W = empty((n, aopt))
     P = empty((n, aopt))
@@ -356,7 +361,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
         T[:,i] = t.ravel()
         W[:,i] = w.ravel()

-        if mode=='fast' and i==aopt-1:
+        if mode == 'fast' and i == (aopt - 1):
             if scale == 'loads':
                 tnorm = sqrt(tt)
                 T = T/tnorm
@@ -495,7 +500,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth=False):
     m, n = X.shape
     k, l = Y.shape
     u, o = Z.shape

-    max_rank = min(m, n)
+    max_rank = min(m, n) + 1
     assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
     tried: %d, max_rank: %d" %(a_max, max_rank)
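The `+ 1` makes the strict inequality in the assert equivalent to `a_max <= min(m, n)`, so the boundary value `a_max == min(m, n)` is no longer wrongly rejected; it is the same rank bookkeeping as the `min_aopt` changes in `pca` and `pls` above, expressed as an inclusive upper bound:

m, n, a_max = 60, 4, 4                  # a_max at the previously rejected boundary
max_rank = min(m, n) + 1
assert a_max > 0 and a_max < max_rank   # passes: a_max <= min(m, n)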
@@ -617,6 +622,20 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth=False):
     return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}

+def lpls_predict(model_dict, x, aopt):
+    """Predict lpls reponses from existing model on new data.
+    """
+    try:
+        m, n = x.shape
+    except:
+        x = atleast_2d(x.shape)
+        m, n = x.shape
+
+    if 'B0' in model_dict.keys():
+        y = model_dict['B0'] + dot()
+

 def vnorm(a):
     """Returns the norm of a vector.
@@ -714,19 +733,19 @@ def _scale(a, axis):
     return a - sc, sc

 def esvd(data, a_max=None):
-    """ SVD with kernel calculation
+    """SVD with kernel calculation.

     Calculate subspaces of X'X or XX' depending on the shape
     of the matrix.

-    Parameters:
+    *Parameters*:
         data : {array}
             Data matrix
         a_max : {integer}
             Number of components to extract

-    Returns:
+    *Returns*:
         u : {array}
             Right hand eigenvectors
@@ -735,9 +754,9 @@ def esvd(data, a_max=None):
         v : {array}
             Left hand eigenvectors

-    notes:
-    Uses Anoldi iterations (ARPACK)
+    *Notes*:
+    Uses Anoldi iterations for the symmetric eigendecomp (ARPACK)
     """