Fixed conflicts
This commit is contained in:
parent
1103245d85
commit
253305b602
@ -13,4 +13,3 @@ def test(level=1, verbosity=1):
|
|||||||
print 'Python version %s' % (sys.version.replace('\n', '',),)
|
print 'Python version %s' % (sys.version.replace('\n', '',),)
|
||||||
from numpy.testing import NumpyTest
|
from numpy.testing import NumpyTest
|
||||||
return NumpyTest().test(level, verbosity)
|
return NumpyTest().test(level, verbosity)
|
||||||
|
|
||||||
|
@ -155,7 +155,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
|
|||||||
validation scheme.
|
validation scheme.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
X : {array}
|
X : {array}
|
||||||
Main data matrix (m, n)
|
Main data matrix (m, n)
|
||||||
Y : {array}
|
Y : {array}
|
||||||
@ -180,9 +180,9 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
|
|||||||
If true, Require orthogonal latent components in Z.
|
If true, Require orthogonal latent components in Z.
|
||||||
verbose : {boolean}, optional
|
verbose : {boolean}, optional
|
||||||
Verbosity of console output. For use in debugging.
|
Verbosity of console output. For use in debugging.
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
rmsep : {array}
|
rmsep : {array}
|
||||||
Root mean squred error of prediction
|
Root mean squred error of prediction
|
||||||
yhat : {array}
|
yhat : {array}
|
||||||
@ -191,19 +191,19 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
|
|||||||
Estimated value of optimal number of components
|
Estimated value of optimal number of components
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
k, l = Y.shape
|
k, l = Y.shape
|
||||||
o, p = Z.shape
|
o, p = Z.shape
|
||||||
assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l)
|
assert m == k, "X (%d,%d) - Y (%d,%d) dim mismatch" %(m, n, k, l)
|
||||||
assert n == p, "X (%d,%d) - Z (%d,%d) dim mismatch" %(m, n, o, p)
|
assert n == p, "X (%d,%d) - Z (%d,%d) dim mismatch" %(m, n, o, p)
|
||||||
if nsets == None:
|
if nsets == None:
|
||||||
nsets = m
|
nsets = m
|
||||||
if nsets > X.shape[0]:
|
if nsets > X.shape[0]:
|
||||||
print "nsets (%d) is larger than number of variables (%d).\nnsets: %d -> %d" %(nsets, m, nsets, m)
|
print "nsets (%d) is larger than number of variables (%d).\nnsets: %d -> %d" %(nsets, m, nsets, m)
|
||||||
nsets = m
|
nsets = m
|
||||||
assert (alpha >= 0 and alpha<=1), "Alpha needs to be within [0,1], got: %.2f" %alpha
|
assert (alpha >= 0 and alpha<=1), "Alpha needs to be within [0,1], got: %.2f" %alpha
|
||||||
|
|
||||||
Yhat = empty((a_max, k, l), 'd')
|
Yhat = empty((a_max, k, l), 'd')
|
||||||
for cal, val in cv(k, nsets):
|
for cal, val in cv(k, nsets):
|
||||||
# do the training model
|
# do the training model
|
||||||
@ -217,10 +217,16 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
|
|||||||
# predictions
|
# predictions
|
||||||
for a in range(a_max):
|
for a in range(a_max):
|
||||||
Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
|
Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
|
||||||
|
# todo: need a better support for classification error
|
||||||
|
y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
|
||||||
|
if y_is_class:
|
||||||
|
pass
|
||||||
|
#Yhat, err = class_error(Yhat, Y)
|
||||||
|
#return Yhat, err
|
||||||
sep = (Y - Yhat)**2
|
sep = (Y - Yhat)**2
|
||||||
rmsep = sqrt(sep.mean(1)).T
|
rmsep = sqrt(sep.mean(1)).T
|
||||||
#aopt = find_aopt_from_sep(rmsep)
|
#aopt = find_aopt_from_sep(rmsep)
|
||||||
|
|
||||||
# todo: need a better support for classification error
|
# todo: need a better support for classification error
|
||||||
error = prediction_error(Yhat, Y, method='1/2')
|
error = prediction_error(Yhat, Y, method='1/2')
|
||||||
|
|
||||||
@ -228,7 +234,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
|
|||||||
|
|
||||||
def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
|
def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
|
||||||
"""Returns jack-knife segments from PCA.
|
"""Returns jack-knife segments from PCA.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
a : {array}
|
a : {array}
|
||||||
@ -252,9 +258,9 @@ def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
|
|||||||
Loadings collected in a three way matrix (n_segments, m, aopt)
|
Loadings collected in a three way matrix (n_segments, m, aopt)
|
||||||
|
|
||||||
*Notes*:
|
*Notes*:
|
||||||
|
|
||||||
- Crossvalidation method is currently set to random blocks of samples.
|
- Crossvalidation method is currently set to random blocks of samples.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
m, n = a.shape
|
m, n = a.shape
|
||||||
if nsets == None:
|
if nsets == None:
|
||||||
@ -278,14 +284,14 @@ def pca_jk(a, aopt, nsets=None, center_axis=[0], method='cv'):
|
|||||||
Pcv[i,:,:] = pca(a[cal,:], aopt, mode='fast', scale='loads', center_axis = center_axis)['P']
|
Pcv[i,:,:] = pca(a[cal,:], aopt, mode='fast', scale='loads', center_axis = center_axis)['P']
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(method)
|
raise NotImplementedError(method)
|
||||||
|
|
||||||
return Pcv
|
return Pcv
|
||||||
|
|
||||||
def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
|
def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
|
||||||
""" Returns jack-knife segements of W.
|
""" Returns jack-knife segements of W.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
X : {array}
|
X : {array}
|
||||||
Main data matrix (m, n)
|
Main data matrix (m, n)
|
||||||
Y : {array}
|
Y : {array}
|
||||||
@ -294,7 +300,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
|
|||||||
The number of components to calculate (0, min(m,n))
|
The number of components to calculate (0, min(m,n))
|
||||||
nsets : (integer), optional
|
nsets : (integer), optional
|
||||||
Number of jack-knife segments
|
Number of jack-knife segments
|
||||||
|
|
||||||
center_axis : {boolean}, optional
|
center_axis : {boolean}, optional
|
||||||
- -1 : nothing
|
- -1 : nothing
|
||||||
- 0 : row center
|
- 0 : row center
|
||||||
@ -302,12 +308,12 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
|
|||||||
- 2 : double center
|
- 2 : double center
|
||||||
verbose : {boolean}, optional
|
verbose : {boolean}, optional
|
||||||
Verbosity of console output. For use in debugging.
|
Verbosity of console output. For use in debugging.
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
Wcv : {array}
|
Wcv : {array}
|
||||||
Loading-weights jack-knife segements
|
Loading-weights jack-knife segements
|
||||||
|
|
||||||
"""
|
"""
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
k, l = Y.shape
|
k, l = Y.shape
|
||||||
@ -320,7 +326,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center_axis=[0,0], verbose=False):
|
|||||||
print "Segment number: %d" %i
|
print "Segment number: %d" %i
|
||||||
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=center_axis)
|
dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=center_axis)
|
||||||
Wcv[i,:,:] = dat['W']
|
Wcv[i,:,:] = dat['W']
|
||||||
|
|
||||||
return Wcv
|
return Wcv
|
||||||
|
|
||||||
def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
|
def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
|
||||||
@ -332,10 +338,10 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=
|
|||||||
infer the paramter confidence in th model.
|
infer the paramter confidence in th model.
|
||||||
|
|
||||||
The segements returned are the X-block weights and Z-block weights.
|
The segements returned are the X-block weights and Z-block weights.
|
||||||
|
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
X : {array}
|
X : {array}
|
||||||
Main data matrix (m, n)
|
Main data matrix (m, n)
|
||||||
Y : {array}
|
Y : {array}
|
||||||
@ -358,15 +364,15 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=
|
|||||||
2 : double center
|
2 : double center
|
||||||
verbose : {boolean}, optional
|
verbose : {boolean}, optional
|
||||||
Verbosity of console output. For use in debugging.
|
Verbosity of console output. For use in debugging.
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
Wx : {array}
|
Wx : {array}
|
||||||
X-block jack-knife segements
|
X-block jack-knife segements
|
||||||
Wz : {array}
|
Wz : {array}
|
||||||
Z-block jack-knife segements
|
Z-block jack-knife segements
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
k, l = Y.shape
|
k, l = Y.shape
|
||||||
o, p = Z.shape
|
o, p = Z.shape
|
||||||
@ -388,7 +394,7 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=
|
|||||||
|
|
||||||
def find_aopt_from_sep(err, method='vanilla'):
|
def find_aopt_from_sep(err, method='vanilla'):
|
||||||
"""Returns an estimate of optimal number of components.
|
"""Returns an estimate of optimal number of components.
|
||||||
|
|
||||||
The estimate is based on the error of prediction from
|
The estimate is based on the error of prediction from
|
||||||
crossvalidation. This is pretty much wild guessing and it is
|
crossvalidation. This is pretty much wild guessing and it is
|
||||||
recomended to inspect model parameters and prediction errors
|
recomended to inspect model parameters and prediction errors
|
||||||
@ -406,7 +412,7 @@ def find_aopt_from_sep(err, method='vanilla'):
|
|||||||
aopt : {integer}
|
aopt : {integer}
|
||||||
A guess on the optimal number of components
|
A guess on the optimal number of components
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if method == 'vanilla':
|
if method == 'vanilla':
|
||||||
# min rmsep
|
# min rmsep
|
||||||
rmsecv = sqrt(err.mean(0))
|
rmsecv = sqrt(err.mean(0))
|
||||||
@ -434,7 +440,7 @@ def cv(N, K, randomise=True, sequential=False):
|
|||||||
of length ~N/K, *without* replacement.
|
of length ~N/K, *without* replacement.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
N : {integer}
|
N : {integer}
|
||||||
Total number of samples
|
Total number of samples
|
||||||
K : {integer}
|
K : {integer}
|
||||||
@ -443,7 +449,7 @@ def cv(N, K, randomise=True, sequential=False):
|
|||||||
Use random sampling
|
Use random sampling
|
||||||
sequential : {boolean}
|
sequential : {boolean}
|
||||||
Use sequential sampling
|
Use sequential sampling
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
training : {array-like}
|
training : {array-like}
|
||||||
@ -456,12 +462,12 @@ def cv(N, K, randomise=True, sequential=False):
|
|||||||
If randomise is true, a copy of index is shuffled before partitioning,
|
If randomise is true, a copy of index is shuffled before partitioning,
|
||||||
|
|
||||||
otherwise its order is preserved in training and validation.
|
otherwise its order is preserved in training and validation.
|
||||||
|
|
||||||
Randomise overrides the sequential argument. If randomise is true,
|
Randomise overrides the sequential argument. If randomise is true,
|
||||||
sequential is False
|
sequential is False
|
||||||
If sequential is true the index is partioned in continous blocks,
|
If sequential is true the index is partioned in continous blocks,
|
||||||
otherwise interleaved ordering is used.
|
otherwise interleaved ordering is used.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if K > N:
|
if K > N:
|
||||||
raise ValueError, "You cannot divide a list of %d samples into more than %d segments. Yout tried: %s" %(N, N, K)
|
raise ValueError, "You cannot divide a list of %d samples into more than %d segments. Yout tried: %s" %(N, N, K)
|
||||||
@ -510,6 +516,8 @@ def diag_cv(shape, nsets=9, randomise=True):
|
|||||||
except:
|
except:
|
||||||
raise ValueError("shape needs to be a two-tuple")
|
raise ValueError("shape needs to be a two-tuple")
|
||||||
if nsets>m or nsets>n:
|
if nsets>m or nsets>n:
|
||||||
|
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
||||||
|
raise ValueError, msg
|
||||||
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
msg = "You may not use more subsets than max(n_rows, n_cols)"
|
||||||
nsets = min(m, n)
|
nsets = min(m, n)
|
||||||
nm = n*m
|
nm = n*m
|
||||||
@ -524,7 +532,20 @@ def diag_cv(shape, nsets=9, randomise=True):
|
|||||||
validation.update(ind)
|
validation.update(ind)
|
||||||
#training = [j for j in index if j not in validation]
|
#training = [j for j in index if j not in validation]
|
||||||
yield list(validation)
|
yield list(validation)
|
||||||
|
|
||||||
|
def class_error(y_hat, y, method='vanilla'):
|
||||||
|
""" Not used.
|
||||||
|
"""
|
||||||
|
a_opt, k, l = y_hat.shape
|
||||||
|
y_hat_c = zeros((k, l), dtype='d')
|
||||||
|
if method == vanilla:
|
||||||
|
pass
|
||||||
|
for a in range(a_opt):
|
||||||
|
for i in range(k):
|
||||||
|
y_hat_c[a, val, argmax(y_hat[a,val,:])] = 1.0
|
||||||
|
err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d')
|
||||||
|
|
||||||
|
return y_hat_c, err
|
||||||
|
|
||||||
def prediction_error(y_hat, y, method='squared'):
|
def prediction_error(y_hat, y, method='squared'):
|
||||||
"""Loss function on multiclass Y.
|
"""Loss function on multiclass Y.
|
||||||
@ -651,7 +672,7 @@ def _wkernel_pls_val(X, Y, a_max, n_blocks=None):
|
|||||||
for Din, Doi, Yin, Yout in V:
|
for Din, Doi, Yin, Yout in V:
|
||||||
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
|
||||||
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
|
||||||
|
|
||||||
dat = w_simpls(Din, Yin, a_max)
|
dat = w_simpls(Din, Yin, a_max)
|
||||||
Q, U, H = dat['Q'], dat['U'], dat['H']
|
Q, U, H = dat['Q'], dat['U'], dat['H']
|
||||||
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
|
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
|
||||||
|
149
pyblm/engines.py
149
pyblm/engines.py
@ -20,7 +20,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
extracts orthogonal components of maximum variance.
|
extracts orthogonal components of maximum variance.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
X : {array}
|
X : {array}
|
||||||
Data measurement matrix, (samples x variables)
|
Data measurement matrix, (samples x variables)
|
||||||
aopt : {integer}
|
aopt : {integer}
|
||||||
@ -55,21 +55,21 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
mode : {string}, optional
|
mode : {string}, optional
|
||||||
Amount of info retained, [['normal'], 'fast', 'detailed']
|
Amount of info retained, [['normal'], 'fast', 'detailed']
|
||||||
|
|
||||||
|
|
||||||
*SeeAlso*:
|
*SeeAlso*:
|
||||||
|
|
||||||
`center` : Data centering
|
`center` : Data centering
|
||||||
|
|
||||||
|
|
||||||
*Notes*
|
*Notes*
|
||||||
|
|
||||||
Uses kernel speed-up if m>>n or m<<n.
|
Uses kernel speed-up if m>>n or m<<n.
|
||||||
|
|
||||||
If residuals turn rank deficient, a lower number of component than given
|
If residuals turn rank deficient, a lower number of component than given
|
||||||
in input will be used.
|
in input will be used.
|
||||||
|
|
||||||
*Examples*:
|
*Examples*:
|
||||||
|
|
||||||
>>> import scipy,engines
|
>>> import scipy,engines
|
||||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||||
>>> dat=engines.pca(a, 2)
|
>>> dat=engines.pca(a, 2)
|
||||||
@ -77,7 +77,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
array([0.,99.8561562, 100.])
|
array([0.,99.8561562, 100.])
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
max_aopt = min(m, n)
|
max_aopt = min(m, n)
|
||||||
if center_axis != None:
|
if center_axis != None:
|
||||||
@ -94,7 +94,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
u = u[:,:aopt]
|
u = u[:,:aopt]
|
||||||
s = s[:aopt]
|
s = s[:aopt]
|
||||||
v = v[:,:aopt]
|
v = v[:,:aopt]
|
||||||
|
|
||||||
# ranktest
|
# ranktest
|
||||||
tol = 1e-10
|
tol = 1e-10
|
||||||
eff_rank = sum(s > s[0]*tol)
|
eff_rank = sum(s > s[0]*tol)
|
||||||
@ -104,14 +104,14 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
T = T[:,:aopt]
|
T = T[:,:aopt]
|
||||||
P = v[:,:aopt]
|
P = v[:,:aopt]
|
||||||
e = s**2
|
e = s**2
|
||||||
|
|
||||||
if scale=='loads':
|
if scale=='loads':
|
||||||
T = T/s
|
T = T/s
|
||||||
P = P*s
|
P = P*s
|
||||||
|
|
||||||
if mode in ['fast', 'f', 'F']:
|
if mode in ['fast', 'f', 'F']:
|
||||||
return {'T':T, 'P':P, 'aopt':aopt, 'mnx': mnx}
|
return {'T':T, 'P':P, 'aopt':aopt, 'mnx': mnx}
|
||||||
|
|
||||||
if mode in ['detailed', 'd', 'D']:
|
if mode in ['detailed', 'd', 'D']:
|
||||||
E = empty((aopt, m, n))
|
E = empty((aopt, m, n))
|
||||||
ssq = []
|
ssq = []
|
||||||
@ -135,7 +135,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
|
|||||||
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
|
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
|
||||||
# variances
|
# variances
|
||||||
expvarx = r_[0, 100*e.cumsum()/(X*X).sum()]
|
expvarx = r_[0, 100*e.cumsum()/(X*X).sum()]
|
||||||
|
|
||||||
return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
|
return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
|
||||||
'aopt': aopt, 'eigvals': e, 'mnx': mnx}
|
'aopt': aopt, 'eigvals': e, 'mnx': mnx}
|
||||||
|
|
||||||
@ -159,20 +159,20 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
|
|||||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||||
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
||||||
explained variance, aopt -- number of components used
|
explained variance, aopt -- number of components used
|
||||||
|
|
||||||
*OtherParameters*:
|
*OtherParameters*:
|
||||||
|
|
||||||
mode : {string}
|
mode : {string}
|
||||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||||
center_axis : {integer}
|
center_axis : {integer}
|
||||||
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||||
|
|
||||||
SeeAlso:
|
SeeAlso:
|
||||||
|
|
||||||
- pca : other blm
|
- pca : other blm
|
||||||
- pls : other blm
|
- pls : other blm
|
||||||
- lpls : other blm
|
- lpls : other blm
|
||||||
|
|
||||||
*Notes*
|
*Notes*
|
||||||
|
|
||||||
-----
|
-----
|
||||||
@ -180,12 +180,12 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
|
|||||||
Uses kernel speed-up if m>>n or m<<n.
|
Uses kernel speed-up if m>>n or m<<n.
|
||||||
|
|
||||||
If residuals turn rank deficient, a lower number of component than given
|
If residuals turn rank deficient, a lower number of component than given
|
||||||
in input will be used. The number of components used is given in results-dict.
|
in input will be used. The number of components used is given in results-dict.
|
||||||
|
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
|
||||||
>>> import scipy,engines
|
>>> import scipy,engines
|
||||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||||
>>> b=scipy.asarray([[1,1],[2,3]])
|
>>> b=scipy.asarray([[1,1],[2,3]])
|
||||||
@ -222,9 +222,9 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
|
|||||||
F = b - dot(T, Q.T)
|
F = b - dot(T, Q.T)
|
||||||
sepy = F**2
|
sepy = F**2
|
||||||
ssqy = [sepy.sum(0), sepy.sum(1)]
|
ssqy = [sepy.sum(0), sepy.sum(1)]
|
||||||
|
|
||||||
expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]]
|
expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]]
|
||||||
|
|
||||||
dat.update({'Q': Q, 'F': F, 'evy': expvary, 'ssqy': ssqy, 'mny': mny})
|
dat.update({'Q': Q, 'F': F, 'evy': expvary, 'ssqy': ssqy, 'mny': mny})
|
||||||
return dat
|
return dat
|
||||||
|
|
||||||
@ -245,9 +245,9 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
Which component should get the scale
|
Which component should get the scale
|
||||||
center_axis : {-1, integer}
|
center_axis : {-1, integer}
|
||||||
Perform centering across given axis, (-1 is no centering)
|
Perform centering across given axis, (-1 is no centering)
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
T : {array}
|
T : {array}
|
||||||
X-scores
|
X-scores
|
||||||
W : {array}
|
W : {array}
|
||||||
@ -280,25 +280,25 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
Sum of squared residuals in Y along each dimesion
|
Sum of squared residuals in Y along each dimesion
|
||||||
leverage : {array}
|
leverage : {array}
|
||||||
Sample leverages
|
Sample leverages
|
||||||
|
|
||||||
*OtherParameters*:
|
*OtherParameters*:
|
||||||
|
|
||||||
mode : ['normal', 'fast', 'detailed'], optional
|
mode : ['normal', 'fast', 'detailed'], optional
|
||||||
How much details to compute
|
How much details to compute
|
||||||
|
|
||||||
*SeeAlso*:
|
*SeeAlso*:
|
||||||
|
|
||||||
`center` - data centering
|
`center` - data centering
|
||||||
|
|
||||||
*Notes*
|
*Notes*
|
||||||
|
|
||||||
- The output with mode='fast' will only return T and W
|
- The output with mode='fast' will only return T and W
|
||||||
|
|
||||||
- If residuals turn rank deficient, a lower number of component than given in input will be used. The number of components used is given in results.
|
- If residuals turn rank deficient, a lower number of component than given in input will be used. The number of components used is given in results.
|
||||||
|
|
||||||
|
|
||||||
*Examples*
|
*Examples*
|
||||||
|
|
||||||
>>> import numpy, engines
|
>>> import numpy, engines
|
||||||
>>> a = numpy.asarray([[1,2,3],[2,4,5]])
|
>>> a = numpy.asarray([[1,2,3],[2,4,5]])
|
||||||
>>> b = numpy.asarray([[1,1],[2,3]])
|
>>> b = numpy.asarray([[1,1],[2,3]])
|
||||||
@ -307,7 +307,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
array([0.,99.8561562, 100.])
|
array([0.,99.8561562, 100.])
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
try:
|
try:
|
||||||
k, l = Y.shape
|
k, l = Y.shape
|
||||||
@ -322,7 +322,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
Y, mny = center(Y, center_axis[1])
|
Y, mny = center(Y, center_axis[1])
|
||||||
min_aopt = min_aopt - 1
|
min_aopt = min_aopt - 1
|
||||||
assert(aopt > 0 and aopt < min_aopt)
|
assert(aopt > 0 and aopt < min_aopt)
|
||||||
|
|
||||||
W = empty((n, aopt))
|
W = empty((n, aopt))
|
||||||
P = empty((n, aopt))
|
P = empty((n, aopt))
|
||||||
R = empty((n, aopt))
|
R = empty((n, aopt))
|
||||||
@ -330,7 +330,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
T = empty((m, aopt))
|
T = empty((m, aopt))
|
||||||
B = empty((aopt, n, l))
|
B = empty((aopt, n, l))
|
||||||
tt = empty((aopt,))
|
tt = empty((aopt,))
|
||||||
|
|
||||||
XY = dot(X.T, Y)
|
XY = dot(X.T, Y)
|
||||||
for i in range(aopt):
|
for i in range(aopt):
|
||||||
if XY.shape[1] == 1: #pls 1
|
if XY.shape[1] == 1: #pls 1
|
||||||
@ -345,7 +345,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
# with many samples, many x-vars and many non-orth y-vars (where arpack speed
|
# with many samples, many x-vars and many non-orth y-vars (where arpack speed
|
||||||
# shines)
|
# shines)
|
||||||
#############
|
#############
|
||||||
|
|
||||||
#s, w = arpack.eigen_symmetric(dot(XY, XY.T),k=1, tol=1e-10, maxiter=1000)
|
#s, w = arpack.eigen_symmetric(dot(XY, XY.T),k=1, tol=1e-10, maxiter=1000)
|
||||||
#if s[0] == 0:
|
#if s[0] == 0:
|
||||||
# print "Arpack did not converge... using svd"
|
# print "Arpack did not converge... using svd"
|
||||||
@ -357,15 +357,15 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
# print "Arpack did not converge... using svd"
|
# print "Arpack did not converge... using svd"
|
||||||
q, s, vh = svd(dot(XY.T, XY))
|
q, s, vh = svd(dot(XY.T, XY))
|
||||||
q = q[:,:1]
|
q = q[:,:1]
|
||||||
|
|
||||||
w = dot(XY, q)
|
w = dot(XY, q)
|
||||||
w = w/vnorm(w)
|
w = w/vnorm(w)
|
||||||
|
|
||||||
r = w.copy()
|
r = w.copy()
|
||||||
if i > 0:
|
if i > 0:
|
||||||
for j in range(0, i, 1):
|
for j in range(0, i, 1):
|
||||||
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
|
||||||
|
|
||||||
t = dot(X, r)
|
t = dot(X, r)
|
||||||
tt[i] = tti = dot(t.T, t).ravel()
|
tt[i] = tti = dot(t.T, t).ravel()
|
||||||
p = dot(X.T, t)/tti
|
p = dot(X.T, t)/tti
|
||||||
@ -385,7 +385,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
R[:,i] = r.ravel()
|
R[:,i] = r.ravel()
|
||||||
Q[:,i] = q.ravel()
|
Q[:,i] = q.ravel()
|
||||||
B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
|
B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
|
||||||
|
|
||||||
qnorm = apply_along_axis(vnorm, 0, Q)
|
qnorm = apply_along_axis(vnorm, 0, Q)
|
||||||
tnorm = sqrt(tt)
|
tnorm = sqrt(tt)
|
||||||
pp = (P**2).sum(0)
|
pp = (P**2).sum(0)
|
||||||
@ -412,13 +412,13 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
|
|||||||
sepy = F**2
|
sepy = F**2
|
||||||
ssqy = [sepy.sum(0), sepy.sum(1)]
|
ssqy = [sepy.sum(0), sepy.sum(1)]
|
||||||
leverage = 1./m + ((T/tnorm)**2).sum(1)
|
leverage = 1./m + ((T/tnorm)**2).sum(1)
|
||||||
|
|
||||||
# variances
|
# variances
|
||||||
tp= tt*pp
|
tp= tt*pp
|
||||||
tq = tt*qnorm*qnorm
|
tq = tt*qnorm*qnorm
|
||||||
expvarx = r_[0, 100*tp/(X*X).sum()]
|
expvarx = r_[0, 100*tp/(X*X).sum()]
|
||||||
expvary = r_[0, 100*tq/(Y*Y).sum()]
|
expvary = r_[0, 100*tq/(Y*Y).sum()]
|
||||||
|
|
||||||
if scale == 'loads':
|
if scale == 'loads':
|
||||||
T = T/tnorm
|
T = T/tnorm
|
||||||
W = W*tnorm
|
W = W*tnorm
|
||||||
@ -438,7 +438,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores',
|
|||||||
of these three matrices tries to discover common directions/subspaces.
|
of these three matrices tries to discover common directions/subspaces.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
X : {array}
|
X : {array}
|
||||||
Main data matrix (m, n)
|
Main data matrix (m, n)
|
||||||
Y : {array}
|
Y : {array}
|
||||||
@ -457,9 +457,9 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores',
|
|||||||
0 : row center
|
0 : row center
|
||||||
1 : column center
|
1 : column center
|
||||||
2 : double center
|
2 : double center
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
T : {array}
|
T : {array}
|
||||||
X-scores
|
X-scores
|
||||||
W : {array}
|
W : {array}
|
||||||
@ -504,18 +504,18 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores',
|
|||||||
Saeboe et al., LPLS-regression: a method for improved prediction and
|
Saeboe et al., LPLS-regression: a method for improved prediction and
|
||||||
classification through inclusion of background information on
|
classification through inclusion of background information on
|
||||||
predictor variables, J. of chemometrics and intell. laboratory syst.
|
predictor variables, J. of chemometrics and intell. laboratory syst.
|
||||||
|
|
||||||
Martens et.al, Regression of a data matrix on descriptors of
|
Martens et.al, Regression of a data matrix on descriptors of
|
||||||
both its rows and of its columns via latent variables: L-PLSR,
|
both its rows and of its columns via latent variables: L-PLSR,
|
||||||
Computational statistics & data analysis, 2005
|
Computational statistics & data analysis, 2005
|
||||||
|
|
||||||
"""
|
"""
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
k, l = Y.shape
|
k, l = Y.shape
|
||||||
u, o = Z.shape
|
u, o = Z.shape
|
||||||
max_rank = min(m, n)
|
max_rank = min(m, n)
|
||||||
|
|
||||||
|
|
||||||
if center_axis != None:
|
if center_axis != None:
|
||||||
xctr, yctr, zctr = center_axis
|
xctr, yctr, zctr = center_axis
|
||||||
X, mnX = center(X, xctr)
|
X, mnX = center(X, xctr)
|
||||||
@ -523,14 +523,14 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores',
|
|||||||
Z, mnZ = center(Z, zctr)
|
Z, mnZ = center(Z, zctr)
|
||||||
max_rank = max_rank -1
|
max_rank = max_rank -1
|
||||||
assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
|
assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
|
||||||
tried: %d, max_rank: %d" %(a_max, max_rank)
|
tried: %d, max_rank: %d" %(a_max, max_rank)
|
||||||
|
|
||||||
# initial variance
|
# initial variance
|
||||||
varX = (X**2).sum()
|
varX = (X**2).sum()
|
||||||
varY = (Y**2).sum()
|
varY = (Y**2).sum()
|
||||||
varZ = (Z**2).sum()
|
varZ = (Z**2).sum()
|
||||||
|
|
||||||
# initialize
|
# initialize
|
||||||
U = empty((k, a_max))
|
U = empty((k, a_max))
|
||||||
Q = empty((l, a_max))
|
Q = empty((l, a_max))
|
||||||
T = zeros((m, a_max))
|
T = zeros((m, a_max))
|
||||||
@ -600,7 +600,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores',
|
|||||||
else:
|
else:
|
||||||
k = w
|
k = w
|
||||||
l = dot(G, w)
|
l = dot(G, w)
|
||||||
|
|
||||||
U[:,a] = u.ravel()
|
U[:,a] = u.ravel()
|
||||||
W[:,a] = w.ravel()
|
W[:,a] = w.ravel()
|
||||||
P[:,a] = p.ravel()
|
P[:,a] = p.ravel()
|
||||||
@ -617,11 +617,11 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores',
|
|||||||
var_x[a] = pow(E, 2).sum()
|
var_x[a] = pow(E, 2).sum()
|
||||||
var_y[a] = pow(F, 2).sum()
|
var_y[a] = pow(F, 2).sum()
|
||||||
var_z[a] = pow(G, 2).sum()
|
var_z[a] = pow(G, 2).sum()
|
||||||
|
|
||||||
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
|
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
|
||||||
#b0[a] = mnY - dot(mnX, B[a])
|
#b0[a] = mnY - dot(mnX, B[a])
|
||||||
|
|
||||||
|
|
||||||
# variance explained
|
# variance explained
|
||||||
evx = 100.*(1 - var_x/varX)
|
evx = 100.*(1 - var_x/varX)
|
||||||
evy = 100.*(1 - var_y/varY)
|
evy = 100.*(1 - var_y/varY)
|
||||||
@ -635,8 +635,8 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores',
|
|||||||
knorm = apply_along_axis(vnorm, 0, K)
|
knorm = apply_along_axis(vnorm, 0, K)
|
||||||
L = L*knorm
|
L = L*knorm
|
||||||
K = K/knorm
|
K = K/knorm
|
||||||
|
|
||||||
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}
|
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'E': E, 'F': F, 'G': G, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}
|
||||||
|
|
||||||
def lpls_predict(model_dict, x, aopt):
|
def lpls_predict(model_dict, x, aopt):
|
||||||
"""Predict lpls reponses from existing model on new data.
|
"""Predict lpls reponses from existing model on new data.
|
||||||
@ -646,25 +646,25 @@ def lpls_predict(model_dict, x, aopt):
|
|||||||
except:
|
except:
|
||||||
x = atleast_2d(x.shape)
|
x = atleast_2d(x.shape)
|
||||||
m, n = x.shape
|
m, n = x.shape
|
||||||
|
|
||||||
if 'B0' in model_dict.keys():
|
if 'B0' in model_dict.keys():
|
||||||
y = model_dict['B0'] + dot()
|
y = model_dict['B0'] + dot()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def vnorm(a):
|
def vnorm(a):
|
||||||
"""Returns the norm of a vector.
|
"""Returns the norm of a vector.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
a : {array}
|
a : {array}
|
||||||
Input data, 1-dim, or column vector (m, 1)
|
Input data, 1-dim, or column vector (m, 1)
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
a_norm : {array}
|
a_norm : {array}
|
||||||
Norm of input vector
|
Norm of input vector
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return msqrt(dot(a.T,a))
|
return msqrt(dot(a.T,a))
|
||||||
|
|
||||||
@ -676,7 +676,7 @@ def center(a, axis):
|
|||||||
a : {array}
|
a : {array}
|
||||||
Input data
|
Input data
|
||||||
axis : {integer}
|
axis : {integer}
|
||||||
Which centering to perform.
|
Which centering to perform.
|
||||||
0 = col center, 1 = row center, 2 = double center
|
0 = col center, 1 = row center, 2 = double center
|
||||||
-1 = nothing
|
-1 = nothing
|
||||||
|
|
||||||
@ -707,16 +707,16 @@ def center(a, axis):
|
|||||||
else:
|
else:
|
||||||
mn = a.mean()*ones(a.shape)
|
mn = a.mean()*ones(a.shape)
|
||||||
return a - mn, mn
|
return a - mn, mn
|
||||||
|
|
||||||
if axis == -1:
|
if axis == -1:
|
||||||
mn = zeros((1,a.shape[1],))
|
mn = zeros((1,a.shape[1],))
|
||||||
mn = tile(mn, (a.shape[0], 1))
|
#mn = tile(mn, (a.shape[0], 1))
|
||||||
elif axis == 0:
|
elif axis == 0:
|
||||||
mn = a.mean(0)[newaxis]
|
mn = a.mean(0)[newaxis]
|
||||||
mn = tile(mn, (a.shape[0], 1))
|
#mn = tile(mn, (a.shape[0], 1))
|
||||||
elif axis == 1:
|
elif axis == 1:
|
||||||
mn = a.mean(1)[:,newaxis]
|
mn = a.mean(1)[:,newaxis]
|
||||||
mn = tile(mn, (1, a.shape[1]))
|
#mn = tile(mn, (1, a.shape[1]))
|
||||||
elif axis == 2:
|
elif axis == 2:
|
||||||
#fixme: double centering returns column mean as loc-vector, ok?
|
#fixme: double centering returns column mean as loc-vector, ok?
|
||||||
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
|
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
|
||||||
@ -774,7 +774,7 @@ def _scale(a, axis):
|
|||||||
a : {array}
|
a : {array}
|
||||||
Input data
|
Input data
|
||||||
axis : {integer}
|
axis : {integer}
|
||||||
Which scaling to perform.
|
Which scaling to perform.
|
||||||
0 = column, 1 = row, -1 = nothing
|
0 = column, 1 = row, -1 = nothing
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
@ -784,7 +784,7 @@ def _scale(a, axis):
|
|||||||
mn : {array}
|
mn : {array}
|
||||||
Scaling vector/matrix
|
Scaling vector/matrix
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if axis == -1:
|
if axis == -1:
|
||||||
sc = zeros((a.shape[1],))
|
sc = zeros((a.shape[1],))
|
||||||
elif axis == 0:
|
elif axis == 0:
|
||||||
@ -817,21 +817,20 @@ def esvd(data, a_max=None):
|
|||||||
Singular values
|
Singular values
|
||||||
v : {array}
|
v : {array}
|
||||||
Left hand eigenvectors
|
Left hand eigenvectors
|
||||||
|
|
||||||
*Notes*:
|
*Notes*:
|
||||||
|
|
||||||
Uses Anoldi iterations for the symmetric eigendecomp (ARPACK)
|
Uses Anoldi iterations for the symmetric eigendecomp (ARPACK)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = data.shape
|
m, n = data.shape
|
||||||
if m >= n:
|
if m > n:
|
||||||
kernel = dot(data.T, data)
|
kernel = dot(data.T, data)
|
||||||
|
|
||||||
if a_max == None:
|
if a_max == None:
|
||||||
a_max = n - 1
|
a_max = n - 1
|
||||||
s, v = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
|
s, v = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
|
||||||
maxiter=200, tol=1e-5)
|
maxiter=500, tol=1e-7)
|
||||||
s = s[::-1]
|
s = s[::-1]
|
||||||
v = v[:,::-1]
|
v = v[:,::-1]
|
||||||
#u, s, vt = svd(kernel)
|
#u, s, vt = svd(kernel)
|
||||||
@ -841,9 +840,9 @@ def esvd(data, a_max=None):
|
|||||||
else:
|
else:
|
||||||
kernel = dot(data, data.T)
|
kernel = dot(data, data.T)
|
||||||
if a_max == None:
|
if a_max == None:
|
||||||
a_max = m -1
|
a_max = m - 1
|
||||||
s, u = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
|
s, u = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
|
||||||
maxiter=200, tol=1e-5)
|
maxiter=500, tol=1e-7)
|
||||||
s = s[::-1]
|
s = s[::-1]
|
||||||
u = u[:,::-1]
|
u = u[:,::-1]
|
||||||
#u, s, vt = svd(kernel)
|
#u, s, vt = svd(kernel)
|
||||||
|
@ -21,7 +21,7 @@ class Model(object):
|
|||||||
def __init__(self, name="johndoe"):
|
def __init__(self, name="johndoe"):
|
||||||
self.name = name
|
self.name = name
|
||||||
self.options = {}
|
self.options = {}
|
||||||
|
|
||||||
def save(self, filename='pca.ml'):
|
def save(self, filename='pca.ml'):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -46,7 +46,7 @@ class PCA(Model):
|
|||||||
self._x = x
|
self._x = x
|
||||||
self.amax = amax
|
self.amax = amax
|
||||||
self.aopt = amax
|
self.aopt = amax
|
||||||
|
|
||||||
# properties
|
# properties
|
||||||
def amax():
|
def amax():
|
||||||
doc = "maximum number of components"
|
doc = "maximum number of components"
|
||||||
@ -77,7 +77,7 @@ class PCA(Model):
|
|||||||
del self._tot_var
|
del self._tot_var
|
||||||
return locals()
|
return locals()
|
||||||
tot_var = property(**tot_var())
|
tot_var = property(**tot_var())
|
||||||
|
|
||||||
def scores():
|
def scores():
|
||||||
doc = "pca scores"
|
doc = "pca scores"
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -94,7 +94,7 @@ class PCA(Model):
|
|||||||
del self._core_scores
|
del self._core_scores
|
||||||
return locals()
|
return locals()
|
||||||
scores = property(**scores())
|
scores = property(**scores())
|
||||||
|
|
||||||
def loadings():
|
def loadings():
|
||||||
doc = "pca loadings"
|
doc = "pca loadings"
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -111,7 +111,7 @@ class PCA(Model):
|
|||||||
self._loadings = p
|
self._loadings = p
|
||||||
return locals()
|
return locals()
|
||||||
loadings = property(**loadings())
|
loadings = property(**loadings())
|
||||||
|
|
||||||
def singvals():
|
def singvals():
|
||||||
doc = "Singular values"
|
doc = "Singular values"
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -128,7 +128,7 @@ class PCA(Model):
|
|||||||
del self._singvals
|
del self._singvals
|
||||||
return locals()
|
return locals()
|
||||||
singvals = property(**singvals())
|
singvals = property(**singvals())
|
||||||
|
|
||||||
def x():
|
def x():
|
||||||
doc = "x is readonly, may not be deleted"
|
doc = "x is readonly, may not be deleted"
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -154,12 +154,12 @@ class PCA(Model):
|
|||||||
del self._xc
|
del self._xc
|
||||||
return locals()
|
return locals()
|
||||||
xadd = property(**xadd())
|
xadd = property(**xadd())
|
||||||
|
|
||||||
def xc():
|
def xc():
|
||||||
doc = "mean_centered input data"
|
doc = "mean_centered input data"
|
||||||
def fget(self):
|
def fget(self):
|
||||||
if not hasattr(self, "_xc"):
|
if not hasattr(self, "_xc"):
|
||||||
self._xc = self.x + self.xadd
|
self._xc = self.x + self.xadd
|
||||||
return self._xc
|
return self._xc
|
||||||
def fset(self, xc):
|
def fset(self, xc):
|
||||||
self._xc = xc
|
self._xc = xc
|
||||||
@ -186,7 +186,7 @@ class PCA(Model):
|
|||||||
del self._xw
|
del self._xw
|
||||||
return locals()
|
return locals()
|
||||||
xw = property(**xw())
|
xw = property(**xw())
|
||||||
|
|
||||||
def explained_variance():
|
def explained_variance():
|
||||||
doc = "explained variance"
|
doc = "explained variance"
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -215,7 +215,7 @@ class PCA(Model):
|
|||||||
del self._residuals
|
del self._residuals
|
||||||
return locals()
|
return locals()
|
||||||
residuals = property(**residuals())
|
residuals = property(**residuals())
|
||||||
|
|
||||||
def leverage():
|
def leverage():
|
||||||
doc = "objects leverage"
|
doc = "objects leverage"
|
||||||
def fget(self):
|
def fget(self):
|
||||||
@ -254,7 +254,7 @@ class PCA(Model):
|
|||||||
self._column_metric = scale(self.xc, axis=1)
|
self._column_metric = scale(self.xc, axis=1)
|
||||||
return self._column_metric
|
return self._column_metric
|
||||||
def fset(self, w):
|
def fset(self, w):
|
||||||
|
|
||||||
self._column_metric = w
|
self._column_metric = w
|
||||||
# update model
|
# update model
|
||||||
def fdel(self):
|
def fdel(self):
|
||||||
@ -263,7 +263,7 @@ class PCA(Model):
|
|||||||
del self._xd
|
del self._xd
|
||||||
return locals()
|
return locals()
|
||||||
column_metric = property(**column_metric())
|
column_metric = property(**column_metric())
|
||||||
|
|
||||||
def blm_update(self, a, b):
|
def blm_update(self, a, b):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -281,8 +281,8 @@ class PCA(Model):
|
|||||||
|
|
||||||
def reweight(self, w):
|
def reweight(self, w):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
from numpy.random import rand
|
from numpy.random import rand
|
||||||
X = rand(4,10)
|
X = rand(4,10)
|
||||||
|
@ -22,9 +22,9 @@ def hotelling(Pcv, P, p_center='median', cov_center='median',
|
|||||||
used in multivariate hypothesis testing. In order to avoid small variance
|
used in multivariate hypothesis testing. In order to avoid small variance
|
||||||
samples to become significant this version allows borrowing variance
|
samples to become significant this version allows borrowing variance
|
||||||
from the pooled covariance.
|
from the pooled covariance.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
Pcv : {array}
|
Pcv : {array}
|
||||||
Crossvalidation segements of paramter
|
Crossvalidation segements of paramter
|
||||||
P : {array}
|
P : {array}
|
||||||
@ -39,9 +39,9 @@ def hotelling(Pcv, P, p_center='median', cov_center='median',
|
|||||||
Rotate sub-segments toward calibration model.
|
Rotate sub-segments toward calibration model.
|
||||||
strict : {boolean}, optional
|
strict : {boolean}, optional
|
||||||
Only rotate 90 degree
|
Only rotate 90 degree
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
tsq : {array}
|
tsq : {array}
|
||||||
Hotellings T^2 estimate
|
Hotellings T^2 estimate
|
||||||
|
|
||||||
@ -50,19 +50,19 @@ def hotelling(Pcv, P, p_center='median', cov_center='median',
|
|||||||
Gidskehaug et al., A framework for significance analysis of
|
Gidskehaug et al., A framework for significance analysis of
|
||||||
gene expression datausing dimension reduction methods, BMC
|
gene expression datausing dimension reduction methods, BMC
|
||||||
bioinformatics, 2007
|
bioinformatics, 2007
|
||||||
|
|
||||||
*Notes*
|
*Notes*
|
||||||
|
|
||||||
The rotational freedom in the solution of bilinear
|
The rotational freedom in the solution of bilinear
|
||||||
models may require that a rotation onto the calibration
|
models may require that a rotation onto the calibration
|
||||||
model. One way of doing that is procrustes rotation.
|
model. One way of doing that is procrustes rotation.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
m, n = P.shape
|
m, n = P.shape
|
||||||
n_sets, n, amax = Pcv.shape
|
n_sets, n, amax = Pcv.shape
|
||||||
T_sq = empty((n,), dtype='d')
|
T_sq = empty((n,), dtype='d')
|
||||||
Cov_i = zeros((n, amax, amax), dtype='d')
|
Cov_i = zeros((n, amax, amax), dtype='d')
|
||||||
|
|
||||||
# rotate sub_models to full model
|
# rotate sub_models to full model
|
||||||
if crot:
|
if crot:
|
||||||
for i, Pi in enumerate(Pcv):
|
for i, Pi in enumerate(Pcv):
|
||||||
@ -77,20 +77,15 @@ def hotelling(Pcv, P, p_center='median', cov_center='median',
|
|||||||
P_ctr = P
|
P_ctr = P
|
||||||
|
|
||||||
for i in xrange(n):
|
for i in xrange(n):
|
||||||
Pi = Pcv[:,i,:] # (n_sets x amax)
|
Pi = Pcv[:,i,:] # (n_sets x amax)
|
||||||
Pi_ctr = P_ctr[i,:] # (1 x amax)
|
Pi_ctr = P_ctr[i,:] # (1 x amax)
|
||||||
#Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
|
Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
|
||||||
#Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
|
Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
|
||||||
Pim = (Pi - Pi_ctr)
|
|
||||||
Cov_i[i] = dot(Pim.T, Pim)
|
|
||||||
if cov_center == 'median':
|
if cov_center == 'median':
|
||||||
Cov_p = median(Cov_i)
|
Cov_p = median(Cov_i)
|
||||||
elif cov_center == 'mean':
|
else cov_center == 'mean':
|
||||||
Cov_p = Cov.mean(0)
|
Cov_p = Cov.mean(0)
|
||||||
else:
|
|
||||||
print "Pooled covariance est. invalid, using median"
|
|
||||||
print cov_center
|
|
||||||
Cov_p = median(Cov_i)
|
|
||||||
reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p
|
reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p
|
||||||
for i in xrange(n):
|
for i in xrange(n):
|
||||||
Pc = P_ctr[i,:]
|
Pc = P_ctr[i,:]
|
||||||
@ -105,7 +100,7 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False)
|
|||||||
onto another by minimising the squared error.
|
onto another by minimising the squared error.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
a : {array}
|
a : {array}
|
||||||
Input array, stationary
|
Input array, stationary
|
||||||
b : {array}
|
b : {array}
|
||||||
@ -127,9 +122,9 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False)
|
|||||||
*Reference*:
|
*Reference*:
|
||||||
|
|
||||||
Schonemann, A generalized solution of the orthogonal Procrustes
|
Schonemann, A generalized solution of the orthogonal Procrustes
|
||||||
problem, Psychometrika, 1966
|
problem, Psychometrika, 1966
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if center:
|
if center:
|
||||||
mn_a = a.mean(0)
|
mn_a = a.mean(0)
|
||||||
a = a - mn_a
|
a = a - mn_a
|
||||||
@ -145,7 +140,7 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False)
|
|||||||
u, s, vt = svd(dot(b.T, a))
|
u, s, vt = svd(dot(b.T, a))
|
||||||
Cm = dot(u, vt) # Cm: orthogonal rotation matrix
|
Cm = dot(u, vt) # Cm: orthogonal rotation matrix
|
||||||
if strict:
|
if strict:
|
||||||
Cm = _ensure_strict(Cm)
|
Cm = _ensure_strict(Cm)
|
||||||
b_rot = dot(b, Cm)
|
b_rot = dot(b, Cm)
|
||||||
if verbose:
|
if verbose:
|
||||||
fit = ((b - b_rot)**2).sum()
|
fit = ((b - b_rot)**2).sum()
|
||||||
@ -158,7 +153,7 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False)
|
|||||||
|
|
||||||
def _ensure_strict(C, only_flips=True):
|
def _ensure_strict(C, only_flips=True):
|
||||||
"""Ensure that a rotation matrix does only 90 degree rotations.
|
"""Ensure that a rotation matrix does only 90 degree rotations.
|
||||||
|
|
||||||
In multiplication with pcs this allows flips and reordering.
|
In multiplication with pcs this allows flips and reordering.
|
||||||
if only_flips is True there will onlt be flips allowed
|
if only_flips is True there will onlt be flips allowed
|
||||||
|
|
||||||
@ -173,13 +168,13 @@ def _ensure_strict(C, only_flips=True):
|
|||||||
|
|
||||||
C_rot : {array}
|
C_rot : {array}
|
||||||
Restricted rotation matrix
|
Restricted rotation matrix
|
||||||
|
|
||||||
*Notes*:
|
*Notes*:
|
||||||
|
|
||||||
This function is not ready for use. Use (only_flips=True).
|
This function is not ready for use. Use (only_flips=True).
|
||||||
That is, for more than two components, the rotation matrix
|
That is, for more than two components, the rotation matrix
|
||||||
has a tendency to be unstable (det(Cm)>1), when rounding is used.
|
has a tendency to be unstable (det(Cm)>1), when rounding is used.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if only_flips:
|
if only_flips:
|
||||||
C = eye(C.shape[0])*sign(C)
|
C = eye(C.shape[0])*sign(C)
|
||||||
@ -199,9 +194,9 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
|||||||
|
|
||||||
The response (Y) is randomly permuted, and the number of false positives
|
The response (Y) is randomly permuted, and the number of false positives
|
||||||
is registered by comparing hotellings T2 statistics of the calibration model.
|
is registered by comparing hotellings T2 statistics of the calibration model.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
X : {array}
|
X : {array}
|
||||||
Main data matrix (m, n)
|
Main data matrix (m, n)
|
||||||
Y : {array}
|
Y : {array}
|
||||||
@ -237,14 +232,14 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
|||||||
|
|
||||||
nsets : {integer}
|
nsets : {integer}
|
||||||
Number of crossvalidation segements
|
Number of crossvalidation segements
|
||||||
|
|
||||||
*Reference*:
|
*Reference*:
|
||||||
|
|
||||||
Gidskehaug et al., A framework for significance analysis of
|
Gidskehaug et al., A framework for significance analysis of
|
||||||
gene expression data using dimension reduction methods, BMC
|
gene expression data using dimension reduction methods, BMC
|
||||||
bioinformatics, 2007
|
bioinformatics, 2007
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
k, nz = Z.shape
|
k, nz = Z.shape
|
||||||
assert(n==nz)
|
assert(n==nz)
|
||||||
@ -255,7 +250,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
|||||||
Y = atleast_2d(Y).T
|
Y = atleast_2d(Y).T
|
||||||
my, l = Y.shape
|
my, l = Y.shape
|
||||||
assert(m==my)
|
assert(m==my)
|
||||||
|
|
||||||
pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
|
pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
|
||||||
pert_tsq_z = zeros((k, n_iter), dtype='d') # (nzvars x n_subsets)
|
pert_tsq_z = zeros((k, n_iter), dtype='d') # (nzvars x n_subsets)
|
||||||
|
|
||||||
@ -264,7 +259,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
|||||||
Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
|
Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
|
||||||
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
|
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
|
||||||
cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
|
cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
|
||||||
print "morn"
|
|
||||||
# Perturbations
|
# Perturbations
|
||||||
index = arange(m)
|
index = arange(m)
|
||||||
for i in range(n_iter):
|
for i in range(n_iter):
|
||||||
@ -275,7 +270,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
|
|||||||
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
||||||
# no reason to borrow variance in dag (alpha ->some small value)
|
# no reason to borrow variance in dag (alpha ->some small value)
|
||||||
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=0.01)
|
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=0.01)
|
||||||
|
|
||||||
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
|
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
|
||||||
|
|
||||||
|
|
||||||
@ -286,9 +281,9 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
|||||||
|
|
||||||
The response (Y) is randomly permuted, and the number of false positives
|
The response (Y) is randomly permuted, and the number of false positives
|
||||||
is registered by comparing hotellings T2 statistics of the calibration model.
|
is registered by comparing hotellings T2 statistics of the calibration model.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
X : {array}
|
X : {array}
|
||||||
Main data matrix (m, n)
|
Main data matrix (m, n)
|
||||||
Y : {array}
|
Y : {array}
|
||||||
@ -318,14 +313,14 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
|||||||
Only rotate 90 degree
|
Only rotate 90 degree
|
||||||
nsets : {integer}
|
nsets : {integer}
|
||||||
Number of crossvalidation segements
|
Number of crossvalidation segements
|
||||||
|
|
||||||
*Reference*:
|
*Reference*:
|
||||||
|
|
||||||
Gidskehaug et al., A framework for significance analysis of
|
Gidskehaug et al., A framework for significance analysis of
|
||||||
gene expression data using dimension reduction methods, BMC
|
gene expression data using dimension reduction methods, BMC
|
||||||
bioinformatics, 2007
|
bioinformatics, 2007
|
||||||
"""
|
"""
|
||||||
|
|
||||||
m, n = X.shape
|
m, n = X.shape
|
||||||
try:
|
try:
|
||||||
my, l = Y.shape
|
my, l = Y.shape
|
||||||
@ -341,7 +336,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
|||||||
Wc = pls_jk(X, Y , aopt)
|
Wc = pls_jk(X, Y , aopt)
|
||||||
|
|
||||||
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
|
cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
|
||||||
|
|
||||||
# Perturbations
|
# Perturbations
|
||||||
pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
|
pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
|
||||||
index = arange(m)
|
index = arange(m)
|
||||||
@ -351,7 +346,7 @@ def pls_qvals(X, Y, aopt, alpha=.3, n_iter=20,p_center='med', cov_center=median,
|
|||||||
dat = pls(X, Y[indi,:], aopt, scale='loads', center_axis=center_axis)
|
dat = pls(X, Y[indi,:], aopt, scale='loads', center_axis=center_axis)
|
||||||
Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
|
Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
|
||||||
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
|
||||||
|
|
||||||
return cal_tsq_x, pert_tsq_x
|
return cal_tsq_x, pert_tsq_x
|
||||||
|
|
||||||
|
|
||||||
@ -362,7 +357,7 @@ def _fdr(tsq, tsqp, loc_method=median):
|
|||||||
Fdr is a method used in multiple hypothesis testing to correct for multiple
|
Fdr is a method used in multiple hypothesis testing to correct for multiple
|
||||||
comparisons. It controls the expected proportion of incorrectly rejected null
|
comparisons. It controls the expected proportion of incorrectly rejected null
|
||||||
hypotheses (type I errors) in a list of rejected hypotheses.
|
hypotheses (type I errors) in a list of rejected hypotheses.
|
||||||
|
|
||||||
*Parameters*:
|
*Parameters*:
|
||||||
|
|
||||||
tsq : {array}
|
tsq : {array}
|
||||||
@ -372,14 +367,14 @@ def _fdr(tsq, tsqp, loc_method=median):
|
|||||||
|
|
||||||
loc_method : {py_func}
|
loc_method : {py_func}
|
||||||
Location method
|
Location method
|
||||||
|
|
||||||
*Returns*:
|
*Returns*:
|
||||||
|
|
||||||
fdr : {array}
|
fdr : {array}
|
||||||
False discovery rate
|
False discovery rate
|
||||||
|
|
||||||
*Notes*:
|
*Notes*:
|
||||||
|
|
||||||
This is an internal function for use in fdr estimation of jack-knifed
|
This is an internal function for use in fdr estimation of jack-knifed
|
||||||
perturbated blm parameters.
|
perturbated blm parameters.
|
||||||
|
|
||||||
@ -403,4 +398,3 @@ def _fdr(tsq, tsqp, loc_method=median):
|
|||||||
fd_rate = fp/n_signif
|
fd_rate = fp/n_signif
|
||||||
fd_rate[fd_rate>1] = 1
|
fd_rate[fd_rate>1] = 1
|
||||||
return fd_rate
|
return fd_rate
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user