Added test on lpls, updated centering in blm, added pls_qvals

parent 4c809674bb
commit 8e9c4c1c58
@@ -1,5 +1,5 @@
from crossvalidation import lpls_val
from statistics import lpls_qvals
from statistics import lpls_qvals, pls_qvals
from engines import pca, pcr, pls
from engines import nipals_lpls as lpls
#from tests import *

@@ -9,10 +9,11 @@ from numpy import dot,empty,zeros,sqrt,atleast_2d,argmax,asarray,median,\
    array_split,arange
from numpy.random import shuffle

from engines import pls
from engines import nipals_lpls as lpls


def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=False):
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
    """Performs crossvalidation for generalisation error in lpls.

    The L-PLS crossvalidation is estimated just like an ordinary pls

@@ -35,7 +36,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
    alpha : {float}, optional
        Parameter to control the amount of influence from Z-matrix.
        0 is none, which returns a pls-solution, 1 is max
    mean_center : {array-like}, optional
    center_axis : {array-like}, optional
        A three element array-like structure with elements in [-1,0,1,2],
        that decides the type of centering used.
        -1 : nothing

@@ -74,14 +75,14 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
    for cal, val in cv(nsets, k):
        # do the training model
        dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
                   mean_ctr=mean_ctr, zorth=zorth, verbose=verbose)
                   center_axis=center_axis, zorth=zorth, verbose=verbose)

        # center test data
        if mean_ctr[0] != 1:
        if center_axis[0] != 1:
            xi = X[val,:] - dat['mnx']
        else:
            xi = X[val] - X[cal].mean(1)[:,newaxis]
        if mean_ctr[2] != 1:
        if center_axis[2] != 1:
            ym = dat['mny']
        else:
            ym = Y[cal].mean(1)[:,newaxis]
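
In the hunk above, rows held out for validation are shifted by the location
estimates of the calibration segment rather than by their own means. A
minimal numpy sketch of that cross-validation pattern, independent of the
lpls engine (the fit/predict callables are placeholders, not functions from
this package):

    import numpy as np

    def cv_rmsep(X, Y, fit, predict, nsets):
        """Segmented CV where held-out rows reuse the training means."""
        m = X.shape[0]
        press = np.zeros(Y.shape[1])
        for val in np.array_split(np.arange(m), nsets):
            cal = np.setdiff1d(np.arange(m), val)
            mnx = X[cal].mean(0)            # training column means
            mny = Y[cal].mean(0)
            model = fit(X[cal] - mnx, Y[cal] - mny)
            # held-out rows are shifted by the *training* locations
            yhat = predict(model, X[val] - mnx) + mny
            press += ((Y[val] - yhat) ** 2).sum(0)
        return np.sqrt(press / m)
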
@@ -94,7 +95,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals
    # for n in range(10):
    # shuffle(cal)
    # dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha,
    # mean_ctr=mean_ctr, verbose=verbose)
    # center_axis=center_axis, verbose=verbose)


    # todo: need a better support for classification error

@@ -110,7 +111,7 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2], zorth=Fals

    return rmsep, Yhat, aopt

def pca_jk(a, aopt, n_blocks=None):
def pca_jk(a, aopt, center_axis=[0], nsets=None):
    """Returns jack-knife segements from PCA.

    *Parameters*:

@@ -119,6 +120,8 @@ def pca_jk(a, aopt, n_blocks=None):
        data matrix (n x m)
    aopt : {integer}
        number of components in model.
    center_axis:
        Centering
    nsets : {integer}
        number of segments

@@ -141,13 +144,13 @@
        old_values = a.take(ind)
        new_values = mn_a.take(ind)
        a.put(ind, new_values)
        dat = pca(a, aopt, mode='fast', scale='loads', center=center)
        PP[nn,:,:] = dat['P']
        dat = pca(a, aopt, mode='fast', scale='loads', center_axis=center_axis)
        PP[i,:,:] = dat['P']
        a.put(ind, old_values)

    return PP

def pls_jk(X, Y, a_opt, nsets=None, center=True, verbose=False):
def pls_jk(X, Y, a_opt, nsets=None, center_axis=True, verbose=False):
    """ Returns jack-knife segements of W.

    *Parameters*:

@@ -161,7 +164,7 @@ def pls_jk(X, Y, a_opt, nsets=None, center=True, verbose=False):
    nsets : (integer), optional
        Number of jack-knife segments

    center : {boolean}, optional
    center_axis : {boolean}, optional
        - -1 : nothing
        - 0 : row center
        - 1 : column center

@@ -183,13 +186,13 @@ def pls_jk(X, Y, a_opt, nsets=None, center=True, verbose=False):
    Wcv = empty((nsets, X.shape[1], a_opt), dtype=X.dtype)
    for i, (cal, val) in enumerate(cv(k, nsets)):
        if verbose:
            print "Segement number: %d" %i
        dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast')
        Wcv[nn,:,:] = dat['W']
            print "Segment number: %d" %i
        dat = pls(X[cal,:], Y[cal,:], a_opt, scale='loads', mode='fast', center_axis=[0, 0])
        Wcv[i,:,:] = dat['W']

    return Wcv

def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, mean_ctr=[2,0,2], verbose=False):
def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, center_axis=[2,0,2], zorth=False, verbose=False):
    """Returns jack-knifed segments of lpls model.

    Jack-knifing is a method to perturb the model paramters, hopefully

@@ -215,7 +218,7 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, mean_ctr=[2,0,2], verbose=F
    xz_alpha : {float}, optional
        Parameter to control the amount of influence from Z-matrix.
        0 is none, which returns a pls-solution, 1 is max
    mean_center : {array-like}, optional
    center_axis : {array-like}, optional
        A three element array-like structure with elements in [-1,0,1,2],
        that decides the type of centering used.
        -1 : nothing

@@ -244,8 +247,8 @@ def lpls_jk(X, Y, Z, a_opt, nsets=None, xz_alpha=.5, mean_ctr=[2,0,2], verbose=F
    WWz = empty((nsets, o, a_opt), 'd')
    #WWy = empty((nsets, l, a_opt), 'd')
    for i, (cal, val) in enumerate(cv(k, nsets)):
        dat = lpls(X[cal], Y[cal], Z, a_max=a_opt,alpha=xz_alpha, mean_ctr=mean_ctr,
                   scale='loads', verbose=verbose)
        dat = lpls(X[cal], Y[cal], Z, a_max=a_opt,alpha=xz_alpha, center_axis=center_axis,
                   scale='loads', zorth=zorth, verbose=verbose)
        WWx[i,:,:] = dat['W']
        WWz[i,:,:] = dat['L']
        #WWy[i,:,:] = dat['Q']

@@ -328,8 +331,8 @@ def cv(N, K, randomise=True, sequential=False):
    otherwise interleaved ordering is used.

    """
    if N > K:
        raise ValueError, "You cannot divide a list of %d samples into more than %d segments. Yout tried: %s" %(K, K, N)
    if K > N:
        raise ValueError, "You cannot divide a list of %d samples into more than %d segments. Yout tried: %s" %(N, N, K)
    index = xrange(N)
    if randomise:
        from random import shuffle
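
For context, cv(N, K) yields calibration/validation index pairs over N
samples in K segments; the guard above now compares the two arguments the
right way around. A rough standalone equivalent of such a splitter (a
sketch, not this package's implementation):

    import numpy as np

    def kfold_indices(n_samples, n_segments, randomise=True):
        """Yield (calibration, validation) index arrays for K-fold CV."""
        if n_segments > n_samples:
            raise ValueError("cannot split %d samples into %d segments"
                             % (n_samples, n_segments))
        index = np.arange(n_samples)
        if randomise:
            np.random.shuffle(index)
        for val in np.array_split(index, n_segments):
            cal = np.setdiff1d(index, val)
            yield cal, val

    # usage: for cal, val in kfold_indices(20, 4): ...
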
@@ -375,19 +378,44 @@ def diag_cv(shape, nsets=9):
            yield training, validation


def class_error(Yhat, Y, method='vanilla'):
def class_error(y_hat, y, method='vanilla'):
    """ Not used.
    """
    a_opt, k, l = Yhat.shape
    Yhat_c = zeros((k, l), dtype='d')
    a_opt, k, l = y_hat.shape
    y_hat_c = zeros((k, l), dtype='d')
    if method == vanilla:
        pass
    for a in range(a_opt):
        for i in range(k):
            Yhat_c[a, val, argmax(Yhat[a,val,:])] = 1.0
    err = 100*((Yhat_c + Y) == 2).sum(1)/Y.sum(0).astype('d')
            y_hat_c[a, val, argmax(y_hat[a,val,:])] = 1.0
    err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d')

    return Yhat_c, err
    return y_hat_c, err

def _class_errorII(T, Y, method='lda'):
    """ Not used ...
def prediction_error(y_hat, y, method='squared'):
    """Loss function on multiclass Y.

    Assumes y is a binary dummy class matrix (samples, classes)
    """
    pass
    k, l = y.shape

    if method == 'hinge':
        pass
    elif method == 'smooth_hinge':
        z = 90
    elif method == 'abs':
        err = abs(y - y_hat)
    elif method == 'squared':
        err = (y - y_hat)**2
    elif method == '0/1':
        pred = zeros_like(y_hat)
        for i, row in enumerate(y_hat):
            largest = row.argsort()[-1]
            pred[i, largest] = 1.
        err = abs(y - pred)
    elif method == '1/2':
        y_hat[y_hat>.5] = 1
        y_hat[y_hat<.5] = 0
        err = abs(y - y_hat)

    return err
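
A small usage sketch of the loss conventions handled above, on a dummy-coded
class matrix (all values invented for illustration):

    import numpy as np

    y = np.array([[1., 0.],
                  [0., 1.],
                  [1., 0.]])                 # true classes, dummy coded
    y_hat = np.array([[0.8, 0.2],
                      [0.4, 0.6],
                      [0.3, 0.7]])           # predicted class "scores"

    squared = (y - y_hat) ** 2               # method='squared'

    pred = np.zeros_like(y_hat)              # method='0/1': winner takes all
    pred[np.arange(len(y_hat)), y_hat.argmax(1)] = 1.
    zero_one = np.abs(y - pred)

    print(squared.sum(), zero_one.sum(1))    # total squared loss, per-sample 0/1 loss
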
@@ -13,7 +13,7 @@ from numpy.linalg import inv,svd
from scipy.sandbox import arpack


def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
def pca(X, aopt, scale='scores', mode='normal', center_axis=[0]):
    """ Principal Component Analysis.

    PCA is a low rank bilinear aprroximation to a data matrix that sequentially

@@ -80,8 +80,8 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):

    m, n = X.shape
    min_aopt = min(m, n)
    if center_axis >= 0:
        X = X - expand_dims(X.mean(center_axis), center_axis)
    if center_axis != None:
        X, mnx = center(X, center_axis[0])
        min_aopt = min_aopt - 1
    assert(aopt <= min_aopt)
    if m > (n+100) or n > (m+100):
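
center_axis is now a one-element list handed to center(), so column
centering of X is requested as center_axis=[0]. A plain-numpy illustration
of what that option amounts to (the commented pca call mirrors the signature
above; the rest is generic numpy):

    import numpy as np

    X = np.random.rand(10, 6)

    # center_axis=[0]: subtract the column means, which is what the earlier
    # scalar form center_axis=0 did via expand_dims(X.mean(0), 0)
    Xc = X - X.mean(0)[np.newaxis, :]
    assert np.allclose(Xc.mean(0), 0.0)

    # dat = pca(X, aopt=3, mode='normal', center_axis=[0])
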
@@ -107,10 +107,10 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
        T = T/s
        P = P*s

    if mode == 'fast':
    if mode in ['fast', 'f', 'F']:
        return {'T':T, 'P':P, 'aopt':aopt}

    if mode=='detailed':
    if mode in ['detailed', 'd', 'D']:
        E = empty((aopt, m, n))
        ssq = []
        lev = []

@@ -137,7 +137,7 @@ def pca(X, aopt, scale='scores', mode='normal', center_axis=0):
    return {'T': T, 'P': P, 'E': E, 'evx': expvarx, 'leverage': lev, 'ssqx': ssq,
            'aopt': aopt, 'eigvals': e}

def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=[0, 0]):
    """ Principal Component Regression.

    Performs PCR on given matrix and returns results in a dictionary.

@@ -197,8 +197,7 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
    except:
        b = atleast_2d(b).T
    k, l = b.shape
    if center_axis >= 0:
        b = b - expand_dims(b.mean(center_axis), center_axis)
    b, mny = center(b, center_axis[1])
    dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
    T = dat['T']
    weights = apply_along_axis(vnorm, 0, T)**2

@@ -207,11 +206,11 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
    else:
        Q = dot(b.T, T/weights)

    if mode == 'fast':
    if mode in ['fast', 'f', 'F']:
        dat.update({'Q':Q})
        return dat

    if mode == 'detailed':
    if mode in ['detailed', 'd', 'D']:
        F = empty((aopt, k, l))
        ssqy = []
        for i in range(aopt):

@@ -224,10 +223,10 @@ def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):

    expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]]

    dat.update({'Q': Q, 'F': F, 'evy': expvary, 'ssqy': ssqy})
    dat.update({'Q': Q, 'F': F, 'evy': expvary, 'ssqy': ssqy, 'mny': mny})
    return dat

def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=[0, 0]):
    """Partial Least Squares Regression.

    Performs PLS on given matrix and returns results in a dictionary.

@@ -316,12 +315,10 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
    assert(m == k)
    mnx, mny = 0, 0
    min_aopt = min(m, n)
    if center_axis >= 0:
        mnx = expand_dims(X.mean(center_axis), center_axis)
        X = X - mnx
    if center_axis != None:
        X, mnx = center(X, center_axis[0])
        Y, mny = center(Y, center_axis[1])
        min_aopt = min_aopt - 1
        mny = expand_dims(Y.mean(center_axis), center_axis)
        Y = Y - mny
    assert(aopt > 0 and aopt < min_aopt)

    W = empty((n, aopt))
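
With a two-element center_axis, X and Y are centered independently before
the NIPALS loop and the locations are carried along (they are returned as
'mnx' and 'mny' further down), so new samples can be shifted consistently.
A plain-numpy sketch of that bookkeeping, with column centering shown and f
standing in for whatever prediction rule the fitted model provides:

    import numpy as np

    X = np.random.rand(8, 5)
    Y = np.random.rand(8, 2)

    mnx = X.mean(0)[np.newaxis, :]     # kept with the model
    mny = Y.mean(0)[np.newaxis, :]
    Xc, Yc = X - mnx, Y - mny          # data handed to the NIPALS iterations

    # prediction for new rows would reuse the same training locations:
    # yhat_new = f(X_new - mnx) + mny
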
@@ -361,7 +358,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
        T[:,i] = t.ravel()
        W[:,i] = w.ravel()

        if mode == 'fast' and i == (aopt - 1):
        if mode in ['fast', 'f', 'F'] and i == (aopt - 1):
            if scale == 'loads':
                tnorm = sqrt(tt)
                T = T/tnorm

@@ -376,7 +373,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
    qnorm = apply_along_axis(vnorm, 0, Q)
    tnorm = sqrt(tt)
    pp = (P**2).sum(0)
    if mode == 'detailed':
    if mode in ['detailed', 'd', 'D']:
        E = empty((aopt, m, n))
        F = empty((aopt, k, l))
        ssqx, ssqy = [], []

@@ -416,7 +413,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
            'evx': expvarx, 'evy': expvary, 'ssqx': ssqx, 'ssqy': ssqy,
            'leverage': leverage, 'mnx': mnx, 'mny': mny}

def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth = False, verbose=False):
def nipals_lpls(X, Y, Z, a_max, alpha=.7, center_axis=[2, 0, 2], scale='scores', zorth = False, verbose=False):
    """ L-shaped Partial Least Sqaures Regression by the nipals algorithm.

    An L-shaped low rank model aproximates three matrices in a hyploid

@@ -437,7 +434,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
    alpha : {float}, optional
        Parameter to control the amount of influence from Z-matrix.
        0 is none, which returns a pls-solution, 1 is max
    mean_center : {array-like}, optional
    center_axis : {array-like}, optional
        A three element array-like structure with elements in [-1,0,1,2],
        that decides the type of centering used.
        -1 : nothing

@@ -500,16 +497,19 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zo
    m, n = X.shape
    k, l = Y.shape
    u, o = Z.shape
    max_rank = min(m, n) + 1
    assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
    tried: %d, max_rank: %d" %(a_max, max_rank)
    max_rank = min(m, n)

    if mean_ctr != None:
        xctr, yctr, zctr = mean_ctr

    if center_axis != None:
        xctr, yctr, zctr = center_axis
        X, mnX = center(X, xctr)
        Y, mnY = center(Y, yctr)
        Z, mnZ = center(Z, zctr)
        max_rank = max_rank -1
    assert (a_max > 0 and a_max < max_rank), "Number of comp error:\
    tried: %d, max_rank: %d" %(a_max, max_rank)

    # initial variance
    varX = (X**2).sum()
    varY = (Y**2).sum()
    varZ = (Z**2).sum()
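
The component-count guard now runs after centering, since removing the mean
costs one degree of freedom and lowers the largest admissible number of
components. A tiny sketch of that bookkeeping (names are illustrative only):

    import numpy as np

    def max_components(X, centered=True):
        """Largest number of latent components a (centered) X supports."""
        max_rank = min(X.shape)
        if centered:
            max_rank -= 1        # centering removes one degree of freedom
        return max_rank

    X = np.random.rand(6, 40)
    a_max = 4
    assert 0 < a_max < max_components(X)
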
@@ -670,7 +670,13 @@ def center(a, axis):
        Centered data matrix
    mn : {array}
        Location vector/matrix

    """

    ### TODO ###
    # Perhaps not(!) use broadcasting and return full centering
    # matrix instead ?


    # check if we have a vector
    is_vec = len(a.shape) == 1

@@ -678,13 +684,13 @@ def center(a, axis):
        is_vec = a.shape[0] == 1 or a.shape[1] == 1
    if is_vec:
        if axis == 2:
            warnings.warn("Double centering of vecor ignored, using ordinary centering")
            warnings.warn("Double centering of vector ignored, using ordinary centering")
        if axis == -1:
            mn = 0
        else:
            mn = a.mean()
        return a - mn, mn
    # !!!fixme: use broadcasting

    if axis == -1:
        mn = zeros((1,a.shape[1],))
        #mn = tile(mn, (a.shape[0], 1))

@@ -695,6 +701,7 @@ def center(a, axis):
        mn = a.mean(1)[:,newaxis]
        #mn = tile(mn, (1, a.shape[1]))
    elif axis == 2:
        #fixme: double centering returns column mean as loc-vector, ok?
        mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
        return a - mn , a.mean(0)[newaxis]
    else:
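
Double centering (axis=2) subtracts the row means and the column means and
adds back the grand mean, so both margins of the centered matrix average to
zero. A quick numpy check of that identity:

    import numpy as np

    a = np.random.rand(5, 7)
    mn = a.mean(0)[np.newaxis, :] + a.mean(1)[:, np.newaxis] - a.mean()
    b = a - mn
    assert np.allclose(b.mean(0), 0.0)   # column means removed
    assert np.allclose(b.mean(1), 0.0)   # row means removed
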
@@ -1,6 +1,6 @@
""" A collection of some statistical utitlites used.
"""
__all__ = ['hotelling', 'lpls_qvals']
__all__ = ['hotelling', 'lpls_qvals', 'pls_qvals']
__docformat__ = 'restructuredtext en'

from math import sqrt as msqrt

@@ -9,7 +9,8 @@ from numpy import dot,empty,zeros,eye,median,sign,arange,argsort
from numpy.linalg import svd,inv,det
from numpy.random import shuffle

from crossvalidation import lpls_jk
from crossvalidation import lpls_jk, pls_jk
from engines import pls
from engines import nipals_lpls as lpls


@@ -174,8 +175,9 @@ def _ensure_strict(C, only_flips=True):
    return Cm*S

def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
               sim_method='shuffle',p_center='med', cov_center=median,
               crot=True,strict=False,mean_ctr=[2,0,2], nsets=None):
               sim_method='shuffle', p_center='med', cov_center=median,
               crot=True, strict=False, center_axis=[2,0,2], nsets=None,
               zorth=False):

    """Returns qvals for l-pls model by permutation analysis.

@@ -195,7 +197,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
    alpha : {float}, optional
        Parameter to control the amount of influence from Z-matrix.
        0 is none, which returns a pls-solution, 1 is max
    mean_center : {array-like}, optional
    center_axis : {array-like}, optional
        A three element array-like structure with elements in [-1,0,1,2],
        that decides the type of centering used.
        -1 : nothing

@@ -242,8 +244,8 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
    pert_tsq_z = zeros((k, n_iter), dtype='d') # (nzvars x n_subsets)

    # Full model
    dat = lpls(X, Y, Z, aopt, scale='loads', mean_ctr=mean_ctr)
    Wc, Lc = lpls_jk(X, Y, Z ,aopt)
    dat = lpls(X, Y, Z, aopt, scale='loads', center_axis=center_axis, zorth=zorth)
    Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
    cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
    cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)

@@ -252,13 +254,92 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
    for i in range(n_iter):
        indi = index.copy()
        shuffle(indi)
        dat = lpls(X, Y[indi,:], Z, aopt, scale='loads', mean_ctr=mean_ctr)
        dat = lpls(X, Y[indi,:], Z, aopt, scale='loads', center_axis=center_axis, zorth=zorth)
        Wi, Li = lpls_jk(X, Y[indi,:], Z, aopt, nsets=nsets)
        pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
        pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)

    return _fdr(cal_tsq_z, pert_tsq_z, median), _fdr(cal_tsq_x, pert_tsq_x, median)


def pls_qvals(X, Y, aopt, alpha=.3, zx_alpha=.5, n_iter=20,
              sim_method='shuffle',p_center='med', cov_center=median,
              crot=True,strict=False, center_axis=[0,0], nsets=None, zorth=False):

    """Returns qvals for pls model by permutation analysis.

    The response (Y) is randomly permuted, and the number of false positives
    is registered by comparing hotellings T2 statistics of the calibration model.

    *Parameters*:

    X : {array}
        Main data matrix (m, n)
    Y : {array}
        External row data (m, l)
    aopt : {integer}
        Optimal number of components
    center_axis : {array-like}, optional
        A two element array-like structure with elements in [-1,0,1,2],
        that decides the type of centering used.
        -1 : nothing
        0 : row center
        1 : column center
        2 : double center
    n_iter : {integer}, optional
        Number of permutations
    sim_method : ['shuffle'], optional
        Permutation method
    p_center : {'median', 'mean', 'cal_model'}, optional
        Location method for sub-segments
    cov_center : {py_func}, optional
        Location function
    alpha : {float}, optional
        Regularisation towards pooled covariance estimate.
    crot : {boolean}, optional
        Rotate sub-segmentss toward calibration model.
    strict : {boolean}, optional
        Only rotate 90 degree
    nsets : {integer}
        Number of crossvalidation segements

    *Reference*:

        Gidskehaug et al., A framework for significance analysis of
        gene expression data using dimension reduction methods, BMC
        bioinformatics, 2007
    """

    m, n = X.shape
    try:
        my, l = Y.shape
    except:
        # make Y a column vector
        Y = atleast_2d(Y).T
        my, l = Y.shape
    assert(m==my)

    # Full model
    dat = pls(X, Y, aopt, scale='loads')
    # Submodels
    Wc = pls_jk(X, Y , aopt)

    cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)

    # Perturbations
    pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
    index = arange(m)
    for i in range(n_iter):
        indi = index.copy()
        shuffle(indi)
        dat = pls(X, Y[indi,:], aopt, scale='loads', center_axis=center_axis)
        Wi = pls_jk(X, Y[indi,:], aopt, nsets=nsets, center_axis=center_axis)
        pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)

    return _fdr(cal_tsq_x, pert_tsq_x, median)
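
Both q-value routines score every variable with a regularised Hotelling's
T2 computed over jack-knife segments of the loadings. One common way to form
such a statistic is sketched below; this is a generic illustration, not
necessarily what hotelling() in this package does:

    import numpy as np

    def hotelling_t2(Wcv, w_cal, alpha=0.3):
        """Regularised T2 per variable from jack-knife loadings.

        Wcv   : (n_segments, n_vars, n_comp) jack-knife loadings
        w_cal : (n_vars, n_comp) calibration loadings
        alpha : shrinkage towards the pooled covariance
        """
        nseg, nvar, ncomp = Wcv.shape
        cov = np.empty((nvar, ncomp, ncomp))
        for j in range(nvar):
            d = Wcv[:, j, :] - w_cal[j]        # deviations around calibration value
            cov[j] = d.T.dot(d) / nseg
        pooled = cov.mean(0)
        t2 = np.empty(nvar)
        for j in range(nvar):
            s = (1 - alpha) * cov[j] + alpha * pooled
            t2[j] = w_cal[j].dot(np.linalg.solve(s, w_cal[j]))
        return t2
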

def _fdr(tsq, tsqp, loc_method=median):
    """Returns false discovery rate.

@@ -306,3 +387,4 @@ def _fdr(tsq, tsqp, loc_method=median):
    fd_rate = fp/n_signif
    fd_rate[fd_rate>1] = 1
    return fd_rate
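
_fdr then turns the calibration and permutation statistics into a
per-variable false discovery rate: for each observed value, the (median)
count of permuted statistics exceeding it is divided by the count of
calibration statistics exceeding it, capped at one. A standalone sketch of
that calculation (details of the real _fdr may differ):

    import numpy as np

    def fdr_from_permutations(tsq, tsqp, loc=np.median):
        """tsq: (n_vars,) calibration T2, tsqp: (n_vars, n_iter) permuted T2."""
        n_signif = np.array([(tsq >= t).sum() for t in tsq], dtype='d')
        n_false = np.array([loc((tsqp >= t).sum(0)) for t in tsq], dtype='d')
        fd_rate = n_false / n_signif
        fd_rate[fd_rate > 1] = 1             # cap at 1, as above
        return fd_rate
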

@@ -10,8 +10,8 @@ from numpy.random import rand, randn
from numpy.linalg import svd,norm
restore_path()

def blm_array(shape=(5,10), comp=3, noise=0,seed=1,dtype='d'):
    assert(min(*shape)>=comp)
def blm_array(shape=(5, 10), comp=3, noise=0, seed=1, dtype='d'):
    assert(min(*shape) >= comp)
    random.seed(seed)
    t = rand(shape[0], comp)
    p = rand(shape[1], comp)

@@ -44,6 +44,7 @@ class LplsTestCase(NumpyTestCase):
        pass
        #raise NotImplementedError


class testAlphaZero(LplsTestCase):
    def do(self):
        #dat = lpls(self.a, self.b, self.c, self.nc, alpha=0.0)

@@ -51,9 +52,11 @@ class testAlphaZero(LplsTestCase):
        #assert_almost_equal(t1, t2)
        pass


class testAlphaOne(LplsTestCase):
    pass


class testZidentity(LplsTestCase):
    def do(self):
        I = eye(self.a.shape[1])

@@ -61,6 +64,7 @@ class testZidentity(LplsTestCase):
        dat2 = lpls(self.a, self.b, self.c, self.nc, alpha=0.0)
        assert_almost_equal(dat['T'], dat2['T'])


class testYidentity(LplsTestCase):
    def do(self):
        I = eye(self.b.shape[0], dtype=self.a.dtype)

@@ -69,33 +73,43 @@ class testYidentity(LplsTestCase):
        T = u*s
        assert_almost_equal(abs(T0), abs(T[:,:self.nc]),5)


class testWideX(LplsTestCase):
    pass


class testTallX(LplsTestCase):
    pass


class testWideY(LplsTestCase):
    pass


class testTallY(LplsTestCase):
    pass


class testWideZ(LplsTestCase):
    pass


class testTallZ(LplsTestCase):
    pass


class testRankDeficientX(LplsTestCase):
    pass


class testRankDeficientY(LplsTestCase):
    pass


class testRankDeficientZ(LplsTestCase):
    pass


class testCenterX(LplsTestCase):
    def do(self):
        T = lpls(self.a, self.b, self.c, self.nc, mean_ctr=[0,-1,-1])['T']
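
The test fixtures above come from blm_array, which builds a synthetic
bilinear (low-rank) matrix: random scores times random loadings, with
optional noise. A standalone sketch of the same construction (names and
defaults here are illustrative):

    import numpy as np

    def bilinear_data(shape=(5, 10), comp=3, noise=0.0, seed=1):
        """Rank-`comp` test matrix X = T P^T (+ noise)."""
        assert min(shape) >= comp
        rng = np.random.RandomState(seed)
        T = rng.rand(shape[0], comp)      # scores
        P = rng.rand(shape[1], comp)      # loadings
        X = T.dot(P.T)
        if noise:
            X = X + noise * rng.randn(*shape)
        return X

    X = bilinear_data((5, 10), comp=3, noise=0.1)
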