A few updates

parent 902806c1d8
commit 2951ca4088
@@ -12,7 +12,7 @@ from numpy.random import shuffle
 from engines import nipals_lpls as lpls
 
 
-def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=True):
+def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True):
     """Performs crossvalidation for generalisation error in lpls.
 
     The L-PLS crossvalidation is estimated just like an ordinary pls
@@ -42,6 +42,8 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=Tru
         0 : row center
         1 : column center
         2 : double center
+    zorth : {boolean}
+        If true, require orthogonal latent components in Z.
     verbose : {boolean}, optional
         Verbosity of console output. For use in debugging.
 
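A minimal call sketch for the updated signature (array shapes are made up for illustration; the return value is not shown in this diff, so it is left unpacked):

    from numpy import random

    X = random.rand(20, 8)    # samples x predictor variables
    Y = random.rand(20, 3)    # samples x responses
    Z = random.rand(5, 8)     # background information on the predictors

    # zorth=True requests orthogonal latent components in Z (new in this commit)
    res = lpls_val(X, Y, Z, a_max=2, nsets=10, alpha=.5,
                   mean_ctr=[2, 0, 2], zorth=True, verbose=False)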
@@ -70,7 +72,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=Tru
 
     Yhat = empty((a_max, k, l), 'd')
     for cal, val in cv(nsets, k):
-        dat = lpls(X[cal],Y[cal],Z,a_max=a_max,alpha=alpha,mean_ctr=mean_ctr,verbose=verbose)
+        # do the training model
+        dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
+                   mean_ctr=mean_ctr, zorth=zorth, verbose=verbose)
+
+        # center test data
         if mean_ctr[0] != 1:
             xi = X[val,:] - dat['mnx']
         else:
@@ -79,14 +85,24 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2],verbose=Tru
             ym = dat['mny']
         else:
             ym = Y[val].mean(1)[:,newaxis] #???: check this
+        # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
+    #if permute:
+    #    xcal = X[cal]
+    #    for a in range(1, a_max, 1):
+    #        for n in range(10):
+    #            shuffle(cal)
+    #            dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha,
+    #                       mean_ctr=mean_ctr, verbose=verbose)
+
 
     # todo: need a better support for classification error
     y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
     if y_is_class:
-        Yhat, err = class_error(Yhat,Y)
-        return Yhat, err
+        pass
+        #Yhat, err = class_error(Yhat, Y)
+        #return Yhat, err
 
     sep = (Y - Yhat)**2
     rmsep = sqrt(sep.mean(1)).T
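The error bookkeeping at the end of this hunk is compact; here is the same RMSEP computation in isolation (shapes follow `Yhat = empty((a_max, k, l))` above, data random for illustration):

    from numpy import sqrt, random

    a_max, k, l = 2, 20, 3
    Y = random.rand(k, l)
    Yhat = random.rand(a_max, k, l)   # cross-validated predictions per component

    sep = (Y - Yhat)**2               # Y broadcasts over the component axis
    rmsep = sqrt(sep.mean(1)).T       # mean over samples; final shape (l, a_max)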
@@ -317,8 +333,8 @@ def cv(N, K, randomise=True, sequential=False):
     otherwise interleaved ordering is used.
 
     """
-    if K>N:
-        raise ValueError, "You cannot divide a list of %d samples into more than %d segments. You tried: %s" %(N,N,K)
+    if N>K:
+        raise ValueError, "You cannot divide a list of %d samples into more than %d segments. You tried: %s" %(K, K, N)
     index = xrange(N)
     if randomise:
         from random import shuffle
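For context, `cv` yields (calibration, validation) index pairs over the samples. A stripped-down sketch with the same yield contract, using the interleaved assignment the docstring mentions (an illustration, not the module's exact implementation):

    def kfold(n_segments, n_samples):
        # yield (cal, val) index lists, interleaved segment assignment
        for seg in range(n_segments):
            val = [i for i in range(n_samples) if i % n_segments == seg]
            cal = [i for i in range(n_samples) if i % n_segments != seg]
            yield cal, val

    for cal, val in kfold(3, 7):
        print cal, val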
@@ -371,7 +387,7 @@ def class_error(Yhat, Y, method='vanilla'):
     Yhat_c = zeros((k, l), dtype='d')
     for a in range(a_opt):
         for i in range(k):
-            Yhat_c[a,val,argmax(Yhat[a,val,:])] = 1.0
+            Yhat_c[a, val, argmax(Yhat[a,val,:])] = 1.0
     err = 100*((Yhat_c + Y) == 2).sum(1)/Y.sum(0).astype('d')
 
     return Yhat_c, err
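The winner-takes-all logic in `class_error` is easiest to see on a toy dummy-coded example (single component for simplicity; note that the quantity the module stores in `err` actually counts correct assignments per class):

    from numpy import array, zeros, argmax

    Y = array([[1., 0.], [0., 1.], [1., 0.]])      # dummy-coded true classes
    Yhat = array([[.8, .2], [.4, .6], [.3, .7]])   # continuous predictions

    Yhat_c = zeros(Yhat.shape)
    for i in range(Yhat.shape[0]):
        Yhat_c[i, argmax(Yhat[i,:])] = 1.0         # winner takes all per sample

    hits = ((Yhat_c + Y) == 2).sum(0)              # entries equal 2 only where both are 1
    rate = 100*hits/Y.sum(0)                       # per-class hit rate: [50., 100.]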
@@ -411,7 +411,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
            'evx': expvarx, 'evy': expvary, 'ssqx': ssqx, 'ssqy': ssqy,
            'leverage': leverage, 'mnx': mnx, 'mny': mny}
 
-def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
+def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth=False, verbose=False):
     """ L-shaped Partial Least Squares Regression by the nipals algorithm.
 
     An L-shaped low rank model approximates three matrices in a hyploid
@@ -475,10 +475,14 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', ve
 
     scale : {'scores', 'loads'}, optional
         Option to decide on where the scale goes.
+    zorth : {False, boolean}, optional
+        Option to force orthogonality between latent components
+        in Z.
     verbose : {boolean}, optional
         Verbosity of console output. For use in debugging.
 
     *References*
 
     Saeboe et al., LPLS-regression: a method for improved prediction and
     classification through inclusion of background information on
     predictor variables, J. of chemometrics and intell. laboratory syst.
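A usage sketch for the updated engine signature (shapes illustrative; only the result keys that appear elsewhere in this diff, 'B', 'mnx' and 'mny', are assumed):

    from numpy import dot, random
    from engines import nipals_lpls

    X = random.rand(20, 8)
    Y = random.rand(20, 3)
    Z = random.rand(5, 8)

    dat = nipals_lpls(X, Y, Z, a_max=2, alpha=.7, zorth=True, verbose=False)

    # predict as lpls_val does: center with the stored means, apply B
    xi = X - dat['mnx']
    yhat = dat['mny'] + dot(xi, dat['B'][1])   # two-component prediction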
@@ -522,18 +526,22 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', ve
     var_y = empty((a_max,))
     var_z = empty((a_max,))
 
-    MAX_ITER = 450
+    MAX_ITER = 4500
     LIM = finfo(X.dtype).resolution
     is_rd = False
     for a in range(a_max):
         if verbose:
             print "\nWorking on comp. %s" %a
         u = F[:,:1]
+        w = E[:1,:].T
+        l = G[:,:1]
         diff = 1
         niter = 0
         while (diff>LIM and niter<MAX_ITER):
             niter += 1
             u1 = u.copy()
+            w1 = w.copy()
+            l1 = l.copy()
             w = dot(E.T, u)
             wn = msqrt(dot(w.T, w))
             if wn < LIM:
@@ -552,20 +560,25 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', ve
             c = dot(F.T, t)
             c = c/msqrt(dot(c.T, c))
             u = dot(F, c)
-            diff = dot((u-u1).T, (u-u1))
+            diff = dot((u - u1).T, (u - u1))
 
         if verbose:
-            print "Converged after %s iterations" %niter
+            if niter==MAX_ITER:
+                print "Maximum number of iterations reached!"
+            print "Iterations: %d " %niter
             print "Error: %.2E" %diff
 
         if is_rd:
             print "Hei og haa ... rank deficient, this should really not happen"
             break
 
         tt = dot(t.T, t)
-        p = dot(X.T, t)/tt
-        q = dot(Y.T, t)/tt
-        l = dot(Z, w)
-        #k = dot(Z.T, l)/dot(l.T, l)
+        p = dot(E.T, t)/tt
+        q = dot(F.T, t)/tt
+        if zorth:
+            k = dot(G.T, l)/dot(l.T, l)
+        else:
+            k = w
+        l = dot(G, w)
 
         U[:,a] = u.ravel()
         W[:,a] = w.ravel()
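The inner loop follows the usual NIPALS pattern: iterate until the score vector stops moving or MAX_ITER is hit. The same control flow in isolation, as a plain power iteration on a single block (hypothetical data; `float(...)` just avoids comparing a 1x1 array):

    from numpy import dot, finfo, random

    E = random.rand(20, 8)
    LIM = finfo(E.dtype).resolution   # same tolerance the module uses
    MAX_ITER = 4500                   # raised from 450 in this commit

    u = E[:, :1].copy()
    diff, niter = 1.0, 0
    while diff > LIM and niter < MAX_ITER:
        niter += 1
        u1 = u.copy()
        w = dot(E.T, u)
        w = w/dot(w.T, w)**.5          # normalised weights
        u = dot(E, w)                  # updated scores
        diff = float(dot((u - u1).T, (u - u1)))
    print "Iterations: %d" % niter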
@@ -575,10 +588,10 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', ve
         L[:,a] = l.ravel()
         K[:,a] = k.ravel()
 
+        # rank-one deflations
         E = E - dot(t, p.T)
         F = F - dot(t, q.T)
-        G = (G.T - dot(k, l.T)).T
+        G = G - dot(l, k.T)
 
         var_x[a] = pow(E, 2).sum()
         var_y[a] = pow(F, 2).sum()
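The new `G` deflation is just the transpose-free form of the old expression: (G.T - k·l.T).T == G - l·k.T. A quick numerical check of that identity (shapes chosen so `l` matches G's rows and `k` its columns):

    from numpy import allclose, dot, random

    G = random.rand(5, 8)
    l = random.rand(5, 1)
    k = random.rand(8, 1)

    assert allclose((G.T - dot(k, l.T)).T, G - dot(l, k.T))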
@@ -1,6 +1,6 @@
 """Bilinear models"""
 
-from numpy import expand_dims
+from numpy import expand_dims, ones
 
 from engines import pca
 
@@ -14,8 +14,11 @@ def scale(x, axis=0):
     #scale = 1./x.std(axis)
     return expand_dims(scale, axis)
 
 
 class Model(object):
-    def __init__(name="johndoe"):
+    """All underscored attributes are properties.
+    """
+    def __init__(self, name="johndoe"):
         self.name = name
         self.options = {}
@@ -27,8 +30,8 @@ class Model(object):
 
     def clear(self):
         for param in self.__dict__.keys():
-            if param.startswith("_") and param[1]!="_":
-                exec "del self." + param
+            if param.startswith("_") and param[1:5]!="core":
+                exec "del self." + param[1:]
 
     def clear_core(self):
         for param in self.__dict__.keys():
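`clear` relies on the convention that cached values live in single-underscore attributes, with `_core_*` names surviving; deleting through `param[1:]` (the property name) routes the deletion through each property's fdel. A compact illustration of the idiom on a hypothetical class, simplified to use delattr instead of exec:

    class Cache(object):
        def __init__(self):
            self._core_data = "keep"
            self._tmp = "drop"

        def clear(self):
            # drop cached attributes, keep the _core_* ones
            for param in self.__dict__.keys():
                if param.startswith("_") and param[1:5] != "core":
                    delattr(self, param)

    c = Cache()
    c.clear()
    print hasattr(c, "_tmp"), hasattr(c, "_core_data")   # False True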
@@ -43,7 +46,7 @@ class PCA(Model):
         self._x = x
         self.amax = amax
         self.aopt = amax
 
     # properties
     def amax():
         doc = "maximum number of components"
@@ -78,29 +81,29 @@ class PCA(Model):
     def scores():
         doc = "pca scores"
         def fget(self):
-            if not hasattr(self, "_scores"):
-                u, s, v, tot_var = pcaengine(self.xw, self.amax)
-                self._scores = u
-                self.singvals = s
-                self.loadings = v
-                self.tot_var = tot_var
-            return self._scores[:,:self.amax]
+            if not hasattr(self, "_core_scores"):
+                result = pca(self.xw, self.amax)
+                self._core_scores = result['T']
+                self.singvals = result['eigvals']
+                self.loadings = result['P']
+                self.tot_var = 120.
+            return self._core_scores[:,:self.amax]
         def fset(self, t):
-            self._scores = t
+            self._core_scores = t
         def fdel(self):
-            del self._scores
-        return locals() # credit: David Niergarth
+            del self._core_scores
+        return locals()
     scores = property(**scores())
 
     def loadings():
         doc = "pca loadings"
         def fget(self):
             if not hasattr(self, "_loadings"):
-                u, s, v, tot_var = pcaengine(self.xw, self.amax)
-                self._loadings = v
-                self.scores = u
-                self.singvals = s
-                self.tot_var = tot_var
+                result = pca(self.xw, self.amax)
+                self.loadings = result['P']
+                self.scores = result['T']
+                self.singvals = result['eigvals']
+                self.tot_var = 120
             return self._loadings[:,:self.amax]
         def fdel(self):
             del self._loadings
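The surrounding properties all use the `property(**func())` idiom: an inner function defines `doc`, `fget`, `fset`, `fdel` and returns `locals()`, which is splatted into `property`. A minimal standalone version of the pattern with lazy caching:

    class Lazy(object):
        def value():
            doc = "lazily computed value"
            def fget(self):
                if not hasattr(self, "_value"):
                    self._value = 42   # expensive computation goes here
                return self._value
            def fset(self, v):
                self._value = v
            def fdel(self):
                del self._value
            return locals()
        value = property(**value())

    obj = Lazy()
    print obj.value   # computed and cached on first access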
@@ -113,11 +116,11 @@ class PCA(Model):
         doc = "Singular values"
         def fget(self):
             if not hasattr(self, "_singvals"):
-                u, s, v, tot_var = pcaengine(self.xw, self.amax)
-                self._singvals = s
-                self.scores = u
-                self.loadings = v
-                self.tot_var = tot_var
+                result = pca(self.xw, self.amax)
+                self._singvals = result['eigvals']
+                self.scores = result['T']
+                self.loadings = result['P']
+                self.tot_var = 120
             return self._singvals[:self.amax]
         def fset(self, w):
             self._singvals = w
@@ -139,7 +142,7 @@ class PCA(Model):
         doc = "column means"
         def fget(self):
             if not hasattr(self, "_xadd"):
-                self._xadd = center(self.x, axis=0)
+                self._xadd = mean_center(self.x, axis=0)
             return self._xadd
         def fset(self, mnx):
             if hasattr(self, "_xc"):
@@ -153,7 +156,7 @@ class PCA(Model):
     xadd = property(**xadd())
 
     def xc():
-        doc = "centered input data"
+        doc = "mean_centered input data"
         def fget(self):
             if not hasattr(self, "_xc"):
                 self._xc = self.x + self.xadd
@@ -161,7 +164,10 @@ class PCA(Model):
         def fset(self, xc):
             self._xc = xc
         def fdel(self):
-            del self._xc
+            print "a"
+            if hasattr(self, "_xc"):
+                print "del"
+                del self._xc
         return locals()
     xc = property(**xc())
 
@@ -237,7 +243,7 @@ class PCA(Model):
         def fdel(self):
             del self._row_metric
             if hasattr(self, "_xd"):
-                del self.xd
+                del self._xd
         return locals()
     row_metric = property(**row_metric())
 
@@ -254,7 +260,7 @@ class PCA(Model):
         def fdel(self):
             del self._column_metric
             if hasattr(self, "_xd"):
-                del self.xd
+                del self._xd
         return locals()
     column_metric = property(**column_metric())
 
@@ -273,10 +279,12 @@ class PCA(Model):
     def delete_rows(self, index):
         pass
 
-    def reweight(self, )
+    def reweight(self, w):
+        pass
 
 
 if __name__ == "__main__":
-    X = random.rand(4,10)
-    pcaobj = PCA(X)
-    print "explained variance" + str(pcaobj.explained_variance)
+    from numpy.random import rand
+    X = rand(4,10)
+    pcaobj = PCA(X)
+    print "explained variance" + str(pcaobj.explained_variance)
@@ -115,8 +115,8 @@ def procrustes(a, b, strict=True, center=False, verbose=False):
 
     *Reference*:
 
-    Schonemann, A generalized solution of the orthogonal Procrustes problem,
-    Psychometrika, 1966
+    Schonemann, A generalized solution of the orthogonal Procrustes
+    problem, Psychometrika, 1966
     """
 
     if center:
@@ -131,9 +131,9 @@ def procrustes(a, b, strict=True, center=False, verbose=False):
         Cm = _ensure_strict(Cm)
     b_rot = dot(b, Cm)
     if verbose:
-        print Cm.round()
-        fit = sum(ravel(b - b_rot)**2)
-        print "Error: %.3E" %fit
+        fit = ((b - b_rot)**2).sum()
+        fit2 = (dot(a, a.T) + dot(b, b.T) - 2*diag(s)).trace()
+        print "Error: %.2E , %.2E" %(fit, fit2)
     if center:
         return mn_b + b_rot
     else:
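`fit2` is Schonemann's closed form for the Procrustes residual: with the rotation taken from the SVD of b.T·a (the construction of Cm and s is not shown in this hunk, so it is assumed here), the minimum of ||a - bC||^2 over orthogonal C equals tr(a·a.T) + tr(b·b.T) - 2*sum(s). Note the closed form matches the residual against `a`, whereas the first `fit` above measures how far the rotation moved `b`. A numerical check of the identity:

    from numpy import dot, trace
    from numpy.linalg import svd
    from numpy.random import rand

    a, b = rand(6, 3), rand(6, 3)
    u, s, vt = svd(dot(b.T, a))
    Cm = dot(u, vt)                   # orthogonal rotation aligning b with a
    b_rot = dot(b, Cm)

    fit = ((a - b_rot)**2).sum()
    fit2 = trace(dot(a, a.T)) + trace(dot(b, b.T)) - 2*s.sum()
    assert abs(fit - fit2) < 1e-8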
@@ -159,7 +159,9 @@ def _ensure_strict(C, only_flips=True):
 
     *Notes*:
 
-    This function is not ready for use. Use (only_flips=True)
+    This function is not ready for use. Use (only_flips=True).
+    That is, for more than two components, the rotation matrix
+    has a tendency to be unstable (det(Cm)>1), when rounding is used.
 
     """
     if only_flips:
@@ -279,6 +281,16 @@ def _fdr(tsq, tsqp, loc_method=median):
     fdr : {array}
         False discovery rate
 
+    *Notes*:
+
+    This is an internal function for use in fdr estimation of jack-knifed
+    perturbed blm parameters.
+
+
+    *Reference*:
+    Gidskehaug et al., A framework for significance analysis of
+    gene expression data using dimension reduction methods, BMC
+    bioinformatics, 2007
     """
     n, = tsq.shape
     k, m = tsqp.shape
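As background for the note added above, a generic plug-in FDR estimate from permuted statistics: for each observed threshold, compare the (median) number of permuted statistics exceeding it against the number of observed discoveries. This is a sketch of the general technique under the assumption that `tsqp` has one column per variable, not necessarily `_fdr`'s exact estimator:

    from numpy import asarray, median, sort

    def fdr_plugin(tsq, tsqp):
        # tsq:  (n,) observed test statistics
        # tsqp: (k, n) statistics from k permutations / jack-knife segments
        tsq = sort(asarray(tsq))[::-1]                 # descending
        fdr = []
        for i, t in enumerate(tsq):
            false_pos = median((tsqp >= t).sum(1))     # expected false positives
            fdr.append(min(1.0, false_pos/float(i + 1)))
        return asarray(fdr)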