A few updates

commit 2951ca4088 (parent 902806c1d8)
@@ -12,7 +12,7 @@ from numpy.random import shuffle
 from engines import nipals_lpls as lpls


-def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], verbose=True):
+def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], zorth=False, verbose=True):
     """Performs crossvalidation for generalisation error in lpls.

     The L-PLS crossvalidation is estimated just like an ordinary pls
@@ -42,6 +42,8 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], verbose=True):
         0 : row center
         1 : column center
         2 : double center
+    zorth : {boolean}
+        If True, require orthogonal latent components in Z.
     verbose : {boolean}, optional
         Verbosity of console output. For use in debugging.

@@ -70,7 +72,11 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], verbose=True):

     Yhat = empty((a_max, k, l), 'd')
     for cal, val in cv(nsets, k):
-        dat = lpls(X[cal],Y[cal],Z,a_max=a_max,alpha=alpha,mean_ctr=mean_ctr,verbose=verbose)
+        # fit the training model
+        dat = lpls(X[cal], Y[cal], Z, a_max=a_max, alpha=alpha,
+                   mean_ctr=mean_ctr, zorth=zorth, verbose=verbose)
+
+        # center the test data
         if mean_ctr[0] != 1:
             xi = X[val,:] - dat['mnx']
         else:
@@ -79,14 +85,24 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None, alpha=.5, mean_ctr=[2,0,2], verbose=True):
             ym = dat['mny']
         else:
             ym = Y[val].mean(1)[:,newaxis] #???: check this
+        # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
+    #if permute:
+    #    xcal = X[cal]
+    #    for a in range(1, a_max, 1):
+    #        for n in range(10):
+    #            shuffle(cal)
+    #            dat = lpls(xcal, Y[cal], Z, a_max=a_max, alpha=alpha,
+    #                       mean_ctr=mean_ctr, verbose=verbose)


     # todo: need better support for classification error
     y_is_class = Y.dtype.char.lower() in ['i', 'p', 'b', 'h', '?']
     if y_is_class:
-        pass
-        #Yhat, err = class_error(Yhat, Y)
-        #return Yhat, err
+        Yhat, err = class_error(Yhat, Y)
+        return Yhat, err

     sep = (Y - Yhat)**2
     rmsep = sqrt(sep.mean(1)).T
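Note: the RMSEP step at the end of this hunk reduces the (a_max, k, l) prediction cube to one error curve per response. A standalone sketch with toy shapes (hypothetical names, not the library call):

    import numpy as np

    a_max, k, l = 2, 6, 3                # components, samples, responses
    Y = np.random.rand(k, l)             # observed responses
    Yhat = np.random.rand(a_max, k, l)   # stand-in for the CV predictions

    sep = (Y - Yhat)**2                  # squared errors; Y broadcasts over a_max
    rmsep = np.sqrt(sep.mean(1)).T       # average over samples -> (l, a_max)
    print(rmsep.shape)                   # one RMSEP curve per response column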
@@ -317,8 +333,8 @@ def cv(N, K, randomise=True, sequential=False):
     otherwise interleaved ordering is used.

     """
-    if K>N:
-        raise ValueError, "You cannot divide a list of %d samples into more than %d segments. You tried: %s" %(N, N, K)
+    if N>K:
+        raise ValueError, "You cannot divide a list of %d samples into more than %d segments. You tried: %s" %(K, K, N)
     index = xrange(N)
     if randomise:
         from random import shuffle
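Note: cv() is called above as cv(nsets, k), i.e. (segments, samples), which is why the corrected guard reads N>K. A minimal interleaved K-fold splitter in the same spirit (standalone sketch, not the library function):

    import numpy as np

    def kfold_indices(n_samples, n_segments):
        index = np.arange(n_samples)
        for seg in range(n_segments):
            val = index[seg::n_segments]       # interleaved validation segment
            cal = np.setdiff1d(index, val)     # remaining calibration samples
            yield cal, val

    for cal, val in kfold_indices(7, 3):
        print(cal, val)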
@@ -371,7 +387,7 @@ def class_error(Yhat, Y, method='vanilla'):
     Yhat_c = zeros((k, l), dtype='d')
     for a in range(a_opt):
         for i in range(k):
-            Yhat_c[a,val,argmax(Yhat[a,val,:])] = 1.0
+            Yhat_c[a, val, argmax(Yhat[a,val,:])] = 1.0
     err = 100*((Yhat_c + Y) == 2).sum(1)/Y.sum(0).astype('d')

     return Yhat_c, err
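Note: class_error() dichotomizes continuous predictions winner-takes-all before scoring. A self-contained sketch of that step for a single component (hypothetical data):

    import numpy as np

    Y = np.array([[1., 0.], [0., 1.], [1., 0.]])      # true class indicators
    Yhat = np.array([[.8, .2], [.4, .6], [.3, .7]])   # continuous predictions

    Yhat_c = np.zeros_like(Y)
    Yhat_c[np.arange(len(Y)), Yhat.argmax(1)] = 1.0   # 1.0 at the argmax class

    # percent correct per class: positions where truth and prediction are both 1
    err = 100 * ((Yhat_c + Y) == 2).sum(0) / Y.sum(0)
    print(Yhat_c, err)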
@@ -411,7 +411,7 @@ def pls(X, Y, aopt=2, scale='scores', mode='normal', center_axis=-1):
            'evx': expvarx, 'evy': expvary, 'ssqx': ssqx, 'ssqy': ssqy,
            'leverage': leverage, 'mnx': mnx, 'mny': mny}

-def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
+def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 2], scale='scores', zorth=False, verbose=False):
     """ L-shaped Partial Least Squares Regression by the nipals algorithm.

     An L-shaped low rank model approximates three matrices in a hyploid
@@ -475,10 +475,14 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):

     scale : {'scores', 'loads'}, optional
         Option to decide on where the scale goes.
+    zorth : {False, boolean}, optional
+        Option to force orthogonality between latent components
+        in Z.
     verbose : {boolean}, optional
         Verbosity of console output. For use in debugging.

     *References*

     Saeboe et al., LPLS-regression: a method for improved prediction and
     classification through inclusion of background information on
     predictor variables, J. of chemometrics and intell. laboratory syst.
@@ -522,18 +526,22 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
     var_y = empty((a_max,))
     var_z = empty((a_max,))

-    MAX_ITER = 450
+    MAX_ITER = 4500
     LIM = finfo(X.dtype).resolution
     is_rd = False
     for a in range(a_max):
         if verbose:
             print "\nWorking on comp. %s" %a
         u = F[:,:1]
         w = E[:1,:].T
+        l = G[:,:1]
         diff = 1
         niter = 0
         while (diff>LIM and niter<MAX_ITER):
             niter += 1
             u1 = u.copy()
             w1 = w.copy()
+            l1 = l.copy()
             w = dot(E.T, u)
             wn = msqrt(dot(w.T, w))
             if wn < LIM:
@@ -552,20 +560,25 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
             c = dot(F.T, t)
             c = c/msqrt(dot(c.T, c))
             u = dot(F, c)
-            diff = dot((u-u1).T, (u-u1))
+            diff = dot((u - u1).T, (u - u1))
         if verbose:
             print "Converged after %s iterations" %niter
+            if niter == MAX_ITER:
+                print "Maximum number of iterations reached!"
+                print "Iterations: %d " %niter
+                print "Error: %.2E" %diff

         if is_rd:
             print "Hei og haa ... rank deficient, this should really not happen"
             break

         tt = dot(t.T, t)
-        p = dot(X.T, t)/tt
-        q = dot(Y.T, t)/tt
-        l = dot(Z, w)
-        #k = dot(Z.T, l)/dot(l.T, l)
+        p = dot(E.T, t)/tt
+        q = dot(F.T, t)/tt
+        if zorth:
+            k = dot(G.T, l)/dot(l.T, l)
+        else:
+            k = w
+            l = dot(G, w)

         U[:,a] = u.ravel()
         W[:,a] = w.ravel()
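Note: the zorth branch makes the Z-components orthogonal by construction: k = G'l/(l'l) is the least-squares regression of G on l, so the deflation below leaves G orthogonal to l, and every later l drawn from the deflated G is orthogonal to it. A standalone check (hypothetical data):

    import numpy as np

    rng = np.random.default_rng(0)
    G = rng.standard_normal((8, 5))
    w1 = rng.standard_normal((5, 1))

    l1 = G @ w1                         # first latent component in Z-space
    k1 = G.T @ l1 / (l1.T @ l1)         # zorth=True loading
    G1 = G - l1 @ k1.T                  # rank-one deflation

    l2 = G1 @ rng.standard_normal((5, 1))
    print((l1.T @ l2).item())           # ~0: orthogonal by construction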
@@ -575,10 +588,10 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
         L[:,a] = l.ravel()
         K[:,a] = k.ravel()

-
+        # rank-one deflations
         E = E - dot(t, p.T)
         F = F - dot(t, q.T)
-        G = (G.T - dot(k, l.T)).T
+        G = G - dot(l, k.T)

         var_x[a] = pow(E, 2).sum()
         var_y[a] = pow(F, 2).sum()
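Note: the G-deflation rewrite is behaviour-preserving, since (G' - k l')' = G - l k'. A quick check (hypothetical shapes):

    import numpy as np

    rng = np.random.default_rng(1)
    G = rng.standard_normal((6, 4))
    l = rng.standard_normal((6, 1))     # score-like vector (rows of G)
    k = rng.standard_normal((4, 1))     # loading-like vector (columns of G)

    old = (G.T - k @ l.T).T             # form removed in this hunk
    new = G - l @ k.T                   # form added in this hunk
    print(np.allclose(old, new))        # True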
@@ -1,6 +1,6 @@
 """Bilinear models"""

-from numpy import expand_dims
+from numpy import expand_dims, ones

 from engines import pca

@@ -14,8 +14,11 @@ def scale(x, axis=0):
     #scale = 1./x.std(axis)
     return expand_dims(scale, axis)


 class Model(object):
-    def __init__(name="johndoe"):
+    """All underscored attributes are properties.
+    """
+    def __init__(self, name="johndoe"):
         self.name = name
         self.options = {}

@@ -27,8 +30,8 @@ class Model(object):

     def clear(self):
         for param in self.__dict__.keys():
-            if param.startswith("_") and param[1]!="_":
-                exec "del self." + param
+            if param.startswith("_") and param[1:5]!="core":
+                exec "del self." + param[1:]

     def clear_core(self):
         for param in self.__dict__.keys():
@@ -78,29 +81,29 @@ class PCA(Model):
     def scores():
         doc = "pca scores"
         def fget(self):
-            if not hasattr(self, "_scores"):
-                u, s, v, tot_var = pcaengine(self.xw, self.amax)
-                self._scores = u
-                self.singvals = s
-                self.loadings = v
-                self.tot_var = tot_var
-            return self._scores[:,:self.amax]
+            if not hasattr(self, "_core_scores"):
+                result = pca(self.xw, self.amax)
+                self._core_scores = result['T']
+                self.singvals = result['eigvals']
+                self.loadings = result['P']
+                self.tot_var = 120.
+            return self._core_scores[:,:self.amax]
         def fset(self, t):
-            self._scores = t
+            self._core_scores = t
         def fdel(self):
-            del self._scores
-        return locals() # credit: David Niergarth
+            del self._core_scores
+        return locals()
     scores = property(**scores())
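Note: scores() here is not a method; it runs once at class-body time and returns its locals() (doc, fget, fset, fdel), which property(**...) unpacks. A minimal standalone illustration of the idiom:

    class Example(object):
        def value():
            doc = "lazily computed value"
            def fget(self):
                if not hasattr(self, "_value"):
                    self._value = 42     # stand-in for an expensive computation
                return self._value
            def fset(self, v):
                self._value = v
            def fdel(self):
                del self._value
            return locals()              # {'doc': ..., 'fget': ..., 'fset': ..., 'fdel': ...}
        value = property(**value())

    e = Example()
    print(e.value)   # 42, computed then cached
    e.value = 7
    print(e.value)   # 7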

     def loadings():
         doc = "pca loadings"
         def fget(self):
             if not hasattr(self, "_loadings"):
-                u, s, v, tot_var = pcaengine(self.xw, self.amax)
-                self._loadings = v
-                self.scores = u
-                self.singvals = s
-                self.tot_var = tot_var
+                result = pca(self.xw, self.amax)
+                self.loadings = result['P']
+                self.scores = result['T']
+                self.singvals = result['eigvals']
+                self.tot_var = 120
             return self._loadings[:,:self.amax]
         def fdel(self):
             del self._loadings
@@ -113,11 +116,11 @@ class PCA(Model):
         doc = "Singular values"
         def fget(self):
             if not hasattr(self, "_singvals"):
-                u, s, v, tot_var = pcaengine(self.xw, self.amax)
-                self._singvals = s
-                self.scores = u
-                self.loadings = v
-                self.tot_var = tot_var
+                result = pca(self.xw, self.amax)
+                self._singvals = result['eigvals']
+                self.scores = result['T']
+                self.loadings = result['P']
+                self.tot_var = 120
             return self._singvals[:self.amax]
         def fset(self, w):
             self._singvals = w
@@ -139,7 +142,7 @@ class PCA(Model):
         doc = "column means"
         def fget(self):
             if not hasattr(self, "_xadd"):
-                self._xadd = center(self.x, axis=0)
+                self._xadd = mean_center(self.x, axis=0)
             return self._xadd
         def fset(self, mnx):
             if hasattr(self, "_xc"):
@@ -153,7 +156,7 @@ class PCA(Model):
     xadd = property(**xadd())

     def xc():
-        doc = "centered input data"
+        doc = "mean_centered input data"
         def fget(self):
             if not hasattr(self, "_xc"):
                 self._xc = self.x + self.xadd
@@ -161,6 +164,9 @@ class PCA(Model):
         def fset(self, xc):
             self._xc = xc
         def fdel(self):
-            del self._xc
+            print "a"
+            if hasattr(self, "_xc"):
+                print "del"
+                del self._xc
         return locals()
     xc = property(**xc())
@@ -237,7 +243,7 @@ class PCA(Model):
         def fdel(self):
             del self._row_metric
             if hasattr(self, "_xd"):
-                del self.xd
+                del self._xd
         return locals()
     row_metric = property(**row_metric())

@@ -254,7 +260,7 @@ class PCA(Model):
         def fdel(self):
             del self._column_metric
             if hasattr(self, "_xd"):
-                del self.xd
+                del self._xd
         return locals()
     column_metric = property(**column_metric())

@@ -273,10 +279,12 @@ class PCA(Model):
     def delete_rows(self, index):
         pass

-    def reweight(self, )
+    def reweight(self, w):
         pass


 if __name__ == "__main__":
-    X = random.rand(4,10)
+    from numpy.random import rand
+    X = rand(4,10)
     pcaobj = PCA(X)
     print "explained variance" + str(pcaobj.explained_variance)
@@ -115,8 +115,8 @@ def procrustes(a, b, strict=True, center=False, verbose=False):

     *Reference*:

-    Schonemann, A generalized solution of the orthogonal Procrustes problem,
-    Psychometrika, 1966
+    Schonemann, A generalized solution of the orthogonal Procrustes
+    problem, Psychometrika, 1966
     """

     if center:
@@ -131,9 +131,9 @@ def procrustes(a, b, strict=True, center=False, verbose=False):
         Cm = _ensure_strict(Cm)
     b_rot = dot(b, Cm)
     if verbose:
         print Cm.round()
-        fit = sum(ravel(b - b_rot)**2)
-        print "Error: %.3E" %fit
+        fit = ((b - b_rot)**2).sum()
+        fit2 = (dot(a, a.T) + dot(b, b.T) - 2*diag(s)).trace()
+        print "Error: %.2E , %.2E" %(fit, fit2)
     if center:
         return mn_b + b_rot
     else:
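Note: the new fit2 line is the closed-form Procrustes residual: with b'a = u s v', the optimal rotation is Cm = u v' and ||a - b Cm||_F^2 = tr(aa') + tr(bb') - 2*sum(s). A standalone check (hypothetical data; the direct residual is taken against a, which is what the identity describes):

    import numpy as np

    rng = np.random.default_rng(2)
    a = rng.standard_normal((10, 3))
    R, _ = np.linalg.qr(rng.standard_normal((3, 3)))    # random orthogonal matrix
    b = a @ R.T + 0.01 * rng.standard_normal((10, 3))   # rotated, noisy copy of a

    u, s, vt = np.linalg.svd(b.T @ a)
    Cm = u @ vt                                         # optimal rotation
    b_rot = b @ Cm

    fit = ((a - b_rot)**2).sum()                        # direct residual
    fit2 = np.trace(a @ a.T) + np.trace(b @ b.T) - 2 * s.sum()
    print(np.allclose(fit, fit2))                       # True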
@@ -159,7 +159,9 @@ def _ensure_strict(C, only_flips=True):

     *Notes*:

-    This function is not ready for use. Use (only_flips=True)
+    This function is not ready for use. Use (only_flips=True).
+    That is, for more than two components, the rotation matrix
+    has a tendency to be unstable (det(Cm)>1), when rounding is used.

     """
     if only_flips:
@@ -279,6 +281,16 @@ def _fdr(tsq, tsqp, loc_method=median):
     fdr : {array}
         False discovery rate

+    *Notes*:
+
+    This is an internal function for use in fdr estimation of jack-knifed
+    perturbed blm parameters.
+
+    *Reference*:
+
+    Gidskehaug et al., A framework for significance analysis of
+    gene expression data using dimension reduction methods, BMC
+    bioinformatics, 2007
+
     """
     n, = tsq.shape
     k, m = tsqp.shape
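Note: on the assumed semantics of these shapes, tsq holding n observed statistics and tsqp holding the same statistics under k perturbations, a permutation FDR takes the location (here the median, matching loc_method) of null exceedances over observed exceedances. A minimal sketch (hypothetical data, not the library routine):

    import numpy as np

    rng = np.random.default_rng(3)
    n, k = 50, 20
    tsq = rng.chisquare(2, size=n)
    tsq[:5] += 10.0                        # five strong effects
    tsqp = rng.chisquare(2, size=(k, n))   # null statistics per perturbation

    fdr = np.empty(n)
    for i, t in enumerate(np.sort(tsq)[::-1]):
        false_pos = np.median((tsqp >= t).sum(1))    # typical null exceedances
        fdr[i] = false_pos / max((tsq >= t).sum(), 1)
    print(fdr[:5].round(3))                # small FDR for the strong effects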