Projects/pyblm

Fixed conflicts

Arnar Flatberg 2007-12-14 00:16:31 +00:00
parent 1103245d85
commit 253305b602
5 changed files with 177 additions and 164 deletions

View File

@@ -13,4 +13,3 @@ def test(level=1, verbosity=1):
     print 'Python version %s' % (sys.version.replace('\n', '',),)
     from numpy.testing import NumpyTest
     return NumpyTest().test(level, verbosity)

View File

@@ -217,6 +217,12 @@ def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, center_axis=[2,0,2], zorth=F
         # predictions
         for a in range(a_max):
             Yhat[a,val,:] = atleast_2d(ym + dot(xi, dat['B'][a]))
+    # todo: need a better support for classification error
+    y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
+    if y_is_class:
+        pass
+        #Yhat, err = class_error(Yhat, Y)
+        #return Yhat, err
     sep = (Y - Yhat)**2
     rmsep = sqrt(sep.mean(1)).T
     #aopt = find_aopt_from_sep(rmsep)
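
For reference, the RMSEP at the end of this hunk is the root-mean-square error of prediction per response and per number of components, taken over the cross-validated predictions in Yhat. A minimal sketch of the same calculation in modern NumPy (the function name rmsep and the exact array shapes are assumptions for illustration, not taken from the repository):

    import numpy as np

    def rmsep(Y, Yhat):
        """Root-mean-square error of prediction.

        Y    : (n_samples, n_responses) reference values
        Yhat : (a_max, n_samples, n_responses) cross-validated predictions
        """
        sep = (Y - Yhat) ** 2               # squared errors, broadcast over a_max
        return np.sqrt(sep.mean(axis=1)).T  # (n_responses, a_max), as in the diff

    Y = np.random.randn(10, 2)
    Yhat = np.random.randn(3, 10, 2)        # predictions for 1..3 components
    print(rmsep(Y, Yhat).shape)             # (2, 3)
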
@@ -510,6 +516,8 @@ def diag_cv(shape, nsets=9, randomise=True):
     except:
         raise ValueError("shape needs to be a two-tuple")
     if nsets>m or nsets>n:
+        msg = "You may not use more subsets than max(n_rows, n_cols)"
+        raise ValueError, msg
         msg = "You may not use more subsets than max(n_rows, n_cols)"
         nsets = min(m, n)
     nm = n*m
@@ -525,6 +533,19 @@ def diag_cv(shape, nsets=9, randomise=True):
         #training = [j for j in index if j not in validation]
         yield list(validation)
 
+def class_error(y_hat, y, method='vanilla'):
+    """ Not used.
+    """
+    a_opt, k, l = y_hat.shape
+    y_hat_c = zeros((k, l), dtype='d')
+    if method == vanilla:
+        pass
+    for a in range(a_opt):
+        for i in range(k):
+            y_hat_c[a, val, argmax(y_hat[a,val,:])] = 1.0
+    err = 100*((y_hat_c + y) == 2).sum(1)/y.sum(0).astype('d')
+    return y_hat_c, err
+
 def prediction_error(y_hat, y, method='squared'):
     """Loss function on multiclass Y.

View File

@@ -710,13 +710,13 @@ def center(a, axis):
     if axis == -1:
         mn = zeros((1,a.shape[1],))
-        mn = tile(mn, (a.shape[0], 1))
+        #mn = tile(mn, (a.shape[0], 1))
     elif axis == 0:
         mn = a.mean(0)[newaxis]
-        mn = tile(mn, (a.shape[0], 1))
+        #mn = tile(mn, (a.shape[0], 1))
     elif axis == 1:
         mn = a.mean(1)[:,newaxis]
-        mn = tile(mn, (1, a.shape[1]))
+        #mn = tile(mn, (1, a.shape[1]))
     elif axis == 2:
         #fixme: double centering returns column mean as loc-vector, ok?
         mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
@@ -825,13 +825,12 @@ def esvd(data, a_max=None):
     """
     m, n = data.shape
-    if m >= n:
+    if m > n:
         kernel = dot(data.T, data)
         if a_max == None:
             a_max = n - 1
         s, v = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
-                                      maxiter=200, tol=1e-5)
+                                      maxiter=500, tol=1e-7)
         s = s[::-1]
         v = v[:,::-1]
         #u, s, vt = svd(kernel)
@@ -841,9 +840,9 @@ def esvd(data, a_max=None):
     else:
         kernel = dot(data, data.T)
         if a_max == None:
-            a_max = m -1
+            a_max = m - 1
         s, u = arpack.eigen_symmetric(kernel, k=a_max, which='LM',
-                                      maxiter=200, tol=1e-5)
+                                      maxiter=500, tol=1e-7)
         s = s[::-1]
         u = u[:,::-1]
         #u, s, vt = svd(kernel)
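
For context, esvd obtains a truncated SVD through whichever Gram matrix, dot(data.T, data) or dot(data, data.T), is the smaller one; the hunks above only tighten the ARPACK settings (maxiter 200 to 500, tol 1e-5 to 1e-7), adjust the m-versus-n branch condition, and fix a spacing slip. A sketch of the same idea with scipy.sparse.linalg.eigsh standing in for the old arpack.eigen_symmetric wrapper (the function name and the recovery of the second set of singular vectors are my own illustration, not code from the repository):

    import numpy as np
    from scipy.sparse.linalg import eigsh

    def esvd_sketch(X, a_max=3):
        m, n = X.shape
        if m > n:
            # the n x n Gram matrix is the cheaper eigenproblem
            s2, V = eigsh(X.T @ X, k=a_max, which='LM', maxiter=500, tol=1e-7)
            s2, V = s2[::-1], V[:, ::-1]      # descending order, as in the diff
            s = np.sqrt(np.clip(s2, 0.0, None))
            U = X @ V / s                     # recover the left singular vectors
        else:
            s2, U = eigsh(X @ X.T, k=a_max, which='LM', maxiter=500, tol=1e-7)
            s2, U = s2[::-1], U[:, ::-1]
            s = np.sqrt(np.clip(s2, 0.0, None))
            V = X.T @ U / s                   # recover the right singular vectors
        return U, s, V

    X = np.random.randn(50, 8)
    U, s, V = esvd_sketch(X, a_max=3)
    print(np.allclose(s, np.linalg.svd(X, compute_uv=False)[:3]))   # True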

View File

@@ -159,7 +159,7 @@ class PCA(Model):
         doc = "mean_centered input data"
         def fget(self):
             if not hasattr(self, "_xc"):
                 self._xc = self.x + self.xadd
             return self._xc
         def fset(self, xc):
             self._xc = xc
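
This hunk touches the lazy _xc property of the PCA model: the centered data matrix is computed on first access as self.x + self.xadd and cached in _xc afterwards (the visible change appears to be whitespace-only). A minimal sketch of the same caching pattern with present-day property decorators; the class name is purely illustrative and not part of pyblm:

    class CachedCentering:
        def __init__(self, x, xadd):
            self.x = x
            self.xadd = xadd

        @property
        def xc(self):
            # compute the centered data once, then reuse the cached value
            if not hasattr(self, "_xc"):
                self._xc = self.x + self.xadd
            return self._xc

        @xc.setter
        def xc(self, value):
            self._xc = value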

View File

@@ -79,18 +79,13 @@ def hotelling(Pcv, P, p_center='median', cov_center='median',
     for i in xrange(n):
         Pi = Pcv[:,i,:] # (n_sets x amax)
         Pi_ctr = P_ctr[i,:] # (1 x amax)
-        #Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
-        #Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
-        Pim = (Pi - Pi_ctr)
-        Cov_i[i] = dot(Pim.T, Pim)
+        Pim = (Pi - Pi_ctr)*msqrt(n_sets-1)
+        Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
     if cov_center == 'median':
         Cov_p = median(Cov_i)
-    elif cov_center == 'mean':
+    else cov_center == 'mean':
         Cov_p = Cov.mean(0)
-    else:
-        print "Pooled covariance est. invalid, using median"
-        print cov_center
-        Cov_p = median(Cov_i)
     reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p
     for i in xrange(n):
         Pc = P_ctr[i,:]
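
In this hunk each variable's leave-out covariance Cov_i[i] is now built from msqrt(n_sets-1)-scaled deviations and then shrunk toward a pooled estimate via reg_cov = (1. - alpha)*Cov_i + alpha*Cov_p. A hedged sketch of that shrinkage step and the Hotelling T^2 it feeds, in modern NumPy; the function name, the axis argument to median, and the final quadratic form are assumptions (only the shrinkage line itself appears in the diff):

    import numpy as np

    def shrunk_hotelling_tsq(Pcv, P_ctr, alpha=0.2, cov_center='median'):
        n_sets, n, amax = Pcv.shape
        Cov_i = np.empty((n, amax, amax))
        for i in range(n):
            Pim = (Pcv[:, i, :] - P_ctr[i, :]) * np.sqrt(n_sets - 1)
            Cov_i[i] = Pim.T @ Pim / n_sets
        if cov_center == 'median':
            Cov_p = np.median(Cov_i, axis=0)
        else:
            Cov_p = Cov_i.mean(axis=0)
        reg_cov = (1.0 - alpha) * Cov_i + alpha * Cov_p   # shrink toward the pool
        tsq = np.empty(n)
        for i in range(n):
            Pc = P_ctr[i, :]
            tsq[i] = Pc @ np.linalg.inv(reg_cov[i]) @ Pc  # Hotelling's T^2 per variable
        return tsq
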
@@ -145,7 +140,7 @@ def procrustes(a, b, strict=True, center=False, force_norm=False, verbose=False)
     u, s, vt = svd(dot(b.T, a))
     Cm = dot(u, vt) # Cm: orthogonal rotation matrix
     if strict:
         Cm = _ensure_strict(Cm)
     b_rot = dot(b, Cm)
     if verbose:
         fit = ((b - b_rot)**2).sum()
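
The procrustes hunk (where the visible change appears to be whitespace-only) centres on the classic orthogonal Procrustes solution: the rotation aligning b with a is dot(u, vt) from the SVD of dot(b.T, a). A small self-contained illustration of that property (the names are mine, not the repository's):

    import numpy as np

    def procrustes_rotation(a, b):
        u, s, vt = np.linalg.svd(b.T @ a)
        Cm = u @ vt                  # orthogonal rotation matrix, as in the diff
        return b @ Cm                # b rotated onto a

    a = np.random.randn(20, 3)
    Q = np.linalg.qr(np.random.randn(3, 3))[0]        # a random orthogonal matrix
    b = a @ Q                                         # a rotated/reflected copy of a
    print(np.allclose(procrustes_rotation(a, b), a))  # True up to round-off
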
@@ -264,7 +259,7 @@ def lpls_qvals(X, Y, Z, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
     Wc, Lc = lpls_jk(X, Y, Z ,aopt, zorth=zorth)
     cal_tsq_x = hotelling(Wc, dat['W'], alpha=alpha)
     cal_tsq_z = hotelling(Lc, dat['L'], alpha=alpha)
-    print "morn"
     # Perturbations
     index = arange(m)
     for i in range(n_iter):
@@ -403,4 +398,3 @@ def _fdr(tsq, tsqp, loc_method=median):
     fd_rate = fp/n_signif
     fd_rate[fd_rate>1] = 1
     return fd_rate
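
The closing _fdr hunk shows the final step of the false-discovery-rate estimate: the expected number of false positives (apparently derived from the permuted statistics tsqp) is divided by the number of significant calls, and the ratio is capped at 1. A tiny numeric illustration with made-up counts (the values are not from the repository):

    import numpy as np

    n_signif = np.array([50., 20., 5.])   # significant features at three thresholds
    fp = np.array([5., 8., 10.])          # expected false positives from permutations
    fd_rate = fp / n_signif
    fd_rate[fd_rate > 1] = 1              # an estimated FDR cannot exceed 100%
    print(fd_rate)                        # [0.1 0.4 1. ]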