ups
This commit is contained in:
@@ -197,7 +197,7 @@ class PLS(Model):
|
||||
Model.__init__(self, id, name)
|
||||
self._options = PlsOptions()
|
||||
|
||||
def validation(self, amax, n_sets, cv_val_method):
|
||||
def validation(self):
|
||||
"""Returns rmsep for pls model.
|
||||
"""
|
||||
m, n = self.model['E0'].shape
|
||||
@@ -207,7 +207,7 @@ class PLS(Model):
|
||||
val_engine = pls_val
|
||||
if self._options['calc_cv']==True:
|
||||
rmsep, aopt = val_engine(self.model['E0'], self.model['F0'],
|
||||
amax, n_sets)
|
||||
self._options['amax'], self._options['n_sets'])
|
||||
self.model['rmsep'] = rmsep[:,:-1]
|
||||
self.model['aopt'] = aopt
|
||||
else:
|
||||
@@ -319,7 +319,7 @@ class PLS(Model):
|
||||
self.model['E0'] = self._data['X']
|
||||
self.model['F0'] = self._data['Y']
|
||||
|
||||
self.validation(**options.validation_options())
|
||||
self.validation()
|
||||
self.make_model(self.model['E0'], self.model['F0'],
|
||||
**options.make_model_options())
|
||||
# variance captured
|
||||
|
||||
+165
-45
@@ -6,81 +6,189 @@ There is almost no typechecking of any kind here, just focus on speed
|
||||
import math
|
||||
from scipy.linalg import svd,inv
|
||||
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
|
||||
apply_along_axis,mean,ones,randn,empty_like,outer,c_,\
|
||||
rand,sum,cumsum,matrix
|
||||
apply_along_axis,mean,ones,randn,empty_like,outer,r_,c_,\
|
||||
rand,sum,cumsum,matrix, expand_dims,minimum,where
|
||||
has_sym=True
|
||||
try:
|
||||
import symmeig
|
||||
from symeig import symeig
|
||||
except:
|
||||
has_sym = False
|
||||
has_sym=False
|
||||
|
||||
def pca(a, aopt, scale='scores', mode='normal'):
|
||||
""" Principal Component Analysis model
|
||||
mode:
|
||||
-- fast : returns smallest dim scaled (T for n<=m, P for n>m )
|
||||
-- normal : returns all model params and residuals after aopt comp
|
||||
-- detailed : returns all model params and all residuals
|
||||
"""
|
||||
def pca(a, aopt,scale='scores',mode='normal',center_axis=-1):
|
||||
""" Principal Component Analysis.
|
||||
|
||||
Performs PCA on given matrix and returns results in a dictionary.
|
||||
|
||||
:Parameters:
|
||||
a : array
|
||||
Data measurement matrix, (samples x variables)
|
||||
aopt : int
|
||||
Number of components to use, aopt<=min(samples, variables)
|
||||
|
||||
:Returns:
|
||||
results : dict
|
||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||
lev --leverages, ssq -- sum of squares, expvar -- cumulative
|
||||
explained variance, aopt -- number of components used
|
||||
|
||||
:OtherParameters:
|
||||
mode : str
|
||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||
center_axis : int
|
||||
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||
|
||||
:SeeAlso:
|
||||
- pcr : other blm
|
||||
- pls : other blm
|
||||
- lpls : other blm
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Uses kernel speed-up if m>>n or m<<n.
|
||||
|
||||
If residuals turn rank deficient, a lower number of component than given
|
||||
in input will be used. The number of components used is given in results-dict.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import scipy,engines
|
||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||
>>> dat=engines.pca(a, 2)
|
||||
>>> dat['expvar']
|
||||
array([0.,99.8561562, 100.])
|
||||
|
||||
"""
|
||||
if center_axis>=0:
|
||||
a = a - expand_dims(a.mean(center_axis), center_axis)
|
||||
m, n = a.shape
|
||||
#print "rows: %s cols: %s" %(m,n)
|
||||
if m>(n+100) or n>(m+100):
|
||||
u, s, v = esvd(a)
|
||||
u, e, v = esvd(a)
|
||||
s = sqrt(e)
|
||||
else:
|
||||
u, s, vt = svd(a, 0)
|
||||
v = vt.T
|
||||
eigvals = (1./m)*s
|
||||
e = s**2
|
||||
tol = 1e-10
|
||||
eff_rank = sum(s>s[0]*tol)
|
||||
aopt = minimum(aopt, eff_rank)
|
||||
T = u*s
|
||||
s = s[:aopt]
|
||||
e = e[:aopt]
|
||||
T = T[:,:aopt]
|
||||
P = v[:,:aopt]
|
||||
|
||||
if scale=='loads':
|
||||
tnorm = apply_along_axis(vnorm, 0, T)
|
||||
T = T/tnorm
|
||||
P = P*tnorm
|
||||
T = T/s
|
||||
P = P*s
|
||||
|
||||
if mode == 'fast':
|
||||
return {'T':T, 'P':P}
|
||||
return {'T':T, 'P':P, 'aopt':aopt}
|
||||
|
||||
if mode=='detailed':
|
||||
"""Detailed mode returns residual matrix for all comp.
|
||||
That is E, is a three-mode matrix: (amax, m, n) """
|
||||
E = empty((aopt, m, n))
|
||||
E = empty((aopt, m, n))
|
||||
ssq = []
|
||||
lev = []
|
||||
expvarx = empty((aopt, aopt+1))
|
||||
for ai in range(aopt):
|
||||
e = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
||||
E[ai,:,:] = e.copy()
|
||||
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
|
||||
ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
|
||||
if scale=='loads':
|
||||
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
|
||||
else:
|
||||
lev.append([(T**2).sum(1), ((s*P)**2).sum(1)])
|
||||
|
||||
expvarx[ai,:] = r_[0, 100*e.cumsum()/e.sum()]
|
||||
|
||||
else:
|
||||
E = a - dot(T,P.T)
|
||||
|
||||
return {'T':T, 'P':P, 'E':E}
|
||||
# residuals
|
||||
E = a - dot(T, P.T)
|
||||
SEP = E**2
|
||||
ssq = [SEP.sum(0), SEP.sum(1)]
|
||||
# leverages
|
||||
if scale=='loads':
|
||||
lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
|
||||
else:
|
||||
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
|
||||
# variances
|
||||
expvarx = r_[0, 100*e.cumsum()/e.sum()]
|
||||
|
||||
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}
|
||||
|
||||
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
|
||||
""" Principal Component Regression.
|
||||
|
||||
def pcr(a, b, aopt=2, scale='scores', mode='normal'):
|
||||
"""Principal Component Regression.
|
||||
Performs PCR on given matrix and returns results in a dictionary.
|
||||
|
||||
Returns
|
||||
:Parameters:
|
||||
a : array
|
||||
Data measurement matrix, (samples x variables)
|
||||
b : array
|
||||
Data response matrix, (samples x responses)
|
||||
aopt : int
|
||||
Number of components to use, aopt<=min(samples, variables)
|
||||
|
||||
:Returns:
|
||||
results : dict
|
||||
keys -- values, T -- scores, P -- loadings, E -- residuals,
|
||||
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
|
||||
explained variance, aopt -- number of components used
|
||||
|
||||
:OtherParameters:
|
||||
mode : str
|
||||
Amount of info retained, ('fast', 'normal', 'detailed')
|
||||
center_axis : int
|
||||
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
|
||||
|
||||
:SeeAlso:
|
||||
- pcr : other blm
|
||||
- pls : other blm
|
||||
- lpls : other blm
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
Uses kernel speed-up if m>>n or m<<n.
|
||||
|
||||
If residuals turn rank deficient, a lower number of component than given
|
||||
in input will be used. The number of components used is given in results-dict.
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import scipy,engines
|
||||
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
|
||||
>>> dat=engines.pca(a, 2)
|
||||
>>> dat['expvar']
|
||||
array([0.,99.8561562, 100.])
|
||||
|
||||
"""
|
||||
m, n = m_shape(a)
|
||||
B = empty((aopt, n, l))
|
||||
dat = pca(a, aopt=aopt, scale=scale, mode='normal', center_axis=0)
|
||||
k, l = m_shape(b)
|
||||
if center_axis>=0:
|
||||
b = b - expand_dims(b.mean(center_axis), center_axis)
|
||||
dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
|
||||
T = dat['T']
|
||||
weigths = apply_along_axis(vnorm, 0, T)
|
||||
weights = apply_along_axis(vnorm, 0, T)
|
||||
if scale=='loads':
|
||||
# fixme: check weights
|
||||
Q = dot(b.T, T*weights)
|
||||
Q = dot(b.T, T*weights**2)
|
||||
else:
|
||||
Q = dot(b.T, T/weights**2)
|
||||
|
||||
if mode=='fast':
|
||||
return {'T', T:, 'P':P, 'Q':Q}
|
||||
dat.update({'Q':Q})
|
||||
return dat
|
||||
if mode=='detailed':
|
||||
for i in range(1, aopt+1, 1):
|
||||
F[i,:,:] = b - dot(T[:,i],Q[:,:i].T)
|
||||
F = empty((aopt, k, l))
|
||||
for i in range(aopt):
|
||||
F[i,:,:] = b - dot(T[:,:i+1], Q[:,:i+1].T)
|
||||
else:
|
||||
F = b - dot(T, Q.T)
|
||||
#fixme: explained variance in Y + Y-var leverages
|
||||
dat.update({'Q',Q, 'F':F})
|
||||
dat.update({'Q':Q, 'F':F})
|
||||
return dat
|
||||
|
||||
def pls(a, b, aopt=2, scale='scores', mode='normal', ab=None):
|
||||
@@ -271,7 +379,6 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
|
||||
X, mnX = center(X, xctr)
|
||||
Y, mnY = center(Y, xctr)
|
||||
Z, mnZ = center(Z, zctr)
|
||||
print Z.mean(1)
|
||||
|
||||
varX = pow(X, 2).sum()
|
||||
varY = pow(Y, 2).sum()
|
||||
@@ -365,7 +472,7 @@ def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], mode='normal', sca
|
||||
def m_shape(array):
|
||||
return matrix(array).shape
|
||||
|
||||
def esvd(data):
|
||||
def esvd(data, amax=None):
|
||||
"""SVD with the option of economy sized calculation
|
||||
Calculate subspaces of X'X or XX' depending on the shape
|
||||
of the matrix.
|
||||
@@ -378,17 +485,30 @@ def esvd(data):
|
||||
m, n = data.shape
|
||||
if m>=n:
|
||||
kernel = dot(data.T, data)
|
||||
u, s, vt = svd(kernel)
|
||||
u = dot(data, vt.T)
|
||||
v = vt.T
|
||||
if has_sym:
|
||||
if not amax:
|
||||
amax = n
|
||||
pcrange = [n-amax, n]
|
||||
s, v = symeig(kernel, range=pcrange, overwrite=True)
|
||||
s = s[::-1]
|
||||
v = v[:,arange(n, -1, -1)]
|
||||
else:
|
||||
u, s, vt = svd(kernel)
|
||||
v = vt.T
|
||||
u = dot(data, v)
|
||||
for i in xrange(n):
|
||||
s[i] = vnorm(u[:,i])
|
||||
u[:,i] = u[:,i]/s[i]
|
||||
else:
|
||||
kernel = dot(data, data.T)
|
||||
#data = (data + data.T)/2.0
|
||||
u, s, vt = svd(kernel)
|
||||
v = dot(u.T, data)
|
||||
if has_sym:
|
||||
if not amax:
|
||||
amax = m
|
||||
pcrange = [m-amax, m]
|
||||
s, u = symeig(kernel, range=pcrange, overwrite=True)
|
||||
else:
|
||||
u, s, vt = svd(kernel)
|
||||
v = dot(u.T, data)
|
||||
for i in xrange(m):
|
||||
s[i] = vnorm(v[i,:])
|
||||
v[i,:] = v[i,:]/s[i]
|
||||
|
||||
Reference in New Issue
Block a user