Sandbox version of models

This commit is contained in:
Arnar Flatberg 2007-10-31 18:19:00 +00:00
parent 5b91e2d809
commit 902806c1d8
2 changed files with 291 additions and 4 deletions

@@ -140,7 +140,7 @@ def pca_jk(a, aopt, n_blocks=None):
def pls_jk(X, Y, a_opt, nsets=None, center=True, verbose=False):
    """ Returns jack-knife segments of W.
    *Parameters*:
    X : {array}
        Main data matrix (m, n)
@@ -251,12 +251,14 @@ def find_aopt_from_sep(sep, method='vanilla'):
    closely before deciding on the optimal number of components.

    *Parameters*:
    sep : {array}
        Squared error of prediction
    method : ['vanilla', '75perc']
        Method used to estimate optimal number of components

    *Returns*:
    aopt : {integer}
        A guess on the optimal number of components
    """
@@ -297,12 +297,14 @@ def cv(N, K, randomise=True, sequential=False):
        Use sequential sampling

    *Returns*:
    training : {array-like}
        training indices
    validation : {array-like}
        validation indices

    *Notes*:
    If randomise is true, a copy of index is shuffled before partitioning,
    otherwise its order is preserved in training and validation.
@@ -334,9 +338,10 @@ def cv(N, K, randomise=True, sequential=False):
def diag_cv(shape, nsets=9):
    """Generates K (training, validation) index pairs.
-    Parameters:
+    *Parameters*:
    N : {integer}
    alpha -- scalar, approx. portion of data perturbed
    """
    try:
        m, n = shape
@@ -371,7 +376,7 @@ def class_error(Yhat, Y, method='vanilla'):
    return Yhat_c, err

-def class_errorII(T, Y, method='lda'):
+def _class_errorII(T, Y, method='lda'):
    """ Not used ...
    """
    pass
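
The cv() docstring patched above promises K disjoint (training, validation) index pairs over range(N). For readers without the full source at hand, a minimal sketch of that contract (an illustration, not the pyblm implementation; the interleaved assignment scheme is an assumption):

import random

def cv_sketch(N, K, randomise=True):
    # Yield K (training, validation) index pairs covering range(N).
    index = list(range(N))
    if randomise:
        random.shuffle(index)  # a shuffled copy, per the *Notes* above
    for k in range(K):
        validation = [i for j, i in enumerate(index) if j % K == k]
        training = [i for j, i in enumerate(index) if j % K != k]
        yield training, validation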

pyblm/models.py (new file, 282 lines)

@@ -0,0 +1,282 @@
"""Bilinear models"""
from numpy import expand_dims
from engines import pca
def mean_center(x, axis=0):
    """Mean center across axis (returns negated means; add to x to center)."""
    return expand_dims(-x.mean(axis), axis)

def scale(x, axis=0):
    """Scale across axis."""
    weights = ones((x.shape[axis - 1],))
    #weights = 1./x.std(axis)
    return expand_dims(weights, axis)
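
# These helpers return centering/scaling *terms* rather than transformed
# copies: centering becomes an addition and scaling a multiplication
# downstream. A hedged usage sketch (variable names are illustrative):
#
#   x = random.rand(5, 3)
#   xadd = mean_center(x, axis=0)  # shape (1, 3): negated column means
#   xc = x + xadd                  # column-centered data
#   w = scale(xc, axis=0)          # shape (1, 3): unit weights for now
#   xw = xc * w                    # centered and (trivially) scaled data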
class Model(object):
    def __init__(self, name="johndoe"):
        self.name = name
        self.options = {}

    def save(self, filename='pca.ml'):
        pass

    def load(self):
        pass

    def clear(self):
        # drop cached attributes (single leading underscore only)
        for param in self.__dict__.keys():
            if param.startswith("_") and param[1] != "_":
                delattr(self, param)

    def clear_core(self):
        # drop every underscored attribute, including the core data
        for param in self.__dict__.keys():
            if param.startswith("_"):
                delattr(self, param)
        #self.clear()
class PCA(Model):
    def __init__(self, x, amax=10):
        Model.__init__(self, name="PCA")
        self._x = x
        self.amax = amax
        self.aopt = amax

    # properties
    def amax():
        doc = "maximum number of components"
        def fget(self):
            return self._amax
        def fset(self, a):
            assert a > 0
            a = min(a, min(self.x.shape))
            if hasattr(self, "_amax"):
                if a > self._amax:
                    # fixme: do a model update
                    raise NotImplementedError
            self._amax = a
        def fdel(self):
            pass
        return locals()
    amax = property(**amax())
    def tot_var():
        doc = "total variance"
        def fget(self):
            if not hasattr(self, "_tot_var"):
                self._tot_var = (self.xw**2).sum()
            return self._tot_var
        def fset(self, tv):
            self._tot_var = tv
        def fdel(self):
            del self._tot_var
        return locals()
    tot_var = property(**tot_var())
    def scores():
        doc = "pca scores"
        def fget(self):
            if not hasattr(self, "_scores"):
                u, s, v, tot_var = pcaengine(self.xw, self.amax)
                self._scores = u
                self.singvals = s
                self.loadings = v
                self.tot_var = tot_var
            return self._scores[:, :self.amax]
        def fset(self, t):
            self._scores = t
        def fdel(self):
            del self._scores
        return locals()  # credit: David Niergarth
    scores = property(**scores())
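
    # Every attribute in this class repeats the property(**locals()) idiom
    # above: the outer function is only a namespace, and its locals
    # (doc, fget, fset, fdel) are unpacked as the keyword arguments of
    # property(). A stripped-down sketch of the pattern in isolation:
    #
    #   class Example(object):
    #       def value():
    #           doc = "a lazily cached value"
    #           def fget(self):
    #               if not hasattr(self, "_value"):
    #                   self._value = 42  # stand-in for an expensive computation
    #               return self._value
    #           def fset(self, v):
    #               self._value = v
    #           def fdel(self):
    #               del self._value
    #           return locals()
    #       value = property(**value())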
    def loadings():
        doc = "pca loadings"
        def fget(self):
            if not hasattr(self, "_loadings"):
                u, s, v, tot_var = pcaengine(self.xw, self.amax)
                self._loadings = v
                self.scores = u
                self.singvals = s
                self.tot_var = tot_var
            return self._loadings[:, :self.amax]
        def fset(self, p):
            self._loadings = p
        def fdel(self):
            del self._loadings
        return locals()
    loadings = property(**loadings())
    def singvals():
        doc = "singular values"
        def fget(self):
            if not hasattr(self, "_singvals"):
                u, s, v, tot_var = pcaengine(self.xw, self.amax)
                self._singvals = s
                self.scores = u
                self.loadings = v
                self.tot_var = tot_var
            return self._singvals[:self.amax]
        def fset(self, w):
            self._singvals = w
        def fdel(self):
            del self._singvals
        return locals()
    singvals = property(**singvals())
    def x():
        doc = "x is read-only and may not be deleted"
        def fget(self):
            return self._x
        def fdel(self):
            pass
        return locals()
    x = property(**x())
    def xadd():
        doc = "column means"
        def fget(self):
            if not hasattr(self, "_xadd"):
                self._xadd = mean_center(self.x, axis=0)
            return self._xadd
        def fset(self, mnx):
            if hasattr(self, "_xc"):
                del self._xc
            self._xadd = mnx
        def fdel(self):
            del self._xadd
            if hasattr(self, "_xc"):
                del self._xc
        return locals()
    xadd = property(**xadd())
    def xc():
        doc = "centered input data"
        def fget(self):
            if not hasattr(self, "_xc"):
                self._xc = self.x + self.xadd
            return self._xc
        def fset(self, xc):
            self._xc = xc
        def fdel(self):
            del self._xc
        return locals()
    xc = property(**xc())
    def xw():
        doc = "scaled input data"
        def fget(self):
            if not hasattr(self, "_xw"):
                if self.x.shape == self.row_metric.shape:
                    self._xw = dot(self.xc, self.row_metric)
                else:
                    self._xw = self.xc * self.row_metric
            return self._xw
        def fset(self, xw):
            self._xw = xw
        def fdel(self):
            del self._xw
        return locals()
    xw = property(**xw())
    def explained_variance():
        doc = "explained variance"
        def fget(self):
            if not hasattr(self, "_explained_variance"):
                self._explained_variance = 100*(self.singvals**2)/self.tot_var
            return self._explained_variance[:self.amax]
        def fset(self, ev):
            self._explained_variance = ev
        def fdel(self):
            del self._explained_variance
        return locals()
    explained_variance = property(**explained_variance())
    def residuals():
        doc = "residuals"
        def fget(self):
            if not hasattr(self, "_residuals"):
                res = empty((self.amax, self.x.shape[0], self.x.shape[1]))
                for a in range(self.amax):
                    res[a, :, :] = self.xw - dot(self.scores[:, :a + 1], self.loadings[:, :a + 1].T)
                self._residuals = res
            return self._residuals
        def fset(self, e):
            self._residuals = e
        def fdel(self):
            del self._residuals
        return locals()
    residuals = property(**residuals())
    def leverage():
        doc = "objects' leverage"
        def fget(self):
            if not hasattr(self, "_leverage"):
                u = self.scores/self.singvals
                self._leverage = empty((self.amax, u.shape[0]))
                for i in range(self.amax):
                    self._leverage[i, :] = 1./u.shape[0] + (u[:, :i + 1]**2).sum(1)
            return self._leverage[:self.amax, :]
        def fset(self, lev):
            self._leverage = lev
        def fdel(self):
            del self._leverage
        return locals()
    leverage = property(**leverage())
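
    # The leverage above is 1/m plus the row sums of the squared normalized
    # scores, i.e. the diagonal of the hat matrix of the first components
    # plus the centering term. A quick numeric check of that identity
    # (a sketch assuming only numpy, not part of the module):
    #
    #   from numpy import dot, diag, allclose, random, linalg
    #   x = random.rand(6, 4)
    #   u, s, vt = linalg.svd(x - x.mean(0), full_matrices=False)
    #   h = 1./x.shape[0] + (u[:, :2]**2).sum(1)
    #   assert allclose(h, 1./x.shape[0] + diag(dot(u[:, :2], u[:, :2].T)))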
    def row_metric():
        doc = "row metric"
        def fget(self):
            if not hasattr(self, "_row_metric"):
                self._row_metric = scale(self.xc, axis=0)
            return self._row_metric
        def fset(self, w):
            self._row_metric = w
        def fdel(self):
            del self._row_metric
            if hasattr(self, "_xw"):
                # invalidate the cached scaled data
                del self._xw
        return locals()
    row_metric = property(**row_metric())
    def column_metric():
        doc = "column metric"
        def fget(self):
            if not hasattr(self, "_column_metric"):
                self._column_metric = scale(self.xc, axis=1)
            return self._column_metric
        def fset(self, w):
            self._column_metric = w
            # update model
        def fdel(self):
            del self._column_metric
            if hasattr(self, "_xw"):
                # invalidate the cached scaled data
                del self._xw
        return locals()
    column_metric = property(**column_metric())
    def blm_update(self, a, b):
        pass

    def append_columns(self, cols):
        pass

    def append_rows(self, rows):
        pass

    def delete_columns(self, index):
        pass

    def delete_rows(self, index):
        pass

    def reweight(self, w):
        # stub; the original signature was incomplete, parameter name assumed
        pass
if __name__ == "__main__":
    X = random.rand(4, 10)
    pcaobj = PCA(X)
    print "explained variance: " + str(pcaobj.explained_variance)