Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0

... just lots of stuff

This commit is contained in:
Arnar Flatberg 2007-08-24 09:14:24 +00:00
parent 21b63b17e5
commit 7dbf28f65d
6 changed files with 134 additions and 109 deletions

View File

@ -18,6 +18,39 @@ from packer import Packer
import blmplots
class NewStyleModel(Function):
def __init__(self, id='johndoe', name='JohnDoe'):
Function.__init__(self, id, name)
self.name = name
self.options = Options
self.input_data = []
self.parts = {}
self.io_table = {}
self.datasets = []
self.plots = []
def create_dataset(self, param, Dataset=Dataset):
for ds in self.datasets:
if ds.get_name()==param: return ds
if not param in self.parts.keys():
logger.log('notice', 'Parameter: %s not present' %param)
return
if not param in self.io_table.keys():
logger.log('notice', 'Parameter: %s not in defined in io table' %param)
identifiers = self.io_table.get(param)
data = self.parts.get(param)
ds = Dataset(data, identifiers=identifiers, name=param)
self.datasets.append(dataset)
return ds
def create_plot(self, blmplot):
if blmplot.validate_model(self.parts):
plt = blmplot(self.parts)
self.plots.append(plt)
return plt
class Model(Function):
"""Base class of bilinear models.
"""
@ -80,7 +113,7 @@ class PCA(Model):
jk_segments = pca_jkP(self.model['E0'], aopt, n_sets)
Pcal = self.model['P'][:,:aopt]
# add the scale to P
# ensure scaled P
tnorm = scipy.apply_along_axis(norm, 0, self.model['T'][:,:aopt])
Pcal = Pcal*tnorm
tsq = hotelling(jk_segments, Pcal, p_center,
@ -91,32 +124,13 @@ class PCA(Model):
"""Model on optimal number of components.
"""
dat = pca(self.model['E0'], amax, scale, mode)
# explained variance
var_x, exp_var_x = variances(self.model['E0'], dat['T'], dat['P'])
dat['var_x'] = var_x
dat['exp_var_x'] = exp_var_x
#fixme###
do_lev_s = False
do_lev_v = False
#####
if do_lev_s:
# sample leverages
tnorm = scipy.apply_along_axis(norm, 0, dat['T']) # norm of Ts
s_lev = leverage(amax, tnorm)
dat['s_lev'] = s_lev
if do_lev_v:
# variable leverages
v_lev = leverage(amax, dat['P'])
dat['v_lev'] = v_lev
self.model.update(dat)
def as_dataset(self, param, dtype='dataset'):
"""Return model parameter as Dataset.
"""
if not param in self.model.keys():
logger.log('notice', 'Parameter: %s not in model' %param)
return
DX = self._dataset['X'] #input dataset
dim_name_0, dim_name_1 = DX.get_dim_name()
@ -139,6 +153,7 @@ class PCA(Model):
'sample_leverages' : [pc_ids, zero_dim],
'exp_var_x' : [pc_ids, zero_dim],
'var_x' : [pc_ids, zero_dim],
'eigvals' : [pc_ids, zero_dim]
}
out = Dataset(self.model[param], match_ids[param], name=param)
@ -256,7 +271,7 @@ class PLS(Model):
def make_model(self, a, b, amax, scale, mode, engine):
"""Make model on amax components.
"""
print "MAking model"
print "Making model"
dat = engine(a, b, amax, scale, mode)
self.model.update(dat)
@ -478,13 +493,6 @@ class LPLS(Model):
self._data['Z'] = c.asarray()
self.validation(options)
self.make_model(options)
print self.model['evx']
evx_str = [str(i)[:3] for i in self.model['evx']]
logger.log('notice', 'Explained variance:X\n\t: ' + str(evx_str))
evy_str = [str(i)[:3] for i in self.model['evy']]
logger.log('notice', 'Explained variance:Y\n\t: ' + str(evy_str))
evz_str = [str(i)[:3] for i in self.model['evz']]
logger.log('notice', 'Explained variance:Z\n\t: ' + str(evz_str))
if options['calc_conf']:
self.confidence(options)
@ -553,13 +561,15 @@ class PcaOptions(Options):
opt['all_plots'] = [(blmplots.PcaScorePlot, 'Scores', True),
(blmplots.PcaLoadingPlot, 'Loadings', True),
(blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', False)
(blmplots.PredictionErrorPlot, 'Residual Error', False),
(blmplots.PcaScreePlot, 'Scree', True)
]
opt['out_data'] = ['T','P', 'p_tsq']
opt['out_plots'] = [blmplots.PcaScorePlot,
blmplots.PcaLoadingPlot,
blmplots.LineViewXc]
blmplots.LineViewXc,
blmplots.PcaScreePlot]
self.update(opt)
@ -621,7 +631,8 @@ class PlsOptions(Options):
# (class, name, sensitive, ticked)
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
(blmplots.PlsLoadingPlot, 'Loadings', True),
(blmplots.PlsXLoadingPlot, 'X-Loadings', True),
(blmplots.PlsYLoadingPlot, 'Y-Loadings', True),
(blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', False),
(blmplots.RMSEPPlot, 'RMSEP', False),
@ -629,7 +640,7 @@ class PlsOptions(Options):
]
opt['out_data'] = ['T','P', 'w_tsq']
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsLoadingPlot,blmplots.LineViewXc]
opt['out_plots'] = [blmplots.PlsScorePlot,blmplots.PlsXLoadingPlot,blmplots.PlsYLoadingPlot,blmplots.LineViewXc]
#opt['out_data'] = None
@ -699,11 +710,9 @@ class LplsOptions(Options):
]
# (class, name, sensitive, ticked)
opt['all_plots'] = [(blmplots.PlsScorePlot, 'Scores', True),
(blmplots.PlsLoadingPlot, 'Loadings', True),
opt['all_plots'] = [(blmplots.LplsScorePlot, 'Scores', True),
(blmplots.LplsXLoadingPlot, 'Loadings', True),
(blmplots.LineViewXc, 'Line view', True),
(blmplots.PredictionErrorPlot, 'Residual Error', False),
(blmplots.RMSEPPlot, 'RMSEP', False),
(blmplots.LplsHypoidCorrelationPlot, 'Hypoid corr.', False),
(blmplots.LplsXCorrelationPlot, 'X corr.', True),
(blmplots.LplsZCorrelationPlot, 'Z corr.', True)

View File

@ -8,7 +8,7 @@ fixme:
from matplotlib import cm,patches
import gtk
import fluents
from fluents import plots, main
from fluents import plots, main,logger
import scipy
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty
from scipy.stats import corrcoef
@ -80,10 +80,17 @@ class BlmScatterPlot(plots.ScatterPlot):
"""Set patch sizes."""
pass
def set_expvar_axlabels(self, param="evx"):
def set_expvar_axlabels(self, param=None):
if param == None:
param = self._expvar_param
else:
self._expvar_param = param
if not self.model.model.has_key(param):
self.model.model[param] = None
if self.model.model[param]==None:
logger.log('notice', 'Param: %s not in model' %param)
print self.model.model.keys()
print self.model.model[param]
pass #fixme: do expvar calc here if not present
else:
expvar = self.model.model[param]
@ -127,7 +134,7 @@ class BlmScatterPlot(plots.ScatterPlot):
self.selection_collection._offsets = xy
self.canvas.draw_idle()
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
new_lims = (self.xaxis_data.min()+pad, self.xaxis_data.max()+pad)
new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
self.axes.set_xlim(new_lims, emit=True)
self.set_expvar_axlabels()
self.canvas.draw_idle()
@ -140,7 +147,7 @@ class BlmScatterPlot(plots.ScatterPlot):
self.sc._offsets = xy
self.selection_collection._offsets = xy
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
new_lims = (self.yaxis_data.min()+pad, self.yaxis_data.max()+pad)
new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
self.axes.set_ylim(new_lims, emit=True)
self.set_expvar_axlabels()
self.canvas.draw_idle()
@ -159,19 +166,28 @@ class BlmScatterPlot(plots.ScatterPlot):
for indx,txt in self._text_labels.items():
if indx in index:
txt.set_visible(True)
self.canvas.draw()
self.canvas.draw_idle()
def hide_labels(self):
for txt in self._text_labels.values():
txt.set_visible(False)
self.canvas.draw()
self.canvas.draw_idle()
class PcaScreePlot(plots.BarPlot):
def __init__(self, model):
title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
ds = model.as_dataset('eigvals')
if ds==None:
logger.log('notice', 'Model does not contain eigvals')
plots.BarPlot.__init__(self, ds, name=title)
class PcaScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pca scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
self.set_expvar_axlabels(param="expvarx")
class PcaLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
@ -185,13 +201,19 @@ class PlsScorePlot(BlmScatterPlot):
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
class PlsLoadingPlot(BlmScatterPlot):
class PlsXLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls loadings (%s)" %model._dataset['X'].get_name()
title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
#self.set_expvar_axlabels(self, param="expvarx")
class PlsYLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q')
class PlsCorrelationLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
@ -402,9 +424,21 @@ class TRBiplot(plots.ScatterPlot):
class InfluencePlot(plots.ScatterPlot):
""" Returns a leverage vs resiudal scatter plot.
"""
"""
pass
def __init__(self, model, dim, name="Influence"):
if not model.model.has_key('levx'):
logger.log('notice', 'Model has no calculations of leverages')
return
if not model.model.has_key('ssqx'):
logger.log('notice', 'Model has no calculations of residuals')
return
ds1 = model.as_dataset('levx')
ds2 = model.as_dataset('ssqx')
plots.ScatterPlot.__init__(self, ds1, ds2,
id_dim, sel_dim, id_1, id_2,
c=col, s=20, sel_dim_2=sel_dim_2,
name='Load Volcano')
class RMSEPPlot(plots.BarPlot):

View File

@ -11,7 +11,7 @@ import time
def hotelling(Pcv, P, p_center='med', cov_center='med',
alpha=0.3, crot=True, strict=False, metric=None):
alpha=0.3, crot=True, strict=False):
"""Returns regularized hotelling T^2.
alpha -- regularisation towards pooled cov estimates
@ -21,13 +21,9 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
alpha -- regularisation
crot -- rotate submodels toward full?
strict -- only rotate 90 degree ?
metric -- inverse metric matrix (if Pcv and P from metric pca/pls)
"""
m, n = P.shape
if metric==None:
metric = eye(m, dtype='<f8')
P = dot(metric.T, asarray(P))
n_sets, n, amax = Pcv.shape
# allocate
T_sq = empty((n, ),dtype='f')
@ -36,7 +32,6 @@ def hotelling(Pcv, P, p_center='med', cov_center='med',
# rotate sub_models to full model
if crot:
for i, Pi in enumerate(Pcv):
Pi = dot(metric.T, Pi)
Pcv[i] = procrustes(P, Pi, strict=strict)
# center of pnull
@ -118,7 +113,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
center=True,
sim_method='shuffle',
p_center='med', cov_center='med',
crot=True, strict=False, metric=None):
crot=True, strict=False):
"""Returns qvals for pls model.
@ -133,7 +128,6 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use stict (rot/flips only) rotations?
metric -- bool, use row metric?
"""
m, n = a.shape
@ -144,13 +138,12 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
if center:
ac = a - a.mean(0)
bc = b - b.mean(0)
if metric!=None:
ac = dot(ac, metric)
if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast')
else:
dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric, center=True)
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
@ -162,7 +155,7 @@ def pls_qvals(a, b, aopt=None, alpha=.3,
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
else:
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo, metric=metric)
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
@ -205,10 +198,10 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
n_iter=20, algo='pls',
sim_method='shuffle',
p_center='med', cov_center='med',
crot=True, strict=False, metric=None):
crot=True, strict=False):
"""Returns qvals for pls model.
Shuffling of variables in X is preprocessed in metric.
Shuffling of variables in X.
Null model is 'If I put genes randomly on network' ... if they are sign:
then this is due to network structure and not covariance with response.
@ -223,7 +216,6 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use stict (rot/flips only) rotations?
metric -- bool, use row metric?
"""
m, n = a.shape
@ -236,13 +228,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
if center==True:
ac = a - a.mean(0)
bc = b - b.mean(0)
if metric==None:
metric = eye(n,n)
if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast')
else:
dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
@ -251,13 +242,12 @@ def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
for i, a_shuff in enumerate(Vs):
t1 = time.time()
a = a_shuff - a_shuff.mean(0)
a = dot(a, metric)
if algo=='bridge':
dat = bridge(a, b, aopt, 'loads','fast')
else:
dat = pls(a, b, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo, metric=metric)
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)

View File

@ -68,7 +68,6 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
a = a - expand_dims(a.mean(center_axis), center_axis)
if m>(n+100) or n>(m+100):
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
print s[:10]
else:
u, s, vt = svd(a, 0)
v = vt.T
@ -94,7 +93,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
lev = []
for ai in range(aopt):
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
ssq.append([(E[ai,:,:]**2).sum(0), (E[ai,:,:]**2).sum(1)])
ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
if scale=='loads':
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
else:
@ -112,7 +111,7 @@ def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
# variances
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt}
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
""" Principal Component Regression.
@ -282,7 +281,7 @@ def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
if i>0:
for j in range(0, i, 1):
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
print vnorm(r)
t = dot(a, r)
tt = vnorm(t)**2
p = dot(a.T, t)/tt
@ -375,14 +374,11 @@ def w_pls(aat, b, aopt):
u = dot(b , q) #y-factor scores
U[:,i] = u.ravel()
t = dot(aat, u)
print "Norm of t: %s" %vnorm(t)
print "s: %s" %s
t = t/vnorm(t)
T[:,i] = t.ravel()
r = dot(aat, t)#score-weights
#r = r/vnorm(r)
print "Norm R: %s" %vnorm(r)
R[:,i] = r.ravel()
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
if i<aopt:
@ -701,10 +697,9 @@ def esvd(data, amax=None):
pcrange = None
else:
pcrange = [n-amax, n]
print "symm>n"
s, v = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1]
v = v[:,::-1]
s = s[::-1].real
v = v[:,::-1].real
else:
u, s, vt = svd(kernel)
v = vt.T
@ -718,7 +713,6 @@ def esvd(data, amax=None):
pcrange = None
else:
pcrange = [m-amax, m]
print "sym (m<n)"
s, u = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1]
u = u[:,::-1]
@ -726,8 +720,8 @@ def esvd(data, amax=None):
u, s, vt = svd(kernel)
s = sqrt(s)
v = dot(data.T, u)/s
print s[:2]
return u, s, v
# some use of symeig returns the 0 imaginary part
return u.real, s.real, v.real
def vnorm(x):
# assume column arrays (or vectors)

View File

@ -38,7 +38,7 @@ def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
else:
yield aat_in,aat_out,b_in,b_out
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=None):
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
"""Random block crossvalidation
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
"""
@ -61,15 +61,14 @@ def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0, metric=No
mn_b = bcal.mean(0)[newaxis]
bcal = bcal - mn_b
btrue = btrue - mn_b
if metric!=None:
acal = dot(acal, metric)
if index_out:
yield acal, atrue, bcal, btrue, out
else:
yield acal, atrue, bcal, btrue
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
"""Returns a generator of crossvalidation sample segments.
input:
@ -97,8 +96,7 @@ def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0, metric=None):
mn_a = acal.mean(0)[newaxis]
acal = acal - mn_a
atrue = atrue - mn_a
if metric!=None:
acal = dot(acal, metric)
if index_out:
yield acal, atrue, out
else:

View File

@ -80,12 +80,12 @@ def w_pls_cv_val(X, Y, amax, n_blocks=None, algo='simpls'):
aopt = find_aopt_from_sep(msep)
return sqrt(msep)
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls', metric=None):
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
k, l = m_shape(Y)
PRESS = zeros((l, amax+1), dtype='<f8')
EE = zeros((amax, k, l), dtype='<f8')
Yhat = zeros((amax, k, l), dtype='<f8')
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True, metric=metric)
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
for Xin, Xout, Yin, Yout, out in V:
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
Yin = (Yin - ym)
@ -187,7 +187,7 @@ def pca_cv_val(a, amax, n_sets):
return sep, aopt
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, metric=None):
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
""" Returns CV-segments of paramter W for wide X.
todo: add support for T,Q and B
@ -196,11 +196,11 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
n_blocks = b.shape[0]
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
if use_pack and metric==None:
if use_pack:
u, s, inflater = svd(a, full_matrices=0)
a = u*s
V = pls_gen(a, b, n_blocks=n_blocks, center=center, metric=metric)
V = pls_gen(a, b, n_blocks=n_blocks, center=center)
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
if algo=='pls':
dat = pls(a_in, b_in, amax, 'loads', 'fast')
@ -209,14 +209,14 @@ def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True, m
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
W = dat['W']
if use_pack and metric==None:
if use_pack:
W = dot(inflater.T, W)
Wcv[nn,:,:] = W[:,:,]
return Wcv
def pca_jkP(a, aopt, n_blocks=None, metric=None):
def pca_jkP(a, aopt, n_blocks=None):
"""Returns loading from PCA on CV-segments.
input: