This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/fluents/lib/select_generators.py

188 lines
5.9 KiB
Python

"""Matrix cross validation selection generators
"""
from scipy import take,arange,ceil,repeat,newaxis,mean,asarray,dot,ones,\
random,array_split,floor,vstack,asarray,minimum
from cx_utils import randperm
def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
"""Random block crossvalidation for wide (XX.T) trick in PLS.
Leave-one-out is a subset, with n_blocks equals nSamples
aat -- outerproduct of X
b -- Y
n_blocks =
center -- use centering of calibration ,sets (aat_in,b_in) are centered
Returns:
-- aat_in,aat_out,b_in,b_out,[out]
"""
m,n = aat.shape
index = randperm(m)
nValuesInBlock = m/n_blocks
if n_blocks==m:
index = arange(m)
out_ind = [index[i*nValuesInBlock:(i+1)*nValuesInBlock] for i in range(n_blocks)]
for out in out_ind:
inn = [i for i in index if i not in out]
aat_in = aat[inn,:][:,inn]
aat_out = aat[out,:][:,inn]
b_in = b[inn,:]
b_out = b[out,:]
if center:
# centering projector: I - (1/n)11'
# nin = len(inn)
# Pc = eye(nin) - outer(ones((nin,)),ones((nin,)))/nin
# xxt - x( outer(ones((nin,)),ones((nin,)))/nin ) x.T
# de jong:
h = sum(aat_in,0)[ :,newaxis]
h = (h - mean(h)/2)/len(inn)
mn_a = h + h.T
aat_in = aat_in - mn_a
if index_out:
yield aat_in,aat_out,b_in,b_out,out
else:
yield aat_in,aat_out,b_in,b_out
def pls_gen(a,b, n_blocks=None, center=False, index_out=False,axis=0):
"""Random block crossvalidation
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
"""
index = randperm(a.shape[axis])
if n_blocks==None:
n_blocks = a.shape[axis]
n_in_set = ceil(float(a.shape[axis])/n_blocks)
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
if center:
a = a - mean(a,0)[newaxis]
b = b - mean(b,0)[newaxis]
if index_out:
yield a.take(inn,0),a.take(out,0), b.take(inn,0),b.take(out,0),out
else:
yield a.take(inn,0),a.take(out,0), b.take(inn,0),b.take(out,0)
def pca_gen(a,n_sets=None, center=False, index_out=False,axis=0):
"""PCA random block crossval generator.
"""
m = a.shape[axis]
index = randperm(m)
if n_sets==None:
n_sets = m
n_in_set = ceil(float(m)/n_sets)
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
if center:
a = a - mean(a,0)[newaxis]
if index_out:
yield a.take(inn,0),a.take(out,0),out
else:
yield a.take(inn,0),a.take(out,0)
def w_pls_gen_jk(a,b,n_sets=None,center=True,index_out=False,axis=0):
"""Random block crossvalidation for wide X (m>>n)
Leave-one-out is a subset, with n_sets equals a.shape[-1]
Returns : X_m and X_m'Y_m
"""
m = a.shape[axis]
ab = dot(a.T,b)
index = randperm(m)
if n_sets==None:
n_sets = m
n_in_set = ceil(float(m)/n_sets)
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
nin = len(inn)
nout = len(out)
a_in = a[inn,:]
mn_a = 0
mAB = 0
if center:
mn_a = mean(a,0)[newaxis]
mAin = dot(-ones((1,nout)),a[out,:])/nin
mBin = dot(-ones((1,nout)),b[out,:])/nin
mAB = dot(mAin.T,(mBin*nin))
ab_in = ab - dot(a[out,].T,b[out,:]) - mAB
a_in = a_in - mn_a
if index_out:
yield ain,ab, out
else:
yield a_in, ab
def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
"""Random block shuffling along 1d axis
Returns : Shuffled a by axis
"""
m = a.shape[axis]
if blocks==None:
blocks = m
for ii in xrange(n_sets):
index = randperm(m)
if blocks==m:
a_out = a.take(index, axis)
else:
index = arange(m)
dummy = map(random.shuffle, array_split(index, blocks))
a_out = a.take(index, axis)
if index_out:
yield a_out, index
else:
yield a_out
def shuffle_1d(a, n_sets, axis=0):
"""Random shuffling along 1d axis.
Returns : Shuffled a by axis
"""
m = a.shape[axis]
for ii in xrange(n_sets):
index = randperm(m)
yield a.take(index, axis)
def diag_pert(a, n_sets=10, center=True, index_out=False):
"""Alter generator returning sets perturbed with means at diagonals.
input:
X -- matrix, data
alpha -- scalar, approx. portion of data perturbed
"""
m, n = a.shape
tr=False
if m>n:
a = a.T
m, n = a.shape
tr = True
if n_sets>m or n_sets>n:
msg = "You may not use more subsets than max(n_rows, n_cols)"
raise ValueError, msg
nm=n*m
start_inds = array_split(randperm(m),n_sets) # we use random start diags
if center:
a = a - mean(a, 0)[newaxis]
for v in range(n_sets):
a_out = a.copy()
out = []
for start in start_inds[v]:
ind = arange(start+v, nm, n+1)
[out.append(i) for i in ind]
if center:
a_out.put(a.mean(),ind)
else:
a_out.put(0, ind)
if tr:
a_out = a_out.T
if index_out:
yield a_out, asarray(out)
else:
yield a_out