laydi/fluents/lib/cv_index.py

from numpy import array_split,arange

    
def cv(n, k, randomise=False, sequential=False):
    """
    Generate k (training, validation) index pairs.

    Each pair is a partition of range(n), where validation holds roughly
    n/k of the indices.

    Arguments:
    n -- number of samples; the indices 0 .. n-1 are partitioned.
    k -- number of pairs (folds) to generate; must be at least 1.
    randomise -- if true, a shuffled copy of the index is partitioned, so
        fold membership is random. Randomise overrides the sequential
        argument: if randomise is true, sequential is treated as False.
    sequential -- if true, the index is partitioned in contiguous blocks,
        otherwise interleaved ordering is used (every k-th position of the
        index goes to the same validation set).

    Yields (training, validation). training is a list. validation is a
    numpy array in sequential mode and a list otherwise. Without
    randomise, both keep the ascending order of the index.

    Raises ValueError (when iteration starts) if k < 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    index = list(range(n))
    if randomise:
        from random import shuffle
        shuffle(index)
        sequential = False

    if sequential:
        for validation in array_split(index, k):
            # A set makes the membership test O(1) per element instead of
            # scanning the validation array for every index.
            held_out = set(validation.tolist())
            training = [i for i in index if i not in held_out]
            yield training, validation
    else:
        for fold in range(k):
            # Select by position, not by value: when the index has been
            # shuffled this is what makes the fold membership random.
            training = [i for pos, i in enumerate(index) if pos % k != fold]
            validation = index[fold::k]
            yield training, validation

def shuffle_diag(shape, K, randomise=False, sequential=False):
    """
    Generate K arrays of flat indices into a matrix of the given shape.

    The start indices 0, m, 2*m, ..., (n-1)*m are divided into K groups.
    For every start index in a group, the strided run
    start, start + step, start + 2*step, ... (all below m*n, with
    step = max(m, n) + 1) is generated. The runs belonging to one group
    are concatenated into the single array yielded for that group.

    Arguments:
    shape -- (m, n) tuple.
    K -- number of groups (index arrays) to generate; must be at least 1
        and no larger than min(m, n).
    randomise -- if true, the start indices are shuffled before grouping.
        Randomise overrides the sequential argument.
    sequential -- if true, the start indices are grouped in contiguous
        blocks, otherwise interleaved (every K-th start index goes to the
        same group).

    Yields one numpy integer array per group.

    Raises ValueError (when iteration starts) if K < 1 or K > min(m, n).

    NOTE(review): a step of max(m, n) + 1 presumably walks the diagonals
    of a square matrix; confirm the intended behaviour for non-square
    shapes.
    """
    from numpy import concatenate

    m, n = shape

    if K < 1:
        raise ValueError("K must be at least 1")
    if K > m or K > n:
        # The condition rejects K as soon as it exceeds either dimension,
        # i.e. when K > min(m, n); the message states exactly that.
        msg = "You may not use more subsets than min(n_rows, n_cols)"
        raise ValueError(msg)

    step = max(m, n) + 1
    total = m * n
    # Every multiple of m below m*n (n start indices in all).
    index = list(range(0, total, m))
    if randomise:
        from random import shuffle
        shuffle(index)
        sequential = False

    if sequential:
        groups = array_split(index, K)
    else:
        # Keep every group, not just the one from the last pass.
        groups = [index[fold::K] for fold in range(K)]

    for starts in groups:
        # K <= n guarantees each group holds at least one start index.
        yield concatenate([arange(start, total, step) for start in starts])
New crossval index generator 2007-09-25 08:31:40 +02:00			`from numpy import array_split,arange`


			`def cv(n, k, randomise=False, sequential=False):`
			`"""`
			`Generates k (training, validation) index pairs.`

			`Each pair is a partition of arange(n), where validation is an iterable`
			`of length ~n/k.`

			`If randomise is true, a copy of index is shuffled before partitioning,`
			`otherwise its order is preserved in training and validation.`

			`Randomise overrides the sequential argument. If randomise is true,`
			`sequential is False`

			`If sequential is true the index is partioned in continous blocks,`
			`otherwise interleaved ordering is used.`
			`"""`
			`index = xrange(N)`
			`if randomise:`
			`from random import shuffle`
			`index = list(index)`
			`shuffle(index)`
			`sequential = False`
			`if sequential:`
			`for validation in array_split(index, K):`
			`training = [i for i in index if i not in validation]`
			`yield training, validation`
			`else:`
			`for k in xrange(K):`
			`training = [i for i in index if i % K != k]`
			`validation = [i for i in index if i % K == k]`
			`yield training, validation`

			`def shuffle_diag(shape, K, randomise=False, sequential=False):`
			`"""`
			`Generates k (training, validation) index pairs.`
			`"""`
			`m, n = shape`

			`if K>m or K>n:`
			`msg = "You may not use more subsets than max(n_rows, n_cols)"`
			`raise ValueError, msg`

			`mon = max(m, n)`
			`#index = xrange(n)`
			`index = [i for i in range(m*n) if i % m == 0]`
			`print index`
			`if randomise:`
			`from random import shuffle`
			`index = list(index)`
			`shuffle(index)`
			`sequential = False`

			`if sequential:`
			`start_inds = array_split(index, K)`
			`else:`
			`for k in xrange(K):`
			`start_inds = [index[i] for i in xrange(n) if i % K == k]`

			`print start_inds`
			`for start in start_inds:`
			`ind = arange(start, n*m, mon+1)`
			`yield ind`