BREAKING STUFF!

Rename fluents to laydi.
2008-12-05 21:48:24 +00:00
parent 45a06fab7f
commit 27e4504bf6
31 changed files with 0 additions and 0 deletions

laydi/__init__.py Normal file (3 lines)

@@ -0,0 +1,3 @@
import main

laydi/annotations.py Normal file (105 lines)

@@ -0,0 +1,105 @@
from laydi import dataset
_dim_annotation_handlers = {}
def get_dim_annotations(dimname, annotation, ids):
"""Returns a list of annotations corresponding to the given ids in
dimension dimname"""
global _dim_annotation_handlers
if _dim_annotation_handlers.has_key(dimname):
return _dim_annotation_handlers[dimname].get_annotations(annotation, ids)
return None
def set_dim_handler(dimname, handler):
"""Set the handler for the given dimension."""
global _dim_annotation_handlers
_dim_annotation_handlers[dimname] = handler
def get_dim_handler(dimname):
"""Get the handler for the given dimension."""
global _dim_annotation_handlers
if _dim_annotation_handlers.has_key(dimname):
return _dim_annotation_handlers[dimname]
else:
return None
class AnnotationHandler:
def __init__(self):
pass
def get_annotations(self, annotationname, ids, default=None):
return None
def get_annotation_names(self):
return []
class DictAnnotationHandler(AnnotationHandler):
def __init__(self, d=None):
if d == None:
d = {}
self._dict = d
def get_annotations(self, annotationname, ids, default=None):
d = self._dict
retval = []
for id in ids:
if d[annotationname].has_key(id):
retval.append(d[annotationname][id])
else:
retval.append(default)
return retval
def add_annotations(self, annotationname, d):
self._dict[annotationname] = d
def get_annotation_names(self):
return self._dict.keys()
def read_annotations_file(filename):
"""Read annotations from file.
Reads annotations from a tab delimited file of the format::
dimname annotation_name1 annotation_name2 ...
id1 Foo 0.43
id2 Bar 0.59
"""
ann = DictAnnotationHandler()
dimname = None
annotation_dicts = []
annotation_names = []
fd = open(filename)
## Read the first line, which contains the dimension name and
## annotation names.
line = fd.readline()
values = [x.strip() for x in line.split('\t')]
dimname = values[0]
annotation_names = values[1:]
annotation_dicts = [{} for x in annotation_names]
## Read the lines containing the annotations. The first value on
## each line is an id along the dimension.
line = fd.readline()
while line:
values = [x.strip() for x in line.split('\t')]
for i, x in enumerate(values[1:]):
annotation_dicts[i][values[0]] = x
line = fd.readline()
fd.close()
## Add everything to the annotation object and add the object to
## the specified dimension.
for i, a in enumerate(annotation_names):
ann.add_annotations(a, annotation_dicts[i])
_dim_annotation_handlers[dimname] = ann
return ann
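A minimal usage sketch for the handler registry above (the dimension name 'genes' and the ids are illustrative; assumes the module is importable as laydi.annotations):

from laydi import annotations

# Build a handler mapping ids to a 'symbol' annotation.
handler = annotations.DictAnnotationHandler()
handler.add_annotations('symbol', {'gene_1': 'TP53', 'gene_2': 'BRCA1'})

# Register it for the 'genes' dimension and query it back.
annotations.set_dim_handler('genes', handler)
print annotations.get_dim_annotations('genes', 'symbol', ['gene_1', 'gene_2', 'gene_3'])
# -> ['TP53', 'BRCA1', None]   (ids without an annotation get the default)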

laydi/cfgparse.py Normal file (1762 lines)

File diff suppressed because it is too large

laydi/dataset.py Normal file (748 lines)

@@ -0,0 +1,748 @@
from scipy import ndarray,atleast_2d,asarray,intersect1d,zeros,empty,sparse,\
where
from scipy import sort as array_sort
from itertools import izip
import shelve
import copy
import re
class Dataset(object):
"""The Dataset base class.
A Dataset is an n-way array with defined string identifiers across
all dimensions.
example of use:
---
dim_name_rows = 'rows'
names_rows = ('row_a','row_b')
ids_1 = [dim_name_rows, names_rows]
dim_name_cols = 'cols'
names_cols = ('col_a','col_b','col_c','col_d')
ids_2 = [dim_name_cols, names_cols]
Array_X = rand(2,4)
data = Dataset(Array_X,(ids_1,ids_2),name="Testing")
dim_names = [dim for dim in data]
column_identifiers = [id for id in data['cols'].keys()]
column_index = [index for index in data['cols'].values()]
'cols' in data -> True
---
data = Dataset(rand(10,20)) (generates dims and ids (no links))
"""
def __init__(self, array, identifiers=None, name='Unnamed dataset'):
self._dims = [] #existing dimensions in this dataset
self._map = {} # internal mapping for dataset: identifier <--> index
self._name = name
self._identifiers = identifiers
if not isinstance(array, sparse.spmatrix):
array = atleast_2d(asarray(array))
# vectors are stored as column arrays
if array.shape[0] == 1:
array = array.T
self.shape = array.shape
if identifiers != None:
self._validate_identifiers(identifiers)
self._set_identifiers(identifiers, self._all_dims)
else:
self._identifiers = self._create_identifiers(self.shape, self._all_dims)
self._set_identifiers(self._identifiers, self._all_dims)
self._array = array
def __iter__(self):
"""Returns an iterator over dimensions of dataset."""
return self._dims.__iter__()
def __contains__(self,dim):
"""Returns True if dim is a dimension name in dataset."""
# return self._dims.__contains__(dim)
return self._map.__contains__(dim)
def __len__(self):
"""Returns the number of dimensions in the dataset"""
return len(self._map)
def __getitem__(self,dim):
"""Return the identifers along the dimension dim."""
return self._map[dim]
def _create_identifiers(self, shape, all_dims):
"""Creates dimension names and identifier names, and returns
identifiers."""
dim_names = ['rows','cols']
ids = []
for axis, n in enumerate(shape):
if axis < 2:
dim_suggestion = dim_names[axis]
else:
dim_suggestion = 'dim'
dim_suggestion = self._suggest_dim_name(dim_suggestion, all_dims)
identifier_creation = [str(axis) + "_" + i for i in map(str, range(n))]
ids.append((dim_suggestion, identifier_creation))
all_dims.add(dim_suggestion)
return ids
def _set_identifiers(self, identifiers, all_dims):
"""Creates internal mapping of identifiers structure."""
for dim, ids in identifiers:
pos_map = ReverseDict()
if dim not in self._dims:
self._dims.append(dim)
all_dims.add(dim)
else:
raise ValueError, "Dimension names must be unique whitin dataset"
for pos, id in enumerate(ids):
pos_map[id] = pos
self._map[dim] = pos_map
def _suggest_dim_name(self,dim_name,all_dims):
"""Suggests a unique name for dim and returns it"""
c = 0
new_name = dim_name
while new_name in all_dims:
new_name = dim_name + "_" + str(c)
c += 1
return new_name
def asarray(self):
"""Returns the numeric array (data) of dataset"""
if isinstance(self._array, sparse.spmatrix):
return self._array.toarray()
return self._array
def set_array(self, array):
"""Adds array as an ArrayType object.
A one-dim array is transformed to a two-dim array (row-vector)
"""
if not isinstance(array, type(self._array)):
raise ValueError("Input array of type: %s does not match existing array type: %s" %(type(array), type(self._array)))
if self.shape != array.shape:
raise ValueError, "Input array must be of similar dimensions as dataset"
self._array = atleast_2d(asarray(array))
def get_name(self):
"""Returns dataset name"""
return self._name
def get_all_dims(self):
"""Returns all dimensions in project"""
return self._all_dims
def get_dim_name(self, axis=None):
"""Returns dim name for an axis, if no axis is provided it
returns a list of dims"""
if type(axis) == int:
return self._dims[axis]
else:
return [dim for dim in self._dims]
def common_dims(self, ds):
"""Returns a list of the common dimensions in the two datasets."""
dims = self.get_dim_name()
ds_dims = ds.get_dim_name()
return [d for d in dims if d in ds_dims]
def get_identifiers(self, dim, indices=None, sorted=False):
"""Returns identifiers along dim, sorted by position (index)
is optional.
You can optionally provide a list/ndarray of indices to get
only the identifiers of a given position.
Identifiers are the unique names (strings) for a variable in a
given dim. Index (Indices) are the Identifiers position in a
matrix in a given dim.
"""
if indices != None:
if len(indices) == 0:# if empty list or empty array
return []
if indices != None:
# be sure to match intersection
#indices = intersect1d(self.get_indices(dim),indices)
ids = [self._map[dim].reverse[i] for i in indices]
else:
if sorted == True:
ids = [self._map[dim].reverse[i] for i in array_sort(self._map[dim].values())]
else:
ids = self._map[dim].keys()
return ids
def get_indices(self, dim, idents=None):
"""Returns indices for identifiers along dimension.
You can optionally provide a list of identifiers to retrieve a
index subset.
Identifiers are the unique names (strings) for a variable in a
given dim. Index (Indices) are the Identifiers position in a
matrix in a given dim. If none of the input identifiers are
found an empty index is returned
"""
if not isinstance(idents, list) and not isinstance(idents, set):
raise ValueError("idents needs to be a list/set got: %s" %type(idents))
if idents == None:
index = array_sort(self._map[dim].values())
else:
index = [self._map[dim][key]
for key in idents if self._map[dim].has_key(key)]
return asarray(index)
def existing_identifiers(self, dim, idents):
"""Filters a list of identifiers to find those that are present in the
dataset.
The most common use of this function is to get a list of
identifiers who correspond one to one with the list of indices produced
when get_indices is given an identifier list. That is
ds.get_indices(dim, idents) and ds.existing_identifiers(dim, idents)
will have the same order.
@param dim: A dimension present in the dataset.
@param idents: A list of identifiers along the given dimension.
@return: A list of identifiers in the same order as idents, but
without elements not present in the dataset.
"""
if not isinstance(idents, list) and not isinstance(idents, set):
raise ValueError("idents needs to be a list/set got: %s" %type(idents))
return [key for key in idents if self._map[dim].has_key(key)]
def copy(self):
""" Returns deepcopy of dataset.
"""
return copy.deepcopy(self)
def subdata(self, dim, idents):
"""Returns a new dataset based on dimension and given identifiers.
"""
ds = self.copy()
indices = ds.get_indices(dim, idents)
idents = ds.get_identifiers(dim, indices=indices)
if not idents:
raise ValueError("No of identifers from: \n%s \nfound in %s" %(str(idents), ds._name))
ax = [i for i, name in enumerate(ds._dims) if name == dim][0]
subarr = ds._array.take(indices, ax)
new_indices = range(len(idents))
ds._map[dim] = ReverseDict(zip(idents, new_indices))
ds.shape = tuple(len(ds._map[d]) for d in ds._dims)
ds.set_array(subarr)
return ds
def transpose(self):
"""Returns a copy of transpose of a dataset.
For the moment there is only support for 2D arrays.
"""
assert(len(self.shape) == 2)
ds = self.copy()
ds._array = ds._array.T
ds._dims.reverse()
ds.shape = ds._array.shape
return ds
def _validate_identifiers(self, identifiers):
for dim_name, ids in identifiers:
if len(set(ids)) != len(ids):
raise ValueError("Identifiers not unique in : %s" %dim_name)
identifier_shape = [len(i[1]) for i in identifiers]
if len(identifier_shape) != len(self.shape):
raise ValueError("Identifier list length must equal array dims")
for ni, na in zip(identifier_shape, self.shape):
if ni != na:
raise ValueError, "Identifier-array mismatch: %s: (idents: %s, array: %s)" %(self._name, ni, na)
class CategoryDataset(Dataset):
"""The category dataset class.
A dataset for representing class information as binary
matrices (0/1-matrices).
There is support for using a less memory-demanding, sparse format. The
preferred (default) format for a category dataset is the compressed
sparse row format (csr).
Always has linked dimension in first dim:
ex matrix:
. go_term1 go_term2 ...
gene_1
gene_2
gene_3
.
.
.
"""
def __init__(self, array, identifiers=None, name='C'):
Dataset.__init__(self, array, identifiers=identifiers, name=name)
def as_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
return self._array
else:
arr = self.asarray()
return sparse.csr_matrix(arr.astype('i'))
def to_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
self._array = self._array.tocsr()
else:
self._array = sparse.csr_matrix(self._array)
def as_dictlists(self):
"""Returns data as dict of identifiers along first dim.
ex: data['gene_1'] = ['map0030','map0010', ...]
fixme: Deprecated?
"""
data = {}
for name, ind in self._map[self.get_dim_name(0)].items():
if isinstance(self._array, ndarray):
indices = self._array[ind,:].nonzero()[0]
elif isinstance(self._array, sparse.spmatrix):
if not isinstance(self._array, sparse.csr_matrix):
array = self._array.tocsr()
else:
array = self._array
indices = array[ind,:].indices
if len(indices) == 0: # should we allow categories with no members?
continue
data[name] = self.get_identifiers(self.get_dim_name(1), indices)
self._dictlists = data
return data
def as_selections(self):
"""Returns data as a list of Selection objects.
The list of selections is not ordered (sorted) by any means.
"""
ret_list = []
for cat_name, ind in self._map[self.get_dim_name(1)].items():
if isinstance(self._array, sparse.spmatrix):
if not isinstance(self._array, sparse.csc_matrix):
self._array = self._array.tocsc()
indices = self._array[:,ind].indices
else:
indices = self._array[:,ind].nonzero()[0]
if len(indices) == 0:
continue
ids = self.get_identifiers(self.get_dim_name(0), indices)
selection = Selection(cat_name)
selection.select(self.get_dim_name(0), ids)
ret_list.append(selection)
return ret_list
class GraphDataset(Dataset):
"""The graph dataset class.
A dataset class for representing graphs. The constructor accepts an
incidence matrix (possibly sparse). If the networkx library is
installed, there is also support for representing the graph as a
networkx.Graph or networkx.XGraph structure.
"""
def __init__(self, input, identifiers=None, name='A', nodepos = None):
if isinstance(input, sparse.spmatrix):
arr = input
else:
try:
arr = asarray(input)
except:
raise ValueError("Could not identify input")
Dataset.__init__(self, array=arr, identifiers=identifiers, name=name)
self._graph = None
self.nodepos = nodepos
def as_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
return self._array
else:
arr = self.asarray()
return sparse.csr_matrix(arr.astype('i'))
def to_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
self._array = self._array.tocsr()
else:
self._array = sparse.csr_matrix(self._array)
def asnetworkx(self):
if self._graph != None:
return self._graph
dim0, dim1 = self.get_dim_name()
node_ids = self.get_identifiers(dim0, sorted=True)
edge_ids = self.get_identifiers(dim1, sorted=True)
G, weights = self._graph_from_incidence_matrix(self._array, node_ids=node_ids, edge_ids=edge_ids)
self._graph = G
return G
def from_networkx(cls, G, node_dim, edge_dim, sp_format=True):
"""Create graph dataset from networkx graph.
When G is a Graph/DiGraph, edge identifiers will be created;
else (XGraph/XDiGraph) it is assumed that edge attributes are
the edge identifiers.
"""
import networkx as nx
n = G.number_of_nodes()
m = G.number_of_edges()
if isinstance(G, nx.DiGraph):
G = nx.XDiGraph(G)
elif isinstance(G, nx.Graph):
G = nx.XGraph(G)
edge_ids = [e[2] for e in G.edges()]
node_ids = map(str, G.nodes())
n2ind = {}
for ind, node in enumerate(node_ids):
n2ind[node] = ind
if sp_format:
I = sparse.lil_matrix((n, m))
else:
I = zeros((m, n), dtype='i')
for i, (h, t, eid) in enumerate(G.edges()):
if eid != None:
edge_ids[i] = eid
else:
edge_ids[i] = 'e_' + str(i)
hind = n2ind[str(h)]
tind = n2ind[str(t)]
I[hind, i] = 1
if G.is_directed():
I[tind, i] = -1
else:
I[tind, i] = 1
idents = [[node_dim, node_ids], [edge_dim, edge_ids]]
if G.name != '':
name = G.name
else:
name = 'A'
ds = GraphDataset(I, idents, name)
return ds
from_networkx = classmethod(from_networkx)
def _incidence2adjacency(self, I):
"""Incidence to adjacency matrix.
I*I.T - eye(n)?
"""
raise NotImplementedError
def _graph_from_incidence_matrix(self, I, node_ids, edge_ids):
"""Creates a networkx graph class from incidence
(possibly weighted) matrix and ordered labels.
labels = None, results in string-numbered labels
"""
try:
import networkx as nx
except:
print "Failed in import of NetworkX"
return None
m, n = I.shape
assert(m == len(node_ids))
assert(n == len(edge_ids))
weights = []
directed = False
G = nx.XDiGraph(name=self._name)
if isinstance(I, sparse.spmatrix):
I = I.tocsr()
for ename, col in izip(edge_ids, I.T):
if isinstance(I, sparse.spmatrix):
node_ind = col.indices
w1, w2 = col.data
else:
node_ind = where(col != 0)[0]
w1, w2 = col[node_ind]
node1 = node_ids[node_ind[0]]
node2 = node_ids[node_ind[1]]
if w1 < 0: # w1 is tail
directed = True
assert(w2 > 0 and (w1 + w2) == 0)
G.add_edge(node2, node1, ename)
weights.append(w2)
else: #w2 is tail or graph is undirected
assert(w1 > 0)
if w2 < 0:
directed = True
G.add_edge(node1, node2, ename)
weights.append(w1)
if not directed:
G = G.to_undirected()
return G, asarray(weights)
Dataset._all_dims = set()
class ReverseDict(dict):
"""A dictionary which can lookup values by key, and keys by value.
All values and keys must be hashable, and unique.
example:
>>d = ReverseDict((['a',1],['b',2]))
>>print d['a'] --> 1
>>print d.reverse[1] --> 'a'
"""
def __init__(self, *args, **kw):
dict.__init__(self, *args, **kw)
self.reverse = dict([[v, k] for k, v in self.items()])
def __setitem__(self, key, value):
dict.__setitem__(self, key, value)
try:
self.reverse[value] = key
except:
self.reverse = {value:key}
class Selection(dict):
"""Handles selected identifiers along each dimension of a dataset"""
def __init__(self, title='Unnamed Selection'):
self.title = title
def __getitem__(self, key):
if not self.has_key(key):
return None
return dict.__getitem__(self, key)
def dims(self):
return self.keys()
def axis_len(self, axis):
if self.has_key(axis):
return len(self[axis])
return 0
def select(self, axis, labels):
self[axis] = labels
def write_ftsv(fd, ds, decimals=7, sep='\t', fmt=None, sp_format=True):
"""Writes a dataset in fluents tab separated values (ftsv) form.
@param fd: An open file descriptor to the output file.
@param ds: The dataset to be written.
@param decimals: Number of decimals, only used for plain datasets.
@param fmt: String formatting
The function handles datasets of these classes:
Dataset, CategoryDataset and GraphDataset
"""
opened = False
if isinstance(fd, str):
fd = open(fd, 'w')
opened = True
# Write header information
if isinstance(ds, CategoryDataset):
type = 'category'
if fmt == None:
fmt = '%d'
elif isinstance(ds, GraphDataset):
type = 'network'
if fmt == None:
fmt = '%d'
elif isinstance(ds, Dataset):
type = 'dataset'
if fmt == None:
fmt = '%%.%df' % decimals
else:
fmt = '%%.%d' %decimals + fmt
else:
raise Exception("Unknown object type")
fd.write('# type: %s' %type + '\n')
for dim in ds.get_dim_name():
fd.write("# dimension: %s" % dim)
for ident in ds.get_identifiers(dim, sorted=True):
fd.write(" " + ident)
fd.write("\n")
fd.write("# name: %s" % ds.get_name() + '\n')
# xy-node-positions
if type == 'network' and ds.nodepos != None:
fd.write("# nodepos:")
node_dim = ds.get_dim_name(0)
for ident in ds.get_identifiers(node_dim, sorted=True):
fd.write(" %s,%s" %ds.nodepos[ident])
fd.write("\n")
# Write data
if hasattr(ds, "as_spmatrix") and sp_format == True:
m = ds.as_spmatrix()
else:
m = ds.asarray()
if isinstance(m, sparse.spmatrix):
_write_sparse_elements(fd, m, fmt, sep)
else:
_write_elements(fd, m, fmt, sep)
if opened:
fd.close()
def read_ftsv(fd, sep=None):
"""Read a dataset in fluents tab separated values (ftsv) form and return it.
@param fd: An open file descriptor.
@return: A Dataset, CategoryDataset or GraphDataset depending on the information
read.
"""
opened = False
if isinstance(fd, str):
fd = open(fd)
opened = True
split_re = re.compile('^#\s*(\w+)\s*:\s*(.+)')
dimensions = []
identifiers = {}
type = 'dataset'
name = 'Unnamed dataset'
sp_format = False
nodepos = None
# graphtype = 'graph'
# Read header lines from file.
line = fd.readline()
while line:
m = split_re.match(line)
if m:
key, val = m.groups()
# The line is on the form;
# dimension: dimname id1 id2 id3 ...
if key == 'dimension':
values = [v.strip() for v in val.split(' ')]
dimensions.append(values[0])
identifiers[values[0]] = values[1:]
# Read type of dataset.
# Should be dataset, category, or network
elif key == 'type':
type = val
elif key == 'name':
name = val
# storage format
# if sp_format is True then use coordinate triplets
elif key == 'sp_format':
if val in ['False', 'false', '0', 'F', 'f',]:
sp_format = False
elif val in ['True', 'true', '1', 'T', 't']:
sp_format = True
else:
raise ValueError("sp_format: %s not valid " %sp_format)
elif key == 'nodepos':
node_dim = dimensions[0]
idents = identifiers[node_dim]
nodepos = {}
xys = val.split(" ")
for node_id, xy in zip(idents, xys):
x, y = map(float, xy.split(","))
nodepos[node_id] = (x, y)
else:
break
line = fd.readline()
# Dimensions in the form [(dim1, [id1, id2, id3, ...]), ...]
dims = [(x, identifiers[x]) for x in dimensions]
dim_lengths = [len(identifiers[x]) for x in dimensions]
# Create matrix and assign element reader
if type == 'category':
if sp_format:
matrix = sparse.lil_matrix(dim_lengths)
else:
matrix = empty(dim_lengths, dtype='i')
else:
if sp_format:
matrix = sparse.lil_matrix(dim_lengths)
else:
matrix = empty(dim_lengths)
if sp_format:
matrix = _read_sparse_elements(fd, matrix)
else:
matrix = _read_elements(fd, matrix)
# Create dataset of specified type
if type == 'category':
ds = CategoryDataset(matrix, dims, name)
elif type == 'network':
ds = GraphDataset(matrix, dims, name=name, nodepos=nodepos)
else:
ds = Dataset(matrix, dims, name)
if opened:
fd.close()
return ds
def _write_sparse_elements(fd, arr, fmt='%d', sep=None):
""" Sparse coordinate format."""
fd.write('# sp_format: True\n\n')
fmt = '%d %d ' + fmt + '\n'
csr = arr.tocsr()
for ii in xrange(csr.size):
ir, ic = csr.rowcol(ii)
data = csr.getdata(ii)
fd.write(fmt % (ir, ic, data))
def _write_elements(fd, arr, fmt='%f', sep='\t'):
"""Standard value separated format."""
fmt = fmt + sep
fd.write('\n')
y, x = arr.shape
for j in range(y):
for i in range(x):
fd.write(fmt %arr[j, i])
fd.write('\n')
def _read_elements(fd, arr, sep=None):
line = fd.readline()
i = 0
while line:
values = line.split(sep)
for j, val in enumerate(values):
arr[i,j] = float(val)
i += 1
line = fd.readline()
return arr
def _read_sparse_elements(fd, arr, sep=None):
line = fd.readline()
while line:
i, j, val = line.split()
arr[int(i),int(j)] = float(val)
line = fd.readline()
return arr.tocsr()
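A small round-trip sketch for the ftsv reader/writer above (assumes the module is importable as laydi.dataset; the file name is arbitrary):

from laydi import dataset

# Two rows, three columns, with explicit identifiers per dimension.
ids = [('rows', ['r0', 'r1']), ('cols', ['c0', 'c1', 'c2'])]
ds = dataset.Dataset([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], ids, name='demo')

dataset.write_ftsv('demo.ftsv', ds)       # fd may be a filename or an open file
ds2 = dataset.read_ftsv('demo.ftsv')
print ds2.get_name(), ds2.get_dim_name()  # -> demo ['rows', 'cols']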

laydi/dialogs.py Normal file (108 lines)

@@ -0,0 +1,108 @@
import pygtk
# pygtk.require('2.0')
import gtk
import sys
import os
import gobject
import logger, project, workflow, main
DATADIR = os.path.dirname(sys.modules['laydi'].__file__)
GLADEFILENAME = os.path.join(DATADIR, 'fluents.glade')
class CreateProjectDruid(gtk.Window):
"""A druid for creating a new project.
The CreateProjectDruid gets a list of all classes derived from
Workflow, and asks the user to select one of these. A new project of
the selected class is added to the application."""
def __init__(self):
gtk.Window.__init__(self)
self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'new_project_druid')
self.workflows = self.make_workflow_list()
self.selected = None
renderer = gtk.CellRendererText()
wf_name = gtk.TreeViewColumn('Workflow Name', renderer, text=0)
self['workflow_list'].insert_column(wf_name, 0)
self.wf_info = gtk.TextBuffer()
self['workflow_info'].set_buffer(self.wf_info)
def __getitem__(self, key):
return self.widget_tree.get_widget(key)
def make_workflow_list(self):
store = gtk.ListStore(gobject.TYPE_STRING, gobject.TYPE_PYOBJECT)
for wf in workflow.workflow_list():
store.insert_after(None, (wf.name, wf))
return store
def run(self):
self['workflow_list'].set_model(self.workflows)
self['druidpagestart1'].show()
self['druidpagefinish1'].show()
self['new_project_druid'].show()
self['druidpagefinish1'].connect('finish', self.finish)
self['workflow_list'].connect('cursor_changed', self.selection_updated)
self['druid'].connect('cancel', self.cancel)
self.connect('destroy', self.delete)
def delete(self, widget):
return False
def hide(self):
self['druidpagestart1'].hide()
self['druidpagefinish1'].hide()
self['new_project_druid'].hide()
gtk.Window.hide(self)
def finish(self, *rest):
tree, it = self['workflow_list'].get_selection().get_selected()
wf_class = self.workflows.get_value(it, 1)
proj = project.Project()
main.set_workflow(wf_class())
# self.app.set_workflow(wf(self.app))
# self.app.set_project(proj)
main.set_project(proj)
self.hide()
self.destroy()
def cancel(self, *ignored):
self.hide()
self.destroy()
def selection_updated(self, *rest):
tree, it = self['workflow_list'].get_selection().get_selected()
wf = self.workflows.get_value(it, 1)
self.wf_info.set_text(wf.description)
def get_text(title, text):
"""Allow user to type in a string for text."""
dlg = gtk.Dialog(title)
dlg.show()
text = gtk.Label(text)
text.show()
entry = gtk.Entry()
entry.show()
entry.set_activates_default(True)
dlg.vbox.pack_start(text)
dlg.vbox.pack_start(entry)
dlg.add_button(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL)
dlg.add_button(gtk.STOCK_OK, gtk.RESPONSE_OK)
dlg.set_default_response(gtk.RESPONSE_OK)
response = dlg.run()
retval = None
if response == gtk.RESPONSE_OK:
retval = entry.get_text()
dlg.destroy()
return retval
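A hedged sketch of the get_text helper above (requires a working GTK session; the title and prompt are illustrative):

# Returns the entered string, or None if the dialog is cancelled.
name = get_text('New project', 'Enter a project name:')
if name != None:
    print 'Creating project: %s' % name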

laydi/fluents.glade Normal file (1223 lines)

File diff suppressed because it is too large

laydi/fluents.py Normal file (402 lines)

@@ -0,0 +1,402 @@
#!/usr/bin/python
import os
import sys
import pygtk
pygtk.require('2.0')
import gobject
import gtk
import gtk.gdk
import gtk.glade
import gnome
import gnome.ui
import scipy
import pango
import project, workflow, dataset, view, navigator, dialogs, selections, plots, main
from logger import logger, LogView
PROGRAM_NAME = 'laydi'
VERSION = '0.1.0'
DATADIR = os.path.join(main.PYDIR, 'laydi')
#ICONDIR = os.path.join(DATADIR,"..","icons")
ICONDIR = main.ICONDIR
GLADEFILENAME = os.path.join(main.PYDIR, 'laydi/fluents.glade')
_icon_mapper = {dataset.Dataset: 'dataset',
dataset.CategoryDataset: 'category_dataset',
dataset.GraphDataset: 'graph_dataset',
plots.Plot: 'line_plot'}
class IconFactory:
"""Factory for icons that ensures that each icon is only loaded once."""
def __init__(self, path):
self._path = path
self._icons = {}
def get(self, iconname):
"""Returns the gdk loaded PixBuf for the given icon.
Reads the icon from file if necessary."""
# if iconname isn't a string, try to autoconvert
if not isinstance(iconname, str):
for cls in _icon_mapper.keys():
if isinstance(iconname, cls):
iconname = _icon_mapper[cls]
if self._icons.has_key(iconname):
return self._icons[iconname]
icon_fname = os.path.join(self._path, '%s.png' % iconname)
icon = gtk.gdk.pixbuf_new_from_file(icon_fname)
self._icons[iconname] = icon
return icon
icon_factory = IconFactory(ICONDIR)
class TableSizeSelection(gtk.Window):
def __init__(self):
self._SIZE = size = 5
gtk.Window.__init__(self, gtk.WINDOW_POPUP)
self._table = gtk.Table(size, size, True)
self._items = []
## Create a size x size table of EventBox objects, doubly stored because
## gtk.Table does not support indexed retrieval.
for y in range(size):
line = []
for x in range(size):
ebox = gtk.EventBox()
ebox.add(gtk.Frame())
ebox.set_size_request(20, 20)
ebox.set_visible_window(True)
self._table.attach(ebox, x, x+1, y, y+1, gtk.FILL, gtk.FILL)
line.append(ebox)
self._items.append(line)
self.set_border_width(5)
self.add(self._table)
self.connect_signals()
def _get_child_pos(self, child):
size = self._SIZE
for x in range(size):
for y in range(size):
if self._items[y][x] == child:
return (x, y)
return None
def connect_signals(self):
size = self._SIZE
for x in range(size):
for y in range(size):
self._items[y][x].add_events(gtk.gdk.ENTER_NOTIFY_MASK)
self._items[y][x].connect("enter-notify-event",
self._on_enter_notify)
self._items[y][x].connect("button-release-event",
self._on_button_release)
def _on_enter_notify(self, widget, event):
size = self._SIZE
x, y = self._get_child_pos(widget)
for i in range(size):
for j in range(size):
if i <= x and j <= y:
self._items[j][i].set_state(gtk.STATE_SELECTED)
else:
self._items[j][i].set_state(gtk.STATE_NORMAL)
self.x = x
self.y = y
def _on_button_release(self, widget, event):
size = self._SIZE
self.emit('table-size-set', self.x+1, self.y+1)
self.hide_all()
for x in range(size):
for y in range(size):
self._items[y][x].set_state(gtk.STATE_NORMAL)
class ViewFrameToolButton (gtk.ToolItem):
def __init__(self):
gtk.ToolItem.__init__(self)
fname = os.path.join(ICONDIR, "table_size.png")
image = gtk.Image()
image.set_from_file(fname)
self._button = gtk.Button()
self._button.set_image(image)
self._button.set_property("can-focus", False)
eb = gtk.EventBox()
eb.add(self._button)
self.add(eb)
self._item = TableSizeSelection()
self._button.connect("button-press-event", self._on_show_menu)
image.show()
self._image = image
self._item.connect("table-size-set", self._on_table_size_set)
self._button.set_relief(gtk.RELIEF_NONE)
self.show_all()
def _on_show_menu(self, widget, event):
x, y = self._image.window.get_origin()
x2, y2, w, h, b = self._image.window.get_geometry()
self._item.move(x, y+h)
self._item.show_all()
def _on_table_size_set(self, widget, width, height):
main.application['main_view'].resize_table(width, height)
class FluentApp:
def __init__(self): # Application variables
# self.project = None
self.current_data = None
self._last_view = None
self._plot_toolbar = None
self._toolbar_state = None
gtk.glade.set_custom_handler(self.custom_object_factory)
self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'appwindow')
# self.workflow = wf
self.idlist_crt = selections.IdListController(self['identifier_list'])
self.sellist_crt = selections.SelectionListController(self['selection_tree'],
self.idlist_crt)
self.dimlist_crt = selections.DimListController(self['dim_list'],
self.sellist_crt)
self.sellist_crt.set_dimlist_controller(self.dimlist_crt)
def init_gui(self):
self['appwindow'].set_size_request(800, 600)
# Set up workflow
self.wf_view = workflow.WorkflowView(main.workflow)
self.wf_view.show()
self['workflow_vbox'].pack_end(self.wf_view)
self._wf_menu = workflow.WorkflowMenu(main.workflow)
self._wf_menu.show()
wf_menuitem = gtk.MenuItem('Fu_nctions')
wf_menuitem.set_submenu(self._wf_menu)
wf_menuitem.show()
self['menubar1'].insert(wf_menuitem, 2)
# Connect signals
signals = {'on_quit1_activate' : (gtk.main_quit),
'on_appwindow_delete_event' : (gtk.main_quit),
'on_zoom_in_button_clicked' : (self.on_single_view),
'on_zoom_out_button_clicked' : (self.on_multiple_view),
'on_new1_activate' : (self.on_create_project),
'on_button_new_clicked' : (self.on_create_project),
'on_workflow_refresh_clicked' : (self.on_workflow_refresh_clicked),
'on_index1_activate' : (self.on_help_index),
'on_about1_activate' : (self.on_help_about),
'on_report_bug1_activate' : (self.on_help_report_bug),
'on_small_view1_activate' : (self.on_multiple_view),
'on_large_view1_activate' : (self.on_single_view),
'on_left1_activate' : (self.on_left),
'on_right1_activate' : (self.on_right),
'on_up1_activate' : (self.on_up),
'on_down1_activate' : (self.on_down),
'on_navigator1_activate' : (self.on_show_navigator),
'on_workflow1_activate' : (self.on_show_workflow),
'on_information1_activate' : (self.on_show_infopane),
}
self.widget_tree.signal_autoconnect(signals)
self['main_view'].connect('view-changed', self.on_view_changed)
# Log that we've set up the app now
logger.debug('Program started')
# Add ViewFrame table size to toolbar
tb = ViewFrameToolButton()
self['toolbar'].add(tb)
def set_project(self, proj):
logger.notice('Welcome to your new project. Grasp That Data!')
self.navigator_view.add_project(proj)
self.dimlist_crt.set_project(proj)
self.sellist_crt.set_project(proj)
def set_workflow(self, workflow):
main.workflow = workflow
self.wf_view.set_workflow(main.workflow)
def show(self):
self.init_gui()
def change_plot(self, plot):
"""Sets the plot in the currently active ViewFrame. If the plot is
already shown in another ViewFrame it will be moved from there."""
# Set current selection in the plot before showing it.
plot.selection_changed(None, main.project.get_selection())
self['main_view'].insert_view(plot)
self._update_toolbar(plot)
def change_plots(self, plots):
"""Changes all plots."""
self['main_view'].set_all_plots(plots)
v = self.get_active_view_frame().get_view()
self._update_toolbar(v)
def get_active_view_frame(self):
return self['main_view'].get_active_view_frame()
def _update_toolbar(self, view):
"""Set the plot specific toolbar to the toolbar of the currently
active plot."""
# don't do anything on no change
if self._last_view == view:
return
self._last_view = view
logger.debug("view changed to %s" % view)
window = self['plot_toolbar_dock']
if self._plot_toolbar:
toolbar_state = self._plot_toolbar.get_mode()
window.remove(self._plot_toolbar)
else:
toolbar_state = "default"
if view:
self._plot_toolbar = view.get_toolbar()
self._plot_toolbar.set_mode(toolbar_state)
else:
self._plot_toolbar = None
if self._plot_toolbar:
window.add(self._plot_toolbar)
# Methods to create GUI widgets from CustomWidgets in the glade file.
# The custom_object_factory calls other functions to generate specific
# widgets.
def custom_object_factory(self, glade, fun_name, widget_name, s1, s2, i1, i2):
"Called by the glade file reader to create custom GUI widgets."
handler = getattr(self, fun_name)
return handler(s1, s2, i1, i2)
def create_logview(self, str1, str2, int1, int2):
self.log_view = LogView(logger)
self.log_view.show()
return self.log_view
def create_main_view(self, str1, str2, int1, int2):
self.main_view = view.MainView()
self.main_view.show()
return self.main_view
def create_navigator_view(self, str1, str2, int1, int2):
self.navigator_view = navigator.NavigatorView()
self.navigator_view.show()
return self.navigator_view
def create_dim_list(self, str1, str2, int1, int2):
self.dim_list = selections.DimList()
self.dim_list.show()
return self.dim_list
def create_selection_tree(self, str1, str2, int1, int2):
self.selection_tree = selections.SelectionTree()
self.selection_tree.show()
return self.selection_tree
def create_identifier_list(self, str1, str2, int1, int2):
self.identifier_list = selections.IdentifierList()
self.identifier_list.show()
return self.identifier_list
def __getitem__(self, key):
return self.widget_tree.get_widget(key)
# Event handlers.
# These methods are called by the gtk framework in response to events and
# should not be called directly.
def on_single_view(self, *ignored):
self['main_view'].goto_large()
def on_multiple_view(self, *ignored):
self['main_view'].goto_small()
def on_create_project(self, *rest):
d = dialogs.CreateProjectDruid()
d.run()
def on_help_about(self, *rest):
widget_tree = gtk.glade.XML(GLADEFILENAME, 'aboutdialog')
about = widget_tree.get_widget('aboutdialog')
about.run()
def on_help_index(self, *ignored):
gnome.help_display_uri('https://dev.pvv.org/projects/fluents/wiki/help')
def on_help_report_bug(self, *ignored):
gnome.help_display_uri('https://dev.pvv.org/projects/fluents/newticket')
def on_workflow_refresh_clicked(self, *ignored):
try:
reload(sys.modules[main.workflow.__class__.__module__])
except Exception, e:
logger.warning('Cannot reload workflow')
logger.warning(e)
else:
logger.notice('Successfully reloaded workflow')
def on_view_changed(self, widget, vf):
self._update_toolbar(vf.get_view())
def on_show_navigator(self, item):
if item.get_active():
self['data_vbox'].show()
else:
self['data_vbox'].hide()
def on_show_workflow(self, item):
if item.get_active():
self['workflow_vbox'].show()
else:
self['workflow_vbox'].hide()
def on_show_infopane(self, item):
if item.get_active():
self['bottom_notebook'].show()
else:
self['bottom_notebook'].hide()
def on_left(self, item):
self.main_view.move_focus_left()
def on_right(self, item):
self.main_view.move_focus_right()
def on_up(self, item):
self.main_view.move_focus_up()
def on_down(self, item):
self.main_view.move_focus_down()
gobject.signal_new('table-size-set', TableSizeSelection,
gobject.SIGNAL_RUN_LAST,
gobject.TYPE_NONE,
(gobject.TYPE_INT, gobject.TYPE_INT))

laydi/lib/R_utils.py Normal file (284 lines)

@@ -0,0 +1,284 @@
"""A collection of functions that use R.
Most functions use libraries from bioconductor
depends on:
(not updated)
-- bioconductor min. install
-- hgu133a
-- hgu133plus2
"""
import scipy
import Numeric as N
import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
def get_locusid(probelist=None,org="hgu133a"):
"""Returns a dictionary of locus link id for each affy probeset
and reverse mapping
input:
[probelist] -- probelist of affy probesets
[org] -- chip type (organism)
out:
aff2loc, loc2aff
The mapping is one-to-one for affy->locus_id
However, there are several affy probesets for one locus_id
From bioc-mail-archive: BioC takes the GenBank ids associated
with the probes (provided by the manufacturer) and then maps them
to Entrez Gene ids using data from UniGene, Entrez Gene, and other
available data sources we trust. The Entrez Gene id a probe is
assigned to is determined by votes from all the sources used. If
there is no agreement among the sources, we take the smallest
Entrez Gene id.
"""
silent_eval("library("+org+")")
silent_eval('locus_ids = as.list('+org+'LOCUSID)')
silent_eval('pp<-as.list(locus_ids[!is.na(locus_ids)])')
loc_ids = rpy.r("pp")
for id in loc_ids:
loc_ids[id] = str(loc_ids[id])
aff2loc = {}
if probelist:
for pid in probelist:
try:
aff2loc[pid]=loc_ids[pid]
except:
print "Affy probeset: %s has no locus id" %pid
print "\nCONVERSION SUMMARY:\n \
Number of probesets input %s \n \
Number of translated locus ids: %s \n \
Number of missings: %s" %(len(probelist),len(aff2loc),len(probelist)-len(aff2loc))
else:
aff2loc = loc_ids
# reverse mapping
loc2aff = {}
for k,v in aff2loc.items():
if loc2aff.has_key(v):
loc2aff[v].append(k)
else:
loc2aff[v]=[k]
return aff2loc,loc2aff
def get_kegg_paths(org="hgu133plus2",id_type='aff',probelist=None):
"""Returns a dictionary of KEGG maps.
input:
org -- chip_type (see bioconductor.org)
id_type -- id ['aff','loc']
key: affy_id, value = list of kegg map id
example: '65884_at': ['00510', '00513']
"""
silent_eval("library("+org+")")
silent_eval('xx<-as.list('+org+'PATH)')
silent_eval('xp <- xx[!is.na(xx)]')
aff2path = rpy.r("xp")
dummy = rpy.r("xx")
if id_type=='loc':
aff2loc,loc2aff = get_locusid(org=org)
loc2path = {}
for id,path in aff2path.items():
loc = aff2loc.get(id)
if loc == None:
continue
if loc2path.has_key(loc):
path = path + loc2path[loc]
print "Found duplicate in path: %s" %path
loc2path[loc] = path
aff2path = loc2path
out = {}
if probelist:
for pid in probelist:
try:
out[pid]=aff2path[pid]
except:
print "Could not find id: %s" %pid
else:
out = aff2path
for k,v in out.items():
# if string, convert to list
try:
v + ''
out[k] = [v]
except:
out[k] = v
return out
def get_probe_list(org="hgu133plus2"):
rpy.r.library(org)
silent_eval('probe_list<-ls('+org+'ACCNUM )')
pl = rpy.r("probe_list")
return pl
def get_GO_from_aff(org="hgu133plus2",id_type='aff',probelist=None):
"""Returns a dictionary of GO terms.
input:
org -- chip_type (see bioconductor.org)
id_type -- id ['aff','loc']
key:
example: '65884_at':
"""
silent_eval("library("+org+")")
silent_eval('xx<-as.list('+org+'GO)')
silent_eval('xp <- xx[!is.na(xx)]')
aff2path = rpy.r("xp")
dummy = rpy.r("xx")
if id_type=='loc':
aff2loc, loc2aff = get_locusid(org=org)
loc2path = {}
for id,path in aff2path.items():
loc = aff2loc.get(id)
if loc == None:
continue
if loc2path.has_key(loc):
path = path + loc2path[loc]
print "Found duplicate in path: %s" %path
loc2path[loc] = path
aff2path = loc2path
out = {}
if probelist:
for pid in probelist:
try:
out[pid]=aff2path[pid]
except:
print "Could not find id: %s" %pid
else:
out = aff2path
return out
def get_kegg_as_category(org="hgu133plus2",id_type='aff',probelist=None):
"""Returns kegg pathway memberships in dummy (1/0) matrix (genes x maps)
"""
kegg = get_kegg_paths(org=org, id_type=id_type, probelist=probelist)
maps = set()
for kpth in kegg.values():
maps.update(kpth)
n_maps = len(maps)
n_genes = len(kegg)
gene2index = dict(zip(kegg.keys(), range(n_genes)))
map2index = dict(zip(maps, range(n_maps)))
C = scipy.zeros((n_genes, n_maps))
for k,v in kegg.items():
for m in v:
C[gene2index[k], map2index[m]]=1
return C, list(maps), kegg.keys()
def impute(X, k=10, rowmax=0.5, colmax=0.8, maxp=1500, seed=362436069):
"""
A function to impute missing expression data, using nearest
neighbor averaging. (from bioconductors impute)
input:
data: An expression matrix with genes in the rows, samples in the
columns
k: Number of neighbors to be used in the imputation (default=10)
rowmax: The maximum percent missing data allowed in any row (default
50%). Any rows with more than 'rowmax'% missing are
imputed using the overall mean per sample.
colmax: The maximum percent missing data allowed in any column
(default 80%). If any column has more than 'colmax'% missing
data, the program halts and reports an error.
maxp: The largest block of genes imputed using the knn algorithm
inside 'impute.knn' (default 1500); larger blocks are divided
by two-means clustering (recursively) prior to imputation. If
'maxp=p', only knn imputation is done
seed: The seed used for the random number generator (default
362436069) for reproducibility.
call:
impute(data ,k = 10, rowmax = 0.5, colmax = 0.8, maxp = 1500, rng.seed=362436069)
"""
rpy.r.library("impute")
X = N.asarray(X) # cast as numeric array
m, n = scipy.shape(X)
if m>n:
print "Warning (impute): more samples than variables. running transpose"
t_flag = True
else:
X = N.transpose(X)
t_flag = False
rpy.r.assign("X", X)
rpy.r.assign("k", k)
rpy.r.assign("rmax", rowmax)
rpy.r.assign("cmax", colmax)
rpy.r.assign("maxp", maxp)
call = "out<-impute.knn(X,k=k,rowmax=rmax,colmax=cmax,maxp=maxp)"
silent_eval(call)
out = rpy.r("out")
if not t_flag:
E = out['data']
E = scipy.asarray(E)
E = E.T
else:
E = out['data']
E = scipy.asarray(E)
return E
def get_chip_annotation(org="hgu133a",annot='pmid', id_type='loc',probelist=None):
"""Returns a dictionary of annoations.
input:
org -- chip_type (see bioconductor.org)
annot -- annotation ['genename', 'pmid', ' symbol']
id_type -- id ['aff','loc']
key: id, value = list of annoations
example: '65884_at': ['15672394', '138402']
"""
_valid_annot = ['genename', 'pmid', 'symbol', 'enzyme', 'chr', 'chrloc']
if annot.lower() not in _valid_annot:
raise ValueError("Annotation must be one of %s" %_valid_annot)
silent_eval("library("+org+")")
silent_eval("dummy<-as.list("+org+annot.upper()+")")
silent_eval('annotations <- dummy[!is.na(dummy)]')
aff2annot = rpy.r("annotations")
if id_type=='loc':
aff2loc, loc2aff = get_locusid(org=org)
loc2annot = {}
for geneid, annotation in aff2annot.items():
annotation = ensure_list(annotation)
print annotation
if loc2annot.has_key(geneid):
for extra in loc2annot[geneid]:
annotation.append(extra)
print "Found duplicate in gene: %s" %geneid
loc2annot[aff2loc[geneid]] = annotation
aff2annot = loc2annot
out = {}
if probelist:
for pid in probelist:
try:
out[pid] = aff2annot.get(pid, 'none')
except:
print "Could not find id: %s" %pid
else:
out = aff2annot
return out
def ensure_list(value):
if isinstance(value, list):
return value
else:
return [value]
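A usage sketch for the mapping helpers above (assumes a working rpy/R installation with the relevant Bioconductor annotation packages; the probeset ids are illustrative):

# Affy probeset -> LocusLink/Entrez ids, and the reverse mapping.
aff2loc, loc2aff = get_locusid(probelist=['1007_s_at', '1053_at'], org='hgu133a')

# KEGG pathway memberships for the same probesets.
kegg = get_kegg_paths(org='hgu133a', id_type='aff', probelist=['1007_s_at', '1053_at'])

# Dummy-coded (genes x maps) matrix, e.g. for building a CategoryDataset.
C, maps, genes = get_kegg_as_category(org='hgu133a', probelist=['1007_s_at', '1053_at'])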

laydi/lib/__init__.py Normal file (0 lines)

laydi/lib/blmfuncs.py Normal file (1378 lines)

File diff suppressed because it is too large

laydi/lib/blmplots.py Normal file (458 lines)

@@ -0,0 +1,458 @@
"""Specialised plots for functions defined in blmfuncs.py.
fixme:
-- If scatterplot is not inited with a colorvector there will be no
colorbar, but when adding colors the colorbar should be created.
"""
from matplotlib import cm,patches
import gtk
import laydi
from laydi import plots, main, logger
import scipy
from scipy import dot,sum,diag,arange,log,mean,newaxis,sqrt,apply_along_axis,empty
from scipy.stats import corrcoef
def correlation_loadings(data, T, test=True):
""" Returns correlation loadings.
:input:
- D: [nsamps, nvars], data (non-centered data)
- T: [nsamps, a_max], Scores
:output:
- R: [nvars, a_max], Correlation loadings
:notes:
"""
nsamps, nvars = data.shape
nsampsT, a_max = T.shape
if nsamps!=nsampsT: raise IOError("D/T mismatch")
# center
data = data - data.mean(0)
R = empty((nvars, a_max),'d')
for a in range(a_max):
for k in range(nvars):
R[k,a] = corrcoef(data[:,k], T[:,a])[0,1]
return R
class BlmScatterPlot(plots.ScatterPlot):
"""Scatter plot used for scores and loadings in bilinear models."""
def __init__(self, title, model, absi=0, ordi=1, part_name='T', color_by=None):
self.model = model
if model.model.has_key(part_name)!=True:
raise ValueError("Model part: %s not found in model" %mod_param)
self._T = model.model[part_name]
if self._T.shape[1]==1:
logger.log('notice', 'Scores have only one component')
absi= ordi = 0
self._absi = absi
self._ordi = ordi
self._cmap = cm.summer
dataset_1 = model.as_dataset(part_name)
id_dim = dataset_1.get_dim_name(0)
sel_dim = dataset_1.get_dim_name(1)
id_1, = dataset_1.get_identifiers(sel_dim, [absi])
id_2, = dataset_1.get_identifiers(sel_dim, [ordi])
col = 'b'
if model.model.has_key(color_by):
col = model.model[color_by].ravel()
plots.ScatterPlot.__init__(self, dataset_1, dataset_1, id_dim, sel_dim, id_1, id_2 ,c=col ,s=40 , name=title)
self._mappable.set_cmap(self._cmap)
self.sc = self._mappable
self.add_pc_spin_buttons(self._T.shape[1], absi, ordi)
def set_facecolor(self, colors):
"""Set patch facecolors.
"""
pass
def set_alphas(self, alphas):
"""Set alpha channel for all patches."""
pass
def set_sizes(self, sizes):
"""Set patch sizes."""
pass
def set_expvar_axlabels(self, param=None):
if param == None:
param = self._expvar_param
else:
self._expvar_param = param
if not self.model.model.has_key(param):
self.model.model[param] = None
if self.model.model[param]==None:
logger.log('notice', 'Param: %s not in model' %param)
print self.model.model.keys()
print self.model.model[param]
pass #fixme: do expvar calc here if not present
else:
expvar = self.model.model[param]
xstr = "Comp: %s , %.1f " %(self._absi, expvar[self._absi+1])
ystr = "Comp: %s , %.1f " %(self._ordi, expvar[self._ordi+1])
self.axes.set_xlabel(xstr)
self.axes.set_ylabel(ystr)
def add_pc_spin_buttons(self, amax, absi, ordi):
sb_a = gtk.SpinButton(climb_rate=1)
sb_a.set_range(1, amax)
sb_a.set_value(absi+1)
sb_a.set_increments(1, 5)
sb_a.connect('value_changed', self.set_absicca)
sb_o = gtk.SpinButton(climb_rate=1)
sb_o.set_range(1, amax)
sb_o.set_value(ordi+1)
sb_o.set_increments(1, 5)
sb_o.connect('value_changed', self.set_ordinate)
hbox = gtk.HBox()
gtk_label_a = gtk.Label("A:")
gtk_label_o = gtk.Label(" O:")
toolitem = gtk.ToolItem()
toolitem.set_expand(False)
toolitem.set_border_width(2)
toolitem.add(hbox)
hbox.pack_start(gtk_label_a)
hbox.pack_start(sb_a)
hbox.pack_start(gtk_label_o)
hbox.pack_start(sb_o)
self._toolbar.insert(toolitem, -1)
toolitem.set_tooltip(self._toolbar.tooltips, "Set Principal component")
self._toolbar.show_all() #do i need this?
def set_absicca(self, sb):
self._absi = sb.get_value_as_int() - 1
xy = self._T[:,[self._absi, self._ordi]]
self.xaxis_data = xy[:,0]
self.yaxis_data = xy[:,1]
self.sc._offsets = xy
self.selection_collection._offsets = xy
self.canvas.draw_idle()
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
self.axes.set_xlim(new_lims, emit=True)
self.set_expvar_axlabels()
self.canvas.draw_idle()
def set_ordinate(self, sb):
self._ordi = sb.get_value_as_int() - 1
xy = self._T[:,[self._absi, self._ordi]]
self.xaxis_data = xy[:,0]
self.yaxis_data = xy[:,1]
self.sc._offsets = xy
self.selection_collection._offsets = xy
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
self.axes.set_ylim(new_lims, emit=True)
self.set_expvar_axlabels()
self.canvas.draw_idle()
def show_labels(self, index=None):
if self._text_labels == None:
x = self.xaxis_data
y = self.yaxis_data
self._text_labels = {}
for name, n in self.dataset_1[self.current_dim].items():
txt = self.axes.text(x[n],y[n], name)
txt.set_visible(False)
self._text_labels[n] = txt
if index!=None:
self.hide_labels()
for indx,txt in self._text_labels.items():
if indx in index:
txt.set_visible(True)
self.canvas.draw_idle()
def hide_labels(self):
for txt in self._text_labels.values():
txt.set_visible(False)
self.canvas.draw_idle()
class PcaScreePlot(plots.BarPlot):
def __init__(self, model):
title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
ds = model.as_dataset('eigvals')
if ds==None:
logger.log('notice', 'Model does not contain eigvals')
plots.BarPlot.__init__(self, ds, name=title)
class PcaScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pca scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
self.set_expvar_axlabels(param="expvarx")
class PcaLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pca loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='p_tsq')
self.set_expvar_axlabels(param="expvarx")
class PlsScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
class PlsXLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
#self.set_expvar_axlabels(self, param="expvarx")
class PlsYLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q')
class PlsCorrelationLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP')
class LplsScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "L-pls scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
self.set_expvar_axlabels("evx")
class LplsXLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls x-loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='tsqx')
self.set_expvar_axlabels("evx")
class LplsZLoadingPlot(BlmScatterPlot, plots.PlotThresholder):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls z-loadings (%s)" %model._dataset['Z'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='L', color_by='tsqz')
self.set_expvar_axlabels(param="evz")
plots.PlotThresholder.__init__(self, "IC")
def _update_color_from_dataset(self, ds):
BlmScatterPlot._update_color_from_dataset(self, ds)
self.set_threshold_dataset(ds)
class LplsXCorrelationPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls x-corr. loads (%s)" %model._dataset['X'].get_name()
if not model.model.has_key('Rx'):
R = correlation_loadings(model._data['X'], model.model['T'])
model.model['Rx'] = R
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Rx')
self.set_expvar_axlabels("evx")
radius = 1
center = (0,0)
c100 = patches.Circle(center,radius=radius,
facecolor='gray',
alpha=.1,
zorder=1)
c50 = patches.Circle(center, radius= sqrt(radius/2.0),
facecolor='gray',
alpha=.1,
zorder=2)
self.axes.add_patch(c100)
self.axes.add_patch(c50)
self.axes.axhline(lw=1.5,color='k')
self.axes.axvline(lw=1.5,color='k')
self.axes.set_xlim([-1.05,1.05])
self.axes.set_ylim([-1.05, 1.05])
self.canvas.show()
class LplsZCorrelationPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls z-corr. loads (%s)" %model._dataset['Z'].get_name()
if not model.model.has_key('Rz'):
R = correlation_loadings(model._data['Z'].T, model.model['W'])
model.model['Rz'] = R
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Rz')
self.set_expvar_axlabels("evz")
radius = 1
center = (0,0)
c100 = patches.Circle(center,radius=radius,
facecolor='gray',
alpha=.1,
zorder=1)
c50 = patches.Circle(center, radius=sqrt(radius/2.0),
facecolor='gray',
alpha=.1,
zorder=2)
self.axes.add_patch(c100)
self.axes.add_patch(c50)
self.axes.axhline(lw=1.5,color='k')
self.axes.axvline(lw=1.5,color='k')
self.axes.set_xlim([-1.05,1.05])
self.axes.set_ylim([-1.05, 1.05])
self.canvas.show()
class LplsHypoidCorrelationPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Hypoid correlations(%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='W')
class LplsExplainedVariancePlot(plots.Plot):
def __init__(self, model):
self.model = model
plots.Plot.__init__(self, "Explained variance")
xax = scipy.arange(model.model['evx'].shape[0])
self.axes.plot(xax, model.model['evx'], 'b-', label='X', linewidth=1.5)
self.axes.plot(xax, model.model['evy'], 'k-', label='Y', linewidth=1.5)
self.axes.plot(xax, model.model['evz'], 'g-', label='Z', linewidth=1.5)
self.canvas.draw()
class LineViewXc(plots.LineViewPlot):
"""A line view of centered raw data
"""
def __init__(self, model, name='Profiles'):
dx = model._dataset['X']
plots.LineViewPlot.__init__(self, dx, 1, None, False,name)
self.add_center_check_button(self.data_is_centered)
def add_center_check_button(self, ticked):
"""Add a checker button for centerd view of data."""
cb = gtk.CheckButton("Center")
cb.set_active(ticked)
cb.connect('toggled', self._toggle_center)
toolitem = gtk.ToolItem()
toolitem.set_expand(False)
toolitem.set_border_width(2)
toolitem.add(cb)
self._toolbar.insert(toolitem, -1)
toolitem.set_tooltip(self._toolbar.tooltips, "Column center the line view")
self._toolbar.show_all() #do i need this?
def _toggle_center(self, active):
if self.data_is_centered:
self._data = self._data + self._mn_data
self.data_is_centered = False
else:
self._mn_data = self._data.mean(0)
self._data = self._data - self._mn_data
self.data_is_centered = True
self.make_lines()
self.set_background()
self.set_current_selection(main.project.get_selection())
class ParalellCoordinates(plots.Plot):
"""Parallell coordinates for score loads with many comp.
"""
def __init__(self, model, p='loads'):
pass
class PlsQvalScatter(plots.ScatterPlot):
"""A vulcano like plot of loads vs qvals
"""
def __init__(self, model, pc=0):
if not model.model.has_key('w_tsq'):
return None
self._W = model.model['W']
dataset_1 = model.as_dataset('W')
dataset_2 = model.as_dataset('w_tsq')
id_dim = dataset_1.get_dim_name(0) #genes
sel_dim = dataset_1.get_dim_name(1) #_comp
sel_dim_2 = dataset_2.get_dim_name(1) #_zero_dim
id_1, = dataset_1.get_identifiers(sel_dim, [0])
id_2, = dataset_2.get_identifiers(sel_dim_2, [0])
if model.model.has_key('w_tsq'):
col = model.model['w_tsq'].ravel()
#col = normalise(col)
else:
col = 'g'
plots.ScatterPlot.__init__(self, dataset_1, dataset_2,
id_dim, sel_dim, id_1, id_2,
c=col, s=20, sel_dim_2=sel_dim_2,
name='Load Volcano')
class PredictionErrorPlot(plots.Plot):
"""A boxplot of prediction error vs. comp. number.
"""
def __init__(self, model, name="Prediction Error"):
if not model.model.has_key('sep'):
logger.log('notice', 'Model has no calculations of sep')
return None
plots.Plot.__init__(self, name)
self._frozen = True
self.current_dim = 'johndoe'
self.axes = self.fig.add_subplot(111)
# draw
sep = model.model['sep']
aopt = model.model['aopt']
bx_plot_lines = self.axes.boxplot(sqrt(sep))
aopt_marker = self.axes.axvline(aopt, linewidth=10,
color='r',zorder=0,
alpha=.5)
# add canvas
self.add(self.canvas)
self.canvas.show()
def set_current_selection(self, selection):
pass
class TRBiplot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Target rotation biplot(%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'B')
B = model.model.get('B')
# normalize B
Bnorm = scipy.apply_along_axis(scipy.linalg.norm, 1, B)
x = model._dataset['X'].copy()
Xc = x._array - mean(x._array,0)[newaxis]
w_rot = B/Bnorm
t_rot = dot(Xc, w_rot)
class InfluencePlot(plots.ScatterPlot):
""" Returns a leverage vs resiudal scatter plot.
"""
def __init__(self, model, dim, name="Influence"):
if not model.model.has_key('levx'):
logger.log('notice', 'Model has no calculations of leverages')
return
if not model.model.has_key('ssqx'):
logger.log('notice', 'Model has no calculations of residuals')
return
ds1 = model.as_dataset('levx')
ds2 = model.as_dataset('ssqx')
id_dim = ds1.get_dim_name(0)
sel_dim = ds1.get_dim_name(1)
sel_dim_2 = ds2.get_dim_name(1)
id_1, = ds1.get_identifiers(sel_dim, [0])
id_2, = ds2.get_identifiers(sel_dim_2, [0])
plots.ScatterPlot.__init__(self, ds1, ds2,
id_dim, sel_dim, id_1, id_2,
s=20, sel_dim_2=sel_dim_2,
name=name)
class RMSEPPlot(plots.BarPlot):
def __init__(self, model, name="RMSEP"):
if not model.model.has_key('rmsep'):
logger.log('notice', 'Model has no calculations of sep')
return
dataset = model.as_dataset('rmsep')
plots.BarPlot.__init__(self, dataset, name=name)
def normalise(x):
"""Scale vector x to [0,1]
"""
x = x - x.min()
x = x/x.max()
return x
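A small numeric sketch of correlation_loadings and normalise above (random data, no GUI needed; assumes numpy is available alongside scipy):

from numpy.random import rand

X = rand(20, 5)           # 20 samples, 5 variables (non-centered)
T = rand(20, 2)           # scores for a 2-component model
R = correlation_loadings(X, T)
print R.shape             # -> (5, 2): one correlation per variable/component
print normalise(R[:, 0])  # scaled to [0, 1]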

laydi/lib/cv_index.py Normal file (66 lines)

@@ -0,0 +1,66 @@
from numpy import array_split,arange
def cv(n, k, randomise=False, sequential=False):
"""
Generates k (training, validation) index pairs.
Each pair is a partition of arange(n), where validation is an iterable
of length ~n/k.
If randomise is true, a copy of the index is shuffled before partitioning;
otherwise its order is preserved in training and validation.
Randomise overrides the sequential argument: if randomise is true,
sequential is forced to False.
If sequential is true the index is partitioned in contiguous blocks,
otherwise interleaved ordering is used.
"""
index = xrange(n)
if randomise:
from random import shuffle
index = list(index)
shuffle(index)
sequential = False
if sequential:
for validation in array_split(index, k):
training = [i for i in index if i not in validation]
yield training, validation
else:
for fold in xrange(k):
training = [i for i in index if i % k != fold]
validation = [i for i in index if i % k == fold]
yield training, validation
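# Usage sketch (illustrative): with randomise=False and sequential=False the
# folds are deterministic and interleaved.
#
#   >>> for train, val in cv(9, 3):
#   ...     print val
#   [0, 3, 6]
#   [1, 4, 7]
#   [2, 5, 8]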
def shuffle_diag(shape, K, randomise=False, sequential=False):
"""
Generates K index sets along (shifted) diagonals of an (m, n) matrix.
"""
m, n = shape
if K>m or K>n:
msg = "You may not use more subsets than max(n_rows, n_cols)"
raise ValueError, msg
mon = max(m, n)
#index = xrange(n)
index = [i for i in range(m*n) if i % m == 0]
if randomise:
from random import shuffle
index = list(index)
shuffle(index)
sequential = False
if sequential:
start_ind_sets = array_split(index, K)
else:
start_ind_sets = [[index[i] for i in xrange(n) if i % K == k]
for k in xrange(K)]
for start_inds in start_ind_sets:
for start in start_inds:
ind = arange(start, n*m, mon+1)
yield ind

438
laydi/lib/cx_stats.py Normal file
View File

@@ -0,0 +1,438 @@
import time
import cPickle
from scipy import zeros,zeros_like,sqrt,dot,trace,sign,round_,argmax,\
sort,ravel,newaxis,asarray,diag,sum,outer,argsort,arange,ones_like,\
all,apply_along_axis,eye,atleast_2d,empty
from scipy.linalg import svd,inv,norm,det,sqrtm
from scipy.stats import mean,median
#import plots_lpls
from cx_utils import mat_center
from validation import pls_jkW, lpls_jk
from select_generators import shuffle_1d
from engines import pca, pls, bridge
from engines import nipals_lpls as lpls
def hotelling(Pcv, P, p_center='med', cov_center='med',
alpha=0.3, crot=True, strict=False):
"""Returns regularized hotelling T^2.
alpha -- regularisation towards pooled cov estimates
beta -- regularisation for unstable eigenvalues
p_center -- location method for submodels
cov_center -- location method for sub coviariances
alpha -- regularisation
crot -- rotate submodels toward full?
strict -- only rotate 90 degree ?
"""
m, n = P.shape
n_sets, n, amax = Pcv.shape
# allocate
T_sq = empty((n, ),dtype='d')
Cov_i = zeros((n, amax, amax),dtype='d')
# rotate sub_models to full model
if crot:
for i, Pi in enumerate(Pcv):
Pcv[i] = procrustes(P, Pi, strict=strict)
# center of pnull
if p_center=='med':
P_ctr = median(Pcv, 0)
elif p_center=='mean':
# fixme: mean is unstable
P_ctr = mean(Pcv, 0)
else: #use full
P_ctr = P
for i in xrange(n):
Pi = Pcv[:,i,:] # (n_sets x amax)
Pi_ctr = P_ctr[i,:] # (1 x amax)
Pim = (Pi - Pi_ctr[newaxis])*sqrt(n_sets-1)
Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
if cov_center == 'med':
Cov = median(Cov_i, 0)
else:
Cov = mean(Cov_i, 0)
reg_cov = (1. - alpha)*Cov_i + alpha*Cov
for i in xrange(n):
#Pc = P_ctr[i,:][:,newaxis]
Pc = P_ctr[i,:]
sigma = reg_cov[i]
# T_sq[i] = (dot(Pc, inv(sigma) )*Pc).sum() #slow
T_sq[i] = dot(dot(Pc, inv(sigma)), Pc) # dont need to care about transposes
#T_sq[i] = dot(dot(Pc.T, inv(sigma)), Pc).ravel()
return T_sq
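# Usage sketch (illustrative), mirroring the pattern in pls_qvals below:
# jackknifed weights from pls_jkW plus the full-model weights give a
# regularised T^2 per variable. ac, bc are centered data matrices.
#
#   >>> dat = pls(ac, bc, aopt, 'loads', 'fast')
#   >>> Wcv = pls_jkW(ac, bc, aopt)
#   >>> tsq = hotelling(Wcv, dat['W'], alpha=0.3)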
def procrustes(A, B, strict=True, center=False, verbose=False):
"""Rotation of B to A.
strict -- Only do flipping and shuffling
center -- Center before rotation, translate back after
verbose -- Print ssq
No scaling calculated.
Output B_rot = Rotated B
"""
if center:
A,mn_A = mat_center(A, ret_mn=True)
B,mn_B = mat_center(B, ret_mn=True)
u,s,vh = svd(dot(B.T, A))
v = vh.T
Cm = dot(u, v.T) #orthogonal rotation matrix
if strict: # just inverting and flipping
Cm = ensure_strict(Cm)
b_rot = dot(B, Cm)
if verbose:
print Cm.round()
fit = sum(ravel(B - b_rot)**2)
print "Sum of squares: %s" %fit
if center:
return mn_B + b_rot
else:
return b_rot
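# Sanity-check sketch (illustrative): a pure column flip is undone by a
# strict procrustes rotation, so B_rot should equal A.
#
#   >>> from scipy import rand
#   >>> A = rand(10, 3)
#   >>> B = A.copy(); B[:,0] = -B[:,0]
#   >>> B_rot = procrustes(A, B, strict=True)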
def expl_var_x(Xc, T):
"""Returns explained variance of X.
T should carry variance in length, Xc has zero col-mean.
"""
exp_var_x = diag(dot(T.T, T))*100/(sum(Xc**2))
return exp_var_x
def expl_var_y(Y, T, Q):
"""Returns explained variance of Y.
"""
# centered Y
exp_var_y = zeros((Q.shape[1], ))
for a in range(Q.shape[1]):
Ya = outer(T[:,a], Q[:,a])
exp_var_y[a] = 100*sum(Ya**2)/sum(Y**2)
return exp_var_y
def pls_qvals(a, b, aopt=None, alpha=.3,
n_iter=20, algo='pls',
center=True,
sim_method='shuffle',
p_center='med', cov_center='med',
crot=True, strict=False):
"""Returns qvals for pls model.
input:
a -- data matrix
b -- data matrix
aopt -- scalar, opt. number of components
alpha -- [0,1] regularisation parameter for T2-test
n_iter -- number of permutations
sim_method -- permutation method ['shuffle']
p_center -- location estimator for sub models ['med']
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use strict (rot/flips only) rotations?
"""
m, n = a.shape
TSQ = zeros((n, n_iter), dtype='d') # (nvars x n_subsets)
n_false = zeros((n, n_iter), dtype='d')
#full model
if center:
ac = a - a.mean(0)
bc = b - b.mean(0)
else:
ac, bc = a, b
if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast')
else:
dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
#t0 = time.time()
Vs = shuffle_1d(bc, n_iter, axis=0)
for i, b_shuff in enumerate(Vs):
#t1 = time.time()
if algo=='bridge':
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
else:
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
#print time.time() - t1
return fdr(tsq_full, TSQ, 'median')
def ensure_strict(C, only_flips=True):
"""Ensure that a rotation matrix does only 90 degree rotations.
In multiplication with pcs this allows flips and reordering.
if only_flips is True only flips are allowed
"""
Cm = C
S = sign(C) # signs
if only_flips==True:
C = eye(Cm.shape[0])*S
return C
Cm = zeros_like(C)
Cm.putmask(1.,abs(C)>.6)
if det(Cm)>1:
raise ValueError,"Implement this!"
return Cm*S
def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
n_iter=20, algo='pls',
sim_method='shuffle',
p_center='med', cov_center='med',
crot=True, strict=False):
"""Returns qvals for pls model.
Shuffling of variables in X.
Null model is 'If I put genes randomly on the network' ... if they are still
significant, this is due to network structure and not covariance with the response.
input:
a -- data matrix
b -- data matrix
aopt -- scalar, opt. number of components
alpha -- [0,1] regularisation parameter for T2-test
n_iter -- number of permutations
sim_method -- permutation method ['shuffle']
p_center -- location estimator for sub models ['med']
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use strict (rot/flips only) rotations?
"""
m, n = a.shape
TSQ = zeros((n, n_iter), dtype='<f8') # (nvars x n_subsets)
n_false = zeros((n, n_iter), dtype='<f8')
#full model
# center?
if center==True:
ac = a - a.mean(0)
bc = b - b.mean(0)
else:
ac, bc = a, b
if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast')
else:
dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
t0 = time.time()
Vs = shuffle_1d(a, n_iter, 1)
for i, a_shuff in enumerate(Vs):
t1 = time.time()
a = a_shuff - a_shuff.mean(0)
if algo=='bridge':
dat = bridge(a, b, aopt, 'loads','fast')
else:
dat = pls(a, b, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
print time.time() - t1
sort_index = argsort(tsq_full)[::-1]
back_sort_index = sort_index.argsort()
print time.time() - t0
# count false positives
tsq_full_sorted = tsq_full.take(sort_index)
for i in xrange(n_iter):
for j in xrange(n):
n_false[j,i] = sum(TSQ[:,i]>=tsq_full[j])
false_pos = median(n_false, 1)
ll = arange(1, len(false_pos)+1, 1)
sort_qval = false_pos.take(sort_index)/ll
qval = false_pos/ll.take(back_sort_index)
print time.time() - t0
#return qval, false_pos, TSQ, tsq_full
return qval
def leverage(aopt=1,*args):
"""Returns leverages
input : aopt, number of components to base leverage calculations on
*args, matrices of normed blm-paramters
output: leverages
For PCA typical inputs are normalised T or normalised P
For PLSR typical inputs are normalised T or normalised W
"""
if aopt<1:
raise ValueError,"Leverages only make sense for aopt>0"
lev = []
for u in args:
lev_u = 1./u.shape[0] + dot(u[:,:aopt], u[:,:aopt].T).diagonal()
lev.append(lev_u)
return lev
def variances(a, t, p):
"""Returns explained variance and ind. var from blm-params.
input:
a -- full centered matrix
t,p -- parameters from a bilinear approx of the above matrix.
output:
var -- variance of each component
var_exp -- cumulative explained variance in percentage
Typical inputs are: X(centered),T,P for PCA or
X(centered),T,P / Y(centered),T,Q for PLSR.
"""
tot_var = sum(a**2)
var = 100*(sum(p**2, 0)*sum(t**2, 0))/tot_var
var_exp = var.cumsum()
return var, var_exp
def residual_diagnostics(Y, Yhat, aopt=1):
"""Root mean errors and press values.
R2 vals
"""
pass
def ssq(E, axis=0, weights=None):
"""Sum of squares, supports weights."""
n = E.shape[axis]
if weights==None:
weights = eye(n)
else:
weights = diag(weights)
if axis==0:
Ew = dot(weights, E)
elif axis==1:
Ew = dot(E, weights)
else:
raise NotImplementedError, "Higher order modes not supported"
return pow(Ew,2).sum(axis)
def vnorm(x):
"""Returns the euclidian norm of a vector.
This is considerably faster than linalg.norm
"""
return sqrt(dot(x,x.conj()))
def mahalanobis(a, loc=None, acov=None, invcov=None):
"""Returns the distance of each observation in a
from the location estimate (loc) of the data,
relative to the shape of the data.
a : data matrix (n observations in rows, p variables in columns)
loc : location estimate of the data (p-dimensional vector)
acov or invcov : scatter estimate of the data, or the inverse of the scatter estimate (p x p matrix)
:Returns:
A vector containing the distances of all the observations to loc.
"""
n, p = a.shape
if loc==None:
loc = a.mean(0)
loc = atleast_2d(loc)
if loc.shape[1]==1:
loc = loc.T; #ensure rowvector
assert(loc.shape[1]==p)
xc = a - loc
if acov==None and invcov==None:
acov = dot(xc.T, xc)
if invcov != None:
covmat = atleast_2d(invcov)
if min(covmat.shape)==1:
covmat = diag(invcov.ravel())
else:
covmat = atleast_2d(acov)
if min(covmat.shape)==1:
covmat = diag(covmat.ravel())
covmat = inv(covmat)
# mdist = diag(dot(dot(xc, covmat),xc.T))
mdist = (dot(xc, covmat)*xc).sum(1)
return mdist
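# Usage sketch (illustrative): with the defaults, loc is the column mean and
# the scatter is estimated from the data itself.
#
#   >>> from scipy import rand
#   >>> a = rand(20, 3)
#   >>> d = mahalanobis(a)   # vector of 20 squared distances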
def lpls_qvals(a, b, c, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
sim_method='shuffle',p_center='med', cov_center='med',crot=True,
strict=False, mean_ctr=[2,0,2], nsets=None):
"""Returns qvals for l-pls model.
input:
a -- data matrix
b -- data matrix
c -- data matrix
aopt -- scalar, opt. number of components
alpha -- [0,1] regularisation parameter for T2-test
zx_alpha -- [0,1] how much Z information to include
n_iter -- number of permutations
sim_method -- permutation method ['shuffle']
p_center -- location estimator for sub models ['med']
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use strict (rot/flips only) rotations?
"""
m, n = a.shape
p, k = c.shape
pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
pert_tsq_z = zeros((p, n_iter), dtype='d') # (nzvars x n_subsets)
# Full model
#print "Full model start"
dat = lpls(a, b, c, aopt, scale='loads', mean_ctr=mean_ctr)
Wc, Lc = lpls_jk(a, b, c , aopt, nsets=nsets)
#print "Full hot"
cal_tsq_x = hotelling(Wc, dat['W'], alpha = alpha)
cal_tsq_z = hotelling(Lc, dat['L'], alpha = 0)
# Perturbations
Vs = shuffle_1d(b, n_iter, axis=0)
for i, b_shuff in enumerate(Vs):
print i
dat = lpls(a, b_shuff,c, aopt, scale='loads', mean_ctr=mean_ctr)
Wi, Li = lpls_jk(a, b_shuff, c, aopt, nsets=nsets)
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
def fdr(tsq, tsqp, loc_method='mean'):
n, = tsq.shape
k, m = tsqp.shape
assert(n==k)
n_false = empty((n, m), 'd')
sort_index = argsort(tsq)[::-1]
r_index = argsort(sort_index)
for i in xrange(m):
for j in xrange(n):
n_false[j,i] = (tsqp[:,i]>tsq[j]).sum()
#cPickle.dump(n_false, open("/tmp/nfalse.dat_"+str(n), "w"))
if loc_method=='mean':
fp = mean(n_false,1)
elif loc_method == 'median':
fp = median(n_false.T)
else:
raise ValueError
n_signif = (arange(n) + 1.0)[r_index]
fd_rate = fp/n_signif
return fd_rate
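# Usage sketch (illustrative): q-values from a calibrated statistic and its
# permutation distribution, as built in pls_qvals above.
#
#   >>> qvals = fdr(tsq_full, TSQ, loc_method='median')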

115
laydi/lib/cx_utils.py Normal file
View File

@@ -0,0 +1,115 @@
from scipy import apply_along_axis,newaxis,zeros,\
round_,nonzero,dot,argmax,any,sqrt,ndarray,\
trace,zeros_like,sign,sort,real,argsort,rand,array,\
matrix,nan
from scipy.linalg import norm,svd,inv,eig
from scipy.stats import median,mean
def normalise(a, axis=0, return_scales=False):
s = apply_along_axis(norm, axis, a)
if axis==0:
s = s[newaxis]
else:
s = s[:,newaxis]
a_s = a/s
if return_scales:
return a_s, s
return a_s
def sub2ind(shape, i, j):
"""Indices from subscripts. Only support for 2d"""
row,col = shape
ind = []
for k in xrange(len(i)):
for m in xrange(len(j)):
ind.append(i[k]*col + j[m])
return ind
def sorted_eig(a, b=None,sort_by='sm'):
"""
Just eig with real part of output sorted:
This is for convenience only, not general!
sort_by='sm': return the eigenvectors by eigenvalues
of smallest magnitude first. (default)
'lm': returns largest eigenvalues first
output: just as eig with 2 outputs
-- s,v (eigvals,eigenvectors)
(This is reversed output compared to matlab)
"""
s,v = eig(a, b)
s = real(s) # dont expect any imaginary part
v = real(v)
ind = argsort(s)
if sort_by=='lm':
ind = ind[::-1]
v = v.take(ind, 1)
s = s.take(ind)
return s,v
def str2num(string_number):
"""Convert input (string number) into number, if float(string_number) fails, a nan is inserted.
"""
missings = ['','nan','NaN','NA']
try:
num = float(string_number)
except:
if string_number in missings:
num = nan
else:
print "Found strange entry: %s" %string_number
raise
return num
def randperm(n):
"""Return a random permutation of range(n)."""
# argsort of i.i.d. uniforms is a uniformly random permutation
return rand(n).argsort().astype('i')
def mat_center(X,axis=0,ret_mn=False):
"""Mean center matrix along axis.
X -- matrix, data
axis -- dim,
ret_mn -- bool, return mean
output:
Xc, [mnX]
NB: axis = 1 is column-centering, axis=0=row-centering
default is row centering (axis=0)
"""
try:
rows,cols = X.shape
except ValueError:
print "The X data needs to be two-dimensional"
if axis==0:
mnX = mean(X,axis)[newaxis]
Xs = X - mnX
elif axis==1:
mnX = mean(X,axis)[newaxis]
Xs = (X.T - mnX).T
if ret_mn:
return Xs,mnX
else:
return Xs
def m_shape(array):
"""Returns the array shape on the form of a numpy.matrix."""
return matrix(array).shape

879
laydi/lib/engines.py Normal file
View File

@@ -0,0 +1,879 @@
"""Module contain algorithms for low-rank models.
There is almost no typechecking of any kind here, just focus on speed
"""
import math
import warnings
from scipy.linalg import svd,inv
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
apply_along_axis,mean,ones,randn,empty_like,outer,r_,c_,\
rand,sum,cumsum,matrix, expand_dims,minimum,where,arange,inner,tile
has_sym = True
has_arpack = True
try:
from symeig import symeig
except ImportError:
has_sym = False
try:
from scipy.sandbox import arpack
except ImportError:
has_arpack = False
def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
""" Principal Component Analysis.
Performs PCA on given matrix and returns results in a dictionary.
:Parameters:
a : array
Data measurement matrix, (samples x variables)
aopt : int
Number of components to use, aopt<=min(samples, variables)
:Returns:
results : dict
keys -- values, T -- scores, P -- loadings, E -- residuals,
lev --leverages, ssq -- sum of squares, expvar -- cumulative
explained variance, aopt -- number of components used
:OtherParameters:
mode : str
Amount of info retained, ('fast', 'normal', 'detailed')
center_axis : int
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
:SeeAlso:
- pcr : other blm
- pls : other blm
- lpls : other blm
Notes
-----
Uses kernel speed-up if m>>n or m<<n.
If residuals turn rank deficient, a lower number of components than given
in input will be used. The number of components used is given in
results-dict.
Examples
--------
>>> import scipy,engines
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
>>> dat=engines.pca(a, 2)
>>> dat['expvarx']
array([0.,99.8561562, 100.])
"""
m, n = a.shape
assert(aopt<=min(m,n))
if center_axis>=0:
a = a - expand_dims(a.mean(center_axis), center_axis)
if m>(n+100) or n>(m+100):
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
else:
u, s, vt = svd(a, 0)
v = vt.T
e = s**2
tol = 1e-10
eff_rank = sum(s>s[0]*tol)
aopt = minimum(aopt, eff_rank)
T = u*s
s = s[:aopt]
T = T[:,:aopt]
P = v[:,:aopt]
if scale=='loads':
T = T/s
P = P*s
if mode == 'fast':
return {'T':T, 'P':P, 'aopt':aopt}
if mode=='detailed':
E = empty((aopt, m, n))
ssq = []
lev = []
for ai in range(aopt):
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
if scale=='loads':
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
else:
lev.append([(T**2).sum(1), ((s*P)**2).sum(1)])
else:
# residuals
E = a - dot(T, P.T)
#E = a
SEP = E**2
ssq = [SEP.sum(0), SEP.sum(1)]
# leverages
if scale=='loads':
lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
else:
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
# variances
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
""" Principal Component Regression.
Performs PCR on given matrix and returns results in a dictionary.
:Parameters:
a : array
Data measurement matrix, (samples x variables)
b : array
Data response matrix, (samples x responses)
aopt : int
Number of components to use, aopt<=min(samples, variables)
:Returns:
results : dict
keys -- values, T -- scores, P -- loadings, E -- residuals,
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
explained variance, aopt -- number of components used
:OtherParameters:
mode : str
Amount of info retained, ('fast', 'normal', 'detailed')
center_axis : int
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
:SeeAlso:
- pca : other blm
- pls : other blm
- lpls : other blm
Notes
-----
Uses kernel speed-up if m>>n or m<<n.
If residuals turn rank deficient, a lower number of components than given
in input will be used. The number of components used is given in results-dict.
Examples
--------
>>> import scipy,engines
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
>>> b=scipy.asarray([[1,1],[2,3]])
>>> dat=engines.pcr(a, b, 2)
>>> dat['expvarx']
array([0.,99.8561562, 100.])
"""
k, l = m_shape(b)
if center_axis>=0:
b = b - expand_dims(b.mean(center_axis), center_axis)
dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
T = dat['T']
weights = apply_along_axis(vnorm, 0, T)**2
if scale=='loads':
Q = dot(b.T, T*weights)
else:
Q = dot(b.T, T/weights)
if mode=='fast':
dat.update({'Q':Q})
return dat
if mode=='detailed':
F = empty((aopt, k, l))
for i in range(aopt):
F[i,:,:] = b - dot(T[:,:i+1], Q[:,:i+1].T)
else:
F = b - dot(T, Q.T)
expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]]
#fixme: Y-var leverages
dat.update({'Q':Q, 'F':F, 'expvary':expvary})
return dat
def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
"""Partial Least Squares Regression.
Performs PLS on given matrix and returns results in a dictionary.
:Parameters:
a : array
Data measurement matrix, (samples x variables)
b : array
Data response matrix, (samples x responses)
aopt : int
Number of components to use, aopt<=min(samples, variables)
:Returns:
results : dict
keys -- values, T -- scores, P -- loadings, E -- residuals,
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
explained variance of descriptors, expvary -- cumulative explained
variance of responses, aopt -- number of components used
:OtherParameters:
mode : str
Amount of info retained, ('fast', 'normal', 'detailed')
center_axis : int
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
:SeeAlso:
- pca : other blm
- pcr : other blm
- lpls : other blm
Notes
-----
Uses kernel speed-up if m>>n or m<<n.
If residuals turn rank deficient, a lower number of components than given
in input will be used. The number of components used is given in results-dict.
Examples
--------
>>> import scipy,engines
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
>>> b=scipy.asarray([[1,1],[2,3]])
>>> dat=engines.pls(a, b, 2)
>>> dat['expvarx']
array([0.,99.8561562, 100.])
"""
m, n = m_shape(a)
if ab!=None:
mm, l = m_shape(ab)
assert(m==mm)
else:
k, l = m_shape(b)
if center_axis>=0:
a = a - expand_dims(a.mean(center_axis), center_axis)
b = b - expand_dims(b.mean(center_axis), center_axis)
W = empty((n, aopt))
P = empty((n, aopt))
R = empty((n, aopt))
Q = empty((l, aopt))
T = empty((m, aopt))
B = empty((aopt, n, l))
tt = empty((aopt,))
if ab==None:
ab = dot(a.T, b)
for i in range(aopt):
if ab.shape[1]==1: #pls 1
w = ab.reshape(n, l)
w = w/vnorm(w)
elif n<l: # more yvars than xvars
if has_sym:
s, w = symeig(dot(ab, ab.T),range=[n,n],overwrite=True)
else:
w, s, vh = svd(dot(ab, ab.T))
w = w[:,:1]
else: # standard wide xdata
if has_sym:
s, q = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
else:
q, s, vh = svd(dot(ab.T, ab))
q = q[:,:1]
w = dot(ab, q)
w = w/vnorm(w)
r = w.copy()
if i>0:
for j in range(0, i, 1):
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
t = dot(a, r)
tt[i] = tti = dot(t.T, t).ravel()
p = dot(a.T, t)/tti
q = dot(r.T, ab).T/tti
ab = ab - dot(p, q.T)*tti
T[:,i] = t.ravel()
W[:,i] = w.ravel()
if mode=='fast' and i==aopt-1:
if scale=='loads':
tnorm = sqrt(tt)
T = T/tnorm
W = W*tnorm
return {'T':T, 'W':W}
P[:,i] = p.ravel()
R[:,i] = r.ravel()
Q[:,i] = q.ravel()
#B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
qnorm = apply_along_axis(vnorm, 0, Q)
tnorm = sqrt(tt)
pp = (P**2).sum(0)
if mode=='detailed':
E = empty((aopt, m, n))
F = empty((aopt, k, l))
ssqx, ssqy = [], []
leverage = empty((aopt, m))
h2x = [] #hotellings T^2
h2y = []
for ai in range(aopt):
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
F[ai] = b - dot(T[:,:ai+1], Q[:,:ai+1].T)
ssqx.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
ssqy.append([(F[ai,:,:]**2).mean(0), (F[ai,:,:]**2).mean(1)])
leverage[ai,:] = 1./m + ((T[:,:ai+1]/tnorm[:ai+1])**2).sum(1)
h2y.append(1./k + ((Q[:,:ai+1]/qnorm[:ai+1])**2).sum(1))
else:
# residuals
E = a - dot(T, P.T)
F = b - dot(T, Q.T)
sepx = E**2
ssqx = [sepx.sum(0), sepx.sum(1)]
sepy = F**2
ssqy = [sepy.sum(0), sepy.sum(1)]
# leverage
leverage = 1./m + ((T/tnorm)**2).sum(1)
h2x = []
h2y = []
# variances
tp= tt*pp
tq = tt*qnorm*qnorm
expvarx = r_[0, 100*tp/(a*a).sum()]
expvary = r_[0, 100*tq/(b*b).sum()]
if scale=='loads':
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
P = P*tnorm
return {'Q':Q, 'P':P, 'T':T, 'W':W, 'R':R, 'E':E, 'F':F,
'expvarx':expvarx, 'expvary':expvary, 'ssqx':ssqx, 'ssqy':ssqy,
'leverage':leverage, 'h2':h2x}
def w_simpls(aat, b, aopt):
""" Simpls for wide matrices.
Fast pls for crossval, used in calc rmsep for wide X
There is no P or W. T is normalised
"""
bb = b.copy()
m, m = aat.shape
U = empty((m, aopt)) # W
T = empty((m, aopt))
H = empty((m, aopt)) # R
PROJ = empty((m, aopt)) # P?
for i in range(aopt):
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
u = dot(b, q[:,:1]) #y-factor scores
U[:,i] = u.ravel()
t = dot(aat, u)
t = t/vnorm(t)
T[:,i] = t.ravel()
h = dot(aat, t) #score-weights
H[:,i] = h.ravel()
PROJ[:,:i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, H[:,:i+1])) )
if i<aopt:
b = b - dot(PROJ[:,:i+1], dot(H[:,:i+1].T,b) )
C = dot(bb.T, T)
return {'T':T, 'U':U, 'Q':C, 'H':H}
def w_pls(aat, b, aopt):
""" Pls for wide matrices.
Fast pls for crossval, used in calc rmsep for wide X
There is no P or W. T is normalised
aat = centered kernel matrix
b = centered y
"""
bb = b.copy()
k, l = m_shape(b)
m, m = m_shape(aat)
U = empty((m, aopt)) # W
T = empty((m, aopt))
R = empty((m, aopt)) # R
PROJ = empty((m, aopt)) # P?
for i in range(aopt):
if has_sym:
s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
else:
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
q = q[:,:1]
u = dot(b , q) #y-factor scores
U[:,i] = u.ravel()
t = dot(aat, u)
t = t/vnorm(t)
T[:,i] = t.ravel()
r = dot(aat, t)#score-weights
#r = r/vnorm(r)
R[:,i] = r.ravel()
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
if i<aopt:
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
C = dot(bb.T, T)
return {'T':T, 'U':U, 'Q':C, 'R':R}
def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
"""Undeflated Ridged svd(X'Y)
"""
m, n = m_shape(a)
k, l = m_shape(b)
u, s, vt = svd(b, full_matrices=0)
g0 = dot(u*s, u.T)
g = (1 - r)*g0 + r*eye(m)
ag = dot(a.T, g)
u, s, vt = svd(ag, full_matrices=0)
W = u[:,:aopt]
K = vt[:aopt,:].T
T = dot(a, W)
tnorm = apply_along_axis(vnorm, 0, T) # norm of T-columns
if mode == 'fast':
if scale=='loads':
T = T/tnorm
W = W*tnorm
return {'T':T, 'W':W}
U = dot(g0, K) #fixme check this
Q = dot(b.T, dot(T, inv(dot(T.T, T)) ))
B = zeros((aopt, n, l), dtype='f')
for i in range(aopt):
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
if mode == 'detailed':
E = empty((aopt, m, n))
F = empty((aopt, k, l))
for i in range(aopt):
E[i] = a - dot(T[:,:i+1], W[:,:i+1].T)
F[i] = b - dot(a, B[i])
else: #normal
F = b - dot(a, B[-1])
E = a - dot(T, W.T)
if scale=='loads':
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
(X!Z)->Y
:input:
X : data matrix (m, n)
Y : data matrix (m, l)
Z : data matrix (n, o)
:output:
T : X-scores
W : X-weights/Z-weights
P : X-loadings
Q : Y-loadings
U : X-Y relation
L : Z-scores
K : Z-loads
B : Regression coefficients X->Y
b0: Regression coefficient intercept
evx : X-explained variance
evy : Y-explained variance
evz : Z-explained variance
mnx : X location
mny : Y location
mnz : Z location
:Notes:
"""
if mean_ctr!=None:
xctr, yctr, zctr = mean_ctr
X, mnX = center(X, xctr)
Y, mnY = center(Y, yctr)
Z, mnZ = center(Z, zctr)
varX = (X**2).sum()
varY = (Y**2).sum()
varZ = (Z**2).sum()
m, n = X.shape
k, l = Y.shape
u, o = Z.shape
# initialize
U = empty((k, a_max))
Q = empty((l, a_max))
T = empty((m, a_max))
W = empty((n, a_max))
P = empty((n, a_max))
K = empty((o, a_max))
L = empty((u, a_max))
B = empty((a_max, n, l))
#b0 = empty((a_max, 1, l))
var_x = empty((a_max,))
var_y = empty((a_max,))
var_z = empty((a_max,))
MAX_ITER = 250
LIM = 1e-1
for a in range(a_max):
if verbose:
print "\nWorking on comp. %s" %a
u = Y[:,:1]
diff = 1
niter = 0
while (diff>LIM and niter<MAX_ITER):
niter += 1
u1 = u.copy()
w = dot(X.T, u)
w = w/sqrt(dot(w.T, w))
#w = w/dot(w.T, w)
l = dot(Z, w)
k = dot(Z.T, l)
k = k/sqrt(dot(k.T, k))
#k = k/dot(k.T, k)
w = alpha*k + (1-alpha)*w
#print sqrt(dot(w.T, w))
w = w/sqrt(dot(w.T, w))
t = dot(X, w)
c = dot(Y.T, t)
c = c/sqrt(dot(c.T, c))
u = dot(Y, c)
diff = dot((u-u1).T, (u-u1))
if verbose:
print "Converged after %s iterations" %niter
print "Error: %.2E" %diff
tt = dot(t.T, t)
p = dot(X.T, t)/tt
q = dot(Y.T, t)/tt
l = dot(Z, w)
U[:,a] = u.ravel()
W[:,a] = w.ravel()
P[:,a] = p.ravel()
T[:,a] = t.ravel()
Q[:,a] = q.ravel()
L[:,a] = l.ravel()
K[:,a] = k.ravel()
X = X - dot(t, p.T)
Y = Y - dot(t, q.T)
Z = (Z.T - dot(w, l.T)).T
var_x[a] = pow(X, 2).sum()
var_y[a] = pow(Y, 2).sum()
var_z[a] = pow(Z, 2).sum()
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
#b0[a] = mnY - dot(mnX, B[a])
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
evz = 100.0*(1 - var_z/varZ)
if scale=='loads':
tnorm = apply_along_axis(vnorm, 0, T)
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
knorm = apply_along_axis(vnorm, 0, K)
L = L*knorm
K = K/knorm
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}
def nipals_pls(X, Y, a_max, alpha=.7, ax_center=0, mode='normal', scale='scores', verbose=False):
"""Partial Least Sqaures Regression by the nipals algorithm.
(X!Z)->Y
:input:
X : data matrix (m, n)
Y : data matrix (m, l)
:output:
T : X-scores
W : X-weights
P : X-loadings
Q : Y-loadings
U : X-Y relation
B : Regression coefficients X->Y
b0: Regression coefficient intercept
evx : X-explained variance
evy : Y-explained variance
:Notes:
"""
if ax_center>=0:
mn_x = expand_dims(X.mean(ax_center), ax_center)
mn_y = expand_dims(Y.mean(ax_center), ax_center)
X = X - mn_x
Y = Y - mn_y
else:
mn_x = zeros((1, X.shape[1]))
mn_y = zeros((1, Y.shape[1]))
varX = pow(X, 2).sum()
varY = pow(Y, 2).sum()
m, n = X.shape
k, l = Y.shape
# initialize
U = empty((k, a_max))
Q = empty((l, a_max))
T = empty((m, a_max))
W = empty((n, a_max))
P = empty((n, a_max))
B = empty((a_max, n, l))
b0 = empty((a_max, 1, l)) # intercept is (1, l) per component
var_x = empty((a_max,))
var_y = empty((a_max,))
t1 = X[:,:1]
for a in range(a_max):
if verbose:
print "\n Working on comp. %s" %a
u = Y[:,:1]
diff = 1
MAX_ITER = 100
lim = 1e-16
niter = 0
while (diff>lim and niter<MAX_ITER):
niter += 1
#u1 = u.copy()
w = dot(X.T, u)
w = w/sqrt(dot(w.T, w))
#l = dot(Z, w)
#k = dot(Z.T, l)
#k = k/sqrt(dot(k.T, k))
#w = alpha*k + (1-alpha)*w
#w = w/sqrt(dot(w.T, w))
t = dot(X, w)
q = dot(Y.T, t)
q = q/sqrt(dot(q.T, q))
u = dot(Y, q)
diff = vnorm(t1 - t)
t1 = t.copy()
if verbose:
print "Converged after %s iterations" %niter
#tt = dot(t.T, t)
#p = dot(X.T, t)/tt
#q = dot(Y.T, t)/tt
#l = dot(Z, w)
p = dot(X.T, t)/dot(t.T, t)
p_norm = vnorm(p)
t = t*p_norm
w = w*p_norm
p = p/p_norm
U[:,a] = u.ravel()
W[:,a] = w.ravel()
P[:,a] = p.ravel()
T[:,a] = t.ravel()
Q[:,a] = q.ravel()
X = X - dot(t, p.T)
Y = Y - dot(t, q.T)
var_x[a] = pow(X, 2).sum()
var_y[a] = pow(Y, 2).sum()
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
b0[a] = mn_y - dot(mn_x, B[a])
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
if scale=='loads':
tnorm = apply_along_axis(vnorm, 0, T)
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy,
'mnx': mn_x, 'mny': mn_y, 'xc': X, 'yc': Y}
########### Helper routines #########
def m_shape(array):
return matrix(array).shape
def esvd(data, amax=None):
"""SVD with the option of economy sized calculation
Calculate subspaces of X'X or XX' depending on the shape
of the matrix.
Good for extreme fat or thin matrices
:notes:
Numpy supports this by setting full_matrices=0
"""
has_arpack = True
try:
import arpack
except ImportError:
has_arpack = False
m, n = data.shape
if m>=n:
kernel = dot(data.T, data)
if has_arpack:
if amax==None:
amax = n
s, v = arpack.eigen_symmetric(kernel,k=amax, which='LM',
maxiter=200,tol=1e-5)
elif has_sym:
if amax==None:
amax = n
pcrange = None
else:
pcrange = [n-amax, n]
s, v = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1].real
v = v[:,::-1].real
else:
u, s, vt = svd(kernel)
v = vt.T
s = sqrt(s)
u = dot(data, v)/s
else:
kernel = dot(data, data.T)
if has_sym:
if amax==None:
amax = m
pcrange = None
else:
pcrange = [m-amax, m]
s, u = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1]
u = u[:,::-1]
else:
u, s, vt = svd(kernel)
s = sqrt(s)
v = dot(data.T, u)/s
# some use of symeig returns the 0 imaginary part
return u.real, s.real, v.real
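# Sanity-check sketch (illustrative): for a fat matrix the kernel trick in
# esvd should reproduce the singular values of a plain svd.
#
#   >>> from scipy import rand
#   >>> a = rand(5, 200)
#   >>> u, s, v = esvd(a)
#   >>> u2, s2, v2t = svd(a, 0)   # s and s2 should agree closely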
def vnorm(x):
# assume column arrays (or vectors)
return math.sqrt(dot(x.T, x))
def center(a, axis):
# 0 = col center, 1 = row center, 2 = double center
# -1 = nothing
# check if we have a vector
is_vec = len(a.shape)==1
if not is_vec:
is_vec = a.shape[0]==1 or a.shape[1]==1
if is_vec:
if axis==2:
warnings.warn("Double centering of vecor ignored, using ordinary centering")
if axis==-1:
mn = 0
else:
mn = a.mean()
return a - mn, mn
# !!!fixme: use broadcasting
if axis==-1:
mn = zeros((1,a.shape[1],))
#mn = tile(mn, (a.shape[0], 1))
elif axis==0:
mn = a.mean(0)[newaxis]
#mn = tile(mn, (a.shape[0], 1))
elif axis==1:
mn = a.mean(1)[:,newaxis]
#mn = tile(mn, (1, a.shape[1]))
elif axis==2:
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
return a - mn , a.mean(0)[newaxis]
else:
raise IOError("input error: axis must be in [-1,0,1,2]")
return a - mn, mn
def scale(a, axis):
"""Scale array to unit variance along axis (-1: no scaling)."""
if axis==-1:
sc = ones((a.shape[1],))
elif axis==0:
sc = a.std(0)
elif axis==1:
sc = a.std(1)[:,newaxis]
else:
raise IOError("input error: axis must be in [-1,0,1]")
return a/sc, sc
## #PCA CALCS
## % Calculate Q limit using unused eigenvalues
## temp = diag(s);
## if n < m
## emod = temp(lv+1:n,:);
## else
## emod = temp(lv+1:m,:);
## end
## th1 = sum(emod);
## th2 = sum(emod.^2);
## th3 = sum(emod.^3);
## h0 = 1 - ((2*th1*th3)/(3*th2^2));
## if h0 <= 0.0
## h0 = .0001;
## disp(' ')
## disp('Warning: Distribution of unused eigenvalues indicates that')
## disp(' you should probably retain more PCs in the model.')
## end
## q = th1*(((1.65*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
## disp(' ')
## disp('The 95% Q limit is')
## disp(q)
## if plots >= 1
## lim = [q q];
## plot(scl,res,scllim,lim,'--b')
## str = sprintf('Process Residual Q with 95 Percent Limit Based on %g PC Model',lv);
## title(str)
## xlabel('Sample Number')
## ylabel('Residual')
## pause
## end
## % Calculate T^2 limit using ftest routine
## if lv > 1
## if m > 300
## tsq = (lv*(m-1)/(m-lv))*ftest(.95,300,lv,2);
## else
## tsq = (lv*(m-1)/(m-lv))*ftest(.95,m-lv,lv,2);
## end
## disp(' ')
## disp('The 95% T^2 limit is')
## disp(tsq)
## % Calculate the value of T^2 by normalizing the scores to
## % unit variance and summing them up
## if plots >= 1.0
## temp2 = scores*inv(diag(ssq(1:lv,2).^.5));
## tsqvals = sum((temp2.^2)');
## tlim = [tsq tsq];
## plot(scl,tsqvals,scllim,tlim,'--b')
## str = sprintf('Value of T^2 with 95 Percent Limit Based on %g PC Model',lv);
## title(str)
## xlabel('Sample Number')
## ylabel('Value of T^2')
## end
## else
## disp('T^2 not calculated when number of latent variables = 1')
## tsq = 1.96^2;
## end

95
laydi/lib/hypergeom.py Normal file
View File

@@ -0,0 +1,95 @@
import scipy
try:
# FIXME: remove rpy in a more proper way
import rpy_does_not_exist
has_rpy = True
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
except:
has_rpy = False
def gene_hypergeo_test(selection, category_dataset):
"""Returns the pvals from a hypergeometric test of significance.
input:
-- selection: list of selected identifiers along 0 dim of cat.set
-- category dataset, categories along dim 1 (cols)
"""
gene_dim_name = category_dataset.get_dim_name(0)
category_dim_name = category_dataset.get_dim_name(1)
#categories
all_cats = category_dataset.get_identifiers(category_dim_name, sorted=True)
# gene_ids universe
all_genes = category_dataset.get_identifiers(gene_dim_name)
# significant genes
good_genes_all = list(selection)
gg_index = category_dataset.get_indices(gene_dim_name, good_genes_all)
# significant genes pr. category
good_genes_cat = []
for col in category_dataset.asarray().T:
index = scipy.where(col==1)[0]
index = scipy.intersect1d(index, gg_index)
if index.size==0:
good_genes_cat.append([])
else:
good_genes_cat.append(category_dataset.get_identifiers(gene_dim_name, index))
count = map(len, good_genes_cat)
count = scipy.asarray([max(i, 0) for i in count])
cat_count = category_dataset.asarray().sum(0)
if has_rpy:
rpy.r.assign("x", count - 1) #number of sign. genes in category i
rpy.r.assign("m", len(good_genes_all)) # number of sign. genes tot
rpy.r.assign("n", len(all_genes)-len(good_genes_all) ) # num. genes not sign.
rpy.r.assign("k", cat_count) #num. genes in cat i
silent_eval('pvals <- phyper(x, m, n, k, lower.tail=FALSE)')
pvals = rpy.r("pvals")
else:
pvals = p_hyper_geom(count, len(good_genes_all),
len(all_genes)-len(good_genes_all),
cat_count)
pvals = scipy.where(cat_count==0, 2, pvals)
pvals = scipy.where(scipy.isnan(pvals), 2, pvals)
out = {}
for i in range(pvals.size):
out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i])
return out
def p_hyper_geom(x, m, n, k):
"""Distribution function for the hypergeometric distribution.
Inputs:
-- x: vector of quantiles representing the number of white balls
drawn without replacement from an urn which contains both
black and white balls.
-- m: the number of white balls in the urn.
-- n: the number of black balls in the urn.
-- k: [vector] the number of balls drawn from the urn
Comments:
Similar to R's phyper with lower.tail=FALSE
"""
M = m + n
multiple_draws = False
if isinstance(k, scipy.ndarray) and k.size>1:
multiple_draws = True
n_draws = k.size
if n_draws<x.size:
print "n_draws: %d and n_found: %d Length mismatch, zero padded" %(k.size, x.size)
N = k
n = m
if not multiple_draws:
out = scipy.stats.hypergeom.pmf(x, M, n, N).cumsum()
else:
out = scipy.zeros((max(n_draws, x.size),))
for i in xrange(N.size):
out[i] = scipy.stats.hypergeom.pmf(x, M, n, N[i]).cumsum()[i]
return out
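# Usage sketch (illustrative): 3 of the 10 genes in a category are among the
# 100 significant genes, in a universe of 1000 genes (900 not significant).
#
#   >>> p = p_hyper_geom(scipy.asarray([3]), 100, 900, 10)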

567
laydi/lib/nx_utils.py Normal file
View File

@@ -0,0 +1,567 @@
import os,sys
from itertools import izip
import networkx as NX
from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\
outer,sum,atleast_2d,mean,array,rand
from scipy.linalg import eig,svd,inv,expm,norm
from cx_utils import sorted_eig
import numpy
eps = numpy.finfo(float).eps.item()
feps = numpy.finfo(numpy.single).eps.item()
_array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1}
class NXUTILSException(Exception): pass
def xgraph_to_graph(G):
"""Convert an Xgraph to an ordinary graph.
Edge attributes, mult.edges and self-loops are lost in the process.
"""
GG = NX.convert.from_dict_of_lists(NX.convert.to_dict_of_lists(G))
return GG
def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out='dist'):
"""
Function for calculating a general affinity matrix, based upon distances.
Affinity = 1 - distance ((10-1) 1 is far apart)
INPUT
data:
gene expression data, type dict data[gene] = expression-vector
G:
The network (networkx.base.Graph object)
mask:
The array mask shows which data are missing. If mask[i][j]==0, then
data[i][j] is missing.
weights:
The array weight contains the weights to be used when calculating distances.
transpose:
If transpose==0, then genes are clustered. If transpose==1, microarrays are
clustered.
dist:
The character dist defines the distance function to be used:
dist=='e': Euclidean distance
dist=='b': City Block distance
dist=='h': Harmonically summed Euclidean distance
dist=='c': Pearson correlation
dist=='a': absolute value of the correlation
dist=='u': uncentered correlation
dist=='x': absolute uncentered correlation
dist=='s': Spearman's rank correlation
dist=='k': Kendall's tau
For other values of dist, the default (Euclidean distance) is used.
OUTPUT
D :
Similarity matrix (nGenes x nGenes), symmetric, d_ij in [0,1]
Normalized so max weight = 1.0
"""
try:
from Bio import Cluster as CLS
except:
raise NXUTILSError("Import of Biopython failed")
n_var = len(data)
n_samp = len(data[data.keys()[0]])
X = zeros((n_var, n_samp), dtype='<f8')
for i, gene in enumerate(ids): #this should be right!!
X[i,:] = data[gene]
#X = transpose(X) # distancematrix needs matrix as (nGenes,nSamples)
D_list = CLS.distancematrix(X, dist=dist)
D = zeros((n_var, n_var), dtype='<f8')
for i,row in enumerate(D_list):
if i>0:
D[i,:len(row)]=row
D = D + D.T
MAX = 30.0
D_max = D.max()/MAX
D_n = D/D_max #normalised (max = 10.0)
D_n = (MAX+1.) - D_n #using correlation (inverse distance for dists)
A = NX.adj_matrix(G, nodelist=ids)
if out=='dist':
return D_n*A
elif out=='heat_kernel':
t=1.0
K = exp(-t*D*A)
return K
elif out=='complete':
return D_n
else:
return []
def remove_one_degree_nodes(G, iter=True):
"""Removes all nodes with only one neighbour. These nodes does
not contribute to community structure.
input:
G -- graph
iter -- True/False iteratively remove?
"""
G_copy = G.copy()
if iter==True:
while 1:
bad_nodes=[]
for node in G_copy.nodes():
if len(G_copy.neighbors(node))==1:
bad_nodes.append(node)
if len(bad_nodes)>0:
G_copy.delete_nodes_from(bad_nodes)
else:
break
else:
bad_nodes=[]
for node in G_copy.nodes():
if len(G_copy.neighbors(node))==1:
bad_nodes.append(node)
if len(bad_nodes)>0:
G_copy.delete_nodes_from(bad_nodes)
print "Deleted %s nodes from network" %(len(G)-len(G_copy))
return G_copy
def key_players(G, n=1, with_labels=False):
"""
Resilience measure
Identification of key nodes by fraction of nodes in
disconnected subgraph when the node is removed.
output:
fraction of nodes disconnected when node i is removed
"""
i=0
frac=[]
labels = {}
for node in G.nodes():
i+=1
print i
T = G.copy()
T.delete_node(node)
n_nodes = T.number_of_nodes()
sub_graphs = NX.connected_component_subgraphs(T)
n = len(sub_graphs)
if n>1:
strong_comp = sub_graphs[0]
fraction = 1.0 - 1.0*strong_comp.number_of_nodes()/n_nodes
frac.append(fraction)
labels[node]=fraction
else:
frac.append(0.0)
labels[node]=0.0
out = 1.0 - array(frac)
if with_labels==True:
return out,labels
else:
return out
def node_weighted_adj_matrix(G, weights=None, ave_type='harmonic', with_labels=False):
"""Return a weighted adjacency matrix of graph. The weights are
node weights.
input: G -- graph
weights -- dict, keys: nodes, values: weights
with_labels -- True/False, return labels?
output: A -- weighted adjacency matrix
[index] -- node labels
"""
n=G.order()
# make an dictionary that maps vertex name to position
index={}
count=0
for node in G.nodes():
index[node]=count
count = count+1
a = zeros((n,n))
if type(G)=='networkx.xbase.XGraph':
raise
for head,tail in G.edges():
if ave_type == 'geometric':
a[index[head],index[tail]]= sqrt(weights[head]*weights[tail])
a[index[tail],index[head]]= a[index[head],index[tail]]
elif ave_type == 'harmonic':
# harmonic mean of the two node weights
w = 2.0*weights[head]*weights[tail]/(weights[head] + weights[tail])
a[index[head],index[tail]] = w
a[index[tail],index[head]] = w
if with_labels:
return a,index
else:
return a
def weighted_adj_matrix(G, with_labels=False):
"""Adjacency matrix of an XGraph whos weights are given in edges.
"""
A, labels = NX.adj_matrix(G, with_labels=True)
W = A.astype('<f8')
for orf, i in labels.items():
for orf2, j in labels.items():
if G.has_edge(orf, orf2):
edge_weight = G.get_edge(orf, orf2)
W[i,j] = edge_weight
W[j,i] = edge_weight
if with_labels==True:
return W, labels
else:
return W
def assortative_index(G):
"""Ouputs two vectors: the degree and the neighbor average degree.
Used to measure the assortative mixing. If the average degree is
pos. correlated with the degree we know that hubs tend to connect
to other hubs.
input: G, graph connected!!
ouput: d,mn_d: degree, and average degree of neighb.
(degree sorting from degree(with_labels=True))
"""
d = G.degree(with_labels=True)
out=[]
for node in G.nodes():
nn = G.neighbors(node)
if len(nn)>0:
nn_d = mean([float(d[i]) for i in nn])
out.append((d[node], nn_d))
return array(out).T
def struct_equivalence(G,n1,n2):
"""Returns the structural equivalence of a node pair. Two nodes
are structural equal if they share the same neighbors.
x_s = [ne(n1) union ne(n2) - ne(n1) intersection ne(n2)]/[ne(n1)
union ne(n2) + ne(n1) intersection ne(n2)]
ref: Brun et.al 2003
"""
#[ne(n1) union ne(n2) - ne(n1) intersection ne(n2
s1 = set(G.neighbors(n1))
s2 = set(G.neighbors(n2))
num_union = len(s1.union(s2))
num_intersection = len(s1.intersection(s2))
if (num_union + num_intersection) == 0:
xs = 0.0
else:
xs = float(num_union - num_intersection)/(num_union + num_intersection)
return xs
def struct_equivalence_all(G):
"""Not finnished.
"""
A,labels = NX.adj_matrix(G,with_labels=True)
pass
def hamming_distance(n1,n2):
"""Not finnsihed.
"""
pass
def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
"""Returns the correlation coefficient for each node. The
correlation coefficient is between the node and its neighbours.
"""
if nodelist==None:
nodelist=G.nodes()
if vec == None:
vec = G.degree(nodelist)
if len(vec)!=len(nodelist):
raise NXUTILSError("The node value vector is not of same length (%s) as the nodelist(%s)") %(len(vec), len(nodelist))
A = NX.ad_matrix(G, nodelist=nodelist)
for i, node in enumerate(nodelist):
nei_i = A[i,:]==1
vec_i = vec[nei_i]
def weighted_laplacian(G,with_labels=False):
"""Return standard Laplacian of graph from a weighted adjacency matrix."""
n= G.order()
I = eye(n)
A = weighted_adj_matrix(G)
D = I*sum(A, 0)
L = D-A
if with_labels:
A,index = weighted_adj_matrix(G, with_labels=True)
return L, index
else:
return L
def grow_subnetworks(G, T2):
"""Return the highest scoring (T2-test) subgraph og G.
Use simulated annealing to identify highly grow subgraphs.
ref: -- Ideker et.al (Bioinformatics 18, 2002)
-- Patil and Nielsen (PNAS 2006)
"""
N = 1000
states = [(node, False) for node in G.nodes()]
t2_last = 0.0
for i in xrange(N):
if i==0: #assign random states
states = [(state[0], True) for state in states if rand(1)>.5]
sub_nodes = [state[0] for state in states if state[1]]
Gsub = NX.subgraph(G, sub_nodes)
Gsub = NX.connected_components_subgraphs(Gsub)[0]
t2 = [T2[node] for node in Gsub]
if t2>t2_last:
pass
else:
p = numpy.exp()
"""Below are methods for calculating graph metrics
Four main decompositions :
0.) Adjacency diffusion kernel expm(A),
1.) von neumann kernels (diagonalisation of adjacency matrix)
2.) laplacian kernels (geometric series of adj.)
3.) diffusion kernels (exponential series of adj.)
---- Kv
von_neumann : Kv = (I-alpha*A)^-1 (mod: A(I-alpha*A)^-1)? ,
geom. series
---- Kl
laplacian: Kl = (I-alpha*L)^-1 , geom. series
---- Kd
laplacian_diffusion: Kd = expm(-alpha*L)
exp. series
---- Ke
Exponential diffusion.
Ke = expm(A) .... expm(-A)?
"""
# TODO:
# check for numerical unstable eigenvalues and set to zero
# othervise some inverses wil explode ->ok ..using pinv for inverses
#
# This gives results that look numerical unstable
#
# -- divided adj by sum(A[:]), check this one (paper by Lebart scales with number of edges)
#
#
#
# the neumann kernel is defined in Kandola to be K = A*(I-A)^-1
# lowest eigenvectors are same as the highest of K = A*A ?
# this needs clarification
# diffusion is still wrong! ... ok
# diff needs normalisation?! check the meaning of exp(-s) = exp(1/s) -L = 1/degree ... etc
# Is it the negative of exp. of adj. metrix in Kandola?
#
# Normalised=False returns only nans (no idea why!!) ... fixed ok
# 31.1: diff is ok exp(0)=1 not zero!
# 07.03.2005: normalisation is ok: -> normalisation will emphasize high degree nodes
# 10.03.2005: symeig is unstable an returns nans of some eigenvectors? switching back to eig
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
# 13.09.2206: update for use in numpy
# 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last
def K_expAdj(W, normalised=True, alpha=1.0):
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m,n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
if normalised==True:
T = diag( sqrt( 1./(sum(W,0))) )
W = dot(dot(T, W), T)
e,vr = eig(W)
s = real(e)**2 # from eigenvalues to singularvalues
vri = inv(vr)
s = maximum.reduce(s) + s
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = abs(cond*maximum.reduce(s))
psigma = eye(m)
for i in range(len(s)):
if abs(s[i]) > cutoff:
psigma[i,i] = .5*alpha*exp(s[i])
return dot(dot(vr,psigma),vri)
def K_vonNeumann(W, normalised=True, alpha=1.0):
""" The geometric series of path lengths.
Returns matrix square root of pseudo inverse of the adjacency matrix.
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m,n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
if normalised==True:
T = diag(sqrt(1./(sum(W,0))))
W = dot(dot(T,W),T)
e,vr = eig(W)
vri = inv(vr)
e = real(e) # we only work with real pos. eigvals
e = maximum.reduce(e) + e
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = cond*maximum.reduce(e)
psigma = zeros((m,n),t)
for i in range(len(e)):
if e[i] > cutoff:
psigma[i,i] = 1.0/e[i] #these are eig.vals (=sqrt(sing.vals))
return dot(dot(vr,psigma),vri).astype(t)
def K_laplacian(W, normalised=True, alpha=1.0):
""" This is the matrix pseudo inverse of L.
Also known as the average commute time matrix.
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m,n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
D = diag(sum(W,0))
L = D - W
if normalised==True:
T = diag(sqrt(1./sum(W, 0)))
L = dot(dot(T, L), T)
e,vr = eig(L)
e = real(e)
vri = inv(vr)
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = cond*maximum.reduce(e)
psigma = zeros((m,),t) # if s close to zero -> set 1/s = 0
for i in range(len(e)):
if e[i] > cutoff:
psigma[i] = 1.0/e[i]
K = dot(dot(vr, diag(psigma)), vri).astype(t)
K = real(K)
I = eye(n)
K = (1-alpha)*I + alpha*K
return K
def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
"""Returns diffusion kernel.
input:
-- W, adj. matrix
-- normalised [True/False]
-- alpha, [0,1] (degree of network influence)
-- beta, [0->), (diffusion degree)
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m, n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
D = diag(W.sum(0))
L = D - W
if normalised==True:
T = diag(sqrt(1./W.sum(0)))
L = dot(dot(T, L), T)
e, vr = eig(L)
vri = inv(vr) #inv
cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = 1.*abs(cond*maximum.reduce(e))
psigma = eye(m) # if eigvals are 0 exp(0)=1 (unnecessary)
#psigma = zeros((m,n), dtype='<f8')
for i in range(len(e)):
if abs(e[i]) > cutoff:
psigma[i,i] = exp(-beta*e[i])
#else:
# psigma[i,i] = 0.0
K = real(dot(dot(vr, psigma), vri))
I = eye(n, dtype='<f8')
K = (1. - alpha)*I + alpha*K
return K
def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
"""Returns diffusion kernel, using fast pade approximation.
input:
-- W, adj. matrix
-- normalised [True/False]
-- beta, [0->), (diffusion degree)
"""
D = diag(W.sum(0))
L = D - W
if normalised==True:
T = diag(sqrt(1./W.sum(0)))
L = dot(dot(T, L), T)
return expm(-beta*L)
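# Usage sketch (illustrative): both diffusion variants operate on a (possibly
# weighted) adjacency matrix; K_diffusion2 uses the pade approximation of expm.
#
#   >>> A = asarray(NX.adj_matrix(G))
#   >>> Kd = K_diffusion(A, normalised=True, alpha=1.0, beta=0.5)
#   >>> Kd2 = K_diffusion2(A, normalised=True, beta=0.5)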
def K_modularity(W, alpha=1.0):
""" Returns the matrix square root of Newmans modularity."""
W = asarray(W)
t = W.dtype.char
m, n = W.shape
d = sum(W, 0)
m = 1.*sum(d)
B = W - (outer(d, d)/m)
s,v = sorted_eig(B, sort_by='lm')
psigma = zeros( (n, n), dtype='<f8' )
for i in range(len(s)):
if s[i]>1e-7:
psigma[i,i] = sqrt(s[i])
#psigma[i,i] = s[i]
K = dot(dot(v, psigma), v.T)
I = eye(n)
K = (1 - alpha)*I + alpha*K
return K
def kernel_score(K, W):
"""Returns the modularity score.
K -- (modularity) kernel
W -- adjacency matrix (possibly weighted)
"""
# normalize W (: W'W=I)
m, n = shape(W)
for i in range(n):
W[:,i] = W[:,i]/norm(W[:,i])
score = diag(dot(W.T, dot(K, W)))
tot = sum(score)
return score, tot
def modularity_matrix(G, nodelist=None):
if not nodelist:
nodelist = G.nodes()
else:
G = NX.subgraph(G, nodelist)
A = NX.adj_matrix(G, nodelist=nodelist)
d = atleast_2d(G.degree(nbunch=nodelist))
m = 1.*G.number_of_edges()
B = A - dot(d.T, d)/m
return B

28
laydi/lib/packer.py Normal file
View File

@@ -0,0 +1,28 @@
from scipy import dot, argmin
from scipy.linalg import svd
class Packer:
"""A compression object used to speed up model calculations.
Often used in conjunction with crossvalidation and perturbations
analysis.
"""
def __init__(self,array):
self._shape = array.shape
self._array = array
self._packed_data = None
self._inflater = None
def expand(self,a):
if self._inflater!=None:
return dot(self._inflater,a)
def collapse(self,axis=None,mode='svd'):
if not axis:
axis = argmin(self._array.shape) # default is the smallest dim
if axis == 1:
self._array = self._array.T
u, s, vt = svd(self._array,full_matrices=0)
self._inflater = vt.T
self._packed_data = u*s
return self._packed_data
def get_packed_data(self):
return self._packed_data
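# Usage sketch (illustrative): pack a wide matrix before heavy resampling,
# then map results back to the original variable space. wp is a hypothetical
# (10 x a) result computed in the packed space.
#
#   >>> from scipy import rand
#   >>> packer = Packer(rand(10, 500))
#   >>> xp = packer.collapse()      # (10 x 10) packed data
#   >>> w = packer.expand(wp)       # back to the (500 x a) original space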

223
laydi/lib/select_generators.py Normal file
View File

@@ -0,0 +1,223 @@
"""Matrix cross validation selection generators
"""
from scipy import take,arange,ceil,repeat,newaxis,mean,asarray,dot,ones,\
random,array_split,floor,vstack,asarray,minimum
from cx_utils import randperm
def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
"""Random block crossvalidation for wide (XX.T) trick in PLS.
Leave-one-out is a subset, with n_blocks equals nSamples
aat -- outerproduct of X
b -- Y
n_blocks -- number of blocks (default: number of samples, i.e. leave-one-out)
center -- use centering of calibration sets; (aat_in, b_in) are centered
Returns:
-- aat_in,aat_out,b_in,b_out,[out]
"""
m, n = aat.shape
index = randperm(m)
if n_blocks==None: n_blocks = m
nValuesInBlock = int(ceil(float(m)/n_blocks))
if n_blocks==m:
index = arange(m)
out_ind = [index[i*nValuesInBlock:(i+1)*nValuesInBlock] for i in range(n_blocks)]
for out in out_ind:
inn = [i for i in index if i not in out]
aat_in = aat[inn,:][:,inn]
aat_out = aat[out,:][:,inn]
b_in = b[inn,:]
b_out = b[out,:]
if center:
aat_in, mn = outerprod_centering(aat_in)
b_in = b_in - b_in.mean(0) # b_in + b_out/(b_in.shape[0])
if index_out:
yield aat_in,aat_out,b_in,b_out,out
else:
yield aat_in,aat_out,b_in,b_out
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
"""Random block crossvalidation
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
"""
index = randperm(a.shape[axis])
#index = arange(a.shape[axis])
if n_blocks==None:
n_blocks = a.shape[axis]
n_in_set = int(ceil(float(a.shape[axis])/n_blocks))
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
acal = a.take(inn, 0)
atrue = a.take(out, 0)
bcal = b.take(inn, 0)
btrue = b.take(out, 0)
if center:
mn_a = acal.mean(0)[newaxis]
acal = acal - mn_a
atrue = atrue - mn_a
mn_b = bcal.mean(0)[newaxis]
bcal = bcal - mn_b
btrue = btrue - mn_b
if index_out:
yield acal, atrue, bcal, btrue, out
else:
yield acal, atrue, bcal, btrue
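# Usage sketch (illustrative): 5-block crossvalidation with per-block centering.
#
#   >>> from scipy import rand
#   >>> for acal, atrue, bcal, btrue in pls_gen(rand(20, 100), rand(20, 1),
#   ...                                         n_blocks=5, center=True):
#   ...     pass   # fit on (acal, bcal), predict (atrue, btrue)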
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
"""Returns a generator of crossvalidation sample segments.
input:
-- a, data matrix (m x n)
-- n_sets, number of segments/subsets to generate.
-- center, bool, choice of centering each subset
-- index_out, bool, return subset index
-- axis, int, which axis to get subset from
output:
-- V, generator with n_sets members (subsets)
"""
m = a.shape[axis]
index = randperm(m)
if n_sets==None:
n_sets = m
n_in_set = int(ceil(float(m)/n_sets))
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
acal = a.take(inn, 0)
atrue = a.take(out, 0)
if center:
mn_a = acal.mean(0)[newaxis]
acal = acal - mn_a
atrue = atrue - mn_a
if index_out:
yield acal, atrue, out
else:
yield acal, atrue
def w_pls_gen_jk(a, b, n_sets=None, center=True,
index_out=False, axis=0):
"""Random block crossvalidation for wide X (m>>n)
Leave-one-out is a subset, with n_sets equals a.shape[-1]
Returns : X_m and X_m'Y_m
"""
m = a.shape[axis]
ab = dot(a.T, b)
index = randperm(m)
if n_sets==None:
n_sets = m
n_in_set = int(ceil(float(m)/n_sets))
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
nin = len(inn)
nout = len(out)
a_in = a[inn,:]
mn_a = 0
mAB = 0
if center:
mn_a = a_in.mean(0)[newaxis]
mAin = dot(-ones((1,nout)), a[out,:])/nin
mBin = dot(-ones((1,nout)), b[out,:])/nin
mAB = dot(mAin.T, (mBin*nin))
ab_in = ab - dot(a[out,].T, b[out,:]) - mAB
a_in = a_in - mn_a
if index_out:
yield a_in, ab_in, out
else:
yield a_in, ab_in
def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
"""Random block shuffling along 1d axis
Returns : Shuffled a by axis
"""
m = a.shape[axis]
if blocks==None:
blocks = m
for ii in xrange(n_sets):
index = randperm(m)
if blocks==m:
a_out = a.take(index, axis)
else:
index = arange(m)
dummy = map(random.shuffle, array_split(index, blocks))
a_out = a.take(index, axis)
if index_out:
yield a_out, index
else:
yield a_out
def shuffle_1d(a, n_sets, axis=0):
"""Random shuffling along 1d axis.
Returns : Shuffled a by axis
"""
m = a.shape[axis]
for ii in xrange(n_sets):
index = randperm(m)
a = a.take(index, axis)
yield a
def diag_pert(a, n_sets=10, center=True, index_out=False):
"""Alter generator returning sets perturbed with means at diagonals.
input:
a -- matrix, data
n_sets -- scalar, number of perturbed sets generated
"""
m, n = a.shape
tr=False
if m>n:
a = a.T
m, n = a.shape
tr = True
if n_sets>m or n_sets>n:
msg = "You may not use more subsets than max(n_rows, n_cols)"
raise ValueError, msg
nm=n*m
start_inds = array_split(randperm(m),n_sets) # we use random start diags
if center:
a = a - mean(a, 0)[newaxis]
for v in range(n_sets):
a_out = a.copy()
out = []
for start in start_inds[v]:
ind = arange(start+v, nm, n+1)
out.extend(ind)
if center:
a_out.put(ind, a.mean()) # ndarray.put takes (indices, values)
else:
a_out.put(ind, 0)
if tr:
a_out = a_out.T
if index_out:
yield a_out, asarray(out)
else:
yield a_out
def outerprod_centering(aat, ret_mn=True):
"""Returns double centered symmetric outerproduct matrix.
"""
h = aat.mean(0)[newaxis]
h = h - 0.5*h.mean()
mn_a = h + h.T # beauty of broadcasting
aatc = aat - mn_a
if ret_mn:
return aatc, h
return aatc
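## Illustrative check (not part of the original file): double centering the
## outer product AA' equals the outer product of the column-centred A.
##
## >>> from numpy import dot, allclose
## >>> from numpy.random import rand
## >>> A = rand(5, 3)
## >>> Ac = A - A.mean(0)
## >>> aatc, mn = outerprod_centering(dot(A, A.T))
## >>> allclose(aatc, dot(Ac, Ac.T))
## True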

315
laydi/lib/validation.py Normal file

@@ -0,0 +1,315 @@
"""This module implements some common validation schemes from pca and pls.
"""
from scipy import ones,mean,sqrt,dot,newaxis,zeros,sum,empty,\
apply_along_axis,eye,kron,array,sort,zeros_like,argmax,atleast_2d
from scipy.stats import median
from scipy.linalg import triu,inv,svd,norm
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
from engines import w_simpls,pls,bridge,pca,nipals_lpls
from cx_utils import m_shape
def w_pls_cv_val(X, Y, amax, n_blocks=None):
"""Returns rmsep and aopt for pls tailored for wide X.
The root mean square error of cross validation is calculated
based on random block cross-validation. With number of blocks equal to
number of samples [default] gives leave-one-out cv.
The pls model is based on the simpls algorithm for wide X.
:Parameters:
X : ndarray
column centered data matrix of size (samples x variables)
Y : ndarray
column centered response matrix of size (samples x responses)
amax : scalar
Maximum number of components
n_blocks : scalar
Number of blocks in cross validation
:Returns:
rmsep : ndarray
Root Mean Square Error of cross-validated Predictions
aopt : scalar
Guestimate of the optimal number of components
:SeeAlso:
- pls_cv_val : Same output, not optimised for wide X
- w_simpls : Simpls algorithm for wide X
Notes
-----
Based (cowardly translated) on m-files from the Chemoact toolbox
X, Y inputs need to be centered (fixme: check)
Examples
--------
>>> import numpy as n
>>> X = n.random.rand(10, 4); X = X - X.mean(0)
>>> Y = n.random.rand(10, 2); Y = Y - Y.mean(0)
>>> rmsep, aopt = w_pls_cv_val(X, Y, amax=3)
"""
k, l = m_shape(Y)
PRESS = zeros((l, amax+1), dtype='f')
if n_blocks==None:
n_blocks = Y.shape[0]
XXt = dot(X, X.T)
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
for Din, Doi, Yin, Yout in V:
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
dat = w_simpls(Din, Yin, amax)
Q, U, H = dat['Q'], dat['U'], dat['H']
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
Yhat = []
for j in range(l):
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
E = Yout[:,j][:,newaxis] - TQ
E = E + sum(E, 0)/Din.shape[0]
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
#Yhat = Yin - dot(That,Q.T)
msep = PRESS/(Y.shape[0])
aopt = find_aopt_from_sep(msep)
return sqrt(msep), aopt
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
k, l = m_shape(Y)
PRESS = zeros((l, amax+1), dtype='<f8')
EE = zeros((amax, k, l), dtype='<f8')
Yhat = zeros((amax, k, l), dtype='<f8')
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
for Xin, Xout, Yin, Yout, out in V:
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
Yin = (Yin - ym)
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
if algo=='pls':
dat = pls(Xin, Yin, amax, mode='normal')
elif algo=='bridge':
dat = bridge(Xin, Yin, amax, mode='normal')
for a in range(amax):
Ba = dat['B'][a,:,:]
Yhat[a,out[:],:] = dot(Xout, Ba)
E = Yout - dot(Xout, Ba)
EE[a,out,:] = E
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
#rmsep = sqrt(PRESS/(k-1.))
msep = PRESS
aopt = find_aopt_from_sep(msep)
return msep, Yhat, aopt
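## Usage sketch (illustrative; X and Y are hypothetical, column-centred
## arrays with matching rows):
##
## msep, Yhat, aopt = pls_val(X, Y, amax=5, n_blocks=10)
## print 'suggested number of components:', aopt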
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2]):
"""Performs crossvalidation to get generalisation error in lpls"""
assert(nsets<=X.shape[0])
cv_iter = pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
k, l = Y.shape
Yc = empty((k, l), 'd')
Yhat = empty((a_max, k, l), 'd')
Yhatc = empty((a_max, k, l), 'd')
sep2 = empty((a_max, k, l), 'd')
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
print ind
dat = nipals_lpls(xcal,ycal,Z,
a_max=a_max,
alpha=alpha,
mean_ctr=mean_ctr,
verbose=False)
B = dat['B']
#b0 = dat['b0']
# centre the held-out data once per segment (not once per component)
if mean_ctr[0] in [0, 2]:
xi = xi - dat['mnx']
else:
xi = xi - xi.mean(1)[:,newaxis] #???: cheating?
if mean_ctr[1] in [0, 2]:
ym = dat['mny']
else:
ym = yi.mean(1)[:,newaxis] #???: check this
for a in range(a_max):
Yhat[a,ind,:] = atleast_2d(ym + dot(xi, B[a]))
#Yhat[a,ind,:] = atleast_2d(b0[a] + dot(xi, B[a]))
# todo: need a better support for class validation
y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
#print Y.dtype.char
if y_is_class:
Yhat_class = zeros_like(Yhat)
for a in range(a_max):
for i in range(k):
Yhat_class[a,i,argmax(Yhat[a,i,:])] = 1.0
class_err = 100*((Yhat_class+Y)==2).sum(1)/Y.sum(0).astype('d')
sep = (Y - Yhat)**2
rmsep = sqrt(sep.mean(1)).T
#rmsep2 = sqrt(sep2.mean(1))
aopt = find_aopt_from_sep(rmsep)
return rmsep, Yhat, aopt
def pca_alter_val(a, amax, n_sets=10, method='diag'):
"""Pca validation by altering elements in X.
comments:
-- may do all jk estimates in this loop
"""
V = diag_pert(a, n_sets, center=True, index_out=True)
sep = empty((n_sets, amax), dtype='f')
for i, (xi, ind) in enumerate(V):
dat_i = pca(xi, amax, mode='detailed')
Ti, Pi = dat_i['T'],dat_i['P']
for j in xrange(amax):
Xhat = dot(Ti[:,:j+1], Pi[:,:j+1].T)
a_sub = a.ravel().take(ind)
EE = a_sub - Xhat.ravel().take(ind)
tot = (a_sub**2).sum()
sep[i,j] = (EE**2).sum()/tot
sep = sqrt(sep)
aopt = find_aopt_from_sep(sep)
return sep, aopt
def pca_cv_val(a, amax, n_sets):
""" Returns PRESS from cross-validated pca using random segments.
input:
-- a, data matrix (m x n)
-- amax, maximum number of components used
-- n_sets, number of segments to calculate
output:
-- sep, (amax x m x n), squared error of prediction (press)
-- aopt, guestimated optimal number of components
"""
m, n = a.shape
E = empty((amax, m, n), dtype='f')
xtot = (a**2).sum() # this needs centering
V = pca_gen(a, n_sets=n_sets, center=True, index_out=True)
for xi, xout, ind in V:
dat_i = pca(xi, amax, mode='fast')
Pi = dat_i['P']
for j in xrange(amax):
Pij = Pi[:,:j+1]
E[j][ind,:] = (xout - dot(xout, dot(Pij, Pij.T)))**2
sep = []
for a in xrange(amax):
sep.append(E[a].sum()/xtot)
sep = array(sep)
aopt = find_aopt_from_sep(sep)
return sep, aopt
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
""" Returns CV-segments of paramter W for wide X.
todo: add support for T,Q and B
"""
if n_blocks == None:
n_blocks = b.shape[0]
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
if use_pack:
u, s, inflater = svd(a, full_matrices=0)
a = u*s
V = pls_gen(a, b, n_blocks=n_blocks, center=center)
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
if algo=='pls':
dat = pls(a_in, b_in, amax, 'loads', 'fast')
elif algo=='bridge':
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
W = dat['W']
if use_pack:
W = dot(inflater.T, W)
Wcv[nn,:,:] = W
return Wcv
def pca_jkP(a, aopt, n_blocks=None):
"""Returns loading from PCA on CV-segments.
input:
-- a, data matrix (n x m)
-- aopt, number of components in model.
-- n_blocks, number of segments
output:
-- PP, loadings collected in a three way matrix
(n_segments, m, aopt)
comments:
* The loadings are scaled with the (1/samples)*eigenvalues.
* Crossvalidation method is currently set to random blocks of samples.
todo: add support for T
fixme: more efficient to add this in validation loop
"""
if n_blocks == None:
n_blocks = a.shape[0]
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
V = pca_gen(a, n_sets=n_blocks, center=True)
for nn,(a_in, a_out) in enumerate(V):
dat = pca(a_in, aopt, mode='fast', scale='loads')
P = dat['P']
PP[nn,:,:] = P
return PP
def lpls_jk(X, Y, Z, a_max, nsets=None, xz_alpha=.5, mean_ctr=[2,0,2]):
cv_iter = pls_gen(X, Y, n_blocks=nsets,center=False,index_out=False)
m, n = X.shape
k, l = Y.shape
o, p = Z.shape
if nsets==None:
nsets = m
WWx = empty((nsets, n, a_max), 'd')
WWz = empty((nsets, o, a_max), 'd')
#WWy = empty((nsets, l, a_max), 'd')
for i, (xcal, xi, ycal, yi) in enumerate(cv_iter):
dat = nipals_lpls(xcal,ycal,Z,a_max=a_max,alpha=xz_alpha,
mean_ctr=mean_ctr,scale='loads',verbose=False)
WWx[i,:,:] = dat['W']
WWz[i,:,:] = dat['L']
#WWy[i,:,:] = dat['Q']
return WWx, WWz
def find_aopt_from_sep(sep, method='75perc'):
"""Returns an estimate of optimal number of components from rmsecv.
"""
sep = sep.copy()
if method=='vanilla':
# min rmsep
rmsecv = sqrt(sep.mean(0))
return rmsecv.argmin() + 1
elif method=='75perc':
prct = .75 #percentile
ind = 1.*sep.shape[0]*prct
med = median(sep)
prc_75 = []
for col in sep.T:
col.sort() #this is inplace -> ruins sep, so we are doing a copy
prc_75.append(col[int(ind)])
prc_75 = array(prc_75)
for i in range(1, sep.shape[1], 1):
if med[i-1]<prc_75[i]:
return i
return len(med)
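## Reading of the '75perc' rule above (descriptive note, not new logic):
## walking up through the components, return the first i for which the
## median error at component i-1 is already below the 75th percentile of
## the error at component i, i.e. the first place where an extra component
## no longer gives a convincing improvement; otherwise use all components.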

168
laydi/logger.py Normal file

@@ -0,0 +1,168 @@
import gobject
import gtk
import time
class Logger:
def __init__(self):
self.store = gtk.ListStore(gobject.TYPE_STRING,
gobject.TYPE_STRING,
gobject.TYPE_STRING)
self.levels = ['debug', 'notice', 'warning', 'error']
self.level_text = {'debug': 'Debug',
'notice': 'Notice',
'warning': 'Warning',
'error': 'Error'}
self.components = {}
self.colors = { 'debug': 'grey',
'notice': 'black',
'warning': 'brown',
'error': 'red' }
def log(self, level, message):
iter = self.store.append()
self.store.set_value(iter, 0, level)
self.store.set_value(iter, 1, message)
self.store.set_value(iter, 2, self.colors[level])
def level_number(self, level):
return self.levels.index(level)
def debug(self, message):
self.log('debug', message)
def notice(self, message):
self.log('notice', message)
def warning(self, message):
self.log('warning', message)
def error(self, message):
self.log('error', message)
class LogView(gtk.TreeView):
def __init__(self, logger=None, level='notice'):
self.logger = logger
self.model = logger.store
self.level = level
self.level_no = logger.level_number(level)
# Set up filter
self.filter = self.model.filter_new()
gtk.TreeView.__init__(self, self.filter)
self.filter.set_visible_func(self.level_filter)
# Set up log level column
renderer = gtk.CellRendererText()
self.level_col = gtk.TreeViewColumn('Level', renderer, text=0)
self.level_col.add_attribute(renderer, "foreground", 2)
self.append_column(self.level_col)
# Set up message column
renderer = gtk.CellRendererText()
self.message_col = gtk.TreeViewColumn('Message', renderer, text=1)
self.message_col.add_attribute(renderer, "foreground", 2)
self.append_column(self.message_col)
# Activate context menu
self.menu = LogMenu(self.logger, self)
self.connect('popup_menu', self.popup_menu)
self.connect('button_press_event', self.mouse_popup_menu)
# Make sure tree view displays bottom entry when entered
def scroll_to_last(model, path, it):
if path:
self.scroll_to_cell(path)
self.model.connect('row-changed', scroll_to_last)
def set_level(self, level):
self.level = level
self.level_no = self.logger.levels.index(level)
self.filter.refilter()
self.queue_draw()
def popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def mouse_popup_menu(self, widget, event):
if event.button == 3:
self.menu.popup(None, None, None, event.button, event.time)
def level_filter(self, store, iter):
if store.get_value(iter,0):
value = self.logger.levels.index(store.get_value(iter, 0))
return value >= self.level_no
else:
return False
class LogLevelMenu(gtk.Menu):
def __init__(self, logger, view):
self.logger = logger
self.view = view
items = []
gtk.Menu.__init__(self)
for level in logger.levels:
if len(items) == 0:
group = None
else:
group = items[0]
item = gtk.RadioMenuItem(group, logger.level_text[level], level)
item.connect('activate', self.set_log_level, level)
items.append(item)
self.append(item)
item.show()
item_no = logger.level_number(view.level)
items[item_no].set_active(True)
def set_log_level(self, widget, level, *rest):
if widget.active:
self.view.set_level(level)
class LogComponentMenu(gtk.Menu):
def __init__(self, logger, view):
gtk.Menu.__init__(self)
components = logger.components.keys()
components.sort()
for c in components:
item = gtk.MenuItem(c)
self.append(item)
item.show()
# for component in logger.components
class LogMenu(gtk.Menu):
def __init__(self, logger, view):
gtk.Menu.__init__(self)
self.logger = logger
# View Log Level
self.view_menu = LogLevelMenu(logger, view)
self.view_item = gtk.MenuItem('View Log Level')
self.view_item.set_submenu(self.view_menu)
self.append(self.view_item)
self.view_item.show()
# View Components
self.component_menu = LogComponentMenu(logger, view)
self.component_item = gtk.MenuItem('View Components')
self.component_item.set_submenu(self.component_menu)
self.append(self.component_item)
self.component_item.show()
# Clear Log
clear_item = gtk.MenuItem('Clear Log')
clear_item.connect('activate', self.activate_clear_button)
self.append(clear_item)
clear_item.show()
def activate_clear_button(self, item):
self.logger.store.clear()
logger = Logger()
log = logger.log
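## Usage sketch (illustrative): the module-level instance above is shared,
## so client modules typically do
##
## from logger import logger, log
## log('notice', 'dataset loaded')
## logger.warning('input matrix contains zeros')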

101
laydi/main.py Normal file

@@ -0,0 +1,101 @@
import sys
import os.path
import paths
# Site specific directories set by configure script.
PREFIX = paths.PREFIX
BINDIR = paths.BINDIR
DATADIR = paths.DATADIR
DOCDIR = paths.DOCDIR
PYDIR = paths.PYDIR
ICONDIR = os.path.join(DATADIR, 'icons')
#: Dictionary of observers
_observers = {}
#: The current Navigator object.
navigator = None
#: The current application
application = None
#: The current project
project = None
#: The current workflow
workflow = None
#: A cfgparse/optparse options object.
options = None
def notify_observers(name):
call = "%s_changed" % name
for s in _observers.get(name, []):
getattr(s, call)(getattr(sys.modules[__name__], name))
def _add_observer(name, observer):
"""Adds observer as an observer of the named object."""
if not _observers.has_key(name):
_observers[name] = []
_observers[name].append(observer)
def _remove_observer(name, observer):
"""Removes observer as an observer of the named object."""
if not _observers.has_key(name):
return
_observers[name].remove(observer)
def add_navigator_observer(observer):
_add_observer('navigator', observer)
def add_project_observer(observer):
_add_observer('project', observer)
def add_workflow_observer(observer):
_add_observer('workflow', observer)
def add_application_observer(observer):
_add_observer('application', observer)
def remove_navigator_observer(observer):
_remove_observer('navigator', observer)
def remove_project_observer(observer):
_remove_observer('project', observer)
def remove_workflow_observer(observer):
_remove_observer('workflow', observer)
def remove_application_observer(observer):
_remove_observer('application', observer)
def remove_options_observer(observer):
_remove_observer('options', observer)
def set_navigator(nav):
global navigator
navigator = nav
notify_observers('navigator')
def set_application(app):
global application
application = app
notify_observers('application')
def set_project(p):
global project
project = p
notify_observers('project')
def set_workflow(wf):
global workflow
workflow = wf
notify_observers('workflow')
def set_options(opt):
global options
options = opt
notify_observers('options')
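## Usage sketch (ProjectTracker is a hypothetical observer): an observer of
## the object named 'project' must provide a project_changed method, since
## notify_observers builds the callback name as '<name>_changed'.
##
## class ProjectTracker:
##     def project_changed(self, project):
##         print 'current project:', project.name
##
## add_project_observer(ProjectTracker())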

444
laydi/navigator.py Normal file

@@ -0,0 +1,444 @@
import gtk
import gobject
import plots
import time
import fluents
from logger import logger
import dataset, plots, project, workflow, main
import scipy
class NavigatorView (gtk.TreeView):
"""The NavigatorView is a tree view of the project.
There is always one NavigatorView, that shows the functions, plots and
datasets in the current project.
"""
def __init__(self):
if main.project:
self.data_tree = main.project.data_tree
else:
self.data_tree = None
gtk.TreeView.__init__(self)
# Various properties
self.set_enable_tree_lines(True)
self.set_headers_visible(False)
self.get_hadjustment().set_value(0)
# Selection Mode
self.get_selection().set_mode(gtk.SELECTION_MULTIPLE)
self.get_selection().connect('changed',self.on_selection_changed)
self._previous_selection = []
# Setting up TextRenderers etc
self.connect('row_activated', self.on_row_activated)
self.connect('cursor_changed', self.on_cursor_changed)
# Activate context menu
self.menu = NavigatorMenu(self)
self.connect('popup_menu', self.popup_menu)
self.connect('button_press_event', self.on_mouse_event)
self.textrenderer = textrenderer = gtk.CellRendererText()
pixbufrenderer = gtk.CellRendererPixbuf()
self.object_col = gtk.TreeViewColumn('Object')
self.object_col.pack_start(pixbufrenderer,expand=False)
self.object_col.pack_start(textrenderer,expand=False)
self.object_col.set_attributes(textrenderer, cell_background=3,
foreground=4, text=0)
self.object_col.set_attributes(pixbufrenderer, pixbuf=5)
self.append_column(self.object_col)
# send events to plots / itself
self.enable_model_drag_source(gtk.gdk.BUTTON1_MASK,
[("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
gtk.gdk.ACTION_LINK | gtk.gdk.ACTION_MOVE)
self.connect("drag-data-get",self.slot_drag_data)
logger.debug('Initializing navigator window.')
def slot_drag_data(self, treeview, context, selection, target_id, etime):
"""Sets the data for a drag event."""
treeselection = treeview.get_selection()
model, paths = treeselection.get_selected_rows()
if paths:
self.data_tree.drag_data_get(paths[0], selection)
def add_project(self, project):
"""Dependency injection."""
self.data_tree = project.data_tree
self.set_model(project.data_tree)
self.data_tree.connect('row-changed',self.on_row_changed)
def on_selection_changed(self, selection):
"""Update the list of currently selected datasets."""
# update prev selection right away in case of multiple events
model, paths = selection.get_selected_rows()
if not paths: # a plot is marked: do nothing
return
tmp = self._previous_selection
self._previous_selection = paths
tree = self.data_tree
# set timestamp on newly selected objects
[tree.set_value(tree.get_iter(path), 6, time.time())
for path in paths if path not in tmp]
objs = [tree.get_iter(path) for path in paths]
objs = [(tree[iter][6], tree[iter][2]) for iter in objs]
objs.sort()
objs = [obj for timestamp, obj in objs]
if objs and isinstance(objs[0], dataset.Dataset):
logger.debug('Selecting dataset')
main.project.current_data = objs
else:
logger.debug('Deselecting dataset')
main.project.current_data = []
def on_row_changed(self, treestore, pos, iter):
"""Set correct focus and colours when rows have changed."""
obj = treestore[iter][2]
obj_type = treestore[iter][1]
if not (obj or obj_type):
return
self.expand_to_path(pos)
if isinstance(obj, dataset.Dataset):
self.set_cursor(pos)
self.grab_focus()
def on_row_activated(self, widget, path, column):
tree_iter = self.data_tree.get_iter(path)
obj = self.data_tree.get_value(tree_iter, 2)
if isinstance(obj, plots.Plot):
logger.debug('Activating plot')
main.application.change_plot(obj)
elif isinstance(obj, dataset.Dataset):
pass
elif obj == None:
children = []
i = self.data_tree.iter_children(tree_iter)
while i:
child = self.data_tree.get(i, 2)[0]
if isinstance(child, plots.Plot):
children.append(child)
i = self.data_tree.iter_next(i)
main.application.change_plots(children)
else:
t = type(obj)
logger.notice('Activated datatype was %s. Don\'t know what to do.' % t)
def popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def on_mouse_event(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
if path:
iter = self.data_tree.get_iter(path[0])
obj = self.data_tree.get_value(iter, 2)
else:
iter = None
obj = None
if isinstance(obj, dataset.Dataset):
self.menu.set_dataset(obj, iter)
else:
self.menu.set_dataset(None, iter)
if event.button == 3:
self.menu.popup(None, None, None, event.button, event.time)
def on_cursor_changed(self, widget):
"""Update statusbar to contain dataset information.
Lists the dimensions of a dataset in the statusbar of the program
if a dataset is focused in the navigator.
"""
path = widget.get_cursor()[0]
tree_iter = self.data_tree.get_iter(path)
obj = self.data_tree.get_value(tree_iter, 2)
if isinstance(obj, dataset.Dataset):
dims = zip(obj.get_dim_name(), obj.shape)
dim_text = ", ".join(["%s (%d)" % dim for dim in dims])
else:
dim_text = ""
main.application['appbar1'].push(dim_text)
class NavigatorMenu(gtk.Menu):
def __init__(self, navigator):
gtk.Menu.__init__(self)
self.navigator = navigator
self.dataset = None
self.tree_iter = None
# Populate main menu
self.load_item = gtk.MenuItem('Load dataset')
self.load_item.connect('activate', self.on_load_dataset, navigator)
self.append(self.load_item)
self.load_item.show()
self.save_item = gtk.MenuItem('Save dataset')
self.save_item.connect('activate', self.on_save_dataset, navigator)
self.append(self.save_item)
self.save_item.show()
self.delete_item = gtk.MenuItem('Delete')
self.delete_item.connect('activate', self.on_delete, navigator)
self.append(self.delete_item)
self.delete_item.show()
self.split_item = gtk.MenuItem('Split on selection')
self.split_item.connect('activate', self.on_split, navigator)
self.append(self.split_item)
self.split_item.show()
# Build transform sub menu
self.trans_menu = gtk.Menu()
self.trans_tr_item = gtk.MenuItem('Transpose')
self.trans_tr_item.connect('activate', self.on_transpose, navigator)
self.trans_menu.append(self.trans_tr_item)
self.trans_tr_item.show()
self.trans_stdr_item = gtk.MenuItem('Std. rows')
self.trans_stdr_item.connect('activate', self.on_standardise_rows, navigator)
self.trans_menu.append(self.trans_stdr_item)
self.trans_stdr_item.show()
self.trans_stdc_item = gtk.MenuItem('Std. cols')
self.trans_stdc_item.connect('activate', self.on_standardise_cols, navigator)
self.trans_menu.append(self.trans_stdc_item)
self.trans_stdc_item.show()
self.trans_log_item = gtk.MenuItem('Log')
self.trans_log_item.connect('activate', self.on_log, navigator)
self.trans_menu.append(self.trans_log_item)
self.trans_log_item.show()
self.trans_item = gtk.MenuItem("Transformation")
self.append(self.trans_item)
self.trans_item.set_submenu(self.trans_menu)
self.trans_item.show()
# Build plot sub menu
self.plot_menu = gtk.Menu()
self.plot_image_item = gtk.MenuItem('Image Plot')
self.plot_image_item.connect('activate', self.on_plot_image, navigator)
self.plot_menu.append(self.plot_image_item)
self.plot_image_item.show()
self.plot_hist_item = gtk.MenuItem('Histogram')
self.plot_hist_item.connect('activate', self.on_plot_hist, navigator)
self.plot_menu.append(self.plot_hist_item)
self.plot_hist_item.show()
self.plot_scatter_item = gtk.MenuItem('Scatter')
self.plot_scatter_item.connect('activate', self.on_plot_scatter, navigator)
self.plot_menu.append(self.plot_scatter_item)
self.plot_scatter_item.show()
self.plot_line_item = gtk.MenuItem('Line view')
self.plot_line_item.connect('activate', self.on_plot_line, navigator)
self.plot_menu.append(self.plot_line_item)
self.plot_line_item.show()
self.plot_item = gtk.MenuItem('Plot')
self.append(self.plot_item)
self.plot_item.set_submenu(self.plot_menu)
self.plot_item.show()
def set_dataset(self, ds, it):
self.dataset = ds
self.tree_iter = it
if ds == None:
self.save_item.set_property('sensitive', False)
self.plot_item.set_property('sensitive', False)
self.trans_item.set_property('sensitive', False)
else:
self.save_item.set_property('sensitive', True)
self.plot_item.set_property('sensitive', True)
self.trans_item.set_property('sensitive', True)
def load_dataset(self, filename):
"""Load the dataset from the given file and add it to the project."""
ds = dataset.read_ftsv(filename)
if isinstance(ds, dataset.GraphDataset):
icon = fluents.icon_factory.get("graph_dataset")
elif isinstance(ds, dataset.CategoryDataset):
icon = fluents.icon_factory.get("category_dataset")
else:
icon = fluents.icon_factory.get("dataset")
main.project.add_dataset(ds)
main.project.data_tree_insert(None, ds.get_name(), ds, None, "black", icon)
def on_load_dataset(self, item, navigator):
# Set up file chooser.
dialog = gtk.FileChooserDialog('Load dataset')
dialog.set_action(gtk.FILE_CHOOSER_ACTION_OPEN)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK)
dialog.set_select_multiple(True)
dialog.set_current_folder(main.options.datadir)
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
pass
elif retval == gtk.RESPONSE_OK:
for filename in dialog.get_filenames():
self.load_dataset(filename)
else:
print "unknown; ", retval
dialog.destroy()
def on_save_dataset(self, item, navigator):
dialog = gtk.FileChooserDialog('Save dataset')
dialog.set_action(gtk.FILE_CHOOSER_ACTION_SAVE)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, gtk.STOCK_SAVE, gtk.RESPONSE_OK)
dialog.set_current_name("%s.ftsv" % self.dataset.get_name())
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
logger.debug("Cancelled save dataset")
elif retval == gtk.RESPONSE_OK:
logger.debug("Saving dataset as: %s" % dialog.get_filename())
fd = open(dialog.get_filename(), 'w')
dataset.write_ftsv(fd, self.dataset)
fd.close()
else:
print "unknown; ", retval
dialog.destroy()
def on_delete(self, item, navigator):
tm, rows = navigator.get_selection().get_selected_rows()
iters = [tm.get_iter(r) for r in rows]
iters.reverse()
for i in iters:
main.project.delete_data(i)
# tm.remove(i)
def on_plot_image(self, item, navigator):
plot = plots.ImagePlot(self.dataset, name='Image Plot')
icon = fluents.icon_factory.get("line_plot")
main.project.data_tree_insert(self.tree_iter, 'Image Plot', plot, None, "black", icon)
# fixme: image plot selections are not well defined
#plot.set_selection_listener(project.set_selection)
#project._selection_observers.append(plot)
def on_plot_hist(self, item, navigator):
project = main.project
plot = plots.HistogramPlot(self.dataset, name='Histogram')
icon = fluents.icon_factory.get("line_plot")
project.data_tree_insert(self.tree_iter, 'Histogram', plot, None, "black", icon)
plot.set_selection_listener(project.set_selection)
project._selection_observers.append(plot)
def on_plot_scatter(self, item, navigator):
project = main.project
datasets = main.project.current_data
ds_major = datasets[0]
dims_major = ds_major.get_dim_name()
ids_major = ds_major.get_identifiers(dims_major[1], sorted=True)
if len(datasets) > 1:
# If there is more than one active dataset -> try to use the two first
ds_minor = datasets[1]
dims_minor = ds_minor.get_dim_name()
if dims_minor != dims_major or ds_minor.shape[0] != ds_major.shape[0]:
# the selected datasets are not matched -> use initial selected
ds_minor = ds_major
else:
#Only one dataset selected
ds_minor = ds_major
plot = plots.ScatterPlot(ds_major, ds_minor,
dims_major[0], dims_major[1],
ids_major[0], ids_major[1],
name='Scatter (%s)' % ds_major.get_name())
plot.add_axes_spin_buttons(len(ids_major), 0, 1)
icon = fluents.icon_factory.get("line_plot")
project.data_tree_insert(self.tree_iter, 'Scatter', plot, None, "black", icon)
plot.set_selection_listener(project.set_selection)
project._selection_observers.append(plot)
def on_plot_line(self, item, navigator):
project = main.project
ds = self.dataset
dims = ds.get_dim_name()
ids = ds.get_identifiers(dims[1])
plot = plots.LineViewPlot(ds, name='Line (%s)' % ds.get_name())
icon = fluents.icon_factory.get("line_plot")
project.data_tree_insert(self.tree_iter, 'Line view', plot, None, "black", icon)
plot.set_selection_listener(project.set_selection)
project._selection_observers.append(plot)
def on_transpose(self, item, navigator):
project = main.project
ds = self.dataset.transpose()
ds._name = ds._name + ".T"
icon = fluents.icon_factory.get(ds)
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_standardise_rows(self, item, navigator):
project = main.project
ds = self.dataset.copy()
ds._name = self.dataset._name + ".rsc"
axis = 1
ds._array = ds.asarray()/scipy.expand_dims(ds.asarray().std(axis), axis)
icon = fluents.icon_factory.get(ds)
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_standardise_cols(self, item, navigator):
project = main.project
ds = self.dataset.copy()
ds._name = self.dataset._name + ".csc"
axis = 0
ds._array = ds.asarray()/scipy.expand_dims(ds.asarray().std(axis), axis)
icon = fluents.icon_factory.get(ds)
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_log(self, item, navigator):
project = main.project
if not scipy.all(self.dataset.asarray()>0):
logger.log('warning', 'Dataset needs to be strictly positive for a log transform')
return
ds = self.dataset.copy()
ds._array = scipy.log(ds.asarray())
icon = fluents.icon_factory.get(ds)
ds._name = ds._name + ".log"
project.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_split(self, item, navigator):
if self.dataset is None:
logger.warn("Only datasets can be split.")
return
dim = self.dataset.get_dim_name(0)
project = main.project
sel_ids = set(project.get_selection()[dim])
sel_ds = self.dataset.subdata(dim, sel_ids)
unsel_ids = set(self.dataset.get_identifiers(dim)) - set(sel_ids)
unsel_ds = self.dataset.subdata(dim, unsel_ids)
icon = fluents.icon_factory.get(self.dataset)
project.data_tree_insert(self.tree_iter, 'Selected', sel_ds, None, "black", icon)
project.data_tree_insert(self.tree_iter, 'Unselected', unsel_ds, None, "black", icon)

7
laydi/paths.py.m4 Normal file
View File

@@ -0,0 +1,7 @@
PREFIX = "M4_PREFIX"
BINDIR = "M4_BINDIR"
DATADIR = "M4_DATADIR"
DOCDIR = "M4_DOCDIR"
PYDIR = "M4_PYDIR"

1138
laydi/pca_options.glade Normal file

File diff suppressed because it is too large

1195
laydi/plots.py Normal file

File diff suppressed because it is too large

1092
laydi/pls_options.glade Normal file

File diff suppressed because it is too large

154
laydi/project.py Normal file

@@ -0,0 +1,154 @@
import os
import scipy
import gobject
import gtk
import fluents
import logger, dataset, plots, main
class Project:
"""A Project contains datasets, selections etc.
The project, of which the application has only one at any given time,
is the container for all datasets, plots and selections in use. The data
in the project is organized in a gtk.TreeStore that is displayed in the
navigator.
"""
def __init__(self, name="Testing"):
self.data_tree = gtk.TreeStore(str,
str,
object,
str,
str,
gobject.TYPE_OBJECT,
float)
self.name = name
self.dim_names = []
self._selection_observers = []
self._dataset_observers = []
self.current_data = []
self.datasets = []
self.sel_obj = dataset.Selection('Current Selection')
self.selections = []
self._last_selection = None
self._dataset_iter_map = {}
def add_selection_observer(self, observer):
self._selection_observers.append(observer)
observer.selection_changed(None, self.get_selection())
def notify_selection_listeners(self, dim_name):
"""Notifies observers"""
for observer in self._selection_observers:
observer.selection_changed(dim_name, self.get_selection())
def add_dataset_observer(self, observer):
self._dataset_observers.append(observer)
observer.dataset_changed()
def notify_dataset_listeners(self):
"""Notifies observers when new datasets are added"""
for observer in self._dataset_observers:
observer.dataset_changed()
def set_selection(self, dim_name, selection):
"""Sets a current selection and notify observers"""
self.sel_obj[dim_name] = set(selection)
self.notify_selection_listeners(dim_name)
self._last_selection = selection
def get_selection(self):
"""Returns the current selection object"""
return self.sel_obj
def delete_data(self, it):
"""Delete elements from the project."""
child = self.data_tree.iter_children(it)
while child != None:
c = self.data_tree.iter_next(child)
self.delete_data(child)
child = c
main.application.main_view.remove_view(self.data_tree.get(it, 2)[0])
self.data_tree.remove(it)
def add_data(self, parents, data, fun='Function'):
"""Adds a set of data and plots to the navigator.
This method is usually called after a Function in a workflow
has finished and returns its output."""
if len(parents) > 0:
parent_iter = self._dataset_iter_map[parents[0]]
else:
parent_iter = None
# Add the function node to the tree
icon = fluents.icon_factory.get("folder_grey")
it = self.data_tree_insert(parent_iter, fun, None, None, "black", icon)
# Add all returned datasets/plots/selections
for d in data:
# Any kind of dataset
if isinstance(d, dataset.Dataset):
if isinstance(d, dataset.GraphDataset):
icon = fluents.icon_factory.get("graph_dataset")
elif isinstance(d, dataset.CategoryDataset):
icon = fluents.icon_factory.get("category_dataset")
else:
icon = fluents.icon_factory.get("dataset")
self.add_dataset(d)
self.data_tree_insert(it, d.get_name(), d, None, "black", icon)
# Any kind of plot
elif isinstance(d, plots.Plot):
icon = fluents.icon_factory.get("line_plot")
self.data_tree_insert(it, d.get_title(), d, None, "black", icon)
d.set_selection_listener(self.set_selection)
self._selection_observers.append(d)
# Selections are not added to the data tree
elif isinstance(d, dataset.Selection):
self.add_selection(d)
def data_tree_insert(self, parent, text, data, bg, fg, icon, selected = 0):
"""Inserts data into the tree view.
@param text: The title of the object.
@param data: A dataset, plot or function object.
@param bg: Background color.
@param fg: Foreground (font) color.
@param icon: Pixmap icon.
"""
tree = self.data_tree
it = tree.append(parent)
tree[it] = [text, type(data), data, bg, fg, icon, selected]
self._dataset_iter_map[data] = it
return it
def add_dataset(self, dataset):
"""Appends a new Dataset to the project."""
logger.log('debug','Adding dataset: %s' %dataset.get_name())
self.datasets.append(dataset)
for dim_name in dataset.get_all_dims():
if dim_name not in self.dim_names:
self.dim_names.append(dim_name)
self.sel_obj[dim_name] = set()
self.notify_selection_listeners(dim_name)
self.notify_dataset_listeners()
def add_selection(self, selection):
"""Adds a new selection to the project."""
self.selections.append(selection)
self.notify_dataset_listeners()
def object_at(self, path):
"""Returns the object at a given path in the tree."""
it = self.data_tree.get_iter(path)
obj = self.data_tree[it][2]
if obj:
obj.show()
return obj
#def set_current_data(self, obj):
# self.current_data = obj
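## Usage sketch (illustrative; my_dataset and my_plot are hypothetical
## objects): a finished workflow function hands its output to the project,
## which files everything under a function node in the navigator tree.
##
## proj = Project('demo')
## proj.add_data([], [my_dataset, my_plot], fun='My analysis')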

659
laydi/selections.py Normal file

@@ -0,0 +1,659 @@
import pygtk
import gtk
import gtk.gdk
import gtk.glade
import gnome
import gnome.ui
import gobject
import scipy
import logger, dataset, main
import annotations
from lib import hypergeom
class SimpleMenu(gtk.Menu):
def __init__(self):
gtk.Menu.__init__(self)
def add_simple_item(self, title, function, *args):
item = gtk.MenuItem(title)
item.connect('activate', function, *args)
self.append(item)
item.show()
class IdListController:
"""Controller class for the identifier list."""
def __init__(self, idlist):
self._idlist = idlist
self._idlist.get_selection().set_mode(gtk.SELECTION_MULTIPLE)
self._idlist.set_rubber_banding(True)
# dimname: current_annotation_name
self._annotation = {}
# current dimension
self._dimension = None
# id, annotation
self._idstore = gtk.ListStore(gobject.TYPE_STRING,
gobject.TYPE_STRING)
self._idstore.set_sort_func(0, self._numeric_compare)
# Annotation tree column
self._annotation_column = None
## Set up identifier list
idlist.set_model(self._idstore)
renderer = gtk.CellRendererText()
dim_column = gtk.TreeViewColumn('Identifiers', renderer, text=0)
dim_column.set_sort_indicator(True)
dim_column.set_sort_column_id(0)
dim_column.set_sort_order(gtk.SORT_ASCENDING)
idlist.insert_column(dim_column, 0)
idlist.connect('button-press-event', self._button_pressed)
## Enable dropping
idlist.drag_dest_set(gtk.DEST_DEFAULT_ALL,
[("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
gtk.gdk.ACTION_LINK)
idlist.connect('drag-data-received', self._drag_data_received)
## Set up identifier list context menu
menu = self._menu = SimpleMenu()
menu.add_simple_item('Import...', self._on_import_list)
menu.add_simple_item('Export...', self._on_export_list)
menu.add_simple_item('Add to selection', self._on_make_selection)
item = gtk.MenuItem('Show annotations')
menu.append(item)
item.show()
self._menu_ann = item
##
## Public interface
##
def set_dimension(self, dimname):
"""Set dimension"""
if dimname == self._dimension:
return
self._dimension = dimname
self.set_annotation(self._annotation.get(dimname, None))
if not self._annotation.has_key(dimname):
self._annotation[dimname] = None
def set_annotation(self, annotation):
"""Set the displayed annotation to annotation. If annotation is None,
the annotation column is hidden. Otherwise the annotation column is
shown and filled with values from the given annotation field."""
if annotation == None:
if self._annotation_column != None:
self._idlist.remove_column(self._annotation_column)
self._annotation_column = None
else:
idlist = [x[0] for x in self._idstore]
annlist = annotations.get_dim_annotations(self._dimension,
annotation,
idlist)
for i, x in enumerate(self._idstore):
x[1] = annlist[i]
if self._annotation_column == None:
renderer = gtk.CellRendererText()
col = gtk.TreeViewColumn(annotation, renderer, text=1)
col.set_sort_indicator(True)
col.set_sort_column_id(1)
col.set_sort_order(gtk.SORT_ASCENDING)
self._idlist.append_column(col)
self._annotation_column = col
self._annotation_column.set_title(annotation)
self._annotation[self._dimension] = annotation
def set_selection(self, selection):
"""Set the selection to be displayed.
The selection is not stored; the values are copied into the TreeStore."""
self._idstore.clear()
# Return if no selection
if selection == None:
return
# Otherwise show selection, possibly with annotations.
#id_list = list(selection[self._dimension])
idlist = list(selection[self._dimension])
if self._annotation[self._dimension] != None:
annlist = annotations.get_dim_annotations(self._dimension,
self._annotation[self._dimension],
idlist)
for id, ann in zip(idlist, annlist):
self._idstore.append((id, ann))
else:
for e in idlist:
self._idstore.append((e, None))
##
## Private interface
##
def _update_annotations_menu(self):
"""Updates the annotations menu with the available annotations for the
current dim."""
dim_h = annotations.get_dim_handler(self._dimension)
if not dim_h:
print "set_sensitive(False)"
self._menu_ann.set_sensitive(False)
else:
annotations_menu = gtk.Menu()
print "set_sensitive(True)"
self._menu_ann.set_sensitive(True)
dh = annotations.get_dim_handler(self._dimension)
ann_names = dh.get_annotation_names()
for ann in ann_names:
item = gtk.MenuItem(ann)
item.connect('activate', self._on_annotation_activated, ann)
annotations_menu.append(item)
item.show()
self._menu_ann.set_submenu(annotations_menu)
def import_annotation_file(self):
"""Pops up a file dialog and ask the user to select the annotation
file to be loaded. Only one file can be selected. The file is loaded
into a annotations.AnnotationDictHandler object"""
dialog = gtk.FileChooserDialog('Load annotations')
dialog.set_action(gtk.FILE_CHOOSER_ACTION_OPEN)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK)
dialog.set_select_multiple(True)
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
pass
elif retval == gtk.RESPONSE_OK:
for filename in dialog.get_filenames():
annotations.read_annotations_file(filename)
else:
print "unknown; ", retval
dialog.destroy()
def export_annotations(self):
"""Pops up a file dialog and ask the user to select a file to save
the currently displayed annotations to.
"""
dialog = gtk.FileChooserDialog('Load annotations')
dialog.set_action(gtk.FILE_CHOOSER_ACTION_SAVE)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_SAVE, gtk.RESPONSE_OK)
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
pass
elif retval == gtk.RESPONSE_OK:
filename = dialog.get_filename()
fd = open(filename, 'w')
dim = self._dimension
print >> fd, "%s\t%s" % (dim, self._annotation[dim])
for id, value in self._idstore:
print >> fd, "%s\t%s" % (id, value)
fd.close()
else:
print "unknown; ", retval
dialog.destroy()
def set_rank(self, ds):
print "Set rank."
ra = scipy.sum(ds.asarray(), 1)
ranks = {}
dim = ds.get_dim_name()[0]
for key, value in ds[dim].items():
ranks[key] = ra[value]
ann_h = annotations.get_dim_handler(self._dimension)
if ann_h is None:
ann_h = annotations.DictAnnotationHandler()
annotations.set_dim_handler(self._dimension, ann_h)
ann_h.add_annotations('Rank', ranks)
##
## GTK Callbacks
##
def _numeric_compare(self, treemodel, iter1, iter2):
column = treemodel.get_sort_column_id()[0]
item1 = treemodel.get_value(iter1, column)
item2 = treemodel.get_value(iter2, column)
try:
item1 = float(item1)
item2 = float(item2)
except:
logger.log("notice", "Could not convert to float: %s, %s" %(item1, item2))
return cmp(item1, item2)
def _popup_menu(self, *rest):
self._update_annotations_menu()
self._menu.popup(None, None, None, 0, 0)
def _on_annotation_activated(self, menuitem, annotation):
self.set_annotation(annotation)
def _button_pressed(self, widget, event):
if event.button == 3:
self._update_annotations_menu()
self._menu.popup(None, None, None, event.button, event.time)
def _on_export_list(self, menuitem):
self.export_annotations()
def _on_import_list(self, menuitem):
self.import_annotation_file()
def _on_make_selection(self, menuitem):
selection = self._idlist.get_selection()
model, paths = selection.get_selected_rows()
if paths==None: return
iters = [self._idstore.get_iter(p) for p in paths]
ids = [self._idstore.get_value(i, 0) for i in iters]
main.project.set_selection(self._dimension, ids)
def _drag_data_received(self, widget, drag_context, x, y,
selection, info, timestamp):
treestore, path = selection.tree_get_row_drag_data()
i = treestore.get_iter(path)
obj = treestore.get_value(i, 2)
if isinstance(obj, dataset.Dataset):
if self._dimension in obj.get_dim_name():
self.set_rank(obj)
widget.emit_stop_by_name('drag-data-received')
class SelectionListController:
def __init__(self, seltree, idlist_controller):
self._seltree = seltree
self._sel_stores = {}
self._detail_cols = []
self._dimension = None
self._idlist_controller = idlist_controller
self._details_on = False
# Selection column
renderer = gtk.CellRendererText()
sel_column = gtk.TreeViewColumn('Selection', renderer, text=0)
sel_column.set_resizable(True)
sel_column.set_max_width(200)
seltree.insert_column(sel_column, 0)
# Detail columns
cols = [('In CS', 3), ('All', 4), ('Rank', 5)]
for name, store_col_num in cols:
col = gtk.TreeViewColumn(name, renderer, text=store_col_num)
col.set_sort_indicator(True)
col.set_sort_column_id(store_col_num)
col.set_sort_order(gtk.SORT_ASCENDING)
self._detail_cols.append(col)
# Signals
seltree.connect('row-activated', self._on_row_activated)
seltree.connect('cursor-changed', self._on_cursor_changed)
seltree.connect('button-press-event', self._on_button_pressed)
seltree.drag_dest_set(gtk.DEST_DEFAULT_ALL,
[("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
gtk.gdk.ACTION_LINK)
seltree.connect('drag-data-received', self._drag_data_received)
# Selections context menu
self._seltree_menu = SimpleMenu()
self._seltree_menu.add_simple_item('Sort by selection',
self._on_seltree_sort)
self._seltree_menu.add_simple_item('Show details',
self._enable_details, True)
self._seltree_menu.add_simple_item('Hide details',
self._enable_details, False)
#
# Public interface
#
def activate(self):
self._seltree.set_cursor((0,))
def set_project(self, project):
"""Dependency injection."""
main.project.add_selection_observer(self)
def set_dimlist_controller(self, dimlist_controller):
"""Dependency injection of the dimension list controller."""
self._dimlist_controller = dimlist_controller
def set_dimension(self, dim):
"""Set the current dimension, changing the model of the treeview
to match dim. After this the current dimension of the identifier list
is updated."""
self._ensure_selection_store(dim)
self._seltree.set_model(self._sel_stores[dim])
self._idlist_controller.set_dimension(dim)
self._dimension = dim
def selection_changed(self, dimname, selection):
"""Callback function from Project."""
for dim in selection.dims():
self._ensure_selection_store(dim)
store = self._sel_stores[dim]
if not self._get_current_selection_iter(selection, dim):
n = len(selection[dim])
values = (selection.title, selection, dim, n, n, 0)
store.insert_after(None, None, values)
else:
# update size of current selection
for row in store:
if row[1]==selection:
row[3] = row[4] = len(selection[dim])
path = self._seltree.get_cursor()
if path and self._sel_stores.has_key(self._dimension):
it = self._sel_stores[self._dimension].get_iter(path[0])
sel = self._sel_stores[self._dimension].get_value(it, 1)
self._idlist_controller.set_selection(sel)
def add_dataset(self, dataset):
"""Converts a CategoryDataset to Selection objects and adds it to
the selection tree. The name of the dataset will be the parent
node in the tree, and the identifiers along the first axis will
be added as the names of the subselections."""
dim_name = dataset.get_dim_name(0)
self._ensure_selection_store(dim_name)
store = self._sel_stores[dim_name]
di = self._get_dataset_iter(dataset)
if not di:
n_tot = dataset.shape[0]
selection = main.project.get_selection().get(dim_name)
ds_idents = dataset.get_identifiers(dim_name)
n_cs = len(selection.intersection(ds_idents))
values = (dataset.get_name(), dataset, dim_name, n_cs, n_tot, 2)
i = store.insert_after(None, None, values)
for selection in dataset.as_selections():
n_sel = len(selection[dim_name])
values = (selection.title, selection, dim_name, 0, n_sel, 0)
store.insert_after(i, None, values)
#
# Private interface
#
def _add_selection_store(self, dim):
"""Add a new gtk.TreeStore for the selections on a dimension."""
# Create new store
# Two types of lines, one for CategoryDatasets and one for
# Selections. The elements are title, link to dataset or selection,
# name of dimension, num. members in selection, num. in
# intersection with current selection and the rank of selection.
store = gtk.TreeStore(gobject.TYPE_STRING,
gobject.TYPE_PYOBJECT,
gobject.TYPE_STRING,
gobject.TYPE_INT,
gobject.TYPE_INT,
gobject.TYPE_FLOAT)
# Set selection store for this dimension
self._sel_stores[dim] = store
def _ensure_selection_store(self, dim):
"""Ensure that the object has a gtk.TreeStore for the given dimension"""
# Do not overwrite existing stores
if self._sel_stores.has_key(dim):
return
self._add_selection_store(dim)
def _get_dataset_iter(self, ds):
"""Returns the iterator to the selection tree row containing a
given dataset."""
store = self._sel_stores[ds.get_dim_name(0)]
i = store.get_iter_first()
while i:
if store.get_value(i, 1) == ds:
return i
i = store.iter_next(i)
return None
def _get_current_selection_iter(self, selection, dimension):
if not self._sel_stores.has_key(dimension):
return None
store = self._sel_stores[dimension]
i = store.get_iter_first()
while i:
if store.get_value(i, 1) == selection:
if store.get_value(i, 2) == dimension:
return i
i = store.iter_next(i)
return None
def _sort_selections(self, dataset):
"""Ranks selections by intersection with current selection.
Ranks determined by the hypergeometric distribution.
"""
dim_name = dataset.get_dim_name(0)
sel_store = self._sel_stores[dim_name]
selection_obj = main.project.get_selection()
current_selection = selection_obj.get(dim_name)
if current_selection==None: return
pvals = hypergeom.gene_hypergeo_test(current_selection, dataset)
for row in sel_store:
if row[1]==dataset:
for child in row.iterchildren():
name = child[0]
child[3] = pvals[name][0]
child[4] = pvals[name][1]
child[5] = pvals[name][2]
sel_store.set_sort_column_id(5, gtk.SORT_ASCENDING)
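## Note (inferred from the loop above, not from the hypergeom module
## itself): gene_hypergeo_test is expected to map each subselection title
## to a (members in current selection, total members, p-value) tuple; the
## p-value in column 5 then becomes the ascending sort key.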
#
# GTK callbacks
#
def _enable_details(self, widget, bool):
if self._details_on == bool : return
self._details_on = bool
if bool==True:
for col in self._detail_cols:
self._seltree.insert_column(col, -1)
else:
for col in self._detail_cols:
self._seltree.remove_column(col)
def _drag_data_received(self, widget, drag_context, x, y,
selection, info, timestamp):
treestore, path = selection.tree_get_row_drag_data()
i = treestore.get_iter(path)
obj = treestore.get_value(i, 2)
if isinstance(obj, dataset.CategoryDataset):
self.add_dataset(obj)
self._dimlist_controller.set_dimension(obj.get_dim_name(0))
widget.emit_stop_by_name('drag-data-received')
def _on_cursor_changed(self, widget):
"Show the list of identifier strings."
store = self._sel_stores[self._dimension]
p = self._seltree.get_cursor()[0]
i = store.get_iter(p)
obj = store.get_value(i, 1)
if isinstance(obj, dataset.Selection):
self._idlist_controller.set_selection(obj)
else:
self._idlist_controller.set_selection(None)
def _on_row_activated(self, widget, path, column):
store = self._sel_stores[self._dimension]
i = store.get_iter(path)
obj = store.get_value(i, 1)
if isinstance(obj, dataset.Dataset):
seltree = self._seltree
if seltree.row_expanded(path):
seltree.collapse_row(path)
else:
seltree.expand_row(path, True)
elif isinstance(obj, dataset.Selection):
main.project.set_selection(self._dimension,
obj[self._dimension])
def _on_button_pressed(self, widget, event):
"""Button press callbak."""
if event.button == 3:
self._seltree_menu.popup(None, None, None, event.button, event.time)
def _on_seltree_sort(self, menuitem):
"""Sort selection tree if row is category dataset."""
store = self._sel_stores[self._dimension]
p = self._seltree.get_cursor()[0]
i = store.get_iter(p)
obj = store.get_value(i, 1)
if isinstance(obj, dataset.CategoryDataset):
self._sort_selections(obj)
class DimListController:
def __init__(self, dimlist, seltree_controller):
self._current_dim = None
self._seltree_controller = seltree_controller
self.show_hidden = False
## dimstore is a list of all dimensions in the application
self.dimstore = gtk.ListStore(gobject.TYPE_STRING)
# filter for hiding dims prefixed with underscore
self.dimstore_filter = self.dimstore.filter_new()
self.dimstore_filter.set_visible_func(self._dimension_filter)
## The widgets we are controlling
self.dimlist = dimlist
## Set up dimensions list
dimlist.set_model(self.dimstore_filter)
renderer = gtk.CellRendererText()
dim_column = gtk.TreeViewColumn('Dimension', renderer, text=0)
dimlist.insert_column(dim_column, 0)
# Signals
dimlist.connect('row-activated', self._dim_row_activated)
dimlist.connect('cursor-changed', self._dim_cursor_changed)
dimlist.connect('button-press-event', self._dimlist_button_pressed)
# Set up dimension context menu
self._dimlist_menu = SimpleMenu()
self._dimlist_menu.add_simple_item('Hide', self._on_dim_hide)
self._dimlist_menu.add_simple_item('Show all', self._on_dim_show)
##
## Public interface
##
def set_project(self, project):
"""Dependency injection."""
# self.project = project
self.dim_names = project.dim_names
self.update_dims()
project.add_dataset_observer(self)
def get_dimension(self, dim):
"""Returns the iterator to the dimension with the given name, or
None if not found."""
i = self.dimstore_filter.get_iter_first()
while i:
if self.dimstore_filter.get_value(i, 0) == dim:
return i
i = self.dimstore_filter.iter_next(i)
return None
def set_dimension(self, dimname):
"""Sets the current dimension."""
self._current_dim = dimname
dim = self.get_dimension(self._current_dim)
path = self.dimstore_filter.get_path(dim)
if self.dimlist.get_cursor()[0] != path:
self.dimlist.set_cursor(self.dimstore_filter.get_path(dim))
self._seltree_controller.set_dimension(dimname)
def dataset_changed(self):
"""Callback function from Project."""
self.update_dims()
def update_dims(self):
"""Update the list of dimensions shown"""
for dim in self.dim_names:
if not self.get_dimension(dim):
self.dimstore.insert_after(None, (dim,))
self.dimstore_filter.refilter()
#
# Private interface
#
def _dimension_filter(self, store, row):
"""Filters out dimensions with underscore prefix."""
if self.show_hidden:
return True
visible = False
name = store.get_value(row, 0)
if name != None:
visible = name[0]!="_"
return visible
#
# GTK Callbacks.
#
def _on_dim_hide(self, menuitem):
"""Menu item callback function which hides underscore prefixed
dimensions."""
self.show_hidden = False
self.dimstore_filter.refilter()
def _on_dim_show(self, menuitem):
"""Menu item callback function that shows underscore prefixed
dimension names."""
self.show_hidden = True
self.dimstore_filter.refilter()
def _dim_cursor_changed(self, widget):
cursor = self.dimlist.get_cursor()[0]
i = self.dimstore_filter.get_iter(cursor)
row = self.dimstore_filter.get_value(i, 0)
self.set_dimension(row)
self._seltree_controller.activate()
def _dim_row_activated(self, widget, path, column):
#self._seltree_controller.set_dimension(dim)
pass
def _dimlist_button_pressed(self, widget, event):
if event.button == 3:
self._dimlist_menu.popup(None, None, None, event.button, event.time)

1002
laydi/view.py Normal file

File diff suppressed because it is too large

480
laydi/workflow.py Normal file

@@ -0,0 +1,480 @@
import gtk, gobject
import sys
import os
import inspect
import logger
import fluents
import main
def _workflow_classes(modname):
"""Returns a list of all subclasses of Workflow in a given module"""
workflow_classes = []
module = __import__('%s' % (modname,))
d = module.__dict__
for wf in d.values():
try:
if issubclass(wf, Workflow):
workflow_classes.append(wf)
except TypeError, e:
pass
return workflow_classes
def workflow_list():
"""Returns a list containing all new workflows"""
retval = []
# List all .py files that can contain workflow classes
wf_path = sys.modules['workflows'].__path__
wf_files = []
for dir in wf_path:
for fn in os.listdir(dir):
if fn.endswith('.py') and ('#' not in fn):
wf_files.append(fn[:-3])
# Try to load each file and look for Workflow derived classes
for fn in wf_files:
try:
for wf in _workflow_classes(fn):
retval.append(wf)
except Exception, e:
logger.log('warning', 'Cannot load workflow: %s' % fn)
logger.log('warning', e)
return retval
def find_workflow(basename):
"""Searches for a workflow with a given filename."""
print "find_workflow"
# List all .py files that can contain workflow classes
wf_path = main.options.workflowdir.split(';')
wf_file = None
for dir in wf_path:
fn = os.path.join(dir, "%s.py" % basename)
if os.path.isfile(fn):
wf_file = fn
return _workflow_classes(basename)[0]
return None
class Workflow:
"""Defines a workflow that contains a set of analysis stages.
A Workflow is a set of analysis stages for a certain type of analysis.
Each stage contains some possible operations to do accomplish that
task.
"""
name = "Workflow"
ident = None
description = "Workflow Description"
def __init__(self):
print "Setting stages"
self.stages = []
self.stages_by_id = {}
def get_data_file_name(self, filename):
"""Checks if a file with the given name exists in the data directory.
Returns the file name if the file exists in the data directory, which
is defined as datadir/workflowname. If the file does not exist, or the
workflow does not have an identifier, this method returns None."""
if self.ident == None:
return None
fn = os.path.join(main.options.datadir, self.ident, filename)
print fn
if os.path.isfile(fn):
return fn
return None
def add_stage(self, stage):
self.stages.append(stage)
self.stages_by_id[stage.id] = stage
def print_tree(self):
print self.name
for stage in self.stages:
print ' %s' % stage.name
for fun in stage.functions:
print ' %s' % fun.name
# def add_project(self,project):
# if project == None:
# logger.log('notice','Project is empty')
# logger.log('notice','Project added in : %s' %self.name)
# self.project = project
class EmptyWorkflow(Workflow):
name = 'Empty Workflow'
def __init__(self):
print "initing empty workflow"
Workflow.__init__(self)
class Stage:
"""A stage is a part of the data analysis process.
Each stage contains a set of functions that can be used to
accomplish the task. A typical early stage is 'preprocessing', which
can be done in several ways, each represented by a function.
"""
def __init__(self, id, name):
self.id = id
self.name = name
self.functions = []
self.functions_by_id = {}
def add_function(self, fun):
self.functions.append(fun)
self.functions_by_id[fun.id] = fun
class Function:
"""A Function object encapsulates a function on a data set.
Each Function instance encapsulates some function that can be applied
to one or more types of data.
"""
def __init__(self, id, name):
self.id = id
self.name = name
    def validate_input(self):
        # Subclasses should override this; by default every input is
        # accepted.
        return Validation(True, "Validation Not Implemented")
def run(self):
pass
class Validation:
    def __init__(self, result, reason):
self.succeeded = result
self.reason = reason
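
## Illustrative sketch (kept commented out on purpose, since workflow
## discovery picks up every Workflow subclass it can import): how the
## classes above fit together. LogTransform, 'preprocess' and MyWorkflow
## are hypothetical names.
#
# class LogTransform(Function):
#     def __init__(self):
#         Function.__init__(self, 'log', 'Log transform')
#     def validate_input(self):
#         return Validation(True, 'ok')
#     def run(self, data):
#         return None
#
# class MyWorkflow(Workflow):
#     name = 'My Workflow'
#     ident = 'my_workflow'
#     def __init__(self):
#         Workflow.__init__(self)
#         stage = Stage('preprocess', 'Preprocessing')
#         stage.add_function(LogTransform())
#         self.add_stage(stage)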
class WorkflowView (gtk.VBox):
def __init__(self, wf):
gtk.VBox.__init__(self)
self.workflow = wf
self.setup_workflow(wf)
def setup_workflow(self, wf):
# Add stage in the process
for stage in wf.stages:
exp = gtk.Expander(stage.name)
btn_align = gtk.Alignment(xscale=0.9)
btn_align.set_padding(0,4,20,0)
btn_align.show()
btn_box = gtk.VBox()
btn_align.add(btn_box)
btn_box.show()
exp.add(btn_align)
# Add functions in each stage
for fun in stage.functions:
btn = gtk.Button(fun.name)
btn.connect('clicked',
lambda button, f=fun : run_function(f))
btn_box.add(btn)
btn.show()
exp.show()
self.pack_start(exp, expand=False, fill=False)
def remove_workflow(self):
for c in self.get_children():
c.hide()
self.remove(c)
def set_workflow(self, workflow):
self.workflow = workflow
self.remove_workflow()
self.setup_workflow(workflow)
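
## Sketch (hypothetical usage): the view is a plain gtk.VBox, so the
## application can pack it anywhere and swap workflows at runtime.
#
# view = WorkflowView(EmptyWorkflow())
# wf_class = find_workflow('mywf')
# if wf_class is not None:
#     view.set_workflow(wf_class())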
class Options(dict):
"""Options base class.
"""
    def __init__(self, *args, **kw):
dict.__init__(self, *args, **kw)
self['out_plots'] = []
self['out_data'] = []
self['all_plots'] = []
self['all_data'] = []
def _copy_from_list(self, key_list):
"""Returns suboptions (dictionary) from a list of keys.
"""
d = {}
for key in key_list:
d[key] = self.get(key, None)
return d
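
## Sketch (hypothetical subclass): function-specific options are plain
## dict subclasses. 'all_plots' and 'all_data' hold (object, name, use)
## tuples, which OptionsDialog below shows as toggleable rows; set_output()
## copies the toggled subsets into 'out_plots' and 'out_data'.
#
# class PcaOptions(Options):
#     def __init__(self):
#         Options.__init__(self)
#         self['n_components'] = 2
#         self['all_plots'] = [(None, 'Scores plot', True),
#                              (None, 'Loadings plot', False)]
#         self['all_data'] = [(None, 'Scores', True)]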
class OptionsDialog(gtk.Dialog):
"""The basic input/output dialog box.
This defines the first page of the function options-gui.
    Any function that invokes an options-gui will inherit from this class.
"""
def __init__(self, data, options, input_names=['X','Y']):
gtk.Dialog.__init__(self, 'Input-Output dialog',
None,
gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
self._options = options
self._data = data
self._editable = True
self.set_size_request(550,450)
# create notebook
self.nb = nb = gtk.Notebook()
# 1. page: input/output
#inputs
input_frame = gtk.Frame("Input")
hbox = gtk.HBox(True, 8)
align = gtk.Alignment(1, 1, 1, 1)
align.set_padding(8, 8, 8, 8)
align.add(hbox)
input_frame.add(align)
for i, name in enumerate(input_names):
frame = gtk.Frame(name)
frame.set_label_align(0.5, 0.5)
label = gtk.Label(data[i]._name + "\n" + str(data[i]._array.shape))
frame.add(label)
hbox.add(frame)
#outputs
output_frame = gtk.Frame("Output")
output_hbox = gtk.HBox(True,4)
output_align = gtk.Alignment(1, 1, 1, 1)
output_align.set_padding(8, 8, 8, 8) #left padding:8
output_align.add(output_hbox)
output_frame.add(output_align)
# plots
plot_list = gtk.ListStore(str, 'gboolean', gtk.gdk.Pixbuf)
plot_treeview = gtk.TreeView(plot_list)
        # Add plots
        plot_icon = fluents.icon_factory.get('line_plot')
        for plt, name, use in self._options['all_plots']:
            plot_list.append((name, use, plot_icon))
        # Renderer for icon
        icon_renderer = gtk.CellRendererPixbuf()
        icon_renderer.set_property('pixbuf', plot_icon)
# Renderer for active toggle.
active_renderer = gtk.CellRendererToggle()
active_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
active_renderer.connect('toggled', toggled, plot_list)
active_column = gtk.TreeViewColumn('Use', active_renderer, active=1)
# Renderer for plot title.
title_renderer = gtk.CellRendererText()
title_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_EDITABLE)
title_column = gtk.TreeViewColumn('Plot', title_renderer, text=0)
title_column.pack_start(icon_renderer, expand=False)
# Add columns to tree view.
plot_treeview.append_column(active_column)
plot_treeview.append_column(title_column)
## datasets
dataset_list = gtk.ListStore(str, 'gboolean', gtk.gdk.Pixbuf)
dataset_treeview = gtk.TreeView(dataset_list)
# Add datasets
data_icon = fluents.icon_factory.get('dataset')
for dat, name, use in self._options['all_data']:
dataset_list.append((name, use, data_icon))
# Renderer for icon
icon_renderer = gtk.CellRendererPixbuf()
icon_renderer.set_property('pixbuf', data_icon)
# Renderer for active toggle.
active_renderer = gtk.CellRendererToggle()
active_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
active_renderer.connect('toggled', toggled, dataset_list)
active_column = gtk.TreeViewColumn('Use', active_renderer, active=1)
# Renderer for dataset title.
title_renderer = gtk.CellRendererText()
title_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_EDITABLE)
title_column = gtk.TreeViewColumn('Dataset', title_renderer, text=0)
title_column.pack_start(icon_renderer, expand=False)
# Add columns to tree view.
dataset_treeview.append_column(active_column)
dataset_treeview.append_column(title_column)
# add treeviews to output frame
output_hbox.add(plot_treeview)
output_hbox.add(dataset_treeview)
# vbox for input/spacer/output
vbox1 = gtk.VBox()
vbox1.add(input_frame)
vbox1.add(gtk.HSeparator())
vbox1.add(output_frame)
# add vbox to notebook
nb.insert_page(vbox1, gtk.Label("Input/Output"), 0)
self.vbox.add(nb)
#keep ref to liststores
self.dataset_list = dataset_list
self.plot_list = plot_list
def run(self):
self.vbox.show_all()
return gtk.Dialog.run(self)
def set_options(self, options):
self._options = options
def update_options(self, options):
self._options.update(options)
def set_output(self):
# get toggled output data
        out_data = [item[0] for name, mark, ic in self.dataset_list
                    for item in self._options['all_data']
                    if mark and name == item[1]]
        # get toggled plots
        out_plots = [item[0] for name, mark, ic in self.plot_list
                     for item in self._options['all_plots']
                     if mark and name == item[1]]
# update options
self._options['out_data'] = out_data
self._options['out_plots'] = out_plots
    def set_editable(self, editable):
        self._editable = editable
def set_data(self, data):
self._data = data
def get_data(self):
return self._data
def get_options(self):
return self._options
    def add_page_from_glade(self, glade_file, widget_name, page_title):
        """Adds a new page to the existing notebook.

        The input widget (added as a page in the notebook) is defined
        in the glade file.

        input:
          glade_file -- path to glade file
          widget_name -- name of widget from glade file
        """
        try:
            self.wTree = gtk.glade.XML(glade_file)
        except Exception:
            logger.log('notice', 'Could not find glade file: %s' % glade_file)
            return
        widget = self.wTree.get_widget(widget_name)
        win = widget.get_parent()
        win.hide()
        widget.unparent()
        self.nb.insert_page(widget, gtk.Label(page_title), -1)
        self.nb.set_current_page(0)
def toggled(renderer, path, store):
it = store.get_iter(path)
old_value = store.get_value(it, 1)
store.set_value(it, 1, not old_value)
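
## Sketch of typical dialog usage (X, Y and PcaOptions are hypothetical):
## run() shows the notebook and blocks until a response, and set_output()
## copies the toggled rows back into the options dictionary.
#
# dialog = OptionsDialog([X, Y], PcaOptions())
# if dialog.run() == gtk.RESPONSE_OK:
#     dialog.set_output()
# options = dialog.get_options()
# dialog.destroy()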
class WorkflowMenu (gtk.Menu):
def __init__(self, workflow):
gtk.Menu.__init__(self)
self._workflow = workflow
for stage in workflow.stages:
self.append(self._create_stage_item(stage))
def _create_stage_item(self, stage):
stage_menu_item = gtk.MenuItem(stage.name)
stage_menu_item.show()
stage_menu = gtk.Menu()
stage_menu_item.set_submenu(stage_menu)
for fun in stage.functions:
stage_menu.append(self._create_function_item(fun))
return stage_menu_item
def _create_function_item(self, func):
menuitem = gtk.MenuItem(func.name)
menuitem.connect('activate',
lambda item, f=func : run_function(f))
menuitem.show()
return menuitem
def run_function(function):
logger.log('debug', 'Starting function: %s' % function.name)
parent_data = main.project.current_data
validation = function.validate_input()
if not validation.succeeded:
        logger.log('warning', 'Invalid input data: ' + str(validation.reason))
return
args, varargs, varkw, defaults = inspect.getargspec(function.run)
    # The first argument is 'self', and 'selection' (if present) is passed
    # separately as a keyword argument, so neither counts as a dataset.
args.remove('self')
if "selection" in args:
pass_selection = True
args.remove('selection')
else:
pass_selection = False
if varargs and len(parent_data) < len(args):
logger.log('warning', "Function requires minimum %d datasets selected." % len(args))
return
elif not varargs and args and len(args) != len(parent_data):
# functions requiring datasets have to have the right number
logger.log('warning', "Function requires %d datasets, but only %d selected." % (len(args), len(parent_data)))
return
    if not args:
        # we allow functions requiring no data to be run even if a
        # dataset is selected
data = []
else:
data = parent_data
if pass_selection:
# if the function has a 'selection' argument, we pass in
# the selection
new_data = function.run(selection=main.project.get_selection(), *data)
else:
new_data = function.run(*data)
    if new_data is not None:
main.project.add_data(parent_data, new_data, function.name)
logger.log('debug', 'Function ended: %s' % function.name)
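
## Sketch: run_function() inspects Function.run with inspect.getargspec,
## so a function that wants the current selection simply declares a
## 'selection' argument; the remaining arguments must match the number of
## selected datasets. MarkSelection is a hypothetical example:
#
# class MarkSelection(Function):
#     def __init__(self):
#         Function.__init__(self, 'mark', 'Mark selection')
#     def run(self, X, selection=None):
#         # invoked as run(X, selection=main.project.get_selection())
#         return None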