Compare commits

This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.

405 Commits

Author SHA1 Message Date
d52dbbd9be Relocate wiki from trac 2023-01-25 13:36:26 +01:00
36146893e6 Fixed selections. points_inside_poly is deprecated, and all calls have
therefore been rewritten to use Path.contains_points.
2014-04-04 19:16:59 +00:00
2b4da82165 Corrected URL to source code. 2013-10-17 08:24:43 +00:00
c612724378 Fixed bug that caused right-click in the identifier list to change the list's selection status. 2013-09-17 11:11:13 +00:00
1f2055d6b7 * Added selection directories. The selection/ directory in a project
  should contain subdirectories per dimension. Selections are files within
  these subdirectories ending in .sel
* Renamed Universe.__getent__ to Universe.__getitem__
2012-01-25 18:13:54 +00:00
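For illustration, a minimal sketch of the layout this commit describes, with hypothetical dimension and file names:

```
myproject/
  selection/
    gene_id/
      upregulated.sel
    samples/
      outliers.sel
```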
4dcbff3647 Added reference to Selector objects to keep them from being garbage collected.
This change is triggered by changes to mpl, but should not break with
old versions.
2011-10-25 15:15:01 +00:00
660f649670 Some functions have been moved from scipy to numpy. Updated import statements to reflect this. 2011-10-21 22:04:11 +00:00
d0f307c135 Removed debug print statements. 2011-07-07 13:11:12 +00:00
4ec79d2d9a Short code cleanup. 2011-06-02 15:14:21 +00:00
1845fe2ba0 Added rudimentary loading of annotation files. 2011-06-02 14:38:01 +00:00
bad06e0bb9 Removed unused import of the dataset module. 2011-06-02 14:07:42 +00:00
5c65dc7e2c Removed old comments. 2011-06-02 13:22:25 +00:00
1c0ea0caee Automatically load all datasets in the root data directory and add them to
navigator when project is loaded.

Recursion down in data hierarchy is not yet implemented.
2011-06-02 13:16:10 +00:00
e21cd7b323 More helpful error message if no project is specified. 2011-03-28 21:40:15 +00:00
6be624e872 Script to generate mapping files between identifiers found in tab separated files. 2011-03-24 08:46:17 +00:00
f1b3009f11 Fixed a bug in the output 2011-03-24 08:05:49 +00:00
19163988e3 Example project. 2011-03-17 10:49:08 +00:00
11fc34b1e3 Fixes #23 - Preserving the order of samples in subdata. 2011-03-08 09:27:06 +00:00
ed486ddf2e Changed default directories for:
* Load dataset
 * Import annotations
 * Export annotations
to the correct directories in the current project.
2011-03-07 16:19:45 +00:00
ec71cbd7d7 Fixes previous broken commit. 2011-03-06 23:11:29 +00:00
dc8da8823e Partial fix to #22. Laydi will require a project directory to run. If the
project directory does not exist, i.e. if a new project should be created, the
new -n switch is required. New syntax to run laydi is now
{{{
laydi [options] </path/to/project>
}}}
2011-03-06 23:08:09 +00:00
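A usage sketch based on the syntax above (paths hypothetical):

```console
laydi -n /path/to/new/project     # create the project directory, then start laydi
laydi /path/to/existing/project   # open an existing project
```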
b5d2e8e181 Changed the name of the current Project class to ProjectView, to make room for a new
non-GUI project class.
2011-03-06 22:33:51 +00:00
21133af8f7 Removing old r-laydi directory. 2011-03-06 21:27:36 +00:00
a1ab79024e Adding simple workflow named "default" with PCA, PLS and LPLS. Changed the program to
use this as the default workflow.
2011-03-06 15:27:16 +00:00
633465e4c8 * Added function for reading laydi selections.
* Fixed bug in documentation of write.laydi.selection.
2011-03-06 14:38:24 +00:00
6a70e7a4db Removed old laydi.R 2011-03-06 14:32:37 +00:00
1d94b46777 Added method for writing laydi selections. 2011-03-05 16:28:07 +00:00
98a9d581af Updated R package. The package now works and is documented. The only implemented
method is writing ftsv datasets from R.
2011-03-05 15:47:33 +00:00
260e0ad715 First attempt at a laydi R package. 2011-03-05 13:49:15 +00:00
80e64afd04 Renamed R package to R. 2011-03-04 14:25:33 +00:00
f18b49b1c7 R package for communicating with Laydi. 2011-03-04 14:24:55 +00:00
f29d48e879 Laydi project directory for testing the new project structure. 2011-01-12 16:57:10 +00:00
ebda69049e Started working on Dimension and Universe objects (which will probably be renamed before actually being used in the program). 2011-01-05 15:48:00 +00:00
5f1f4d0dc2 Renamed directory illumina2ftsv to illumina to gather all Illumina-relevant
scripts there.  Added laydi-annot-illumina script that generates Laydi
annotation files from Illumina text annotation files.
2011-01-04 11:43:24 +00:00
f001d12584 Working version. 2010-09-27 19:45:56 +00:00
3d68d27a56 Begun writing illumina data to ftsv converter. 2010-09-24 15:18:52 +00:00
b6d1eb022e Fixes error during make in parent directory. 2010-06-30 08:30:15 +00:00
2e253450cd Removed debug print statments. 2010-01-19 17:08:50 +00:00
259add178e Removed matplotlibrc 2010-01-19 16:53:35 +00:00
256133275c * Changed build behaviour to suit newer Ubuntu releases (at least 9.10).
* Added -c option to generate initial configuration file and directories. This will create
  the file ~/.laydi and the directories ~/laydi/datasets and ~/laydi/workflows.
* Removed debug print statements.
2010-01-19 16:45:51 +00:00
c50d34effc Changed from deprecated scipy.stats.mean() to numpy.mean(). 2009-11-22 18:25:43 +00:00
f2afcbc3fc Fixed description to run laydi after checkout. 2009-11-19 21:58:07 +00:00
1fbef303ff Fixed warning about deprecated matplotlib setting. 2009-08-22 20:11:48 +00:00
e28b66a294 Removed annoying debug print statement. 2009-08-22 19:50:00 +00:00
58e7f6422b Added build info in README 2009-08-22 19:48:20 +00:00
4ce6963428 Fixing selections and drawing in newer versions of matplotlib. 2009-08-22 17:41:06 +00:00
14de18c507 IMPORTANT: identifiers in the resulting csv file were not correct before this patch. 2009-02-28 15:38:29 +00:00
53cbd8fed7 Added write_csv function to export a dataset to regular comma/tab/whatever separated files. 2009-02-09 23:05:09 +00:00
0858fd00e5 ftsv2csv - script to convert ftsv files to comma/tab separated values. 2009-02-09 22:12:39 +00:00
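A hedged usage sketch (file names hypothetical; per the script's source included at the bottom of this diff, the second argument is optional and defaults to the input name with a .csv extension):

```console
ftsv2csv data.ftsv            # writes data.csv
ftsv2csv data.ftsv out.csv    # explicit output file
```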
a1e57d7820 Smokers workflow automatically loads annotations on the go-terms and gene_ids dimensions. 2009-02-06 23:48:25 +00:00
ec6f2a1edc Removed page size from spin boxes as this has become deprecated in GTK. There are still complaints from
GTK/PyGTK, possibly due to PyGTK.
2009-02-06 22:24:33 +00:00
648817cf0e Removed debug print line. 2009-02-06 22:23:24 +00:00
f07ce85e46 Changes to DAG plot. 2009-02-06 22:21:51 +00:00
5cfa505ae2 Fixed critical bug in txt2ftsv that made output datasets incorrect if more than one list was used. 2009-02-06 22:21:19 +00:00
6c20de11c9 Added options to txt2ftsv to chose between datasets and category datasets and whether or not to use sparse format. 2009-02-05 20:08:51 +00:00
b46c381c3e Added txt2ftsv, a script to build ftsv category datasets from lists of identifiers.
Usage is txt2ftsv [-d dimension] [-o output.ftsv] [-n dsname] [FILE1 [FILE2 ...]]
Input and output can be - for STDIN/STDOUT respectively. The script is primarily meant
as a hack to easily load selections. Data is only half sanitized.
2009-02-05 00:11:48 +00:00
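A hedged example of the usage line above (file and dataset names hypothetical):

```console
txt2ftsv -d gene_id -n my_selection -o selection.ftsv list1.txt list2.txt
cat ids.txt | txt2ftsv -d gene_id -o selection.ftsv -   # - reads from STDIN
```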
fdf51c7c7f Added box and bar plots to the right-click menu of datasets. 2008-12-07 23:31:03 +00:00
0a76e9c50f Made plot background color lighter and removed two parameters that newer versions of matplotlib do not recognize. 2008-12-07 23:30:33 +00:00
2020c13b97 Fixing the colours of the overview image. 2008-12-06 14:37:10 +00:00
cc9416b3f7 GUI overview figure. 2008-12-06 00:58:35 +00:00
7e6bac20e1 FIXING STUFF!
Rename should be complete or almost so.
2008-12-05 22:07:56 +00:00
1eabbc5c48 BREAKING STUFF!
Renaming fluents to laydi
2008-12-05 21:51:08 +00:00
9c026a39d0 BREAKING STUFF!
Renaming from fluents to laydi
2008-12-05 21:50:32 +00:00
27e4504bf6 BREAKING STUFF!
Rename fluents to laydi.
2008-12-05 21:48:24 +00:00
45a06fab7f debian/rules builds a bit smoother by running configure with both --root and --prefix. 2008-12-05 20:03:02 +00:00
d3eba1cd8b Deleted doc/Makefile because it should be generated by configure. 2008-12-05 20:02:13 +00:00
5739de0a61 Name change to Laydi. 2008-12-05 20:01:02 +00:00
444e524f44 Name change to Laydi. 2008-12-05 19:59:55 +00:00
c544c983e4 Added --root to allow deb builds to go smoother. 2008-12-05 19:59:21 +00:00
04b7cbb872 Lots of changes in structure to make it possible to package the program while
still allowing workflows to be placed in several locations in the file system.

The installation procedure is now ./configure && make install. configure will
run m4 on fluents/paths.py.m4, Makefile.m4 and doc/Makefile.m4 so that the
installation system and the program will know where some important directories
are located.

The paths.py.m4 and consequently also paths.py files are just listings of
directories configured during install.  I did this to separate these from all
other files so that as little as possible is touched by m4. It is still
necessary to do an install now to get the program to run in a clean checkout.

Workflows can now be placed anywhere in the system. This is done by setting the
workflowdir variable in the configuration file. All workflow directories,
separated by semicolons, are added to the python path.

The use of setup.py is now deprecated.
2008-12-05 00:12:49 +00:00
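As a sketch, the workflowdir setting this commit describes could look like this in the configuration file (paths hypothetical; directories are semicolon-separated):

```
workflowdir = /usr/share/laydi/workflows;/home/user/laydi/workflows
```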
b313cf29bc Workflow updates 2008-02-29 16:23:57 +00:00
1ad8b1a4f1 Fixed initial settings of axis in scatter 2008-02-29 16:16:17 +00:00
aa3ec28ec6 Replaced hard-coded 3s with a variable that determines the size of the viewport
resizing dialog on the toolbar.
2008-02-27 17:38:01 +00:00
cb6d6b87cc Whatnot 2008-02-08 14:58:46 +00:00
6b78629946 Save identifier list. 2008-02-07 18:15:39 +00:00
a380093afc Added workflowdir to configuration file. workflowdir is a semicolon separated
path of directories where workflows can be found.
2008-02-07 14:20:52 +00:00
e6a671a8c0 Added function to split datasets on selection. 2008-02-06 19:54:17 +00:00
b37ebe568f Fixed subdata method so the correct identifiers are stored 2008-02-06 16:29:02 +00:00
e417547923 cc 2008-02-06 09:42:46 +00:00
14d24d02c2 Demo workflow 2008-02-05 11:34:14 +00:00
848ba7f80c Added drag-n-drop of matrices to idlist window to support ranking of selected variables. 2008-02-01 11:09:10 +00:00
f83a16ec37 Added new script. 2008-01-21 11:15:54 +00:00
9e4f86c557 Fixed writing of nodepos to assert ds type + added subdata function 2008-01-15 11:41:53 +00:00
56a572c4ee Removed unnecessary line from from_networkx. 2008-01-08 10:19:57 +00:00
ab9c1ec84b Sparse network support and nodepos read/write 2008-01-08 00:43:56 +00:00
bf29661af9 Added convertion to sparse format in category dataset 2008-01-07 13:13:27 +00:00
e6625c975f bugfix 2008-01-07 12:44:08 +00:00
ca267920d8 Removed a couple of output datasets that did not work from pls. 2008-01-07 10:47:17 +00:00
bed280353b Added support for sparse category-dataset 2008-01-06 17:01:00 +00:00
a84731da30 Support for icon assignment based on introspection 2007-12-17 18:52:49 +00:00
163271fc17 Correct behavior of dataset icons and names under transformations 2007-12-17 18:50:27 +00:00
083702b3b1 Fixed bug in color updating 2007-12-15 14:50:33 +00:00
e80440472d Fixed axes scaling error in scatterplot 2007-12-14 17:27:12 +00:00
4de9a58ae9 Made a LoadDataFunction instead of four almost identical specialiced Funcions. 2007-12-14 16:45:05 +00:00
07477888bb Fixed colormap bug 2007-12-14 15:43:35 +00:00
5c2df5c163 Removed get_project nonsense 2007-12-14 15:42:37 +00:00
6996aab2fa Added log transform, support for multi selections in navigator-scatter, removed get_project-nonsense 2007-12-14 15:42:17 +00:00
a94c3ed1cc Smoker workflow now loads data from correct path. 2007-12-14 15:15:01 +00:00
ba7d8fc34f Changed the way workflows are handled. You now specify a workflow by naming the
containing python module. The module must be in python's path.
2007-12-14 11:48:21 +00:00
0e8d864ace Does not need to import workflows. 2007-12-14 11:46:08 +00:00
7600abf372 Deleted all datasets. They don't belong here. 2007-12-12 17:02:28 +00:00
05cf63e694 Removes rpy, forcing scipy to be used. 2007-12-12 14:41:27 +00:00
455b2a7a86 Small fixes to setup.py and rules. 2007-12-11 19:49:36 +00:00
1798279a32 Files to debianize the program. 2007-12-11 18:26:47 +00:00
6379acca58 Added setup.py to do installation. 2007-12-11 18:25:08 +00:00
cef882b0a4 * Annotations are no longer cluttered as if they all belong to the same
  dimension. There was a bug that actually gave the same annotation dictionary
  to all dictionary annotation handlers.
* The selection window now remembers which annotation was shown for each
  dimension.
2007-12-11 13:47:29 +00:00
c862c8bb04 Added function to specify a specific workflow based on module name. 2007-12-11 11:40:27 +00:00
9242b88d07 Removed buttons that do not belong on the toolbar: "New", "Load", "Save".
Load and save are currently not implemented, and they do not represent
frequently used actions anyway.
2007-12-11 10:13:31 +00:00
90c84b768a Changed help URLs. 2007-12-11 01:11:13 +00:00
057e2a9f1d About to change the way logging is normally called. 2007-12-11 01:02:47 +00:00
d0a7b1cbc2 Code cleanup. 2007-12-11 00:13:26 +00:00
055c0ea4ac Dataset dimensions are shown in statusbar. 2007-12-10 21:08:09 +00:00
1a4d73f26b Fixed the function menu. It now works, is placed before "Help", and the method
for running a function has been moved to the workflow module instead of being hidden in the
WorkflowView.
2007-12-10 16:57:38 +00:00
82dacc3dd1 Added menu items to toggle visibility of workflow, navigator and infopane. 2007-12-06 00:20:42 +00:00
27d935319c Added a delete function to the context menu on navigator elements. 2007-11-09 15:02:32 +00:00
c26999e688 Added line plot of matrices. 2007-11-08 11:47:36 +00:00
7ee7f0cf7e Added scatter plotting from dataset in navigator. 2007-11-07 12:44:22 +00:00
aef2e1daf0 Tralala ... 2007-11-07 12:34:13 +00:00
ca51a0b382 Added mat2ftsv to naïvely convert matlab matrices to ftsv files. Works in at
least one case, but does not try to guess anything sensible for identifiers.
2007-10-31 17:43:02 +00:00
b114d5aeec New crossval index generator 2007-09-25 06:31:40 +00:00
18f33decc7 Finished the l-pls fdr 2007-09-21 13:16:40 +00:00
6698ebe932 Removed activation in dimcontroller
opt. of compare func
2007-09-20 16:12:58 +00:00
41f93c5989 .... 2007-09-20 16:11:37 +00:00
7e9a0882f1 Updates 2007-09-20 16:10:40 +00:00
d9e5398865 GO (id, description, definition, ontology) 2007-08-28 09:39:24 +00:00
16ed2fd9c9 u 2007-08-24 11:48:29 +00:00
dc43830752 HS-entrez annotations 2007-08-24 09:15:16 +00:00
7dbf28f65d ... just lots of stuff 2007-08-24 09:14:24 +00:00
21b63b17e5 Fixed axis bug in barplot 2007-08-24 09:13:34 +00:00
7538710144 Enabled tree lines and set sensitivity on transforms 2007-08-24 09:12:52 +00:00
fa4604dbe5 load dataset defaults to datadir 2007-08-22 14:05:51 +00:00
01ef4bdedd Added support for identifiers sorting, and selection 2007-08-22 13:41:04 +00:00
ffe1ce6319 Fixed queries 2007-08-22 13:40:33 +00:00
6ba8c6eb67 oops 2007-08-21 10:31:09 +00:00
e06eeb6d17 oops 2007-08-21 10:25:23 +00:00
26ab6c3fe7 Added a "decimals" parameter to write_ftsv, so that datasets can be written
without full precision. Defaults to 7 digits after decimal point.
2007-08-16 10:02:54 +00:00
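A minimal sketch of the call, assuming write_ftsv lives in the dataset module and accepts a filename (per a commit above); the module path and file name are assumptions:

```python
from fluents import dataset  # the package was later renamed to laydi

# write ds with 3 digits after the decimal point instead of the default 7
dataset.write_ftsv('out.ftsv', ds, decimals=3)
```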
a6af506ae0 The reduced Uma X, for use in Lpls 2007-08-16 09:46:33 +00:00
fc91d17bdc The Uma affy, nci60 + drug data + tissue type 2007-08-16 09:17:07 +00:00
d510e092e3 Added SAM and conditioned enrichment analysis 2007-08-14 16:16:31 +00:00
8d4848d5fa Added explained variance 2007-08-14 16:12:28 +00:00
004cfe0a9f Quick fix for gray on NaNs 2007-08-14 16:10:35 +00:00
13b35180ee Fixed bug in DAG plot color mapping. 2007-08-08 12:24:14 +00:00
e08dba0924 read_ftsv and write_ftsv now supports filenames as well as file descriptors. 2007-08-08 12:23:45 +00:00
04126e9c83 New information content datasets. 2007-08-08 10:13:05 +00:00
46c2a0c149 New calculations (Z), based on updated X data 2007-08-07 11:45:16 +00:00
916fc4c046 Added scaling to navigator menu, moved transformations to a separate sub menu 2007-08-07 11:42:07 +00:00
168384f266 Cleaned esvd routine, added subfunc scale 2007-08-07 11:41:03 +00:00
d055a1f882 Some ..... 2007-08-07 11:39:16 +00:00
4b80baf225 Added information content drag'n'drop to z loadings plot. 2007-08-06 16:20:39 +00:00
72e300c95e Added automatic bin selection in histogram plot 2007-08-04 10:37:22 +00:00
3f7215bc35 Factored out a mixin class for IC thresholding. 2007-08-03 12:09:17 +00:00
0523ebab05 Added dataset transpose option 2007-08-03 09:44:31 +00:00
e62a6ae9b3 Removed print statements 2007-08-02 14:51:53 +00:00
0758ab8bfb Fixed histogram plot to accept selections of one, and made ad-hoc choice of proportional color to patches 2007-08-02 14:37:19 +00:00
7a0e2481eb Full corrected X data 2007-08-02 11:22:05 +00:00
94f4b276b9 Fixed selection in DAG-plot to work both with and without IC set. 2007-08-02 11:20:30 +00:00
b334e94b15 New and corrected smokers data 2007-08-02 11:19:47 +00:00
438e7cb918 Irrelevant play 2007-08-02 11:19:16 +00:00
2d419a9862 Added GO-category enrichment analysis 2007-08-02 11:18:48 +00:00
973470b595 stuff 2007-08-02 11:18:18 +00:00
da5b977042 Moved less important datasets into a subfolder named "secondary". 2007-08-02 10:52:58 +00:00
83d76eac4f Use correct namespace for scipy funcs 2007-08-02 10:44:06 +00:00
b233e4abc6 Added spin buttons for minimum and maximum information content in DAG plot. 2007-08-02 10:20:33 +00:00
dc3893eecc Fixed a "main.project" bug 2007-08-02 10:19:16 +00:00
f12fe23c1b Fixed docstrings. 2007-08-02 10:08:52 +00:00
4c24061575 Fixed docstrings. 2007-08-02 10:08:27 +00:00
63d6347f37 Fixed makefile to not use lots of space on inherited function documentation. 2007-08-02 10:04:06 +00:00
6e4db7ebe4 Fixed plot focus signal problem. 2007-07-31 12:25:56 +00:00
10eba079bc iii 2007-07-30 18:04:42 +00:00
aa4007e208 Improved drag'n'drop of data into scatter plots so that it no longer requires
matching identifiers along dimensions.
2007-07-30 17:42:48 +00:00
0bc4a6e3f0 Fixed drag'n'drop of datasets to DAG-plots to convert inf/-inf to
maximum/minimum values of the dataset.
2007-07-30 14:15:23 +00:00
e84a202fbe Commented out unused functions. 2007-07-30 10:51:58 +00:00
50a6dae3f6 added full lpls data 2007-07-30 10:21:09 +00:00
b39e71ca2b Trying to fix cv_pls 2007-07-30 09:46:43 +00:00
9ccdf97d07 Changed filenames of information content datasets. 2007-07-30 07:55:36 +00:00
1e18ac81de Added names to datasets. 2007-07-30 07:54:22 +00:00
349cab3c51 oops 2007-07-28 16:05:11 +00:00
9a2e259209 c 2007-07-28 09:19:42 +00:00
df88f44255 clean up
oops
2007-07-28 09:18:48 +00:00
5cf34fc03f Added information content datasets.
They are in files named ICs<subgraph><number>, where
those numbered 0 use all ECs while those numbered 5 use only the safest ones.
2007-07-27 17:22:10 +00:00
4b8f594966 app -> main.application 2007-07-27 09:40:58 +00:00
47b89cc411 Fixed bug in esvd for m>n 2007-07-26 18:32:48 +00:00
7fd4ac6225 pls opt fix 2007-07-26 18:32:27 +00:00
b35f814ef0 Added center check button on lineview 2007-07-26 18:26:50 +00:00
dc7f7dbde2 Added existing_identifiers function to Dataset.
Added colouring drag'n'drop to DAGPlot in gobrowser module.
2007-07-26 15:45:42 +00:00
a45743c31e Added main.py that now contains the One & Only Singleton instance of these classes:
- Navigator
- Application
- Workflow
- Project
- Options
Corresponding changes have been added in lots of other files to account for this, but
the access to these objects should now be a lot easier.
2007-07-26 12:35:59 +00:00
91abf12f51 sd 2007-07-24 14:34:47 +00:00
13dc6de5c1 Name annotations to go-terms. 2007-07-24 13:34:40 +00:00
1d517d0754 Changed hardcoded absolute path to relative path. 2007-07-24 13:00:29 +00:00
524c90fcf7 Updated to handle new numpy/scipy. 2007-07-24 12:19:13 +00:00
05274b4f0b confidence 2007-07-23 18:07:10 +00:00
939dba20ee first import lpls 2007-07-23 17:35:28 +00:00
54d271b841 go test data 2007-07-23 17:35:04 +00:00
a05d0faa0d Lib updates 2007-07-23 17:33:21 +00:00
7ea87e646a Added DAG plot to gobrowser module and smokers workflow. 2007-07-23 17:02:28 +00:00
155dfada5c iups 2007-07-23 13:25:34 +00:00
9db5991108 2007-07-20 15:48:59 +00:00
98f53d3448 updates 2007-07-20 12:32:54 +00:00
7ee7aa968a Initial import 2007-07-20 09:36:26 +00:00
dd04e28a62 Query fix 2007-07-05 18:49:24 +00:00
850deeec0d Renamed gene identifers to gene_id 2007-07-05 18:40:29 +00:00
75c7c51708 DistanceToSelectionFunction 2007-07-05 18:36:59 +00:00
3f5d45d7af Renamed smalltest workflow to smokers. 2007-07-05 18:24:45 +00:00
fae096afe4 Changed workflow name from smalltest to smokers, because it is heavily based
on the smokers dataset.
2007-07-05 13:20:52 +00:00
a503ffcdf8 Fixed linewidth in selection in scatter plots. 2007-07-05 11:57:21 +00:00
d29013a863 T-test now works, and outputs a histogram as well as t-values and p-values. 2007-07-03 16:25:38 +00:00
99987999db Broke up a too long line. 2007-07-02 09:54:56 +00:00
884804c49a Added a dummy for the t-test function. Will be updated when I have access to my
office computer tomorrow.
2007-06-28 21:48:13 +00:00
b637a5badc Changed /usr/bin/python2.4 to /usr/bin/python, and hope that 2.5 will not have
trouble running the app.
2007-06-28 21:42:18 +00:00
4eaea2fe59 Added empty lists as default values for all_data and all_plots in Options, so
that subclasses of Options will work with the GUI without modifications.
2007-06-28 21:33:09 +00:00
7b20f44fef Added a GO similarity function that does not do anything useful yet. 2007-06-22 15:37:59 +00:00
e5cd8a8077 Started on a function to enhance gene lists by using GO similarities.
Not in a usable state.
2007-06-22 15:37:22 +00:00
68cc583d2c Added common_dimensions(ds) that lists common dimensions between two
datasets.
2007-06-22 15:35:45 +00:00
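A sketch of how this helper might be called, assuming it is a Dataset method that takes the other dataset (ds1 and ds2 are hypothetical):

```python
shared = ds1.common_dimensions(ds2)  # e.g. ['gene_id']
```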
ff8833a22c Added read functions for the medium and large smokers sets. 2007-06-21 10:28:10 +00:00
d6298a2d99 Ensures that the GO is only loaded once. 2007-06-21 10:26:35 +00:00
ff2c42902f Commented out some log lines that just cluttered the log. 2007-06-21 10:25:17 +00:00
2eb3d71a5d Moved data/smokers-med to data/smokers-medium. 2007-05-16 15:35:25 +00:00
91032d729e Some more smoker data 2007-05-16 11:45:43 +00:00
7bdfdea23b Added -d option to get all identifiers along a single dimension. 2007-05-16 10:30:54 +00:00
c7bfefe358 Fixed resnik distances. 2007-05-15 18:16:07 +00:00
335efe231a go-gene-matrix takes a GO vs. GO distance matrix and a gene-go-mapping file
and makes a gene vs. go distance matrix based on the shortest distances found
between each gene and go term.
2007-05-15 18:15:51 +00:00
1ab558248c Piping information through dataset -i works. Added names of planned features
to the dataset script.
2007-05-09 21:52:59 +00:00
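A hedged sketch of the piping described above, matching the -i and -d options in the bin/dataset source included at the bottom of this diff (file and dimension names hypothetical):

```console
dataset -i < smokers.ftsv          # print dataset name, type and dimensions
dataset -d gene_id < smokers.ftsv  # list identifiers along the gene_id dimension
```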
7272eb63d2 Working on a script to query datasets without starting fluents. 2007-05-09 17:41:50 +00:00
ebe7621817 Some Bioconductor utilities 2007-05-06 13:51:06 +00:00
f1301be67a go-distance update. 2007-04-26 18:54:34 +00:00
a0faed2614 Forgot some ... 2007-04-24 17:21:40 +00:00
34ae426c86 Added correlation plot and moved colorbar to plots 2007-04-24 17:17:31 +00:00
90bb25eb55 added default mappable to None 2007-04-24 17:16:34 +00:00
31ac569c3e Added colorbar shortcut 2007-04-24 17:04:29 +00:00
fefaffb2e3 go-distance seems to be able to calculate resnik now. Not yet verified against
calculations by hand.
2007-04-04 19:08:50 +00:00
28f0f53e8a go-distance script soon calculating resnik distances. 2007-04-03 13:08:48 +00:00
a5c3a1f154 A couple of extra lines of code in a script. 2007-03-27 13:02:22 +00:00
728c334e8a ontology distance script now accumulates annotations correctly. 2007-03-26 23:15:14 +00:00
11eb3306b9 Works a bit better. 2007-03-26 17:18:26 +00:00
947cf8385f Annotations on yeast from Stanford mdb. 2007-03-23 20:51:04 +00:00
0904a59310 Gene ontology distance script in C. Will hopefully speed up calculations when
finished.
2007-03-23 13:25:07 +00:00
6f19fe1b4b Resnik distance matrix 2007-03-16 17:05:46 +00:00
0eff457910 Added biological process info to the small smokers example, and a README file to
explain the dataset, at least the non-trivial parts.
2007-03-15 23:57:35 +00:00
3dc1867be4 Added entrez-go-mapping that maps entrez IDs to GO terms based on a file of the form:
affy_id ::: geneid_1 /// geneid_2 ::: go(bp) ::: go(cc) ::: go(mf)
2007-03-15 23:47:24 +00:00
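A hypothetical input line in that format (all IDs invented for illustration):

```
exampleprobe_at ::: 1111 /// 2222 ::: GO:0001111 ::: GO:0002222 ::: GO:0003333
```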
6fe4ff3c59 Added scripts folder for small but useful scripts. 2007-03-15 12:46:41 +00:00
b02773b4e6 pls loading plot now has correct name 2007-03-15 10:38:52 +00:00
dbe49b366d added import of networkx 2007-03-15 10:38:05 +00:00
f6696dfacc Added gobrowser to smalltest workflow. 2007-03-14 21:08:56 +00:00
1c48315a0f Added debug output. 2007-03-14 17:26:40 +00:00
438a268b1d Inserted visible header sort 2007-03-14 16:34:50 +00:00
3d2492578e whitespace 2007-03-14 16:33:54 +00:00
3bd21ab089 metric support 2007-03-14 16:33:16 +00:00
48047f1395 Moved lots of stuff back to sandbox 2007-03-14 16:32:49 +00:00
7f1f639ee7 Updates on metric, whitespace 2007-03-14 16:31:25 +00:00
22f6e3284c whitespace 2007-03-14 16:30:18 +00:00
818dc0b2b0 added a logger for constructor calls 2007-03-14 16:27:23 +00:00
212da78933 correct comp update 2007-03-14 16:17:21 +00:00
00dd3e8d9d added validation on identifiers input 2007-03-14 16:06:16 +00:00
51435d9fdc Fixed dataset type to network 2007-03-14 10:27:08 +00:00
ac447546a2 Annotation file for dim: gene_id 2007-03-14 10:26:11 +00:00
a9981779a6 network of articles 2007-03-14 10:25:35 +00:00
9cbefb5705 in write_ftsv: adjusted so it splits specifically on tab, whitespace allowed in annotations 2007-03-14 10:24:49 +00:00
6cbf6d0689 Initial commit of small example test workflow 2007-03-13 15:24:20 +00:00
6b2737efb9 Added a line. 2007-03-09 14:33:09 +00:00
f61bc353a9 Typofix 2007-03-09 14:30:40 +00:00
5be2f1faf6 Initial import of Iris data 2007-03-04 21:00:39 +00:00
ae9a207a71 Added a tag column in the GO browser to help link it to selections. 2007-03-01 19:13:13 +00:00
442b5fed5f Moved all files to the smokers-small subdirectory. 2007-03-01 19:12:40 +00:00
f31795ac2a Corrected erroneous checkin. 2007-03-01 15:27:50 +00:00
e01dd578cd Preserves PYTHONPATH 2007-03-01 15:27:24 +00:00
ff070e5312 Preserves PYTHONPATH. 2007-03-01 15:26:38 +00:00
28e6c52f78 Fixed bug with hidden dimensions. 2007-03-01 15:06:33 +00:00
d699afa6a5 fixed bug when trying to drag anything over an empty view 2007-03-01 15:03:53 +00:00
f8f3bd712e Smoker example data 2007-03-01 15:01:30 +00:00
6101bf850c Changed CategoryDataset ftsv files to be written with 0 and 1 instead of True and False. 2007-02-28 16:33:11 +00:00
89636962a7 Added Workflow.get_data_file_name() which returns the full path to a given
data file, or None if the file does not exist. This is tested in the test
workflow.
2007-02-28 14:56:24 +00:00
934640ea62 Searches for /etc/fluentsrc and ~/.fluents configuration files. Adds the
home configuration variable.
2007-02-28 14:07:33 +00:00
281337251d Added selection of lines in network plot 2007-02-27 21:19:24 +00:00
78183df0e9 Alignment of function boxes 2007-02-27 18:14:58 +00:00
b754a5df44 Update from change of plots/view 2007-02-27 16:31:18 +00:00
61cfd00558 Division of plots -> view/plots 2007-02-27 16:28:03 +00:00
059cddbc16 Division of plots -> view/plots 2007-02-27 16:27:39 +00:00
bf1ad1be87 Removed old help function. 2007-02-27 16:25:48 +00:00
1ba4bf5f82 Initial config parser support. 2007-02-27 16:08:22 +00:00
686a1fdcd1 network/venn plot now maximizes the space available 2007-02-27 15:42:27 +00:00
d27278241c oops ... (broken code fixed) 2007-02-27 15:08:00 +00:00
4de65f1085 Clean up 2007-02-27 15:05:21 +00:00
f73a6db0ee Added ctrl+arrows to navigate between plots. 2007-02-26 23:00:25 +00:00
eacb44f32a Removed "Edit" menu. 2007-02-26 22:12:32 +00:00
d040f11c2f Added identifier list update on selection changed. 2007-02-26 18:02:34 +00:00
537dc9f0ec Cleanup of selection browser module. Added comments, fixed some bits of code. 2007-02-26 14:35:31 +00:00
c0bbe24c39 Scatter markers now update to show an empty selection instead of keeping the last selection 2007-02-26 12:11:28 +00:00
a6fbef9cfa scatter plot selected markers are now on top (visible) 2007-02-26 11:20:02 +00:00
2f60d0663d Clean up of import statements 2007-02-26 09:08:50 +00:00
5ed602f355 Lots of changes .... (new selector, mode updates, histogram selection, ++) 2007-02-26 08:00:36 +00:00
3ee8439e07 Added selection listeners to plots in context menu 2007-02-26 07:58:33 +00:00
a614908436 Size of current selection is now updated in details 2007-02-23 11:12:03 +00:00
41e8ed511b Fixed dim_cursor_changed to use the correct treemodel, and added show/hide on selection details 2007-02-23 10:46:04 +00:00
0d8702bb71 added show/hide on dimensions 2007-02-21 19:57:27 +00:00
2bae169f38 Annotation display now works (sort of). 2007-02-21 16:39:04 +00:00
28aca8c621 added project to selection controller 2007-02-21 13:57:48 +00:00
8c03338b75 hypergeometric test (scipy version has errors) 2007-02-21 13:03:51 +00:00
48165d1aed Updated the selection controller with sorting ++ 2007-02-21 12:41:37 +00:00
f0b2660813 Selection listeners are now notified when new dimensions are added (because
this implies that an empty selection is made on the new dimension).
2007-02-21 12:05:08 +00:00
5800e7e362 Fixed a bug that prevented the first selection on a dimension from coming from a
dataset.
2007-02-21 11:49:43 +00:00
a5955f9b59 Now updates the DimListController when a CategoryDataset is drag'n'dropped into
SelectionListController.
2007-02-21 11:43:00 +00:00
c5c4f13a19 Fixed some excessively long lines. 2007-02-21 11:08:20 +00:00
2cc4e86b0a Split selections.py into three different controller classes, one for each list. There
is still some work to be done, and bugs to be fixed.
2007-02-21 00:00:26 +00:00
a2195bcb4b Added Import annotations option. 2007-02-16 11:23:29 +00:00
93a2f404d2 Data needed if annotationtest should run clean. 2007-02-15 23:09:49 +00:00
6809c1a454 Added some annotation support in selections.py, including the beginning of a
menu option to add an arbitrary annotation alongside identifiers.
2007-02-15 22:57:40 +00:00
953662e1fb Added a couple of more assertions to the test of read_annotations_file() 2007-02-15 22:54:08 +00:00
e6cf8f765a Added read_annotations_file() 2007-02-15 22:49:40 +00:00
b17f04466b Added read_annotations_file() which reads a tab delimited file containing
annotations to identifiers along a dimension.
2007-02-15 22:42:55 +00:00
4319e8630e fixed error in typechecking 2007-02-15 13:36:36 +00:00
fc33c2dbfc factored out packer 2007-02-09 14:02:58 +00:00
15c89fb9b5 Factored out packer
A    packer.py
2007-02-09 14:02:39 +00:00
7eb13b14cb Removed resolution arg to Circle to avoid construction of a circlepolygon (deprecated) 2007-02-02 12:21:19 +00:00
320cd97512 Removed noise generating print statements. 2007-01-31 14:58:37 +00:00
75b66914fd Fixed bug by removing hardcoded filename. 2007-01-31 14:56:43 +00:00
74cb30428c Added annotations to identifiers, and converted the unused annotationtest.py to
test the new module.
2007-01-31 14:19:23 +00:00
1863bfc564 Save plot enabled 2007-01-31 13:26:46 +00:00
a7c60ffe64 added venn diagram test plot 2007-01-31 13:03:27 +00:00
240b0a4a9f New lasso icon 2007-01-31 13:02:25 +00:00
d1205ae67f Removed colorbar 2007-01-31 13:01:09 +00:00
37feb1c25b bugfix on P scale 2007-01-31 13:00:36 +00:00
2cfa3ca415 spin on a/o, colorbar on keypress 2007-01-31 12:59:21 +00:00
63be80aa92 pls options added 2007-01-31 12:03:11 +00:00
088f180b5d Barplot, lots of changes in scatter, disabled modes, removed labels, bugfix on lasso 2007-01-31 12:02:11 +00:00
e716db3fd2 Added center option to validation, + naming updates 2007-01-31 11:59:23 +00:00
29509902df bugfix on shape 2007-01-31 11:57:59 +00:00
8e607c7b1a bugfixed scaling issues 2007-01-31 11:57:04 +00:00
f064b7134d Removed scale option added pack 2007-01-31 11:56:28 +00:00
1d5929f14c pls options glade file 2007-01-31 11:55:43 +00:00
ed2848beb3 Errors when identifiers don't match shape, + whitespace 2007-01-31 11:54:54 +00:00
48bb47ec86 Bugfixed pca 2007-01-25 12:36:32 +00:00
aeb37cf4cd Added support for tall X in PCA 2007-01-25 12:17:16 +00:00
fa3722600c pca options glade file 2007-01-25 12:15:30 +00:00
61bfc236e6 Changed location of PCA options glade file. 2007-01-25 12:15:19 +00:00
7a204a3426 Added option page to option dialog 2007-01-25 12:10:18 +00:00
1c2c2c8895 Multiple lib changes 2007-01-25 11:58:10 +00:00
a65d79697f Added Expand/Collapse menu choices to the GO browser context menu. This is
particularly useful when searching for GO terms, because the gtk TreeView
widget does not do interactive search in collapsed parts of the tree.
2007-01-21 16:16:37 +00:00
e03cc4ed4c Added a ScrolledWindow around the navigator. 2007-01-19 14:00:10 +00:00
7693f336ba Added a singleton project in the project module, so that the current project is
easily accessible from anywhere in the code. As there is only one project at
any given time this should not be a problem.

The Gene Ontology browser needed this to be able to set the selection without
passing the current project through several constructors. 

Added a context menu in the GO browser that allows the user to select all GO
terms in the subgraph that starts on the current node.
2007-01-18 15:45:48 +00:00
f533344242 Added histogram in plot menu 2007-01-17 15:40:54 +00:00
7c20e6d780 Added simple histogram 2007-01-17 15:40:33 +00:00
1ffbf4f9e7 Added default implementation of set_current_selection in Plot, so that plots can safely
ignore selections without adding any code to the plot.

Rectangle select mode, lasso select mode and freeze are now grayed out in ImagePlot.
2007-01-17 15:35:12 +00:00
06591e301e Added "Plot" submenu in navigator context menu. Can create an ImagePlot of a dataset. 2007-01-17 14:06:07 +00:00
c3c6e85044 Added ImagePlot, which displays a dataset as an image. 2007-01-17 13:20:04 +00:00
33c51505c4 Fixed the dataset loading so that it returns a correctly named Dataset. 2007-01-16 12:41:29 +00:00
f612dda72f Fixed bug in ftsv save and open dialogs. The save dialog now looks as intended.
Added the possibility to load several datasets at once. Error handling should still
be improved.
2007-01-16 12:28:56 +00:00
ea4c0af6de fixed shift/control on lasso 2007-01-15 15:46:24 +00:00
cacb0a27b7 Added "Save dataset" option to navigator context menu. 2007-01-15 13:58:56 +00:00
476c830a5e lasso 2007-01-15 13:47:47 +00:00
0a38cb7d4a added lasso 2007-01-15 13:47:18 +00:00
ee447b688c Added context menu to NavigatorView that allows reading FTSV datasets directly, i.e. without
adding a function in the workflow for this.
2007-01-12 14:37:44 +00:00
58886726d8 Small updates. 2007-01-12 10:50:55 +00:00
ae3aa7e4aa Create Resnik distances and save them as ftsv. 2007-01-12 00:00:56 +00:00
08b7d8dd85 Added function to save fluents tab separated values (ftsv) files. 2007-01-11 23:24:05 +00:00
e4d7f8e5a2 Resnik distances between GO terms work now. 2007-01-11 22:01:31 +00:00
4b1d16ea78 Updated go-workflow. 2007-01-11 18:57:45 +00:00
3340b1b7ac Added support for union/intersection selections 2007-01-11 13:07:54 +00:00
d4f5a92010 Fixed typo. 2007-01-11 13:07:03 +00:00
9274b044b7 Added read_ftsv in dataset.py. This method reads fluents tab separated values
files and returns a dataset.
2007-01-10 17:35:58 +00:00
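A minimal usage sketch, assuming the fluents package (later renamed laydi) is on the Python path; the file name is hypothetical, and the same call appears in bin/ftsv2csv at the bottom of this diff:

```python
from fluents import dataset

ds = dataset.read_ftsv('data.ftsv')  # returns a dataset object
```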
060732d980 More presumably non-working stuff. 2007-01-10 15:33:19 +00:00
1e0b99f283 small bugfix 2007-01-09 08:53:28 +00:00
fa047e0171 Committing non-working code that breaks go workflow. ;) 2007-01-08 21:13:22 +00:00
6f050d4f10 Fixed error in docstring. 2007-01-08 19:31:19 +00:00
1371bc1b1a Added function menu. Clicking the items still does not work, though. 2007-01-05 14:42:22 +00:00
f7fe171896 Added Optionsdialog for pca and pls 2007-01-04 13:53:47 +00:00
e08a8377ed Added Options and OptionsDialog 2007-01-04 13:53:03 +00:00
ebd4fdae7b Changed all places that load icons to do it through IconFactory. 2007-01-03 17:30:27 +00:00
57e58e3910 Renamed IconFactory.get_icon() -> IconFactory.get() 2007-01-03 13:46:39 +00:00
d45ae4f480 Added icon factory to ensure that all icons are loaded exactly once. This also
cleans up code and saves X pixmap memory.
2007-01-03 13:34:40 +00:00
75447d8840 Fixed function output bug. Datasets now appear as children of their first parent, also in gtk 2.10 2007-01-03 13:05:37 +00:00
c34f23190b init 2007-01-02 15:10:59 +00:00
3ef5522dd0 First import of chemometrics utils 2006-12-18 11:59:12 +00:00
fac9346aad Added very hackish annotation count reader. 2006-12-15 12:53:27 +00:00
6aa1a1a817 Code upgraded to adjust for new version of networkx 2006-11-24 13:29:09 +00:00
880509f8a9 Removed whitespace that broke plots. 2006-10-31 14:35:12 +00:00
fb77ddb549 Updated GO workflow. Options class test added. Reading the ontology takes too
long.
2006-10-26 16:51:42 +00:00
438dbd358b Dataset to colour of dragndrop in dataset + minor adjustments in networkplot 2006-10-25 12:56:21 +00:00
0b30aeac6f Updated GO workflow. 2006-10-22 23:19:54 +00:00
5491f5ce04 GO workflow. Does not work. 2006-10-22 15:31:36 +00:00
de4a65e7b3 Added the svg format icon for move 2006-10-18 12:47:01 +00:00
cdb8d1b026 Removed unit test for workflows. The only test file in the directory is
for a workflow that is not used.
2006-10-17 17:53:52 +00:00
1f9a400057 Added docstrings. 2006-10-17 17:50:42 +00:00
f7ededa451 Removed executable bit. 2006-10-17 17:50:18 +00:00
fe96772319 Expanded some documentation.
Added clean target to doc Makefile.
2006-10-17 17:18:46 +00:00
5d4700da20 Removed some example rubbish that was placed after the GPL and had gotten
inside the file due to sloppy cut-n-paste.
2006-10-17 17:17:50 +00:00
fb3d9f2050 Added documentation directory with a Makefile to generate source code
documentation. This adds python-epydoc as a build dependency.
2006-10-17 15:06:24 +00:00
4153f06c60 Added start-up script that sets PYTHONPATH and starts fluents. 2006-10-17 14:43:57 +00:00
375d45e0cc Updated import statements, and removed the workflows pca_workflow and
affy_workflow.
2006-10-17 14:42:27 +00:00
610812f265 Started to rename "system" package to "fluents". Repository will be broken
until this is completed.
2006-10-17 14:28:35 +00:00
c40f9a2947 Added a LineViewPlot in GenerateData 2006-10-17 13:59:31 +00:00
bf951990f5 Comments and cleanup in LineViewPlot 2006-10-17 13:58:33 +00:00
ba55678472 Added documentation strings and removed some unused code. 2006-10-17 12:33:30 +00:00
42f6fd7440 Yet another bugfix on the plot updates. View frames will now be updated from
the upper left corner, horizontally first.
2006-10-17 10:37:46 +00:00
4b8ff16bbf Another bug fix. Double-clicking function output works, also after changing table size. 2006-10-17 10:32:53 +00:00
eb203ea8ad Bugfix; keeping view if table size increases. 2006-10-17 10:06:27 +00:00
9162deabac Updated icon. 2006-10-16 20:16:50 +00:00
d9ec8f1611 Small code cleanup. Removed some lines of code that were commented out. 2006-10-16 20:01:08 +00:00
eefbc438bc The application is now a singleton. Added button to choose number of
plots.
2006-10-16 19:57:40 +00:00
ebdf6d4aba Updated icons 2006-10-14 17:17:23 +00:00
16036e5b52 Removed text labels from toolbar. 2006-10-14 15:31:38 +00:00
e09b3e3c3e Changing active plot when plot title is clicked. 2006-10-14 14:50:27 +00:00
e08cc8b816 The active plot is now more visible. 2006-10-13 22:25:18 +00:00
f826931105 Toolbars now get automatically updated from the global mode setting when
the mouse enters a plot.
2006-10-12 18:33:38 +00:00
b1bfe1fe3e Fixed selection issues that were probably introduced by the merge from the
toolbar branch.
2006-10-12 17:38:41 +00:00
d9fd4fc127 Merged in the toolbar branch. The toolbar branch is now obsolete. 2006-10-12 14:58:36 +00:00
143 changed files with 51766 additions and 4184 deletions

61
LICENSE

@@ -276,64 +276,3 @@ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License.

30
Makefile.m4 Normal file

@@ -0,0 +1,30 @@
PREFIX=M4_PREFIX
BIN_DIR=M4_BINDIR
DATA_DIR=M4_DATADIR
PY_DIR=M4_PYDIR
DOC_DIR=M4_DOCDIR

SCRIPTS_DIR=${DATA_DIR}/laydi/scripts/

install: install_laydi install_doc

install_laydi:
	## Install binary files
	install -m 755 -D bin/laydi ${BIN_DIR}/laydi
	install -m 755 -D bin/dataset ${BIN_DIR}/dataset
	## Install library files
	find laydi/ -type f -name '*.py' -exec install -m 644 -D {} ${PY_DIR}/{} \;
	find laydi/ -type f -name '*.glade' -exec install -m 644 -D {} ${PY_DIR}/{} \;
	## Install icons
	find icons/ -type f -name '*.png' -exec install -m 644 -D {} ${DATA_DIR}/{} \;

install_scripts:
	find scripts/ -type f -exec install -D {} ${SCRIPTS_DIR}/{} \;

install_doc:
	DOC_DIR=${DOC_DIR}/ make -C doc install
	install -m 644 README ${DOC_DIR}/
	install -m 644 LICENSE ${DOC_DIR}/

10
R/laydi/DESCRIPTION Normal file

@@ -0,0 +1,10 @@
Package: laydi
Type: Package
Title: Interface to Laydi
Version: 0.1.0
Date: 2011-03-05
Author: Einar Ryeng <einarr@pvv.ntnu.no>
Maintainer: Einar Ryeng <einarr@pvv.ntnu.no>
Description: R interface to Laydi
License: LGPL
LazyLoad: yes

61
R/laydi/R/laydi.R Normal file

@@ -0,0 +1,61 @@
write.ftsv <- function(data, con, name="unnamed_dataset", rowdim="rows", coldim="cols") {
  # If con is a file name, open it
  opened.here = FALSE
  if (is.character(con)) {
    con = file(con, "w")
    opened.here = TRUE
  }

  # Substitute all whitespace with underscores in identifiers
  rows <- paste(gsub("\\s", "_", rownames(data)), collapse=" ")
  cols <- paste(gsub("\\s", "_", colnames(data)), collapse=" ")

  # Write header
  writeLines(c("# type: dataset",
               paste("# dimension:", rowdim, rows, collapse=' '),
               paste("# dimension:", coldim, cols, collapse=' '),
               paste("# name:", name, collapse=' '),
               ""),
             con=con)

  # Write matrix
  write.table(data, file=con, col.names=FALSE, row.names=FALSE, sep="\t")

  # If con was a string, close file now
  if (opened.here)
    close(con)
}

write.laydi.selection <- function(data, con) {
  # If con is a file name, open it
  opened.here = FALSE
  if (is.character(con)) {
    con = file(con, "w")
    opened.here = TRUE
  }

  writeLines(gsub("\\s", "_", data), con=con)

  # If con was a string, close file now
  if (opened.here)
    close(con)
}

read.laydi.selection <- function(con) {
  # If con is a file name, open it
  opened.here = FALSE
  if (is.character(con)) {
    con = file(con)
    opened.here = TRUE
  }

  ids <- readLines(con=con, encoding="UTF-8")

  # If con was a string, close file now
  if (opened.here)
    close(con)

  ids
}
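As a sketch of what write.ftsv produces, assume a 2x2 matrix with rownames r1, r2, colnames c1, c2 and the name "demo" (derived directly from the code above; note that write.table emits the values without row or column names):

```
# type: dataset
# dimension: rows r1 r2
# dimension: cols c1 c2
# name: demo

1	2
3	4
```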

38
R/laydi/man/laydi-package.Rd Normal file

@@ -0,0 +1,38 @@
\name{laydi-package}
\alias{laydi-package}
\alias{laydi}
\docType{package}
\title{Interface to Laydi}
\description{
Interface to Laydi files and projects. Allows writing R matrices to laydi data files.
}
\details{
\tabular{ll}{
Package: \tab laydi\cr
Type: \tab Package\cr
Version: \tab 0.1.0\cr
Date: \tab 2011-03-05\cr
License: \tab LGPL\cr
LazyLoad: \tab yes\cr
}
library(laydi)
write.ftsv(matrix, file, ...)
write.laydi.selection(idlist, file)
}
\author{
Einar Ryeng <einarr@pvv.org>\cr
Arnar Flatberg <arnar.flatberg@gmail.com>
Maintainer: Einar Ryeng <einarr@pvv.org>
}
\references{
}
\keyword{ package }
\seealso{
% ~~ Optional links to other man pages, e.g. ~~
% ~~ \code{\link[<pkg>:<pkg>-package]{<pkg>}} ~~
}
\examples{
}

53
R/laydi/man/read.laydi.selection.Rd Normal file

@@ -0,0 +1,53 @@
\name{read.laydi.selection}
\alias{read.laydi.selection}
\title{read.laydi.selection}
\description{
Reads a list of identifiers from a file.
}
\usage{
read.laydi.selection(con)
}
\arguments{
\item{con}{
Connection (or file name) to read from. If con is a character string, it
will be treated as a file name, and the file will be opened, read and
closed. If con is an open connection (file descriptor), the file will
remain open.
}
}
\details{
%% ~~ If necessary, more details than the description above ~~
}
\value{
%% ~Describe the value returned
%% If it is a LIST, use
%% \item{comp1 }{Description of 'comp1'}
%% \item{comp2 }{Description of 'comp2'}
%% ...
}
\references{
Laydi and the laydi R package are not published in the literature. Source code
can be found at http://dev.pvv.ntnu.no/projects/laydi
}
\author{
Einar Ryeng
}
\note{
%% ~~further notes~~
}
%% ~Make other sections like Warning with \section{Warning }{....} ~
\seealso{
%% ~~objects to See Also as \code{\link{help}}, ~~~
}
\examples{
read.laydi.selection("/tmp/selected_samples")
}
\keyword{ IO }
\keyword{ file }

74
R/laydi/man/write.ftsv.Rd Normal file

@@ -0,0 +1,74 @@
\name{write.ftsv}
\alias{write.ftsv}
\title{write.ftsv}
\description{
Writes a matrix to a ftsv (laydi dataset) file. The matrix must have rownames
and colnames. Rownames and colnames must be unique, and whitespace characters
will be replaced with underscores.
}
\usage{
write.ftsv(data, con, name = "unnamed_dataset", rowdim = "rows", coldim = "cols")
}
\arguments{
\item{data}{
A data matrix. All items in rownames(data) and colnames(data) must be
unique, and all whitespace characters will be replaced with underscores in
the output. (Laydi requires unique identifiers along a dimension, and does
not allow identifiers to contain spaces.)
}
\item{con}{
Connection (or file name) to write the data to. If con is a character
string, it will be treated as a file name, and a file by that name will be
created, written to and closed. If con is an open connection (file descriptor),
the file will remain open.
}
\item{name}{
Datasets in laydi have a name.
}
\item{rowdim}{
Laydi names all dimensions of matrices. rowdim is the dimension
name for rows in the file. E.g. "samples", if rows denote samples.
}
\item{coldim}{
Laydi names all dimensions of matrices. coldim is the dimension
name for columns in the file. E.g. "gene-ids" if columns represent
genes.
}
}
\details{
%% ~~ If necessary, more details than the description above ~~
}
\value{
%% ~Describe the value returned
%% If it is a LIST, use
%% \item{comp1 }{Description of 'comp1'}
%% \item{comp2 }{Description of 'comp2'}
%% ...
}
\references{
Laydi and this R package are not published in the literature. Source code can be found at
http://dev.pvv.ntnu.no/projects/laydi
}
\author{
Einar Ryeng
}
\note{
%% ~~further notes~~
}
%% ~Make other sections like Warning with \section{Warning }{....} ~
\seealso{
%% ~~objects to See Also as \code{\link{help}}, ~~~
}
\examples{
library(datasets)
write.ftsv(randu, "/tmp/randu.ftsv")
}
\keyword{ IO }
\keyword{ file }

61
R/laydi/man/write.laydi.selection.Rd Normal file

@@ -0,0 +1,61 @@
\name{write.laydi.selection}
\alias{write.laydi.selection}
\title{write.laydi.selection}
\description{
Writes a Laydi selection file from a list of identifiers. Identifiers are
written one per line, with all whitespace characters substituted with
underscores.
}
\usage{
write.laydi.selection(data, con)
}
\arguments{
\item{data}{
A list of identifiers. All whitespace characters will be replaced with
underscores in the output. (Laydi requires unique identifiers along a
dimension, and does not allow identifiers to contain spaces.) This method
does not ensure that identifiers are unique.
}
\item{con}{
Connection (or file name) to write the data to. If con is a character
string, it will be treated as a file name, and a file by that name will be
created, written to and closed. If con is an open connection (file descriptor),
the file will remain open.
}
}
\details{
%% ~~ If necessary, more details than the description above ~~
}
\value{
%% ~Describe the value returned
%% If it is a LIST, use
%% \item{comp1 }{Description of 'comp1'}
%% \item{comp2 }{Description of 'comp2'}
%% ...
}
\references{
Laydi and the laydi R package are not published in the literature. Source code
can be found at http://dev.pvv.ntnu.no/projects/laydi
}
\author{
Einar Ryeng
}
\note{
%% ~~further notes~~
}
%% ~Make other sections like Warning with \section{Warning }{....} ~
\seealso{
%% ~~objects to See Also as \code{\link{help}}, ~~~
}
\examples{
write.laydi.selection(c("sample1", "sample2", "sample3"), "/tmp/selected_samples")
}
\keyword{ IO }
\keyword{ file }

27
README

@@ -1,27 +0,0 @@
Fluents Data Analysis Software
LICENSE
-------
Fluents is released under the terms of the GNU GPL, included in the LICENSE file
in this directory.
DOCUMENTATION
-------------
The primary and canonical source of documentation is the source code. If a
keyboard shortcut is listed on the wiki but it does not work in the program,
the program is right, and the wiki is wrong.
That said, the next best place to look for documentation is the project wiki,
located at https://dev.pvv.ntnu.no/projects/fluent/help
BUILDING
--------
Fluents is a python program, and as such, python will build compiled versions
of each .py file as it loads them. You do not need to explicitly compile the
program.
TODO
----
The current TODO list can be found on
https://dev.pvv.ntnu.no/projects/fluent/report/1

57
README.md Normal file

@@ -0,0 +1,57 @@
![](./wiki/graphics/project_icon.png)
# Laydi
## Look At Your Data Interactively
Laydi is an acronym for Look At Your Data Interactively, which is what the program is designed for. It is a lightweight data analysis program for bilinear modeling (PCA and PLS) with a strong focus on interactive use. Laydi is released under the GNU GPL, and the latest development snapshot can be downloaded from https://git.pvv.ntnu.no/Projects/laydi.git
![](./wiki/graphics/screenshot-00.png)
## Features
- Principal Component Analysis (PCA)
- Partial Least Squares Regression (PLS)
- L-shaped PLS regression (L-PLS)
- Easy mapping of variables between plots; selections in one plot propagate to other plots.
## Nonfeatures
- Does not import arbitrary files. Files must be prepared in a (simple) file format prior to import.
- Saving and loading of projects is not implemented. (Datasets can be saved and loaded, though, and plots can be exported)
- Not very stable
## Installation requirements
Laydi currently requires the following extra packages, available from apt on Debian and Ubuntu.
- python2.4 or python2.5
- python-glade2
- python-gnome2
- python-gtk2
- python-matplotlib
- python-scipy
- python-numpy
Needed only for some features
- python-networkx
- python-pygraphviz
## Download laydi
Laydi is not debianized. To download it, clone the git repo.
```console
git clone https://git.pvv.ntnu.no/Projects/laydi.git
```
## User documentation
- [Frequently Asked Questions](./wiki/faq.md)
- [Laydi help](./wiki/help.md) (the same as available through the help menu in the application.)
- [Terminology](./wiki/Terminology.md)
## Developer documentation
- [Developer tips and tricks](./wiki/development/hints.md)

41
README.old Normal file

@@ -0,0 +1,41 @@
Laydi Data Analysis Software
LICENSE
-------
Laydi is released under the terms of the GNU GPL, included in the LICENSE file
in this directory.
DOCUMENTATION
-------------
The primary and canonical source of documentation is the source code. If a
keyboard shortcut is listed on the wiki but it does not work in the program,
the program is right, and the wiki is wrong.
That said, the next best place to look for documentation is the project wiki,
located at https://dev.pvv.ntnu.no/projects/laydi/help
Class documentation is in HTML form in the doc/ directory.
BUILDING
--------
Laydi is a python program, and as such, python will build compiled versions
of each .py file as it loads them. You do not need to explicitly compile the
program.
If you have just checked out the program to a directory named laydi, e.g. with
the command:
svn co https://dev.pvv.org/svn/laydi/trunk laydi
you can run it by typing:
cd laydi
./configure --prefix=`pwd`/build
make
./run-laydi
TODO
----
The current TODO list can be found on
https://dev.pvv.ntnu.no/projects/laydi/report/1

116
bin/dataset Executable file

@@ -0,0 +1,116 @@
#!/usr/bin/python

import os,sys
from laydi import dataset
import cfgparse, optparse
import re

PROGRAM_NAME = 'dataset'
VERSION = '0.1.0'

def read_dataset_header(input):
    """Parse the '# key: value' header lines of an ftsv dataset."""
    name = ""
    type = ""
    dimensions = []

    kv_re = re.compile('^\s*#\s*(\w+)\s*:(.*)$')

    lines = []
    line = input.readline()
    while line.startswith('#'):
        lines.append(line)
        line = input.readline()

    for line in lines:
        match = kv_re.match(line)
        if not match:
            continue
        k, v = match.groups()
        k = k.strip()
        if k == 'name':
            name = v
        elif k == 'type':
            type = v
        elif k == 'dimension':
            values = v.split()
            dimensions.append((values[0], values[1:]))

    return (name, type, dimensions)

def show_info(input):
    """Print dataset name, type and dimension sizes."""
    name, type, dimensions = read_dataset_header(input)
    print "Name: %s" % name
    print "Type: %s" % type
    print "Dimensions:",
    for i, dim in enumerate(dimensions):
        dimname = dim[0]
        length = len(dim[1])
        print "%s(%i)" % (dimname, length),
        if i < len(dimensions)-1:
            print "x",
    print

def list_dimension_ids(input, dimname):
    """Print all identifiers along the given dimension, one per line."""
    name, type, dimensions = read_dataset_header(input)
    for i, dim in enumerate(dimensions):
        name, ids = dim
        if name == dimname:
            for id in ids:
                print id

def parse_options():
    conf_files = ['/etc/laydirc',
                  os.path.join(os.environ['HOME'], '.laydi')]

    cp = cfgparse.ConfigParser()
    op = optparse.OptionParser()
    op.add_option('-c', '--csv',
                  action='store_true', default=False,
                  help='Export as CSV file.')
    op.add_option('-d', '--dimension',
                  action='store', default=None,
                  help='Get all identifiers along a dimension.')
    op.add_option('-i', '--info',
                  action='store_true', default=False,
                  help='Show dataset information.')
    op.add_option('-l', '--longinfo',
                  action='store_true', default=False,
                  help='Display more information than -i.')
    op.add_option('-o', '--output-file',
                  action='store_true', default=False,
                  help='Send output to file instead of stdout.')
    op.add_option('-t', '--transpose',
                  action='store_true', default=False,
                  help='Transpose dataset.')
    op.add_option('-y', '--change-type',
                  action='store_true', default=False,
                  help='Set new dataset type.')

    for cf in conf_files:
        if os.path.isfile(cf):
            cp.add_file(cf)

    return cp.parse(op)

if __name__ == '__main__':
    options, params = parse_options()

    input = sys.stdin
    output = sys.stdout

    if options.info:
        show_info(input)
        sys.exit(0)
    elif options.dimension != None:
        list_dimension_ids(input, options.dimension)

50
bin/ftsv2csv Executable file

@@ -0,0 +1,50 @@
#!/usr/bin/python

import sys
from laydi import dataset
from getopt import getopt


def read_options():
    short_opts = ""
    long_opts = []
    options, params = getopt(sys.argv[1:], short_opts, long_opts)
    return params


def write_csv(fd, ds):
    rowdim, coldim = ds.get_dim_name()
    rowids = ds.get_identifiers(rowdim, sorted=True)
    colids = ds.get_identifiers(coldim, sorted=True)
    x = ds.asarray()

    ## Print header row: the row dimension name, then the column ids.
    print >> fd, rowdim,
    for id in colids:
        print >> fd, id,
    print >> fd

    ## Print row ids and data.
    for i, row in enumerate(rowids):
        print >> fd, row,
        for j in range(len(colids)):
            print >> fd, x[i, j],
        print >> fd


if __name__ == "__main__":
    params = read_options()
    input_fn = params[0]

    if len(params) == 2:
        output_fn = params[1]
    else:
        name, ext = input_fn.rsplit('.', 1)
        output_fn = name + '.csv'

    ds = dataset.read_ftsv(input_fn)
    output_fd = open(output_fn, 'w')
    write_csv(output_fd, ds)
    output_fd.close()
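
Usage is simply `ftsv2csv <input.ftsv> [<output.csv>]`; when the output name
is omitted, the last suffix of the input name is replaced by .csv. Note that
write_csv separates fields with spaces (the print statement's default), not
commas. For example (file names illustrative):

  ./bin/ftsv2csv mydata.ftsv           # writes mydata.csv
  ./bin/ftsv2csv mydata.ftsv out.csv   # writes out.csv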

bin/laydi Executable file

@@ -0,0 +1,145 @@
#!/usr/bin/python

from getopt import getopt
import os
import sys

from laydi import laydi, project, projectview, workflow, main
from laydi import cfgparse
import optparse

PROGRAM_NAME = 'laydi'
VERSION = '0.1.0'


def list_workflows():
    print 'laydi %s' % VERSION
    print
    print 'Available workflows:'
    wfs = workflow.workflow_list()
    for wf in wfs:
        print '  %s (%s)' % (wf.ident, wf.name)
    print


def generate_config():
    fn = os.path.join(os.environ['HOME'], '.laydi')
    if not os.path.exists(fn):
        fd = open(fn, 'w')
        print >> fd, "home = %s" % os.environ['HOME']
        print >> fd, "datadir = %(home)s/laydi/datasets"
        print >> fd, "workflowdir = %(home)s/laydi/workflows"
        fd.close()

    laydidir = os.path.join(os.environ['HOME'], 'laydi')
    if not os.path.exists(laydidir):
        os.mkdir(laydidir, 0755)

    datadir = os.path.join(os.environ['HOME'], 'laydi/datasets')
    if not os.path.exists(datadir):
        os.mkdir(datadir, 0755)

    workflowdir = os.path.join(os.environ['HOME'], 'laydi/workflows')
    if not os.path.exists(workflowdir):
        os.mkdir(workflowdir, 0755)


def parse_options():
    conf_files = ['/etc/laydirc',
                  os.path.join(os.environ['HOME'], '.laydi')]

    cp = cfgparse.ConfigParser()
    cp.add_option('home', type='string',
                  default=os.environ['HOME'])
    cp.add_option('datadir', type='string',
                  default=os.environ['HOME'])
    cp.add_option('workflowdir', type='string',
                  default='workflows')
    cp.parse()

    op = optparse.OptionParser()
    op.add_option('-l', '--list-workflows',
                  action='store_true',
                  default=False,
                  help='List available workflows.')
    op.add_option('-w', '--workflow',
                  default='default',
                  help='Start with selected workflow')
    op.add_option('-c', '--generate-config',
                  action='store_true',
                  help='Generate configuration file ~/.laydi if it does not exist.')
    op.add_option('-n', '--new-project',
                  action='store_true',
                  help='Create new project directory.')

    for cf in conf_files:
        if os.path.isfile(cf):
            cp.add_file(cf)
    options, params = cp.parse(op)

    if len(params) != 1:
        print "error: project directory must be specified."
        print "notice: to create a new project use -n /path/to/project"
        sys.exit(1)

    return options, params


if __name__ == '__main__':
    import gtk
    import gnome

    gnome.program_init(PROGRAM_NAME, VERSION)
    options, params = parse_options()

    ## Workflow setup
    main.options = options
    for dir in main.options.workflowdir.split(';'):
        if dir.strip() != "" and os.path.exists(dir):
            sys.path.append(dir)

    if options.list_workflows:
        list_workflows()
        sys.exit(0)

    if options.generate_config:
        generate_config()
        sys.exit(0)

    selected_wf = workflow.find_workflow(options.workflow)
    if selected_wf is None:
        selected_wf = workflow.EmptyWorkflow

    main.set_workflow(selected_wf())
    main.set_options(options)
    app = laydi.LaydiApp()

    ## Project setup
    prjroot = params[0]
    if not project.is_project_directory(prjroot):
        if options.new_project:
            project.make_project_directory(prjroot)
        else:
            print "error: project directory not found: %s" % prjroot
            print "notice: use the -n option to make a new project"
            sys.exit(2)

    proj = project.Project(prjroot)
    main.project = proj

    main.set_application(app)
    main.set_projectview(projectview.ProjectView(proj))
    app.set_projectview(main.projectview)
    app.show()
    gtk.main()
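
For reference, the configuration file written by -c/--generate-config looks
like this (with /home/user standing in for the real home directory):

  home = /home/user
  datadir = %(home)s/laydi/datasets
  workflowdir = %(home)s/laydi/workflows

and a typical first session might be (project path illustrative):

  laydi -c                      # write ~/.laydi and create ~/laydi/
  laydi -n ~/laydi/myproject    # create and open a new project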

bin/mat2ftsv Executable file

@@ -0,0 +1,37 @@
#!/usr/bin/python

import sys
from getopt import getopt


def show_help():
    print "mat2ftsv - Matlab matrix to laydi dataset converter."
    print
    print "Usage: mat2ftsv <mat-file> [<mat-file> ...]"
    print
    print "Description: For each mat file given as input, an ftsv file"
    print "   will be created with the same name, but suffixed with .ftsv"
    print "   in addition to .mat or any other suffix already on the"
    print "   file name."


options, params = getopt(sys.argv[1:], 'h', ['help'])

for opt, val in options:
    if opt in ['-h', '--help']:
        show_help()
        sys.exit(0)

if len(params) == 0:
    show_help()
    sys.exit(0)

from scipy import io
from numpy import ndarray
from laydi import dataset

## Convert every matrix in every input file. Note that all ndarrays in a
## single .mat file are written to the same output file, so the last one
## wins.
for fn_in in params:
    data = io.loadmat(fn_in)
    for key, value in data.items():
        if isinstance(value, ndarray):
            ds = dataset.Dataset(value, name=key)
            dataset.write_ftsv(fn_in + '.ftsv', ds)
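
A hypothetical session, assuming results.mat contains a single matrix X
(file and variable names made up):

  ./bin/mat2ftsv results.mat    # writes results.mat.ftsv with dataset name X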

bin/txt2ftsv Executable file

@@ -0,0 +1,100 @@
#!/usr/bin/env python

import numpy
import os.path
import sys

from laydi import dataset
from getopt import getopt

dimension = 'dim_doe'
output_fn = '-'
ds_name = None
category = False
sparse = False


def print_help():
    print
    print 'options:'
    print ' -h, --help            Show this help text.'
    print ' -c, --category        Make category dataset'
    print ' -d, --dimension=DIM   Make output in dimension DIM'
    print ' -n, --name=NAME       Set name of output dataset'
    print ' -o, --output=FILE     Save output dataset in FILE'
    print ' -s, --sparse          Save output in sparse format'
    print


def parse_options():
    global ds_name, output_fn, dimension, category, sparse

    short_opts = 'cd:hn:o:s'
    long_opts = ['help', 'category', 'dimension=', 'name=', 'output=', 'sparse']
    options, params = getopt(sys.argv[1:], short_opts, long_opts)

    for opt, val in options:
        if opt in ['-h', '--help']:
            print_help()
            sys.exit(0)
        elif opt in ['-c', '--category']:
            category = True
        elif opt in ['-d', '--dimension']:
            dimension = val
        elif opt in ['-n', '--name']:
            ds_name = val
        elif opt in ['-o', '--output']:
            output_fn = val
        elif opt in ['-s', '--sparse']:
            sparse = True

    if ds_name is None:
        if output_fn is not None:
            ds_name = output_fn
        else:
            ds_name = 'txt2ftsv'

    if len(params) == 0:
        print_help()
        sys.exit(1)

    return params


def read_file(fd):
    lines = fd.readlines()
    return [l.strip() for l in lines if l.strip() != '']


def build_dataset(dimension, id_lists, filenames):
    ## The row dimension holds the union of all identifiers; each input
    ## file becomes a column, set to 1 where the id occurred in that file.
    all_ids = list(reduce(set.union, [set(x) for x in id_lists]))
    x = numpy.zeros((len(all_ids), len(id_lists)), 'b')
    for i, idl in enumerate(id_lists):
        for id in idl:
            x[all_ids.index(id), i] = True

    if category:
        ds = dataset.CategoryDataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
    else:
        ds = dataset.Dataset(x, [(dimension, all_ids), ('files', filenames)], name=ds_name)
    return ds


if __name__ == '__main__':
    id_lists = []
    filenames = parse_options()

    for fn in filenames:
        if os.path.exists(fn):
            fd = open(fn)
            id_lists.append(read_file(fd))
            fd.close()
        elif fn == '-':
            id_lists.append(read_file(sys.stdin))

    ds = build_dataset(dimension, id_lists, filenames)

    if output_fn == '-':
        dataset.write_ftsv(sys.stdout, ds, sp_format=sparse)
    else:
        dataset.write_ftsv(output_fn, ds, sp_format=sparse)
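
As a sketch, given two identifier lists up.txt and down.txt (one identifier
per line, file names made up), the following builds a 0/1 membership dataset
with one column per input file:

  ./bin/txt2ftsv -c -d dim_genes -n regulated -o regulated.ftsv up.txt down.txt

Every identifier that occurs in any of the files becomes a row, and the
entry for (id, file) is 1 exactly when the id occurred in that file.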

configure vendored Executable file

@@ -0,0 +1,38 @@
#!/bin/bash

TEMP=`getopt -o "" --long prefix:,bindir:,datadir:,docdir:,pydir:,root: \
     -n 'configure' -- "$@"`
eval set -- "$TEMP"

while true ; do
    case "$1" in
        --prefix) PREFIX=$2 ; shift 2 ;;
        --bindir) BINDIR=$2 ; shift 2 ;;
        --datadir) DATADIR=$2 ; shift 2 ;;
        --docdir) DOCDIR=$2 ; shift 2 ;;
        --pydir) PYDIR=$2 ; shift 2 ;;
        --root) ROOT=$2 ; shift 2 ;;
        --) shift ; break ;;
        *) echo "Internal error!" ; exit 1 ;;
    esac
done

if [[ $PREFIX == "" ]] ; then PREFIX=/usr/local ; fi
if [[ $BINDIR == "" ]] ; then BINDIR=$PREFIX/bin ; fi
if [[ $DATADIR == "" ]] ; then DATADIR=$PREFIX/share/laydi ; fi
if [[ $DOCDIR == "" ]] ; then DOCDIR=$PREFIX/share/doc/laydi ; fi
if [[ $PYDIR == "" ]] ; then PYDIR=$PREFIX/share/pyshared/laydi ; fi

m4 -D M4_PREFIX=$ROOT/$PREFIX -D M4_BINDIR=$ROOT/$BINDIR \
   -D M4_DATADIR=$ROOT/$DATADIR -D M4_DOCDIR=$ROOT/$DOCDIR \
   -D M4_PYDIR=$ROOT/$PYDIR Makefile.m4 > Makefile

m4 -D M4_PREFIX=$ROOT/$PREFIX -D M4_BINDIR=$ROOT/$BINDIR \
   -D M4_DATADIR=$ROOT/$DATADIR -D M4_DOCDIR=$ROOT/$DOCDIR \
   -D M4_PYDIR=$ROOT/$PYDIR doc/Makefile.m4 > doc/Makefile

m4 -D M4_PREFIX=$PREFIX -D M4_BINDIR=$BINDIR \
   -D M4_DATADIR=$DATADIR -D M4_DOCDIR=$DOCDIR \
   -D M4_PYDIR=$PYDIR laydi/paths.py.m4 > laydi/paths.py
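
The script is a thin wrapper around m4: each option becomes an M4_* macro
that is substituted into the .m4 templates. For example (paths illustrative):

  ./configure --prefix=/usr --root=`pwd`/build

generates Makefile and doc/Makefile with M4_BINDIR expanded to
$ROOT/$PREFIX/bin and so on, while laydi/paths.py is generated without the
--root prefix, since it holds the paths the installed program will see at
run time.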

debian/changelog vendored Normal file

@@ -0,0 +1,6 @@
laydi (0.1.0) unstable; urgency=low

  * Initial packaging

 -- Einar Ryeng <einarr@pvv.org>  Tue, 11 Dec 2007 16:12:59 +0100

debian/compat vendored Normal file

@@ -0,0 +1 @@
5

debian/control vendored Normal file

@@ -0,0 +1,22 @@
Source: laydi
Section: science
Priority: optional
Maintainer: Einar Ryeng <einarr@pvv.org>
Build-Depends: debhelper (>= 5.0.37.2), python-dateutil, python-all-dev (>= 2.3.5-7), python-central (>= 0.5), python-epydoc, python-setuptools (>= 0.6b3-1)
Standards-Version: 3.7.2

Package: laydi
Architecture: any
Depends: ${python:Depends}, python-numpy (>= 1:1.0.1), python-dev
Provides: ${python:Provides}
Description: Python library of bilinear modeling algorithms.
 Bilinear modeling algorithms.

Package: laydi-doc
Architecture: all
Enhances: laydi
Description: Laydi API documentation.
 Bilinear modeling algorithms.
 .
 This package contains documentation for Laydi.

debian/copyright vendored Normal file

@@ -0,0 +1,32 @@
This package was debianized by Einar Ryeng <einarr@pvv.org> on
2007-09-11.

It was downloaded from https://dev.pvv.org/projects/laydi/downloads

Upstream Author: Arnar Flatberg <arnar.flatberg@gmail.com>

Copyright: Arnar Flatberg <arnar.flatberg@gmail.com>

License:

Redistribution and use in source and binary forms, with or without
modification, are permitted under the terms of the BSD License.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

On Debian systems, the complete text of the BSD License can be
found in `/usr/share/common-licenses/BSD'.

The Debian packaging is (C) 2007, Einar Ryeng <einarr@pvv.org> and
is licensed under the GPL, see `/usr/share/common-licenses/GPL'.

debian/docs vendored Normal file

@@ -0,0 +1 @@
README

debian/rules vendored Executable file

@@ -0,0 +1,74 @@
#!/usr/bin/make -f
# -*- makefile -*-
# Sample debian/rules that uses debhelper.
# This file was originally written by Joey Hess and Craig Small.
# As a special exception, when this file is copied by dh-make into a
# dh-make output file, you may use that output file without restriction.
# This special exception was added by Craig Small in version 0.37 of dh-make.

# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1

CFLAGS = -Wall -g
PYVERS = $(shell pyversions -vs)

configure: configure-stamp
configure-stamp:
	dh_testdir
	touch configure-stamp

build: $(PYVERS:%=build-python%)

build-python%:
	dh_testdir
#	python$* setup.py build
	touch $@

clean:
	# Add here commands to clean up after the build process.
	-rm -r build
	dh_clean

install: build $(PYVERS:%=install-python%)

install-python%:
	dh_testdir
	dh_testroot
	dh_clean -k
	dh_installdirs
	./configure --root ${CURDIR}/debian/laydi --prefix /usr --pydir /usr/share/pyshared/
	make install
#	python$* setup.py install --root=$(CURDIR)/debian/laydi --install-data=/usr/share/laydi
#	Remove all *.pyc files, created in the postinst
#	find $(CURDIR)/debian/python-networkx -name "*.pyc" -exec rm {} ';'

# Build architecture-independent files here.
binary-indep: build install
	make -C doc install
#	mkdir -p $(CURDIR)/debian/laydi-doc/usr/share/doc/laydi-doc/html
#	epydoc --html -o $(CURDIR)/debian/laydi-doc/usr/share/doc/laydi-doc/html laydi

# Build architecture-dependent files here.
binary-arch: build install
	dh_testdir
	dh_testroot
	dh_installchangelogs
	dh_installdocs
	dh_installexamples
#	dh_install
	dh_pysupport
	dh_installman
	dh_link
	dh_strip
	dh_compress
#	dh_makeshlibs
	dh_installdeb
	dh_shlibdeps
	dh_gencontrol
	dh_md5sums
	dh_builddeb

binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install configure

doc/Makefile.m4 Normal file

@@ -0,0 +1,20 @@
DOC_DIR=M4_DOCDIR

all: html

html:
	@echo ----------------
	@echo Generating epydoc html code documentation.
	@echo See epydoc-html.log for epydoc log.
	@echo ----------------
	epydoc --html --inheritance listed -o html/ ../laydi 2> epydoc-html.log

install: html
	find html/ -type f -exec install -m 644 -D '{}' ${DOC_DIR}/'{}' \;

clean:
	-rm -rf html
	-rm epydoc-html.log
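
After ./configure has produced doc/Makefile from this template, the API
documentation can be rebuilt and installed with, e.g.:

  make -C doc html
  make -C doc install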


@@ -0,0 +1 @@
Laydi project version 1

File diff suppressed because one or more lines are too long

doc/gui-overview.svg Normal file

@@ -0,0 +1,215 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:ns="http://creativecommons.org/ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="744.09448819"
height="1052.3622047"
id="svg2"
sodipodi:version="0.32"
inkscape:version="0.45.1"
sodipodi:docname="gui-overview.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
sodipodi:docbase="/home/einarr/src/laydi/doc"
inkscape:export-filename="/home/einarr/src/laydi/doc/gui-overview.png"
inkscape:export-xdpi="115"
inkscape:export-ydpi="115">
<defs
id="defs4">
<inkscape:perspective
sodipodi:type="inkscape:persp3d"
inkscape:vp_x="0 : 526.18109 : 1"
inkscape:vp_y="0 : 1000 : 0"
inkscape:vp_z="744.09448 : 526.18109 : 1"
inkscape:persp3d-origin="372.04724 : 350.78739 : 1"
id="perspective10" />
<filter
inkscape:collect="always"
x="-0.010937911"
width="1.0218758"
y="-0.25053026"
height="1.5010605"
id="filter3210">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="2.1921397"
id="feGaussianBlur3212" />
</filter>
<filter
inkscape:collect="always"
id="filter3266">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="2.1921397"
id="feGaussianBlur3268" />
</filter>
<filter
inkscape:collect="always"
id="filter3306">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="2.4541485"
id="feGaussianBlur3308" />
</filter>
<filter
inkscape:collect="always"
id="filter3200">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="2.6375546"
id="feGaussianBlur3202" />
</filter>
<filter
inkscape:collect="always"
id="filter3240">
<feGaussianBlur
inkscape:collect="always"
stdDeviation="2.5502183"
id="feGaussianBlur3242" />
</filter>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
gridtolerance="10000"
guidetolerance="10"
objecttolerance="10"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="1.4"
inkscape:cx="472.59664"
inkscape:cy="577.79368"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="false"
inkscape:window-width="1280"
inkscape:window-height="693"
inkscape:window-x="0"
inkscape:window-y="25">
<inkscape:grid
type="xygrid"
id="grid3171" />
</sodipodi:namedview>
<metadata
id="metadata7">
<rdf:RDF>
<ns:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</ns:Work>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<rect
style="fill:#e3e6ff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2383"
width="500"
height="350"
x="140"
y="302.36218" />
<rect
style="fill:#eaf0ed;stroke:#000000;stroke-opacity:1;fill-opacity:1;filter:url(#filter3306)"
id="rect3163"
width="480"
height="80"
x="150"
y="562.36218" />
<rect
style="fill:#eaf0ed;stroke:#000000;stroke-opacity:1;fill-opacity:1;filter:url(#filter3200)"
id="rect3165"
width="90"
height="210"
x="150"
y="342.36218" />
<rect
style="fill:#eaf0ed;stroke:#000000;stroke-opacity:1;fill-opacity:1;filter:url(#filter3240)"
id="rect3167"
width="80"
height="210"
x="550"
y="342.36218" />
<rect
style="fill:#eaf0ed;stroke:#000000;stroke-opacity:1;fill-opacity:1;filter:url(#filter3266)"
id="rect3169"
width="290"
height="210"
x="250"
y="342.36218" />
<rect
style="fill:#eaf0ed;stroke:#000000;stroke-opacity:1;fill-opacity:1;filter:url(#filter3210)"
id="rect3173"
width="480"
height="20"
x="150"
y="312.36218" />
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="336.85547"
y="326.83582"
id="text3175"><tspan
sodipodi:role="line"
id="tspan3177"
x="336.85547"
y="326.83582">Menus &amp; Toolbars</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="562.50488"
y="362.36218"
id="text3179"><tspan
sodipodi:role="line"
id="tspan3181"
x="562.50488"
y="362.36218">Workflow</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="165.0498"
y="362.36218"
id="text3187"><tspan
sodipodi:role="line"
id="tspan3189"
x="165.0498"
y="362.36218">Navigator</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="380.26953"
y="442.36218"
id="text3191"><tspan
sodipodi:role="line"
id="tspan3193"
x="380.26953"
y="442.36218">Plots</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-weight:normal;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Vera Sans"
x="303.29297"
y="602.36218"
id="text3195"><tspan
sodipodi:role="line"
id="tspan3197"
x="303.29297"
y="602.36218">Log, Selections &amp; Extensions</tspan></text>
</g>
</svg>


fluents

@@ -1,68 +0,0 @@
#!/usr/bin/python2.4

from getopt import getopt
import sys

from system import fluents, project, workflow
import workflows

PROGRAM_NAME = 'fluents'
VERSION = '0.1.0'

parameters = {'workflow': workflow.EmptyWorkflow}


def show_help():
    print 'fluent %s' % VERSION
    print 'This software is released under the GNU General Public Licence'
    print
    print 'Usage: fluent [options]'
    print
    print 'Description:'
    print '  Fluent is a lightweight data analysis application for bilinear models.'
    print
    print 'Options:'
    print '  -h --help            Show this help text'
    print '  -l --list-workflows  Lists available workflows'
    print '  -w --workflow=<wf>   Generates a new project based on workflow wf.'
    print


def list_workflows():
    print 'fluent %s' % VERSION
    print
    print 'Workflows:'
    wfs = workflow.workflow_list()
    for wf in wfs:
        print '  %s (%s)' % (wf.ident, wf.name)
    print


def parse_options():
    short_opts = 'hlw:'
    long_opts = ['help', 'list-workflows', 'workflow=']
    options, params = getopt(sys.argv[1:], short_opts, long_opts)
    for opt, val in options:
        if opt in ['-h', '--help']:
            show_help()
            sys.exit(0)
        elif opt in ['-l', '--list-workflows']:
            list_workflows()
            sys.exit(0)
        elif opt in ['-w', '--workflow']:
            wfs = workflow.workflow_list()
            for wf in wfs:
                if wf.ident == val:
                    parameters['workflow'] = wf


if __name__ == '__main__':
    parse_options()

    import gtk
    import gnome
    gnome.program_init(PROGRAM_NAME, VERSION)

    app = fluents.FluentApp(parameters['workflow'])
    app.set_project(project.Project())
    app.show()
    gtk.main()

Binary file changed: 666 B -> 621 B

icons/category_dataset.svg Modified (7.9 KiB -> 7.3 KiB)

The icon source was reworked in Inkscape: the gradient definitions were
rewritten, the grid of coloured squares was resized and recoloured, and the
document metadata (docbase, export filename) was updated from
/home/flatberg/fluent/icons to /home/flatberg/laydi/icons.

icons/cursor.png Normal file (binary, 354 B)

Binary file changed: 671 B -> 642 B

icons/dataset.svg Modified (7.8 KiB -> 7.8 KiB)

Same rework as category_dataset.svg: gradient definitions rewritten, the
grid of coloured squares resized and recoloured, and the Inkscape docbase
updated from /home/flatberg/fluent/icons to /home/flatberg/laydi/icons.

icons/filesave.png Normal file (binary, 873 B)

icons/freeze.png Normal file (binary, 724 B)

Binary file changed: 762 B -> 835 B

View File

@ -11,61 +11,49 @@
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="16px" width="16px"
height="16px" height="16px"
id="svg1617" id="svg8468"
sodipodi:version="0.32" sodipodi:version="0.32"
inkscape:version="0.43" inkscape:version="0.43"
sodipodi:docbase="/home/flatberg/fluent/icons" sodipodi:docbase="/home/flatberg/laydi/icons"
sodipodi:docname="graph_dataset.svg" sodipodi:docname="graph_dataset.svg"
inkscape:export-filename="/home/flatberg/fluent/icons/graph_dataset.png" inkscape:export-filename="/home/flatberg/laydi/icons/graph_dataset.png"
inkscape:export-xdpi="90" inkscape:export-xdpi="90"
inkscape:export-ydpi="90"> inkscape:export-ydpi="90">
<defs <defs
id="defs1619"> id="defs8470">
<marker <marker
inkscape:stockid="Dot_s" inkscape:stockid="TriangleOutS"
orient="auto" orient="auto"
refY="0.0" refY="0.0"
refX="0.0" refX="0.0"
id="Dot_s" id="TriangleOutS"
style="overflow:visible"> style="overflow:visible">
<path <path
id="path23705" id="path10489"
d="M -2.5,-1.0 C -2.5,1.7600000 -4.7400000,4.0 -7.5,4.0 C -10.260000,4.0 -12.5,1.7600000 -12.5,-1.0 C -12.5,-3.7600000 -10.260000,-6.0 -7.5,-6.0 C -4.7400000,-6.0 -2.5,-3.7600000 -2.5,-1.0 z " d="M 5.77,0.0 L -2.88,5.0 L -2.88,-5.0 L 5.77,0.0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;marker-end:none"
transform="scale(0.2) translate(7.125493, 1)" />
</marker>
<marker
inkscape:stockid="Arrow1Lstart"
orient="auto"
refY="0.0"
refX="0.0"
id="Arrow1Lstart"
style="overflow:visible">
<path
id="path23776"
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none" style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none"
transform="scale(0.8)" /> transform="scale(0.2)" />
</marker>
<marker
inkscape:stockid="DistanceIn"
orient="auto"
refY="0.0"
refX="0.0"
id="DistanceIn"
style="overflow:visible">
<g
id="g10541"
transform="scale(0.6,0.6) translate(8,0)">
<path
id="path10543"
d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none" />
<path
id="path10545"
d="M -14.759949,-7 L -14.759949,65"
style="fill:none;fill-opacity:0.75000000;fill-rule:evenodd;stroke:#000000;stroke-width:1.2pt;marker-start:none" />
</g>
</marker> </marker>
<linearGradient
id="linearGradient9542">
<stop
style="stop-color:#978e8e;stop-opacity:1;"
offset="0"
id="stop9544" />
<stop
id="stop13091"
offset="0.2857143"
style="stop-color:#837d9e;stop-opacity:0.49803922;" />
<stop
style="stop-color:#b075a6;stop-opacity:0.24705882;"
offset="1"
id="stop13093" />
<stop
style="stop-color:#6f6daf;stop-opacity:0;"
offset="1"
id="stop9546" />
</linearGradient>
<linearGradient <linearGradient
inkscape:collect="always" inkscape:collect="always"
id="linearGradient8653"> id="linearGradient8653">
@ -79,40 +67,52 @@
id="stop8657" /> id="stop8657" />
</linearGradient> </linearGradient>
<linearGradient <linearGradient
id="linearGradient2500"> inkscape:collect="always"
xlink:href="#linearGradient8653"
id="linearGradient1362"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1.048746,-7.497414e-10,1.276627e-10,0.992725,0.504498,4.555838)"
x1="-14.992936"
y1="3.2324076"
x2="-0.50547981"
y2="3.2324076" />
<linearGradient
id="linearGradient9542">
<stop <stop
style="stop-color:#a8a8a8;stop-opacity:1;" style="stop-color:#a0a0a0;stop-opacity:0.85123968;"
offset="0" offset="0"
id="stop2502" /> id="stop9544" />
<stop <stop
id="stop3387" id="stop7591"
offset="0.75510204" offset="0.14835165"
style="stop-color:#e5e5e5;stop-opacity:0.96907216;" /> style="stop-color:#918e9f;stop-opacity:0.6745098;" />
<stop <stop
style="stop-color:#ebebeb;stop-opacity:0;" id="stop13091"
offset="0.2857143"
style="stop-color:#837d9e;stop-opacity:0.49803922;" />
<stop
style="stop-color:#7975a6;stop-opacity:0.24705882;"
offset="0.56043959"
id="stop13093" />
<stop
id="stop7593"
offset="1" offset="1"
id="stop2504" /> style="stop-color:#7471aa;stop-opacity:0.12156863;" />
<stop
style="stop-color:#6f6daf;stop-opacity:0;"
offset="1"
id="stop9546" />
</linearGradient> </linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient8653"
id="linearGradient8659"
x1="-14.992936"
y1="3.2324076"
x2="-0.50547981"
y2="3.2324076"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.786559,-6.091644e-10,9.574701e-11,0.806589,-3.750022,6.643764)" />
<linearGradient <linearGradient
inkscape:collect="always" inkscape:collect="always"
xlink:href="#linearGradient9542" xlink:href="#linearGradient9542"
id="linearGradient9548" id="linearGradient1360"
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(1.048746,-7.497414e-10,1.276627e-10,0.992725,0.504498,4.555838)"
x1="-14.992936" x1="-14.992936"
y1="3.2324076" y1="3.2324076"
x2="-0.50547981" x2="-0.50547981"
y2="3.2324076" y2="3.2324076" />
gradientUnits="userSpaceOnUse"
gradientTransform="matrix(0.786559,-6.091644e-10,9.574701e-11,0.806589,-3.750022,6.643764)" />
</defs> </defs>
<sodipodi:namedview <sodipodi:namedview
id="base" id="base"
@ -121,19 +121,19 @@
borderopacity="1.0" borderopacity="1.0"
inkscape:pageopacity="0.0" inkscape:pageopacity="0.0"
inkscape:pageshadow="2" inkscape:pageshadow="2"
inkscape:zoom="44.395604" inkscape:zoom="31.392433"
inkscape:cx="7.3431998" inkscape:cx="8"
inkscape:cy="7.7311881" inkscape:cy="8.3153731"
inkscape:current-layer="layer1" inkscape:current-layer="layer1"
showgrid="true" showgrid="true"
inkscape:grid-bbox="true" inkscape:grid-bbox="true"
inkscape:document-units="px" inkscape:document-units="px"
inkscape:window-width="1280" inkscape:window-width="992"
inkscape:window-height="955" inkscape:window-height="672"
inkscape:window-x="0" inkscape:window-x="0"
inkscape:window-y="0" /> inkscape:window-y="0" />
<metadata <metadata
id="metadata1622"> id="metadata8473">
<rdf:RDF> <rdf:RDF>
<cc:Work <cc:Work
rdf:about=""> rdf:about="">
@ -148,127 +148,112 @@
inkscape:label="Layer 1" inkscape:label="Layer 1"
inkscape:groupmode="layer"> inkscape:groupmode="layer">
<rect <rect
style="opacity:1;fill:url(#linearGradient9548);fill-opacity:1;stroke:url(#linearGradient8659);stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" style="opacity:1;fill:#878e8e;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect1625"
width="11.395275"
height="12.492744"
x="-15.542892"
y="3.0046201"
transform="matrix(-1.602589e-3,-0.999999,0.999999,-1.530673e-3,0,0)" />
<rect
style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect3393"
width="0.50445545"
height="11.522277"
x="15.495544"
y="4.4777226" />
<rect
style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect9550" id="rect9550"
width="0.59595656" width="0.82326102"
height="11.990877" height="14.495289"
x="-16.000607" x="-16.045271"
y="3.4989688" y="1.0721804"
transform="matrix(-3.561914e-4,-1,1,-1.724514e-4,0,0)" /> transform="matrix(-3.116999e-4,-1,1,-1.970668e-4,0,0)" />
<rect <rect
style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" style="opacity:0.95890407;fill:url(#linearGradient1360);fill-opacity:1;stroke:url(#linearGradient1362);stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect13095" id="rect1625"
width="1" width="15.193709"
height="1.9999686" height="15.375698"
x="-10.45223" x="-15.219335"
y="0.99966687" y="0.076888956"
transform="matrix(-3.540536e-5,-1,0.999998,-1.734922e-3,0,0)" /> transform="matrix(-1.479312e-3,-0.999999,0.999999,-1.658227e-3,0,0)" />
<rect <rect
style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" style="opacity:1;fill:#878e8e;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="rect13097" id="rect3393"
width="1" width="0.62086815"
height="16" height="15.000007"
x="0" x="15.456053"
y="0" /> y="1" />
<path <path
sodipodi:type="arc" sodipodi:type="arc"
style="opacity:0.89847711;fill:#eb9213;fill-opacity:1;stroke:#000000;stroke-width:0.11331103;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1" style="opacity:1;fill:#fce100;fill-opacity:1;stroke:#000000;stroke-width:0.11344237;stroke-linecap:square;stroke-linejoin:round;marker-start:none;stroke-miterlimit:0.60000002;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path17466" id="path17466"
sodipodi:cx="4.8202972" sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604" sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851" sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326" sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1507906,5.6771397" d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1676788,5.8311826"
sodipodi:start="0" sodipodi:start="0"
sodipodi:end="6.1067046" sodipodi:end="6.2052379"
sodipodi:open="true" transform="matrix(1.253157,0,0,1.113406,-3.467103,-3.568938)"
transform="matrix(0.936447,0,0,0.831711,0.751641,1.359412)" /> sodipodi:open="true" />
<path <path
sodipodi:type="arc" style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.26768968;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
style="opacity:1;fill:#eb9213;fill-opacity:1;stroke:#000000;stroke-width:0.11331103;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;stroke-linecap:round;marker-start:none" d="M 4.2658336,3.1945274 C 6.8661552,3.3651026 6.8661552,3.3651026 6.8661552,3.3651026"
id="path18341"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1507906,5.6771397"
sodipodi:start="0"
sodipodi:end="6.1067046"
sodipodi:open="true"
transform="matrix(0.936447,0,0,0.831711,5.220453,1.73664)" />
<path
sodipodi:type="arc"
style="opacity:0.89847711;fill:#eb9213;fill-opacity:1;stroke:#000000;stroke-width:0.11331103;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path18343"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1507906,5.6771397"
sodipodi:start="0"
sodipodi:end="6.1067046"
sodipodi:open="true"
transform="matrix(0.936447,0,0,0.831711,1.369384,5.359412)" />
<path
sodipodi:type="arc"
style="opacity:0.89847711;fill:#eb9213;fill-opacity:1;stroke:#000000;stroke-width:0.11331103;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path18345"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1507906,5.6771397"
sodipodi:start="0"
sodipodi:end="6.1067046"
sodipodi:open="true"
transform="matrix(0.936447,0,0,0.831711,8.751641,8.73664)" />
<path
sodipodi:type="arc"
style="opacity:0.89847711;fill:#eb9213;fill-opacity:1;stroke:#000000;stroke-width:0.11331103;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path18347"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1507906,5.6771397"
sodipodi:start="0"
sodipodi:end="6.1067046"
sodipodi:open="true"
transform="matrix(0.936447,0,0,0.831711,5.751641,5.73664)" />
<path
style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.2;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
d="M 6.5302362,6.411702 C 8.4733797,6.5391212 8.4733797,6.5391212 8.4733797,6.5391212"
id="path19226" /> id="path19226" />
<path <path
style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.2;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible" style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.26768968;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
d="M 7.1195502,10.520973 C 9.0626937,10.648392 9.0626937,10.648392 9.0626937,10.648392" d="M 5.0544557,8.6955802 C 7.6547772,8.8661552 7.6547772,8.8661552 7.6547772,8.8661552"
id="path20976" /> id="path20976" />
<path <path
style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.17171589;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible" style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.22983284;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
d="M 11.154855,11.623699 C 12.364938,12.802918 12.364938,12.802918 12.364938,12.802918" d="M 10.454515,10.171792 C 12.073852,11.750404 12.073852,11.750404 12.073852,11.750404"
id="path20978" /> id="path20978" />
<path <path
style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.15874009;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible" style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.21246541;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
d="M 9.9153246,7.9660097 C 10.052976,9.4051342 10.052976,9.4051342 10.052976,9.4051342" d="M 8.7957709,5.2752684 C 8.9799765,7.2018145 8.9799765,7.2018145 8.9799765,7.2018145"
id="path20980" /> id="path20980" />
<path <path
style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.15874009;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible" style="opacity:1;color:#000000;fill:#3a383b;fill-opacity:0.82485878;fill-rule:evenodd;stroke:#000000;stroke-width:0.21246541;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
d="M 5.3866948,7.6172167 C 5.6087428,9.0457551 5.6087428,9.0457551 5.6087428,9.0457551" d="M 2.7355425,4.8083416 C 3.0326879,6.7207162 3.0326879,6.7207162 3.0326879,6.7207162"
id="path20982" /> id="path20982" />
<path
sodipodi:type="arc"
style="opacity:1;fill:#fce100;fill-opacity:1;stroke:#000000;stroke-width:0.11344237;stroke-linecap:round;stroke-linejoin:round;marker-start:none;stroke-miterlimit:0.60000002;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path11491"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1712767,5.9108317"
sodipodi:start="0"
sodipodi:end="6.2558287"
transform="matrix(1.253157,0,0,1.113406,2.563866,-3.062839)"
sodipodi:open="true" />
<path
sodipodi:type="arc"
style="opacity:1;fill:#fce100;fill-opacity:1;stroke:#000000;stroke-width:0.11344237;stroke-linecap:square;stroke-linejoin:round;marker-start:none;stroke-miterlimit:0.60000002;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path11493"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1687496,5.8483902"
sodipodi:start="0"
sodipodi:end="6.2161801"
sodipodi:open="true"
transform="matrix(1.253157,0,0,1.113406,-2.616827,1.847557)" />
<path
sodipodi:type="arc"
style="opacity:1;fill:#fce100;fill-opacity:1;stroke:#000000;stroke-width:0.11344237;stroke-linecap:square;stroke-linejoin:round;marker-start:none;stroke-miterlimit:0.60000002;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path11495"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1677975,5.8329691"
sodipodi:start="0"
sodipodi:end="6.2063743"
sodipodi:open="true"
transform="matrix(1.253157,0,0,1.113406,3.14951,2.388151)" />
<path
sodipodi:type="arc"
style="opacity:1;fill:#fce100;fill-opacity:1;stroke:#000000;stroke-width:0.11344237;stroke-linecap:square;stroke-linejoin:round;marker-start:none;stroke-miterlimit:0.60000002;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
id="path11497"
sodipodi:cx="4.8202972"
sodipodi:cy="5.9539604"
sodipodi:rx="1.3514851"
sodipodi:ry="1.5767326"
d="M 6.1717824,5.9539604 A 1.3514851,1.5767326 0 1 1 6.1678722,5.8341073"
sodipodi:start="0"
sodipodi:end="6.2070983"
sodipodi:open="true"
transform="matrix(1.253157,0,0,1.113406,7.198788,6.458182)" />
</g>
</svg>

Before: 12 KiB → After: 11 KiB

BIN
icons/home.png Normal file

Binary file not shown.

After: 1.3 KiB

BIN
icons/lasso.png Normal file

Binary file not shown.

After: 959 B

62
icons/lasso.svg Normal file
View File

@@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="48px"
height="48px"
id="svg1337"
sodipodi:version="0.32"
inkscape:version="0.43"
sodipodi:docbase="/home/flatberg/laydi/icons"
sodipodi:docname="lasso.svg"
inkscape:export-filename="/home/flatberg/laydi/icons/lasso.png"
inkscape:export-xdpi="37.5"
inkscape:export-ydpi="37.5">
<defs
id="defs1339" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="7"
inkscape:cx="24"
inkscape:cy="24"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px"
inkscape:window-width="749"
inkscape:window-height="540"
inkscape:window-x="0"
inkscape:window-y="155" />
<metadata
id="metadata1342">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Layer 1"
inkscape:groupmode="layer">
<path
style="fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:2;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:4,4;stroke-dashoffset:8.39999991;stroke-opacity:1"
d="M 21.5,5.5 C 15.071429,6.5 10.5,10.5 10.5,10.5 C 10.5,10.5 10.642857,9.2142857 7.6428571,15.214286 C 4.6428571,21.214286 16.071429,20.214285 17.214286,26.785714 C 18.357143,33.357143 25.5,40.5 30.5,40.5 C 35.5,40.5 38.5,35.5 38.5,32.5 C 38.5,29.5 38.5,29.5 38.5,29.5 C 38.5,29.5 31.850409,25.775944 34.642857,21.071429 C 37.808688,15.737865 37.5,5.7857143 21.5,5.5 z "
id="path2402"
sodipodi:nodetypes="ccsssscsc" />
</g>
</svg>

After: 2.3 KiB

Binary file not shown.

Before: 727 B → After: 680 B

View File

@@ -11,13 +11,16 @@
 xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
 width="16px"
 height="16px"
-id="svg1617"
+id="svg8468"
 sodipodi:version="0.32"
 inkscape:version="0.43"
-sodipodi:docbase="/home/flatberg/fluent/icons"
-sodipodi:docname="line_plot.svg">
+sodipodi:docbase="/home/flatberg/laydi/icons"
+sodipodi:docname="line_plot.svg"
+inkscape:export-filename="/home/flatberg/laydi/icons/line_plot.png"
+inkscape:export-xdpi="90"
+inkscape:export-ydpi="90">
 <defs
-id="defs1619">
+id="defs8470">
 <marker
 inkscape:stockid="SquareS"
 orient="auto"
@@ -32,41 +35,42 @@
 transform="scale(0.2)" />
 </marker>
 <marker
-inkscape:stockid="Arrow1Send"
+inkscape:stockid="TriangleOutS"
 orient="auto"
 refY="0.0"
 refX="0.0"
-id="Arrow1Send"
-style="overflow:visible;">
+id="TriangleOutS"
+style="overflow:visible">
 <path
-id="path3456"
-d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
-style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none;"
-transform="scale(0.2) rotate(180)" />
+id="path10489"
+d="M 5.77,0.0 L -2.88,5.0 L -2.88,-5.0 L 5.77,0.0 z "
+style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none"
+transform="scale(0.2)" />
+</marker>
+<marker
+inkscape:stockid="DistanceIn"
+orient="auto"
+refY="0.0"
+refX="0.0"
+id="DistanceIn"
+style="overflow:visible">
+<g
+id="g10541"
+transform="scale(0.6,0.6) translate(8,0)">
+<path
+id="path10543"
+d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z "
+style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;marker-start:none" />
+<path
+id="path10545"
+d="M -14.759949,-7 L -14.759949,65"
+style="fill:none;fill-opacity:0.75000000;fill-rule:evenodd;stroke:#000000;stroke-width:1.2pt;marker-start:none" />
+</g>
 </marker>
-<linearGradient
-id="linearGradient9542">
-<stop
-style="stop-color:#1d8b3d;stop-opacity:1;"
-offset="0"
-id="stop9544" />
-<stop
-id="stop13091"
-offset="0.2857143"
-style="stop-color:#837d9e;stop-opacity:0.49803922;" />
-<stop
-style="stop-color:#7975a6;stop-opacity:0.24705882;"
-offset="1"
-id="stop13093" />
-<stop
-style="stop-color:#6f6daf;stop-opacity:0;"
-offset="1"
-id="stop9546" />
-</linearGradient>
 <linearGradient
 id="linearGradient8653">
 <stop
-style="stop-color:#23fd00;stop-opacity:1;"
+style="stop-color:#3def19;stop-opacity:0.97540987;"
 offset="0"
 id="stop8655" />
 <stop
@@ -75,40 +79,52 @@
 id="stop8657" />
 </linearGradient>
 <linearGradient
-id="linearGradient2500">
+inkscape:collect="always"
+xlink:href="#linearGradient8653"
+id="linearGradient1362"
+gradientUnits="userSpaceOnUse"
+gradientTransform="matrix(1.048746,-7.497417e-10,1.276627e-10,0.992725,0.504627,4.478913)"
+x1="-13.565333"
+y1="3.2233276"
+x2="0.73984236"
+y2="3.2456837" />
+<linearGradient
+id="linearGradient9542">
 <stop
-style="stop-color:#a8a8a8;stop-opacity:1;"
-offset="0"
-id="stop2502" />
+style="stop-color:#13e414;stop-opacity:0.92622954;"
+offset="0"
+id="stop9544" />
 <stop
-id="stop3387"
-offset="0.75510204"
-style="stop-color:#e5e5e5;stop-opacity:0.96907216;" />
+id="stop7591"
+offset="0.14835165"
+style="stop-color:#918e9f;stop-opacity:0.6745098;" />
 <stop
-style="stop-color:#ebebeb;stop-opacity:0;"
-offset="1"
-id="stop2504" />
+id="stop13091"
+offset="0.2857143"
+style="stop-color:#837d9e;stop-opacity:0.49803922;" />
+<stop
+style="stop-color:#7975a6;stop-opacity:0.24705882;"
+offset="0.56043959"
+id="stop13093" />
+<stop
+id="stop7593"
+offset="1"
+style="stop-color:#7471aa;stop-opacity:0.12156863;" />
+<stop
+style="stop-color:#6f6daf;stop-opacity:0;"
+offset="1"
+id="stop9546" />
 </linearGradient>
 <linearGradient
 inkscape:collect="always"
 xlink:href="#linearGradient8653"
-id="linearGradient8659"
-x1="-14.992936"
-y1="3.2324076"
-x2="-0.50547981"
-y2="3.2324076"
-gradientUnits="userSpaceOnUse"
-gradientTransform="matrix(0.786559,-6.091642e-10,9.574695e-11,0.806589,-3.626117,6.632697)" />
-<linearGradient
-inkscape:collect="always"
-xlink:href="#linearGradient8653"
-id="linearGradient9548"
-x1="-14.992936"
-y1="3.2324076"
-x2="-0.50547981"
-y2="3.2324076"
-gradientUnits="userSpaceOnUse"
-gradientTransform="matrix(0.786559,-6.091642e-10,9.574695e-11,0.806589,-3.626117,6.632697)" />
+id="linearGradient1360"
+gradientUnits="userSpaceOnUse"
+gradientTransform="matrix(1.048746,-7.497417e-10,1.276627e-10,0.992725,0.504627,4.478913)"
+x1="-13.565333"
+y1="3.2233276"
+x2="0.73984236"
+y2="3.2456837" />
 </defs>
@@ -117,19 +133,19 @@
 borderopacity="1.0"
 inkscape:pageopacity="0.0"
 inkscape:pageshadow="2"
-inkscape:zoom="22.197802"
-inkscape:cx="8.4320071"
-inkscape:cy="4.4085456"
+inkscape:zoom="31.392433"
+inkscape:cx="8"
+inkscape:cy="8.1498335"
 inkscape:current-layer="layer1"
 showgrid="true"
 inkscape:grid-bbox="true"
 inkscape:document-units="px"
-inkscape:window-width="914"
-inkscape:window-height="712"
-inkscape:window-x="237"
-inkscape:window-y="43" />
+inkscape:window-width="1024"
+inkscape:window-height="699"
+inkscape:window-x="0"
+inkscape:window-y="0" />
 <metadata
-id="metadata1622">
+id="metadata8473">
 <rdf:RDF>
 <cc:Work
 rdf:about="">
@@ -144,57 +160,42 @@
 inkscape:label="Layer 1"
 inkscape:groupmode="layer">
 <rect
-style="opacity:1;fill:url(#linearGradient9548);fill-opacity:1.0;stroke:url(#linearGradient8659);stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
-id="rect1625"
-width="11.395269"
-height="12.492741"
-x="-15.41898"
-y="2.9935551"
-transform="matrix(-1.602589e-3,-0.999999,0.999999,-1.530673e-3,0,0)" />
-<rect
-style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
-id="rect3393"
-width="0.50445545"
-height="11.522277"
-x="15.495544"
-y="4.4777226" />
-<rect
-style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+style="opacity:1;fill:#878e8e;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
 id="rect9550"
-width="0.59595656"
-height="11.990877"
-x="-16.000607"
-y="3.4989688"
-transform="matrix(-3.561914e-4,-1,1,-1.724514e-4,0,0)" />
+width="0.82326102"
+height="14.495289"
+x="-16.045271"
+y="1.0721804"
+transform="matrix(-3.116999e-4,-1,1,-1.970668e-4,0,0)" />
 <rect
-style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
-id="rect13095"
-width="1"
-height="1.9999686"
-x="-10.45223"
-y="0.99966687"
-transform="matrix(-3.540536e-5,-1,0.999998,-1.734922e-3,0,0)" />
+style="opacity:0.95890407;fill:url(#linearGradient1360);fill-opacity:1.0;stroke:url(#linearGradient1362);stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+id="rect1625"
+width="15.193711"
+height="15.375704"
+x="-15.219207"
+y="-3.7717124e-05"
+transform="matrix(-1.479312e-3,-0.999999,0.999999,-1.658226e-3,0,0)" />
 <rect
-style="opacity:0.86243388;fill:#929797;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
-id="rect13097"
-width="1"
-height="16"
-x="0"
-y="0" />
+style="opacity:1;fill:#878e8e;fill-opacity:1;stroke:#000000;stroke-width:0;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+id="rect3393"
+width="0.62086815"
+height="15.000007"
+x="15.456053"
+y="1" />
 <path
-style="opacity:1;color:#000000;fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:0.30343372;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:url(#SquareS);marker-mid:url(#SquareS);marker-end:url(#SquareS);stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
-d="M 3.8080234,4.8809418 C 3.8080234,14.496824 3.8002372,14.496824 3.8002372,14.496824 L 3.8002372,14.496824"
+style="opacity:1;color:#000000;fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:0.33539712;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:url(#SquareS);marker-mid:url(#SquareS);marker-end:url(#SquareS);stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
+d="M 1.6021029,2.0851203 C 1.6021029,13.166175 1.5938478,13.166175 1.5938478,13.166175 L 1.5938478,13.166175"
 id="path2423" />
 <path
-style="opacity:1;color:#000000;fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:0.35327095;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:url(#SquareS);marker-mid:url(#SquareS);marker-end:url(#SquareS);stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
-d="M 15.345903,9.6197631 C 3.8038219,9.5920041 3.8038979,9.5832116 3.8038979,9.5832116 L 3.8038979,9.5832116"
+style="opacity:1;color:#000000;fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:0.39048415;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:url(#SquareS);marker-mid:url(#SquareS);marker-end:url(#SquareS);stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
+d="M 13.834879,7.5459964 C 1.5976484,7.5140078 1.5977289,7.5038756 1.5977289,7.5038756 L 1.5977289,7.5038756"
 id="path3480" />
 <path
-style="opacity:1;color:#000000;fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:#c72124;stroke-width:0.60000002;stroke-linecap:butt;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
-d="M 4.3,10.7 C 4.3,10.7 4.3009598,7.1690281 5.8146309,6.1007426 C 7.0219601,5.2486603 8.1832644,8.8588352 8.5982942,9.6868812 C 9.2420898,10.971349 9.6086599,13.094711 10.882205,13.263119 C 12.289868,13.449262 12.836966,11.8966 13.197551,11.060396 C 13.862121,9.519248 14.454234,7.1641089 14.454234,7.1641089 C 14.454234,7.1641089 14.90473,5.7056312 14.679482,5.6774752"
+style="opacity:1;color:#000000;fill:none;fill-opacity:0.75;fill-rule:evenodd;stroke:#fa0707;stroke-width:0.563;stroke-linecap:round;stroke-linejoin:miter;marker:none;marker-start:none;marker-mid:none;marker-end:none;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;visibility:visible;display:inline;overflow:visible"
+d="M 2.12371,8.7908287 C 2.12371,8.7908287 2.1247276,4.7218426 3.7295633,3.4907823 C 5.0096068,2.5088681 6.2408535,6.6691258 6.6808795,7.6233413 C 7.3634493,9.1035237 7.752097,11.550423 9.1023444,11.744491 C 10.594787,11.958997 11.174836,10.169755 11.557138,9.2061391 C 12.261733,7.4301658 12.889508,4.7161739 12.889508,4.7161739 C 12.889508,4.7161739 13.367136,3.0354678 13.128322,3.0030216"
 id="path3482"
 sodipodi:nodetypes="csssscs"
-inkscape:export-filename="/home/flatberg/fluent/icons/line_plot.png"
+inkscape:export-filename="/home/flatberg/laydi/icons/line_plot.png"
 inkscape:export-xdpi="130.40465"
 inkscape:export-ydpi="130.40465" />
 </g>

Before: 8.1 KiB → After: 8.0 KiB

BIN
icons/move.png Normal file

Binary file not shown.

After: 713 B

176
icons/move.svg Normal file
View File

@@ -0,0 +1,176 @@
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
"http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd"
[
<!ATTLIST svg
xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
]>
<!-- Created with Sodipodi ("http://www.sodipodi.com/") -->
<svg
width="128pt"
height="128pt"
id="svg1"
sodipodi:version="0.27"
sodipodi:docname="/mnt/windows/Themes/Work/Blue-Sphere/move.svg"
sodipodi:docbase="/mnt/windows/Themes/Work/Blue-Sphere/"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:xlink="http://www.w3.org/1999/xlink">
<defs
id="defs22">
<linearGradient
id="linearGradient168">
<stop
offset="0.000000"
style="stop-color:#cdffff;stop-opacity:1;"
id="stop169" />
<stop
offset="1.000000"
style="stop-color:#0c5d7d;stop-opacity:0.905882;"
id="stop170" />
</linearGradient>
<linearGradient
id="linearGradient90">
<stop
offset="0.000000"
style="stop-color:#cdffff;stop-opacity:1;"
id="stop91" />
<stop
offset="1.000000"
style="stop-color:#006b97;stop-opacity:0.905882;"
id="stop92" />
</linearGradient>
<linearGradient
id="linearGradient67">
<stop
offset="0.000000"
style="stop-color:#d7ffff;stop-opacity:0.898039;"
id="stop70" />
<stop
offset="1.000000"
style="stop-color:#2ea6b9;stop-opacity:0.952941;"
id="stop69" />
</linearGradient>
<linearGradient
id="linearGradient57">
<stop
offset="0.000000"
style="stop-color:#ffffff;stop-opacity:1;"
id="stop59" />
<stop
offset="1.000000"
style="stop-color:#797979;stop-opacity:1;"
id="stop58" />
</linearGradient>
<defs
id="defs4">
<radialGradient
id="1"
cx="869.603027"
cy="1973.579956"
r="2106.649902"
fx="869.603027"
fy="1973.579956"
gradientUnits="userSpaceOnUse"
xlink:href="#linearGradient67" />
</defs>
<defs
id="defs11">
<linearGradient
id="2"
x1="255.848"
y1="119.147"
x2="375.686"
y2="34.1009"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
style="stop-color:#ffffff"
id="stop13" />
<stop
offset="1"
style="stop-color:#000000"
id="stop14" />
</linearGradient>
</defs>
<defs
id="defs16">
<linearGradient
id="3"
x1="275.053009"
y1="109.384003"
x2="356.480988"
y2="30.864300"
gradientUnits="userSpaceOnUse"
xlink:href="#linearGradient57" />
</defs>
<radialGradient
cx="3.03981e-14"
cy="1.05578e-10"
r="0.773346"
fx="3.03981e-14"
fy="1.05578e-10"
xlink:href="#linearGradient168"
id="radialGradient88"
gradientUnits="objectBoundingBox"
gradientTransform="matrix(0.945877,0,0,1.05722,0.264368,0.249996)"
spreadMethod="pad" />
<linearGradient
x1="2.69643"
y1="1.14655"
x2="3.85147"
y2="0.623116"
xlink:href="#linearGradient67"
id="linearGradient89"
gradientUnits="objectBoundingBox"
gradientTransform="translate(-2.65767,-0.201241)"
spreadMethod="pad" />
</defs>
<sodipodi:namedview
id="base">
<sodipodi:guide
orientation="horizontal"
position="114.876968"
id="sodipodi:guide589" />
<sodipodi:guide
orientation="vertical"
position="47.041008"
id="sodipodi:guide590" />
<sodipodi:guide
orientation="vertical"
position="83.381706"
id="sodipodi:guide655" />
<sodipodi:guide
orientation="horizontal"
position="76.921135"
id="sodipodi:guide1057" />
<sodipodi:guide
orientation="horizontal"
position="42.195583"
id="sodipodi:guide1058" />
</sodipodi:namedview>
<path
d="M 32.5 12 L 56.8988 34.5471 L 42.9771 34.8537 L 42.7252 61.5867 L 22.2748 61.5867 L 22.0229 34.8537 L 8.21868 35.1477 L 32.5 12 z "
transform="matrix(0.797584,0,0,1.24992,56.2305,-2.71824)"
style="stroke-width:3.72423;fill:#0c5d7d;stroke:#0c5d7d;stroke-opacity:0.99;stroke-linejoin:miter;stroke-linecap:butt;fill-opacity:0.992157;"
id="polygon3"
sodipodi:nodetypes="cccccccc" />
<path
d="M 32.5 12 L 56.8988 34.5471 L 42.9771 34.8537 L 42.0791 72.0798 L 21.6287 72.0798 L 22.0229 34.8537 L 8.21868 35.1477 L 32.5 12 z "
transform="matrix(9.95215e-19,0.781212,-1.22426,1.55963e-18,171.015,60.3555)"
style="stroke-width:3.72423;fill:#0c5d7d;stroke:#0c5d7d;stroke-opacity:0.99;stroke-linejoin:miter;stroke-linecap:butt;fill-opacity:0.992157;"
id="path651"
sodipodi:nodetypes="cccccccc" />
<path
d="M 32.5 12 L 56.8988 34.5471 L 42.9771 34.8537 L 42.7252 61.018 L 22.2748 61.018 L 22.0229 34.8537 L 8.21868 35.1477 L 32.5 12 z "
transform="matrix(-0.81341,2.07245e-18,-3.24781e-18,-1.27472,108.565,172.894)"
style="stroke-width:3.72423;fill:#0c5d7d;stroke:#0c5d7d;stroke-opacity:0.99;stroke-linejoin:miter;stroke-linecap:butt;fill-opacity:0.992157;"
id="path652"
sodipodi:nodetypes="cccccccc" />
<path
d="M 32.5 12 L 56.8988 34.5471 L 42.9771 34.8537 L 42.7252 69.8962 L 22.2748 69.8962 L 22.0229 34.8537 L 8.21868 35.1477 L 32.5 12 z "
transform="matrix(-3.07844e-18,-0.805497,1.26232,-4.82433e-18,-9.11143,111.473)"
style="stroke-width:3.72423;fill:#0c5d7d;stroke:#0c5d7d;stroke-opacity:0.99;stroke-linejoin:miter;stroke-linecap:butt;fill-opacity:0.992157;"
id="path653"
sodipodi:nodetypes="cccccccc" />
</svg>

After: 5.8 KiB

BIN
icons/select.png Normal file

Binary file not shown.

After: 1.0 KiB

BIN
icons/table_size.png Normal file

Binary file not shown.

After: 780 B

346
icons/table_size.svg Normal file
View File

@@ -0,0 +1,346 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://web.resource.org/cc/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://inkscape.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="744.09448819"
height="1052.3622047"
id="svg2"
sodipodi:version="0.32"
inkscape:version="0.43"
sodipodi:docbase="/home/einarr/src/laydi/icons"
sodipodi:docname="table_size.svg">
<defs
id="defs4">
<linearGradient
inkscape:collect="always"
id="linearGradient2192">
<stop
style="stop-color:#00d07c;stop-opacity:1;"
offset="0"
id="stop2194" />
<stop
style="stop-color:#00d07c;stop-opacity:0;"
offset="1"
id="stop2196" />
</linearGradient>
<linearGradient
inkscape:collect="always"
id="linearGradient2182">
<stop
style="stop-color:#0000ff;stop-opacity:1;"
offset="0"
id="stop2184" />
<stop
style="stop-color:#0000ff;stop-opacity:0;"
offset="1"
id="stop2186" />
</linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2182"
id="linearGradient2188"
x1="425.21429"
y1="850.93365"
x2="425.21429"
y2="125.84373"
gradientUnits="userSpaceOnUse" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2198"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(0,155.2857)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2202"
gradientUnits="userSpaceOnUse"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404"
gradientTransform="translate(162.8571,155.2857)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2206"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(331.4286,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2221"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(0,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2223"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(162.8571,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2225"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(331.4286,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2235"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(0,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2237"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(162.8571,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2239"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(331.4286,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2247"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(0,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2249"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(162.8571,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2251"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(331.4286,155.2857)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2257"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(161.8571,160.5)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2260"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(-1,160.5)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2275"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(161.8571,-4.7143)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient2192"
id="linearGradient2278"
gradientUnits="userSpaceOnUse"
gradientTransform="translate(-1,-4.7143)"
x1="202.35713"
y1="426.64789"
x2="202.35713"
y2="223.78404" />
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="0.35"
inkscape:cx="375"
inkscape:cy="520"
inkscape:document-units="px"
inkscape:current-layer="layer1"
inkscape:window-width="823"
inkscape:window-height="583"
inkscape:window-x="91"
inkscape:window-y="59" />
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<rect
style="fill:url(#linearGradient2188);fill-opacity:1.0;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect1307"
width="517.14288"
height="517.14288"
x="125.71429"
y="235.21933"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:url(#linearGradient2260);fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2190"
width="148.57143"
height="148.57143"
x="144.71428"
y="418.57648"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:url(#linearGradient2257);fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2200"
width="148.57143"
height="148.57143"
x="307.57144"
y="418.57648"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:#f3f4f9;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2204"
width="148.57143"
height="148.57143"
x="476.14285"
y="418.57648"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:#f3f4f9;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2215"
width="148.57143"
height="148.57143"
x="144.71428"
y="583.79077"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:#f3f4f9;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2217"
width="148.57143"
height="148.57143"
x="307.57144"
y="583.79077"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:#f3f4f9;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2219"
width="148.57143"
height="148.57143"
x="476.14285"
y="583.79077"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:url(#linearGradient2278);fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2229"
width="148.57143"
height="148.57143"
x="144.71428"
y="253.36218"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:url(#linearGradient2275);fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2231"
width="148.57143"
height="148.57143"
x="307.57144"
y="253.36218"
inkscape:export-filename="/home/einarr/src/laydi/icons/rect2233.png"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
<rect
style="fill:#f3f4f9;fill-opacity:0.75;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="rect2233"
width="148.57143"
height="148.57143"
x="476.14285"
y="253.36218"
inkscape:export-xdpi="3.4739451"
inkscape:export-ydpi="3.4739451" />
</g>
</svg>

After: 12 KiB

BIN
icons/zoom_to_rect.png Normal file

Binary file not shown.

After: 1.5 KiB

3
laydi/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
import main

100
laydi/annotations.py Normal file
View File

@@ -0,0 +1,100 @@
_dim_annotation_handlers = {}
def get_dim_annotations(dimname, annotation, ids):
"""Returns a list of annotations corresponding to the given ids in
dimension dimname"""
global _dim_annotation_handlers
if _dim_annotation_handlers.has_key(dimname):
return _dim_annotation_handlers[dimname].get_annotations(annotation, ids)
return None
def set_dim_handler(dimname, handler):
"""Set the handler for the given dimension."""
global _dim_annotation_handlers
_dim_annotation_handlers[dimname] = handler
def get_dim_handler(dimname):
"""Get the handler for the given dimension."""
global _dim_annotation_handlers
return _dim_annotation_handlers.get(dimname, None)
class AnnotationHandler:
def __init__(self):
pass
def get_annotations(self, annotationname, ids, default=None):
return None
def get_annotation_names(self):
return []
class DictAnnotationHandler(AnnotationHandler):
def __init__(self, d=None):
if d == None:
d = {}
self._dict = d
def get_annotations(self, annotationname, ids, default=None):
d = self._dict
retval = []
for id in ids:
if d[annotationname].has_key(id):
retval.append(d[annotationname][id])
else:
retval.append(default)
return retval
def add_annotations(self, annotationname, d):
self._dict[annotationname] = d
def get_annotation_names(self):
return self._dict.keys()
def read_annotations_file(filename):
"""Read annotations from file.
Reads annotations from a tab delimited file of the format::
dimname annotation_name1 annotation_name2 ...
id1 Foo 0.43
id2 Bar 0.59
"""
ann = DictAnnotationHandler()
dimname = None
annotation_dicts = []
annotation_names = []
fd = open(filename)
## Read the first line, which contains the dimension name and
## annotation names.
line = fd.readline()
values = [x.strip() for x in line.split('\t')]
dimname = values[0]
annotation_names = values[1:]
annotation_dicts = [{} for x in annotation_names]
## Read the lines containing the annotations. The first value on
## each line is an id along the dimension.
line = fd.readline()
while line:
values = [x.strip() for x in line.split('\t')]
for i, x in enumerate(values[1:]):
annotation_dicts[i][values[0]] = x
line = fd.readline()
fd.close()
## Add everything to the annotation object and add the object to
## the specified dimension.
for i, a in enumerate(annotation_names):
ann.add_annotations(a, annotation_dicts[i])
_dim_annotation_handlers[dimname] = ann
return ann
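
A minimal usage sketch of the annotations API above (illustrative only, not part of the commit; the dimension name 'genes' and annotation name 'symbol' are made-up examples):

import annotations

# Register a handler that maps ids along the 'genes' dimension to symbols.
handler = annotations.DictAnnotationHandler()
handler.add_annotations('symbol', {'gene_1': 'ABC1', 'gene_2': 'XYZ2'})
annotations.set_dim_handler('genes', handler)

# Ids without an annotation come back as the default (None).
print annotations.get_dim_annotations('genes', 'symbol', ['gene_1', 'gene_3'])
# -> ['ABC1', None]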

1762
laydi/cfgparse.py Normal file

File diff suppressed because it is too large.

862
laydi/dataset.py Normal file
View File

@@ -0,0 +1,862 @@
from scipy import ndarray, atleast_2d, asarray, intersect1d, zeros
from scipy import empty, sparse, where
from scipy import sort as array_sort
from itertools import izip
import shelve
import copy
import re
class Universe(object):
def __init__(self, name):
self.name = name
self._ids = {}
def register(self, dim):
"""Increase reference count for identifiers in Dimension object dim"""
if dim.name != self.name:
return
for i in dim:
self._ids[i] = self._ids.get(i, 0) + 1
def unregister(self, dim):
"""Update reference count for identifiers in Dimension object dim
Update reference count for identifiers in Dimension object dim, and remove all
identifiers with a reference count of 0, as they do not (by definition) exist
any longer.
"""
if dim.name != self.name:
return
for i in dim:
refcount = self._ids[i]
if refcount == 1:
self._ids.pop(i)
else:
self._ids[i] -= 1
def __str__(self):
return "%s: %i elements, %i references" % (self.name, len(self._ids), sum(self._ids.values()))
def __contains__(self, element):
return self._ids.__contains__(element)
def __len__(self):
return len(self._ids)
def intersection(self, dim):
return set(self._ids).intersection(dim.idset)
class Dimension(object):
"""A Dimension represents the set of identifiers an object has along an axis.
"""
def __init__(self, name, ids=[]):
self.name = name
self.idset = set(ids)
self.idlist = list(ids)
def __getitem__(self, element):
return self.idlist[element]
def __getslice__(self, start, end):
return self.idlist[start:end]
def __contains__(self, element):
return self.idset.__contains__(element)
def __str__(self):
return "%s: %s" % (self.name, str(self.idlist))
def __len__(self):
return len(self.idlist)
def __iter__(self):
return iter(self.idlist)
def intersection(self, dim):
return self.idset.intersection(dim.idset)
class Dataset(object):
"""The Dataset base class.
A Dataset is an n-way array with defined string identifiers across
all dimensions.
example of use:
---
dim_name_rows = 'rows'
names_rows = ('row_a','row_b')
ids_1 = [dim_name_rows, names_rows]
dim_name_cols = 'cols'
names_cols = ('col_a','col_b','col_c','col_d')
ids_2 = [dim_name_cols, names_cols]
Array_X = rand(2,4)
data = Dataset(Array_X,(ids_1,ids_2),name="Testing")
dim_names = [dim for dim in data]
column_identifiers = [id for id in data['cols'].keys()]
column_index = [index for index in data['cols'].values()]
'cols' in data -> True
---
data = Dataset(rand(10,20)) (generates dims and ids (no links))
"""
def __init__(self, array, identifiers=None, name='Unnamed dataset'):
self._dims = [] #existing dimensions in this dataset
self._map = {} # internal mapping for dataset: identifier <--> index
self._name = name
self._identifiers = identifiers
if not isinstance(array, sparse.spmatrix):
array = atleast_2d(asarray(array))
# vectors are stored as columns (arrays)
if array.shape[0] == 1:
array = array.T
self.shape = array.shape
if identifiers != None:
self._validate_identifiers(identifiers)
self._set_identifiers(identifiers, self._all_dims)
else:
self._identifiers = self._create_identifiers(self.shape, self._all_dims)
self._set_identifiers(self._identifiers, self._all_dims)
self._array = array
def __iter__(self):
"""Returns an iterator over dimensions of dataset."""
return self._dims.__iter__()
def __contains__(self,dim):
"""Returns True if dim is a dimension name in dataset."""
# return self._dims.__contains__(dim)
return self._map.__contains__(dim)
def __len__(self):
"""Returns the number of dimensions in the dataset"""
return len(self._map)
def __getitem__(self,dim):
"""Return the identifers along the dimension dim."""
return self._map[dim]
def _create_identifiers(self, shape, all_dims):
"""Creates dimension names and identifier names, and returns
identifiers."""
dim_names = ['rows','cols']
ids = []
for axis, n in enumerate(shape):
if axis < 2:
dim_suggestion = dim_names[axis]
else:
dim_suggestion = 'dim'
dim_suggestion = self._suggest_dim_name(dim_suggestion, all_dims)
identifier_creation = [str(axis) + "_" + i for i in map(str, range(n))]
ids.append((dim_suggestion, identifier_creation))
all_dims.add(dim_suggestion)
return ids
def _set_identifiers(self, identifiers, all_dims):
"""Creates internal mapping of identifiers structure."""
for dim, ids in identifiers:
pos_map = ReverseDict()
if dim not in self._dims:
self._dims.append(dim)
all_dims.add(dim)
else:
raise ValueError, "Dimension names must be unique whitin dataset"
for pos, id in enumerate(ids):
pos_map[id] = pos
self._map[dim] = pos_map
def _suggest_dim_name(self,dim_name,all_dims):
"""Suggests a unique name for dim and returns it"""
c = 0
new_name = dim_name
while new_name in all_dims:
new_name = dim_name + "_" + str(c)
c += 1
return new_name
def asarray(self):
"""Returns the numeric array (data) of dataset"""
if isinstance(self._array, sparse.spmatrix):
return self._array.toarray()
return self._array
def set_array(self, array):
"""Adds array as an ArrayType object.
A one-dim array is transformed to a two-dim array (row-vector)
"""
if not isinstance(array, type(self._array)):
raise ValueError("Input array of type: %s does not match existing array type: %s") %(type(array), type(self._array))
if self.shape != array.shape:
raise ValueError, "Input array must be of similar dimensions as dataset"
self._array = atleast_2d(asarray(array))
def get_name(self):
"""Returns dataset name"""
return self._name
def get_all_dims(self):
"""Returns all dimensions in project"""
return self._all_dims
def get_dim_name(self, axis=None):
"""Returns dim name for an axis, if no axis is provided it
returns a list of dims"""
if type(axis) == int:
return self._dims[axis]
else:
return [dim for dim in self._dims]
def common_dims(self, ds):
"""Returns a list of the common dimensions in the two datasets."""
dims = self.get_dim_name()
ds_dims = ds.get_dim_name()
return [d for d in dims if d in ds_dims]
def get_identifiers(self, dim, indices=None, sorted=False):
"""Returns identifiers along dim, sorted by position (index)
is optional.
You can optionally provide a list/ndarray of indices to get
only the identifiers of a given position.
Identifiers are the unique names (strings) for a variable in a
given dim. Index (Indices) are the Identifiers position in a
matrix in a given dim.
"""
if indices != None:
if len(indices) == 0: # if empty list or empty array
return []
# be sure to match intersection
#indices = intersect1d(self.get_indices(dim),indices)
ids = [self._map[dim].reverse[i] for i in indices]
else:
if sorted == True:
ids = [self._map[dim].reverse[i] for i in array_sort(self._map[dim].values())]
else:
ids = self._map[dim].keys()
return ids
def get_indices(self, dim, idents=None):
"""Returns indices for identifiers along dimension.
You can optionally provide a list of identifiers to retrieve a
index subset.
Identifiers are the unique names (strings) for a variable in a
given dim. Index (Indices) are the Identifiers position in a
matrix in a given dim. If none of the input identifiers are
found an empty index is returned
"""
if idents == None:
index = array_sort(self._map[dim].values())
else:
if not isinstance(idents, list) and not isinstance(idents, set):
raise ValueError("idents needs to be a list/set, got: %s" % type(idents))
index = [self._map[dim][key]
for key in idents if self._map[dim].has_key(key)]
return asarray(index)
def existing_identifiers(self, dim, idents):
"""Filters a list of identifiers to find those that are present in the
dataset.
The most common use of this function is to get a list of
identifiers who correspond one to one with the list of indices produced
when get_indices is given an identifier list. That is
ds.get_indices(dim, idents) and ds.existing_identifiers(dim, idents)
will have the same order.
@param dim: A dimension present in the dataset.
@param idents: A list of identifiers along the given dimension.
@return: A list of identifiers in the same order as idents, but
without elements not present in the dataset.
"""
if not isinstance(idents, list) and not isinstance(idents, set):
raise ValueError("idents needs to be a list/set got: %s" %type(idents))
return [key for key in idents if self._map[dim].has_key(key)]
def copy(self):
""" Returns deepcopy of dataset.
"""
return copy.deepcopy(self)
def subdata(self, dim, idents):
"""Returns a new dataset based on dimension and given identifiers.
"""
ds = self.copy()
indices = array_sort(ds.get_indices(dim, idents))
idents = ds.get_identifiers(dim, indices=indices)
if not idents:
raise ValueError("No of identifers from: \n%s \nfound in %s" %(str(idents), ds._name))
ax = [i for i, name in enumerate(ds._dims) if name == dim][0]
subarr = ds._array.take(indices, ax)
new_indices = range(len(idents))
ds._map[dim] = ReverseDict(zip(idents, new_indices))
ds.shape = tuple(len(ds._map[d]) for d in ds._dims)
ds.set_array(subarr)
return ds
def transpose(self):
"""Returns a copy of transpose of a dataset.
As for the moment: only support for 2D-arrays.
"""
assert(len(self.shape) == 2)
ds = self.copy()
ds._array = ds._array.T
ds._dims.reverse()
ds.shape = ds._array.shape
return ds
def _validate_identifiers(self, identifiers):
for dim_name, ids in identifiers:
if len(set(ids)) != len(ids):
raise ValueError("Identifiers not unique in : %s" %dim_name)
identifier_shape = [len(i[1]) for i in identifiers]
if len(identifier_shape) != len(self.shape):
raise ValueError("Identifier list length must equal array dims")
for ni, na in zip(identifier_shape, self.shape):
if ni != na:
raise ValueError, "Identifier-array mismatch: %s: (idents: %s, array: %s)" %(self._name, ni, na)
class CategoryDataset(Dataset):
"""The category dataset class.
A dataset for representing class information as binary
matrices (0/1-matrices).
There is support for using a less memory demanding, sparse format. The
preferred (default) format for a category dataset is the compressed sparse row
format (csr)
Always has linked dimension in first dim:
ex matrix:
. go_term1 go_term2 ...
gene_1
gene_2
gene_3
.
.
.
"""
def __init__(self, array, identifiers=None, name='C'):
Dataset.__init__(self, array, identifiers=identifiers, name=name)
def as_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
return self._array
else:
arr = self.asarray()
return sparse.csr_matrix(arr.astype('i'))
def to_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
self._array = self._array.tocsr()
else:
self._array = sparse.csr_matrix(self._array)
def as_dictlists(self):
"""Returns data as dict of identifiers along first dim.
ex: data['gene_1'] = ['map0030','map0010', ...]
fixme: Deprecated?
"""
data = {}
for name, ind in self._map[self.get_dim_name(0)].items():
if isinstance(self._array, ndarray):
indices = self._array[ind,:].nonzero()[0]
elif isinstance(self._array, sparse.spmatrix):
if not isinstance(self._array, sparse.csr_matrix):
array = self._array.tocsr()
else:
array = self._array
indices = array[ind,:].indices
if len(indices) == 0: # should we allow categories with no members?
continue
data[name] = self.get_identifiers(self.get_dim_name(1), indices)
self._dictlists = data
return data
def as_selections(self):
"""Returns data as a list of Selection objects.
The list of selections is not ordered (sorted) by any means.
"""
ret_list = []
for cat_name, ind in self._map[self.get_dim_name(1)].items():
if isinstance(self._array, sparse.spmatrix):
if not isinstance(self._array, sparse.csc_matrix):
self._array = self._array.tocsc()
indices = self._array[:,ind].indices
else:
indices = self._array[:,ind].nonzero()[0]
if len(indices) == 0:
continue
ids = self.get_identifiers(self.get_dim_name(0), indices)
selection = Selection(cat_name)
selection.select(self.get_dim_name(0), ids)
ret_list.append(selection)
return ret_list
class GraphDataset(Dataset):
"""The graph dataset class.
A dataset class for representing graphs. The constructor may use an
incidence matrix (possibly sparse) or (if networkx installed) a
networkx.(X)Graph structure.
If the networkx library is installed, there is support for
representing the graph as a networkx.Graph, or networkx.XGraph structure.
"""
def __init__(self, input, identifiers=None, name='A', nodepos = None):
if isinstance(input, sparse.spmatrix):
arr = input
else:
try:
arr = asarray(input)
except:
raise ValueError("Could not identify input")
Dataset.__init__(self, array=arr, identifiers=identifiers, name=name)
self._graph = None
self.nodepos = nodepos
def as_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
return self._array
else:
arr = self.asarray()
return sparse.csr_matrix(arr.astype('i'))
def to_spmatrix(self):
if isinstance(self._array, sparse.spmatrix):
self._array = self._array.tocsr()
else:
self._array = sparse.csr_matrix(self._array)
def asnetworkx(self):
if self._graph != None:
return self._graph
dim0, dim1 = self.get_dim_name()
node_ids = self.get_identifiers(dim0, sorted=True)
edge_ids = self.get_identifiers(dim1, sorted=True)
G, weights = self._graph_from_incidence_matrix(self._array, node_ids=node_ids, edge_ids=edge_ids)
self._graph = G
return G
def from_networkx(cls, G, node_dim, edge_dim, sp_format=True):
"""Create graph dataset from networkx graph.
When G is a Graph/Digraph edge identifiers will be created,
else (XGraph/XDiGraph) it is assumed that edge attributes are
the edge identifiers.
"""
import networkx as nx
n = G.number_of_nodes()
m = G.number_of_edges()
if isinstance(G, nx.DiGraph):
G = nx.XDiGraph(G)
elif isinstance(G, nx.Graph):
G = nx.XGraph(G)
edge_ids = [e[2] for e in G.edges()]
node_ids = map(str, G.nodes())
n2ind = {}
for ind, node in enumerate(node_ids):
n2ind[node] = ind
if sp_format:
I = sparse.lil_matrix((n, m))
else:
I = zeros((m, n), dtype='i')
for i, (h, t, eid) in enumerate(G.edges()):
if eid != None:
edge_ids[i] = eid
else:
edge_ids[i] = 'e_' + str(i)
hind = n2ind[str(h)]
tind = n2ind[str(t)]
I[hind, i] = 1
if G.is_directed():
I[tind, i] = -1
else:
I[tind, i] = 1
idents = [[node_dim, node_ids], [edge_dim, edge_ids]]
if G.name != '':
name = G.name
else:
name = 'A'
ds = GraphDataset(I, idents, name)
return ds
from_networkx = classmethod(from_networkx)
def _incidence2adjacency(self, I):
"""Incidence to adjacency matrix.
I*I.T - eye(n)?
"""
raise NotImplementedError
def _graph_from_incidence_matrix(self, I, node_ids, edge_ids):
"""Creates a networkx graph class from incidence
(possibly weighted) matrix and ordered labels.
labels = None, results in string-numbered labels
"""
try:
import networkx as nx
except:
print "Failed in import of NetworkX"
return None
m, n = I.shape
assert(m == len(node_ids))
assert(n == len(edge_ids))
weights = []
directed = False
G = nx.XDiGraph(name=self._name)
if isinstance(I, sparse.spmatrix):
I = I.tocsr()
for ename, col in izip(edge_ids, I.T):
if isinstance(I, sparse.spmatrix):
node_ind = col.indices
w1, w2 = col.data
else:
node_ind = where(col != 0)[0]
w1, w2 = col[node_ind]
node1 = node_ids[node_ind[0]]
node2 = node_ids[node_ind[1]]
if w1 < 0: # w1 is tail
directed = True
assert(w2 > 0 and (w1 + w2) == 0)
G.add_edge(node2, node1, ename)
weights.append(w2)
else: #w2 is tail or graph is undirected
assert(w1 > 0)
if w2 < 0:
directed = True
G.add_edge(node1, node2, ename)
weights.append(w1)
if not directed:
G = G.to_undirected()
return G, asarray(weights)
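# Note (added for clarity): _all_dims is a class attribute, so the set of
# known dimension names is shared by every Dataset instance in the process.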
Dataset._all_dims = set()
class ReverseDict(dict):
"""A dictionary which can lookup values by key, and keys by value.
All values and keys must be hashable, and unique.
example:
>>d = ReverseDict((['a',1],['b',2]))
>>print d['a'] --> 1
>>print d.reverse[1] --> 'a'
"""
def __init__(self, *args, **kw):
dict.__init__(self, *args, **kw)
self.reverse = dict([[v, k] for k, v in self.items()])
def __setitem__(self, key, value):
dict.__setitem__(self, key, value)
try:
self.reverse[value] = key
except:
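# The reverse index may not exist yet at this point, e.g. if __setitem__
# runs before __init__ has finished (as can happen during copy/unpickling).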
self.reverse = {value:key}
class Selection(dict):
"""Handles selected identifiers along each dimension of a dataset"""
def __init__(self, title='Unnamed Selection'):
self.title = title
def __getitem__(self, key):
if not self.has_key(key):
return None
return dict.__getitem__(self, key)
def dims(self):
return self.keys()
def axis_len(self, axis):
if self._selection.has_key(axis):
return len(self._selection[axis])
return 0
def select(self, axis, labels):
self[axis] = labels
def write_ftsv(fd, ds, decimals=7, sep='\t', fmt=None, sp_format=True):
"""Writes a dataset in laydi tab separated values (ftsv) form.
@param fd: An open file descriptor to the output file.
@param ds: The dataset to be written.
@param decimals: Number of decimals, only supported for dataset.
@param fmt: String formatting
The function handles datasets of these classes:
Dataset, CategoryDataset and GraphDataset
"""
opened = False
if isinstance(fd, str):
fd = open(fd, 'w')
opened = True
# Write header information
if isinstance(ds, CategoryDataset):
type = 'category'
if fmt == None:
fmt = '%d'
elif isinstance(ds, GraphDataset):
type = 'network'
if fmt == None:
fmt = '%d'
elif isinstance(ds, Dataset):
type = 'dataset'
if fmt == None:
fmt = '%%.%df' % decimals
else:
fmt = '%%.%d' %decimals + fmt
else:
raise Exception("Unknown object type")
fd.write('# type: %s' %type + '\n')
for dim in ds.get_dim_name():
fd.write("# dimension: %s" % dim)
for ident in ds.get_identifiers(dim, sorted=True):
fd.write(" " + ident)
fd.write("\n")
fd.write("# name: %s" % ds.get_name() + '\n')
# xy-node-positions
if type == 'network' and ds.nodepos != None:
fd.write("# nodepos:")
node_dim = ds.get_dim_name(0)
for ident in ds.get_identifiers(node_dim, sorted=True):
fd.write(" %s,%s" %ds.nodepos[ident])
fd.write("\n")
# Write data
if hasattr(ds, "as_spmatrix") and sp_format == True:
m = ds.as_spmatrix()
else:
m = ds.asarray()
if isinstance(m, sparse.spmatrix):
_write_sparse_elements(fd, m, fmt, sep)
else:
_write_elements(fd, m, fmt, sep)
if opened:
fd.close()
def read_ftsv(fd, sep=None):
"""Read a dataset in laydi tab separated values (ftsv) form and return it.
@param fd: An open file descriptor.
@return: A Dataset, CategoryDataset or GraphDataset depending on the information
read.
"""
opened = False
if isinstance(fd, str):
fd = open(fd)
opened = True
split_re = re.compile('^#\s*(\w+)\s*:\s*(.+)')
dimensions = []
identifiers = {}
type = 'dataset'
name = 'Unnamed dataset'
sp_format = False
nodepos = None
# graphtype = 'graph'
# Read header lines from file.
line = fd.readline()
while line:
m = split_re.match(line)
if m:
key, val = m.groups()
# The line is of the form:
# dimension: dimname id1 id2 id3 ...
if key == 'dimension':
values = [v.strip() for v in val.split(' ')]
dimensions.append(values[0])
identifiers[values[0]] = values[1:]
# Read type of dataset.
# Should be dataset, category, or network
elif key == 'type':
type = val
elif key == 'name':
name = val
# storage format
# if sp_format is True then use coordinate triplets
elif key == 'sp_format':
if val in ['False', 'false', '0', 'F', 'f',]:
sp_format = False
elif val in ['True', 'true', '1', 'T', 't']:
sp_format = True
else:
raise ValueError("sp_format: %s not valid " %sp_format)
elif key == 'nodepos':
node_dim = dimensions[0]
idents = identifiers[node_dim]
nodepos = {}
xys = val.split(" ")
for node_id, xy in zip(idents, xys):
x, y = map(float, xy.split(","))
nodepos[node_id] = (x, y)
else:
break
line = fd.readline()
    # Dimensions on the form [(dim1, [id1, id2, id3, ...]), ...]
dims = [(x, identifiers[x]) for x in dimensions]
dim_lengths = [len(identifiers[x]) for x in dimensions]
# Create matrix and assign element reader
    if sp_format:
        matrix = sparse.lil_matrix(dim_lengths)
    elif type == 'category':
        matrix = empty(dim_lengths, dtype='i')
    else:
        matrix = empty(dim_lengths)
if sp_format:
matrix = _read_sparse_elements(fd, matrix)
else:
matrix = _read_elements(fd, matrix)
# Create dataset of specified type
if type == 'category':
ds = CategoryDataset(matrix, dims, name)
elif type == 'network':
ds = GraphDataset(matrix, dims, name=name, nodepos=nodepos)
else:
ds = Dataset(matrix, dims, name)
if opened:
fd.close()
return ds
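# Illustrative round-trip sketch (not part of the original module): write a
# small dense Dataset to an in-memory buffer and read it back. Dimension and
# identifier names below are hypothetical.
def _ftsv_roundtrip_example():
    from StringIO import StringIO
    data = empty((2, 2))
    data[:] = [[1.0, 2.0], [3.0, 4.0]]
    ds = Dataset(data, [('rows', ['r0', 'r1']), ('cols', ['c0', 'c1'])],
                 'example')
    buf = StringIO()
    write_ftsv(buf, ds)       # writes the '# type: dataset' header plus values
    buf.seek(0)
    return read_ftsv(buf)     # -> a Dataset equivalent to ds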
def write_csv(fd, ds, decimals=7, sep='\t'):
"""Write a dataset as comma/tab/whatever dilimited data.
@param fd: An open file descriptor to the output file.
@param ds: The dataset to be written.
@param decimals: Number of decimals, only supported for dataset.
@param sep: Value separator
"""
## Open file if a string is passed instead of a file descriptor
opened = False
if isinstance(fd, str):
fd = open(fd, 'w')
opened = True
## Get data
rowdim, coldim = ds.get_dim_name()
rowids = ds.get_identifiers(rowdim)
colids = ds.get_identifiers(coldim)
a = ds.asarray()
y, x = a.shape
    fmt = '%%.%df' % decimals    # e.g. decimals=7 -> '%.7f'
## Write header
fd.write(rowdim)
fd.write(sep)
for i, id in enumerate(colids):
fd.write(id)
fd.write(sep)
fd.write('\n')
## Write matrix data
for j in range(y):
fd.write(rowids[j])
fd.write(sep)
for i in range(x):
fd.write(fmt % (a[j, i],))
fd.write(sep)
fd.write('\n')
## If we opened the stream, close it
if opened:
fd.close()
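# Illustrative usage (not part of the original module), for a two-dimensional
# Dataset ds:
#   write_csv('/tmp/ds.txt', ds, decimals=3, sep='\t')
# writes a header row of column identifiers followed by one row per sample.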
def _write_sparse_elements(fd, arr, fmt='%d', sep=None):
""" Sparse coordinate format."""
fd.write('# sp_format: True\n\n')
fmt = '%d %d ' + fmt + '\n'
csr = arr.tocsr()
for ii in xrange(csr.size):
ir, ic = csr.rowcol(ii)
data = csr.getdata(ii)
fd.write(fmt % (ir, ic, data))
def _write_elements(fd, arr, fmt='%f', sep='\t'):
"""Standard value separated format."""
fmt = fmt + sep
fd.write('\n')
y, x = arr.shape
for j in range(y):
for i in range(x):
fd.write(fmt %arr[j, i])
fd.write('\n')
def _read_elements(fd, arr, sep=None):
line = fd.readline()
i = 0
while line:
values = line.split(sep)
for j, val in enumerate(values):
arr[i,j] = float(val)
i += 1
line = fd.readline()
return arr
def _read_sparse_elements(fd, arr, sep=None):
line = fd.readline()
while line:
i, j, val = line.split()
arr[int(i),int(j)] = float(val)
line = fd.readline()
return arr.tocsr()


@@ -4,11 +4,10 @@ import gtk
 import sys
 import os
 import gobject
-from system import logger, project, workflow
-import workflows
+import logger, projectview, workflow
 
-DATADIR = os.path.dirname(sys.modules['system'].__file__)
-GLADEFILENAME = os.path.join(DATADIR, 'fluents.glade')
+DATADIR = os.path.dirname(sys.modules['laydi'].__file__)
+GLADEFILENAME = os.path.join(DATADIR, 'laydi.glade')
 
 class CreateProjectDruid(gtk.Window):
     """A druid for creating a new project.
@@ -17,9 +16,8 @@ class CreateProjectDruid(gtk.Window):
     Workflow, and asks the user to select one of these. A new project of
     the selected class is added to the application."""
 
-    def __init__(self, app):
+    def __init__(self):
         gtk.Window.__init__(self)
-        self.app = app
         self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'new_project_druid')
         self.workflows = self.make_workflow_list()
         self.selected = None
@@ -63,10 +61,12 @@
     def finish(self, *rest):
         tree, it = self['workflow_list'].get_selection().get_selected()
-        wf = self.workflows.get_value(it, 1)
-        proj = project.Project()
-        self.app.set_workflow(wf(self.app))
-        self.app.set_project(proj)
+        wf_class = self.workflows.get_value(it, 1)
+        proj = projectview.ProjectView()
+        main.set_workflow(wf_class())
+        # self.app.set_workflow(wf(self.app))
+        # self.app.set_project(proj)
+        main.set_projectview(proj)
         self.hide()
         self.destroy()


@@ -7,7 +7,7 @@
 <widget class="GnomeApp" id="appwindow">
   <property name="visible">True</property>
-  <property name="title" translatable="yes">Fluent</property>
+  <property name="title" translatable="yes">Laydi</property>
   <property name="type">GTK_WINDOW_TOPLEVEL</property>
   <property name="window_position">GTK_WIN_POS_NONE</property>
   <property name="modal">False</property>
@@ -101,78 +101,6 @@
         </widget>
       </child>
-      <child>
-        <widget class="GtkMenuItem" id="edit1">
-          <property name="visible">True</property>
-          <property name="stock_item">GNOMEUIINFO_MENU_EDIT_TREE</property>
-          <child>
-            <widget class="GtkMenu" id="edit1_menu">
-              <child>
-                <widget class="GtkImageMenuItem" id="cut1">
-                  <property name="visible">True</property>
-                  <property name="stock_item">GNOMEUIINFO_MENU_CUT_ITEM</property>
-                  <signal name="activate" handler="on_cut1_activate" last_modification_time="Thu, 13 Apr 2006 11:24:18 GMT"/>
-                </widget>
-              </child>
-              <child>
-                <widget class="GtkImageMenuItem" id="copy1">
-                  <property name="visible">True</property>
-                  <property name="stock_item">GNOMEUIINFO_MENU_COPY_ITEM</property>
-                  <signal name="activate" handler="on_copy1_activate" last_modification_time="Thu, 13 Apr 2006 11:24:18 GMT"/>
-                </widget>
-              </child>
-              <child>
-                <widget class="GtkImageMenuItem" id="paste1">
-                  <property name="visible">True</property>
-                  <property name="stock_item">GNOMEUIINFO_MENU_PASTE_ITEM</property>
-                  <signal name="activate" handler="on_paste1_activate" last_modification_time="Thu, 13 Apr 2006 11:24:18 GMT"/>
-                </widget>
-              </child>
-              <child>
-                <widget class="GtkImageMenuItem" id="clear1">
-                  <property name="visible">True</property>
-                  <property name="stock_item">GNOMEUIINFO_MENU_CLEAR_ITEM</property>
-                  <signal name="activate" handler="on_clear1_activate" last_modification_time="Thu, 13 Apr 2006 11:24:18 GMT"/>
-                </widget>
-              </child>
-              <child>
-                <widget class="GtkSeparatorMenuItem" id="separator2">
-                  <property name="visible">True</property>
-                </widget>
-              </child>
-              <child>
-                <widget class="GtkImageMenuItem" id="properties1">
-                  <property name="visible">True</property>
-                  <property name="stock_item">GNOMEUIINFO_MENU_PROPERTIES_ITEM</property>
-                  <signal name="activate" handler="on_properties1_activate" last_modification_time="Thu, 13 Apr 2006 11:24:18 GMT"/>
-                </widget>
-              </child>
-              <child>
-                <widget class="GtkSeparatorMenuItem" id="separator3">
-                  <property name="visible">True</property>
-                </widget>
-              </child>
-              <child>
-                <widget class="GtkImageMenuItem" id="preferences1">
-                  <property name="visible">True</property>
-                  <property name="stock_item">GNOMEUIINFO_MENU_PREFERENCES_ITEM</property>
-                  <signal name="activate" handler="on_preferences1_activate" last_modification_time="Thu, 13 Apr 2006 11:24:18 GMT"/>
-                </widget>
-              </child>
-            </widget>
-          </child>
-        </widget>
-      </child>
       <child>
         <widget class="GtkMenuItem" id="view1">
           <property name="visible">True</property>
@@ -181,10 +109,46 @@
           <child>
             <widget class="GtkMenu" id="view1_menu">
+              <child>
+                <widget class="GtkCheckMenuItem" id="navigator1">
+                  <property name="visible">True</property>
+                  <property name="label" translatable="yes">_Navigator</property>
+                  <property name="use_underline">True</property>
+                  <property name="active">True</property>
+                  <signal name="activate" handler="on_navigator1_activate" last_modification_time="Thu, 06 Dec 2007 00:03:35 GMT"/>
+                </widget>
+              </child>
+              <child>
+                <widget class="GtkCheckMenuItem" id="workflow1">
+                  <property name="visible">True</property>
+                  <property name="label" translatable="yes">_Workflow</property>
+                  <property name="use_underline">True</property>
+                  <property name="active">True</property>
+                  <signal name="activate" handler="on_workflow1_activate" last_modification_time="Thu, 06 Dec 2007 00:03:35 GMT"/>
+                </widget>
+              </child>
+              <child>
+                <widget class="GtkCheckMenuItem" id="information1">
+                  <property name="visible">True</property>
+                  <property name="label" translatable="yes">_Information</property>
+                  <property name="use_underline">True</property>
+                  <property name="active">True</property>
+                  <signal name="activate" handler="on_information1_activate" last_modification_time="Thu, 06 Dec 2007 00:03:35 GMT"/>
+                </widget>
+              </child>
+              <child>
+                <widget class="GtkSeparatorMenuItem" id="separator5">
+                  <property name="visible">True</property>
+                </widget>
+              </child>
               <child>
                 <widget class="GtkMenuItem" id="large_view1">
                   <property name="visible">True</property>
-                  <property name="label" translatable="yes">Large View</property>
+                  <property name="label" translatable="yes">One plot</property>
                   <property name="use_underline">True</property>
                   <signal name="activate" handler="on_large_view1_activate" last_modification_time="Fri, 26 May 2006 12:15:59 GMT"/>
                   <accelerator key="plus" modifiers="GDK_CONTROL_MASK" signal="activate"/>
@@ -194,7 +158,7 @@
               <child>
                 <widget class="GtkMenuItem" id="small_view1">
                   <property name="visible">True</property>
-                  <property name="label" translatable="yes">Small View</property>
+                  <property name="label" translatable="yes">All plots</property>
                   <property name="use_underline">True</property>
                   <signal name="activate" handler="on_small_view1_activate" last_modification_time="Fri, 26 May 2006 12:15:59 GMT"/>
                   <accelerator key="minus" modifiers="GDK_CONTROL_MASK" signal="activate"/>
@@ -270,7 +234,7 @@
               <child>
                 <widget class="GtkImageMenuItem" id="index1">
                   <property name="visible">True</property>
-                  <property name="tooltip" translatable="yes">Open the fluents documentation.</property>
+                  <property name="tooltip" translatable="yes">Open the laydi documentation.</property>
                   <property name="label" translatable="yes">_Index</property>
                   <property name="use_underline">True</property>
                   <signal name="activate" handler="on_index1_activate" last_modification_time="Thu, 27 Apr 2006 09:21:48 GMT"/>
@@ -320,59 +284,13 @@
           <property name="shadow_type">GTK_SHADOW_OUT</property>
           <child>
-            <widget class="GtkToolbar" id="toolbar1">
+            <widget class="GtkToolbar" id="toolbar">
               <property name="visible">True</property>
               <property name="orientation">GTK_ORIENTATION_HORIZONTAL</property>
               <property name="toolbar_style">GTK_TOOLBAR_ICONS</property>
               <property name="tooltips">True</property>
               <property name="show_arrow">False</property>
-              <child>
-                <widget class="GtkToolButton" id="button_new">
-                  <property name="visible">True</property>
-                  <property name="tooltip" translatable="yes">Ny fil</property>
-                  <property name="stock_id">gtk-new</property>
-                  <property name="visible_horizontal">True</property>
-                  <property name="visible_vertical">True</property>
-                  <property name="is_important">False</property>
-                  <signal name="clicked" handler="on_button_new_clicked" last_modification_time="Fri, 21 Apr 2006 13:46:38 GMT"/>
-                </widget>
-                <packing>
-                  <property name="expand">False</property>
-                  <property name="homogeneous">True</property>
-                </packing>
-              </child>
-              <child>
-                <widget class="GtkToolButton" id="toolbutton2">
-                  <property name="visible">True</property>
-                  <property name="tooltip" translatable="yes">Åpne fil</property>
-                  <property name="stock_id">gtk-open</property>
-                  <property name="visible_horizontal">True</property>
-                  <property name="visible_vertical">True</property>
-                  <property name="is_important">False</property>
-                </widget>
-                <packing>
-                  <property name="expand">False</property>
-                  <property name="homogeneous">True</property>
-                </packing>
-              </child>
-              <child>
-                <widget class="GtkToolButton" id="toolbutton3">
-                  <property name="visible">True</property>
-                  <property name="tooltip" translatable="yes">Lagre fil</property>
-                  <property name="stock_id">gtk-save</property>
-                  <property name="visible_horizontal">True</property>
-                  <property name="visible_vertical">True</property>
-                  <property name="is_important">False</property>
-                </widget>
-                <packing>
-                  <property name="expand">False</property>
-                  <property name="homogeneous">True</property>
-                </packing>
-              </child>
               <child>
                 <widget class="GtkToolButton" id="zoom_in_button">
                   <property name="visible">True</property>
@@ -496,6 +414,15 @@
                 </packing>
               </child>
+              <child>
+                <widget class="GtkScrolledWindow" id="scrolledwindow11">
+                  <property name="visible">True</property>
+                  <property name="can_focus">True</property>
+                  <property name="hscrollbar_policy">GTK_POLICY_AUTOMATIC</property>
+                  <property name="vscrollbar_policy">GTK_POLICY_AUTOMATIC</property>
+                  <property name="shadow_type">GTK_SHADOW_IN</property>
+                  <property name="window_placement">GTK_CORNER_TOP_LEFT</property>
               <child>
                 <widget class="Custom" id="navigator_view">
                   <property name="visible">True</property>
@@ -504,6 +431,8 @@
                   <property name="int2">0</property>
                   <property name="last_modification_time">Sat, 15 Apr 2006 12:34:23 GMT</property>
                 </widget>
+              </child>
+            </widget>
               <packing>
                 <property name="padding">0</property>
                 <property name="expand">True</property>
@@ -967,15 +896,16 @@ The functions of the workflow you select will be available on the right part of
 </widget>
 <widget class="GtkAboutDialog" id="aboutdialog">
+  <property name="border_width">5</property>
   <property name="visible">True</property>
   <property name="destroy_with_parent">True</property>
-  <property name="name" translatable="yes">Fluents</property>
+  <property name="name" translatable="yes">Laydi</property>
-  <property name="copyright" translatable="yes">Copyright (C) 2006 the Fluents Team
+  <property name="copyright" translatable="yes">Copyright (C) 2006 the Laydi Team
 Released under the GNU General Public Licence
 </property>
-  <property name="comments" translatable="yes">From WordNet (r) 2.0 [wn]: fluent (adj)
+  <property name="comments" translatable="yes">From WordNet (r) 2.0 [wn]: laydi (adj)
-1: easy and graceful in shape; &quot;a yacht with long, fluent curves&quot;
+1: easy and graceful in shape; &quot;a yacht with long, laydi curves&quot;
 2: smooth and unconstrained in movement; &quot;a long, smooth stride&quot;; &quot;the fluid motion of a cat&quot;; &quot;the liquid grace of a ballerina&quot;; &quot;liquid prose&quot; [syn:{flowing}, {fluid}, {liquid}, {smooth}]
@@ -983,9 +913,9 @@ Released under the GNU General Public Licence
-From the Fluents team: fluents (sw)
+From the Laydi team: laydi (sw)
-1: fluent software for lightweight data analysis.</property>
+1: laydi software for lightweight data analysis.</property>
 <property name="license" translatable="yes">GNU GENERAL PUBLIC LICENSE
@@ -1282,8 +1212,8 @@ NO WARRANTY
 END OF TERMS AND CONDITIONS
 </property>
 <property name="wrap_license">False</property>
-<property name="website">https://dev.pvv.ntnu.no/projects/fluent</property>
+<property name="website">https://dev.pvv.ntnu.no/projects/laydi</property>
-<property name="website_label" translatable="yes">The Fluent project website</property>
+<property name="website_label" translatable="yes">The Laydi project website</property>
 <property name="authors">Arnar Flatberg
 Einar Ryeng
 Truls A. Tangstad</property>

laydi/laydi.py
@@ -0,0 +1,401 @@
#!/usr/bin/python
import os
import sys
import pygtk
pygtk.require('2.0')
import gobject
import gtk
import gtk.gdk
import gtk.glade
import gnome
import gnome.ui
import scipy
import pango
import projectview, workflow, dataset, view, navigator, dialogs, selections, plots, main
from logger import logger, LogView
PROGRAM_NAME = 'laydi'
VERSION = '0.1.0'
DATADIR = os.path.join(main.PYDIR, 'laydi')
#ICONDIR = os.path.join(DATADIR,"..","icons")
ICONDIR = main.ICONDIR
GLADEFILENAME = os.path.join(main.PYDIR, 'laydi/laydi.glade')
_icon_mapper = {dataset.Dataset: 'dataset',
dataset.CategoryDataset: 'category_dataset',
dataset.GraphDataset: 'graph_dataset',
plots.Plot: 'line_plot'}
class IconFactory:
"""Factory for icons that ensures that each icon is only loaded once."""
def __init__(self, path):
self._path = path
self._icons = {}
def get(self, iconname):
"""Returns the gdk loaded PixBuf for the given icon.
Reads the icon from file if necessary."""
        # if iconname isn't a string, try to autoconvert
if not isinstance(iconname, str):
for cls in _icon_mapper.keys():
if isinstance(iconname, cls):
iconname = _icon_mapper[cls]
if self._icons.has_key(iconname):
return self._icons[iconname]
icon_fname = os.path.join(self._path, '%s.png' % iconname)
icon = gtk.gdk.pixbuf_new_from_file(icon_fname)
self._icons[iconname] = icon
return icon
icon_factory = IconFactory(ICONDIR)
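# Illustrative usage (not part of the original file): icons are read from
# disk once and cached, so repeated lookups return the same pixbuf object.
#   pb1 = icon_factory.get('dataset')
#   pb2 = icon_factory.get('dataset')
#   assert pb1 is pb2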
class TableSizeSelection(gtk.Window):
def __init__(self):
self._SIZE = size = 5
gtk.Window.__init__(self, gtk.WINDOW_POPUP)
self._table = gtk.Table(size, size, True)
self._items = []
        ## Create a size x size table of EventBox objects, doubly stored
        ## because gtk.Table does not support indexed retrieval.
for y in range(size):
line = []
for x in range(size):
ebox = gtk.EventBox()
ebox.add(gtk.Frame())
ebox.set_size_request(20, 20)
ebox.set_visible_window(True)
self._table.attach(ebox, x, x+1, y, y+1, gtk.FILL, gtk.FILL)
line.append(ebox)
self._items.append(line)
self.set_border_width(5)
self.add(self._table)
self.connect_signals()
def _get_child_pos(self, child):
size = self._SIZE
for x in range(size):
for y in range(size):
if self._items[y][x] == child:
return (x, y)
return None
def connect_signals(self):
size = self._SIZE
for x in range(size):
for y in range(size):
self._items[y][x].add_events(gtk.gdk.ENTER_NOTIFY_MASK)
self._items[y][x].connect("enter-notify-event",
self._on_enter_notify)
self._items[y][x].connect("button-release-event",
self._on_button_release)
def _on_enter_notify(self, widget, event):
size = self._SIZE
x, y = self._get_child_pos(widget)
for i in range(size):
for j in range(size):
if i <= x and j <= y:
self._items[j][i].set_state(gtk.STATE_SELECTED)
else:
self._items[j][i].set_state(gtk.STATE_NORMAL)
self.x = x
self.y = y
def _on_button_release(self, widget, event):
size = self._SIZE
self.emit('table-size-set', self.x+1, self.y+1)
self.hide_all()
for x in range(size):
for y in range(size):
self._items[y][x].set_state(gtk.STATE_NORMAL)
class ViewFrameToolButton (gtk.ToolItem):
def __init__(self):
gtk.ToolItem.__init__(self)
fname = os.path.join(ICONDIR, "table_size.png")
image = gtk.Image()
image.set_from_file(fname)
self._button = gtk.Button()
self._button.set_image(image)
self._button.set_property("can-focus", False)
eb = gtk.EventBox()
eb.add(self._button)
self.add(eb)
self._item = TableSizeSelection()
self._button.connect("button-press-event", self._on_show_menu)
image.show()
self._image = image
self._item.connect("table-size-set", self._on_table_size_set)
self._button.set_relief(gtk.RELIEF_NONE)
self.show_all()
def _on_show_menu(self, widget, event):
x, y = self._image.window.get_origin()
x2, y2, w, h, b = self._image.window.get_geometry()
self._item.move(x, y+h)
self._item.show_all()
def _on_table_size_set(self, widget, width, height):
main.application['main_view'].resize_table(width, height)
class LaydiApp:
def __init__(self): # Application variables
self.current_data = None
self._last_view = None
self._plot_toolbar = None
self._toolbar_state = None
gtk.glade.set_custom_handler(self.custom_object_factory)
self.widget_tree = gtk.glade.XML(GLADEFILENAME, 'appwindow')
# self.workflow = wf
self.idlist_crt = selections.IdListController(self['identifier_list'])
self.sellist_crt = selections.SelectionListController(self['selection_tree'],
self.idlist_crt)
self.dimlist_crt = selections.DimListController(self['dim_list'],
self.sellist_crt)
self.sellist_crt.set_dimlist_controller(self.dimlist_crt)
def init_gui(self):
self['appwindow'].set_size_request(800, 600)
# Set up workflow
self.wf_view = workflow.WorkflowView(main.workflow)
self.wf_view.show()
self['workflow_vbox'].pack_end(self.wf_view)
self._wf_menu = workflow.WorkflowMenu(main.workflow)
self._wf_menu.show()
wf_menuitem = gtk.MenuItem('Fu_nctions')
wf_menuitem.set_submenu(self._wf_menu)
wf_menuitem.show()
self['menubar1'].insert(wf_menuitem, 2)
# Connect signals
signals = {'on_quit1_activate' : (gtk.main_quit),
'on_appwindow_delete_event' : (gtk.main_quit),
'on_zoom_in_button_clicked' : (self.on_single_view),
'on_zoom_out_button_clicked' : (self.on_multiple_view),
'on_new1_activate' : (self.on_create_project),
'on_button_new_clicked' : (self.on_create_project),
'on_workflow_refresh_clicked' : (self.on_workflow_refresh_clicked),
'on_index1_activate' : (self.on_help_index),
'on_about1_activate' : (self.on_help_about),
'on_report_bug1_activate' : (self.on_help_report_bug),
'on_small_view1_activate' : (self.on_multiple_view),
'on_large_view1_activate' : (self.on_single_view),
'on_left1_activate' : (self.on_left),
'on_right1_activate' : (self.on_right),
'on_up1_activate' : (self.on_up),
'on_down1_activate' : (self.on_down),
'on_navigator1_activate' : (self.on_show_navigator),
'on_workflow1_activate' : (self.on_show_workflow),
'on_information1_activate' : (self.on_show_infopane),
}
self.widget_tree.signal_autoconnect(signals)
self['main_view'].connect('view-changed', self.on_view_changed)
# Log that we've set up the app now
logger.debug('Program started')
# Add ViewFrame table size to toolbar
tb = ViewFrameToolButton()
self['toolbar'].add(tb)
def set_projectview(self, proj):
logger.notice('Welcome to your new project. Grasp That Data!')
self.navigator_view.add_projectview(proj)
self.dimlist_crt.set_projectview(proj)
self.sellist_crt.set_projectview(proj)
def set_workflow(self, workflow):
main.workflow = workflow
self.wf_view.set_workflow(main.workflow)
def show(self):
self.init_gui()
def change_plot(self, plot):
"""Sets the plot in the currently active ViewFrame. If the plot is
already shown in another ViewFrame it will be moved from there."""
# Set current selection in the plot before showing it.
plot.selection_changed(None, main.projectview.get_selection())
self['main_view'].insert_view(plot)
self._update_toolbar(plot)
def change_plots(self, plots):
"""Changes all plots."""
self['main_view'].set_all_plots(plots)
v = self.get_active_view_frame().get_view()
self._update_toolbar(v)
def get_active_view_frame(self):
return self['main_view'].get_active_view_frame()
def _update_toolbar(self, view):
"""Set the plot specific toolbar to the toolbar of the currently
active plot."""
# don't do anything on no change
if self._last_view == view:
return
self._last_view = view
logger.debug("view changed to %s" % view)
window = self['plot_toolbar_dock']
if self._plot_toolbar:
toolbar_state = self._plot_toolbar.get_mode()
window.remove(self._plot_toolbar)
else:
toolbar_state = "default"
if view:
self._plot_toolbar = view.get_toolbar()
self._plot_toolbar.set_mode(toolbar_state)
else:
self._plot_toolbar = None
if self._plot_toolbar:
window.add(self._plot_toolbar)
# Methods to create GUI widgets from CustomWidgets in the glade file.
# The custom_object_factory calls other functions to generate specific
# widgets.
def custom_object_factory(self, glade, fun_name, widget_name, s1, s2, i1, i2):
"Called by the glade file reader to create custom GUI widgets."
handler = getattr(self, fun_name)
return handler(s1, s2, i1, i2)
def create_logview(self, str1, str2, int1, int2):
self.log_view = LogView(logger)
self.log_view.show()
return self.log_view
def create_main_view(self, str1, str2, int1, int2):
self.main_view = view.MainView()
self.main_view.show()
return self.main_view
def create_navigator_view(self, str1, str2, int1, int2):
self.navigator_view = navigator.NavigatorView()
self.navigator_view.show()
return self.navigator_view
def create_dim_list(self, str1, str2, int1, int2):
self.dim_list = selections.DimList()
self.dim_list.show()
return self.dim_list
def create_selection_tree(self, str1, str2, int1, int2):
self.selection_tree = selections.SelectionTree()
self.selection_tree.show()
return self.selection_tree
def create_identifier_list(self, str1, str2, int1, int2):
self.identifier_list = selections.IdentifierList()
self.identifier_list.show()
return self.identifier_list
def __getitem__(self, key):
return self.widget_tree.get_widget(key)
# Event handlers.
# These methods are called by the gtk framework in response to events and
# should not be called directly.
def on_single_view(self, *ignored):
self['main_view'].goto_large()
def on_multiple_view(self, *ignored):
self['main_view'].goto_small()
def on_create_project(self, *rest):
        d = dialogs.CreateProjectDruid()   # the druid no longer takes the app
d.run()
def on_help_about(self, *rest):
widget_tree = gtk.glade.XML(GLADEFILENAME, 'aboutdialog')
about = widget_tree.get_widget('aboutdialog')
about.run()
def on_help_index(self, *ignored):
gnome.help_display_uri('https://dev.pvv.org/projects/laydi/wiki/help')
def on_help_report_bug(self, *ignored):
gnome.help_display_uri('https://dev.pvv.org/projects/laydi/newticket')
def on_workflow_refresh_clicked(self, *ignored):
try:
reload(sys.modules[main.workflow.__class__.__module__])
except Exception, e:
logger.warning('Cannot reload workflow')
logger.warning(e)
else:
logger.notice('Successfully reloaded workflow')
def on_view_changed(self, widget, vf):
self._update_toolbar(vf.get_view())
def on_show_navigator(self, item):
if item.get_active():
self['data_vbox'].show()
else:
self['data_vbox'].hide()
def on_show_workflow(self, item):
if item.get_active():
self['workflow_vbox'].show()
else:
self['workflow_vbox'].hide()
def on_show_infopane(self, item):
if item.get_active():
self['bottom_notebook'].show()
else:
self['bottom_notebook'].hide()
def on_left(self, item):
self.main_view.move_focus_left()
def on_right(self, item):
self.main_view.move_focus_right()
def on_up(self, item):
self.main_view.move_focus_up()
def on_down(self, item):
self.main_view.move_focus_down()
gobject.signal_new('table-size-set', TableSizeSelection,
gobject.SIGNAL_RUN_LAST,
gobject.TYPE_NONE,
(gobject.TYPE_INT, gobject.TYPE_INT))

laydi/lib/R_utils.py
@@ -0,0 +1,284 @@
"""A collection of functions that use R.
Most functions use libraries from bioconductor
depends on:
(not updated)
-- bioconductor min. install
-- hgu133a
-- hgu133plus2
"""
import scipy
import Numeric as N
import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
def get_locusid(probelist=None,org="hgu133a"):
"""Returns a dictionary of locus link id for each affy probeset
and reverse mapping
    input:
[probelist] -- probelist of affy probesets
[org] -- chip type (organism)
out:
aff2loc, loc2aff
The mapping is one-to-one for affy->locus_id
However, there are several affy probesets for one locus_id
    From bioc-mail-archive: BioC takes the GenBank ids associated
    with the probes (provided by the manufacturer) and then maps them
to Entrez Gene ids using data from UniGene, Entrez Gene, and other
available data sources we trust. The Entrez Gene id a probe is
assigned to is determined by votes from all the sources used. If
there is no agreement among the sources, we take the smallest
Entrez Gene id.
"""
silent_eval("library("+org+")")
silent_eval('locus_ids = as.list('+org+'LOCUSID)')
silent_eval('pp<-as.list(locus_ids[!is.na(locus_ids)])')
loc_ids = rpy.r("pp")
for id in loc_ids:
loc_ids[id] = str(loc_ids[id])
aff2loc = {}
if probelist:
for pid in probelist:
try:
aff2loc[pid]=loc_ids[pid]
except:
print "Affy probeset: %s has no locus id" %pid
print "\nCONVERSION SUMMARY:\n \
Number of probesets input %s \n \
Number of translated locus ids: %s \n \
Number of missings: %s" %(len(probelist),len(aff2loc),len(probelist)-len(aff2loc))
else:
aff2loc = loc_ids
# reverse mapping
loc2aff = {}
for k,v in aff2loc.items():
if loc2aff.has_key(v):
loc2aff[v].append(k)
else:
loc2aff[v]=[k]
return aff2loc,loc2aff
def get_kegg_paths(org="hgu133plus2",id_type='aff',probelist=None):
"""Returns a dictionary of KEGG maps.
input:
org -- chip_type (see bioconductor.org)
id_type -- id ['aff','loc']
key: affy_id, value = list of kegg map id
example: '65884_at': ['00510', '00513']
"""
silent_eval("library("+org+")")
silent_eval('xx<-as.list('+org+'PATH)')
silent_eval('xp <- xx[!is.na(xx)]')
aff2path = rpy.r("xp")
dummy = rpy.r("xx")
    if id_type=='loc':
        aff2loc, loc2aff = get_locusid(org=org)
        loc2path = {}
        for id, path in aff2path.items():
            loc = aff2loc[id]
            if loc2path.has_key(loc):
                # several affy probesets may map to one locus id
                for i in loc2path[loc]:
                    path.append(i)
                print "Found duplicate in path: %s" %path
            loc2path[loc] = path
        aff2path = loc2path
out = {}
if probelist:
for pid in probelist:
try:
out[pid]=aff2path[pid]
except:
print "Could not find id: %s" %pid
else:
out = aff2path
for k,v in out.items():
# if string convert tol list
try:
v + ''
out[k] = [v]
except:
out[k] = v
return out
def get_probe_list(org="hgu133plus2"):
rpy.r.library(org)
silent_eval('probe_list<-ls('+org+'ACCNUM )')
pl = rpy.r("probe_list")
return pl
def get_GO_from_aff(org="hgu133plus2",id_type='aff',probelist=None):
"""Returns a dictionary of GO terms.
input:
org -- chip_type (see bioconductor.org)
id_type -- id ['aff','loc']
key:
example: '65884_at':
"""
silent_eval("library("+org+")")
silent_eval('xx<-as.list('+org+'GO)')
silent_eval('xp <- xx[!is.na(xx)]')
aff2path = rpy.r("xp")
dummy = rpy.r("xx")
    if id_type=='loc':
        aff2loc, loc2aff = get_locusid(org=org)
        loc2path = {}
        for id, path in aff2path.items():
            loc = aff2loc[id]
            if loc2path.has_key(loc):
                # several affy probesets may map to one locus id
                for i in loc2path[loc]:
                    path.append(i)
                print "Found duplicate in path: %s" %path
            loc2path[loc] = path
        aff2path = loc2path
    out = {}
    if probelist:
        for pid in probelist:
            try:
                out[pid] = aff2path[pid]
            except:
                print "Could not find id: %s" %pid
    else:
        out = aff2path
    return out
def get_kegg_as_category(org="hgu133plus2",id_type='aff',probelist=None):
"""Returns kegg pathway memberships in dummy (1/0) matrix (genes x maps)
"""
kegg = get_kegg_paths(org=org, id_type=id_type, probelist=probelist)
maps = set()
for kpth in kegg.values():
maps.update(kpth)
n_maps = len(maps)
n_genes = len(kegg)
gene2index = dict(zip(kegg.keys(), range(n_genes)))
map2index = dict(zip(maps, range(n_maps)))
C = scipy.zeros((n_genes, n_maps))
for k,v in kegg.items():
for m in v:
C[gene2index[k], map2index[m]]=1
return C, list(maps), kegg.keys()
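# Illustrative sketch (not part of the original file); the probe and map ids
# are hypothetical. C has one row per gene and one column per KEGG map:
#   C, maps, genes = get_kegg_as_category(probelist=['65884_at'])
#   C[genes.index('65884_at'), maps.index('00510')]   # 1.0 if member, else 0.0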
def impute(X, k=10, rowmax=0.5, colmax=0.8, maxp=1500, seed=362436069):
"""
A function to impute missing expression data, using nearest
neighbor averaging. (from bioconductors impute)
input:
data: An expression matrix with genes in the rows, samples in the
columns
k: Number of neighbors to be used in the imputation (default=10)
rowmax: The maximum percent missing data allowed in any row (default
50%). For any rows with more than 'rowmax'% missing are
imputed using the overall mean per sample.
colmax: The maximum percent missing data allowed in any column
(default 80%). If any column has more than 'colmax'% missing
data, the program halts and reports an error.
maxp: The largest block of genes imputed using the knn algorithm
inside 'impute.knn' (default 1500); larger blocks are divided
by two-means clustering (recursively) prior to imputation. If
'maxp=p', only knn imputation is done
seed: The seed used for the random number generator (default
362436069) for reproducibility.
call:
impute(data ,k = 10, rowmax = 0.5, colmax = 0.8, maxp = 1500, rng.seed=362436069)
"""
rpy.r.library("impute")
X = N.asarray(X) # cast as numeric array
m, n = scipy.shape(X)
    if m > n:
        # more samples than variables: keep X as-is (impute.knn expects
        # variables in rows)
        print "Warning (impute): more samples than variables. skipping transpose"
        t_flag = True
    else:
        X = N.transpose(X)
        t_flag = False
rpy.r.assign("X", X)
rpy.r.assign("k", k)
rpy.r.assign("rmax", rowmax)
rpy.r.assign("cmax", colmax)
rpy.r.assign("maxp", maxp)
call = "out<-impute.knn(X,k=k,rowmax=rmax,colmax=cmax,maxp=maxp)"
silent_eval(call)
out = rpy.r("out")
if not t_flag:
E = out['data']
E = scipy.asarray(E)
E = E.T
else:
E = out['data']
E = scipy.asarray(E)
return E
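# Illustrative usage sketch (not part of the original file), assuming the
# bioconductor 'impute' package is installed and X is a samples x variables
# array with NaN marking missing values:
#   E = impute(X, k=10, rowmax=0.5, colmax=0.8)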
def get_chip_annotation(org="hgu133a",annot='pmid', id_type='loc',probelist=None):
"""Returns a dictionary of annoations.
input:
org -- chip_type (see bioconductor.org)
annot -- annotation ['genename', 'pmid', ' symbol']
id_type -- id ['aff','loc']
key: id, value = list of annoations
example: '65884_at': ['15672394', '138402']
"""
_valid_annot = ['genename', 'pmid', 'symbol', 'enzyme', 'chr', 'chrloc']
if annot.lower() not in _valid_annot:
raise ValueError("Annotation must be one of %s" %_valid_annot)
silent_eval("library("+org+")")
silent_eval("dummy<-as.list("+org+annot.upper()+")")
silent_eval('annotations <- dummy[!is.na(dummy)]')
aff2annot = rpy.r("annotations")
    if id_type=='loc':
        aff2loc, loc2aff = get_locusid(org=org)
        loc2annot = {}
        for geneid, annotation in aff2annot.items():
            annotation = ensure_list(annotation)
            loc = aff2loc[geneid]
            if loc2annot.has_key(loc):
                for extra in loc2annot[loc]:
                    annotation.append(extra)
                print "Found duplicate in gene: %s" %geneid
            loc2annot[loc] = annotation
        aff2annot = loc2annot
out = {}
if probelist:
for pid in probelist:
try:
out[pid] = aff2annot.get(pid, 'none')
except:
print "Could not find id: %s" %pid
else:
out = aff2annot
return out
def ensure_list(value):
if isinstance(value, list):
return value
else:
return [value]

laydi/lib/blmfuncs.py
(file diff suppressed because it is too large)
laydi/lib/blmplots.py
@@ -0,0 +1,458 @@
"""Specialised plots for functions defined in blmfuncs.py.
fixme:
-- If scatterplot is not inited with a colorvector there will be no
colorbar, but when adding colors the colorbar shoud be created.
"""
from matplotlib import cm,patches
import gtk
import laydi
from laydi import plots, main,logger
import scipy
from scipy import dot,sum,diag,arange,log,newaxis,sqrt,apply_along_axis,empty
from numpy import corrcoef
def correlation_loadings(data, T, test=True):
""" Returns correlation loadings.
:input:
- D: [nsamps, nvars], data (non-centered data)
- T: [nsamps, a_max], Scores
:ouput:
- R: [nvars, a_max], Correlation loadings
:notes:
"""
nsamps, nvars = data.shape
nsampsT, a_max = T.shape
if nsamps!=nsampsT: raise IOError("D/T mismatch")
# center
data = data - data.mean(0)
R = empty((nvars, a_max),'d')
for a in range(a_max):
for k in range(nvars):
R[k,a] = corrcoef(data[:,k], T[:,a])[0,1]
return R
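# Note (not part of the original file): R[k,a] is corrcoef(data[:,k], T[:,a]),
# so each entry lies in [-1, 1] and R**2 gives the fraction of each variable's
# variance explained by each score vector.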
class BlmScatterPlot(plots.ScatterPlot):
"""Scatter plot used for scores and loadings in bilinear models."""
def __init__(self, title, model, absi=0, ordi=1, part_name='T', color_by=None):
self.model = model
        if not model.model.has_key(part_name):
            raise ValueError("Model part: %s not found in model" % part_name)
self._T = model.model[part_name]
if self._T.shape[1]==1:
logger.log('notice', 'Scores have only one component')
absi= ordi = 0
self._absi = absi
self._ordi = ordi
self._cmap = cm.summer
dataset_1 = model.as_dataset(part_name)
id_dim = dataset_1.get_dim_name(0)
sel_dim = dataset_1.get_dim_name(1)
id_1, = dataset_1.get_identifiers(sel_dim, [absi])
id_2, = dataset_1.get_identifiers(sel_dim, [ordi])
col = 'b'
if model.model.has_key(color_by):
col = model.model[color_by].ravel()
plots.ScatterPlot.__init__(self, dataset_1, dataset_1, id_dim, sel_dim, id_1, id_2 ,c=col ,s=40 , name=title)
self._mappable.set_cmap(self._cmap)
self.sc = self._mappable
self.add_pc_spin_buttons(self._T.shape[1], absi, ordi)
def set_facecolor(self, colors):
"""Set patch facecolors.
"""
pass
def set_alphas(self, alphas):
"""Set alpha channel for all patches."""
pass
def set_sizes(self, sizes):
"""Set patch sizes."""
pass
def set_expvar_axlabels(self, param=None):
if param == None:
param = self._expvar_param
else:
self._expvar_param = param
if not self.model.model.has_key(param):
self.model.model[param] = None
        if self.model.model[param] == None:
            logger.log('notice', 'Param: %s not in model' % param)
            pass #fixme: do expvar calc here if not present
else:
expvar = self.model.model[param]
xstr = "Comp: %s , %.1f " %(self._absi, expvar[self._absi+1])
ystr = "Comp: %s , %.1f " %(self._ordi, expvar[self._ordi+1])
self.axes.set_xlabel(xstr)
self.axes.set_ylabel(ystr)
def add_pc_spin_buttons(self, amax, absi, ordi):
sb_a = gtk.SpinButton(climb_rate=1)
sb_a.set_range(1, amax)
sb_a.set_value(absi+1)
sb_a.set_increments(1, 5)
sb_a.connect('value_changed', self.set_absicca)
sb_o = gtk.SpinButton(climb_rate=1)
sb_o.set_range(1, amax)
sb_o.set_value(ordi+1)
sb_o.set_increments(1, 5)
sb_o.connect('value_changed', self.set_ordinate)
hbox = gtk.HBox()
gtk_label_a = gtk.Label("A:")
gtk_label_o = gtk.Label(" O:")
toolitem = gtk.ToolItem()
toolitem.set_expand(False)
toolitem.set_border_width(2)
toolitem.add(hbox)
hbox.pack_start(gtk_label_a)
hbox.pack_start(sb_a)
hbox.pack_start(gtk_label_o)
hbox.pack_start(sb_o)
self._toolbar.insert(toolitem, -1)
toolitem.set_tooltip(self._toolbar.tooltips, "Set Principal component")
self._toolbar.show_all() #do i need this?
def set_absicca(self, sb):
self._absi = sb.get_value_as_int() - 1
xy = self._T[:,[self._absi, self._ordi]]
self.xaxis_data = xy[:,0]
self.yaxis_data = xy[:,1]
self.sc._offsets = xy
self.selection_collection._offsets = xy
self.canvas.draw_idle()
pad = abs(self.xaxis_data.min()-self.xaxis_data.max())*0.05
new_lims = (self.xaxis_data.min() - pad, self.xaxis_data.max() + pad)
self.axes.set_xlim(new_lims, emit=True)
self.set_expvar_axlabels()
self.canvas.draw_idle()
def set_ordinate(self, sb):
self._ordi = sb.get_value_as_int() - 1
xy = self._T[:,[self._absi, self._ordi]]
self.xaxis_data = xy[:,0]
self.yaxis_data = xy[:,1]
self.sc._offsets = xy
self.selection_collection._offsets = xy
pad = abs(self.yaxis_data.min()-self.yaxis_data.max())*0.05
new_lims = (self.yaxis_data.min() - pad, self.yaxis_data.max() + pad)
self.axes.set_ylim(new_lims, emit=True)
self.set_expvar_axlabels()
self.canvas.draw_idle()
def show_labels(self, index=None):
if self._text_labels == None:
x = self.xaxis_data
y = self.yaxis_data
self._text_labels = {}
for name, n in self.dataset_1[self.current_dim].items():
txt = self.axes.text(x[n],y[n], name)
txt.set_visible(False)
self._text_labels[n] = txt
if index!=None:
self.hide_labels()
for indx,txt in self._text_labels.items():
if indx in index:
txt.set_visible(True)
self.canvas.draw_idle()
def hide_labels(self):
for txt in self._text_labels.values():
txt.set_visible(False)
self.canvas.draw_idle()
class PcaScreePlot(plots.BarPlot):
def __init__(self, model):
title = "Pca, (%s) Scree" %model._dataset['X'].get_name()
ds = model.as_dataset('eigvals')
if ds==None:
logger.log('notice', 'Model does not contain eigvals')
plots.BarPlot.__init__(self, ds, name=title)
class PcaScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pca scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
self.set_expvar_axlabels(param="expvarx")
class PcaLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pca loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='p_tsq')
self.set_expvar_axlabels(param="expvarx")
class PlsScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
class PlsXLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls x-loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='w_tsq')
#self.set_expvar_axlabels(self, param="expvarx")
class PlsYLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls y-loadings (%s)" %model._dataset['Y'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Q')
class PlsCorrelationLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Pls correlation loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='CP')
class LplsScorePlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "L-pls scores (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'T')
self.set_expvar_axlabels("evx")
class LplsXLoadingPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls x-loadings (%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='P', color_by='tsqx')
self.set_expvar_axlabels("evx")
class LplsZLoadingPlot(BlmScatterPlot, plots.PlotThresholder):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls z-loadings (%s)" %model._dataset['Z'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='L', color_by='tsqz')
self.set_expvar_axlabels(param="evz")
plots.PlotThresholder.__init__(self, "IC")
def _update_color_from_dataset(self, ds):
BlmScatterPlot._update_color_from_dataset(self, ds)
self.set_threshold_dataset(ds)
class LplsXCorrelationPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls x-corr. loads (%s)" %model._dataset['X'].get_name()
if not model.model.has_key('Rx'):
R = correlation_loadings(model._data['X'], model.model['T'])
model.model['Rx'] = R
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Rx')
self.set_expvar_axlabels("evx")
radius = 1
center = (0,0)
c100 = patches.Circle(center,radius=radius,
facecolor='gray',
alpha=.1,
zorder=1)
c50 = patches.Circle(center, radius= sqrt(radius/2.0),
facecolor='gray',
alpha=.1,
zorder=2)
self.axes.add_patch(c100)
self.axes.add_patch(c50)
self.axes.axhline(lw=1.5,color='k')
self.axes.axvline(lw=1.5,color='k')
self.axes.set_xlim([-1.05,1.05])
self.axes.set_ylim([-1.05, 1.05])
self.canvas.show()
class LplsZCorrelationPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Lpls z-corr. loads (%s)" %model._dataset['Z'].get_name()
if not model.model.has_key('Rz'):
R = correlation_loadings(model._data['Z'].T, model.model['W'])
model.model['Rz'] = R
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='Rz')
self.set_expvar_axlabels("evz")
radius = 1
center = (0,0)
c100 = patches.Circle(center,radius=radius,
facecolor='gray',
alpha=.1,
zorder=1)
c50 = patches.Circle(center, radius=sqrt(radius/2.0),
facecolor='gray',
alpha=.1,
zorder=2)
self.axes.add_patch(c100)
self.axes.add_patch(c50)
self.axes.axhline(lw=1.5,color='k')
self.axes.axvline(lw=1.5,color='k')
self.axes.set_xlim([-1.05,1.05])
self.axes.set_ylim([-1.05, 1.05])
self.canvas.show()
class LplsHypoidCorrelationPlot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Hypoid correlations(%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, part_name='W')
class LplsExplainedVariancePlot(plots.Plot):
def __init__(self, model):
self.model = model
plots.Plot.__init__(self, "Explained variance")
xax = scipy.arange(model.model['evx'].shape[0])
self.axes.plot(xax, model.model['evx'], 'b-', label='X', linewidth=1.5)
self.axes.plot(xax, model.model['evy'], 'k-', label='Y', linewidth=1.5)
self.axes.plot(xax, model.model['evz'], 'g-', label='Z', linewidth=1.5)
self.canvas.draw()
class LineViewXc(plots.LineViewPlot):
"""A line view of centered raw data
"""
def __init__(self, model, name='Profiles'):
dx = model._dataset['X']
plots.LineViewPlot.__init__(self, dx, 1, None, False,name)
self.add_center_check_button(self.data_is_centered)
def add_center_check_button(self, ticked):
"""Add a checker button for centerd view of data."""
cb = gtk.CheckButton("Center")
cb.set_active(ticked)
cb.connect('toggled', self._toggle_center)
toolitem = gtk.ToolItem()
toolitem.set_expand(False)
toolitem.set_border_width(2)
toolitem.add(cb)
self._toolbar.insert(toolitem, -1)
toolitem.set_tooltip(self._toolbar.tooltips, "Column center the line view")
self._toolbar.show_all() #do i need this?
def _toggle_center(self, active):
if self.data_is_centered:
self._data = self._data + self._mn_data
self.data_is_centered = False
else:
self._mn_data = self._data.mean(0)
self._data = self._data - self._mn_data
self.data_is_centered = True
self.make_lines()
self.set_background()
self.set_current_selection(main.project.get_selection())
class ParalellCoordinates(plots.Plot):
    """Parallel coordinates for scores/loadings with many components.
    """
def __init__(self, model, p='loads'):
pass
class PlsQvalScatter(plots.ScatterPlot):
"""A vulcano like plot of loads vs qvals
"""
def __init__(self, model, pc=0):
if not model.model.has_key('w_tsq'):
return None
self._W = model.model['W']
dataset_1 = model.as_dataset('W')
dataset_2 = model.as_dataset('w_tsq')
id_dim = dataset_1.get_dim_name(0) #genes
sel_dim = dataset_1.get_dim_name(1) #_comp
sel_dim_2 = dataset_2.get_dim_name(1) #_zero_dim
id_1, = dataset_1.get_identifiers(sel_dim, [0])
id_2, = dataset_2.get_identifiers(sel_dim_2, [0])
if model.model.has_key('w_tsq'):
col = model.model['w_tsq'].ravel()
#col = normalise(col)
else:
col = 'g'
plots.ScatterPlot.__init__(self, dataset_1, dataset_2,
id_dim, sel_dim, id_1, id_2,
c=col, s=20, sel_dim_2=sel_dim_2,
name='Load Volcano')
class PredictionErrorPlot(plots.Plot):
"""A boxplot of prediction error vs. comp. number.
"""
def __init__(self, model, name="Prediction Error"):
if not model.model.has_key('sep'):
logger.log('notice', 'Model has no calculations of sep')
return None
plots.Plot.__init__(self, name)
self._frozen = True
self.current_dim = 'johndoe'
self.axes = self.fig.add_subplot(111)
# draw
sep = model.model['sep']
aopt = model.model['aopt']
bx_plot_lines = self.axes.boxplot(sqrt(sep))
aopt_marker = self.axes.axvline(aopt, linewidth=10,
color='r',zorder=0,
alpha=.5)
# add canvas
self.add(self.canvas)
self.canvas.show()
def set_current_selection(self, selection):
pass
class TRBiplot(BlmScatterPlot):
def __init__(self, model, absi=0, ordi=1):
title = "Target rotation biplot(%s)" %model._dataset['X'].get_name()
BlmScatterPlot.__init__(self, title, model, absi, ordi, 'B')
B = model.model.get('B')
# normalize B
Bnorm = scipy.apply_along_axis(scipy.linalg.norm, 1, B)
x = model._dataset['X'].copy()
Xc = x._array - x._array.mean(0)[newaxis]
w_rot = B/Bnorm
t_rot = dot(Xc, w_rot)
class InfluencePlot(plots.ScatterPlot):
    """ Returns a leverage vs residual scatter plot.
    """
    def __init__(self, model, dim, name="Influence"):
        if not model.model.has_key('levx'):
            logger.log('notice', 'Model has no calculations of leverages')
            return
        if not model.model.has_key('ssqx'):
            logger.log('notice', 'Model has no calculations of residuals')
            return
        ds1 = model.as_dataset('levx')
        ds2 = model.as_dataset('ssqx')
        # dimension/identifier lookup reconstructed after the pattern of
        # PlsQvalScatter above; the original left these names undefined
        id_dim = ds1.get_dim_name(0)
        sel_dim = ds1.get_dim_name(1)
        sel_dim_2 = ds2.get_dim_name(1)
        id_1, = ds1.get_identifiers(sel_dim, [0])
        id_2, = ds2.get_identifiers(sel_dim_2, [0])
        plots.ScatterPlot.__init__(self, ds1, ds2,
                                   id_dim, sel_dim, id_1, id_2,
                                   s=20, sel_dim_2=sel_dim_2,
                                   name=name)
class RMSEPPlot(plots.BarPlot):
def __init__(self, model, name="RMSEP"):
if not model.model.has_key('rmsep'):
logger.log('notice', 'Model has no calculations of sep')
return
dataset = model.as_dataset('rmsep')
plots.BarPlot.__init__(self, dataset, name=name)
def normalise(x):
"""Scale vector x to [0,1]
"""
x = x - x.min()
x = x/x.max()
return x

laydi/lib/cv_index.py
@@ -0,0 +1,66 @@
from numpy import array_split,arange
def cv(n, k, randomise=False, sequential=False):
"""
Generates k (training, validation) index pairs.
Each pair is a partition of arange(n), where validation is an iterable
of length ~n/k.
If randomise is true, a copy of index is shuffled before partitioning,
otherwise its order is preserved in training and validation.
Randomise overrides the sequential argument. If randomise is true,
sequential is False
If sequential is true the index is partioned in continous blocks,
otherwise interleaved ordering is used.
"""
index = xrange(N)
if randomise:
from random import shuffle
index = list(index)
shuffle(index)
sequential = False
if sequential:
for validation in array_split(index, K):
training = [i for i in index if i not in validation]
yield training, validation
else:
for k in xrange(K):
training = [i for i in index if i % K != k]
validation = [i for i in index if i % K == k]
yield training, validation
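# Illustrative usage (not part of the original file): 3-fold interleaved CV
# over 6 samples.
#   >>> for training, validation in cv(6, 3):
#   ...     print training, validation
#   [1, 2, 4, 5] [0, 3]
#   [0, 2, 3, 5] [1, 4]
#   [0, 1, 3, 4] [2, 5]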
def shuffle_diag(shape, K, randomise=False, sequential=False):
"""
Generates k (training, validation) index pairs.
"""
m, n = shape
if K>m or K>n:
msg = "You may not use more subsets than max(n_rows, n_cols)"
raise ValueError, msg
mon = max(m, n)
#index = xrange(n)
index = [i for i in range(m*n) if i % m == 0]
print index
if randomise:
from random import shuffle
index = list(index)
shuffle(index)
sequential = False
if sequential:
start_inds = array_split(index, K)
else:
for k in xrange(K):
start_inds = [index[i] for i in xrange(n) if i % K == k]
print start_inds
for start in start_inds:
ind = arange(start, n*m, mon+1)
yield ind

laydi/lib/cx_stats.py
@@ -0,0 +1,438 @@
import time
import cPickle
from scipy import zeros,zeros_like,sqrt,dot,trace,sign,round_,argmax,\
sort,ravel,newaxis,asarray,diag,sum,outer,argsort,arange,ones_like,\
all,apply_along_axis,eye,atleast_2d,empty
from scipy.linalg import svd,inv,norm,det,sqrtm
from numpy import median
#import plots_lpls
from cx_utils import mat_center
from validation import pls_jkW, lpls_jk
from select_generators import shuffle_1d
from engines import pca, pls, bridge
from engines import nipals_lpls as lpls
def hotelling(Pcv, P, p_center='med', cov_center='med',
alpha=0.3, crot=True, strict=False):
"""Returns regularized hotelling T^2.
alpha -- regularisation towards pooled cov estimates
beta -- regularisation for unstable eigenvalues
p_center -- location method for submodels
cov_center -- location method for sub coviariances
alpha -- regularisation
crot -- rotate submodels toward full?
strict -- only rotate 90 degree ?
"""
m, n = P.shape
n_sets, n, amax = Pcv.shape
# allocate
T_sq = empty((n, ),dtype='d')
Cov_i = zeros((n, amax, amax),dtype='d')
# rotate sub_models to full model
if crot:
for i, Pi in enumerate(Pcv):
Pcv[i] = procrustes(P, Pi, strict=strict)
# center of pnull
if p_center=='med':
P_ctr = median(Pcv, 0)
elif p_center=='mean':
# fixme: mean is unstable
P_ctr = Pcv.mean(0)
else: #use full
P_ctr = P
for i in xrange(n):
Pi = Pcv[:,i,:] # (n_sets x amax)
Pi_ctr = P_ctr[i,:] # (1 x amax)
Pim = (Pi - Pi_ctr[newaxis])*sqrt(n_sets-1)
Cov_i[i] = (1./n_sets)*dot(Pim.T, Pim)
if cov_center == 'med':
Cov = median(Cov_i, 0)
else:
Cov = Cov_i.mean(0)
reg_cov = (1. - alpha)*Cov_i + alpha*Cov
for i in xrange(n):
#Pc = P_ctr[i,:][:,newaxis]
Pc = P_ctr[i,:]
sigma = reg_cov[i]
# T_sq[i] = (dot(Pc, inv(sigma) )*Pc).sum() #slow
T_sq[i] = dot(dot(Pc, inv(sigma)), Pc) # dont need to care about transposes
#T_sq[i] = dot(dot(Pc.T, inv(sigma)), Pc).ravel()
return T_sq
def procrustes(A, B, strict=True, center=False, verbose=False):
"""Rotation of B to A.
strict -- Only do flipping and shuffling
center -- Center before rotation, translate back after
verbose -- Print ssq
No scaling calculated.
Output B_rot = Rotated B
"""
if center:
A,mn_A = mat_center(A, ret_mn=True)
B,mn_B = mat_center(B, ret_mn=True)
u,s,vh = svd(dot(B.T, A))
v = vh.T
Cm = dot(u, v.T) #orthogonal rotation matrix
if strict: # just inverting and flipping
Cm = ensure_strict(Cm)
b_rot = dot(B, Cm)
if verbose:
print Cm.round()
fit = sum(ravel(B - b_rot)**2)
print "Sum of squares: %s" %fit
if center:
return mn_B + b_rot
else:
return b_rot
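# Illustrative sketch (not part of the original file): with strict=True only
# sign flips (and reordering) are undone, so rotating a sign-flipped copy of
# a loading matrix P recovers P exactly:
#   P_rot = procrustes(P, -P, strict=True)   # P_rot == P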
def expl_var_x(Xc, T):
"""Returns explained variance of X.
T should carry variance in length, Xc has zero col-mean.
"""
exp_var_x = diag(dot(T.T, T))*100/(sum(Xc**2))
return exp_var_x
def expl_var_y(Y, T, Q):
"""Returns explained variance of Y.
"""
# centered Y
exp_var_y = zeros((Q.shape[1], ))
for a in range(Q.shape[1]):
Ya = outer(T[:,a], Q[:,a])
exp_var_y[a] = 100*sum(Ya**2)/sum(Y**2)
return exp_var_y
def pls_qvals(a, b, aopt=None, alpha=.3,
n_iter=20, algo='pls',
center=True,
sim_method='shuffle',
p_center='med', cov_center='med',
crot=True, strict=False):
"""Returns qvals for pls model.
input:
a -- data matrix
b -- data matrix
aopt -- scalar, opt. number of components
alpha -- [0,1] regularisation parameter for T2-test
n_iter -- number of permutations
sim_method -- permutation method ['shuffle']
p_center -- location estimator for sub models ['med']
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use stict (rot/flips only) rotations?
"""
m, n = a.shape
TSQ = zeros((n, n_iter), dtype='d') # (nvars x n_subsets)
n_false = zeros((n, n_iter), dtype='d')
#full model
    if center:
        ac = a - a.mean(0)
        bc = b - b.mean(0)
    else:
        ac, bc = a, b
if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast')
else:
dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo,center=True)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
#t0 = time.time()
Vs = shuffle_1d(bc, n_iter, axis=0)
for i, b_shuff in enumerate(Vs):
#t1 = time.time()
if algo=='bridge':
dat = bridge(ac, b_shuff, aopt, 'loads','fast')
else:
dat = pls(ac, b_shuff, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b_shuff, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
#print time.time() - t1
return fdr(tsq_full, TSQ, median)
def ensure_strict(C, only_flips=True):
"""Ensure that a rotation matrix does only 90 degree rotations.
In multiplication with pcs this allows flips and reordering.
if only_flips is True there will onlt be flips allowed
"""
Cm = C
S = sign(C) # signs
if only_flips==True:
C = eye(Cm.shape[0])*S
return C
Cm = zeros_like(C)
Cm.putmask(1.,abs(C)>.6)
if det(Cm)>1:
raise ValueError,"Implement this!"
return Cm*S
def pls_qvals_II(a, b, aopt=None, center=True, alpha=.3,
n_iter=20, algo='pls',
sim_method='shuffle',
p_center='med', cov_center='med',
crot=True, strict=False):
"""Returns qvals for pls model.
Shuffling of variables in X.
    The null model is 'if I put genes randomly on the network'; genes that
    are still significant are so due to network structure, not to
    covariance with the response.
input:
a -- data matrix
b -- data matrix
aopt -- scalar, opt. number of components
alpha -- [0,1] regularisation parameter for T2-test
n_iter -- number of permutations
sim_method -- permutation method ['shuffle']
p_center -- location estimator for sub models ['med']
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use strict (rot/flips only) rotations?
"""
m, n = a.shape
TSQ = zeros((n, n_iter), dtype='<f8') # (nvars x n_subsets)
n_false = zeros((n, n_iter), dtype='<f8')
#full model
# center?
if center==True:
ac = a - a.mean(0)
bc = b - b.mean(0)
if algo=='bridge':
dat = bridge(ac, bc, aopt, 'loads', 'fast')
else:
dat = pls(ac, bc, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
tsq_full = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
t0 = time.time()
Vs = shuffle_1d(a, n_iter, 1)
for i, a_shuff in enumerate(Vs):
t1 = time.time()
a = a_shuff - a_shuff.mean(0)
if algo=='bridge':
dat = bridge(a, b, aopt, 'loads','fast')
else:
dat = pls(a, b, aopt, 'loads', 'fast')
Wcv = pls_jkW(a, b, aopt, n_blocks=None, algo=algo)
TSQ[:,i] = hotelling(Wcv, dat['W'], p_center=p_center,
alpha=alpha, crot=crot, strict=strict,
cov_center=cov_center)
print time.time() - t1
sort_index = argsort(tsq_full)[::-1]
back_sort_index = sort_index.argsort()
print time.time() - t0
# count false positives
tsq_full_sorted = tsq_full.take(sort_index)
for i in xrange(n_iter):
for j in xrange(n):
n_false[j,i] = sum(TSQ[:,i]>=tsq_full[j])
false_pos = median(n_false, 1)
ll = arange(1, len(false_pos)+1, 1)
sort_qval = false_pos.take(sort_index)/ll
qval = false_pos/ll.take(back_sort_index)
print time.time() - t0
#return qval, false_pos, TSQ, tsq_full
return qval
def leverage(aopt=1,*args):
"""Returns leverages
input : aopt, number of components to base leverage calculations on
*args, matrices of normed blm-paramters
output: leverages
For PCA typical inputs are normalised T or normalised P
For PLSR typical inputs are normalised T or normalised W
"""
if aopt<1:
raise ValueError,"Leverages only make sense for aopt>0"
lev = []
for u in args:
lev_u = 1./u.shape[0] + dot(u[:,:aopt], u[:,:aopt].T).diagonal()
lev.append(lev_u)
return lev
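# Minimal usage sketch (illustrative, random scores):
# >>> from scipy import rand
# >>> T = rand(10, 3) # e.g. normalised scores
# >>> lev, = leverage(2, T)
# >>> lev.shape
# (10,)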
def variances(a, t, p):
"""Returns explained variance and ind. var from blm-params.
input:
a -- full centered matrix
t,p -- parameters from a bilinear approx of the above matrix.
output:
var -- variance of each component
var_exp -- cumulative explained variance in percentage
Typical inputs are: X(centered),T,P for PCA or
X(centered),T,P / Y(centered),T,Q for PLSR.
"""
tot_var = sum(a**2)
var = 100*(sum(p**2, 0)*sum(t**2, 0))/tot_var
var_exp = var.cumsum()
return var, var_exp
def residual_diagnostics(Y, Yhat, aopt=1):
"""Root mean errors and press values.
R2 vals
"""
pass
def ssq(E, axis=0, weights=None):
"""Sum of squares, supports weights."""
n = E.shape[axis]
if weights==None:
weights = eye(n)
else:
weights = diag(weights)
if axis==0:
Ew = dot(weights, E)
elif axis==1:
Ew = dot(E, weights)
else:
raise NotImplementedError, "Higher order modes not supported"
return pow(Ew,2).sum(axis)
def vnorm(x):
"""Returns the euclidian norm of a vector.
This is considerably faster than linalg.norm
"""
return sqrt(dot(x,x.conj()))
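# Minimal sketch: vnorm agrees with scipy.linalg.norm for 1d arrays.
# >>> from scipy import array
# >>> vnorm(array([3., 4.]))
# 5.0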
def mahalanobis(a, loc=None, acov=None, invcov=None):
"""Returns the distance of each observation in a
from the location estimate (loc) of the data,
relative to the shape of the data.
a : data matrix (n observations in rows, p variables in columns)
loc : location estimate of the data (p-dimensional vector)
acov or invcov : scatter estimate of the data or the inverse of the scatter estimate (p x p matrix)
:Returns:
A vector containing the distance of each observation to loc.
"""
n, p = a.shape
if loc==None:
loc = a.mean(0)
loc = atleast_2d(loc)
if loc.shape[1]==1:
loc = loc.T; #ensure rowvector
assert(loc.shape[1]==p)
xc = a - loc
if acov==None and invcov==None:
acov = dot(xc.T, xc)
if invcov != None:
covmat = atleast_2d(invcov)
if min(covmat.shape)==1:
covmat = diag(invcov.ravel())
else:
covmat = atleast_2d(acov)
if min(covmat.shape)==1:
covmat = diag(covmat.ravel())
covmat = inv(covmat)
# mdist = diag(dot(dot(xc, covmat),xc.T))
mdist = (dot(xc, covmat)*xc).sum(1)
return mdist
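# Minimal usage sketch (synthetic data, default location and scatter):
# >>> from scipy import rand
# >>> d = mahalanobis(rand(20, 3))
# >>> d.shape
# (20,)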
def lpls_qvals(a, b, c, aopt=None, alpha=.3, zx_alpha=.5, n_iter=20,
sim_method='shuffle',p_center='med', cov_center='med',crot=True,
strict=False, mean_ctr=[2,0,2], nsets=None):
"""Returns qvals for l-pls model.
input:
a -- data matrix
b -- data matrix
c -- data matrix
aopt -- scalar, opt. number of components
alpha -- [0,1] regularisation parameter for T2-test
zx_alpha -- [0,1] how much z info to include
n_iter -- number of permutations
sim_method -- permutation method ['shuffle']
p_center -- location estimator for sub models ['med']
cov_center -- location estimator for covariance of submodels ['med']
crot -- bool, use rotations of sub models?
strict -- bool, use strict (rot/flips only) rotations?
"""
m, n = a.shape
p, k = c.shape
pert_tsq_x = zeros((n, n_iter), dtype='d') # (nxvars x n_subsets)
pert_tsq_z = zeros((p, n_iter), dtype='d') # (nzvars x n_subsets)
# Full model
#print "Full model start"
dat = lpls(a, b, c, aopt, scale='loads', mean_ctr=mean_ctr)
Wc, Lc = lpls_jk(a, b, c , aopt, nsets=nsets)
#print "Full hot"
cal_tsq_x = hotelling(Wc, dat['W'], alpha = alpha)
cal_tsq_z = hotelling(Lc, dat['L'], alpha = 0)
# Perturbations
Vs = shuffle_1d(b, n_iter, axis=0)
for i, b_shuff in enumerate(Vs):
print i
dat = lpls(a, b_shuff,c, aopt, scale='loads', mean_ctr=mean_ctr)
Wi, Li = lpls_jk(a, b_shuff, c, aopt, nsets=nsets)
pert_tsq_x[:,i] = hotelling(Wi, dat['W'], alpha=alpha)
pert_tsq_z[:,i] = hotelling(Li, dat['L'], alpha=alpha)
return cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x
def fdr(tsq, tsqp, loc_method='mean'):
n, = tsq.shape
k, m = tsqp.shape
assert(n==k)
n_false = empty((n, m), 'd')
sort_index = argsort(tsq)[::-1]
r_index = argsort(sort_index)
for i in xrange(m):
for j in xrange(n):
n_false[j,i] = (tsqp[:,i]>tsq[j]).sum()
#cPickle.dump(n_false, open("/tmp/nfalse.dat_"+str(n), "w"))
if loc_method=='mean':
fp = n_false.mean(1)
elif loc_method == 'median':
fp = median(n_false.T)
else:
raise ValueError
n_signif = (arange(n) + 1.0)[r_index]
fd_rate = fp/n_signif
return fd_rate
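# Minimal usage sketch (synthetic T^2 values, 5 permutations):
# >>> from scipy import rand
# >>> qv = fdr(rand(30), rand(30, 5), loc_method='mean')
# >>> qv.shape
# (30,)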

laydi/lib/cx_utils.py Normal file

@ -0,0 +1,115 @@
from scipy import apply_along_axis,newaxis,zeros,\
median,round_,nonzero,dot,argmax,any,sqrt,ndarray,\
trace,zeros_like,sign,sort,real,argsort,rand,array,\
matrix,nan
from scipy.linalg import norm,svd,inv,eig
from numpy import median
def normalise(a, axis=0, return_scales=False):
s = apply_along_axis(norm, axis, a)
if axis==0:
s = s[newaxis]
else:
s = s[:,newaxis]
a_s = a/s
if return_scales:
return a_s, s
return a_s
def sub2ind(shape, i, j):
"""Indices from subscripts. Only support for 2d"""
row,col = shape
ind = []
for k in xrange(len(i)):
for m in xrange(len(j)):
ind.append(i[k]*col + j[m])
return ind
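# Minimal sketch: linear indices into a flattened 2 x 3 array.
# >>> sub2ind((2, 3), [0, 1], [2])
# [2, 5]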
def sorted_eig(a, b=None,sort_by='sm'):
"""
Just eig with real part of output sorted:
This is for convenience only, not general!
sort_by='sm': return the eigenvectors by eigenvalues
of smallest magnitude first. (default)
'lm': returns largest eigenvalues first
output: just as eig with 2 outputs
-- s,v (eigvals,eigenvectors)
(This is reversed output compared to matlab)
"""
s,v = eig(a, b)
s = real(s) # dont expect any imaginary part
v = real(v)
ind = argsort(s)
if sort_by=='lm':
ind = ind[::-1]
v = v.take(ind, 1)
s = s.take(ind)
return s,v
def str2num(string_number):
"""Convert input (string number) into number, if float(string_number) fails, a nan is inserted.
"""
missings = ['','nan','NaN','NA']
try:
num = float(string_number)
except:
if string_number in missings:
num = nan
else:
print "Found strange entry: %s" %string_number
raise
return num
def randperm(n):
r = rand(n)
dict={}
for i in range(n):
dict[r[i]] = i
r = sort(r)
out = zeros(n)
for i in range(n):
out[i] = dict[r[i]]
return array(out).astype('i')
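# Minimal sketch: a random permutation of range(n).
# >>> p = randperm(5)
# >>> sorted(p)
# [0, 1, 2, 3, 4]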
def mat_center(X,axis=0,ret_mn=False):
"""Mean center matrix along axis.
X -- matrix, data
axis -- dim,
ret_mn -- bool, return mean
output:
Xc, [mnX]
NB: axis=0 subtracts the column means, axis=1 subtracts the row means.
Default is axis=0.
"""
try:
rows,cols = X.shape
except ValueError:
raise ValueError, "The X data needs to be two-dimensional"
if axis==0:
mnX = X.mean(axis)[newaxis]
Xs = X - mnX
elif axis==1:
mnX = X.mean(axis)[newaxis]
Xs = (X.T - mnX).T
if ret_mn:
return Xs,mnX
else:
return Xs
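# Minimal sketch: the default centering removes the column means.
# >>> from scipy import array
# >>> Xc, mn = mat_center(array([[1., 2.], [3., 4.]]), ret_mn=True)
# >>> Xc.mean(0).any()
# False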
def m_shape(array):
"""Returns the array shape on the form of a numpy.matrix."""
return matrix(array).shape

laydi/lib/engines.py Normal file

@ -0,0 +1,879 @@
"""Module contain algorithms for low-rank models.
There is almost no typechecking of any kind here, just focus on speed
"""
import math
import warnings
from scipy.linalg import svd,inv
from scipy import dot,empty,eye,newaxis,zeros,sqrt,diag,\
apply_along_axis,mean,ones,randn,empty_like,outer,r_,c_,\
rand,sum,cumsum,matrix, expand_dims,minimum,where,arange,inner,tile
has_sym = True
has_arpack = True
try:
from symeig import symeig
except:
has_sym = False
try:
from scipy.sandbox import arpack
except:
has_arpack = False
def pca(a, aopt,scale='scores',mode='normal',center_axis=0):
""" Principal Component Analysis.
Performs PCA on given matrix and returns results in a dictionary.
:Parameters:
a : array
Data measurement matrix, (samples x variables)
aopt : int
Number of components to use, aopt<=min(samples, variables)
:Returns:
results : dict
keys -- values, T -- scores, P -- loadings, E -- residuals,
lev --leverages, ssq -- sum of squares, expvar -- cumulative
explained variance, aopt -- number of components used
:OtherParameters:
mode : str
Amount of info retained, ('fast', 'normal', 'detailed')
center_axis : int
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
:SeeAlso:
- pcr : other blm
- pls : other blm
- lpls : other blm
Notes
-----
Uses kernel speed-up if m>>n or m<<n.
If residuals turn rank deficient, a lower number of component than given
in input will be used. The number of components used is given in
results-dict.
Examples
--------
>>> import scipy,engines
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
>>> dat=engines.pca(a, 2)
>>> dat['expvarx']
array([0.,99.8561562, 100.])
"""
m, n = a.shape
assert(aopt<=min(m,n))
if center_axis>=0:
a = a - expand_dims(a.mean(center_axis), center_axis)
if m>(n+100) or n>(m+100):
u, s, v = esvd(a, amax=None) # fixme:amax option need to work with expl.var
else:
u, s, vt = svd(a, 0)
v = vt.T
e = s**2
tol = 1e-10
eff_rank = sum(s>s[0]*tol)
aopt = minimum(aopt, eff_rank)
T = u*s
s = s[:aopt]
T = T[:,:aopt]
P = v[:,:aopt]
if scale=='loads':
T = T/s
P = P*s
if mode == 'fast':
return {'T':T, 'P':P, 'aopt':aopt}
if mode=='detailed':
E = empty((aopt, m, n))
ssq = []
lev = []
for ai in range(aopt):
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
ssq.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
if scale=='loads':
lev.append([((s*T)**2).sum(1), (P**2).sum(1)])
else:
lev.append([(T**2).sum(1), ((s*P)**2).sum(1)])
else:
# residuals
E = a - dot(T, P.T)
#E = a
SEP = E**2
ssq = [SEP.sum(0), SEP.sum(1)]
# leverages
if scale=='loads':
lev = [(1./m)+(T**2).sum(1), (1./n)+((P/s)**2).sum(1)]
else:
lev = [(1./m)+((T/s)**2).sum(1), (1./n)+(P**2).sum(1)]
# variances
expvarx = r_[0, 100*e.cumsum()/e.sum()][:aopt+1]
return {'T':T, 'P':P, 'E':E, 'expvarx':expvarx, 'levx':lev, 'ssqx':ssq, 'aopt':aopt, 'eigvals': e[:aopt,newaxis]}
def pcr(a, b, aopt, scale='scores',mode='normal',center_axis=0):
""" Principal Component Regression.
Performs PCR on given matrix and returns results in a dictionary.
:Parameters:
a : array
Data measurement matrix, (samples x variables)
b : array
Data response matrix, (samples x responses)
aopt : int
Number of components to use, aopt<=min(samples, variables)
:Returns:
results : dict
keys -- values, T -- scores, P -- loadings, E -- residuals,
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
explained variance, aopt -- number of components used
:OtherParameters:
mode : str
Amount of info retained, ('fast', 'normal', 'detailed')
center_axis : int
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
:SeeAlso:
- pca : other blm
- pls : other blm
- lpls : other blm
Notes
-----
Uses kernel speed-up if m>>n or m<<n.
If residuals turn rank deficient, a lower number of component than given
in input will be used. The number of components used is given in results-dict.
Examples
--------
>>> import scipy,engines
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
>>> b=scipy.asarray([[1,1],[2,3]])
>>> dat=engines.pcr(a, b, 2)
>>> dat['expvarx']
array([0.,99.8561562, 100.])
"""
k, l = m_shape(b)
if center_axis>=0:
b = b - expand_dims(b.mean(center_axis), center_axis)
dat = pca(a, aopt=aopt, scale=scale, mode=mode, center_axis=center_axis)
T = dat['T']
weights = apply_along_axis(vnorm, 0, T)**2
if scale=='loads':
Q = dot(b.T, T*weights)
else:
Q = dot(b.T, T/weights)
if mode=='fast':
dat.update({'Q':Q})
return dat
if mode=='detailed':
F = empty((aopt, k, l))
for i in range(aopt):
F[i,:,:] = b - dot(T[:,:i+1], Q[:,:i+1].T)
else:
F = b - dot(T, Q.T)
expvary = r_[0, 100*((T**2).sum(0)*(Q**2).sum(0)/(b**2).sum()).cumsum()[:aopt]]
#fixme: Y-var leverages
dat.update({'Q':Q, 'F':F, 'expvary':expvary})
return dat
def pls(a, b, aopt=2, scale='scores', mode='normal', center_axis=-1, ab=None):
"""Partial Least Squares Regression.
Performs PLS on given matrix and returns results in a dictionary.
:Parameters:
a : array
Data measurement matrix, (samples x variables)
b : array
Data response matrix, (samples x responses)
aopt : int
Number of components to use, aopt<=min(samples, variables)
:Returns:
results : dict
keys -- values, T -- scores, P -- loadings, E -- residuals,
levx -- leverages, ssqx -- sum of squares, expvarx -- cumulative
explained variance of descriptors, expvary -- cumulative explained
variance of responses, aopt -- number of components used
:OtherParameters:
mode : str
Amount of info retained, ('fast', 'normal', 'detailed')
center_axis : int
Center along given axis. If neg.: no centering (-inf,..., matrix modes)
:SeeAlso:
- pca : other blm
- pcr : other blm
- lpls : other blm
Notes
-----
Uses kernel speed-up if m>>n or m<<n.
If residuals turn rank deficient, a lower number of component than given
in input will be used. The number of components used is given in results-dict.
Examples
--------
>>> import scipy,engines
>>> a=scipy.asarray([[1,2,3],[2,4,5]])
>>> b=scipy.asarray([[1,1],[2,3]])
>>> dat=engines.pls(a, b, 2)
>>> dat['expvarx']
array([0.,99.8561562, 100.])
"""
m, n = m_shape(a)
if ab!=None:
mm, l = m_shape(ab)
assert(m==mm)
else:
k, l = m_shape(b)
if center_axis>=0:
a = a - expand_dims(a.mean(center_axis), center_axis)
b = b - expand_dims(b.mean(center_axis), center_axis)
W = empty((n, aopt))
P = empty((n, aopt))
R = empty((n, aopt))
Q = empty((l, aopt))
T = empty((m, aopt))
B = empty((aopt, n, l))
tt = empty((aopt,))
if ab==None:
ab = dot(a.T, b)
for i in range(aopt):
if ab.shape[1]==1: #pls 1
w = ab.reshape(n, l)
w = w/vnorm(w)
elif n<l: # more yvars than xvars
if has_sym:
s, w = symeig(dot(ab, ab.T),range=[n,n],overwrite=True)
else:
w, s, vh = svd(dot(ab, ab.T))
w = w[:,:1]
else: # standard wide xdata
if has_sym:
s, q = symeig(dot(ab.T, ab),range=[l,l],overwrite=True)
else:
q, s, vh = svd(dot(ab.T, ab))
q = q[:,:1]
w = dot(ab, q)
w = w/vnorm(w)
r = w.copy()
if i>0:
for j in range(0, i, 1):
r = r - dot(P[:,j].T, w)*R[:,j][:,newaxis]
t = dot(a, r)
tt[i] = tti = dot(t.T, t).ravel()
p = dot(a.T, t)/tti
q = dot(r.T, ab).T/tti
ab = ab - dot(p, q.T)*tti
T[:,i] = t.ravel()
W[:,i] = w.ravel()
if mode=='fast' and i==aopt-1:
if scale=='loads':
tnorm = sqrt(tt)
T = T/tnorm
W = W*tnorm
return {'T':T, 'W':W}
P[:,i] = p.ravel()
R[:,i] = r.ravel()
Q[:,i] = q.ravel()
#B[i] = dot(R[:,:i+1], Q[:,:i+1].T)
qnorm = apply_along_axis(vnorm, 0, Q)
tnorm = sqrt(tt)
pp = (P**2).sum(0)
if mode=='detailed':
E = empty((aopt, m, n))
F = empty((aopt, k, l))
ssqx, ssqy = [], []
leverage = empty((aopt, m))
h2x = [] #hotellings T^2
h2y = []
for ai in range(aopt):
E[ai,:,:] = a - dot(T[:,:ai+1], P[:,:ai+1].T)
F[ai,:,:] = b - dot(T[:,:ai+1], Q[:,:ai+1].T)
ssqx.append([(E[ai,:,:]**2).mean(0), (E[ai,:,:]**2).mean(1)])
ssqy.append([(F[ai,:,:]**2).mean(0), (F[ai,:,:]**2).mean(1)])
leverage[ai,:] = 1./m + ((T[:,:ai+1]/tnorm[:ai+1])**2).sum(1)
h2y.append(1./k + ((Q[:,:ai+1]/qnorm[:ai+1])**2).sum(1))
else:
# residuals
E = a - dot(T, P.T)
F = b - dot(T, Q.T)
sepx = E**2
ssqx = [sepx.sum(0), sepx.sum(1)]
sepy = F**2
ssqy = [sepy.sum(0), sepy.sum(1)]
# leverage
leverage = 1./m + ((T/tnorm)**2).sum(1)
h2x = []
h2y = []
# variances
tp= tt*pp
tq = tt*qnorm*qnorm
expvarx = r_[0, 100*tp/(a*a).sum()]
expvary = r_[0, 100*tq/(b*b).sum()]
if scale=='loads':
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
P = P*tnorm
return {'Q':Q, 'P':P, 'T':T, 'W':W, 'R':R, 'E':E, 'F':F,
'expvarx':expvarx, 'expvary':expvary, 'ssqx':ssqx, 'ssqy':ssqy,
'leverage':leverage, 'h2':h2x}
def w_simpls(aat, b, aopt):
""" Simpls for wide matrices.
Fast pls for crossval, used in calc rmsep for wide X
There is no P or W. T is normalised
"""
bb = b.copy()
m, m = aat.shape
U = empty((m, aopt)) # W
T = empty((m, aopt))
H = empty((m, aopt)) # R
PROJ = empty((m, aopt)) # P?
for i in range(aopt):
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
u = dot(b, q[:,:1]) #y-factor scores
U[:,i] = u.ravel()
t = dot(aat, u)
t = t/vnorm(t)
T[:,i] = t.ravel()
h = dot(aat, t) #score-weights
H[:,i] = h.ravel()
PROJ[:,:i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, H[:,:i+1])) )
if i<aopt:
b = b - dot(PROJ[:,:i+1], dot(H[:,:i+1].T,b) )
C = dot(bb.T, T)
return {'T':T, 'U':U, 'Q':C, 'H':H}
def w_pls(aat, b, aopt):
""" Pls for wide matrices.
Fast pls for crossval, used in calc rmsep for wide X
There is no P or W. T is normalised
aat = centered kernel matrix
b = centered y
"""
bb = b.copy()
k, l = m_shape(b)
m, m = m_shape(aat)
U = empty((m, aopt)) # W
T = empty((m, aopt))
R = empty((m, aopt)) # R
PROJ = empty((m, aopt)) # P?
for i in range(aopt):
if has_sym:
s, q = symeig(dot(dot(b.T, aat), b), range=(l,l),overwrite=True)
else:
q, s, vh = svd(dot(dot(b.T, aat), b), full_matrices=0)
q = q[:,:1]
u = dot(b , q) #y-factor scores
U[:,i] = u.ravel()
t = dot(aat, u)
t = t/vnorm(t)
T[:,i] = t.ravel()
r = dot(aat, t)#score-weights
#r = r/vnorm(r)
R[:,i] = r.ravel()
PROJ[:,: i+1] = dot(T[:,:i+1], inv(dot(T[:,:i+1].T, R[:,:i+1])) )
if i<aopt:
b = b - dot(PROJ[:,:i+1], dot(R[:,:i+1].T, b) )
C = dot(bb.T, T)
return {'T':T, 'U':U, 'Q':C, 'R':R}
def bridge(a, b, aopt, scale='scores', mode='normal', r=0):
"""Undeflated Ridged svd(X'Y)
"""
m, n = m_shape(a)
k, l = m_shape(b)
u, s, vt = svd(b, full_matrices=0)
g0 = dot(u*s, u.T)
g = (1 - r)*g0 + r*eye(m)
ag = dot(a.T, g)
u, s, vt = svd(ag, full_matrices=0)
W = u[:,:aopt]
K = vt[:aopt,:].T
T = dot(a, W)
tnorm = apply_along_axis(vnorm, 0, T) # norm of T-columns
if mode == 'fast':
if scale=='loads':
T = T/tnorm
W = W*tnorm
return {'T':T, 'W':W}
U = dot(g0, K) #fixme check this
Q = dot(b.T, dot(T, inv(dot(T.T, T)) ))
B = zeros((aopt, n, l), dtype='f')
for i in range(aopt):
B[i] = dot(W[:,:i+1], Q[:,:i+1].T)
if mode == 'detailed':
E = empty((aopt, m, n))
F = empty((aopt, k, l))
for i in range(aopt):
E[i] = a - dot(T[:,:i+1], W[:,:i+1].T)
F[i] = b - dot(a, B[i])
else: #normal
F = b - dot(a, B[-1])
E = a - dot(T, W.T)
if scale=='loads':
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
return {'B':B, 'W':W, 'T':T, 'Q':Q, 'E':E, 'F':F, 'U':U, 'P':W}
def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=False):
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
(X!Z)->Y
:input:
X : data matrix (m, n)
Y : data matrix (m, l)
Z : data matrix (n, o)
:output:
T : X-scores
W : X-weights/Z-weights
P : X-loadings
Q : Y-loadings
U : X-Y relation
L : Z-scores
K : Z-loads
B : Regression coefficients X->Y
b0: Regression coefficient intercept
evx : X-explained variance
evy : Y-explained variance
evz : Z-explained variance
mnx : X location
mny : Y location
mnz : Z location
:Notes:
"""
if mean_ctr!=None:
xctr, yctr, zctr = mean_ctr
X, mnX = center(X, xctr)
Y, mnY = center(Y, yctr)
Z, mnZ = center(Z, zctr)
varX = (X**2).sum()
varY = (Y**2).sum()
varZ = (Z**2).sum()
m, n = X.shape
k, l = Y.shape
u, o = Z.shape
# initialize
U = empty((k, a_max))
Q = empty((l, a_max))
T = empty((m, a_max))
W = empty((n, a_max))
P = empty((n, a_max))
K = empty((o, a_max))
L = empty((u, a_max))
B = empty((a_max, n, l))
#b0 = empty((a_max, 1, l))
var_x = empty((a_max,))
var_y = empty((a_max,))
var_z = empty((a_max,))
MAX_ITER = 250
LIM = 1e-1
for a in range(a_max):
if verbose:
print "\nWorking on comp. %s" %a
u = Y[:,:1]
diff = 1
niter = 0
while (diff>LIM and niter<MAX_ITER):
niter += 1
u1 = u.copy()
w = dot(X.T, u)
w = w/sqrt(dot(w.T, w))
#w = w/dot(w.T, w)
l = dot(Z, w)
k = dot(Z.T, l)
k = k/sqrt(dot(k.T, k))
#k = k/dot(k.T, k)
w = alpha*k + (1-alpha)*w
#print sqrt(dot(w.T, w))
w = w/sqrt(dot(w.T, w))
t = dot(X, w)
c = dot(Y.T, t)
c = c/sqrt(dot(c.T, c))
u = dot(Y, c)
diff = dot((u-u1).T, (u-u1))
if verbose:
print "Converged after %s iterations" %niter
print "Error: %.2E" %diff
tt = dot(t.T, t)
p = dot(X.T, t)/tt
q = dot(Y.T, t)/tt
l = dot(Z, w)
U[:,a] = u.ravel()
W[:,a] = w.ravel()
P[:,a] = p.ravel()
T[:,a] = t.ravel()
Q[:,a] = q.ravel()
L[:,a] = l.ravel()
K[:,a] = k.ravel()
X = X - dot(t, p.T)
Y = Y - dot(t, q.T)
Z = (Z.T - dot(w, l.T)).T
var_x[a] = pow(X, 2).sum()
var_y[a] = pow(Y, 2).sum()
var_z[a] = pow(Z, 2).sum()
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
#b0[a] = mnY - dot(mnX, B[a])
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
evz = 100.0*(1 - var_z/varZ)
if scale=='loads':
tnorm = apply_along_axis(vnorm, 0, T)
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
knorm = apply_along_axis(vnorm, 0, K)
L = L*knorm
K = K/knorm
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'L':L, 'K':K, 'B':B, 'evx':evx, 'evy':evy, 'evz':evz,'mnx': mnX, 'mny': mnY, 'mnz': mnZ}
def nipals_pls(X, Y, a_max, alpha=.7, ax_center=0, mode='normal', scale='scores', verbose=False):
"""Partial Least Sqaures Regression by the nipals algorithm.
(X!Z)->Y
:input:
X : data matrix (m, n)
Y : data matrix (m, l)
:output:
T : X-scores
W : X-weights/Z-weights
P : X-loadings
Q : Y-loadings
U : X-Y relation
B : Regression coefficients X->Y
b0: Regression coefficient intercept
evx : X-explained variance
evy : Y-explained variance
:Notes:
"""
if ax_center>=0:
mn_x = expand_dims(X.mean(ax_center), ax_center)
mn_y = expand_dims(Y.mean(ax_center), ax_center)
X = X - mn_x
Y = Y - mn_y
varX = pow(X, 2).sum()
varY = pow(Y, 2).sum()
m, n = X.shape
k, l = Y.shape
# initialize
U = empty((k, a_max))
Q = empty((l, a_max))
T = empty((m, a_max))
W = empty((n, a_max))
P = empty((n, a_max))
B = empty((a_max, n, l))
b0 = empty((a_max, m, l))
var_x = empty((a_max,))
var_y = empty((a_max,))
t1 = X[:,:1]
for a in range(a_max):
if verbose:
print "\n Working on comp. %s" %a
u = Y[:,:1]
diff = 1
MAX_ITER = 100
lim = 1e-16
niter = 0
while (diff>lim and niter<MAX_ITER):
niter += 1
#u1 = u.copy()
w = dot(X.T, u)
w = w/sqrt(dot(w.T, w))
#l = dot(Z, w)
#k = dot(Z.T, l)
#k = k/sqrt(dot(k.T, k))
#w = alpha*k + (1-alpha)*w
#w = w/sqrt(dot(w.T, w))
t = dot(X, w)
q = dot(Y.T, t)
q = q/sqrt(dot(q.T, q))
u = dot(Y, q)
diff = vnorm(t1 - t)
t1 = t.copy()
if verbose:
print "Converged after %s iterations" %niter
#tt = dot(t.T, t)
#p = dot(X.T, t)/tt
#q = dot(Y.T, t)/tt
#l = dot(Z, w)
p = dot(X.T, t)/dot(t.T, t)
p_norm = vnorm(p)
t = t*p_norm
w = w*p_norm
p = p/p_norm
U[:,a] = u.ravel()
W[:,a] = w.ravel()
P[:,a] = p.ravel()
T[:,a] = t.ravel()
Q[:,a] = q.ravel()
X = X - dot(t, p.T)
Y = Y - dot(t, q.T)
var_x[a] = pow(X, 2).sum()
var_y[a] = pow(Y, 2).sum()
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
b0[a] = mn_y - dot(mn_x, B[a])
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
if scale=='loads':
tnorm = apply_along_axis(vnorm, 0, T)
T = T/tnorm
W = W*tnorm
Q = Q*tnorm
return {'T':T, 'W':W, 'P':P, 'Q':Q, 'U':U, 'B':B, 'b0':b0, 'evx':evx, 'evy':evy,
'mnx': mn_x, 'mny': mn_y, 'xc': X, 'yc': Y}
########### Helper routines #########
def m_shape(array):
return matrix(array).shape
def esvd(data, amax=None):
"""SVD with the option of economy sized calculation
Calculate subspaces of X'X or XX' depending on the shape
of the matrix.
Good for extreme fat or thin matrices
:notes:
Numpy supports this by setting full_matrices=0
"""
has_arpack = True
try:
import arpack
except:
has_arpack = False
m, n = data.shape
if m>=n:
kernel = dot(data.T, data)
if has_arpack:
if amax==None:
amax = n
s, v = arpack.eigen_symmetric(kernel,k=amax, which='LM',
maxiter=200,tol=1e-5)
elif has_sym:
if amax==None:
amax = n
pcrange = None
else:
pcrange = [n-amax, n]
s, v = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1].real
v = v[:,::-1].real
else:
u, s, vt = svd(kernel)
v = vt.T
s = sqrt(s)
u = dot(data, v)/s
else:
kernel = dot(data, data.T)
if has_sym:
if amax==None:
amax = m
pcrange = None
else:
pcrange = [m-amax, m]
s, u = symeig(kernel, range=pcrange, overwrite=True)
s = s[::-1]
u = u[:,::-1]
else:
u, s, vt = svd(kernel)
s = sqrt(s)
v = dot(data.T, u)/s
# some use of symeig returns the 0 imaginary part
return u.real, s.real, v.real
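# Minimal sketch: esvd reconstructs a thin matrix exactly.
# >>> from scipy import rand, dot, allclose
# >>> a = rand(50, 4) # thin: the 4 x 4 kernel is used
# >>> u, s, v = esvd(a)
# >>> allclose(a, dot(u*s, v.T))
# True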
def vnorm(x):
# assume column arrays (or vectors)
return math.sqrt(dot(x.T, x))
def center(a, axis):
# 0 = col center, 1 = row center, 2 = double center
# -1 = nothing
# check if we have a vector
is_vec = len(a.shape)==1
if not is_vec:
is_vec = a.shape[0]==1 or a.shape[1]==1
if is_vec:
if axis==2:
warnings.warn("Double centering of vecor ignored, using ordinary centering")
if axis==-1:
mn = 0
else:
mn = a.mean()
return a - mn, mn
# !!!fixme: use broadcasting
if axis==-1:
mn = zeros((1,a.shape[1],))
#mn = tile(mn, (a.shape[0], 1))
elif axis==0:
mn = a.mean(0)[newaxis]
#mn = tile(mn, (a.shape[0], 1))
elif axis==1:
mn = a.mean(1)[:,newaxis]
#mn = tile(mn, (1, a.shape[1]))
elif axis==2:
mn = a.mean(0)[newaxis] + a.mean(1)[:,newaxis] - a.mean()
return a - mn , a.mean(0)[newaxis]
else:
raise IOError("input error: axis must be in [-1,0,1,2]")
return a - mn, mn
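# Minimal sketch: double centering (axis=2) removes both row and column means.
# >>> from scipy import rand
# >>> ac, mn = center(rand(5, 4), 2)
# >>> abs(ac.mean(0)).max() < 1e-12 and abs(ac.mean(1)).max() < 1e-12
# True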
def scale(a, axis):
"""Scale a to unit standard deviation along axis (-1: no scaling)."""
if axis==-1:
sc = ones((a.shape[1],))
elif axis==0:
sc = a.std(0)
elif axis==1:
sc = a.std(1)[:,newaxis]
else:
raise IOError("input error: axis must be in [-1,0,1]")
return a/sc, sc
## #PCA CALCS
## % Calculate Q limit using unused eigenvalues
## temp = diag(s);
## if n < m
## emod = temp(lv+1:n,:);
## else
## emod = temp(lv+1:m,:);
## end
## th1 = sum(emod);
## th2 = sum(emod.^2);
## th3 = sum(emod.^3);
## h0 = 1 - ((2*th1*th3)/(3*th2^2));
## if h0 <= 0.0
## h0 = .0001;
## disp(' ')
## disp('Warning: Distribution of unused eigenvalues indicates that')
## disp(' you should probably retain more PCs in the model.')
## end
## q = th1*(((1.65*sqrt(2*th2*h0^2)/th1) + 1 + th2*h0*(h0-1)/th1^2)^(1/h0));
## disp(' ')
## disp('The 95% Q limit is')
## disp(q)
## if plots >= 1
## lim = [q q];
## plot(scl,res,scllim,lim,'--b')
## str = sprintf('Process Residual Q with 95 Percent Limit Based on %g PC Model',lv);
## title(str)
## xlabel('Sample Number')
## ylabel('Residual')
## pause
## end
## % Calculate T^2 limit using ftest routine
## if lv > 1
## if m > 300
## tsq = (lv*(m-1)/(m-lv))*ftest(.95,300,lv,2);
## else
## tsq = (lv*(m-1)/(m-lv))*ftest(.95,m-lv,lv,2);
## end
## disp(' ')
## disp('The 95% T^2 limit is')
## disp(tsq)
## % Calculate the value of T^2 by normalizing the scores to
## % unit variance and summing them up
## if plots >= 1.0
## temp2 = scores*inv(diag(ssq(1:lv,2).^.5));
## tsqvals = sum((temp2.^2)');
## tlim = [tsq tsq];
## plot(scl,tsqvals,scllim,tlim,'--b')
## str = sprintf('Value of T^2 with 95 Percent Limit Based on %g PC Model',lv);
## title(str)
## xlabel('Sample Number')
## ylabel('Value of T^2')
## end
## else
## disp('T^2 not calculated when number of latent variables = 1')
## tsq = 1.96^2;
## end

laydi/lib/hypergeom.py Normal file

@ -0,0 +1,95 @@
import scipy
import scipy.stats
try:
# FIXME: remove rpy in a more proper way
import rpy_does_not_exist
has_rpy = True
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
except:
has_rpy = False
def gene_hypergeo_test(selection, category_dataset):
"""Returns the pvals from a hypergeometric test of significance.
input:
-- selection: list of selected identifiers along 0 dim of cat.set
-- category dataset, categories along dim 1 (cols)
"""
gene_dim_name = category_dataset.get_dim_name(0)
category_dim_name = category_dataset.get_dim_name(1)
#categories
all_cats = category_dataset.get_identifiers(category_dim_name, sorted=True)
# gene_ids universe
all_genes = category_dataset.get_identifiers(gene_dim_name)
# significant genes
good_genes_all = list(selection)
gg_index = category_dataset.get_indices(gene_dim_name, good_genes_all)
# significant genes pr. category
good_genes_cat = []
for col in category_dataset.asarray().T:
index = scipy.where(col==1)[0]
index = scipy.intersect1d(index, gg_index)
if index.size==0:
good_genes_cat.append([])
else:
good_genes_cat.append(category_dataset.get_identifiers(gene_dim_name, index))
count = map(len, good_genes_cat)
count = scipy.asarray([max(i, 0) for i in count])
cat_count = category_dataset.asarray().sum(0)
if has_rpy:
rpy.r.assign("x", count - 1) #number of sign. genes in category i
rpy.r.assign("m", len(good_genes_all)) # number of sign. genes tot
rpy.r.assign("n", len(all_genes)-len(good_genes_all) ) # num. genes not sign.
rpy.r.assign("k", cat_count) #num. genes in cat i
silent_eval('pvals <- phyper(x, m, n, k, lower.tail=FALSE)')
pvals = rpy.r("pvals")
else:
pvals = p_hyper_geom(count, len(good_genes_all),
len(all_genes)-len(good_genes_all),
cat_count)
pvals = scipy.where(cat_count==0, 2, pvals)
pvals = scipy.where(scipy.isnan(pvals), 2, pvals)
out = {}
for i in range(pvals.size):
out[str(all_cats[i])] = (count[i], cat_count[i], pvals[i])
return out
def p_hyper_geom(x, m, n, k):
"""Distribution function for the hypergeometric distribution.
Inputs:
-- x: vector of quantiles representing the number of white balls
drawn without replacement from an urn which contains both
black and white balls.
-- m: the number of white balls in the urn.
-- n: the number of black balls in the urn.
-- k: [vector] the number of balls drawn from the urn
Comments:
Similar to R's phyper with lower.tail=FALSE
"""
M = m + n
multiple_draws = False
if isinstance(k, scipy.ndarray) and k.size>1:
multiple_draws = True
n_draws = k.size
if n_draws<x.size:
print "n_draws: %d and n_found: %d Length mismatch, zero padded" %(k.size, x.size)
N = k
n = m
if not multiple_draws:
out = scipy.stats.hypergeom.pmf(x, M, n, N).cumsum()
else:
out = scipy.zeros((max(n_draws, x.size),))
for i in xrange(N.size):
out[i] = scipy.stats.hypergeom.pmf(x, M, n, N[i]).cumsum()[i]
return out
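# Minimal sketch (scalar number of draws): returns the accumulated pmf
# over the quantile vector x.
# >>> out = p_hyper_geom(scipy.array([0, 1, 2]), 5, 15, 4)
# >>> out.shape
# (3,)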

laydi/lib/nx_utils.py Normal file

@ -0,0 +1,567 @@
import os,sys
from itertools import izip
import networkx as NX
from scipy import shape,diag,dot,asarray,sqrt,real,zeros,eye,exp,maximum,\
outer,sum,atleast_2d,ravel,array,mean,rand
from scipy.linalg import eig,svd,inv,expm,norm
from cx_utils import sorted_eig
import numpy
eps = numpy.finfo(float).eps.item()
feps = numpy.finfo(numpy.single).eps.item()
_array_precision = {'f': 0, 'd': 1, 'F': 0, 'D': 1,'i': 1}
class NXUTILSException(Exception): pass
def xgraph_to_graph(G):
"""Convert an Xgraph to an ordinary graph.
Edge attributes, mult.edges and self-loops are lost in the process.
"""
GG = NX.convert.from_dict_of_lists(NX.convert.to_dict_of_lists(G))
return GG
def get_affinity_matrix(G, data, ids, dist='e', mask=None, weight=None, t=0, out='dist'):
"""
Function for calculating a general affinity matrix, based upon distances.
Affinity = 1 - distance ((10-1); 1 is far apart)
INPUT
data:
gene expression data, type dict data[gene] = expression-vector
G:
The network (networkx.base.Graph object)
mask:
The array mask shows which data are missing. If mask[i][j]==0, then
data[i][j] is missing.
weights:
The array weight contains the weights to be used when calculating distances.
transpose:
If transpose==0, then genes are clustered. If transpose==1, microarrays are
clustered.
dist:
The character dist defines the distance function to be used:
dist=='e': Euclidean distance
dist=='b': City Block distance
dist=='h': Harmonically summed Euclidean distance
dist=='c': Pearson correlation
dist=='a': absolute value of the correlation
dist=='u': uncentered correlation
dist=='x': absolute uncentered correlation
dist=='s': Spearman's rank correlation
dist=='k': Kendall's tau
For other values of dist, the default (Euclidean distance) is used.
OUTPUT
D :
Similarity matrix (nGenes x nGenes), symmetric, d_ij in [0,1]
Normalized so max weight = 1.0
"""
try:
from Bio import Cluster as CLS
except:
raise NXUTILSException("Import of Biopython failed")
n_var = len(data)
n_samp = len(data[data.keys()[0]])
X = zeros((n_var, n_samp), dtype='<f8')
for i, gene in enumerate(ids): # this should be right!!
X[i,:] = data[gene]
#X = transpose(X) # distancematrix needs matrix as (nGenes,nSamples)
D_list = CLS.distancematrix(X, dist=dist)
D = zeros((n_var, n_var), dtype='<f8')
for i,row in enumerate(D_list):
if i>0:
D[i,:len(row)]=row
D = D + D.T
MAX = 30.0
D_max = max(ravel(D))/MAX
D_n = D/D_max #normalised (max = 10.0)
D_n = (MAX+1.) - D_n #using correlation (inverse distance for dists)
A = NX.adj_matrix(G, nodelist=ids)
if out=='dist':
return D_n*A
elif out=='heat_kernel':
t=1.0
K = exp(-t*D*A)
return K
elif out=='complete':
return D_n
else:
return []
def remove_one_degree_nodes(G, iter=True):
"""Removes all nodes with only one neighbour. These nodes does
not contribute to community structure.
input:
G -- graph
iter -- True/False iteratively remove?
"""
G_copy = G.copy()
if iter==True:
while 1:
bad_nodes=[]
for node in G_copy.nodes():
if len(G_copy.neighbors(node))==1:
bad_nodes.append(node)
if len(bad_nodes)>0:
G_copy.delete_nodes_from(bad_nodes)
else:
break
else:
bad_nodes=[]
for node in G_copy.nodes():
if len(G_copy.neighbors(node))==1:
bad_nodes.append(node)
if len(bad_nodes)>0:
G_copy.delete_nodes_from(bad_nodes)
print "Deleted %s nodes from network" %(len(G)-len(G_copy))
return G_copy
def key_players(G, n=1, with_labels=False):
"""
Resilince measure
Identification of key nodes by fraction of nodes in
disconnected subgraph when the node is removed.
output:
fraction of nodes disconnected when node i is removed
"""
i=0
frac=[]
labels = {}
for node in G.nodes():
i+=1
print i
T = G.copy()
T.delete_node(node)
n_nodes = T.number_of_nodes()
sub_graphs = NX.connected_component_subgraphs(T)
n = len(sub_graphs)
if n>1:
strong_comp = sub_graphs[0]
fraction = 1.0 - 1.0*strong_comp.number_of_nodes()/n_nodes
frac.append(fraction)
labels[node]=fraction
else:
frac.append(0.0)
labels[node]=0.0
out = 1.0 - array(frac)
if with_labels==True:
return out,labels
else:
return out
def node_weighted_adj_matrix(G, weights=None, ave_type='harmonic', with_labels=False):
"""Return a weighted adjacency matrix of graph. The weights are
node weights.
input: G -- graph
weights -- dict, keys: nodes, values: weights
with_labels -- True/False, return labels?
output: A -- weighted adjacency matrix
[index] -- node labels
"""
n=G.order()
# make an dictionary that maps vertex name to position
index={}
count=0
for node in G.nodes():
index[node]=count
count = count+1
a = zeros((n,n))
if G.__class__.__name__ == 'XGraph':
raise NXUTILSException("XGraph input is not supported")
for head,tail in G.edges():
if ave_type == 'geometric':
a[index[head],index[tail]]= sqrt(weights[head]*weights[tail])
a[index[tail],index[head]]= a[index[head],index[tail]]
elif ave_type == 'harmonic':
a[index[head],index[tail]] = 2.*weights[head]*weights[tail]/(weights[head] + weights[tail])
a[index[tail],index[head]] = a[index[head],index[tail]]
if with_labels:
return a,index
else:
return a
def weighted_adj_matrix(G, with_labels=False):
"""Adjacency matrix of an XGraph whos weights are given in edges.
"""
A, labels = NX.adj_matrix(G, with_labels=True)
W = A.astype('<f8')
for orf, i in labels.items():
for orf2, j in labels.items():
if G.has_edge(orf, orf2):
edge_weight = G.get_edge(orf, orf2)
W[i,j] = edge_weight
W[j,i] = edge_weight
if with_labels==True:
return W, labels
else:
return W
def assortative_index(G):
"""Ouputs two vectors: the degree and the neighbor average degree.
Used to measure the assortative mixing. If the average degree is
pos. correlated with the degree we know that hubs tend to connect
to other hubs.
input: G, graph connected!!
ouput: d,mn_d: degree, and average degree of neighb.
(degree sorting from degree(with_labels=True))
"""
d = G.degree(with_labels=True)
out=[]
for node in G.nodes():
nn = G.neighbors(node)
if len(nn)>0:
nn_d = mean([float(d[i]) for i in nn])
out.append((d[node], nn_d))
return array(out).T
def struct_equivalence(G,n1,n2):
"""Returns the structural equivalence of a node pair. Two nodes
are structural equal if they share the same neighbors.
x_s = [ne(n1) union ne(n2) - ne(n1) intersection ne(n2)]/[ne(n1)
union ne(n2) + ne(n1) intersection ne(n2)]
ref: Brun et.al 2003
"""
#[ne(n1) union ne(n2) - ne(n1) intersection ne(n2
s1 = set(G.neighbors(n1))
s2 = set(G.neighbors(n2))
num_union = len(s1.union(s2))
num_intersection = len(s1.intersection(s2))
if num_union & num_intersection:
xs=0
else:
xs = (num_union - num_intersection)/(num_union + num_intersection)
return xs
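# Minimal sketch: nodes sharing all neighbours are structurally equal (xs = 0).
# >>> G = NX.Graph()
# >>> G.add_edges_from([(1, 3), (2, 3), (1, 4), (2, 4)])
# >>> struct_equivalence(G, 1, 2)
# 0.0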
def struct_equivalence_all(G):
"""Not finnished.
"""
A,labels = NX.adj_matrix(G,with_labels=True)
pass
def hamming_distance(n1,n2):
"""Not finnsihed.
"""
pass
def graph_corrcoeff(G, vec=None, nodelist=None, sim='corr'):
"""Returns the correlation coefficient for each node. The
correlation coefficient is between the node and its neighbours.
"""
if nodelist==None:
nodelist=G.nodes()
if vec == None:
vec = G.degree(nodelist)
if len(vec)!=len(nodelist):
raise NXUTILSError("The node value vector is not of same length (%s) as the nodelist(%s)") %(len(vec), len(nodelist))
A = NX.adj_matrix(G, nodelist=nodelist)
for i, node in enumerate(nodelist):
nei_i = A[i,:]==1
vec_i = vec[nei_i]
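# fixme: unfinished -- the correlation coefficient itself is not computed yet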
def weighted_laplacian(G,with_labels=False):
"""Return standard Laplacian of graph from a weighted adjacency matrix."""
n= G.order()
I = eye(n)
A = weighted_adj_matrix(G)
D = I*sum(A, 0)
L = D-A
if with_labels:
A,index = weighted_adj_matrix(G, with_labels=True)
return L, index
else:
return L
def grow_subnetworks(G, T2):
"""Return the highest scoring (T2-test) subgraph og G.
Use simulated annealing to identify highly grow subgraphs.
ref: -- Ideker et.al (Bioinformatics 18, 2002)
-- Patil and Nielsen (PNAS 2006)
"""
N = 1000
states = [(node, False) for node in G.nodes()]
t2_last = 0.0
for i in xrange(N):
if i==0: #assign random states
states = [(state[0], True) for state in states if rand(1)>.5]
sub_nodes = [state[0] for state in states if state[1]]
Gsub = NX.subgraph(G, sub_nodes)
Gsub = NX.connected_component_subgraphs(Gsub)[0]
t2 = [T2[node] for node in Gsub]
if t2>t2_last:
pass
else:
pass # fixme: unfinished -- acceptance probability (p = numpy.exp(...)) not implemented
"""Below are methods for calculating graph metrics
Four main decompositions :
0.) Adjacency diffusion kernel expm(A),
1.) von neumann kernels (diagonalisation of adjacency matrix)
2.) laplacian kernels (geometric series of adj.)
3.) diffusion kernels (exponential series of adj.)
---- Kv
von_neumann : Kv = (I-alpha*A)^-1 (mod: A(I-alpha*A)^-1)? ,
geom. series
---- Kl
laplacian: Kl = (I-alpha*L)^-1 , geom. series
---- Kd
laplacian_diffusion: Kd = expm(-alpha*L)
exp. series
---- Ke
Exponential diffusion.
Ke = expm(A) .... expm(-A)?
"""
# TODO:
# check for numerical unstable eigenvalues and set to zero
# otherwise some inverses will explode -> ok ... using pinv for inverses
#
# This gives results that look numerical unstable
#
# -- divided adj by sum(A[:]), check this one (paper by Lebart scales with number of edges)
#
#
#
# the neumann kernel is defined in Kandola to be K = A*(I-A)^-1
# lowest eigenvectors are same as the highest of K = A*A ?
# this needs clarification
# diffusion is still wrong! ... ok
# diff needs normalisation?! check the meaning of exp(-s) = exp(1/s) -L = 1/degree ... etc
# Is it the negative of exp. of adj. metrix in Kandola?
#
# Normalised=False returns only nans (no idea why!!) ... fixed ok
# 31.1: diff is ok exp(0)=1 not zero!
# 07.03.2005: normalisation is ok: -> normalisation will emphasize high degree nodes
# 10.03.2005: symeig is unstable and returns nans for some eigenvectors? switching back to eig
# 14.05.2006: diffusion returns negative values, using expm(-LL) instead (FIX)
# 13.09.2206: update for use in numpy
# 27.04.2007: diffusion now uses pade approximations to matrix exponential. Also the last
def K_expAdj(W, normalised=True, alpha=1.0):
"""Matrix exponential of adjacency matrix, mentioned in Kandola as a general diffusion kernel.
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m,n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
if normalised==True:
T = diag( sqrt( 1./(sum(W,0))) )
W = dot(dot(T, W), T)
e,vr = eig(W)
s = real(e)**2 # from eigenvalues to singularvalues
vri = inv(vr)
s = maximum.reduce(s) + s
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = abs(cond*maximum.reduce(s))
psigma = eye(m)
for i in range(len(s)):
if abs(s[i]) > cutoff:
psigma[i,i] = .5*alpha*exp(s[i])
return dot(dot(vr,psigma),vri)
def K_vonNeumann(W, normalised=True, alpha=1.0):
""" The geometric series of path lengths.
Returns matrix square root of pseudo inverse of the adjacency matrix.
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m,n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
if normalised==True:
T = diag(sqrt(1./(sum(W,0))))
W = dot(dot(T,W),T)
e,vr = eig(W)
vri = inv(vr)
e = real(e) # we only work with real pos. eigvals
e = maximum.reduce(e) + e
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = cond*maximum.reduce(e)
psigma = zeros((m,n),t)
for i in range(len(e)):
if e[i] > cutoff:
psigma[i,i] = 1.0/e[i] #these are eig.vals (=sqrt(sing.vals))
return dot(dot(vr,psigma),vri).astype(t)
def K_laplacian(W, normalised=True, alpha=1.0):
""" This is the matrix pseudo inverse of L.
Also known as the average commute time matrix.
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m,n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
D = diag(sum(W,0))
L = D - W
if normalised==True:
T = diag(sqrt(1./sum(W, 0)))
L = dot(dot(T, L), T)
e,vr = eig(L)
e = real(e)
vri = inv(vr)
cond = {0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = cond*maximum.reduce(e)
psigma = zeros((m,),t) # if s close to zero -> set 1/s = 0
for i in range(len(e)):
if e[i] > cutoff:
psigma[i] = 1.0/e[i]
K = dot(dot(vr, diag(psigma)), vri).astype(t)
K = real(K)
I = eye(n)
K = (1-alpha)*I + alpha*K
return K
def K_diffusion(W, normalised=True, alpha=1.0, beta=0.5, use_cut=False):
"""Returns diffusion kernel.
input:
-- W, adj. matrix
-- normalised [True/False]
-- alpha, [0,1] (degree of network influence)
-- beta, [0->), (diffusion degree)
"""
W = asarray(W)
t = W.dtype.char
if len(W.shape)!=2:
raise ValueError, "Non-matrix input to matrix function."
m, n = W.shape
if t in ['F','D']:
raise TypeError, "Complex input!"
D = diag(W.sum(0))
L = D - W
if normalised==True:
T = diag(sqrt(1./W.sum(0)))
L = dot(dot(T, L), T)
e, vr = eig(L)
vri = inv(vr) #inv
cond = 1.0*{0: feps*1e3, 1: eps*1e6}[_array_precision[t]]
cutoff = 1.*abs(cond*maximum.reduce(e))
psigma = eye(m) # if eigvals are 0 exp(0)=1 (unnecessary)
#psigma = zeros((m,n), dtype='<f8')
for i in range(len(e)):
if abs(e[i]) > cutoff:
psigma[i,i] = exp(-beta*e[i])
#else:
# psigma[i,i] = 0.0
K = real(dot(dot(vr, psigma), vri))
I = eye(n, dtype='<f8')
K = (1. - alpha)*I + alpha*K
return K
def K_diffusion2(W, normalised=True, alpha=1.0, beta=0.5, ncomp=None):
"""Returns diffusion kernel, using fast pade approximation.
input:
-- W, adj. matrix
-- normalised [True/False]
-- beta, [0->), (diffusion degree)
"""
D = diag(W.sum(0))
L = D - W
if normalised==True:
T = diag(sqrt(1./W.sum(0)))
L = dot(dot(T, L), T)
return expm(-beta*L)
def K_modularity(W, alpha=1.0):
""" Returns the matrix square root of Newmans modularity."""
W = asarray(W)
t = W.dtype.char
m, n = W.shape
d = sum(W, 0)
m = 1.*sum(d)
B = W - (outer(d, d)/m)
s,v = sorted_eig(B, sort_by='lm')
psigma = zeros( (n, n), dtype='<f8' )
for i in range(len(s)):
if s[i]>1e-7:
psigma[i,i] = sqrt(s[i])
#psigma[i,i] = s[i]
K = dot(dot(v, psigma), v.T)
I = eye(n)
K = (1 - alpha)*I + alpha*K
return K
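# Minimal sketch: modularity kernel of a tiny adjacency matrix.
# >>> W = array([[0., 1., 1.], [1., 0., 0.], [1., 0., 0.]])
# >>> K_modularity(W).shape
# (3, 3)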
def kernel_score(K, W):
"""Returns the modularity score.
K -- (modularity) kernel
W -- adjacency matrix (possibly weighted)
"""
# normalize W (: W'W=I)
m, n = shape(W)
for i in range(n):
W[:,i] = W[:,i]/norm(W[:,i])
score = diag(dot(W, dot(K, W)) )
tot = sum(score)
return score, tot
def modularity_matrix(G, nodelist=None):
if not nodelist:
nodelist = G.nodes()
else:
G = NX.subgraph(G, nodelist)
A = NX.adj_matrix(G, nodelist=nodelist)
d = atleast_2d(G.degree(nbunch=nodelist))
m = 1.*G.number_of_edges()
B = A - dot(d.T, d)/m
return B

laydi/lib/packer.py Normal file

@ -0,0 +1,28 @@
from scipy import dot, argmin
from scipy.linalg import svd
class Packer:
"""A compression object used to speed up model calculations.
Often used in conjunction with crossvalidation and perturbations
analysis.
"""
def __init__(self,array):
self._shape = array.shape
self._array = array
self._packed_data = None
self._inflater = None
def expand(self,a):
if self._inflater!=None:
return dot(self._inflater,a)
def collapse(self,axis=None,mode='svd'):
if not axis:
axis = argmin(self._array.shape) # default is the smallest dim
if axis == 1:
self._array = self._array.T
u, s, vt = svd(self._array,full_matrices=0)
self._inflater = vt.T
self._packed_data = u*s
return self._packed_data
def get_packed_data(self):
return self._packed_data
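# Minimal usage sketch: pack a wide matrix into its score space.
# >>> from scipy import rand
# >>> p = Packer(rand(5, 100))
# >>> p.collapse().shape
# (5, 5)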

laydi/lib/select_generators.py Normal file
@ -0,0 +1,223 @@
"""Matrix cross validation selection generators
"""
from scipy import take,arange,ceil,repeat,newaxis,asarray,dot,ones,\
random,array_split,floor,vstack,asarray,minimum
from cx_utils import randperm
def w_pls_gen(aat,b,n_blocks=None,center=True,index_out=False):
"""Random block crossvalidation for wide (XX.T) trick in PLS.
Leave-one-out is a subset, with n_blocks equals nSamples
aat -- outerproduct of X
b -- Y
n_blocks -- number of crossvalidation blocks (default: one per sample)
center -- use centering of calibration sets; (aat_in, b_in) are centered
Returns:
-- aat_in,aat_out,b_in,b_out,[out]
"""
m, n = aat.shape
index = randperm(m)
if n_blocks==None: n_blocks = m
nValuesInBlock = m/n_blocks
if n_blocks==m:
index = arange(m)
out_ind = [index[i*nValuesInBlock:(i+1)*nValuesInBlock] for i in range(n_blocks)]
for out in out_ind:
inn = [i for i in index if i not in out]
aat_in = aat[inn,:][:,inn]
aat_out = aat[out,:][:,inn]
b_in = b[inn,:]
b_out = b[out,:]
if center:
aat_in, mn = outerprod_centering(aat_in)
b_in = b_in - b_in.mean(0) # b_in + b_out/(b_in.shape[0])
if index_out:
yield aat_in,aat_out,b_in,b_out,out
else:
yield aat_in,aat_out,b_in,b_out
def pls_gen(a, b, n_blocks=None, center=False, index_out=False,axis=0):
"""Random block crossvalidation
Leave-one-out is a subset, with n_blocks equals a.shape[-1]
"""
index = randperm(a.shape[axis])
#index = arange(a.shape[axis])
if n_blocks==None:
n_blocks = a.shape[axis]
n_in_set = ceil(float(a.shape[axis])/n_blocks)
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_blocks)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
acal = a.take(inn, 0)
atrue = a.take(out, 0)
bcal = b.take(inn, 0)
btrue = b.take(out, 0)
if center:
mn_a = acal.mean(0)[newaxis]
acal = acal - mn_a
atrue = atrue - mn_a
mn_b = bcal.mean(0)[newaxis]
bcal = bcal - mn_b
btrue = btrue - mn_b
if index_out:
yield acal, atrue, bcal, btrue, out
else:
yield acal, atrue, bcal, btrue
def pca_gen(a, n_sets=None, center=False, index_out=False, axis=0):
"""Returns a generator of crossvalidation sample segments.
input:
-- a, data matrix (m x n)
-- n_sets, number of segments/subsets to generate.
-- center, bool, choice of centering each subset
-- index_out, bool, return subset index
-- axis, int, which axis to get subset from
output:
-- V, generator with (n_sets) members (subsets)
"""
m = a.shape[axis]
index = randperm(m)
if n_sets==None:
n_sets = m
n_in_set = ceil(float(m)/n_sets)
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
acal = a.take(inn, 0)
atrue = a.take(out, 0)
if center:
mn_a = acal.mean(0)[newaxis]
acal = acal - mn_a
atrue = atrue - mn_a
if index_out:
yield acal, atrue, out
else:
yield acal, atrue
def w_pls_gen_jk(a, b, n_sets=None, center=True,
index_out=False, axis=0):
"""Random block crossvalidation for wide X (m>>n)
Leave-one-out is a subset, with n_sets equals a.shape[-1]
Returns : X_m and X_m'Y_m
"""
m = a.shape[axis]
ab = dot(a.T, b)
index = randperm(m)
if n_sets==None:
n_sets = m
n_in_set = ceil(float(m)/n_sets)
out_ind_sets = [index[i*n_in_set:(i+1)*n_in_set] for i in range(n_sets)]
for out in out_ind_sets:
inn = [i for i in index if i not in out]
nin = len(inn)
nout = len(out)
a_in = a[inn,:]
mn_a = 0
mAB = 0
if center:
mn_a = a_in.mean(0)[newaxis]
mAin = dot(-ones((1,nout)), a[out,:])/nin
mBin = dot(-ones((1,nout)), b[out,:])/nin
mAB = dot(mAin.T, (mBin*nin))
ab_in = ab - dot(a[out,].T, b[out,:]) - mAB
a_in = a_in - mn_a
if index_out:
yield a_in, ab_in, out
else:
yield a_in, ab_in
def shuffle_1d_block(a, n_sets=None, blocks=None, index_out=False, axis=0):
"""Random block shuffling along 1d axis
Returns : Shuffled a by axis
"""
m = a.shape[axis]
if blocks==None:
blocks = m
for ii in xrange(n_sets):
index = randperm(m)
if blocks==m:
a_out = a.take(index, axis)
else:
index = arange(m)
dummy = map(random.shuffle, array_split(index, blocks))
a_out = a.take(index, axis)
if index_out:
yield a_out, index
else:
yield a_out
def shuffle_1d(a, n_sets, axis=0):
"""Random shuffling along 1d axis.
Returns : Shuffled a by axis
"""
m = a.shape[axis]
for ii in xrange(n_sets):
index = randperm(m)
a = a.take(index, axis)
yield a
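# Minimal sketch: three row-shuffled copies of a.
# >>> from scipy import rand
# >>> [ai.shape for ai in shuffle_1d(rand(6, 2), 3)]
# [(6, 2), (6, 2), (6, 2)]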
def diag_pert(a, n_sets=10, center=True, index_out=False):
"""Alter generator returning sets perturbed with means at diagonals.
input:
X -- matrix, data
alpha -- scalar, approx. portion of data perturbed
"""
m, n = a.shape
tr=False
if m>n:
a = a.T
m, n = a.shape
tr = True
if n_sets>m or n_sets>n:
msg = "You may not use more subsets than max(n_rows, n_cols)"
raise ValueError, msg
nm=n*m
start_inds = array_split(randperm(m),n_sets) # we use random start diags
if center:
a = a - a.mean(0)[newaxis]
for v in range(n_sets):
a_out = a.copy()
out = []
for start in start_inds[v]:
ind = arange(start+v, nm, n+1)
[out.append(i) for i in ind]
if center:
a_out.put(a.mean(),ind)
else:
a_out.put(0, ind)
if tr:
a_out = a_out.T
if index_out:
yield a_out, asarray(out)
else:
yield a_out
def outerprod_centering(aat, ret_mn=True):
"""Returns double centered symmetric outerproduct matrix.
"""
h = aat.mean(0)[newaxis]
h = h - 0.5*h.mean()
mn_a = h + h.T # beauty of broadcasting
aatc = aat - mn_a
if ret_mn:
return aatc, h
return aatc
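# Minimal sketch: double centering a symmetric outerproduct (kernel) matrix.
# >>> from scipy import rand, dot
# >>> x = rand(5, 3)
# >>> aatc, h = outerprod_centering(dot(x, x.T))
# >>> abs(aatc.mean(0)).max() < 1e-12
# True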

laydi/lib/validation.py Normal file

@ -0,0 +1,315 @@
"""This module implements some common validation schemes from pca and pls.
"""
from scipy import ones,sqrt,dot,newaxis,zeros,sum,empty,\
apply_along_axis,eye,kron,array,sort,zeros_like,argmax,atleast_2d
from numpy import median
from scipy.linalg import triu,inv,svd,norm
from select_generators import w_pls_gen,w_pls_gen_jk,pls_gen,pca_gen,diag_pert
from engines import w_simpls,pls,bridge,pca,nipals_lpls
from cx_utils import m_shape
def w_pls_cv_val(X, Y, amax, n_blocks=None):
"""Returns rmsep and aopt for pls tailored for wide X.
The root mean square error of cross validation is calculated
based on random block cross-validation. With number of blocks equal to
number of samples [default] gives leave-one-out cv.
The pls model is based on the simpls algorithm for wide X.
:Parameters:
X : ndarray
column centered data matrix of size (samples x variables)
Y : ndarray
column centered response matrix of size (samples x responses)
amax : scalar
Maximum number of components
n_blocks : scalar
Number of blocks in cross validation
:Returns:
rmsep : ndarray
Root Mean Square Error of cross-validated Predictions
aopt : scalar
Guestimate of the optimal number of components
:SeeAlso:
- pls_cv_val : Same output, not optimised for wide X
- w_simpls : Simpls algorithm for wide X
Notes
-----
Based (cowardly translated) on m-files from the Chemoact toolbox
X, Y inputs need to be centered (fixme: check)
Examples
--------
>>> import numpy as n
>>> X = n.random.rand(4, 3); X = X - X.mean(0)
>>> Y = n.random.rand(4, 1); Y = Y - Y.mean(0)
>>> rmsep, aopt = w_pls_cv_val(X, Y, 2)
"""
k, l = m_shape(Y)
PRESS = zeros((l, amax+1), dtype='f')
if n_blocks==None:
n_blocks = Y.shape[0]
XXt = dot(X, X.T)
V = w_pls_gen(XXt, Y, n_blocks=n_blocks, center=True)
for Din, Doi, Yin, Yout in V:
ym = -sum(Yout, 0)[newaxis]/(1.0*Yin.shape[0])
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
dat = w_simpls(Din, Yin, amax)
Q, U, H = dat['Q'], dat['U'], dat['H']
That = dot(Doi, dot(U, inv(triu(dot(H.T, U))) ))
Yhat = []
for j in range(l):
TQ = dot(That, triu(dot(Q[j,:][:,newaxis], ones((1,amax)))) )
E = Yout[:,j][:,newaxis] - TQ
E = E + sum(E, 0)/Din.shape[0]
PRESS[j,1:] = PRESS[j,1:] + sum(E**2, 0)
#Yhat = Yin - dot(That,Q.T)
msep = PRESS/(Y.shape[0])
aopt = find_aopt_from_sep(msep)
return sqrt(msep), aopt
def pls_val(X, Y, amax=2, n_blocks=10, algo='pls'):
k, l = m_shape(Y)
PRESS = zeros((l, amax+1), dtype='<f8')
EE = zeros((amax, k, l), dtype='<f8')
Yhat = zeros((amax, k, l), dtype='<f8')
V = pls_gen(X, Y, n_blocks=n_blocks, center=True, index_out=True)
for Xin, Xout, Yin, Yout, out in V:
ym = -sum(Yout,0)[newaxis]/Yin.shape[0]
Yin = (Yin - ym)
PRESS[:,0] = PRESS[:,0] + ((Yout - ym)**2).sum(0)
if algo=='pls':
dat = pls(Xin, Yin, amax, mode='normal')
elif algo=='bridge':
dat = bridge(Xin, Yin, amax, mode='normal')
for a in range(amax):
Ba = dat['B'][a,:,:]
Yhat[a,out[:],:] = dot(Xout, Ba)
E = Yout - dot(Xout, Ba)
EE[a,out,:] = E
PRESS[:,a+1] = PRESS[:,a+1] + sum(E**2,0)
#rmsep = sqrt(PRESS/(k-1.))
msep = PRESS
aopt = find_aopt_from_sep(msep)
return msep, Yhat, aopt
def lpls_val(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,2]):
"""Performs crossvalidation to get generalisation error in lpls"""
assert(nsets<=X.shape[0])
cv_iter = pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
k, l = Y.shape
Yc = empty((k, l), 'd')
Yhat = empty((a_max, k, l), 'd')
Yhatc = empty((a_max, k, l), 'd')
sep2 = empty((a_max, k, l), 'd')
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
print ind
dat = nipals_lpls(xcal,ycal,Z,
a_max=a_max,
alpha=alpha,
mean_ctr=mean_ctr,
verbose=False)
B = dat['B']
#b0 = dat['b0']
for a in range(a_max):
if mean_ctr[0] in [0, 2]:
xi = xi - dat['mnx']
else:
xi = xi - xi.mean(1)[:,newaxis] #???: cheating?
if mean_ctr[1] in [0, 2]:
ym = dat['mny']
else:
ym = yi.mean(1)[:,newaxis] #???: check this
Yhat[a,ind,:] = atleast_2d(ym + dot(xi, B[a]))
#Yhat[a,ind,:] = atleast_2d(b0[a] + dot(xi, B[a]))
# todo: need a better support for class validation
y_is_class = Y.dtype.char.lower() in ['i','p', 'b', 'h','?']
#print Y.dtype.char
if y_is_class:
Yhat_class = zeros_like(Yhat)
for a in range(a_max):
for i in range(k):
Yhat_class[a,i,argmax(Yhat[a,i,:])] = 1.0
class_err = 100*((Yhat_class+Y)==2).sum(1)/Y.sum(0).astype('d')
sep = (Y - Yhat)**2
rmsep = sqrt(sep.mean(1)).T
#rmsep2 = sqrt(sep2.mean(1))
aopt = find_aopt_from_sep(rmsep)
return rmsep, Yhat, aopt
def pca_alter_val(a, amax, n_sets=10, method='diag'):
"""Pca validation by altering elements in X.
comments:
-- may do all jk estimates in this loop
"""
V = diag_pert(a, n_sets, center=True, index_out=True)
sep = empty((n_sets, amax), dtype='f')
for i, (xi, ind) in enumerate(V):
dat_i = pca(xi, amax, mode='detailed')
Ti, Pi = dat_i['T'],dat_i['P']
for j in xrange(amax):
Xhat = dot(Ti[:,:j+1], Pi[:,:j+1].T)
a_sub = a.ravel().take(ind)
EE = a_sub - Xhat.ravel().take(ind)
tot = (a_sub**2).sum()
sep[i,j] = (EE**2).sum()/tot
sep = sqrt(sep)
aopt = find_aopt_from_sep(sep)
return sep, aopt
def pca_cv_val(a, amax, n_sets):
""" Returns PRESS from cross-validated pca using random segments.
input:
-- a, data matrix (m x n)
-- amax, maximum number of components used
-- n_sets, number of segments to calculate
output:
-- sep, (amax x m x n), squared error of prediction (press)
-- aopt, guestimated optimal number of components
"""
m, n = a.shape
E = empty((amax, m, n), dtype='f')
xtot = (a**2).sum() # this needs centering
V = pca_gen(a, n_sets=n_sets, center=True, index_out=True)
for xi, xout, ind in V:
dat_i = pca(xi, amax, mode='fast')
Pi = dat_i['P']
for j in xrange(amax):
Pia = Pi[:,:j+1]
E[j][ind,:] = (a[ind,:] - dot(xout, dot(Pia, Pia.T)))**2
sep = []
for j in xrange(amax):
sep.append(E[j].sum()/xtot)
sep = array(sep)
aopt = find_aopt_from_sep(sep)
return sep, aopt
def pls_jkW(a, b, amax, n_blocks=None, algo='pls', use_pack=True, center=True):
""" Returns CV-segments of paramter W for wide X.
todo: add support for T,Q and B
"""
if n_blocks == None:
n_blocks = b.shape[0]
Wcv = empty((n_blocks, a.shape[1], amax), dtype='d')
if use_pack:
u, s, inflater = svd(a, full_matrices=0)
a = u*s
V = pls_gen(a, b, n_blocks=n_blocks, center=center)
for nn,(a_in, a_out, b_in, b_out) in enumerate(V):
if algo=='pls':
dat = pls(a_in, b_in, amax, 'loads', 'fast')
elif algo=='bridge':
dat = bridge(a_in, b_in, amax, 'loads', 'fast')
W = dat['W']
if use_pack:
W = dot(inflater.T, W)
Wcv[nn,:,:] = W[:,:,]
return Wcv
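# Illustrative sketch (exposition only): jackknife segments of the loading
# weights W for a wide data matrix, and their spread across CV segments.
# Shapes are assumptions.
def _example_pls_jkW():
    from numpy import random
    X = random.rand(15, 200)      # wide: few samples, many variables
    Y = random.rand(15, 2)
    Wcv = pls_jkW(X, Y, amax=3, n_blocks=5)
    Wstd = Wcv.std(0)             # per-variable spread over the segments
    print Wstd.shape              # -> (200, 3)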
def pca_jkP(a, aopt, n_blocks=None):
"""Returns loading from PCA on CV-segments.
input:
-- a, data matrix (n x m)
-- aopt, number of components in model.
    -- n_blocks, number of segments
output:
-- PP, loadings collected in a three way matrix
(n_segments, m, aopt)
comments:
* The loadings are scaled with the (1/samples)*eigenvalues.
* Crossvalidation method is currently set to random blocks of samples.
todo: add support for T
fixme: more efficient to add this in validation loop
"""
    if n_blocks is None:
        n_blocks = a.shape[0]
PP = empty((n_blocks, a.shape[1], aopt), dtype='f')
V = pca_gen(a, n_sets=n_blocks, center=True)
for nn,(a_in, a_out) in enumerate(V):
dat = pca(a_in, aopt, mode='fast', scale='loads')
P = dat['P']
PP[nn,:,:] = P
return PP
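# Illustrative sketch (exposition only): segment-wise PCA loadings and a crude
# stability measure. Shapes are assumptions.
def _example_pca_jkP():
    from numpy import random
    X = random.rand(20, 8)
    PP = pca_jkP(X, aopt=2, n_blocks=10)
    print PP.std(0)               # loading variability across segments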
def lpls_jk(X, Y, Z, a_max, nsets=None, xz_alpha=.5, mean_ctr=[2,0,2]):
    m, n = X.shape
    k, l = Y.shape
    o, p = Z.shape
    if nsets is None:
        nsets = m
    cv_iter = pls_gen(X, Y, n_blocks=nsets, center=False, index_out=False)
WWx = empty((nsets, n, a_max), 'd')
WWz = empty((nsets, o, a_max), 'd')
#WWy = empty((nsets, l, a_max), 'd')
for i, (xcal, xi, ycal, yi) in enumerate(cv_iter):
dat = nipals_lpls(xcal,ycal,Z,a_max=a_max,alpha=xz_alpha,
mean_ctr=mean_ctr,scale='loads',verbose=False)
WWx[i,:,:] = dat['W']
WWz[i,:,:] = dat['L']
#WWy[i,:,:] = dat['Q']
return WWx, WWz
def find_aopt_from_sep(sep, method='75perc'):
"""Returns an estimate of optimal number of components from rmsecv.
"""
sep = sep.copy()
if method=='vanilla':
# min rmsep
rmsecv = sqrt(sep.mean(0))
return rmsecv.argmin() + 1
    elif method=='75perc':
        prct = .75 # percentile
        ind = int(sep.shape[0]*prct)
        med = median(sep, 0)   # column-wise median, one value per component
        prc_75 = []
        for col in sep.T:
            col.sort() # in-place -> would ruin sep, hence the copy above
            prc_75.append(col[ind])
        prc_75 = array(prc_75)
for i in range(1, sep.shape[1], 1):
if med[i-1]<prc_75[i]:
return i
return len(med)
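# Illustrative sketch (exposition only): picking the number of components from
# a matrix of squared CV errors (rows: segments, columns: components).
def _example_find_aopt():
    from numpy import random
    sep = random.rand(10, 6)
    print find_aopt_from_sep(sep, method='75perc')
    print find_aopt_from_sep(sep, method='vanilla')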

laydi/logger.py

@@ -28,6 +28,19 @@ class Logger:
    def level_number(self, level):
        return self.levels.index(level)
def debug(self, message):
self.log('debug', message)
def notice(self, message):
self.log('notice', message)
def warning(self, message):
self.log('warning', message)
def error(self, message):
self.log('error', message)
class LogView(gtk.TreeView):
    def __init__(self, logger=None, level='notice'):
@@ -152,3 +165,4 @@ class LogMenu(gtk.Menu):
logger = Logger()
log = logger.log

laydi/main.py (new file, 101 lines)

@@ -0,0 +1,101 @@
import sys
import os.path
import paths
# Site specific directories set by configure script.
PREFIX = paths.PREFIX
BINDIR = paths.BINDIR
DATADIR = paths.DATADIR
DOCDIR = paths.DOCDIR
PYDIR = paths.PYDIR
ICONDIR = os.path.join(DATADIR, 'icons')
#: Dictionary of observers
_observers = {}
#: The current Navigator object.
navigator = None
#: The current application
application = None
#: The current projectview
projectview = None
#: The current workflow
workflow = None
#: A cfgparse/optparse options object.
options = None
def notify_observers(name):
call = "%s_changed" % name
for s in _observers.get(name, []):
getattr(s, call)(getattr(sys.modules[__name__], name))
def _add_observer(name, observer):
"""Adds observer as an observer of the named object."""
if not _observers.has_key(name):
_observers[name] = []
_observers[name].append(observer)
def _remove_observer(name, observer):
    """Removes observer as an observer of the named object."""
    if not _observers.has_key(name):
        return
    _observers[name].remove(observer)
def add_navigator_observer(observer):
_add_observer('navigator', observer)
def add_project_observer(observer):
    _add_observer('projectview', observer)
def add_workflow_observer(observer):
_add_observer('workflow', observer)
def add_application_observer(observer):
_add_observer('application', observer)
def remove_navigator_observer(observer):
_remove_observer('navigator', observer)
def remove_project_observer(observer):
    _remove_observer('projectview', observer)
def remove_workflow_observer(observer):
_remove_observer('workflow', observer)
def remove_application_observer(observer):
_remove_observer('application', observer)
def remove_options_observer(observer):
_remove_observer('options', observer)
def set_navigator(nav):
global navigator
navigator = nav
notify_observers('navigator')
def set_application(app):
global application
application = app
notify_observers('application')
def set_projectview(p):
    global projectview
    projectview = p
    notify_observers('projectview')
def set_workflow(wf):
global workflow
workflow = wf
notify_observers('workflow')
def set_options(opt):
global options
options = opt
notify_observers('options')
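# Illustrative sketch (exposition only): an observer is any object with a
# '<name>_changed' method matching the key it registers under. With the
# 'projectview' key used above, a minimal observer looks like this.
class _ExampleObserver:
    def projectview_changed(self, projectview):
        print "projectview is now:", projectview

def _example_observing():
    obs = _ExampleObserver()
    add_project_observer(obs)     # registered under the 'projectview' key
    set_projectview(None)         # fires obs.projectview_changed(None)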

laydi/navigator.py (new file, 476 lines)

@@ -0,0 +1,476 @@
import gtk
import gobject
import time
import scipy
import laydi
from logger import logger
import dataset, plots, projectview, workflow, main
class NavigatorView (gtk.TreeView):
"""The NavigatorView is a tree view of the project.
There is always one NavigatorView, that shows the functions, plots and
datasets in the current project.
"""
def __init__(self):
if main.projectview:
self.data_tree = main.projectview.data_tree
else:
self.data_tree = None
gtk.TreeView.__init__(self)
# Various properties
self.set_enable_tree_lines(True)
self.set_headers_visible(False)
self.get_hadjustment().set_value(0)
# Selection Mode
self.get_selection().set_mode(gtk.SELECTION_MULTIPLE)
self.get_selection().connect('changed',self.on_selection_changed)
self._previous_selection = []
# Setting up TextRenderers etc
self.connect('row_activated', self.on_row_activated)
self.connect('cursor_changed', self.on_cursor_changed)
# Activate context menu
self.menu = NavigatorMenu(self)
self.connect('popup_menu', self.popup_menu)
self.connect('button_press_event', self.on_mouse_event)
self.textrenderer = textrenderer = gtk.CellRendererText()
pixbufrenderer = gtk.CellRendererPixbuf()
self.object_col = gtk.TreeViewColumn('Object')
self.object_col.pack_start(pixbufrenderer,expand=False)
self.object_col.pack_start(textrenderer,expand=False)
self.object_col.set_attributes(textrenderer, cell_background=3,
foreground=4, text=0)
self.object_col.set_attributes(pixbufrenderer, pixbuf=5)
self.append_column(self.object_col)
# send events to plots / itself
self.enable_model_drag_source(gtk.gdk.BUTTON1_MASK,
[("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
gtk.gdk.ACTION_LINK | gtk.gdk.ACTION_MOVE)
self.connect("drag-data-get",self.slot_drag_data)
logger.debug('Initializing navigator window.')
def slot_drag_data(self, treeview, context, selection, target_id, etime):
"""Sets the data for a drag event."""
treeselection = treeview.get_selection()
model, paths = treeselection.get_selected_rows()
if paths:
self.data_tree.drag_data_get(paths[0], selection)
def add_projectview(self, projectview):
"""Dependency injection."""
self.data_tree = projectview.data_tree
self.set_model(projectview.data_tree)
self.data_tree.connect('row-changed',self.on_row_changed)
def on_selection_changed(self, selection):
"""Update the list of currently selected datasets."""
# update prev selection right away in case of multiple events
model, paths = selection.get_selected_rows()
if not paths: # a plot is marked: do nothing
return
tmp = self._previous_selection
self._previous_selection = paths
tree = self.data_tree
# set timestamp on newly selected objects
[tree.set_value(tree.get_iter(path), 6, time.time())
for path in paths if path not in tmp]
objs = [tree.get_iter(path) for path in paths]
objs = [(tree[iter][6], tree[iter][2]) for iter in objs]
objs.sort()
objs = [obj for timestamp, obj in objs]
if objs and isinstance(objs[0], dataset.Dataset):
logger.debug('Selecting dataset')
main.projectview.current_data = objs
else:
logger.debug('Deselecting dataset')
main.projectview.current_data = []
def on_row_changed(self, treestore, pos, iter):
"""Set correct focus and colours when rows have changed."""
obj = treestore[iter][2]
obj_type = treestore[iter][1]
if not (obj or obj_type):
return
self.expand_to_path(pos)
if isinstance(obj, dataset.Dataset):
self.set_cursor(pos)
self.grab_focus()
def on_row_activated(self, widget, path, column):
tree_iter = self.data_tree.get_iter(path)
obj = self.data_tree.get_value(tree_iter, 2)
if isinstance(obj, plots.Plot):
logger.debug('Activating plot')
main.application.change_plot(obj)
elif isinstance(obj, dataset.Dataset):
pass
elif obj == None:
children = []
i = self.data_tree.iter_children(tree_iter)
while i:
child = self.data_tree.get(i, 2)[0]
if isinstance(child, plots.Plot):
children.append(child)
i = self.data_tree.iter_next(i)
main.application.change_plots(children)
else:
t = type(obj)
logger.notice('Activated datatype was %s. Don\'t know what to do.' % t)
def popup_menu(self, *rest):
self.menu.popup(None, None, None, 0, 0)
def on_mouse_event(self, widget, event):
path = widget.get_path_at_pos(int(event.x), int(event.y))
if path:
iter = self.data_tree.get_iter(path[0])
obj = self.data_tree.get_value(iter, 2)
else:
iter = None
obj = None
if isinstance(obj, dataset.Dataset):
self.menu.set_dataset(obj, iter)
else:
self.menu.set_dataset(None, iter)
if event.button == 3:
self.menu.popup(None, None, None, event.button, event.time)
def on_cursor_changed(self, widget):
"""Update statusbar to contain dataset information.
Lists the dimensions of a dataset in the statusbar of the program
if a dataset is focused in the navigator.
"""
path = widget.get_cursor()[0]
tree_iter = self.data_tree.get_iter(path)
obj = self.data_tree.get_value(tree_iter, 2)
if isinstance(obj, dataset.Dataset):
dims = zip(obj.get_dim_name(), obj.shape)
dim_text = ", ".join(["%s (%d)" % dim for dim in dims])
else:
dim_text = ""
main.application['appbar1'].push(dim_text)
class NavigatorMenu(gtk.Menu):
def __init__(self, navigator):
gtk.Menu.__init__(self)
self.navigator = navigator
self.dataset = None
self.tree_iter = None
# Populate main menu
self.load_item = gtk.MenuItem('Load dataset')
self.load_item.connect('activate', self.on_load_dataset, navigator)
self.append(self.load_item)
self.load_item.show()
self.save_item = gtk.MenuItem('Save dataset')
self.save_item.connect('activate', self.on_save_dataset, navigator)
self.append(self.save_item)
self.save_item.show()
self.delete_item = gtk.MenuItem('Delete')
self.delete_item.connect('activate', self.on_delete, navigator)
self.append(self.delete_item)
self.delete_item.show()
self.split_item = gtk.MenuItem('Split on selection')
self.split_item.connect('activate', self.on_split, navigator)
self.append(self.split_item)
self.split_item.show()
# Build transform sub menu
self.trans_menu = gtk.Menu()
self.trans_tr_item = gtk.MenuItem('Transpose')
self.trans_tr_item.connect('activate', self.on_transpose, navigator)
self.trans_menu.append(self.trans_tr_item)
self.trans_tr_item.show()
self.trans_stdr_item = gtk.MenuItem('Std. rows')
self.trans_stdr_item.connect('activate', self.on_standardise_rows, navigator)
self.trans_menu.append(self.trans_stdr_item)
self.trans_stdr_item.show()
self.trans_stdc_item = gtk.MenuItem('Std. cols')
self.trans_stdc_item.connect('activate', self.on_standardise_cols, navigator)
self.trans_menu.append(self.trans_stdc_item)
self.trans_stdc_item.show()
self.trans_log_item = gtk.MenuItem('Log')
self.trans_log_item.connect('activate', self.on_log, navigator)
self.trans_menu.append(self.trans_log_item)
self.trans_log_item.show()
self.trans_item = gtk.MenuItem("Transformation")
self.append(self.trans_item)
self.trans_item.set_submenu(self.trans_menu)
self.trans_item.show()
# Build plot sub menu
self.plot_menu = gtk.Menu()
self.plot_image_item = gtk.MenuItem('Image Plot')
self.plot_image_item.connect('activate', self.on_plot_image, navigator)
self.plot_menu.append(self.plot_image_item)
self.plot_image_item.show()
self.plot_hist_item = gtk.MenuItem('Histogram')
self.plot_hist_item.connect('activate', self.on_plot_hist, navigator)
self.plot_menu.append(self.plot_hist_item)
self.plot_hist_item.show()
self.plot_scatter_item = gtk.MenuItem('Scatter')
self.plot_scatter_item.connect('activate', self.on_plot_scatter, navigator)
self.plot_menu.append(self.plot_scatter_item)
self.plot_scatter_item.show()
self.plot_line_item = gtk.MenuItem('Line view')
self.plot_line_item.connect('activate', self.on_plot_line, navigator)
self.plot_menu.append(self.plot_line_item)
self.plot_line_item.show()
self.plot_bar_item = gtk.MenuItem('Bar Plot')
self.plot_bar_item.connect('activate', self.on_plot_bar, navigator)
self.plot_menu.append(self.plot_bar_item)
self.plot_bar_item.show()
self.plot_box_item = gtk.MenuItem('Box Plot')
self.plot_box_item.connect('activate', self.on_plot_box, navigator)
self.plot_menu.append(self.plot_box_item)
self.plot_box_item.show()
self.plot_item = gtk.MenuItem('Plot')
self.append(self.plot_item)
self.plot_item.set_submenu(self.plot_menu)
self.plot_item.show()
def set_dataset(self, ds, it):
self.dataset = ds
self.tree_iter = it
if ds == None:
self.save_item.set_property('sensitive', False)
self.plot_item.set_property('sensitive', False)
self.trans_item.set_property('sensitive', False)
else:
self.save_item.set_property('sensitive', True)
self.plot_item.set_property('sensitive', True)
self.trans_item.set_property('sensitive', True)
def load_dataset(self, filename):
"""Load the dataset from the given file and add it to the project."""
ds = dataset.read_ftsv(filename)
if isinstance(ds, dataset.GraphDataset):
icon = laydi.icon_factory.get("graph_dataset")
elif isinstance(ds, dataset.CategoryDataset):
icon = laydi.icon_factory.get("category_dataset")
else:
icon = laydi.icon_factory.get("dataset")
main.projectview.add_dataset(ds)
main.projectview.data_tree_insert(None, ds.get_name(), ds, None, "black", icon)
def on_load_dataset(self, item, navigator):
# Set up file chooser.
dialog = gtk.FileChooserDialog('Load dataset')
dialog.set_action(gtk.FILE_CHOOSER_ACTION_OPEN)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK)
dialog.set_select_multiple(True)
dialog.set_current_folder(main.project.datadir)
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
pass
elif retval == gtk.RESPONSE_OK:
for filename in dialog.get_filenames():
self.load_dataset(filename)
else:
print "unknown; ", retval
dialog.destroy()
def on_save_dataset(self, item, navigator):
dialog = gtk.FileChooserDialog('Save dataset')
dialog.set_action(gtk.FILE_CHOOSER_ACTION_SAVE)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, gtk.STOCK_SAVE, gtk.RESPONSE_OK)
dialog.set_current_name("%s.ftsv" % self.dataset.get_name())
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
logger.debug("Cancelled save dataset")
elif retval == gtk.RESPONSE_OK:
logger.debug("Saving dataset as: %s" % dialog.get_filename())
fd = open(dialog.get_filename(), 'w')
dataset.write_ftsv(fd, self.dataset)
fd.close()
else:
print "unknown; ", retval
dialog.destroy()
def on_delete(self, item, navigator):
tm, rows = navigator.get_selection().get_selected_rows()
iters = [tm.get_iter(r) for r in rows]
iters.reverse()
for i in iters:
main.projectview.delete_data(i)
# tm.remove(i)
def on_plot_image(self, item, navigator):
plot = plots.ImagePlot(self.dataset, name='Image Plot')
icon = laydi.icon_factory.get("line_plot")
main.projectview.data_tree_insert(self.tree_iter, 'Image Plot', plot, None, "black", icon)
# fixme: image plot selections are not well defined
#plot.set_selection_listener(projectview.set_selection)
#projectview._selection_observers.append(plot)
def on_plot_hist(self, item, navigator):
projectview = main.projectview
plot = plots.HistogramPlot(self.dataset, name='Histogram')
icon = laydi.icon_factory.get("line_plot")
projectview.data_tree_insert(self.tree_iter, 'Histogram', plot, None, "black", icon)
plot.set_selection_listener(projectview.set_selection)
projectview._selection_observers.append(plot)
def on_plot_scatter(self, item, navigator):
projectview = main.projectview
datasets = main.projectview.current_data
ds_major = datasets[0]
dims_major = ds_major.get_dim_name()
ids_major = ds_major.get_identifiers(dims_major[1], sorted=True)
if len(datasets) > 1:
# If there is more than one active dataset -> try to use the two first
ds_minor = datasets[1]
dims_minor = ds_minor.get_dim_name()
if dims_minor != dims_major or ds_minor.shape[0] != ds_major.shape[0]:
# the selected datasets are not matched -> use initial selected
ds_minor = ds_major
else:
#Only one dataset selected
ds_minor = ds_major
plot = plots.ScatterPlot(ds_major, ds_minor,
dims_major[0], dims_major[1],
ids_major[0], ids_major[1],
name='Scatter (%s)' % ds_major.get_name())
plot.add_axes_spin_buttons(len(ids_major), 0, 1)
icon = laydi.icon_factory.get("line_plot")
projectview.data_tree_insert(self.tree_iter, 'Scatter', plot, None, "black", icon)
plot.set_selection_listener(projectview.set_selection)
projectview._selection_observers.append(plot)
def on_plot_line(self, item, navigator):
projectview = main.projectview
ds = self.dataset
dims = ds.get_dim_name()
ids = ds.get_identifiers(dims[1])
plot = plots.LineViewPlot(ds, name='Line (%s)' % ds.get_name())
icon = laydi.icon_factory.get("line_plot")
projectview.data_tree_insert(self.tree_iter, 'Line view', plot, None, "black", icon)
plot.set_selection_listener(projectview.set_selection)
projectview._selection_observers.append(plot)
def on_plot_bar(self, item, navigator):
projectview = main.projectview
ds = self.dataset
dims = ds.get_dim_name()
ids = ds.get_identifiers(dims[1])
plot = plots.BarPlot(ds, name='Bar (%s)' % ds.get_name())
icon = laydi.icon_factory.get("line_plot")
projectview.data_tree_insert(self.tree_iter, 'Bar plot', plot, None, "black", icon)
plot.set_selection_listener(projectview.set_selection)
projectview._selection_observers.append(plot)
def on_plot_box(self, item, navigator):
projectview = main.projectview
ds = self.dataset
dims = ds.get_dim_name()
ids = ds.get_identifiers(dims[1])
plot = plots.BoxPlot(ds, name='Box (%s)' % ds.get_name())
icon = laydi.icon_factory.get("line_plot")
projectview.data_tree_insert(self.tree_iter, 'Box plot', plot, None, "black", icon)
plot.set_selection_listener(projectview.set_selection)
projectview._selection_observers.append(plot)
def on_transpose(self, item, navigator):
projectview = main.projectview
ds = self.dataset.transpose()
ds._name = ds._name + ".T"
icon = laydi.icon_factory.get(ds)
projectview.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_standardise_rows(self, item, navigator):
projectview = main.projectview
ds = self.dataset.copy()
ds._name = self.dataset._name + ".rsc"
axis = 1
ds._array = ds.asarray()/scipy.expand_dims(ds.asarray().std(axis), axis)
icon = laydi.icon_factory.get(ds)
projectview.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_standardise_cols(self, item, navigator):
projectview = main.projectview
ds = self.dataset.copy()
ds._name = self.dataset._name + ".csc"
axis = 0
ds._array = ds.asarray()/scipy.expand_dims(ds.asarray().std(axis), axis)
icon = laydi.icon_factory.get(ds)
projectview.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_log(self, item, navigator):
projectview = main.projectview
        try:
            if not scipy.all(self.dataset.asarray() > 0):
                raise ValueError
        except Exception:
            logger.log('warning', 'Dataset needs to be strictly positive for a log transform')
            return
ds = self.dataset.copy()
ds._array = scipy.log(ds.asarray())
icon = laydi.icon_factory.get(ds)
ds._name = ds._name + ".log"
projectview.data_tree_insert(self.tree_iter, ds.get_name(), ds, None, "black", icon)
def on_split(self, item, navigator):
if self.dataset is None:
            logger.warning("Only datasets can be split.")
return
dim = self.dataset.get_dim_name(0)
projectview = main.projectview
sel_ids = set(projectview.get_selection()[dim])
sel_ds = self.dataset.subdata(dim, sel_ids)
unsel_ids = set(self.dataset.get_identifiers(dim)) - set(sel_ids)
unsel_ds = self.dataset.subdata(dim, unsel_ids)
icon = laydi.icon_factory.get(self.dataset)
projectview.data_tree_insert(self.tree_iter, 'Selected', sel_ds, None, "black", icon)
projectview.data_tree_insert(self.tree_iter, 'Unselected', unsel_ds, None, "black", icon)

laydi/paths.py.m4 (new file, 7 lines)

@@ -0,0 +1,7 @@
PREFIX = "M4_PREFIX"
BINDIR = "M4_BINDIR"
DATADIR = "M4_DATADIR"
DOCDIR = "M4_DOCDIR"
PYDIR = "M4_PYDIR"

laydi/pca_options.glade (new file, 1138 lines)

(File diff suppressed because it is too large.)
laydi/plots.py (new file, 1219 lines)

(File diff suppressed because it is too large.)
laydi/pls_options.glade (new file, 1092 lines)

(File diff suppressed because it is too large.)
laydi/project.py (new file, 434 lines)

@@ -0,0 +1,434 @@
import os, os.path
import sys
import time
import dataset
import annotations
NAME = "laydi-cmd"
VERSION = "0.1.0"
PROJECT_VERSION_STRING = "Laydi project version 1"
def is_project_directory(dirname):
"""Verifies that a directory is a laydi project"""
if not os.path.isdir(dirname):
return False
## Verify that the version is correct.
version_fn = os.path.join(dirname, "VERSION")
if not os.path.exists(version_fn):
return False
fd = open(version_fn)
line = fd.readline()
fd.close()
if line.strip() != PROJECT_VERSION_STRING:
return False
## Require directories to be present.
if not os.path.isdir(os.path.join(dirname, "annotations")):
return False
if not os.path.isdir(os.path.join(dirname, "data")):
return False
if not os.path.isdir(os.path.join(dirname, "selections")):
return False
if not os.path.isdir(os.path.join(dirname, "exports")):
return False
## If no tests failed, return True
return True
def make_project_directory(dirname, force=False):
    """Creates a project directory.
    force: proceed even if the directory already exists.
    """
    if os.path.exists(dirname) and not force:
        return False
rootdir = dirname
anndir = os.path.join(dirname, "annotations")
seldir = os.path.join(dirname, "selections")
datadir = os.path.join(dirname, "data")
exportdir = os.path.join(dirname, "exports")
version_file_path = os.path.join(dirname, "VERSION")
    if not os.path.isdir(rootdir):
        os.makedirs(rootdir)
for d in [anndir, seldir, datadir, exportdir]:
os.mkdir(d)
    fd = open(version_file_path, "w")
    print >> fd, PROJECT_VERSION_STRING
    fd.close()
    return True
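# Illustrative sketch (exposition only): creating a fresh project tree and
# verifying it. The path below is a made-up example.
def _example_project_directory():
    path = "/tmp/example-laydi-project"
    make_project_directory(path)
    print is_project_directory(path)    # -> True if creation succeeded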
class Universe(object):
"""A Universe is a collection of all existing identifiers in a set of datasets"""
def __init__(self):
self.refcount = {}
def register_dim(self, dim):
"""Increase reference count for identifiers in Dimension object dim"""
d = self.refcount.get(dim.name, None)
if d == None:
d = {}
self.refcount[dim.name] = d
for i in dim:
d[i] = d.get(i, 0) + 1
def register_ds(self, ds):
"""Increase reference count for identifiers in all Dimensions of dataset ds"""
for dim in ds.dims:
self.register_dim(dim)
def unregister_dim(self, dim):
"""Update reference count for identifiers in Dimension object dim
Update reference count for identifiers in Dimension object dim, and remove all
identifiers with a reference count of 0, as they do not (by definition) exist
any longer.
"""
ids = self.refcount[dim.name]
for i in dim:
refcount = ids[i]
if refcount == 1:
ids.pop(i)
else:
ids[i] -= 1
if len(ids) == 0:
self.refcount.pop(dim.name)
def unregister_ds(self, ds):
"""Update reference count for identifiers along Dimensions in Dataset ds.
Update reference count for identifiers along all Dimensions in
Dataset ds, and remove all identifiers with a reference count of 0,
as they do not (by definition) exist any longer.
"""
        for dim in ds.dims:
            self.unregister_dim(dim)
    def register(self, obj):
        if isinstance(obj, dataset.Dataset):
            self.register_ds(obj)
        else:
            self.register_dim(obj)
    def unregister(self, obj):
        if isinstance(obj, dataset.Dataset):
            self.unregister_ds(obj)
        else:
            self.unregister_dim(obj)
    def __getitem__(self, dimname):
        return set(self.refcount[dimname].keys())
    def __iter__(self):
        return self.refcount.keys().__iter__()
class Dimension(object):
"""A Dimension represents the set of identifiers an object has along an axis.
"""
def __init__(self, name, ids=[]):
self.name = name
self.idset = set(ids)
self.idlist = list(ids)
if len(self.idset) != len(self.idlist):
raise Exception("Duplicate identifiers are not allowed")
def __getitem__(self, element):
return self.idlist[element]
def __getslice__(self, start, end):
return self.idlist[start:end]
def __contains__(self, element):
return self.idset.__contains__(element)
def __str__(self):
return "%s: %s" % (self.name, str(self.idlist))
def __len__(self):
return len(self.idlist)
def __iter__(self):
return iter(self.idlist)
def intersection(self, dim):
if self.name != dim.name:
return None
return Dimension(self.name, self.idset.intersection(dim.idset))
def as_tuple(self):
return (self.name, self.idlist)
def verify(self):
for i in self.idlist:
if " " in i or "\t" in i:
raise Exception("Invalid identifier: %s" % i)
class Directory(object):
def __init__(self, path):
self.path = path
self.files = set()
self.timestamp = -1
self.update()
def update(self):
now = time.time()
newfiles = set(os.listdir(self.path))
for fn in newfiles - self.files:
if os.path.isdir(os.path.join(self.path, fn)):
self.dir_created(fn)
else:
self.file_created(fn)
        for fn in self.files - newfiles:
            # fixme: os.path.isdir() is False for paths that no longer
            # exist, so directory removals are reported as file removals
            if os.path.isdir(os.path.join(self.path, fn)):
                self.dir_removed(fn)
            else:
                self.file_removed(fn)
for fn in self.files.intersection(newfiles):
filepath = os.path.join(self.path, fn)
if os.path.getctime(filepath) >= self.timestamp:
if os.path.isdir(filepath):
self.dir_changed(fn)
else:
self.file_changed(fn)
self.files = newfiles
self.timestamp = now
    def file_created(self, fn):
        print "file created: %s" % fn
    def file_changed(self, fn):
        print "file changed: %s" % fn
    def file_removed(self, fn):
        print "file removed: %s" % fn
    def dir_created(self, fn):
        print "directory created: %s" % fn
    def dir_changed(self, fn):
        print "directory changed: %s" % fn
    def dir_removed(self, fn):
        print "directory removed: %s" % fn
class DataDirectory(Directory):
def __init__(self, dirname, project):
self.project = project
self.datasets= []
self.dsfiles = {}
Directory.__init__(self, dirname)
def file_created(self, fn):
"""Called from update() when new files are created.
Load new datasets that have appeared since last update.
"""
filepath = os.path.join(self.path, fn)
name, ext = os.path.splitext(fn)
if ext == ".ftsv":
ds = dataset.read_ftsv(filepath)
self.datasets.append(ds)
self.dsfiles[fn] = ds
def file_changed(self, fn):
"""Called from update() when files are changed.
Delete old dataset and load the new one when dataset files
have been changed.
"""
filepath = os.path.join(self.path, fn)
name, ext = os.path.splitext(fn)
if ext == ".ftsv":
oldds = self.dsfiles[fn]
self.datasets.remove(oldds)
ds = dataset.read_ftsv(filepath)
self.datasets.append(ds)
self.dsfiles[fn] = ds
def file_removed(self, fn):
"""Called from update() when a file is deleted
Removes the associated dataset if a dataset file is removed.
"""
filepath = os.path.join(self.path, fn)
name, ext = os.path.splitext(fn)
if ext == ".ftsv":
ds = self.dsfiles[fn]
self.datasets.remove(ds)
self.dsfiles.pop(fn)
def dir_created(self, fn):
"""Called from update() when a subdirectory is created.
Instantiate new handlers for the directory if possible.
"""
filepath = os.path.join(self.path, fn)
class SelectionParentDirectory(Directory):
def __init__(self, dirname, project):
self.project = project
self.handlers = {}
Directory.__init__(self, dirname)
def dimensions(self):
return self.handlers.keys()
def __getitem__(self, key):
return self.handlers[key]
def file_created(self, fn):
pass
def file_changed(self, fn):
pass
def file_removed(self, fn):
pass
def dir_created(self, fn):
print("dir_created: %s" % fn)
dimname = os.path.split(fn)[-1]
self.handlers[dimname] = SelectionDirectory(os.path.join(self.path, fn), dimname, self.project)
def dir_removed(self, fn):
print("dir_removed: %s" % fn)
dimname = os.path.split(fn)[-1]
removed = self.handlers.pop(dimname)
def update(self):
Directory.update(self)
for e in self.handlers.values():
e.update()
class SelectionDirectory(Directory):
def __init__(self, fn, dimname, project):
self.project = project
self.dimension = dimname
self.selections = {}
Directory.__init__(self, fn)
def read_selection_file(self, fn):
"""Reads a selection file and returns the corresponding Dimension object.
Warnings are printed to terminal on duplicated ids and invalid ids.
"""
print "read_selection_file(%s)" % (fn,)
fd = open(fn)
ids = []
for line in fd.readlines():
e = line.strip()
if e.startswith("#") or e == "":
continue
ids.append(e)
fd.close()
        return Dimension(self.dimension, ids)
    def file_created(self, fn):
        """Called from update() when new files are created.
        Load new selections that have appeared since last update.
        """
        print "loading selection: %s [%s]" % (fn, self.dimension)
        filepath = os.path.join(self.path, fn)
        name, ext = os.path.splitext(fn)
        if ext == ".sel":
            sel = self.read_selection_file(filepath)
            self.selections[fn] = sel
    def file_changed(self, fn):
        """Called from update() when files are changed.
        Replace the old selection with a freshly read one when a
        selection file has been changed.
        """
        filepath = os.path.join(self.path, fn)
        name, ext = os.path.splitext(fn)
        if ext == ".sel":
            self.selections[fn] = self.read_selection_file(filepath)
    def file_removed(self, fn):
        """Called from update() when a file is deleted.
        Removes the associated selection if a selection file is removed.
        """
        name, ext = os.path.splitext(fn)
        if ext == ".sel":
            self.selections.pop(fn, None)
def dir_created(self, fn):
"""Called from update() when a subdirectory is created.
Instantiate new handlers for the directory if possible.
"""
filepath = os.path.join(self.path, fn)
class AnnotationDirectory(Directory):
def __init__(self, dirname, project):
self.project = project
self.dirname = dirname
Directory.__init__(self, dirname)
def file_created(self, fn):
annotations.read_annotations_file(os.path.join(self.dirname, fn))
def file_changed(self, fn):
annotations.read_annotations_file(os.path.join(self.dirname, fn))
def file_removed(self, fn):
print "File removed: %s" % fn
class Project(object):
def __init__(self, dirname):
"""Opens a project directory. The directory must exist and be a valid project."""
## Set path names.
self.rootdir = dirname
self.anndir = os.path.join(dirname, "annotations")
self.seldir = os.path.join(dirname, "selections")
self.datadir = os.path.join(dirname, "data")
self.exportdir = os.path.join(dirname, "exports")
version_file_path = os.path.join(dirname, "VERSION")
self.universe = Universe()
self.data = DataDirectory(self.datadir, self)
self.annotations = AnnotationDirectory(self.anndir, self)
self.selections = SelectionParentDirectory(self.seldir, self)
def update(self):
print "updating project"
self.data.update()
self.selections.update()

laydi/projectview.py (new file, 169 lines)

@@ -0,0 +1,169 @@
import os
import scipy
import gobject
import gtk
import laydi
import logger, dataset, plots, main, project
class ProjectView:
    """A ProjectView contains datasets, selections etc.
    The project, of which the application has only one at any given time,
    is the container for all datasets, plots and selections in use. The data
    in the project is organized in a gtk.TreeStore that is displayed in the
    navigator.
    """
def __init__(self, proj):
self.data_tree = gtk.TreeStore(str,
str,
object,
str,
str,
gobject.TYPE_OBJECT,
float)
self.project = proj
self.dim_names = []
self._selection_observers = []
self._dataset_observers = []
self.current_data = []
self.datasets = []
self.sel_obj = dataset.Selection('Current Selection')
self.selections = []
self._last_selection = None
self._dataset_iter_map = {}
self._load_datasets()
def _load_datasets(self):
print "load datasets from project..."
print "datasets: ", self.project.data.datasets
for ds in self.project.data.datasets:
if isinstance(ds, dataset.GraphDataset):
icon = laydi.icon_factory.get("graph_dataset")
elif isinstance(ds, dataset.CategoryDataset):
icon = laydi.icon_factory.get("category_dataset")
else:
icon = laydi.icon_factory.get("dataset")
self.add_dataset(ds)
self.data_tree_insert(None, ds.get_name(), ds, None, "black", icon)
print "...loaded"
def add_selection_observer(self, observer):
self._selection_observers.append(observer)
observer.selection_changed(None, self.get_selection())
def notify_selection_listeners(self, dim_name):
"""Notifies observers"""
for observer in self._selection_observers:
observer.selection_changed(dim_name, self.get_selection())
def add_dataset_observer(self, observer):
self._dataset_observers.append(observer)
observer.dataset_changed()
def notify_dataset_listeners(self):
"""Notifies observers when new datasets are added"""
for observer in self._dataset_observers:
observer.dataset_changed()
def set_selection(self, dim_name, selection):
"""Sets a current selection and notify observers"""
self.sel_obj[dim_name] = set(selection)
self.notify_selection_listeners(dim_name)
self._last_selection = selection
def get_selection(self):
"""Returns the current selection object"""
return self.sel_obj
def delete_data(self, it):
"""Delete elements from the project."""
child = self.data_tree.iter_children(it)
while child != None:
c = self.data_tree.iter_next(child)
self.delete_data(child)
child = c
main.application.main_view.remove_view(self.data_tree.get(it, 2)[0])
self.data_tree.remove(it)
def add_data(self, parents, data, fun='Function'):
"""Adds a set of data and plots to the navigator.
This method is usually called after a Function in a workflow
has finished and returns its output."""
if len(parents) > 0:
parent_iter = self._dataset_iter_map[parents[0]]
else:
parent_iter = None
# Add the function node to the tree
icon = laydi.icon_factory.get("folder_grey")
it = self.data_tree_insert(parent_iter, fun, None, None, "black", icon)
# Add all returned datasets/plots/selections
for d in data:
# Any kind of dataset
if isinstance(d, dataset.Dataset):
if isinstance(d, dataset.GraphDataset):
icon = laydi.icon_factory.get("graph_dataset")
elif isinstance(d, dataset.CategoryDataset):
icon = laydi.icon_factory.get("category_dataset")
else:
icon = laydi.icon_factory.get("dataset")
self.add_dataset(d)
self.data_tree_insert(it, d.get_name(), d, None, "black", icon)
# Any kind of plot
elif isinstance(d, plots.Plot):
icon = laydi.icon_factory.get("line_plot")
self.data_tree_insert(it, d.get_title(), d, None, "black", icon)
d.set_selection_listener(self.set_selection)
self._selection_observers.append(d)
# Selections are not added to the data tree
elif isinstance(d, dataset.Selection):
self.add_selection(d)
def data_tree_insert(self, parent, text, data, bg, fg, icon, selected = 0):
"""Inserts data into the tree view.
@param text: The title of the object.
@param data: A dataset, plot or function object.
@param bg: Background color.
@param fg: Foreground (font) color.
@param icon: Pixmap icon.
"""
tree = self.data_tree
it = tree.append(parent)
tree[it] = [text, type(data), data, bg, fg, icon, selected]
self._dataset_iter_map[data] = it
return it
def add_dataset(self, dataset):
"""Appends a new Dataset to the project."""
logger.log('debug','Adding dataset: %s' %dataset.get_name())
self.datasets.append(dataset)
for dim_name in dataset.get_all_dims():
if dim_name not in self.dim_names:
self.dim_names.append(dim_name)
self.sel_obj[dim_name] = set()
self.notify_selection_listeners(dim_name)
self.notify_dataset_listeners()
def add_selection(self, selection):
"""Adds a new selection to the project."""
self.selections.append(selection)
self.notify_dataset_listeners()
    def object_at(self, path):
        """Returns the object at a given path in the tree."""
        it = self.data_tree.get_iter(path)
        obj = self.data_tree[it][2]
        if obj:
            obj.show()
        return obj

laydi/selections.py (new file, 659 lines)

@@ -0,0 +1,659 @@
import pygtk
import gtk
import gtk.gdk
import gtk.glade
import gnome
import gnome.ui
import gobject
import scipy
import logger, dataset, main
import annotations
from lib import hypergeom
class SimpleMenu(gtk.Menu):
def __init__(self):
gtk.Menu.__init__(self)
def add_simple_item(self, title, function, *args):
item = gtk.MenuItem(title)
item.connect('activate', function, *args)
self.append(item)
item.show()
class IdListController:
"""Controller class for the identifier list."""
def __init__(self, idlist):
self._idlist = idlist
self._idlist.get_selection().set_mode(gtk.SELECTION_MULTIPLE)
self._idlist.set_rubber_banding(True)
# dimname: current_annotation_name
self._annotation = {}
# current dimension
self._dimension = None
# id, annotation
self._idstore = gtk.ListStore(gobject.TYPE_STRING,
gobject.TYPE_STRING)
self._idstore.set_sort_func(0, self._numeric_compare)
# Annotation tree column
self._annotation_column = None
## Set up identifier list
idlist.set_model(self._idstore)
renderer = gtk.CellRendererText()
dim_column = gtk.TreeViewColumn('Identifiers', renderer, text=0)
dim_column.set_sort_indicator(True)
dim_column.set_sort_column_id(0)
dim_column.set_sort_order(gtk.SORT_ASCENDING)
idlist.insert_column(dim_column, 0)
idlist.connect('button-press-event', self._button_pressed)
## Enable dropping
idlist.drag_dest_set(gtk.DEST_DEFAULT_ALL,
[("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
gtk.gdk.ACTION_LINK)
idlist.connect('drag-data-received', self._drag_data_received)
## Set up identifier list context menu
menu = self._menu = SimpleMenu()
menu.add_simple_item('Import...', self._on_import_list)
menu.add_simple_item('Export...', self._on_export_list)
menu.add_simple_item('Add to selection', self._on_make_selection)
item = gtk.MenuItem('Show annotations')
menu.append(item)
item.show()
self._menu_ann = item
##
## Public interface
##
def set_dimension(self, dimname):
"""Set dimension"""
if dimname == self._dimension:
return
self._dimension = dimname
self.set_annotation(self._annotation.get(dimname, None))
if not self._annotation.has_key(dimname):
self._annotation[dimname] = None
def set_annotation(self, annotation):
"""Set the displayed annotation to annotation. If annotation is None,
the annotation column is hidden. Otherwise the annotation column is
shown and filled with values from the given annotation field."""
if annotation == None:
if self._annotation_column != None:
self._idlist.remove_column(self._annotation_column)
self._annotation_column = None
else:
idlist = [x[0] for x in self._idstore]
annlist = annotations.get_dim_annotations(self._dimension,
annotation,
idlist)
for i, x in enumerate(self._idstore):
x[1] = annlist[i]
if self._annotation_column == None:
renderer = gtk.CellRendererText()
col = gtk.TreeViewColumn(annotation, renderer, text=1)
col.set_sort_indicator(True)
col.set_sort_column_id(1)
col.set_sort_order(gtk.SORT_ASCENDING)
self._idlist.append_column(col)
self._annotation_column = col
self._annotation_column.set_title(annotation)
self._annotation[self._dimension] = annotation
    def set_selection(self, selection):
        """Set the selection to be displayed.
        The selection itself is not stored; its values are copied into the
        ListStore."""
self._idstore.clear()
# Return if no selection
if selection == None:
return
# Otherwise show selection, possibly with annotations.
#id_list = list(selection[self._dimension])
idlist = list(selection[self._dimension])
if self._annotation[self._dimension] != None:
annlist = annotations.get_dim_annotations(self._dimension,
self._annotation[self._dimension],
idlist)
for id, ann in zip(idlist, annlist):
self._idstore.append((id, ann))
else:
for e in idlist:
self._idstore.append((e, None))
##
## Private interface
##
def _update_annotations_menu(self):
"""Updates the annotations menu with the available annotations for the
current dim."""
dim_h = annotations.get_dim_handler(self._dimension)
if not dim_h:
self._menu_ann.set_sensitive(False)
else:
annotations_menu = gtk.Menu()
self._menu_ann.set_sensitive(True)
dh = annotations.get_dim_handler(self._dimension)
ann_names = dh.get_annotation_names()
for ann in ann_names:
item = gtk.MenuItem(ann)
item.connect('activate', self._on_annotation_activated, ann)
annotations_menu.append(item)
item.show()
self._menu_ann.set_submenu(annotations_menu)
    def import_annotation_file(self):
        """Pops up a file dialog and asks the user to select one or more
        annotation files to be loaded. Each file is loaded into an
        annotations.DictAnnotationHandler object."""
dialog = gtk.FileChooserDialog('Load annotations')
dialog.set_action(gtk.FILE_CHOOSER_ACTION_OPEN)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK)
dialog.set_select_multiple(True)
dialog.set_current_folder(main.project.anndir)
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
pass
elif retval == gtk.RESPONSE_OK:
for filename in dialog.get_filenames():
annotations.read_annotations_file(filename)
else:
print "unknown; ", retval
dialog.destroy()
def export_annotations(self):
"""Pops up a file dialog and ask the user to select a file to save
the currently displayed annotations to.
"""
        dialog = gtk.FileChooserDialog('Export annotations')
        dialog.set_current_folder(main.project.exportdir)
dialog.set_action(gtk.FILE_CHOOSER_ACTION_SAVE)
dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_SAVE, gtk.RESPONSE_OK)
retval = dialog.run()
if retval in [gtk.RESPONSE_CANCEL, gtk.RESPONSE_DELETE_EVENT]:
pass
elif retval == gtk.RESPONSE_OK:
filename = dialog.get_filename()
fd = open(filename, 'w')
dim = self._dimension
print >> fd, "%s\t%s" % (dim, self._annotation[dim])
for id, value in self._idstore:
print >> fd, "%s\t%s" % (id, value)
fd.close()
else:
print "unknown; ", retval
dialog.destroy()
def set_rank(self, ds):
print "Set rank."
ra = scipy.sum(ds.asarray(), 1)
ranks = {}
dim = ds.get_dim_name()[0]
for key, value in ds[dim].items():
ranks[key] = ra[value]
ann_h = annotations.get_dim_handler(self._dimension)
if ann_h is None:
ann_h = annotations.DictAnnotationHandler()
annotations.set_dim_handler(self._dimension, ann_h)
ann_h.add_annotations('Rank', ranks)
##
## GTK Callbacks
##
def _numeric_compare(self, treemodel, iter1, iter2):
column = treemodel.get_sort_column_id()[0]
item1 = treemodel.get_value(iter1, column)
item2 = treemodel.get_value(iter2, column)
try:
item1 = float(item1)
item2 = float(item2)
        except (TypeError, ValueError):
            logger.log("notice", "Could not convert to float: %s, %s" % (item1, item2))
return cmp(item1, item2)
def _popup_menu(self, *rest):
self._update_annotations_menu()
self._menu.popup(None, None, None, 0, 0)
def _on_annotation_activated(self, menuitem, annotation):
self.set_annotation(annotation)
def _button_pressed(self, widget, event):
if event.button == 3:
self._update_annotations_menu()
self._menu.popup(None, None, None, event.button, event.time)
return True
def _on_export_list(self, menuitem):
self.export_annotations()
def _on_import_list(self, menuitem):
self.import_annotation_file()
def _on_make_selection(self, menuitem):
selection = self._idlist.get_selection()
model, paths = selection.get_selected_rows()
if paths==None: return
iters = [self._idstore.get_iter(p) for p in paths]
ids = [self._idstore.get_value(i, 0) for i in iters]
main.projectview.set_selection(self._dimension, ids)
def _drag_data_received(self, widget, drag_context, x, y,
selection, info, timestamp):
treestore, path = selection.tree_get_row_drag_data()
i = treestore.get_iter(path)
obj = treestore.get_value(i, 2)
if isinstance(obj, dataset.Dataset):
if self._dimension in obj.get_dim_name():
self.set_rank(obj)
widget.emit_stop_by_name('drag-data-received')
class SelectionListController:
def __init__(self, seltree, idlist_controller):
self._seltree = seltree
self._sel_stores = {}
self._detail_cols = []
self._dimension = None
self._idlist_controller = idlist_controller
self._details_on = False
# Selection column
renderer = gtk.CellRendererText()
sel_column = gtk.TreeViewColumn('Selection', renderer, text=0)
sel_column.set_resizable(True)
sel_column.set_max_width(200)
seltree.insert_column(sel_column, 0)
# Detail columns
cols = [('In CS', 3), ('All', 4), ('Rank', 5)]
for name, store_col_num in cols:
col = gtk.TreeViewColumn(name, renderer, text=store_col_num)
col.set_sort_indicator(True)
col.set_sort_column_id(store_col_num)
col.set_sort_order(gtk.SORT_ASCENDING)
self._detail_cols.append(col)
# Signals
seltree.connect('row-activated', self._on_row_activated)
seltree.connect('cursor-changed', self._on_cursor_changed)
seltree.connect('button-press-event', self._on_button_pressed)
seltree.drag_dest_set(gtk.DEST_DEFAULT_ALL,
[("GTK_TREE_MODEL_ROW", gtk.TARGET_SAME_APP, 7)],
gtk.gdk.ACTION_LINK)
seltree.connect('drag-data-received', self._drag_data_received)
# Selections context menu
self._seltree_menu = SimpleMenu()
self._seltree_menu.add_simple_item('Sort by selection',
self._on_seltree_sort)
self._seltree_menu.add_simple_item('Show details',
self._enable_details, True)
self._seltree_menu.add_simple_item('Hide details',
self._enable_details, False)
#
# Public interface
#
def activate(self):
self._seltree.set_cursor((0,))
    def set_projectview(self, projectview):
        """Dependency injection."""
        projectview.add_selection_observer(self)
def set_dimlist_controller(self, dimlist_controller):
"""Dependency injection of the dimension list controller."""
self._dimlist_controller = dimlist_controller
def set_dimension(self, dim):
"""Set the current dimension, changing the model of the treeview
to match dim. After this the current dimension of the identifier list
is updated."""
self._ensure_selection_store(dim)
self._seltree.set_model(self._sel_stores[dim])
self._idlist_controller.set_dimension(dim)
self._dimension = dim
def selection_changed(self, dimname, selection):
"""Callback function from Project."""
for dim in selection.dims():
self._ensure_selection_store(dim)
store = self._sel_stores[dim]
if not self._get_current_selection_iter(selection, dim):
n = len(selection[dim])
values = (selection.title, selection, dim, n, n, 0)
store.insert_after(None, None, values)
else:
# update size of current selection
for row in store:
if row[1]==selection:
row[3] = row[4] = len(selection[dim])
path = self._seltree.get_cursor()
if path and self._sel_stores.has_key(self._dimension):
it = self._sel_stores[self._dimension].get_iter(path[0])
sel = self._sel_stores[self._dimension].get_value(it, 1)
self._idlist_controller.set_selection(sel)
    def add_dataset(self, dataset):
        """Converts a CategoryDataset to Selection objects and adds it to
        the selection tree. The name of the dataset will be the parent
        node in the tree, and the identifiers along the first axis will
        be added as the names of the subselections."""
dim_name = dataset.get_dim_name(0)
self._ensure_selection_store(dim_name)
store = self._sel_stores[dim_name]
di = self._get_dataset_iter(dataset)
if not di:
n_tot = dataset.shape[0]
selection = main.projectview.get_selection().get(dim_name)
ds_idents = dataset.get_identifiers(dim_name)
n_cs = len(selection.intersection(ds_idents))
values = (dataset.get_name(), dataset, dim_name, n_cs, n_tot, 2)
i = store.insert_after(None, None, values)
for selection in dataset.as_selections():
n_sel = len(selection[dim_name])
values = (selection.title, selection, dim_name, 0, n_sel, 0)
store.insert_after(i, None, values)
#
# Private interface
#
def _add_selection_store(self, dim):
"""Add a new gtk.TreeStore for the selections on a dimension."""
# Create new store
# Two types of lines, one for CategoryDatasets and one for
# Selections. The elements are title, link to dataset or selection,
# name of dimension, num. members in selection, num. in
# intersection with current selection and the rank of selection.
store = gtk.TreeStore(gobject.TYPE_STRING,
gobject.TYPE_PYOBJECT,
gobject.TYPE_STRING,
gobject.TYPE_INT,
gobject.TYPE_INT,
gobject.TYPE_FLOAT)
# Set selection store for this dimension
self._sel_stores[dim] = store
def _ensure_selection_store(self, dim):
"""Ensure that the object has a gtk.TreeStore for the given dimension"""
# Do not overwrite existing stores
if self._sel_stores.has_key(dim):
return
self._add_selection_store(dim)
def _get_dataset_iter(self, ds):
"""Returns the iterator to the selection tree row containing a
given dataset."""
store = self._sel_stores[ds.get_dim_name(0)]
i = store.get_iter_first()
while i:
if store.get_value(i, 1) == ds:
return i
i = store.iter_next(i)
return None
def _get_current_selection_iter(self, selection, dimension):
if not self._sel_stores.has_key(dimension):
return None
store = self._sel_stores[dimension]
i = store.get_iter_first()
while i:
if store.get_value(i, 1) == selection:
if store.get_value(i, 2) == dimension:
return i
i = store.iter_next(i)
return None
def _sort_selections(self, dataset):
"""Ranks selections by intersection with current selection.
Ranks determined by the hypergeometric distribution.
"""
dim_name = dataset.get_dim_name(0)
sel_store = self._sel_stores[dim_name]
selection_obj = main.projectview.get_selection()
current_selection = selection_obj.get(dim_name)
if current_selection==None: return
pvals = hypergeom.gene_hypergeo_test(current_selection, dataset)
for row in sel_store:
if row[1]==dataset:
for child in row.iterchildren():
name = child[0]
child[3] = pvals[name][0]
child[4] = pvals[name][1]
child[5] = pvals[name][2]
sel_store.set_sort_column_id(5, gtk.SORT_ASCENDING)
#
# GTK callbacks
#
    def _enable_details(self, widget, on):
        if self._details_on == on: return
        self._details_on = on
        if on:
            for col in self._detail_cols:
                self._seltree.insert_column(col, -1)
        else:
            for col in self._detail_cols:
                self._seltree.remove_column(col)
def _drag_data_received(self, widget, drag_context, x, y,
selection, info, timestamp):
treestore, path = selection.tree_get_row_drag_data()
i = treestore.get_iter(path)
obj = treestore.get_value(i, 2)
if isinstance(obj, dataset.CategoryDataset):
self.add_dataset(obj)
self._dimlist_controller.set_dimension(obj.get_dim_name(0))
widget.emit_stop_by_name('drag-data-received')
def _on_cursor_changed(self, widget):
"Show the list of identifier strings."
store = self._sel_stores[self._dimension]
p = self._seltree.get_cursor()[0]
i = store.get_iter(p)
obj = store.get_value(i, 1)
if isinstance(obj, dataset.Selection):
self._idlist_controller.set_selection(obj)
else:
self._idlist_controller.set_selection(None)
def _on_row_activated(self, widget, path, column):
store = self._sel_stores[self._dimension]
i = store.get_iter(path)
obj = store.get_value(i, 1)
if isinstance(obj, dataset.Dataset):
seltree = self._seltree
if seltree.row_expanded(path):
seltree.collapse_row(path)
else:
seltree.expand_row(path, True)
elif isinstance(obj, dataset.Selection):
main.projectview.set_selection(self._dimension,
obj[self._dimension])
def _on_button_pressed(self, widget, event):
"""Button press callbak."""
if event.button == 3:
self._seltree_menu.popup(None, None, None, event.button, event.time)
def _on_seltree_sort(self, menuitem):
"""Sort selection tree if row is category dataset."""
store = self._sel_stores[self._dimension]
p = self._seltree.get_cursor()[0]
i = store.get_iter(p)
obj = store.get_value(i, 1)
if isinstance(obj, dataset.CategoryDataset):
self._sort_selections(obj)
class DimListController:
def __init__(self, dimlist, seltree_controller):
self._current_dim = None
self._seltree_controller = seltree_controller
self.show_hidden = False
## dimstore is a list of all dimensions in the application
self.dimstore = gtk.ListStore(gobject.TYPE_STRING)
# filter for hiding dims prefixed with underscore
self.dimstore_filter = self.dimstore.filter_new()
self.dimstore_filter.set_visible_func(self._dimension_filter)
## The widgets we are controlling
self.dimlist = dimlist
## Set up dimensions list
dimlist.set_model(self.dimstore_filter)
renderer = gtk.CellRendererText()
dim_column = gtk.TreeViewColumn('Dimension', renderer, text=0)
dimlist.insert_column(dim_column, 0)
# Signals
dimlist.connect('row-activated', self._dim_row_activated)
dimlist.connect('cursor-changed', self._dim_cursor_changed)
dimlist.connect('button-press-event', self._dimlist_button_pressed)
# Set up dimension context menu
self._dimlist_menu = SimpleMenu()
self._dimlist_menu.add_simple_item('Hide', self._on_dim_hide)
self._dimlist_menu.add_simple_item('Show all', self._on_dim_show)
##
## Public interface
##
def set_projectview(self, projectview):
"""Dependency injection."""
self.dim_names = projectview.dim_names
self.update_dims()
projectview.add_dataset_observer(self)
def get_dimension(self, dim):
"""Returns the iterator to the dimension with the given name, or
None if not found."""
i = self.dimstore_filter.get_iter_first()
while i:
if self.dimstore_filter.get_value(i, 0) == dim:
return i
i = self.dimstore_filter.iter_next(i)
return None
def set_dimension(self, dimname):
"""Sets the current dimension."""
self._current_dim = dimname
dim = self.get_dimension(self._current_dim)
path = self.dimstore_filter.get_path(dim)
if self.dimlist.get_cursor()[0] != path:
self.dimlist.set_cursor(self.dimstore_filter.get_path(dim))
self._seltree_controller.set_dimension(dimname)
def dataset_changed(self):
"""Callback function from Project."""
self.update_dims()
def update_dims(self):
"""Update the list of dimensions shown"""
for dim in self.dim_names:
if not self.get_dimension(dim):
self.dimstore.insert_after(None, (dim,))
self.dimstore_filter.refilter()
#
# Private interface
#
def _dimension_filter(self, store, row):
"""Filters out dimensions with underscore prefix."""
if self.show_hidden:
return True
visible = False
name = store.get_value(row, 0)
if name != None:
visible = name[0]!="_"
return visible
#
# GTK Callbacks.
#
def _on_dim_hide(self, menuitem):
"""Menu item callback function which hides underscore prefixed
dimensions."""
self.show_hidden = False
self.dimstore_filter.refilter()
def _on_dim_show(self, menuitem):
"""Menu item callback function that shows underscore prefixed
dimension names."""
self.show_hidden = True
self.dimstore_filter.refilter()
def _dim_cursor_changed(self, widget):
cursor = self.dimlist.get_cursor()[0]
i = self.dimstore_filter.get_iter(cursor)
row = self.dimstore_filter.get_value(i, 0)
self.set_dimension(row)
self._seltree_controller.activate()
def _dim_row_activated(self, widget, path, column):
#self._seltree_controller.set_dimension(dim)
pass
def _dimlist_button_pressed(self, widget, event):
if event.button == 3:
self._dimlist_menu.popup(None, None, None, event.button, event.time)

laydi/view.py (new file, 1006 lines)

(File diff suppressed because it is too large.)
laydi/workflow.py (new file, 476 lines)

@@ -0,0 +1,476 @@
import gtk, gobject
import sys
import os
import inspect
import logger
import laydi
import main
def _workflow_classes(dir, modname):
"""Returns a list of all subclasses of Workflow in a given module"""
workflow_classes = []
module = __import__('%s' % (modname,))
d = module.__dict__
for wf in d.values():
try:
if issubclass(wf, Workflow):
workflow_classes.append(wf)
except TypeError, e:
pass
return workflow_classes
def workflow_list():
"""Returns a list containing all new workflows"""
retval = []
# List all .py files that can contain workflow classes
wf_path = sys.modules['workflows'].__path__
    wf_files = []
    for dir in wf_path:
        for fn in os.listdir(dir):
            if fn.endswith('.py') and ('#' not in fn):
                wf_files.append((dir, fn[:-3]))
    # Try to load each file and look for Workflow derived classes
    for dir, fn in wf_files:
        try:
            for wf in _workflow_classes(dir, fn):
                retval.append(wf)
except Exception, e:
logger.log('warning', 'Cannot load workflow: %s' % fn)
logger.log('warning', e)
return retval
def find_workflow(basename):
"""Searches for a workflow with a given filename."""
# List all .py files that can contain workflow classes
wf_path = main.options.workflowdir.split(':')
wf_file = None
for dir in wf_path:
fn = os.path.join(dir, "%s.py" % basename)
if os.path.isfile(fn):
wf_file = fn
return _workflow_classes(dir, basename)[0]
return None
class Workflow:
"""Defines a workflow that contains a set of analysis stages.
A Workflow is a set of analysis stages for a certain type of analysis.
Each stage contains some possible operations to do accomplish that
task.
"""
name = "Workflow"
ident = None
description = "Workflow Description"
def __init__(self):
self.stages = []
self.stages_by_id = {}
    def get_data_file_name(self, filename):
        """Checks if a file with the given name exists in the data directory.
        Returns the file name if the file exists in the data directory, which
        is defined as datadir/workflowname. If the file does not exist, or the
        workflow does not have an identifier, this method returns None."""
        if self.ident is None:
return None
fn = os.path.join(main.options.datadir, self.ident, filename)
if os.path.isfile(fn):
return fn
return None
def add_stage(self, stage):
self.stages.append(stage)
self.stages_by_id[stage.id] = stage
def print_tree(self):
print "Workflow:", self.name
for stage in self.stages:
print ' %s' % stage.name
for fun in stage.functions:
print ' %s' % fun.name
# def add_project(self,project):
# if project == None:
# logger.log('notice','Proejct is empty')
# logger.log('notice','Project added in : %s' %self.name)
# self.project = project
class EmptyWorkflow(Workflow):
name = 'Empty Workflow'
def __init__(self):
Workflow.__init__(self)
class Stage:
"""A stage is a part of the data analysis process.
Each stage contains a set of functions that can be used to
accomplish the task. A typical early stage is 'preprocessing', which
can be done in several ways, each represented by a function.
"""
def __init__(self, id, name):
self.id = id
self.name = name
self.functions = []
self.functions_by_id = {}
def add_function(self, fun):
self.functions.append(fun)
self.functions_by_id[fun.id] = fun
class Function:
"""A Function object encapsulates a function on a data set.
Each Function instance encapsulates some function that can be applied
to one or more types of data.
"""
def __init__(self, id, name):
self.id = id
self.name = name
    def validate_input(self):
        # Subclasses should override this; the default accepts any input.
        return Validation(True, "Validation Not Implemented")
def run(self):
pass
class Validation:
def __init__(self,result, reason):
self.succeeded = result
self.reason = reason
class WorkflowView (gtk.VBox):
def __init__(self, wf):
gtk.VBox.__init__(self)
self.workflow = wf
self.setup_workflow(wf)
def setup_workflow(self, wf):
# Add stage in the process
for stage in wf.stages:
exp = gtk.Expander(stage.name)
btn_align = gtk.Alignment(xscale=0.9)
btn_align.set_padding(0,4,20,0)
btn_align.show()
btn_box = gtk.VBox()
btn_align.add(btn_box)
btn_box.show()
exp.add(btn_align)
# Add functions in each stage
for fun in stage.functions:
btn = gtk.Button(fun.name)
btn.connect('clicked',
lambda button, f=fun : run_function(f))
btn_box.add(btn)
btn.show()
exp.show()
self.pack_start(exp, expand=False, fill=False)
def remove_workflow(self):
for c in self.get_children():
c.hide()
self.remove(c)
def set_workflow(self, workflow):
self.workflow = workflow
self.remove_workflow()
self.setup_workflow(workflow)
class Options(dict):
"""Options base class.
"""
def __init__(self, *args,**kw):
dict.__init__(self, *args, **kw)
self['out_plots'] = []
self['out_data'] = []
self['all_plots'] = []
self['all_data'] = []
def _copy_from_list(self, key_list):
"""Returns suboptions (dictionary) from a list of keys.
"""
d = {}
for key in key_list:
d[key] = self.get(key, None)
return d
class OptionsDialog(gtk.Dialog):
"""The basic input/output dialog box.
This defines the first page of the function options-gui.
Any function that invokes a option-gui will inherit from this class.
"""
def __init__(self, data, options, input_names=['X','Y']):
gtk.Dialog.__init__(self, 'Input-Output dialog',
None,
gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_OK,
gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL))
self._options = options
self._data = data
self._editable = True
self.set_size_request(550,450)
# create notebook
self.nb = nb = gtk.Notebook()
# 1. page: input/output
#inputs
input_frame = gtk.Frame("Input")
hbox = gtk.HBox(True, 8)
align = gtk.Alignment(1, 1, 1, 1)
align.set_padding(8, 8, 8, 8)
align.add(hbox)
input_frame.add(align)
for i, name in enumerate(input_names):
frame = gtk.Frame(name)
frame.set_label_align(0.5, 0.5)
label = gtk.Label(data[i]._name + "\n" + str(data[i]._array.shape))
frame.add(label)
hbox.add(frame)
#outputs
output_frame = gtk.Frame("Output")
output_hbox = gtk.HBox(True,4)
output_align = gtk.Alignment(1, 1, 1, 1)
output_align.set_padding(8, 8, 8, 8) #left padding:8
output_align.add(output_hbox)
output_frame.add(output_align)
# plots
plot_list = gtk.ListStore(str, 'gboolean', gtk.gdk.Pixbuf)
plot_treeview = gtk.TreeView(plot_list)
# Add plots
plot_icon = laydi.icon_factory.get('line_plot')
for plt, name, use in self._options['all_plots']:
plot_list.append((name, use, plot_icon))
        # Renderer for icon
        icon_renderer = gtk.CellRendererPixbuf()
icon_renderer.set_property('pixbuf', plot_icon)
# Renderer for active toggle.
active_renderer = gtk.CellRendererToggle()
active_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
active_renderer.connect('toggled', toggled, plot_list)
active_column = gtk.TreeViewColumn('Use', active_renderer, active=1)
# Renderer for plot title.
title_renderer = gtk.CellRendererText()
title_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_EDITABLE)
title_column = gtk.TreeViewColumn('Plot', title_renderer, text=0)
title_column.pack_start(icon_renderer, expand=False)
# Add columns to tree view.
plot_treeview.append_column(active_column)
plot_treeview.append_column(title_column)
## datasets
dataset_list = gtk.ListStore(str, 'gboolean', gtk.gdk.Pixbuf)
dataset_treeview = gtk.TreeView(dataset_list)
# Add datasets
data_icon = laydi.icon_factory.get('dataset')
for dat, name, use in self._options['all_data']:
dataset_list.append((name, use, data_icon))
# Renderer for icon
icon_renderer = gtk.CellRendererPixbuf()
icon_renderer.set_property('pixbuf', data_icon)
# Renderer for active toggle.
active_renderer = gtk.CellRendererToggle()
active_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_ACTIVATABLE)
active_renderer.connect('toggled', toggled, dataset_list)
active_column = gtk.TreeViewColumn('Use', active_renderer, active=1)
# Renderer for dataset title.
title_renderer = gtk.CellRendererText()
title_renderer.set_property('mode', gtk.CELL_RENDERER_MODE_EDITABLE)
title_column = gtk.TreeViewColumn('Dataset', title_renderer, text=0)
title_column.pack_start(icon_renderer, expand=False)
# Add columns to tree view.
dataset_treeview.append_column(active_column)
dataset_treeview.append_column(title_column)
# add treeviews to output frame
output_hbox.add(plot_treeview)
output_hbox.add(dataset_treeview)
# vbox for input/spacer/output
vbox1 = gtk.VBox()
vbox1.add(input_frame)
vbox1.add(gtk.HSeparator())
vbox1.add(output_frame)
# add vbox to notebook
nb.insert_page(vbox1, gtk.Label("Input/Output"), 0)
self.vbox.add(nb)
#keep ref to liststores
self.dataset_list = dataset_list
self.plot_list = plot_list
def run(self):
self.vbox.show_all()
return gtk.Dialog.run(self)
def set_options(self, options):
self._options = options
def update_options(self, options):
self._options.update(options)
    def set_output(self):
        # get toggled output data
        out_data = [item[0] for name, mark, ic in self.dataset_list
                    for item in self._options['all_data']
                    if mark and name == item[1]]
        # get toggled plots
        out_plots = [item[0] for name, mark, ic in self.plot_list
                     for item in self._options['all_plots']
                     if mark and name == item[1]]
        # update options
        self._options['out_data'] = out_data
        self._options['out_plots'] = out_plots
    def set_editable(self, editable):
        self._editable = editable
def set_data(self, data):
self._data = data
def get_data(self):
return self._data
def get_options(self):
return self._options
def add_page_from_glade(self, glade_file, widget_name, page_title):
"""Adds a new page(s) to the existing notebook.
The input widget (added as a page in notebook) is defined
in the glade file.
input:
glade_file -- path to glade file
widget_name -- name of widget from glade file
"""
        try:
            self.wTree = gtk.glade.XML(glade_file)
        except:
            logger.log('notice', 'Could not find glade file: %s' % glade_file)
            return
widget = self.wTree.get_widget(widget_name)
win = widget.get_parent()
win.hide()
widget.unparent()
self.nb.insert_page(widget, gtk.Label(page_title), -1)
self.nb.set_current_page(0)
def toggled(renderer, path, store):
it = store.get_iter(path)
old_value = store.get_value(it, 1)
store.set_value(it, 1, not old_value)
class WorkflowMenu (gtk.Menu):
def __init__(self, workflow):
gtk.Menu.__init__(self)
self._workflow = workflow
for stage in workflow.stages:
self.append(self._create_stage_item(stage))
def _create_stage_item(self, stage):
stage_menu_item = gtk.MenuItem(stage.name)
stage_menu_item.show()
stage_menu = gtk.Menu()
stage_menu_item.set_submenu(stage_menu)
for fun in stage.functions:
stage_menu.append(self._create_function_item(fun))
return stage_menu_item
def _create_function_item(self, func):
menuitem = gtk.MenuItem(func.name)
menuitem.connect('activate',
lambda item, f=func : run_function(f))
menuitem.show()
return menuitem
def run_function(function):
logger.log('debug', 'Starting function: %s' % function.name)
parent_data = main.projectview.current_data
validation = function.validate_input()
if not validation.succeeded:
        logger.log('warning', 'Invalid input data: ' + str(validation.reason))
return
args, varargs, varkw, defaults = inspect.getargspec(function.run)
# first argument is 'self' and second should be the selection
# and we don't care about those...
args.remove('self')
if "selection" in args:
pass_selection = True
args.remove('selection')
else:
pass_selection = False
if varargs and len(parent_data) < len(args):
logger.log('warning', "Function requires minimum %d datasets selected." % len(args))
return
elif not varargs and args and len(args) != len(parent_data):
# functions requiring datasets have to have the right number
logger.log('warning', "Function requires %d datasets, but only %d selected." % (len(args), len(parent_data)))
return
if not args:
# we allow functions requiring no data to be run even if a
        # dataset is selected
data = []
else:
data = parent_data
if pass_selection:
# if the function has a 'selection' argument, we pass in
# the selection
new_data = function.run(selection=main.projectview.get_selection(), *data)
else:
new_data = function.run(*data)
if new_data != None:
main.projectview.add_data(parent_data, new_data, function.name)
logger.log('debug', 'Function ended: %s' % function.name)
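For illustration, here is a minimal sketch of a workflow module that workflow_list() could pick up. The import path and the run() body are assumptions made for this example, not part of the commit:

# Hypothetical example of a module placed on the workflow path.
# The import path below is an assumption for this sketch.
from laydi import workflow

class PrintFunction(workflow.Function):
    def __init__(self):
        workflow.Function.__init__(self, 'print', 'Print dataset')
    def run(self, ds):
        # run() may return new datasets; returning None adds nothing.
        print "Got dataset:", ds
        return None

class MinimalWorkflow(workflow.Workflow):
    name = 'Minimal Workflow'
    ident = 'minimal'
    description = 'Smallest possible example workflow.'
    def __init__(self):
        workflow.Workflow.__init__(self)
        stage = workflow.Stage('inspect', 'Inspection')
        stage.add_function(PrintFunction())
        self.add_stage(stage)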

(deleted file, 256 lines; a sample matplotlibrc)

@@ -1,256 +0,0 @@
### MATPLOTLIBRC FORMAT
# This is a sample matplotlib configuration file. It should be placed
# in HOME/.matplotlib/matplotlibrc (unix/linux like systems) and
# C:\Documents and Settings\yourname\.matplotlib (win32 systems)
#
# By default, the installer will overwrite the existing file in the
# install path, so if you want to preserve yours, please move it to
# your HOME dir and set the environment variable if necessary.
#
# This file is best viewed in an editor which supports python mode
# syntax highlighting
#
# Blank lines, or lines starting with a comment symbol, are ignored,
# as are trailing comments. Other lines must have the format
#
# key : val # optional comment
#
# Colors: for the color values below, you can either use
# - a matplotlib color string, such as r, k, or b
# - an rgb tuple, such as (1.0, 0.5, 0.0)
# - a hex string, such as ff00ff (no '#' symbol)
# - a scalar grayscale intensity such as 0.75
# - a legal html color name, eg red, blue, darkslategray
#### CONFIGURATION BEGINS HERE
# the default backend; one of GTK GTKAgg GTKCairo FltkAgg QtAgg TkAgg
# Agg Cairo GD GDK Paint PS PDF SVG Template
backend : GTKAgg
numerix : numpy # numpy, Numeric or numarray
interactive : False # see http://matplotlib.sourceforge.net/interactive.html
toolbar : toolbar2 # None | classic | toolbar2
timezone : UTC # a pytz timezone string, eg US/Central or Europe/Paris
# Where your matplotlib data lives if you installed to a non-default
# location. This is where the matplotlib fonts, bitmaps, etc reside
#datapath : /home/jdhunter/mpldata
### LINES
# See http://matplotlib.sourceforge.net/matplotlib.lines.html for more
# information on line properties.
lines.linewidth : 1.0 # line width in points
lines.linestyle : - # solid line
lines.color : blue
lines.marker : None # the default marker
lines.markerfacecolor : blue
lines.markeredgecolor : black
lines.markeredgewidth : 0.5 # the line width around the marker symbol
lines.markersize : 6 # markersize, in points
lines.dash_joinstyle : miter # miter|round|bevel
lines.dash_capstyle : butt # butt|round|projecting
lines.solid_joinstyle : miter # miter|round|bevel
lines.solid_capstyle : projecting # butt|round|projecting
lines.antialiased : True # render lines in antialiased (no jaggies)
### PATCHES
# Patches are graphical objects that fill 2D space, like polygons or
# circles. See
# http://matplotlib.sourceforge.net/matplotlib.patches.html for more
# information on patch properties
patch.linewidth : 1.0 # edge width in points
patch.facecolor : blue
patch.edgecolor : black
patch.antialiased : True # render patches in antialiased (no jaggies)
### FONT
#
# font properties used by text.Text. See
# http://matplotlib.sourceforge.net/matplotlib.font_manager.html for more
# information on font properties. The 6 font properties used for font
# matching are given below with their default values.
#
# The font.family property has five values: 'serif' (e.g. Times),
# 'sans-serif' (e.g. Helvetica), 'cursive' (e.g. Zapf-Chancery),
# 'fantasy' (e.g. Western), and 'monospace' (e.g. Courier). Each of
# these font families has a default list of font names in decreasing
# order of priority associated with them.
#
# The font.style property has three values: normal (or roman), italic
# or oblique. The oblique style will be used for italic, if it is not
# present.
#
# The font.variant property has two values: normal or small-caps. For
# TrueType fonts, which are scalable fonts, small-caps is equivalent
# to using a font size of 'smaller', or about 83% of the current font
# size.
#
# The font.weight property has effectively 13 values: normal, bold,
# bolder, lighter, 100, 200, 300, ..., 900. Normal is the same as
# 400, and bold is 700. bolder and lighter are relative values with
# respect to the current weight.
#
# The font.stretch property has 11 values: ultra-condensed,
# extra-condensed, condensed, semi-condensed, normal, semi-expanded,
# expanded, extra-expanded, ultra-expanded, wider, and narrower. This
# property is not currently implemented.
#
# The font.size property is the default font size for text, given in pts.
# 12pt is the standard value.
#
font.family : sans-serif
font.style : normal
font.variant : normal
font.weight : medium
font.stretch : normal
# note that font.size controls default text sizes. To configure
# special text sizes tick labels, axes, labels, title, etc, see the rc
# settings for axes and ticks. Special text sizes can be defined
# relative to font.size, using the following values: xx-small, x-small,
# small, medium, large, x-large, xx-large, larger, or smaller
font.size : 12.0
font.serif : Bitstream Vera Serif, New Century Schoolbook, Century Schoolbook L, Utopia, ITC Bookman, Bookman, Nimbus Roman No9 L, Times New Roman, Times, Palatino, Charter, serif
font.sans-serif : Bitstream Vera Sans, Lucida Grande, Verdana, Geneva, Lucid, Arial, Helvetica, Avant Garde, sans-serif
font.cursive : Apple Chancery, Textile, Zapf Chancery, Sand, cursive
font.fantasy : Comic Sans MS, Chicago, Charcoal, Impact, Western, fantasy
font.monospace : Bitstream Vera Sans Mono, Andale Mono, Nimbus Mono L, Courier New, Courier, Fixed, Terminal, monospace
### TEXT
# text properties used by text.Text. See
# http://matplotlib.sourceforge.net/matplotlib.text.html for more
# information on text properties
text.color : black
text.usetex : False # use latex for all text handling. For more information, see
# http://www.scipy.org/Wiki/Cookbook/Matplotlib/UsingTex
text.dvipnghack : False # some versions of dvipng don't handle
# alpha channel properly. Use True to correct and flush
# ~/.matplotlib/tex.cache before testing
### AXES
# default face and edge color, default tick sizes,
# default fontsizes for ticklabels, and so on. See
# http://matplotlib.sourceforge.net/matplotlib.axes.html#Axes
axes.hold : True # whether to clear the axes by default on
axes.facecolor : white # axes background color
axes.edgecolor : black # axes edge color
axes.linewidth : 1.0 # edge linewidth
axes.grid : True # display grid or not
axes.titlesize : 12 # fontsize of the axes title
axes.labelsize : 10 # fontsize of the x and y labels
axes.labelcolor : black
axes.axisbelow : True # whether axis gridlines and ticks are below
# the axes elements (lines, text, etc)
polaraxes.grid : True # display grid on polar axes
### TICKS
# see http://matplotlib.sourceforge.net/matplotlib.axis.html#Ticks
xtick.major.size : 4 # major tick size in points
xtick.minor.size : 0 # minor tick size in points
xtick.major.pad : 2 # distance to major tick label in points
xtick.minor.pad : 2 # distance to the minor tick label in points
xtick.color : k # color of the tick labels
xtick.labelsize : 8 # fontsize of the tick labels
xtick.direction : in # direction: in or out
ytick.major.size : 4 # major tick size in points
ytick.minor.size : 0 # minor tick size in points
ytick.major.pad : 2 # distance to major tick label in points
ytick.minor.pad : 2 # distance to the minor tick label in points
ytick.color : k # color of the tick labels
ytick.labelsize : 8 # fontsize of the tick labels
ytick.direction : in # direction: in or out
### GRIDS
grid.color : 0.85 # grid color
grid.linestyle : : # dotted
grid.linewidth : 0.5 # in points
### Legend
legend.isaxes : True
legend.numpoints : 4 # the number of points in the legend line
legend.fontsize : 12
legend.pad : 0.2 # the fractional whitespace inside the legend border
legend.markerscale : 1.0 # the relative size of legend markers vs. original
# the following dimensions are in axes coords
legend.labelsep : 0.010 # the vertical space between the legend entries
legend.handlelen : 0.05 # the length of the legend lines
legend.handletextsep : 0.02 # the space between the legend line and legend text
legend.axespad : 0.02 # the border between the axes and legend edge
legend.shadow : False
### FIGURE
# See http://matplotlib.sourceforge.net/matplotlib.figure.html#Figure
figure.figsize : 5, 4 # figure size in inches
figure.dpi : 72 # figure dots per inch
figure.facecolor : white # figure facecolor; 0.75 is scalar gray
figure.edgecolor : white # figure edgecolor
# The figure subplot parameters. All dimensions are fraction of the
# figure width or height
figure.subplot.left : 0.1 # the left side of the subplots of the figure
figure.subplot.right : 0.975 # the right side of the subplots of the figure
figure.subplot.bottom : 0.1 # the bottom of the subplots of the figure
figure.subplot.top : 0.90 # the top of the subplots of the figure
figure.subplot.wspace : 0.1 # the amount of width reserved for blank space between subplots
figure.subplot.hspace : 0.1 # the amount of height reserved for white space between subplots
### IMAGES
image.aspect : equal # equal | auto | a number
image.interpolation : bilinear # see help(imshow) for options
image.cmap : jet # gray | jet etc...
image.lut : 256 # the size of the colormap lookup table
image.origin : upper # lower | upper
### CONTOUR PLOTS
contour.negative_linestyle : 6.0, 6.0 # negative contour dashstyle (size in points)
### SAVING FIGURES
# the default savefig params can be different for the GUI backends.
# Eg, you may want a higher resolution, or to make the figure
# background white
savefig.dpi : 100 # figure dots per inch
savefig.facecolor : white # figure facecolor when saving
savefig.edgecolor : white # figure edgecolor when saving
# tk backend params
tk.window_focus : False # Maintain shell focus for TkAgg
tk.pythoninspect : False # tk sets PYTHONINSPECT
# ps backend params
ps.papersize : A4 # auto, letter, legal, ledger, A0-A10, B0-B10
ps.useafm : False # use of afm fonts, results in small files
ps.usedistiller : False # can be: None, ghostscript or xpdf
# Experimental: may produce smaller files.
# xpdf intended for production of publication quality files,
# but requires ghostscript, xpdf and ps2eps
ps.distiller.res : 6000 # dpi
# pdf backend params
pdf.compression : 6 # integer from 0 to 9
# 0 disables compression (good for debugging)
# Set the verbose flags. This controls how much information
# matplotlib gives you at runtime and where it goes. The verbosity
# levels are: silent, helpful, debug, debug-annoying. Any level is
# inclusive of all the levels below it. If your setting is debug,
# you'll get all the debug and helpful messages. When submitting
# problems to the mailing-list, please set verbose to helpful or debug
# and paste the output into your report.
#
# The fileo gives the destination for any calls to verbose.report.
# These objects can be a filename, or a filehandle like sys.stdout.
#
# You can override the rc default verbosity from the command line by
# giving the flags --verbose-LEVEL where LEVEL is one of the legal
# levels, eg --verbose-helpful.
#
# You can access the verbose instance in your code
# from matplotlib import verbose.
verbose.level : silent # one of silent, helpful, debug, debug-annoying
verbose.fileo : sys.stdout # a log filename, sys.stdout or sys.stderr

run-laydi (new executable file, 8 lines)

@@ -0,0 +1,8 @@
#!/bin/sh
## To be able to run laydi without installing it, we have to set
## PYTHONPATH.
export PYTHONPATH=$PYTHONPATH:.:./workflows
./bin/laydi "$@"

scripts/README (new file, 10 lines)

@@ -0,0 +1,10 @@
This directory and its subdirectories are intended for small scripts that are
not considered parts of laydi proper. They are included because they do
useful things in preprocessing data, often for a specific use (e.g. microarray
analysis with gene ontology background information).
To find out what each script does, try running it with the --help option.
2007-03-15, Einar Ryeng

@@ -0,0 +1,112 @@
#!/usr/bin/python
import optparse
import os
import sys
probes = {}
bp = {}
cc = {}
mf = {}
def split_value(string):
"""Splits a tab delimited value from affymetrix csv files"""
string = string.strip()
values = [x.strip() for x in string.split('///')]
if len(values) == 1 and values[0] == '---':
return []
return values
def split_subvalues(string):
"""Splits a value into smaller components"""
string = string.strip()
values = [x.strip() for x in string.split('//')]
if len(values) == 1 and values[0] == '--':
return []
return values
def set_probes(probe, entrez):
"""Set probe values for each entrez value."""
for gene_id in split_value(entrez):
if not probes.has_key(gene_id):
probes[gene_id] = []
probes[gene_id].append(probe.strip())
def set_go(d, entrez, terms):
genes = split_value(entrez)
terms = split_value(terms)
for gene in genes:
if not d.has_key(gene):
d[gene] = []
for term in terms:
d[gene].append(split_subvalues(term)[0])
def parse_options():
op = optparse.OptionParser()
op.add_option('-b', '--biological-process', dest="bp",
help="Output annotations in the biological process tree.",
action="store_true", default=False)
op.add_option('-c', '--cellular-component', dest="cc",
help="Output annotations in the cellular component tree.",
action="store_true", default=False)
op.add_option('-d', '--output-dataset',
help="Export as ftsv (Laydi dataset) file.")
op.add_option('-m', '--molecular-function', dest="mf",
help="Output annotations in the molecular function tree.",
action="store_true", default=False)
op.add_option('-u', '--unique-terms-only', dest="only_terms",
help="Output only a list of all unique GO terms annotated to the genes",
action="store_true", default=False)
return op.parse_args()
def read_file(options):
fd = open('entrez-go-mapping.cccsv')
for line in fd.readlines():
values = line.split(':::')
probeid = values[0]
set_probes(probeid, values[1])
if options.bp:
set_go(bp, values[1], values[2])
if options.cc:
set_go(cc, values[1], values[3])
if options.mf:
set_go(mf, values[1], values[4])
fd.close()
if __name__ == '__main__':
options, args = parse_options()
read_file(options)
if options.only_terms:
s = set()
for gene in args:
        if options.bp and bp.has_key(gene):
            for x in bp[gene]:
                s.add(x)
        if options.mf and mf.has_key(gene):
            for x in mf[gene]:
                s.add(x)
        if options.cc and cc.has_key(gene):
            for x in cc[gene]:
                s.add(x)
for term in s:
print "GO:%07d" % int(term)
sys.exit(0)
for gene in args:
print gene,
        if options.bp and bp.has_key(gene):
            for x in bp[gene]:
                print "GO:%07d" % int(x),
        if options.cc and cc.has_key(gene):
            for x in cc[gene]:
                print "GO:%07d" % int(x),
        if options.mf and mf.has_key(gene):
            for x in mf[gene]:
                print "GO:%07d" % int(x),
print
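For illustration, the Affymetrix value conventions handled by the two splitters above look like this on toy strings (inferred from the code, not from Affymetrix documentation):

# Illustrative sketch only, not part of the commit.
print split_value("1017 /// 4609")        # -> ['1017', '4609']
print split_value("---")                  # -> []
print split_subvalues("0006006 // glucose metabolism")
# -> ['0006006', 'glucose metabolism']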

@@ -0,0 +1,12 @@
all: go-distance
godist.o: godist.c godist.h
gcc -ggdb -c godist.c
go-distance: godist.o main.o
gcc -ggdb -o go-distance godist.o main.o -lm
clean:
-rm go-distance godist.o main.o

(two file diffs suppressed because they are too large)

@@ -0,0 +1,359 @@
#include <math.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <search.h>
#include "godist.h"
void print_terms();
void add_link(char*, char*);
struct node* get_bp();
struct node* get_term(char *);
void calc_ic(struct node *, unsigned int);
struct node *common_subsumer(struct node *, struct node *);
float resnik(struct node *, struct node *);
/* initialisation */
int godist_init() {
/* Initialize hash table and array */
hcreate(MAX_NODES);
term_array_size = 0;
link_count = 0;
struct node *n;
/* Read ontology terms from file */
printf("Reading GO terms from go-terms.txt...");
FILE *term_fd = fopen("go-terms.txt", "r");
if (term_fd == NULL) {
printf("cannot open file: go-terms.txt\n");
exit(errno);
}
int i;
while((i = godist_read_term(term_fd)) == 13) {
/* printf("%d\n", i);*/
}
fclose(term_fd);
printf(" %d terms\n", term_array_size);
/* Read ontology structure from file */
printf("Reading GO structure from go-tree.txt...");
FILE *tree_fd = fopen("go-tree.txt", "r");
if (tree_fd == NULL) {
printf("cannot open file: go-tree.txt\n");
exit(errno);
}
while((i = godist_read_assoc(tree_fd)) == 2) {
link_count++;
}
fclose(tree_fd);
printf(" %d edges\n", link_count);
printf("Calculating accumulated evidence...");
fflush(stdout);
for (i=0; i<term_array_size; i++) {
clear_flags(get_bp());
accumulate_evidence(term_array[i]);
}
printf("\n");
evidence = 0xff;
total_ann = 0;
n = get_bp();
for (i=0; i<12; i++)
if (evidence & 1<<i)
total_ann += n->acc_evidence[i];
printf("Using %d annotations.\n", total_ann);
print_term(get_term("GO:0006006"));
print_term(get_term("GO:0019318"));
print_term(get_term("GO:0005996"));
print_term(get_bp());
/*
print_term(get_term("GO:0040007"));
print_term(get_term("GO:0007275"));
print_term(get_term("GO:0007582"));
print_term(get_term("GO:0043473"));
print_term(get_term("GO:0000004"));
print_term(get_term("GO:0051704"));
print_term(get_term("GO:0000003"));
print_term(get_term("GO:0016032"));
print_term(get_term("GO:0009987"));
print_term(get_term("GO:0050896"));
print_term(get_term("GO:0050789"));
*/
printf("Calculation information content...");
fflush(stdout);
calculate_ics(0xffff);
printf("\n");
/* calc_ic(get_bp(), 0xffff);*/
/* find_multi_parented();*/
common_subsumer(get_term("GO:0000003"), get_term("GO:0000004"));
/** should return go:0016032 */
common_subsumer(get_term("GO:0019081"), get_term("GO:0050434"));
printf("Resnik: %f\n", resnik(get_term("GO:0000003"), get_term("GO:0000004")));
}
void godist_exit() {
int i;
for (i=0; i<term_array_size; i++) {
free(term_array[i]);
}
}
int godist_read_assoc(FILE *fd) {
char term1[11], term2[11];
int retval;
retval = fscanf(fd, " %10s %10s ", term1, term2);
if (retval != EOF) {
add_link(term1, term2);
}
return retval;
}
int godist_read_term(FILE *fd) {
char term[11];
int ev[12];
int i;
ENTRY e, *res;
int nread = fscanf(fd, " %10s %d %d %d %d %d %d %d %d %d %d %d %d ",
term, &ev[0], &ev[1], &ev[2], &ev[3], &ev[4], &ev[5],
&ev[6], &ev[7], &ev[8], &ev[9], &ev[10], &ev[11]);
if (errno != 0) {
printf("errno: %d\n", errno);
}
if (nread == 13) {
struct node *n = (struct node*) malloc(sizeof(struct node));
n->parentc = 0;
n->childrenc = 0;
n->visited = 0;
for (i=0; i<12; i++) {
n->evidence[i] = ev[i];
n->acc_evidence[i] = 0;
}
strcpy(n->term, term);
/* add to hash table */
e.key = n->term;
e.data = (void*)n;
res = hsearch(e, ENTER);
term_array[term_array_size++] = n;
}
return nread;
}
/* distance functions */
float go_distance(char *term1, char *term2) {
return 0.0;
}
void clear_flags(struct node *n) {
int i, j;
for (i=0; i<term_array_size; i++) {
term_array[i]->visited = 0;
for (j=0; j<12; j++)
term_array[i]->temp_acc[j] = 0;
}
}
void add_link(char *parent_id, char *child_id) {
ENTRY *ep, e;
struct node *parent, *child;
char key[11];
strcpy(key, parent_id);
e.key = key;
ep = hsearch(e, FIND);
if (!ep) {
printf("Cannot find term %s\n", e.key);
return;
}
  parent = (struct node*) ep->data;
strcpy(key, child_id);
e.key = key;
ep = hsearch(e, FIND);
if (!ep) {
printf("Cannot find term %s\n", e.key);
return;
}
  child = (struct node*) ep->data;
if (parent->childrenc +1 > MAX_CHILDREN) {
printf("FIXME: increase child count");
return;
}
parent->children[parent->childrenc] = child;
parent->childrenc++;
child->parents[child->parentc] = parent;
child->parentc++;
}
struct node *get_bp() {
return get_term("GO:0008150");
}
struct node *get_term(char *term) {
ENTRY e, *ep;
e.key = term;
ep = hsearch(e, FIND);
if (ep) {
return ep->data;
}
return NULL;
}
void accumulate_evidence(struct node *n) {
int i;
acc_ev(n);
for (i=0; i<12; i++) {
n->acc_evidence[i] = n->temp_acc[i];
}
}
void acc_ev(struct node *n) {
int i, j;
if (n->visited)
return;
n->visited = 1;
for (i=0; i<12; i++)
n->temp_acc[i] = n->evidence[i];
for (i=0; i<(n->childrenc); i++) {
if (!n->children[i]->visited) {
acc_ev(n->children[i]);
for (j=0; j<12; j++)
n->temp_acc[j] += n->children[i]->temp_acc[j];
}
}
}
void print_terms() {
int i;
for (i=0; i<term_array_size; i++) {
printf("%s\n", term_array[i]->term);
}
}
void print_term(struct node *n) {
int i;
printf("%s\n", n->term);
printf(" children: %d\n", n->childrenc);
printf(" parents: %d\n", n->parentc);
printf(" evidence: ");
for (i=0; i<12; i++)
printf("%d ", n->evidence[i]);
printf("\n");
printf(" accumulated evidence: ");
for (i=0; i<12; i++)
printf("%d ", n->acc_evidence[i]);
printf("\n");
}
void find_multi_parented() {
int i;
for (i=0; i<term_array_size; i++) {
if (term_array[i]->parentc > 1)
printf("%s -- %d\n", term_array[i]->term, term_array[i]->parentc);
}
}
void calculate_ics(unsigned int evidence) {
int i;
for (i=0; i<term_array_size; i++)
calc_ic(term_array[i], evidence);
}
void calc_ic(struct node *n, unsigned int evidence) {
int i;
float ann=0.0;
for (i=0; i<12; i++)
if (evidence & 1<<i)
ann += (float) n->acc_evidence[i];
n->ic = -log(ann/total_ann);
/* printf("%f\n", n->ic);*/
}
struct node *common_subsumer(struct node *n1, struct node *n2) {
struct node *anc1[MAX_NODES];
struct node *anc2[MAX_NODES];
int ancc1=0, ancc2=0;
int i, j;
struct node *retval=NULL;
add_ancestors(&ancc1, anc1, n1);
add_ancestors(&ancc2, anc2, n2);
for (i=0; i<ancc1; i++)
for (j=0; j<ancc2; j++)
if (anc1[i] == anc2[j])
if ((!retval) || (anc1[i]->ic > retval->ic))
retval = anc1[i];
if (retval)
;// printf("Retval: %s\n", retval->term);
else
printf("No value to return");
return retval;
}
void add_ancestors(int *ancc, struct node *anc[], struct node *n) {
int i=0;
anc[(*ancc)++] = n;
for (i=0; i<n->parentc; i++)
add_ancestors(ancc, anc, n->parents[i]);
}
float resnik(struct node *n1, struct node *n2) {
  struct node *subsumer = common_subsumer(n1, n2);
  if (!subsumer)
    return 20;
  else
    /* Note: ic(n1) + ic(n2) - 2*ic(subsumer) is the Jiang-Conrath
       distance; Resnik similarity proper is ic(subsumer) alone. */
    return n1->ic + n2->ic - 2.0 * subsumer->ic;
}
int read_terms(FILE *fd, struct node *terms[], int *termc) {
char term[11];
int retval;
printf("read_terms\n");
retval = fscanf(fd, " %10s ", term);
while (retval != EOF) {
printf(".");
fflush(stdout);
terms[(*termc)++] = get_term(term);
retval = fscanf(fd, " %10s ", term);
}
return retval;
}
void build_dataset() {
struct node *terms[MAX_NODES];
int termc = 0;
int i, j;
FILE *fd = fopen("dimension", "r");
read_terms(fd, terms, &termc);
for (i=0; i<termc; i++) {
for (j=0; j<termc; j++) {
printf("%f ", resnik(terms[i], terms[j]));
}
printf("\n");
}
fclose(fd);
}
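For illustration, input files in the format godist.c parses can be produced like this (a sketch; the line formats are inferred from the fscanf calls in godist_read_term and godist_read_assoc, and GO:0008150 is the biological-process root used by get_bp):

# Illustrative sketch only: write toy go-terms.txt and go-tree.txt.
terms = ["GO:0008150", "GO:0000003", "GO:0000004"]
fd = open("go-terms.txt", "w")
for t in terms:
    print >> fd, t, " ".join(["1"] * 12)     # term + 12 evidence counts
fd.close()
fd = open("go-tree.txt", "w")
print >> fd, "GO:0008150 GO:0000003"         # parent child
print >> fd, "GO:0008150 GO:0000004"
fd.close()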

@@ -0,0 +1,77 @@
#ifndef GODIST_H
#define GODIST_H
#include <search.h>
#define MAX_NODES 15000
#define MAX_PARENTS 100
#define MAX_CHILDREN 100
enum EVIDENCE { MP = 1,
IGI = 1 << 1,
IPI = 1 << 2,
ISS = 1 << 3,
IDA = 1 << 4,
IEP = 1 << 5,
IEA = 1 << 6,
TAS = 1 << 7,
NAS = 1 << 8,
ND = 1 << 9,
RCA = 1 << 10,
IC = 1 << 11 };
struct node;
struct node {
/* GO term id. E.g: "GO:0005180" */
char term[11];
/* Information content */
float ic;
/* Depth in tree */
int depth;
/* Evidence codes */
int evidence[12];
/* Accumulated evidence codes */
int acc_evidence[12];
/* Working memory */
int temp_acc[12];
/* Parent count and parents */
int parentc;
struct node *parents[MAX_PARENTS];
/* Child count and children */
int childrenc;
struct node *children[MAX_CHILDREN];
/* Flag to ensure that a node is only visited once in DAG operations */
char visited;
};
struct node* term_array[MAX_NODES];
long term_array_size;
int link_count;
int total_ann;
int evidence; /* bitvector with one bit per evidence code */
/* Ontology initialisation functions. */
int godist_init();
int godist_read_assoc(FILE *fd);
int godist_read_term(FILE *fd);
void accumulate_evidence(struct node*);
/* Distance metric functions */
float resnik_distance(char *term1, char *term2);
float fussimeg_distance(char *term1, char *term2);
void calc_ic(struct node *n, unsigned int evidence);
void clear_flags(struct node *n);
void print_term(struct node *n);
void add_ancestors(int *ancc, struct node *anc[], struct node *n);
void calculate_ics(unsigned int);
void acc_ev(struct node*);
#endif

@@ -0,0 +1,35 @@
/* _GNU_SOURCE must be defined before any includes to take effect. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#include "godist.h"
extern char *optarg;
extern int optind, opterr, optopt;
void print_help() {
printf("go-distance 0.1.0\n\n");
printf("Usage: go-distance [hr] <go-terms>\n\n");
}
int main(int argc, char **argv) {
int i;
char *dimension[MAX_NODES];
  while ((i = getopt(argc, argv, "h")) != -1) {
    switch (i) {
      case 'h':
        print_help();
        exit(0);
        break;
    };
  }
  godist_init();
  build_dataset();
  godist_exit();
  return 0;
}

@@ -0,0 +1,80 @@
#!/usr/bin/python
import os, sys
import getopt
sys.path.append('../..')
from laydi import dataset
import numpy
max_val = numpy.inf
no_nan = False
def print_help():
print
print "Usage: go-gene-matrix <go-dist-matrix.ftsv> <gene-go-mapping.txt>"
print
print "Description:"
print " Takes a GO term by GO term distance matrix and a file that"
print " maps GO terms to genes as input arguments and produces a"
print " dataset that contains the shortest distances between all"
print " genes and GO terms."
print
print "Options:"
print " -h, --help Show this help text."
print " -m, --max-dist Trunkate all distances to this value."
print
def get_parameters():
global max_val
short_opts = "hm:"
long_opts = ["help", "max-dist="]
options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
for opt, val in options:
if opt in ['-h', '--help']:
print_help()
sys.exit(0)
elif opt in ['-m', '--max-dist']:
max_val = int(val)
if len(params) < 2:
print_help()
sys.exit(1)
return params
if __name__ == '__main__':
params = get_parameters()
# Read dataset
fd = open(params[0])
ds = dataset.read_ftsv(fd)
array = ds.asarray()
fd.close()
# Read mapping
sorted_keys = []
mapping = {}
fd = open(params[1])
lines = fd.readlines()
for line in lines:
values = line.split()
if len(values) > 0:
mapping[values[0]] = values[1:]
sorted_keys.append(values[0])
# Create new dataset
matrix = numpy.zeros((len(sorted_keys), ds.shape[0]))
dim = ds.get_dim_name(0)
for i, gene in enumerate(sorted_keys):
for j, go in enumerate(ds[dim]):
            min_dist = max_val
            for go2 in mapping[gene]:
                if ds[dim].has_key(go2) and array[j, ds[dim][go2]] < min_dist:
                    min_dist = array[j, ds[dim][go2]]
            matrix[i, j] = min_dist
out_ds = dataset.Dataset(matrix,
(('genes', sorted_keys), ('go-terms', ds[dim])),
"Gene by GO matrix")
dataset.write_ftsv(sys.stdout, out_ds)

@@ -0,0 +1,3 @@
#!/usr/bin/env python

@@ -0,0 +1,92 @@
#!/usr/bin/python
import getopt
import numpy
import sys
from laydi import dataset
VERSION = "0.1.0"
dataset_fn = "-"
def print_help():
print "illumina2ftsv %s" % VERSION
print
print "Usage: illumina2ftsv [options] <illumina_genome_studio_file>"
print
def parse_options():
s_opts = "d:h"
l_opts = ["dataset", "help"]
options, params = getopt.getopt(sys.argv[1:], s_opts, l_opts)
for opt, val in options:
if opt in ["-d", "--dataset"]:
global dataset_fn
dataset_fn = val
elif opt in ["-h", "--help"]:
print_help()
sys.exit(0)
if len(params) != 1:
print_help()
sys.exit(1)
return params
def read_illumina_file(fn):
fd = open(fn)
line = fd.readline()
if line.strip() != "Illumina Inc. GenomeStudio version 1.7.0":
raise Exception("File cannot be recognized as Illumina textual data")
headers = {}
line= fd.readline()
while line.strip() != "":
key, val = line.split("=", 1)
headers[key.strip()] = val.strip()
line = fd.readline()
col_headers = fd.readline().split('\t')
values = []
line = fd.readline()
while line != "":
values.append([x.strip() for x in line.split('\t')])
line = fd.readline()
    probe_col = col_headers.index("ProbeID")
    print "probe id column:", probe_col
header_cols = []
samples = []
for i, colname in enumerate(col_headers):
if colname.startswith("AVG_Signal-"):
header_cols.append(i)
samples.append(colname.split("-", 1)[1])
print header_cols
print samples
a = numpy.array(values)
m = numpy.array(a[:,header_cols], dtype='d')
print m
probe_ids = list(a[:, probe_col])
print "samples: ", len(samples)
print "probe_ids: ", len(probe_ids)
print "shape: ", m.shape
    ds = dataset.Dataset(m.transpose(), [('samples', samples), ('probe-ids', probe_ids)], name="Average Expr.")
    # Honour the -d/--dataset option; "-" means standard output.
    if dataset_fn == "-":
        dataset.write_ftsv(sys.stdout, ds)
    else:
        dataset.write_ftsv(dataset_fn, ds)
if __name__ == '__main__':
fn = parse_options()[0]
read_illumina_file(fn)

@@ -0,0 +1,72 @@
#!/usr/bin/python
import getopt
import os, os.path
import sys
#OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "Chromosome", "Definition", "Ontology_Component", "Ontology_Process", "Ontology_Function", "ILMN_Gene"]
OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "ILMN_Gene", "Definition", ]
def print_help():
print "laydi-annot-illumina"
print
print "Usage: laydi-annot-illumina <illumina-annotation-file.txt>"
print
print "Description:"
print " Produce laydi annotation files from Illumina text annotation files"
print " Illumina files can be downloaded from:"
print " http://www.switchtoi.com/annotationfiles.ilmn"
print
def parse_cmdline():
short_opts = "h"
long_opts = ["help"]
options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
for key, val in options:
if key in ["-h", "--help"]:
print_help()
sys.exit(0)
if len(params) != 1:
print_help()
sys.exit(1)
return params[0]
def convert_annotations(fn_in, fn_out):
fd_in = open(fn_in)
fd_out = open(fn_out, "w")
# Skip headers
line = fd_in.readline()
while not line.startswith("[Probes]"):
line = fd_in.readline()
colnames = fd_in.readline().split("\t")
export_colnums = [colnames.index(x) for x in OUTPUT_COLS]
    # Print output column headers
    export_colnames = ["probe-id"] + OUTPUT_COLS[1:]
    print >> fd_out, "\t".join(export_colnames)
line = fd_in.readline()
while not line == "" and not line.startswith("["):
values = line.split("\t")
output_values = [values[x] for x in export_colnums]
print >> fd_out, "\t".join(output_values)
line = fd_in.readline()
if __name__ == "__main__":
fn_in = parse_cmdline()
fn_out = os.path.split(fn_in)[1]
fn_out = os.path.splitext(fn_out)[0] + ".annot"
print "Reading: %s" % (fn_in,)
print "Writing: %s" % (fn_out,)
print
print "Annotations:"
print ", ".join(OUTPUT_COLS)
convert_annotations(fn_in, fn_out)

@@ -0,0 +1,93 @@
#!/usr/bin/python
import getopt
import os, os.path
import sys
OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "ILMN_Gene", "Definition", ]
def print_help():
print "laydi-mapping-illumina"
print
print "Usage: laydi-mapping-illumina <illumina-annotation-file.txt> <from_dim> <to_dim>"
print
print "Description:"
print " Produce mapping files from Illumina text annotation files"
print " Illumina files can be downloaded from:"
print " http://www.switchtoi.com/annotationfiles.ilmn"
print
print " NOTE: <from_dim> and <to_dim> are the column names in the illumina text file,"
print " not laydi dimensions."
print
def parse_cmdline():
short_opts = "h"
long_opts = ["help"]
options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
for key, val in options:
if key in ["-h", "--help"]:
print_help()
sys.exit(0)
if len(params) != 3:
print_help()
sys.exit(1)
return params
def build_map(fn, from_dim, to_dim):
retval = {}
fd = open(fn)
line = fd.readline()
while line != "" and line.strip() != "[Probes]":
line = fd.readline()
if line == "":
return None
line = fd.readline()
cols = [x.strip() for x in line.split("\t")]
from_col = cols.index(from_dim)
to_col = cols.index(to_dim)
line = fd.readline()
while line != "" and not line.strip().startswith("["):
key = line.split("\t")[from_col]
val = line.split("\t")[to_col]
if not retval.has_key(key):
retval[key] = [val]
else:
retval[key].append(val)
line = fd.readline()
return retval
def write_map(fd, d, from_dim, to_dim):
opened_here = False
if isinstance(fd, str):
fd = open(fd, "w")
opened_here = True
print >> fd, "# from: %s" % from_dim
print >> fd, "# to: %s" % to_dim
print >> fd, "# description: "
print >> fd
for k, v in d.items():
print >> fd, k,
for e in v:
print >> fd, e,
print >> fd
if opened_here:
fd.close()
if __name__ == '__main__':
fn, from_dim, to_dim = parse_cmdline()
m = build_map(fn, from_dim, to_dim)
write_map(sys.stdout, m, from_dim, to_dim)
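For illustration, the mapping-file format produced by write_map looks like this on a toy dictionary (the probe and gene IDs below are made up; the column names are examples taken from OUTPUT_COLS):

# Illustrative sketch only.
m = {"probe1": ["2597"], "probe2": ["60", "26"]}
write_map(sys.stdout, m, "Array_Address_Id", "Entrez_Gene_ID")
# Produces:
# # from: Array_Address_Id
# # to: Entrez_Gene_ID
# # description:
#
# probe1 2597
# probe2 60 26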

@@ -0,0 +1,281 @@
import os, os.path
import sys
import time
import configobj
from laydi import dataset
NAME = "laydi-cmd"
VERSION = "0.1.0"
PROJECT_VERSION_STRING = "Laydi project version 1"
def is_project_directory(dirname):
"""Verifies that a directory is a laydi project"""
if not os.path.isdir(dirname):
return False
## Verify that the version is correct.
version_fn = os.path.join(dirname, "VERSION")
if not os.path.exists(version_fn):
return False
fd = open(version_fn)
line = fd.readline()
fd.close()
    if line.strip() != PROJECT_VERSION_STRING:
return False
## Require directories to be present.
if not os.path.isdir(os.path.join(dirname, "annotations")):
return False
if not os.path.isdir(os.path.join(dirname, "data")):
return False
if not os.path.isdir(os.path.join(dirname, "selections")):
return False
if not os.path.isdir(os.path.join(dirname, "exports")):
return False
## If no tests failed, return True
return True
def make_project_directory(dirname, force=False):
"""Creates a project directory
force: ignore that directory exists and proceed anyway.
"""
if os.path.exists(dirname) and not force:
return False
rootdir = dirname
anndir = os.path.join(dirname, "annotations")
seldir = os.path.join(dirname, "selections")
datadir = os.path.join(dirname, "data")
exportdir = os.path.join(dirname, "exports")
version_file_path = os.path.join(dirname, "VERSION")
    if not os.path.isdir(rootdir):
        os.makedirs(rootdir)
    for d in [anndir, seldir, datadir, exportdir]:
        if not os.path.isdir(d):
            os.mkdir(d)
    fd = open(version_file_path, "w")
    print >> fd, PROJECT_VERSION_STRING
    fd.close()
    return True
class Universe(object):
"""A Universe is a collection of all existing identifiers in a set of datasets"""
def __init__(self):
self.refcount = {}
def register_dim(self, dim):
"""Increase reference count for identifiers in Dimension object dim"""
d = self.refcount.get(dim.name, None)
if d == None:
d = {}
self.refcount[dim.name] = d
for i in dim:
d[i] = d.get(i, 0) + 1
def register_ds(self, ds):
"""Increase reference count for identifiers in all Dimensions of dataset ds"""
for dim in ds.dims:
self.register_dim(dim)
def unregister_dim(self, dim):
"""Update reference count for identifiers in Dimension object dim
Update reference count for identifiers in Dimension object dim, and remove all
identifiers with a reference count of 0, as they do not (by definition) exist
any longer.
"""
ids = self.refcount[dim.name]
for i in dim:
refcount = ids[i]
if refcount == 1:
ids.pop(i)
else:
ids[i] -= 1
if len(ids) == 0:
self.refcount.pop(dim.name)
def unregister_ds(self, ds):
"""Update reference count for identifiers along Dimensions in Dataset ds.
Update reference count for identifiers along all Dimensions in
Dataset ds, and remove all identifiers with a reference count of 0,
as they do not (by definition) exist any longer.
"""
        for dim in ds.dims:
            self.unregister_dim(dim)
def register(self, obj):
if isinstance(obj, Dataset):
self.register_ds(obj)
else:
self.register_dim(obj)
def unregister(self, obj):
if isinstance(obj, Dataset):
self.unregister_ds(obj)
else:
self.unregister_dim(obj)
    def __getitem__(self, dimname):
        return set(self.refcount[dimname].keys())
    def __iter__(self):
        return self.refcount.keys().__iter__()
class Dimension(object):
"""A Dimension represents the set of identifiers an object has along an axis.
"""
def __init__(self, name, ids=[]):
self.name = name
self.idset = set(ids)
self.idlist = list(ids)
if len(self.idset) != len(self.idlist):
raise Exception("Duplicate identifiers are not allowed")
def __getitem__(self, element):
return self.idlist[element]
def __getslice__(self, start, end):
return self.idlist[start:end]
def __contains__(self, element):
return self.idset.__contains__(element)
def __str__(self):
return "%s: %s" % (self.name, str(self.idlist))
def __len__(self):
return len(self.idlist)
def __iter__(self):
return iter(self.idlist)
def intersection(self, dim):
return self.idset.intersection(dim.idset)
def as_tuple(self):
return (self.name, self.idlist)
class DirectoryNotifier(object):
def __init__(self, path):
self.path = path
self.files = {}
self.subdirs = {}
self.timestamp = -1
self.file_listeners = {}
self.dir_listeners = {}
self.update()
    def update(self):
        now = time.time()
        for fn in os.listdir(self.path):
            if os.path.getctime(os.path.join(self.path, fn)) > self.timestamp:
                # Dispatch to the registered listeners; not yet implemented.
                ext = os.path.splitext(fn)[1]
        self.timestamp = now
def listen_files(self, obj, ext=None):
listeners = self.file_listeners
if listeners.has_key(ext):
listeners[ext].append(obj)
else:
listeners[ext] = [obj]
def listen_dirs(self, obj, ext=None):
listeners = self.dir_listeners
if listeners.has_key(ext):
listeners[ext].append(obj)
else:
listeners[ext] = [obj]
class DataDirectory(object):
    def __init__(self, dirname, recursive=False, universe=None):
        self.dirname = dirname
        self.recursive = recursive
        self.universe = universe
        self.config = None
        self.update_time = 0
        self.update()
    def update(self):
        ## Remember new timestamp.
        now = time.time()
        ## Read configuration
        ini_fn = os.path.join(self.dirname, "directory.ini")
        if os.path.isfile(ini_fn) and os.path.getctime(ini_fn) > self.update_time:
            self.config = configobj.ConfigObj(ini_fn, unrepr=True)
        ## Read datasets, plots and optionally subdirectories
        self.datasets = []
        self.plots = []
        self.subdirs = []
        for fn in os.listdir(self.dirname):
            path = os.path.join(self.dirname, fn)
            ext = os.path.splitext(fn)[1]
            if ext == ".ftsv":
                ds = dataset.read_ftsv(path)
                if self.universe is not None:
                    self.universe.register_ds(ds)
                self.datasets.append(ds)
            elif ext == ".plot":
                self.plots.append(configobj.ConfigObj(path, unrepr=True))
            elif os.path.isdir(path) and self.recursive:
                self.subdirs.append(DataDirectory(path, recursive=True,
                                                  universe=self.universe))
        ## Set new update time
        self.update_time = now
class SelectionDirectory(object):
def __init__(self, dirname):
pass
class Project(object):
def __init__(self, dirname):
"""Opens a project directory. The directory must exist and be a valid project."""
## Set path names.
self.rootdir = dirname
self.anndir = os.path.join(dirname, "annotations")
self.seldir = os.path.join(dirname, "selections")
self.datadir = os.path.join(dirname, "data")
self.exportdir = os.path.join(dirname, "exports")
version_file_path = os.path.join(dirname, "VERSION")
self.universe = Universe()
self.data = DataDirectory(self.datadir, universe=self.universe, recursive=True)
    def update(self):
        self.data.update()
## class Dataset
##
##
## class Plot
##
##
## class Selection
##
##
## class Annotation
##
##
## class DataDirectory()
##
##
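For illustration, a short sketch of how the classes above fit together (toy identifiers; assumes the Universe.__getitem__ defined above):

# Illustrative sketch only.
dim = Dimension("samples", ["s1", "s2", "s3"])
print len(dim)                  # 3
print "s2" in dim               # True
print dim.intersection(Dimension("samples", ["s2", "s4"]))   # set(['s2'])
uni = Universe()
uni.register_dim(dim)
print uni["samples"]            # set of all registered 'samples' identifiers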

scripts/lpls/lpls.py (new file, 438 lines)

@@ -0,0 +1,438 @@
import sys
from pylab import *
import matplotlib
from scipy import *
from scipy.linalg import inv,norm
sys.path.append("../../laydi/lib")
import select_generators
def nipals_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], scale='scores', verbose=True):
""" L-shaped Partial Least Sqaures Regression by the nipals algorithm.
(X!Z)->Y
:input:
X : data matrix (m, n)
Y : data matrix (m, l)
Z : data matrix (n, o)
alpha : how much z influence (1=max, 0=none)
:output:
T : X-scores
W : X-weights/Z-weights
P : X-loadings
Q : Y-loadings
U : X-Y relation
L : Z-scores
K : Z-loads
B : Regression coefficients X->Y
b0: Regression coefficient intercept
evx : X-explained variance
evy : Y-explained variance
evz : Z-explained variance
:Notes:
"""
if mean_ctr:
xctr, yctr, zctr = mean_ctr
X, mnX = center(X, xctr)
Y, mnY = center(Y, yctr)
Z, mnZ = center(Z, zctr)
varX = pow(X, 2).sum()
varY = pow(Y, 2).sum()
varZ = pow(Z, 2).sum()
m, n = X.shape
k, l = Y.shape
u, o = Z.shape
# initialize
U = empty((k, a_max))
Q = empty((l, a_max))
T = empty((m, a_max))
W = empty((n, a_max))
P = empty((n, a_max))
K = empty((o, a_max))
L = empty((u, a_max))
B = empty((a_max, n, l))
b0 = empty((a_max, m, l))
var_x = empty((a_max,))
var_y = empty((a_max,))
var_z = empty((a_max,))
for a in range(a_max):
if verbose:
print "\n Working on comp. %s" %a
u = Y[:,:1]
diff = 1
MAX_ITER = 100
lim = 1e-7
niter = 0
while (diff>lim and niter<MAX_ITER):
niter += 1
u1 = u.copy()
w = dot(X.T, u)
w = w/sqrt(dot(w.T, w))
l = dot(Z, w)
k = dot(Z.T, l)
k = k/sqrt(dot(k.T, k))
w = alpha*k + (1-alpha)*w
w = w/sqrt(dot(w.T, w))
t = dot(X, w)
c = dot(Y.T, t)
c = c/sqrt(dot(c.T, c))
u = dot(Y, c)
diff = abs(u1 - u).max()
if verbose:
print "Converged after %s iterations" %niter
tt = dot(t.T, t)
p = dot(X.T, t)/tt
q = dot(Y.T, t)/tt
l = dot(Z, w)
U[:,a] = u.ravel()
W[:,a] = w.ravel()
P[:,a] = p.ravel()
T[:,a] = t.ravel()
Q[:,a] = q.ravel()
L[:,a] = l.ravel()
K[:,a] = k.ravel()
X = X - dot(t, p.T)
Y = Y - dot(t, q.T)
Z = (Z.T - dot(w, l.T)).T
var_x[a] = pow(X, 2).sum()
var_y[a] = pow(Y, 2).sum()
var_z[a] = pow(Z, 2).sum()
B[a] = dot(dot(W[:,:a+1], inv(dot(P[:,:a+1].T, W[:,:a+1]))), Q[:,:a+1].T)
b0[a] = mnY - dot(mnX, B[a])
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
evz = 100.0*(1 - var_z/varZ)
if scale=='loads':
tnorm = apply_along_axis(norm, 0, T)
T = T/tnorm
Q = Q*tnorm
W = W*tnorm
return T, W, P, Q, U, L, K, B, b0, evx, evy, evz, mnX, mnY, mnZ
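# Illustrative usage sketch for nipals_lpls (kept entirely as comments,
# since helpers such as center() are defined further down this module;
# the shapes are examples, not requirements beyond those in the docstring):
#     X = rand(10, 6); Y = rand(10, 2)
#     Z = rand(4, 6)            # the columns of Z must match the columns of X
#     T, W, P, Q, U, L, K, B, b0, evx, evy, evz, mnX, mnY, mnZ = \
#         nipals_lpls(X, Y, Z, a_max=2, alpha=.5, verbose=False)
#     # T is (10, 2); evx/evy/evz give percent variance explained per component.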
def svd_lpls(X, Y, Z, a_max, alpha=.7, mean_ctr=[2, 0, 1], verbose=True):
"""
NB: In the works ...
    L-shaped Partial Least Squares Regression by the svd algorithm.
(X!Z)->Y
:input:
X : data matrix (m, n)
Y : data matrix (m, l)
Z : data matrix (n, o)
:output:
T : X-scores
W : X-weights/Z-weights
P : X-loadings
Q : Y-loadings
U : X-Y relation
L : Z-scores
K : Z-loads
B : Regression coefficients X->Y
b0: Regression coefficient intercept
evx : X-explained variance
evy : Y-explained variance
evz : Z-explained variance
:Notes:
    Not quite there yet.
"""
if mean_ctr:
xctr, yctr, zctr = mean_ctr
X, mnX = center(X, xctr)
        Y, mnY = center(Y, yctr)
Z, mnZ = center(Z, zctr)
varX = pow(X, 2).sum()
varY = pow(Y, 2).sum()
varZ = pow(Z, 2).sum()
m, n = X.shape
k, l = Y.shape
u, o = Z.shape
# initialize
U = empty((k, a_max))
Q = empty((l, a_max))
T = empty((m, a_max))
W = empty((n, a_max))
P = empty((n, a_max))
K = empty((o, a_max))
L = empty((u, a_max))
var_x = empty((a_max,))
var_y = empty((a_max,))
var_z = empty((a_max,))
for a in range(a_max):
if verbose:
print "\n Working on comp. %s" %a
xyz = dot(dot(Z,X.T),Y)
u,s,vt = linalg.svd(xyz, 0)
w = u[:,o]
t = dot(X, w)
tt = dot(t.T, t)
p = dot(X.T, t)/tt
q = dot(Y.T, t)/tt
l = dot(Z.T, w)
W[:,a] = w.ravel()
P[:,a] = p.ravel()
T[:,a] = t.ravel()
Q[:,a] = q.ravel()
L[:,a] = l.ravel()
K[:,a] = k.ravel()
X = X - dot(t, p.T)
Y = Y - dot(t, q.T)
Z = (Z.T - dot(w, l.T)).T
var_x[a] = pow(X, 2).sum()
var_y[a] = pow(Y, 2).sum()
var_z[a] = pow(Z, 2).sum()
B = dot(dot(W, inv(dot(P.T, W))), Q.T)
b0 = mnY - dot(mnX, B)
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
evz = 100.0*(1 - var_z/varZ)
return T, W, P, Q, U, L, K, B, b0, evx, evy, evz
def lplsr(X, Y, Z, a_max, mean_ctr=[2,0,1]):
""" Haralds LPLS.
"""
if mean_ctr!=None:
xctr, yctr, zctr = mean_ctr
X, mnX = center(X, xctr)
Y, mnY = center(Y, yctr)
Z, mnZ = center(Z, zctr)
varX = pow(X, 2).sum()
varY = pow(Y, 2).sum()
varZ = pow(Z, 2).sum()
m, n = X.shape
k, l = Y.shape
u, o = Z.shape
# initialize
Wy = empty((l, a_max))
Py = empty((l, a_max))
Ty = empty((m, a_max))
Tz = empty((o, a_max))
Wz = empty((u, a_max))
Pz = empty((u, a_max))
var_x = empty((a_max,))
var_y = empty((a_max,))
var_z = empty((a_max,))
# residuals
Ey = Y.copy()
Ez = Z.copy()
Ex = X.copy()
for i in range(a_max):
YtXZ = dot(Ey.T, dot(X, Ez.T))
U, S, V = linalg.svd(YtXZ)
wy = U[:,0]
print wy
wz = V[0,:]
ty = dot(Ey, wy)
tz = dot(Ez.T, wz)
py = dot(Ey.T, ty)/dot(ty.T,ty)
pz = dot(Ez, tz)/dot(tz.T,tz)
Wy[:,i] = wy
Wz[:,i] = wz
Ty[:,i] = ty
Tz[:,i] = tz
Py[:,i] = py
Pz[:,i] = pz
Ey = Ey - outer(ty, py.T)
Ez = (Ez.T - outer(tz, pz.T)).T
var_y[i] = pow(Ey, 2).sum()
var_z[i] = pow(Ez, 2).sum()
tyd = apply_along_axis(norm, 0, Ty)
tzd = apply_along_axis(norm, 0, Tz)
Tyu = Ty/tyd
Tzu = Tz/tzd
C = dot(dot(Tyu.T, X), Tzu)
for i in range(a_max):
Ex = Ex - dot(dot(Ty[:,:i+1],C[:i+1,:i+1]), Tz[:,:i+1].T)
var_x[i] = pow(Ex,2).sum()
# variance explained
print "var_x:"
print var_x
print "varX total:"
print varX
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
evz = 100.0*(1 - var_z/varZ)
return Ty, Tz, Wy, Wz, Py, Pz, C, Ey, Ez, Ex, evx, evy, evz
def bifpls(X, Y, Z, a_max, alpha):
"""Swedssihsh LPLS by nipals.
"""
u = X[:,0]
Ey = Y.copy()
Ez = Z.copy()
for i in range(100):
w = dot(X.T,u)
w = w/vnorm(w)
t = dot(X, w)
q = dot(Ey, t.T)/dot(t.T,t)
qnorm = vnorm(q)
q = q/qnorm
v = dot(Ez, q)
s = dot(Ez.T, v)/dot(v.T,v)
v = v*vnorm(s)
s = s/vnorm(s)
c = qnorm*(alpha*q + (1-alpha)*s)
u = dot(Ey, c)/dot(s.T,s)
p = dot(X.T, t)/dot(t.T,t)
v2 = dot(Ez, s)/dot(s.T,s)
Ey = Ey - dot(t, p.T)
Ez = Ez - dot(v2, c.T)
# variance explained
evx = 100.0*(1 - var_x/varX)
evy = 100.0*(1 - var_y/varY)
evz = 100.0*(1 - var_z/varZ)
def center(a, axis):
# 0 = col center, 1 = row center, 2 = double center
# -1 = nothing
if len(a.shape)==1:
mn = a.mean()
return a - mn, mn
if a.shape[0]==1 or a.shape[1]==1:
mn = a.mean()
return a - mn, mn
if axis==-1:
mn = zeros((a.shape[1],))
return a - mn, mn
elif axis==0:
mn = a.mean(0)
return a - mn, mn
elif axis==1:
mn = a.mean(1)[:,newaxis]
return a - mn , mn
elif axis==2:
mn = a.mean(1)[:,newaxis] + a.mean(0) - a.mean()
return a - mn, mn
    else:
        raise ValueError("input error: axis must be in [-1,0,1,2]")
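# Quick reference for the axis argument above (summary of the code):
#     center(a, 0)  subtracts column means (column centering)
#     center(a, 1)  subtracts row means (row centering)
#     center(a, 2)  subtracts row means + column means - grand mean
#                   (double centering)
#     center(a, -1) leaves a unchanged (returns a zero mean vector)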
def correlation_loadings(D, T, P, test=True):
""" Returns correlation loadings.
:input:
- D: [nsamps, nvars], data (non-centered data)
- T: [nsamps, a_max], Scores
- P: [nvars, a_max], Loadings
    :output:
- Rloads: [nvars, a_max], Correlation loadings
- rmseVars: [nvars], scaling coeff. for each var in D
:notes:
    - FIXME: Calculation is not valid .... using corrcoef instead
"""
nsamps, nvars = D.shape
nsampsT, a_max = T.shape
nvarsP, a_maxP = P.shape
if nsamps!=nsampsT: raise IOError("D/T mismatch")
if a_max!=a_maxP: raise IOError("a_max mismatch")
if nvars!=nvarsP: raise IOError("D/P mismatch")
#init
Rloads = empty((nvars, a_max), 'd')
stdvar = stats.std(D, 0)
rmseVars = sqrt(nsamps-1)*stdvar
# center
D = D - D.mean(0)
TT = diag(dot(T.T, T))
sTT = sqrt(TT)
for a in range(a_max):
Rloads[:,a] = sTT[a]*P[:,a]/rmseVars
R = empty_like(Rloads)
for a in range(a_max):
for k in range(nvars):
r = corrcoef(D[:,k], T[:,a])
R[k,a] = r[0,1]
#Rloads = R
return Rloads, R, rmseVars
def cv_lpls(X, Y, Z, a_max=2, nsets=None,alpha=.5, mean_ctr=[2,0,1]):
"""Performs crossvalidation to get generalisation error in lpls"""
# if double centering of x or y:
# row-center prior to cross validation (as this is independent of subsets)
if mean_ctr[0]==2:
mnx_row = X.mean(1)[:,newaxis]
X = X - mnx_row
mean_ctr[0] = 0
else:
mnx_row = 0
if mean_ctr[1]==2:
if Y.shape[1]!=1:
mny_row = Y.mean(1)[:,newaxis]
Y = Y - mny_row
else:
mny_row = 0
cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets,center=False,index_out=True)
k, l = Y.shape
Yhat = empty((a_max,k,l), 'd')
for i, (xcal,xi,ycal,yi,ind) in enumerate(cv_iter):
T, W, P, Q, U, L, K, B, b0, evx, evy, evz, mnx, mny, mnz = nipals_lpls(xcal,ycal,Z,
a_max=a_max,
alpha=alpha,
mean_ctr=mean_ctr,
verbose=False)
for a in range(a_max):
xc = xi - mnx
Yhat[a,ind,:] = mny + dot(xc, B[a])
Yhat_class = zeros_like(Yhat)
for a in range(a_max):
for i in range(k):
Yhat_class[a,i,argmax(Yhat[a,i,:])] = 1.0
class_err = 100*((Yhat_class+Y)==2).sum(1)/Y.sum(0).astype('d')
sep = (Y - Yhat)**2
rmsep = sqrt(sep.mean(1))
return rmsep, Yhat, class_err
def jk_lpls(X, Y, Z, a_max, nsets=None, xz_alpha=.5, mean_ctr=[2, 0, 1]):
    """Jackknife estimation of the lpls loading weights (Wx, Wz, Wy)."""
    m, n = X.shape
    k, l = Y.shape
    o, p = Z.shape
    if nsets is None:
        nsets = m  # leave-one-out
    cv_iter = select_generators.pls_gen(X, Y, n_blocks=nsets, center=False, index_out=False)
    WWx = empty((nsets, n, a_max), 'd')
    WWz = empty((nsets, o, a_max), 'd')
    WWy = empty((nsets, l, a_max), 'd')
    for i, (xcal, xi, ycal, yi) in enumerate(cv_iter):
        T, W, P, Q, U, L, K, B, b0, evx, evy, evz, mnx, mny, mnz = nipals_lpls(
            xcal, ycal, Z, a_max=a_max, alpha=xz_alpha, mean_ctr=mean_ctr,
            scale='loads', verbose=False)
        WWx[i, :, :] = W
        WWz[i, :, :] = L
        WWy[i, :, :] = Q
    return WWx, WWz, WWy
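# A minimal sketch (generic, not tied to pls_gen above) of the jackknife
# idea used here: refit on each leave-one-out subset, collect the perturbed
# loadings, and use their spread around the full-data loadings as a
# stability estimate. PCA stands in for lpls.
def _demo_jackknife_loadings():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(12, 4)

    def first_loading(A):
        A = A - A.mean(0)
        _, _, Vt = np.linalg.svd(A, full_matrices=False)
        v = Vt[0]
        return v if v[np.argmax(np.abs(v))] > 0 else -v  # fix the sign

    w_full = first_loading(X)
    W = np.array([first_loading(np.delete(X, i, axis=0)) for i in range(12)])
    return ((W - w_full)**2).sum(0)  # per-variable jackknife variance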

scripts/lpls/plots_lpls.py Normal file
@@ -0,0 +1,148 @@
import pylab
import matplotlib
import networkx as nx
import scipy
import rpy
def plot_corrloads(R, pc1=0, pc2=1, s=20, c='b', zorder=5, expvar=None,
                   ax=None, drawback=True, labels=None, **kwds):
    """Correlation loading plot with 50% and 100% explained-variance circles."""
    if ax is None:
        ax = pylab.gca()  # was only set inside the background branch (NameError otherwise)
    # background
    if drawback:
        radius = 1
        center = (0, 0)
        c100 = matplotlib.patches.Circle(center, radius=radius,
                                         facecolor=(.97, .97, .97),
                                         zorder=1, linewidth=1,
                                         edgecolor=(0, 0, 0))
        c50 = matplotlib.patches.Circle(center, radius=radius/2.0,
                                        facecolor=(.85, .85, .85),
                                        zorder=1, linewidth=1,
                                        edgecolor=(0, 0, 0))
        ax.add_patch(c100)
        ax.add_patch(c50)
        ax.axhline(lw=1.5, color='k', zorder=4)
        ax.axvline(lw=1.5, color='k', zorder=4)
    # corrloads
    ax.scatter(R[:, pc1], R[:, pc2], s=s, c=c, zorder=zorder, **kwds)
    ax.set_xlim([-1.1, 1.1])
    ax.set_ylim([-1.1, 1.1])
    if expvar is not None:
        ax.set_xlabel("Comp: %d expl.var: %.1f" % (pc1 + 1, expvar[pc1]))
        ax.set_ylabel("Comp: %d expl.var: %.1f" % (pc2 + 1, expvar[pc2]))
    if labels is not None:
        assert len(labels) == R.shape[0]
        for name, r in zip(labels, R):
            ax.text(r[pc1], r[pc2], " " + name)
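# A short usage sketch (illustrative only, not part of the original file)
# for plot_corrloads: random correlation loadings in the unit disc plotted
# against the first two components, with hypothetical explained variances.
def _demo_plot_corrloads():
    import numpy as np
    rng = np.random.RandomState(0)
    theta = rng.rand(30)*2*np.pi
    rad = rng.rand(30)
    R = np.column_stack([rad*np.cos(theta), rad*np.sin(theta)])
    plot_corrloads(R, pc1=0, pc2=1, expvar=[45.0, 20.0])
    pylab.show()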
def dag(terms, ontology):
    """Return the GO DAG (as an edge dict) for the given terms."""
    rpy.r.library("GOstats")
    __parents = {'bp': rpy.r.GOBPPARENTS,
                 'mf': rpy.r.GOMFPARENTS,
                 'cc': rpy.r.GOCCPARENTS}
    gograph = rpy.r.GOGraph(terms, __parents.get(ontology.lower()))
    dag = rpy.r.edges(gograph)
    return dag
def plot_dag(dag, node_color='b', node_size=30, with_labels=False,
             nodelist=None, pos=None, **kwd):
    rpy.r.library("GOstats")
    dag_name = "GO-bp"
    # networkx does not play well with colons in node names
    clean_edges = {}
    for head, neighbours in dag.items():
        head = head.replace(":", "_")
        clean_edges[head] = [i.replace(":", "_") for i in neighbours]
    if pos is None:
        G = nx.from_dict_of_lists(clean_edges, nx.DiGraph(name=dag_name))
        pos = nx.pydot_layout(G, prog='dot')
        # map the positions back to the original (colon) names, y-flipped
        pos_new = {}
        for k, v in pos.items():
            x, y = v
            pos_new[k.replace("_", ":")] = (x, -y)
        pos = pos_new
    G = nx.from_dict_of_lists(dag, nx.Graph(name=dag_name))
    if len(node_color) > 1:
        assert len(node_color) == len(nodelist)
    nx.draw_networkx(G, pos, with_labels=with_labels, node_size=node_size,
                     node_color=node_color, nodelist=nodelist, **kwd)
    return pos
def plot_ZXcorr(gene_ids, term_ids, gene2go, X, D, scale=True):
    """Plot correlation/covariance between genes as a function of
    semantic distance.

    input: X (n, p) data matrix
           D (p, p) gene-gene semantic similarity matrix
    NB: unfinished -- the loops below do not yet accumulate or plot
    anything, and the original code overwrote the input D.
    """
    C = scipy.corrcoef(X)  # was assigned to D, clobbering the input
    term2ind = dict(enumerate(term_ids))
    for i, gene_i in enumerate(gene_ids):
        for j, gene_j in enumerate(gene_ids):
            if j < i:
                r2 = C[i, j]
                terms_i = gene2go[gene_i]
                terms_j = gene2go[gene_j]
                for ti, term in enumerate(term_ids):
                    if term in terms_i:
                        pass


def clustering_index(T, Yg):
    """Not implemented."""
    pass
def draw_gene(gid, gene_ids, gene2go, Z, tmat, terms, G, pos):
    """Draw DAGs with marked GO terms and the distance to all terms."""
    sub_terms = gene2go[gid]
    sub_index = [i for i, tid in enumerate(terms) if tid in sub_terms]
    node_size = 70.*scipy.ones((len(terms),))
    node_size[sub_index] = 500
    gene_index = [i for i, gene_id in enumerate(gene_ids) if gene_id == gid]
    node_color = Z[:, gene_index].ravel()
    pylab.figure()
    nx.draw_networkx(G, pos, node_color=node_color, node_size=node_size,
                     with_labels=False, nodelist=terms)
    ax = pylab.gca()
    pylab.colorbar(ax.collections[0])
    for tid in sub_index:
        pylab.figure()
        node_color = tmat[tid, :]
        node_size = 170*node_color
        node_size[tid] = 500
        nx.draw_networkx(G, pos, node_color=node_color, node_size=node_size,
                         with_labels=False, nodelist=terms)
        pylab.title(terms[tid])
        ax = pylab.gca()
        pylab.colorbar(ax.collections[0])
    pylab.show()

scripts/lpls/rpy_go.py Normal file
@@ -0,0 +1,263 @@
""" Module for Gene ontology related functions called in R"""
import scipy
import rpy
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
import collections
def goterms_from_gene(genelist, ontology='BP', garbage=['IEA'], ic_cutoff=2.0, verbose=False):
    """Returns the GO terms for a given genelist (Entrez ids).

    Recalculates the information content if needed, based on the selected
    evidence codes.
    """
    rpy.r.library("GOSim")
    _CODES = {"IMP": "inferred from mutant phenotype",
              "IGI": "inferred from genetic interaction",
              "IPI": "inferred from physical interaction",
              "ISS": "inferred from sequence similarity",
              "IDA": "inferred from direct assay",
              "IEP": "inferred from expression pattern",
              "IEA": "inferred from electronic annotation",
              "TAS": "traceable author statement",
              "NAS": "non-traceable author statement",
              "ND": "no biological data available",
              "IC": "inferred by curator"
              }
    _ONTOLOGIES = ['BP', 'CC', 'MF']
    assert(ontology in _ONTOLOGIES)
    dummy = rpy.r.setOntology(ontology)
    if ontology == 'BP' and garbage is not None:
        # This is for ontology='BP' and garbage=['IEA', 'ISS', 'ND']
        rpy.r.load("ICsBP_small.rda")  # excludes IEA
        ic = rpy.r.assign("IC", rpy.r.IC, envir=rpy.r.GOSimEnv)
        # normalise the information content to [0, 1]
        max_val = max([val for val in ic.values() if val != scipy.inf])
        for key, val in ic.items():
            ic[key] = val/max_val
    else:
        # NB! this IC is just for BP
        ic = rpy.r('get("IC", envir=GOSimEnv)')
    print "loading GO definitions environment"
    gene2terms = collections.defaultdict(list)
    # counters: cc accepted, dd wrong ontology, ii no annotation,
    # jj IC too high, kk no IC
    cc = dd = ii = jj = kk = 0
    all_terms = rpy.r.mget(genelist, rpy.r.GOENTREZID2GO, ifnotfound="NA")
    print "Number of terms with IC: %d" % len(ic)
    for gene, terms in all_terms.items():
        if verbose:
            print "\n\n ======ITEM========\n"
            print "Gene: " + str(gene)
            print "Number of terms: %d " % len(terms)
            print terms
            print "---\n"
        if terms != "NA":
            for term, desc in terms.items():
                if verbose:
                    print "\nChecking term: " + str(term)
                    print "With description: " + str(desc)
                if desc['Ontology'].lower() == ontology.lower() and term in ic:
                    if ic[term] > ic_cutoff:
                        jj += 1
                        if verbose:
                            print "IC too high: " + str((gene, term))
                        continue
                    cc += 1
                    if verbose:
                        print "accepted: " + str((gene, term))
                    gene2terms[gene].append(term)
                else:
                    if verbose:
                        print "Not accepted: " + str((gene, term))
                    if term not in ic:
                        if verbose:
                            print "Not in IC: " + str((gene, term))
                        kk += 1
                    # NB: compare lower-case to lower-case (the original
                    # compared against the upper-case ontology name)
                    if desc['Ontology'].lower() != ontology.lower():
                        if verbose:
                            print "Not in ontology: " + str((gene, term))
                        dd += 1
        else:
            ii += 1
    print "Number of genes total: %d" % len(all_terms)
    print "\nNumber of genes without annotation: (%d (NA))" % ii
    print "\nNumber of terms with annotation but no IC: %d" % kk
    print "\nNumber of terms not in %s: %d " % (ontology, dd)
    print "\nNumber of terms with too high IC: %d " % jj
    print "\nNumber of accepted terms: %d" % cc
    return gene2terms
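# A minimal sketch (illustrative, not GOSim's implementation) of the
# information-content idea used above: IC(t) = -log p(t), where p(t) is the
# annotation frequency of term t. Rarer terms carry more information, and
# the values are normalised to [0, 1] by the maximum. The term counts below
# are hypothetical.
def _demo_information_content():
    import math
    term_counts = {'GO:A': 500, 'GO:B': 50, 'GO:C': 5}  # hypothetical counts
    total = float(sum(term_counts.values()))
    ic = dict((t, -math.log(n/total)) for t, n in term_counts.items())
    max_val = max(ic.values())
    ic_norm = dict((t, v/max_val) for t, v in ic.items())
    assert ic_norm['GO:C'] == 1.0  # the rarest term has maximal IC
    return ic_norm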
def genego_matrix(goterms, tmat, gene_ids, term_ids, func=max):
    """For each gene, aggregate the rows of the term-term similarity matrix
    tmat that correspond to the gene's GO terms, using func (default: max)."""
    gene2indx = dict((gid, i) for i, gid in enumerate(gene_ids))
    term2indx = dict((tid, i) for i, tid in enumerate(term_ids))
    G = []
    new_gene_index = []
    for gene, terms in goterms.items():
        g_ind = gene2indx[gene]
        if len(terms) > 0:
            new_gene_index.append(g_ind)
            t_ind = [term2indx[term] for term in terms if term in term2indx]
            subsim = tmat[t_ind, :]
            gene_vec = scipy.apply_along_axis(func, 0, subsim)
            G.append(gene_vec)
    return scipy.asarray(G), new_gene_index
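# A short sketch (toy data, not real GO similarities) of the aggregation
# performed above: for each gene, take the column-wise max (or other func)
# over the rows of the term-term similarity matrix that correspond to the
# gene's terms.
def _demo_genego_matrix_aggregation():
    import numpy as np
    tmat = np.array([[1.0, .2, .1],
                     [.2, 1.0, .4],
                     [.1, .4, 1.0]])
    t_ind = [0, 2]                    # the gene's terms
    gene_vec = tmat[t_ind, :].max(0)  # func=max over the gene's terms
    assert np.allclose(gene_vec, [1.0, .4, 1.0])
    return gene_vec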
def genego_sim(gene2go, gene_ids, all_go_terms, STerm, go_term_sim="OA", term_sim="Lin", verbose=False):
    """Returns a (go-terms x genes) similarity matrix.

    :input:
        - gene2go: dict mapping gene_id -> go terms
        - gene_ids: list of gene ids (Entrez ids)
        - all_go_terms: list of GO terms (the rows of the result)
        - STerm: (go_terms x go_terms) similarity matrix
        - go_term_sim: similarity measure between a gene and multiple go terms (max, mean, OA)
        - term_sim: similarity measure between two go-terms
        - verbose
    """
    rpy.r.library("GOSim")
    GG = scipy.empty((len(all_go_terms), len(gene_ids)), 'd')
    for j, gene in enumerate(gene_ids):
        for i, go_term in enumerate(all_go_terms):
            if verbose:
                print "\nAssigning similarity from %s to terms(gene): %s" % (go_term, gene)
            GG[i, j] = rpy.r.getGSim(go_term, gene2go[gene], similarity=go_term_sim,
                                     similarityTerm=term_sim, STerm=STerm, verbose=verbose)
    return GG
def goterm2desc(gotermlist):
    """Returns the GO-term descriptions keyed by GO term."""
    rpy.r.library("GO")
    term2desc = {}
    for term in gotermlist:
        try:
            term2desc[str(term)] = rpy.r('Term(GOTERM[["' + str(term) + '"]])')
        except Exception:
            raise LookupError("Description not found for %s\nMapping incomplete" % term)
    return term2desc
def parents_dag(go_terms, ontology=['BP']):
    """Returns a dict-of-lists representation of the GO DAG of parents of
    the given go_terms.

    Make the networkx graph with:
        G = networkx.DiGraph()
        G = networkx.from_dict_of_lists(edge_dict, G)
    """
    try:
        rpy.r.library("GOstats")
    except Exception:
        raise ImportError("GOstats (R library) not available")
    assert(go_terms[0][:3] == 'GO:')
    # valid GO namespaces
    go_env = {'BP': rpy.r.GOBPPARENTS, 'MF': rpy.r.GOMFPARENTS, 'CC': rpy.r.GOCCPARENTS}
    graph = rpy.r.GOGraph(go_terms, go_env[ontology[0]])
    edges = rpy.r.edges(graph)
    edges.pop('all')
    edge_dict = {}
    for head, neighbours in edges.items():
        for nn in neighbours.values():
            edge_dict.setdefault(nn, []).append(head)
    return edge_dict
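# A short sketch (toy edge dict, not real GO ids) of how the dict-of-lists
# returned above becomes a networkx DAG:
def _demo_parents_dag_to_graph():
    import networkx as nx
    edge_dict = {'GO:child1': ['GO:parent'], 'GO:child2': ['GO:parent']}
    G = nx.from_dict_of_lists(edge_dict, nx.DiGraph())
    assert G.has_edge('GO:child1', 'GO:parent')
    return G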
def gene_GO_hypergeo_test(genelist, universe="entrezUniverse", ontology="BP",
                          chip="hgu133a", pval_cutoff=0.01, cond=False,
                          test_direction="over"):
    """GO enrichment of genelist against universe via a hypergeometric test
    (GOstats hyperGTest)."""
    params = rpy.r.new("GOHyperGParams",
                       geneIds=genelist,
                       universeGeneIds=universe,  # was computed but never passed
                       annotation=chip,           # was hard-coded to "hgu133a"
                       ontology=ontology,
                       pvalueCutoff=pval_cutoff,
                       conditional=cond,
                       testDirection=test_direction
                       )
    result = rpy.r.summary(rpy.r.hyperGTest(params))
    return result, params
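# A minimal sketch (using scipy.stats.hypergeom, not GOstats) of the
# over-representation test performed by hyperGTest: given N genes in the
# universe, K of them annotated to a term, and a selection of n genes of
# which k carry the term, the p-value is P(X >= k) under a hypergeometric
# null. The counts below are hypothetical.
def _demo_hypergeo_pval():
    from scipy import stats
    N, K, n, k = 5000, 40, 100, 5
    pval = stats.hypergeom.sf(k - 1, N, K, n)  # P(X >= k)
    return pval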
def data_aff2loc_hgu133a(X, aff_ids, verbose=False):
    """Map affy hgu133a probeset columns of X to Entrez ids. When several
    probesets map to one Entrez id, keep the one with maximal variance."""
    aff_ids = scipy.asarray(aff_ids)
    if verbose:
        print "\nNumber of probesets in affy list: %s" % len(aff_ids)
    rpy.r.library("hgu133a")
    trans_table = rpy.r.as_list(rpy.r.hgu133aENTREZID)
    if verbose:
        print "Number of entrez ids: %d" % (scipy.asarray(trans_table.values()) > 0).sum()
    enz2aff = collections.defaultdict(list)
    for aff, enz in trans_table.items():
        if int(enz) > 0 and (aff in aff_ids):
            enz2aff[enz].append(aff)
    if verbose:
        print "\nNumber of translated entrez ids: %d" % len(enz2aff)
    aff2ind = dict(zip(aff_ids, scipy.arange(len(aff_ids))))
    new_data = []
    new_ids = []
    m = 0  # ids with multiple probesets
    s = 0  # ids with a unique probeset
    for enz, aff_id_list in enz2aff.items():
        index = [aff2ind[aff_id] for aff_id in aff_id_list]
        if len(index) > 1:
            m += 1
            xsub = X[:, index]
            choose_this = scipy.argmax(xsub.var(0))
            new_data.append(xsub[:, choose_this].ravel())
        else:
            s += 1
            new_data.append(X[:, index].ravel())
        new_ids.append(enz)
    if verbose:
        print "Ids with multiple probesets: %d" % m
        print "Ids with unique probeset: %d" % s
    X = scipy.asarray(new_data).T
    return X, new_ids
def R_PLS(x, y, ncomp=3, validation='"LOO"'):
    """Fit a PLSR model via the R pls package. Note that validation is
    passed as a quoted R string, e.g. '"LOO"'."""
    rpy.r.library("pls")
    rpy.r.assign("X", x)
    rpy.r.assign("Y", y)
    callstr = "plsr(Y~X, ncomp=" + str(ncomp) + ", validation=" + validation + ")"
    print callstr
    result = rpy.r(callstr)
    return result

scripts/lpls/run_smoker.py Normal file
@@ -0,0 +1,654 @@
import sys,time,cPickle
import rpy
from pylab import gca, figure, subplot,plot
from scipy import *
from scipy.linalg import norm
from lpls import correlation_loadings
import rpy_go
sys.path.append("../../laydi") # home of dataset
sys.path.append("../../laydi/lib") # home of cx_stats
sys.path.append("/home/flatberg/laydi/scripts/lpls")
sys.path.append("/home/flatberg/pyblm/")
import dataset
import cx_stats
import pyblm
from pyblm.engines import nipals_lpls, pls
from pyblm.crossvalidation import lpls_val, lpls_jk
from pyblm.statistics import pls_qvals
from plots_lpls import plot_corrloads, plot_dag
import plots_lpls
def iqr(X, axis=0):
    """Interquartile range filtering."""
    def _iqr(c):
        return stats.scoreatpercentile(c, 75) - stats.scoreatpercentile(c, 25)
    return apply_along_axis(_iqr, axis, X)
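# A short sketch (with an illustrative threshold of 1, as used further
# below) of how the IQR serves as a variance filter: keep only the columns
# whose interquartile range exceeds the threshold.
def _demo_iqr_filter():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(50, 10)*np.linspace(0.1, 3.0, 10)  # columns with growing spread
    q75, q25 = np.percentile(X, [75, 25], axis=0)
    keep = np.where((q75 - q25) > 1)[0]
    return X[:, keep]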
# Possible outliers
# http://www.pubmedcentral.nih.gov/articlerender.fcgi?tool=pubmed&pubmedid=16817967
sample_outliers = ['OV:NCI_ADR_RES', 'CNS:SF_295', 'CNS:SF_539', 'RE:SN12C', 'LC:NCI_H226', 'LC:NCI_H522', 'PR:PC_3', 'PR:DU_145']
outlier = 'ME:LOXIMVI' # 19
####### OPTIONS ###########
# data
chip = "hgu133a"
use_data = 'uma'
#use_data = 'scherf'
#use_data = 'smoker'
if use_data == 'scherf':
data_cached = False
use_saved_plsr_result = False
subset = 'plsr'
small_test = False
use_sbg_subset = True # the sandberg nci-Ygroups subset
std_y = False
std_z = False
# go
ontology = "bp"
min_genes = 5
similarities = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
meth = similarities[2]
go_term_sim = "OA"
# lpls
a_max = 10
#aopt = 4
aopt = 2 # doubling-time
xz_alpha = .5
w_alpha = .3
center_axis = [2, 0, 2]
zorth = True
nsets = None
qval_cutoff = 0.1
n_iter = 50
alpha_check = True
calc_rmsep = True
bevel_check = False
save_calc = True
elif use_data == 'uma':
data_cached = False
use_saved_plsr_result = False
subset = 'iqr'
small_test = False
use_sbg_subset = True # the sandberg nci-Ygroups subset
std_y = False
std_z = False
# go
ontology = "bp"
min_genes = 5
similarities = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
meth = similarities[2]
go_term_sim = "OA"
# lpls
a_max = 10
aopt = 5
xz_alpha = .5
w_alpha = .3
center_axis = [2, 0, 2]
zorth = True
nsets = None
qval_cutoff = 0.01
n_iter = 50
alpha_check = True
calc_rmsep = True
bevel_check = False
save_calc = True
elif use_data == 'smoker':
data_cached = False
use_saved_plsr_result = False
#subset = 'plsr'
subset = 'plsr'
small_test = False
use_sbg_subset = False # the sandberg nci-Ygroups subset
std_y = False
std_z = False
# go
ontology = "bp"
min_genes = 5
similarities = ("JiangConrath","Resnik","Lin","CoutoEnriched","CoutoJiangConrath","CoutoResnik","CoutoLin")
meth = similarities[2]
go_term_sim = "OA"
# lpls
a_max = 5
aopt = 2
xz_alpha = .5
w_alpha = .3
center_axis = [2, 0, 2]
zorth = True
nsets = None
qval_cutoff = 0.01
n_iter = 50
alpha_check = True
calc_rmsep = True
bevel_check = False
save_calc = True
else:
raise ValueError
print "Using options for : " + use_data
######## DATA ##########
if use_data=='smoker':
# full smoker data
DX = dataset.read_ftsv(open("/home/flatberg/datasets/smokers/full/Smokers.ftsv"))
DY = dataset.read_ftsv(open("/home/flatberg/datasets/smokers/full/Yg.ftsv"))
DYr = dataset.read_ftsv(open("/home/flatberg/datasets/smokers/full/Ypy.ftsv"))
Y = DYr.asarray().astype('d')
gene_ids = DX.get_identifiers('gene_ids', sorted=True)
sample_ids = DY.get_identifiers('_patient', sorted=True)
elif use_data=='scherf':
print "hepp"
#DX = dataset.read_ftsv(open("../../data/scherf/old_data/scherfX.ftsv"))
#DY = dataset.read_ftsv(open("../../data/scherf/old_data/scherfY.ftsv"))
DX = dataset.read_ftsv(open("nci60/X5964.ftsv", "r"))
DYg = dataset.read_ftsv(open("../../data/uma/Yg133.ftsv"))
DYr = dataset.read_ftsv(open("../../data/uma/Yd.ftsv"))
Y = DYg.asarray().astype('d')
DY = DYg.copy()
Yg = Y
Yr = DYr.asarray().astype('d')
X = DX.asarray()
gene_ids = DX.get_identifiers('gene_ids', sorted=True)
sample_ids = DY.get_identifiers('cline', sorted=True)
elif use_data=='staunton':
pass
elif use_data=='uma':
DX = dataset.read_ftsv(open("/home/flatberg/datasets/uma/X133.ftsv"))
DYg = dataset.read_ftsv(open("/home/flatberg/datasets/uma/Yg133.ftsv"))
DYr = dataset.read_ftsv(open("/home/flatberg/datasets/uma/Yd.ftsv"))
X = DX.asarray()
Y = DYg.asarray().astype('d')
DY = DYg.copy()
Yg = Y
Yr = DYr.asarray().astype('d')
gene_ids = DX.get_identifiers('gene_ids', sorted=True)
sample_ids = DY.get_identifiers('cline', sorted=True)
else:
    raise ValueError("use_data argument (%s) not valid" % use_data)
if use_sbg_subset and use_data in ['uma', 'scherf', 'staunton']:
print "Using sbg subset of cancers"
Y = Yg
Y_old = Y.copy()
Yr_old = Yr.copy()
X_old = X.copy()
keep_samples = ['CN', 'ME', 'LE', 'CO', 'RE']
sample_ids_original = DY.get_identifiers('cline', sorted=True)
sample_ids= [i for i in sample_ids if i[:2] in keep_samples]
rows_ind = [i for i,name in enumerate(sample_ids_original) if name[:2] in keep_samples]
# take out rows in X,Y
X = X[rows_ind,:]
Y = Y[rows_ind,:]
Yr = Yr[rows_ind,:]
# identify redundant columns in Y
cols_ind = where(Y.sum(0)>1)[0]
Y = Y[:, cols_ind]
# create new datasets with updated idents
cat_ids = [name for i,name in enumerate(DYg.get_identifiers('_cancer', sorted=True)) if i in cols_ind]
DX = dataset.Dataset(X, [['cline', sample_ids], ['gene_ids', gene_ids]], name='Dxr')
DYg = dataset.CategoryDataset(Y, [['cline', sample_ids], ['_cancer', cat_ids]], name='Dyr')
DYr = dataset.Dataset(Yr, [['cline', sample_ids], ['_time', ['doubling_time']]], name='Dyrr')
DY_old = DY.copy()
DY = DYg
print "Now there are %d samples in X" %X.shape[0]
# use subset of genes with defined GO-terms
ic_all = 2026006.0 # sum of all ic in BP
max_ic = -log(1/ic_all)
ic_cutoff = -log(min_genes/ic_all)/max_ic
print "Information cutoff for min %d genes: %.2f" %(min_genes, ic_cutoff)
gene2goterms = rpy_go.goterms_from_gene(gene_ids, ic_cutoff=ic_cutoff)
all_terms = set()
for t in gene2goterms.values():
all_terms.update(t)
terms = list(all_terms)
print "\nNumber of go-terms: %s" %len(terms)
# update genelist
gene_ids = gene2goterms.keys()
print "\nNumber of genes: %s" %len(gene_ids)
X = DX.asarray()
index = DX.get_indices('gene_ids', gene_ids)
X = X[:,index]
# Use only subset defined on GO
ontology = 'BP'
print "\n\nFiltering genes by Go terms "
# use subset based on SAM,PLSR or (IQR)
if subset=='plsr':
print "plsr filter on genes"
if use_saved_plsr_result:
index = cPickle.load(open('plsr_index.pkl'))
# Subset data
X = X[:,index]
gene_ids = [gid for i, gid in enumerate(gene_ids) if i in index]
print "\nNumber of genes: %s" %len(gene_ids)
print "\nWorking on subset with %s genes " %len(gene_ids)
# update valid go-terms
gene2goterms = rpy_go.goterms_from_gene(gene_ids, ic_cutoff=ic_cutoff)
all_terms = set()
for t in gene2goterms.values():
all_terms.update(t)
terms = list(all_terms)
print "\nNumber of go-terms: %s" %len(terms)
# update genelist
gene_ids = gene2goterms.keys()
else:
print "Initial plsr qvals"
xcal_tsq_x, xpert_tsq_x = pyblm.pls_qvals(X, Y, aopt=aopt, n_iter=n_iter, center_axis=[0,0], nsets=None)
qvals = pyblm.statistics._fdr(xcal_tsq_x, xpert_tsq_x, median)
# cut off
#sort_index = qvals.argsort()
#index = sort_index[:800]
#qval_cutoff = qvals[sort_index[500]]
print "Using cuf off: %.2f" %qval_cutoff
index = where(qvals<qval_cutoff)[0]
if small_test:
index = index[:20]
# Subset data
X = X[:,index]
gene_ids = [gid for i, gid in enumerate(gene_ids) if i in index]
print "\nNumber of genes: %s" %len(gene_ids)
print "\nWorking on subset with %s genes " %len(gene_ids)
# update valid go-terms
gene2goterms = rpy_go.goterms_from_gene(gene_ids, ic_cutoff=ic_cutoff)
all_terms = set()
for t in gene2goterms.values():
all_terms.update(t)
terms = list(all_terms)
print "\nNumber of go-terms: %s" %len(terms)
# update genelist
gene_ids = gene2goterms.keys()
print "\nNumber of genes: %s" %len(gene_ids)
elif subset == 'iqr':
iqr_vals = iqr(X)
index = where(iqr_vals>1)[0]
X = X[:,index]
gene_ids = [gid for i, gid in enumerate(gene_ids) if i in index]
print "\nNumber of genes: %s" %len(gene_ids)
print "\nWorking on subset with %s genes " %len(gene_ids)
# update valid go-terms
gene2goterms = rpy_go.goterms_from_gene(gene_ids, ic_cutoff=ic_cutoff)
all_terms = set()
for t in gene2goterms.values():
all_terms.update(t)
terms = list(all_terms)
print "\nNumber of go-terms: %s" %len(terms)
# update genelist
gene_ids = gene2goterms.keys()
else:
# noimp (smoker data is prefiltered)
print "No prefiltering on data used"
pass
rpy.r.library("GOSim")
# Go-term similarity matrix
print "Term-term similarity matrix (method = %s)" %meth
print "\nCalculating term-term similarity matrix"
if meth=="CoutoEnriched":
aa = 0
ba = 0
rpy.r.setEnrichmentFactors(alpha = aa, beta =ba)
if not data_cached:
rpytmat = rpy.with_mode(rpy.NO_CONVERSION, rpy.r.getTermSim)(terms, method=meth,verbose=False)
tmat = rpy.r.assign("haha", rpytmat)
print "\n Calculating Z matrix"
Z = rpy_go.genego_sim(gene2goterms,gene_ids,terms,rpytmat,go_term_sim=go_term_sim,term_sim=meth)
DZ = dataset.Dataset(Z, [['go-terms', terms], ['gene_ids', gene_ids]], name='Dz_'+str(meth))
# update data (X) matrix
newind = DX.get_indices('gene_ids', gene_ids)
Xr = DX.asarray()[:,newind]
DXr = dataset.Dataset(Xr, [['cline', sample_ids], ['gene_ids', gene_ids]], name='Dxr')
else:
#DXr = dataset.read_ftsv(open('Xr.ftsv', 'r'))
newind = DX.get_indices('gene_ids', gene_ids)
Xr = DX.asarray()[:,newind]
DXr = dataset.Dataset(Xr, [['cline', sample_ids], ['gene_ids', gene_ids]], name='Dxr')
DY = dataset.read_ftsv(open('Y.ftsv', 'r'))
DZ = dataset.read_ftsv(open('Z.ftsv', 'r'))
Xr = DXr.asarray()
Y = DY.asarray()
Z = DZ.asarray()
sample_ids = DX.get_identifiers('cline', sorted=True)
# standardize Z?
sdtz = False
if sdtz:
DZ._array = DZ._array/DZ._array.std(0)
sdty = False
if sdty:
DY._array = DY._array/DY._array.std(0)
# ##### PLS ONLY, CHECK FOR SIMILARITY BETWEEN W and Z #######
if bevel_check:
Xr = DXr.asarray()
Y = DY.asarray()
from pylab import figure, scatter, xlabel, subplot,xticks,yticks
Xrcc = Xr - Xr.mean(0) - Xr.mean(1)[:,newaxis] + Xr.mean()
Zcc = Z - Z.mean(0) - Z.mean(1)[:,newaxis] + Z.mean()
Yc = Y - Y.mean(0)
xy_pls_result = pls(Xrcc, Yc, a_max)
xz_pls_result = pls(Xrcc.T, Zcc.T, a_max)
# check for linearity between scores of xz-result and W of xy-result
Wxy = xy_pls_result['W']
Txz = xz_pls_result['T']
figure()
n = 0
for i in range(a_max):
w = Wxy[:,i]
for j in range(a_max):
n += 1
t = Txz[:,j]
r2 = stats.corrcoef(w, t)[0,-1]
subplot(a_max, a_max, n)
scatter(w, t)
xticks([])
yticks([])
xlabel('(Wxy(%d), Tzx(%d)), r2: %.1f ' %(i+1,j+1,r2))
# ####### LPLSR ########
if save_calc and not data_cached:
print "Saving calculations"
import cPickle
fh = open("g2go_s.pkl", "w")
cPickle.dump(gene2goterms, fh)
fh.close()
dataset.write_ftsv(open('Xs.ftsv', 'w'), DXr, decimals=7)
dataset.write_ftsv(open('Ysg.ftsv', 'w'), DY, decimals=7)
dataset.write_ftsv(open('Yspy.ftsv', 'w'), DYr, decimals=7)
dataset.write_ftsv(open('Zs.ftsv', 'w'), DZ, decimals=7)
def read_calc():
    import cPickle
    fh = open("g2go_s.pkl")
    gene2goterms = cPickle.load(fh)
    fh.close()
    # NB: these filenames differ from the ones written by save_calc above
    # (Xs/Ysg/Yspy/Zs.ftsv); adjust them to whichever set was saved.
    DXr = dataset.read_ftsv(open('Xu.ftsv'))
    DY = dataset.read_ftsv(open('Yu.ftsv'))
    DYr = dataset.read_ftsv(open('Ydu.ftsv'))
    DZ = dataset.read_ftsv(open('Zu.ftsv'))
    return DXr, DY, DYr, DZ, gene2goterms
print "LPLSR ..."
lpls_result = nipals_lpls(Xr,Y,Z, a_max,alpha=xz_alpha, center_axis=center_axis, zorth=zorth)
globals().update(lpls_result)
# Correlation loadings
dx,Rx,rssx = correlation_loadings(Xr, T, P)
dx,Ry,rssy = correlation_loadings(Y, T, Q)
cadz,Rz,rssz = correlation_loadings(Z.T, W, L)
# Prediction error
if calc_rmsep:
rmsep , yhat, class_error = pyblm.crossvalidation.lpls_val(Xr, Y, Z, a_max, alpha=xz_alpha,center_axis=center_axis, nsets=nsets,zorth=zorth)
Alpha = arange(0.0, 1.01, .05)
if alpha_check:
Rmsep,Yhat, CE = [],[],[]
for a in Alpha:
print "alpha %f" %a
rmsep_a , yhat, ce = pyblm.lpls_val(Xr, Y, Z, a_max, alpha=a,
center_axis=center_axis,nsets=nsets,
zorth=zorth)
Rmsep.append(rmsep_a.copy())
Yhat.append(yhat.copy())
CE.append(ce.copy())
Rmsep = asarray(Rmsep)
Yhat = asarray(Yhat)
#CE = asarray(CE)
random_alpha_check = True
if random_alpha_check:
n_zrand = 100
RMS,YHAT, CEE = [],[],[]
zindex = arange(Z.shape[1])
for ii in range(n_zrand):
zind_rand = zindex.copy()
random.shuffle(zind_rand)
Zrand = Z[:,zind_rand]
#Alpha = arange(0.0, 1.1, .25)
Rmsep_r,Yhat_r, CE_r = [],[],[]
for a in Alpha:
print "Iter: %d alpha %.2f" %(ii, a)
rmsep , yhat, ce = pyblm.lpls_val(Xr, Y, Zrand, a_max, alpha=a,center_axis=center_axis,nsets=nsets, zorth=zorth)
Rmsep_r.append(rmsep.copy())
Yhat_r.append(yhat.copy())
CE_r.append(ce.copy())
RMS.append(Rmsep_r)
YHAT.append(Yhat_r)
CEE.append(CE_r)
RMS = asarray(RMS)
YHAT = asarray(YHAT)
CEE = asarray(CEE)
# Significance Hotellings T
calc_qvals = True
if not calc_qvals:
Wx, Wz = pyblm.crossvalidation.lpls_jk(Xr, Y, Z, aopt, center_axis=center_axis, xz_alpha=xz_alpha, nsets=nsets)
Ws = W*apply_along_axis(norm, 0, T)
Ws = Ws[:,:aopt]
cal_tsq_x = pyblm.statistics.hotelling(Wx, Ws[:,:aopt], alpha=w_alpha)
Ls = L*apply_along_axis(norm, 0, K)
cal_tsq_z = pyblm.statistics.hotelling(Wz, Ls[:,:aopt], alpha=0.01)
# qvals
if calc_qvals:
cal_tsq_z, pert_tsq_z, cal_tsq_x, pert_tsq_x = pyblm.lpls_qvals(Xr, Y, Z, aopt=aopt, zx_alpha=xz_alpha, n_iter=n_iter, nsets=nsets)
qvalz = pyblm.statistics._fdr(cal_tsq_z, pert_tsq_z, median)
qvalx = pyblm.statistics._fdr(cal_tsq_x, pert_tsq_x, median)
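# A minimal sketch (a generic permutation FDR, not necessarily what
# pyblm.statistics._fdr does) of how q-values are estimated from calibrated
# and permuted test statistics: for each observed statistic t, divide the
# median number of permuted statistics exceeding t by the number of observed
# statistics exceeding t.
def _demo_perm_fdr(cal, pert):
    import numpy as np
    cal = np.asarray(cal)
    pert = np.asarray(pert)  # shape: (n_vars, n_permutations)
    qvals = np.empty(cal.shape)
    for i, t in enumerate(cal):
        false_pos = np.median((pert >= t).sum(0))  # per permutation
        pos = max((cal >= t).sum(), 1)
        qvals[i] = min(1.0, false_pos/float(pos))
    return qvals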
# p-values, set-enrichment analysis
active_genes_ids = where(qvalx < qval_cutoff)[0]
active_genes = [name for i,name in enumerate(gene_ids) if i in active_genes_ids]
active_universe = gene_ids
gsea_result, gsea_params= rpy_go.gene_GO_hypergeo_test(genelist=active_genes,universe=active_universe,chip=chip,pval_cutoff=1.0,cond=False,test_direction="over")
active_goterms_ids = where(qvalz < qval_cutoff)[0]
active_goterms = [name for i,name in enumerate(terms) if i in active_goterms_ids]
gsea_t2p = dict(zip(gsea_result['GOBPID'], gsea_result['Pvalue']))
#### PLOTS ####
from pylab import *
from scipy import where
dg = plots_lpls.dag(terms, "bp")
pos = None
if calc_qvals:
figure(300)
subplot(2,1,1)
pos = plots_lpls.plot_dag(dg, node_color=cal_tsq_z, pos=pos, nodelist=terms)
ax = gca()
colorbar(ax.collections[0])
xlabel('q values')
xticks([])
yticks([])
subplot(2,1,2)
pos = plot_dag(dg, node_color=qvalz, pos=pos, nodelist=terms)
ax = gca()
colorbar(ax.collections[0])
xlabel('T2 values')
else:
figure(300)
subplot(2,1,1)
pos = plots_lpls.plot_dag(dg, pos=pos, nodelist=terms)
if calc_rmsep:
figure(190) #rmsep
bar_col = 'rgbcmyk'*2
m = Y.shape[1]
bar_w = 1./(m + 2.)
for a in range(m):
bar(arange(a_max)+a*bar_w+.1, rmsep[a,:], width=bar_w, color=bar_col[a])
ylim([rmsep.min()-.05, rmsep.max()+.05])
title('RMSEP: Y(%s)' %DY.get_name())
#figure(2)
#for a in range(m):
# bar(arange(a_max)+a*bar_w+.1, class_error[:,a], width=bar_w, color=bar_col[a])
#ylim([class_error.min()-.05, class_error.max()+.05])
#title('Classification accuracy')
figure(5) # Hyploid correlations
pc1 = 2
pc2 = 3
tsqz = cal_tsq_z
tsqx = cal_tsq_x
tsqz_s = 550*tsqz/tsqz.max()
td = rpy_go.goterm2desc(terms)
tlabels = [td[i] for i in terms]
#keep = tsqz.argsort()[:100]
#k_Rz = Rz[keep,:]
#k_tsqz_s = tsqz_s[keep]
#k_tsq = tsqz[keep]
#k_tlabels = [name for i,name in enumerate(tlabels) if i in keep]
plot_corrloads(Rz, pc1=pc1, pc2=pc2, s=tsqz_s, c=tsqz, zorder=6, expvar=evz, ax=None,alpha=.9,labels=None)
#plot_corrloads(k_Rz, pc1=0, pc2=1, s=k_tsqz_s, c=k_tsqz, zorder=5, expvar=evz, ax=None,alpha=.5,labels=None)
ax = gca()
ylabels = DYg.get_identifiers(DYg.get_dim_name()[1], sorted=True)
#ylabels = DYr.get_identifiers(DYr.get_dim_name()[1], sorted=True)
#blabels = yglabels[:]
#blabels.append(ylabels[0])
plot_corrloads(Ry, pc1=pc1, pc2=pc2, s=350, c='g', marker='s', zorder=7, expvar=evy, ax=ax,labels=ylabels,alpha=1.0, drawback=False)
plot_corrloads(Rx, pc1=pc1, pc2=pc2, s=3, c=(.6,.6,.6), alpha=1, zorder=4, expvar=evx, ax=ax, drawback=False, faceted=False)
figure(4)
subplot(221)
ax = gca()
plot_corrloads(Rx, pc1=0, pc2=1, s=tsqx/2.0, c='b', zorder=5, expvar=evx, ax=ax)
# title('X correlation')
subplot(222)
ax = gca()
plot_corrloads(Ry, pc1=0, pc2=1, s=250, c='g', zorder=5, expvar=evy, ax=ax)
#title('Y correlation')
subplot(223)
ax = gca()
plot_corrloads(Rz, pc1=0, pc2=1, s=tsqz/10.0, c='r', zorder=5, expvar=evz, ax=ax)
#title('Z correlation')
subplot(224)
plot(arange(len(evx)), evx, 'b', label='X', linewidth=2)
plot(evy, 'g', label='Y', linewidth=2)
plot(evz, 'r', label='Z', linewidth=2)
legend(loc=2)
ylabel('Explained variance')
xlabel('Component')
xticks((arange(len(evx))), [str(int(i+1)) for i in arange(len(evx))])
show()
figure(19)
#subplot(1,2,1)
# RMS : (n_rand_iter, n_alpha, nvarY, a_max)
# Rmsep : (n_alpha, nvarY, a_max)
rms = RMS[:,:,:,aopt] # looking at solution at aopt
m_rms = rms.mean(2) # mean over all y-variables
mm_rms = m_rms.mean(0) # mean over iterations
std_mrms = m_rms.std(0) # standard deviation over iterations
rms_t = Rmsep[:,:,aopt]
m_rms_t = rms_t.mean(1)
xax = arange(mm_rms.shape[0])
std2_lim_down = mm_rms - 1.*std_mrms
std2_lim_up = mm_rms + 1.*std_mrms
xx = r_[xax, xax[::-1]]
yy = r_[std2_lim_down, std2_lim_up[::-1]]
fill(xx, yy, fc='.9')
plot(mm_rms, '--r', lw=1.5, label='Perm. mean')
plot(std2_lim_down, 'b--')
plot(std2_lim_up, 'b--', label='Perm. 2*std')
plot(m_rms_t, 'g', lw=1.5, label='True')
#c_ylim = ylim()
#ylim(c_ylim[0], c_ylim[1]-1)
alpha_ind = linspace(0, Alpha.shape[0]-1, 11)
xticks(alpha_ind, ['%.1f' %a for a in arange(0,1.01, .1)])
xlabel(r'$\alpha$')
ylabel('mean error')
leg = legend(loc=2)
# delete fill from legend
del leg.texts[-1]
del leg.legendHandles[-1]
# delete one of the std legends
del leg.texts[1]
del leg.legendHandles[1]
klass = True
if klass:
figure(20)
# subplot(1,2,1)
# RMS : (n_rand_iter, n_alpha, nvarY, a_max)
# Rmsep : (n_alpha, nvarY, a_max)
cee = CEE[:,:,aopt,:] # looking at solution at aopt
m_cee = cee.mean(-1) # mean over all y-variables
mm_cee = m_cee.mean(0) # mean over iterations
std_cee = m_cee.std(0) # standard deviation over iterations
CE = asarray(CE)
cee_t = CE[:,:,aopt]
m_cee_t = cee_t.mean(1)
xax = arange(mm_cee.shape[0])
std2_lim_down = mm_cee - 2*std_cee
std2_lim_up = mm_cee + 2*std_cee
xx = r_[xax, xax[::-1]]
yy = r_[std2_lim_down, std2_lim_up[::-1]]
fill(xx, yy, fc='.9')
plot(mm_cee, '--r', lw=1.5)
plot(std2_lim_down, 'b--')
plot(std2_lim_up, 'b--')
plot(m_cee_t, 'g', lw=1.5)
c_ylim = ylim()
ylim(c_ylim[0], .2)  # do not rebind the ylim function
xticks(xax, [str(a)[:3] for a in Alpha])
xlabel(r'$\alpha$')
ylabel('mean error')

Some files were not shown because too many files have changed in this diff.