laydi/workflows/go_workflow.py

import gtk
from system import dataset, logger, plots, workflow
from system.annotations import Annotations
#import geneontology
#import gostat
from scipy import array,randn,log
import cPickle

class EinarsWorkflow(workflow.Workflow):
    """Workflow wiring together the load, preprocess, go, regression,
    explore and save stages defined in this module."""

    name = 'Test Workflow'
    ident = 'go'
    description = 'Gene Ontology Workflow. This workflow currently serves as a general testing workflow.'

    def __init__(self, app):
        """Register every stage, and the functions of each stage, in order."""
        workflow.Workflow.__init__(self, app)

        # (stage id, stage title, function factories).  Factories rather than
        # instances are listed so each function is created right before it is
        # added to its stage.
        stage_layout = [
            ('load', 'Load Data', [
                CelFileImportFunction,
                TestDataFunction,
                DatasetLoadFunction,
            ]),
            ('preprocess', 'Preprocessing', [
                DatasetLog,
                lambda: workflow.Function('rma', 'RMA'),
            ]),
            ('go', 'Gene Ontology Data', [
                LoadAnnotationsFunction,
                GODistanceFunction,
            ]),
            ('regression', 'Regression', [
                lambda: workflow.Function('pls', 'PLS'),
            ]),
            ('explore', 'Explorative analysis', [
                lambda: PCAFunction(self),
            ]),
            ('save', 'Save Data', [
                DatasetSaveFunction,
            ]),
        ]

        for stage_id, stage_title, factories in stage_layout:
            stage = workflow.Stage(stage_id, stage_title)
            for make_function in factories:
                stage.add_function(make_function())
            self.add_stage(stage)

        logger.log('debug', '\tEinar\'s workflow is now active')

class LoadAnnotationsFunction(workflow.Function):
    """Workflow function that lets the user pick a GO annotation file and
    parses it into an Annotations object."""

    def __init__(self):
        workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')
        # Annotations produced by the most recent load_file() call, or None
        # if no file has been loaded yet.
        self.annotations = None

    def load_file(self, filename):
        """Parse `filename` and store the result in self.annotations.

        Every line is split on the literal separator ' \\t' (a space followed
        by a tab).  Lines yielding more than one field are recorded with the
        first field as the gene id and the remaining fields as its set of GO
        terms; all other lines are ignored.
        """
        f = open(filename)
        try:
            self.annotations = Annotations('genes', 'go-terms')
            logger.log('notice', 'Loading annotation file: %s' % filename)

            for line in f:
                val = line.split(' \t')

                if len(val) > 1:
                    val = [v.strip() for v in val]
                    # The original referenced an undefined name `retval`
                    # here; the annotations being built live on self.
                    self.annotations.add_annotations('genes', val[0],
                                                     'go-terms', set(val[1:]))
        finally:
            # Release the file handle even if parsing fails half-way.
            f.close()

    def on_response(self, dialog, response):
        """'response' signal handler: load the chosen file when OK is pressed."""
        if response == gtk.RESPONSE_OK:
            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
            self.load_file(dialog.get_filename())

    def run(self, data):
        """Show a file chooser and return [self.annotations].

        The returned list holds whatever the last load_file() call stored,
        i.e. None if no file has ever been loaded.
        """
        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        try:
            dialog.run()
        finally:
            # Same cleanup guarantee as the other dialogs in this module.
            dialog.destroy()
        return [self.annotations]

class GODistanceFunction(workflow.Function):
    """Workflow function that allocates a gene-by-gene distance matrix for
    an Annotations object.  No distances are computed yet."""

    def __init__(self):
        # NOTE(review): the id is spelled 'go_diatance'; left unchanged
        # because it is a runtime identifier that other code may rely on.
        workflow.Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        """Return a zero-filled (n_genes, n_genes) array for `data`.

        Returns None when `data` is not an Annotations instance.
        """
        # Function-level import, like the `import rpy` statements used by
        # other functions in this module.
        from numpy import zeros

        logger.log('debug', 'datatype: %s' % type(data))
        if not isinstance(data, Annotations):
            return None

        logger.log('debug', 'dimensions: %s' % data.dimensions)

        genes = data.get_ids('genes')
        gene_count = len(genes)
        # array((n, n)) would build the 1-D array [n, n]; zeros() allocates
        # the intended n-by-n matrix.
        gene_distances = zeros((gene_count, gene_count))

        return gene_distances


class TestDataFunction(workflow.Function):
    """Workflow function that injects a random dataset and a sine plot."""

    def __init__(self):
        workflow.Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self, data):
        """Return [Dataset, SinePlot]; the dataset wraps randn(20, 30).

        The `data` argument is not used.
        """
        logger.log('notice', 'Injecting foo test data')
        random_dataset = dataset.Dataset(randn(20, 30))
        sine_plot = plots.SinePlot()
        return [random_dataset, sine_plot]

class DatasetLog(workflow.Function):
    """Workflow function that applies log() to the values of a dataset."""

    def __init__(self):
        workflow.Function.__init__(self, 'log', 'Log')

    def run(self, data):
        """Return a one-element list holding a new Dataset named
        'log(<original name>)' built from log(data.asarray())."""
        logger.log('notice', 'Taking the log of dataset %s' % data.get_name())
        logged_values = log(data.asarray())
        return [dataset.Dataset(logged_values,
                                name='log(%s)' % data.get_name())]

class DatasetLoadFunction(workflow.Function):
    """Loader for previously pickled Datasets."""

    def __init__(self):
        workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')

    def run(self, data):
        """Ask for a pickle file and return [unpickled object].

        Returns None when the dialog is dismissed without choosing a file.
        The `data` argument is not used.
        """
        # The title previously read "Select cel files...", copied from the
        # CEL importer.
        chooser = gtk.FileChooserDialog(title="Select pickled data file...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                # Binary mode: DatasetSaveFunction writes pickle protocol 2,
                # which is a binary format.
                pickle_file = open(chooser.get_filename(), "rb")
                try:
                    # NOTE: unpickling can execute arbitrary code; only open
                    # files from a trusted source.
                    return [cPickle.load(pickle_file)]
                finally:
                    pickle_file.close()
            return None
        finally:
            chooser.destroy()


class DatasetSaveFunction(workflow.Function):
    """QND way to save data to file for later import to this program."""

    def __init__(self):
        workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self, data):
        """Ask for a target file and pickle `data` into it (protocol 2).

        Logs a notice and returns immediately when `data` is falsy.
        Always returns None.
        """
        if not data:
            logger.log("notice", "No data to save.")
            return

        chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_SAVE, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)
        chooser.set_current_name(data.get_name() + ".pkl")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                filename = chooser.get_filename()
                # Protocol 2 is a binary format, so the file must be opened
                # in binary mode.
                pickle_file = open(filename, "wb")
                try:
                    cPickle.dump(data, pickle_file, protocol=2)
                finally:
                    # Close (and thereby flush) before reporting success.
                    pickle_file.close()
                logger.log("notice", "Saved data to %r." % filename)
        finally:
            chooser.destroy()
                

class CelFileImportFunction(workflow.Function):
    """Loads AffyMetrix .CEL-files into matrix."""

    def __init__(self):
        workflow.Function.__init__(self, 'cel_import', 'Import Affy')

    def run(self, data):
        """Ask for one or more CEL files and import them through R.

        The selected files are read with ReadAffy() from R's "affy" library,
        passed through rma(), and the resulting E@exprs matrix is wrapped in
        a Dataset whose dimensions are 'ids' (row names) and 'filename'
        (column names).

        Returns [dataset, line plot] on success, otherwise None.  The `data`
        argument is not used.
        """
        # Imported here so the module itself does not depend on rpy.
        import rpy
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        chooser.set_select_multiple(True)
        cel_filter = gtk.FileFilter()
        cel_filter.set_name("Cel Files (*.cel)")
        cel_filter.add_pattern("*.[cC][eE][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(cel_filter)
        chooser.add_filter(all_filter)

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                rpy.r.library("affy")

                # The paths are pasted into an R string literal, so escape
                # backslashes (e.g. Windows paths) and double quotes first.
                r_paths = [path.replace('\\', '\\\\').replace('"', '\\"')
                           for path in chooser.get_filenames()]

                silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
                silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(r_paths))
                silent_eval('E <- rma(E)')

                m = rpy.r('m <- E@exprs')

                # VECTOR_CONVERSION is used so that a single row/column name
                # still comes back as a list rather than a bare string.
                vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
                rownames = vector_eval('rownames(m)')
                colnames = vector_eval('colnames(m)')

                # We should be nice and clean up after ourselves
                rpy.r.rm(["E", "m"])

                # NOTE(review): this truth-tests the converted matrix; some
                # array types raise on multi-element truth tests -- confirm
                # what rpy returns here.
                if m:
                    data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")
                    plot = plots.LinePlot(data, "Gene profiles")
                    return [data, plot]
                else:
                    logger.log("notice", "No data loaded from importer.")
        finally:
            chooser.destroy()


class PCAFunction(workflow.Function):
    """Generic PCA function."""

    def __init__(self, wf):
        workflow.Function.__init__(self, 'pca', 'PCA')
        # Owning workflow; stored but not used by run().
        self._workflow = wf

    def run(self, data):
        """Run R's prcomp() on the transposed data matrix.

        `data` must have exactly two dimensions.  Returns
        [T, P, loading_plot1, loading_plot2, score_plot] where T wraps the
        prcomp `x` matrix (second dimension of `data` by component) and P
        wraps the `rotation` matrix (first dimension of `data` by component).
        """
        # Imported here so the module itself does not depend on rpy.
        import rpy

        dim_2, dim_1 = data.get_dim_names()

        silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
        rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())
        silent_eval("t = prcomp(t(m))")

        # Components are labelled '1'..'n', n being the column count of t$x.
        component_count = rpy.r("dim(t$x)")[1]
        T_ids = [str(number) for number in range(1, component_count + 1)]
        T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),
                                           ("component", T_ids)], name="T")
        P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),
                                                  ("component", T_ids)], name="P")

        # cleanup
        rpy.r.rm(["t", "m"])

        # Use the dimension names P and T were actually built with instead of
        # the hard-coded 'ids' / 'filename', so datasets whose dimensions are
        # named differently can be plotted as well.
        # NOTE(review): loading_plot2 asks for components '3' and '4', which
        # presumably requires at least four components -- confirm how
        # ScatterPlot handles missing ids.
        loading_plot1 = plots.ScatterPlot(P, dim_2, 'component', '1', '2')
        loading_plot2 = plots.ScatterPlot(P, dim_2, 'component', '3', '4')
        score_plot = plots.ScatterPlot(T, dim_1, 'component', '1', '2')

        return [T, P, loading_plot1, loading_plot2, score_plot]
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`import gtk`
Made system a package. 2006-04-25 11:53:35 +02:00			`from system import dataset, logger, plots, workflow`
			`from system.annotations import Annotations`
mainly overhaul of observers, and removal of project singleton 2006-04-20 12:27:58 +02:00			`#import geneontology`
			`#import gostat`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`from scipy import array,randn,log`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00			`import cPickle`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class EinarsWorkflow (workflow.Workflow):`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00
* workflows/go_workflow.py: Added a workflow name to make it identifiable in the "create project" druid. 2006-04-22 23:48:30 +02:00			`name = 'Test Workflow'`
Added command line options to create a new project based on a given workflow. 2006-04-25 14:19:25 +02:00			`ident = 'go'`
* system/dialogs.py: Oooops. Forgot this file in the previous commits. Well, here it is, currently only with the New Project dialog. 2006-04-23 00:29:02 +02:00			`description = 'Gene Ontology Workflow. This workflow currently serves as a general testing workflow.'`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`def __init__(self, app):`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`workflow.Workflow.__init__(self, app)`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`load = workflow.Stage('load', 'Load Data')`
Added support for affy cel files. 2006-04-22 02:17:22 +02:00			`load.add_function(CelFileImportFunction())`
* fluent, *.py: Data and plots returned from functions will now show up in the navigator window. 2006-04-20 16:29:13 +02:00			`load.add_function(TestDataFunction())`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00			`load.add_function(DatasetLoadFunction())`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`self.add_stage(load)`

The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`preproc = workflow.Stage('preprocess', 'Preprocessing')`
			`preproc.add_function(DatasetLog())`
			`preproc.add_function(workflow.Function('rma', 'RMA'))`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`self.add_stage(preproc)`

The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`go = workflow.Stage('go', 'Gene Ontology Data')`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`go.add_function(LoadAnnotationsFunction())`
* fluent: General cleanup. 2006-04-19 21:59:55 +02:00			`go.add_function(GODistanceFunction())`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`self.add_stage(go)`

The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`regression = workflow.Stage('regression', 'Regression')`
			`regression.add_function(workflow.Function('pls', 'PLS'))`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`self.add_stage(regression)`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`explore = workflow.Stage('explore', 'Explorative analysis')`
Added matrix-getter to Dataset. Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data. 2006-04-24 16:07:34 +02:00			`explore.add_function(PCAFunction(self))`
			`self.add_stage(explore)`

The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`save = workflow.Stage('save', 'Save Data')`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00			`save.add_function(DatasetSaveFunction())`
			`self.add_stage(save)`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00
			`logger.log('debug', '\tEinar\'s workflow is now active')`

The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class LoadAnnotationsFunction(workflow.Function):`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00
			`def __init__(self):`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`workflow.Function.__init__(self, 'load-go-ann', 'Load Annotations')`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00			`self.annotations = None`

			`def load_file(self, filename):`
			`f = open(filename)`
			`self.annotations = Annotations('genes', 'go-terms')`
* fluent: General cleanup. 2006-04-19 21:59:55 +02:00			`logger.log('notice', 'Loading annotation file: %s' % filename)`
* workflows/go_workflow.py: A workflow that makes distance matrices based on gene ontology information. Currently only reads gene annotations. 2006-04-17 22:29:29 +02:00
			`for line in f.readlines():`
			`val = line.split(' \t')`

			`if len(val) > 1:`
			`val = [v.strip() for v in val]`
			`retval.add_annotations('genes', val[0],`
			`'go-terms', set(val[1:]))`

			`def on_response(self, dialog, response):`
			`if response == gtk.RESPONSE_OK:`
			`logger.log('notice', 'Reading file: %s' % dialog.get_filename())`
			`self.load_file(dialog.get_filename())`

			`def run(self, data):`
			`btns = ('Open', gtk.RESPONSE_OK, \`
			`'Cancel', gtk.RESPONSE_CANCEL)`
			`dialog = gtk.FileChooserDialog('Open GO Annotation File',`
			`buttons=btns)`
			`dialog.connect('response', self.on_response)`
			`dialog.run()`
			`dialog.destroy()`
			`return [self.annotations]`

The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class GODistanceFunction(workflow.Function):`
* fluent: General cleanup. 2006-04-19 21:59:55 +02:00
			`def __init__(self):`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`workflow.Function.__init__(self, 'go_diatance', 'GO Distances')`
* fluent: General cleanup. 2006-04-19 21:59:55 +02:00			`self.output = None`

			`def run(self, data):`
			`logger.log('debug', 'datatype: %s' % type(data))`
			`if not type(data) == Annotations:`
			`return None`

			`logger.log('debug', 'dimensions: %s' % data.dimensions)`

			`genes = data.get_ids('genes')`
			`gene_distances = array((len(genes), len(genes)))`

			`return gene_distances`

Added plot to output from test-function. 2006-04-21 16:58:42 +02:00
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class TestDataFunction(workflow.Function):`
* fluent, *.py: Data and plots returned from functions will now show up in the navigator window. 2006-04-20 16:29:13 +02:00			`def __init__(self):`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`workflow.Function.__init__(self, 'test_data', 'Generate Test Data')`
* fluent, *.py: Data and plots returned from functions will now show up in the navigator window. 2006-04-20 16:29:13 +02:00
			`def run(self, data):`
			`logger.log('notice', 'Injecting foo test data')`
Added random test data 2006-04-21 12:56:01 +02:00			`x = randn(20,30)`
Complete rewrite of dataset class, with (all) the necessary updates 2006-04-24 11:53:07 +02:00			`X = dataset.Dataset(x)`
Removed need to provide plots with project/workflow on creation, is instead injected by project itself when added to it. 2006-04-24 16:42:45 +02:00			`return [X, plots.SinePlot()]`
Added support for affy cel files. 2006-04-22 02:17:22 +02:00
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class DatasetLog(workflow.Function):`
			`def __init__(self):`
			`workflow.Function.__init__(self, 'log', 'Log')`

			`def run(self, data):`
			`logger.log('notice', 'Taking the log of dataset %s' % data.get_name())`
			`d = data.asarray()`
			`d = log(d)`
Function names are now folders in the navigator. 2006-04-24 17:20:27 +02:00			`new_data_name = 'log(%s)' % data.get_name()`
			`ds = dataset.Dataset(d, name=new_data_name)`
			`return [ds]`
Added support for affy cel files. 2006-04-22 02:17:22 +02:00
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class DatasetLoadFunction(workflow.Function):`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00			`"""Loader for previously pickled Datasets."""`
			`def __init__(self):`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`workflow.Function.__init__(self, 'load_data', 'Load Pickled Dataset')`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00
			`def run(self, data):`
			`chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,`
			`action=gtk.FILE_CHOOSER_ACTION_OPEN,`
			`buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,`
			`gtk.STOCK_OPEN, gtk.RESPONSE_OK))`
			`pkl_filter = gtk.FileFilter()`
			`pkl_filter.set_name("Python pickled data files (*.pkl)")`
			`pkl_filter.add_pattern("*.[pP][kK][lL]")`
			`all_filter = gtk.FileFilter()`
			`all_filter.set_name("All Files (.)")`
			`all_filter.add_pattern("*")`
			`chooser.add_filter(pkl_filter)`
			`chooser.add_filter(all_filter)`

			`try:`
			`if chooser.run() == gtk.RESPONSE_OK:`
			`return [cPickle.load(open(chooser.get_filename()))]`
			`finally:`
			`chooser.destroy()`


The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class DatasetSaveFunction(workflow.Function):`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00			`"""QND way to save data to file for later import to this program."""`
			`def __init__(self):`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`workflow.Function.__init__(self, 'save_data', 'Save Pickled Dataset')`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00
			`def run(self, data):`
			`if not data:`
			`logger.log("notice", "No data to save.")`
			`return`

			`chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,`
			`action=gtk.FILE_CHOOSER_ACTION_SAVE,`
			`buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,`
			`gtk.STOCK_SAVE, gtk.RESPONSE_OK))`
			`pkl_filter = gtk.FileFilter()`
			`pkl_filter.set_name("Python pickled data files (*.pkl)")`
			`pkl_filter.add_pattern("*.[pP][kK][lL]")`
			`all_filter = gtk.FileFilter()`
			`all_filter.set_name("All Files (.)")`
			`all_filter.add_pattern("*")`
			`chooser.add_filter(pkl_filter)`
			`chooser.add_filter(all_filter)`
			`chooser.set_current_name(data.get_name() + ".pkl")`

			`try:`
			`if chooser.run() == gtk.RESPONSE_OK:`
Made sure pickling use protocol 2 to make it faster and the files smaller (binary). 2006-04-22 20:11:31 +02:00			`cPickle.dump(data, open(chooser.get_filename(), "w"), protocol=2)`
Added pickling as a quick and dirty way of loading and saving data in einars workflow. 2006-04-22 18:27:33 +02:00			`logger.log("notice", "Saved data to %r." % chooser.get_filename())`
			`finally:`
			`chooser.destroy()`


The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class CelFileImportFunction(workflow.Function):`
Added doc-string for CelFileImportFunction. 2006-04-22 17:59:19 +02:00			`"""Loads AffyMetrix .CEL-files into matrix."""`
Added support for affy cel files. 2006-04-22 02:17:22 +02:00			`def __init__(self):`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`workflow.Function.__init__(self, 'cel_import', 'Import Affy')`
Added support for affy cel files. 2006-04-22 02:17:22 +02:00
			`def run(self, data):`
Removed workflow being dependent on rpy by putting the import within the run function of affymatrix import. 2006-04-22 22:48:44 +02:00			`import rpy`
Added support for affy cel files. 2006-04-22 02:17:22 +02:00			`chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,`
			`action=gtk.FILE_CHOOSER_ACTION_OPEN,`
			`buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,`
			`gtk.STOCK_OPEN, gtk.RESPONSE_OK))`
			`chooser.set_select_multiple(True)`
			`cel_filter = gtk.FileFilter()`
			`cel_filter.set_name("Cel Files (*.cel)")`
			`cel_filter.add_pattern("*.[cC][eE][lL]")`
			`all_filter = gtk.FileFilter()`
			`all_filter.set_name("All Files (.)")`
			`all_filter.add_pattern("*")`
			`chooser.add_filter(cel_filter)`
			`chooser.add_filter(all_filter)`

Big cleanup of affymatrix importer. Now use E@exprs to get matrix from loaded data. Also made row and colname import better by ensuring vector-handling, ensuring that getting single column/row data works without being converted into a single string instead of string in a list. Removed ;1-hack and use NO_CONVERSION to ensure no extra computation is done when calculating r stuff. Now AffyMatrix data is named as such. 2006-04-22 19:59:15 +02:00			`try:`
			`if chooser.run() == gtk.RESPONSE_OK:`
			`rpy.r.library("affy")`

			`silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)`
			`silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))`
Added matrix-getter to Dataset. Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data. 2006-04-24 16:07:34 +02:00			`silent_eval('E <- rma(E)')`
Big cleanup of affymatrix importer. Now use E@exprs to get matrix from loaded data. Also made row and colname import better by ensuring vector-handling, ensuring that getting single column/row data works without being converted into a single string instead of string in a list. Removed ;1-hack and use NO_CONVERSION to ensure no extra computation is done when calculating r stuff. Now AffyMatrix data is named as such. 2006-04-22 19:59:15 +02:00
			`m = rpy.r('m <- E@exprs')`

			`vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)`
			`rownames = vector_eval('rownames(m)')`
			`colnames = vector_eval('colnames(m)')`

			`# We should be nice and clean up after ourselves`
			`rpy.r.rm(["E", "m"])`

			`if m:`
Added LinePlot used by Affy importer. 2006-04-27 13:03:11 +02:00			`data = dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), name="AffyMatrix Data")`
			`plot = plots.LinePlot(data, "Gene profiles")`
			`return [data, plot]`
Big cleanup of affymatrix importer. Now use E@exprs to get matrix from loaded data. Also made row and colname import better by ensuring vector-handling, ensuring that getting single column/row data works without being converted into a single string instead of string in a list. Removed ;1-hack and use NO_CONVERSION to ensure no extra computation is done when calculating r stuff. Now AffyMatrix data is named as such. 2006-04-22 19:59:15 +02:00			`else:`
			`logger.log("notice", "No data loaded from importer.")`
			`finally:`
Added support for affy cel files. 2006-04-22 02:17:22 +02:00			`chooser.destroy()`
Added matrix-getter to Dataset. Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data. 2006-04-24 16:07:34 +02:00

The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`class PCAFunction(workflow.Function):`
Added matrix-getter to Dataset. Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data. 2006-04-24 16:07:34 +02:00			`"""Generic PCA function."""`
The navigator now displays the plots and data in a tree that shows the ancestry information. 2006-04-24 16:52:21 +02:00			`def __init__(self, wf):`
			`workflow.Function.__init__(self, 'pca', 'PCA')`
			`self._workflow = wf`
Added matrix-getter to Dataset. Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data. 2006-04-24 16:07:34 +02:00
			`def run(self, data):`
			`import rpy`

			`dim_2, dim_1 = data.get_dim_names()`


			`silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)`
			`rpy.with_mode(rpy.NO_CONVERSION, rpy.r.assign)("m", data.get_matrix())`
			`silent_eval("t = prcomp(t(m))")`

			`T_ids = map(str, range(1, rpy.r("dim(t$x)")[1]+1))`
			`T = dataset.Dataset(rpy.r("t$x"), [(dim_1, data.get_identifiers(dim_1)),`
			`("component", T_ids)], name="T")`
			`P = dataset.Dataset(rpy.r("t$rotation"), [(dim_2, data.get_identifiers(dim_2)),`
			`("component", T_ids)], name="P")`

			`# cleanup`
			`rpy.r.rm(["t", "m"])`

Removed need to provide plots with project/workflow on creation, is instead injected by project itself when added to it. 2006-04-24 16:42:45 +02:00			`loading_plot1 = plots.ScatterPlot(P,'ids','component','1','2')`
			`loading_plot2 = plots.ScatterPlot(P,'ids','component','3','4')`
Added LinePlot used by Affy importer. 2006-04-27 13:03:11 +02:00			`score_plot = plots.ScatterPlot(T,'filename','component','1','2')`
Added matrix-getter to Dataset. Added rma-function to preprocessing incorporated in AffyMatrix-importer. Added PCAFunction to go_workflow for processing loaded affymatrix data. 2006-04-24 16:07:34 +02:00
			`return [T, P, loading_plot1, loading_plot2, score_plot]`