"""Einar's workflow: stages and functions for loading microarray data,
loading Gene Ontology annotations and (placeholder) downstream analysis."""

import gtk
import logger
from annotations import Annotations
from workflow import *
import plots
import dataset
#import geneontology
#import gostat
import rpy
from scipy import array,randn


class EinarsWorkflow (Workflow):
    """Workflow made of four stages: load, preprocess, go and regression."""

    def __init__(self, app):
        """Build all stages and register them on the workflow.

        app -- passed unchanged to Workflow.__init__.
        """
        Workflow.__init__(self, app)
        self.name = 'Einar\'s Workflow'

        # Stage 1: getting data into the application.
        load = Stage('load', 'Load Data')
        load.add_function(Function('load', 'Load Microarrays'))
        load.add_function(CelFileImportFunction())
        load.add_function(TestDataFunction())
        self.add_stage(load)

        # Stage 2: preprocessing (only a generic Function entry so far).
        preproc = Stage('preprocess', 'Preprocessing')
        preproc.add_function(Function('rma', 'RMA'))
        self.add_stage(preproc)

        # Stage 3: Gene Ontology annotations and distances.
        go = Stage('go', 'Gene Ontology Data')
        go.add_function(LoadAnnotationsFunction())
        go.add_function(GODistanceFunction())
        self.add_stage(go)

        # Stage 4: regression (only a generic Function entry so far).
        regression = Stage('regression', 'Regression')
        regression.add_function(Function('pls', 'PLS'))
        self.add_stage(regression)

        logger.log('debug', '\tEinar\'s workflow is now active')


class LoadAnnotationsFunction(Function):
    """Ask the user for an annotation file and load it into an Annotations."""

    def __init__(self):
        Function.__init__(self, 'load-go-ann', 'Load Annotations')
        # Result of the most recent load_file() call; None until a file
        # has been loaded.
        self.annotations = None

    def load_file(self, filename):
        """Read *filename* and store the result in self.annotations.

        Each line is split on the separator; the first field is used as the
        gene id and the remaining fields as its set of GO terms.  Lines that
        do not contain the separator are skipped.
        """
        self.annotations = Annotations('genes', 'go-terms')
        logger.log('notice', 'Loading annotation file: %s' % filename)

        f = open(filename)
        try:
            for line in f:
                # NOTE(review): the separator is the two-character sequence
                # space + tab, exactly as in the original code -- confirm
                # that this matches the annotation file format.
                fields = line.split(' \t')
                if len(fields) > 1:
                    fields = [v.strip() for v in fields]
                    # The original called an undefined name (`retval`) here,
                    # which raised NameError on the first valid line.
                    self.annotations.add_annotations('genes', fields[0],
                                                     'go-terms',
                                                     set(fields[1:]))
        finally:
            # Always release the file handle, also when parsing fails.
            f.close()

    def on_response(self, dialog, response):
        """'response' signal handler: load the chosen file on OK."""
        if response == gtk.RESPONSE_OK:
            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
            self.load_file(dialog.get_filename())

    def run(self, data):
        """Show a file chooser and return [self.annotations].

        *data* is not used.  If the dialog is cancelled, self.annotations is
        left untouched, so the returned list holds None or the result of an
        earlier load.
        """
        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        try:
            dialog.run()
        finally:
            dialog.destroy()
        return [self.annotations]


class GODistanceFunction(Function):
    """Placeholder for computing gene distances from GO annotations."""

    def __init__(self):
        # NOTE(review): 'go_diatance' looks like a typo for 'go_distance';
        # kept unchanged because other code may refer to this id.
        Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        """Return an array derived from the number of genes in *data*.

        Returns None when *data* is not an Annotations instance.
        """
        logger.log('debug', 'datatype: %s' % type(data))
        if not isinstance(data, Annotations):
            return None

        logger.log('debug', 'dimensions: %s' % data.dimensions)

        genes = data.get_ids('genes')
        # NOTE(review): array((n, n)) builds a 1-D array holding the value n
        # twice, not an n x n matrix.  An n x n distance matrix (for example
        # zeros((n, n))) was presumably intended -- confirm before relying on
        # the shape of this result.
        gene_distances = array((len(genes), len(genes)))
        return gene_distances


class TestDataFunction(Function):
    """Generate a random dataset and a sine plot for testing."""

    def __init__(self):
        Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self, data):
        """Return [Dataset, SinePlot] built from a 20 x 30 random matrix.

        *data* is not used.
        """
        logger.log('notice', 'Injecting foo test data')
        x = randn(20, 30)
        # Both axes are named but carry an empty list of identifiers.
        axis_0 = ['rows', []]
        axis_1 = ['cols', []]
        X = dataset.Dataset(x, [axis_0, axis_1])
        return [X, plots.SinePlot(None)]


def _r_string_vector(strings):
    """Return R source for a character vector, e.g. c("a", "b").

    Backslashes and double quotes are escaped so that any file name (such as
    a Windows path) becomes a valid R string literal.
    """
    quoted = ['"%s"' % s.replace('\\', '\\\\').replace('"', '\\"')
              for s in strings]
    return 'c(%s)' % ', '.join(quoted)


class CelFileImportFunction(Function):
    """Import Affymetrix CEL files through R's affy package (via rpy)."""

    def __init__(self):
        Function.__init__(self, 'cel_import', 'Import Affy')

    def run(self, data):
        """Let the user pick CEL files and return them as one Dataset.

        *data* is not used.  Returns a one-element list holding the Dataset
        built from the expression matrix, or None if the dialog was
        cancelled.
        """
        chooser = gtk.FileChooserDialog(title="Select cel files...",
                                        parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL,
                                                 gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN,
                                                 gtk.RESPONSE_OK))
        chooser.set_select_multiple(True)

        cel_filter = gtk.FileFilter()
        cel_filter.set_name("Cel Files (*.cel)")
        cel_filter.add_pattern("*.[cC][eE][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(cel_filter)
        chooser.add_filter(all_filter)

        # Collect the selection, then destroy the dialog right away so it is
        # neither kept on screen while R is working nor leaked if an R call
        # raises.
        try:
            if chooser.run() != gtk.RESPONSE_OK:
                return None
            filenames = chooser.get_filenames()
        finally:
            chooser.destroy()

        logger.log('debug', "Selected files: %s" % ", ".join(filenames))
        rpy.r.library("affy")

        # hack: we append ";1" to make sure no r-object is returned to
        # python (faster)
        rpy.r('At.aBatch <- ReadAffy(filenames=%s);1'
              % _r_string_vector(filenames))

        # also here we append ";1"
        rpy.r('At.eSet <- expresso(At.aBatch, bg.correct=F, '
              'summary.method="liwong", pmcorrect.method="pmonly", '
              'normalize.method="qspline");1')

        m = rpy.r('At.m <- exprs(At.eSet)')
        rownames = rpy.r('rownames(At.m)')
        colnames = rpy.r('colnames(At.m)')
        return [dataset.Dataset(m, (('ids', rownames),
                                    ('filename', colnames)))]