import gtk
import logger
from annotations import Annotations
from workflow import *
import geneontology
import gostat
from scipy import array


class EinarsWorkflow(Workflow):
    """Workflow with load, preprocess, gene ontology and regression stages."""

    def __init__(self, app):
        """Register the four stages of the workflow, in order.

        app -- passed through unchanged to Workflow.__init__.
        """
        Workflow.__init__(self, app)
        self.name = 'Einar\'s Workflow'

        load = Stage('load', 'Load Data')
        load.add_function(Function('load', 'Load Microarrays'))
        self.add_stage(load)

        preproc = Stage('preprocess', 'Preprocessing')
        preproc.add_function(Function('rma', 'RMA'))
        self.add_stage(preproc)

        go = Stage('go', 'Gene Ontology Data')
        go.add_function(LoadAnnotationsFunction())
        go.add_function(GODistanceFunction())
        self.add_stage(go)

        regression = Stage('regression', 'Regression')
        regression.add_function(Function('pls', 'PLS'))
        self.add_stage(regression)

        logger.log('debug', '\tEinar\'s workflow is now active')


class LoadAnnotationsFunction(Function):
    """Workflow function that loads gene -> GO term annotations from a file."""

    def __init__(self):
        Function.__init__(self, 'load-go-ann', 'Load Annotations')
        # Result of the most recent load_file() call; None until a file
        # has been loaded.
        self.annotations = None

    def load_file(self, filename):
        """Read an annotation file into a fresh Annotations object.

        Each line is split on the separator below.  Lines that yield more
        than one field are stored with the first field as the 'genes' id
        and the remaining fields as its set of 'go-terms'; other lines are
        skipped.  The result is stored in self.annotations.

        filename -- path of the annotation file to read.
        """
        f = open(filename)
        # try/finally (rather than `with`) keeps this runnable on the old
        # Python 2 interpreters PyGTK targets while still closing the file.
        try:
            self.annotations = Annotations('genes', 'go-terms')
            logger.log('notice', 'Loading annotation file: %s' % filename)

            for line in f:
                # NOTE(review): the separator is a space followed by a tab;
                # confirm this is the file format and not a typo for '\t'.
                val = line.split(' \t')
                if len(val) > 1:
                    val = [v.strip() for v in val]
                    # Previously called on an undefined name `retval`,
                    # which raised NameError on the first annotated line.
                    self.annotations.add_annotations('genes', val[0],
                                                     'go-terms', set(val[1:]))
        finally:
            f.close()

    def on_response(self, dialog, response):
        """Load the file chosen in the dialog when the user confirmed with OK."""
        if response == gtk.RESPONSE_OK:
            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
            self.load_file(dialog.get_filename())

    def run(self, data):
        """Show a file chooser and return the loaded annotations.

        data -- unused.

        Returns a one-element list holding self.annotations, which is
        whatever a previous load left there (initially None) if the user
        did not confirm with OK.
        """
        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        # The file is loaded from the 'response' signal handler.
        dialog.connect('response', self.on_response)
        dialog.run()
        dialog.destroy()
        return [self.annotations]


class GODistanceFunction(Function):
    """Workflow function producing a gene distance array from annotations."""

    def __init__(self):
        # NOTE(review): 'go_diatance' looks like a typo for 'go_distance';
        # left unchanged because other code may look the function up by
        # this identifier.
        Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        """Build the gene distance array for an Annotations object.

        data -- expected to be an Annotations instance.

        Returns None when data is not an Annotations instance, otherwise
        the array built from the number of 'genes' ids.
        """
        logger.log('debug', 'datatype: %s' % type(data))
        # isinstance also accepts subclasses and works for old-style
        # classes, where type(data) == Annotations is never true.
        if not isinstance(data, Annotations):
            return None

        # Wrap in a 1-tuple so a tuple-valued `dimensions` is formatted as
        # a whole instead of being unpacked as several format arguments.
        logger.log('debug', 'dimensions: %s' % (data.dimensions,))

        genes = data.get_ids('genes')
        # NOTE(review): array((n, n)) creates a two-element array holding
        # the gene count twice, not an n-by-n matrix.  If a zero-filled
        # distance matrix was intended this should be zeros((n, n));
        # left as-is so the returned shape does not change for callers.
        gene_distances = array((len(genes), len(genes)))
        return gene_distances