Projects/laydi
Projects
/
laydi
Archived
7
0
Fork 0
This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/workflows/go_workflow.py

205 lines
7.5 KiB
Python
Raw Normal View History

import gtk
import logger
from annotations import Annotations
from workflow import *
import plots
2006-04-21 12:56:01 +02:00
import dataset
#import geneontology
#import gostat
2006-04-22 02:17:22 +02:00
import rpy
2006-04-21 12:56:01 +02:00
from scipy import array,randn
import cPickle
class EinarsWorkflow (Workflow):
    """Workflow with load, preprocess, Gene Ontology, regression and
    save stages.
    """

    def __init__(self, app):
        """Create the workflow and register its stages in the order listed.

        app -- passed unchanged to Workflow.__init__.
        """
        Workflow.__init__(self, app)
        self.name = "Einar's Workflow"

        # Each entry is (stage id, stage title, function factories).
        # Factories are invoked one by one while their stage is being
        # filled, so every function object is created right before it
        # is added.
        layout = (
            ('load', 'Load Data', (
                lambda: Function('load', 'Load Microarrays'),
                CelFileImportFunction,
                TestDataFunction,
                DatasetLoadFunction,
            )),
            ('preprocess', 'Preprocessing', (
                lambda: Function('rma', 'RMA'),
            )),
            ('go', 'Gene Ontology Data', (
                LoadAnnotationsFunction,
                GODistanceFunction,
            )),
            ('regression', 'Regression', (
                lambda: Function('pls', 'PLS'),
            )),
            ('save', 'Save Data', (
                DatasetSaveFunction,
            )),
        )

        for stage_id, stage_title, factories in layout:
            stage = Stage(stage_id, stage_title)
            for make_function in factories:
                stage.add_function(make_function())
            self.add_stage(stage)

        logger.log('debug', "\tEinar's workflow is now active")
class LoadAnnotationsFunction(Function):
    """Workflow function that asks the user for a GO annotation file and
    loads it into an Annotations object.
    """

    def __init__(self):
        Function.__init__(self, 'load-go-ann', 'Load Annotations')
        # Annotations built by the most recent load_file() call; None
        # until a file has been loaded.
        self.annotations = None

    def load_file(self, filename):
        """Read filename into a new Annotations object on self.annotations.

        Every line that splits into more than one field adds one entry:
        the first field is the gene id, the remaining fields are its
        GO terms.  Fields are stripped of surrounding whitespace.

        Raises IOError if the file cannot be opened.
        """
        f = open(filename)
        try:
            self.annotations = Annotations('genes', 'go-terms')
            logger.log('notice', 'Loading annotation file: %s' % filename)

            for line in f:
                # NOTE(review): the separator is a space followed by a tab,
                # not a lone tab -- confirm this matches the file format.
                val = line.split(' \t')
                if len(val) > 1:
                    val = [v.strip() for v in val]
                    # The original called an undefined name (retval) here,
                    # which raised NameError on the first data line.
                    self.annotations.add_annotations('genes', val[0],
                                                     'go-terms', set(val[1:]))
        finally:
            # Always release the file handle, even if parsing fails.
            f.close()

    def on_response(self, dialog, response):
        """Dialog 'response' handler: load the chosen file on OK."""
        if response == gtk.RESPONSE_OK:
            logger.log('notice', 'Reading file: %s' % dialog.get_filename())
            self.load_file(dialog.get_filename())

    def run(self, data):
        """Show a file chooser and return [self.annotations].

        data is not used.  If the dialog is cancelled, self.annotations
        keeps whatever value it had before (None on first use).
        """
        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        # Loading happens inside the response handler while run() blocks.
        dialog.connect('response', self.on_response)
        dialog.run()
        dialog.destroy()
        return [self.annotations]
2006-04-19 21:59:55 +02:00
class GODistanceFunction(Function):
    """Workflow function that allocates a gene-by-gene distance matrix
    for an Annotations object.
    """

    def __init__(self):
        # NOTE(review): the id is spelled 'go_diatance'; left unchanged in
        # case it is looked up by that name elsewhere.
        Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        """Return a zero-filled (n, n) array, n being the number of gene ids.

        data -- expected to be an Annotations instance; anything else
                makes the function return None.
        """
        # Local import: the module itself only imports array and randn
        # from scipy.
        from scipy import zeros

        logger.log('debug', 'datatype: %s' % type(data))
        if not isinstance(data, Annotations):
            return None

        logger.log('debug', 'dimensions: %s' % data.dimensions)
        genes = data.get_ids('genes')
        n_genes = len(genes)
        # array((n, n)) would build a two-element vector holding the two
        # lengths, not an n-by-n matrix, so allocate with zeros() instead.
        gene_distances = zeros((n_genes, n_genes))
        return gene_distances
class TestDataFunction(Function):
    """Workflow function that injects a randn-filled Dataset and a
    plots.SinePlot for testing.
    """

    def __init__(self):
        Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self, data):
        """Return [Dataset, SinePlot]; the data argument is not used."""
        logger.log('notice', 'Injecting foo test data')

        # Both axes are named but carry an empty identifier list.
        row_axis = ['rows', []]
        col_axis = ['cols', []]
        values = randn(20, 30)

        return [dataset.Dataset(values, [row_axis, col_axis]),
                plots.SinePlot(None)]
2006-04-22 02:17:22 +02:00
class DatasetLoadFunction(Function):
    """Loader for previously pickled Datasets."""

    def __init__(self):
        Function.__init__(self, 'load_data', 'Load Pickled Dataset')

    def run(self, data):
        """Ask for a pickle file and return [unpickled object].

        data is not used.  Returns None when the dialog is cancelled.
        Errors from opening or unpickling the file propagate to the
        caller; the dialog is destroyed in every case.
        """
        chooser = gtk.FileChooserDialog(title="Select pickled data file...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        try:
            pkl_filter = gtk.FileFilter()
            pkl_filter.set_name("Python pickled data files (*.pkl)")
            pkl_filter.add_pattern("*.[pP][kK][lL]")
            all_filter = gtk.FileFilter()
            all_filter.set_name("All Files (*.*)")
            all_filter.add_pattern("*")
            chooser.add_filter(pkl_filter)
            chooser.add_filter(all_filter)

            if chooser.run() == gtk.RESPONSE_OK:
                # NOTE: unpickling can execute code stored in the file;
                # only open files from a trusted source.
                # The default (text) mode is kept because
                # DatasetSaveFunction writes its pickles in text mode.
                pkl_file = open(chooser.get_filename())
                try:
                    return [cPickle.load(pkl_file)]
                finally:
                    # Close the handle explicitly instead of relying on
                    # garbage collection.
                    pkl_file.close()
        finally:
            chooser.destroy()
class DatasetSaveFunction(Function):
    """QND way to save data to file for later import to this program."""

    def __init__(self):
        Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self, data):
        """Pickle data to a file chosen by the user.

        data -- object to save; it must provide get_name(), which is used
                to suggest "<name>.pkl" as the file name.

        Returns None.  Nothing is written when data is falsy or the
        dialog is cancelled.  Errors from opening or writing the file
        propagate; the dialog is destroyed in every case.
        """
        if not data:
            logger.log("notice", "No data to save.")
            return

        chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_SAVE, gtk.RESPONSE_OK))
        try:
            pkl_filter = gtk.FileFilter()
            pkl_filter.set_name("Python pickled data files (*.pkl)")
            pkl_filter.add_pattern("*.[pP][kK][lL]")
            all_filter = gtk.FileFilter()
            all_filter.set_name("All Files (*.*)")
            all_filter.add_pattern("*")
            chooser.add_filter(pkl_filter)
            chooser.add_filter(all_filter)
            chooser.set_current_name(data.get_name() + ".pkl")

            if chooser.run() == gtk.RESPONSE_OK:
                filename = chooser.get_filename()
                # Text mode is kept so existing files and the matching
                # loader stay compatible.
                pkl_file = open(filename, "w")
                try:
                    cPickle.dump(data, pkl_file)
                finally:
                    # Close (and thereby flush) before reporting success.
                    pkl_file.close()
                logger.log("notice", "Saved data to %r." % filename)
        finally:
            chooser.destroy()
2006-04-22 02:17:22 +02:00
class CelFileImportFunction(Function):
    """Loads AffyMetrix .CEL-files into matrix."""

    def __init__(self):
        Function.__init__(self, 'cel_import', 'Import Affy')

    def _r_quote(self, text):
        """Return text escaped for use inside a double-quoted R string."""
        # Backslashes first, so the ones added for quotes are not doubled.
        return text.replace('\\', '\\\\').replace('"', '\\"')

    def run(self, data):
        """Let the user pick .CEL files and return [Dataset] built via R.

        data is not used.  Returns None when the dialog is cancelled.
        The selected files are read with ReadAffy and processed with
        expresso from the R "affy" library (through rpy); the expression
        matrix and its row/column names are wrapped in a dataset.Dataset.
        Errors raised by rpy propagate to the caller.
        """
        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        try:
            chooser.set_select_multiple(True)
            cel_filter = gtk.FileFilter()
            cel_filter.set_name("Cel Files (*.cel)")
            cel_filter.add_pattern("*.[cC][eE][lL]")
            all_filter = gtk.FileFilter()
            all_filter.set_name("All Files (*.*)")
            all_filter.add_pattern("*")
            chooser.add_filter(cel_filter)
            chooser.add_filter(all_filter)

            if chooser.run() != gtk.RESPONSE_OK:
                return None
            filenames = chooser.get_filenames()
        finally:
            # Destroy the dialog before the slow R calls so it is not left
            # on screen, and so it is cleaned up even if something fails.
            chooser.destroy()

        logger.log('debug', "Selected files: %s" % ", ".join(filenames))
        rpy.r.library("affy")

        # File names are spliced into R source code, so escape backslashes
        # and double quotes (e.g. Windows paths) to keep the literal valid.
        r_names = '", "'.join([self._r_quote(name) for name in filenames])

        # hack: we append ";1" to make sure no r-object is returned to python (faster)
        rpy.r('At.aBatch <- ReadAffy(filenames=c("%s"));1' % r_names)

        # also here we append ";1"
        rpy.r('At.eSet <- expresso(At.aBatch, bg.correct=F, summary.method="liwong", pmcorrect.method="pmonly", normalize.method="qspline");1')

        m = rpy.r('At.m <- exprs(At.eSet)')
        rownames = rpy.r('rownames(At.m)')
        colnames = rpy.r('colnames(At.m)')
        return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)))]