2006-04-17 22:29:29 +02:00
|
|
|
import gtk
|
|
|
|
import logger
|
|
|
|
from annotations import Annotations
|
|
|
|
from workflow import *
|
2006-04-20 16:29:13 +02:00
|
|
|
import plots
|
2006-04-21 12:56:01 +02:00
|
|
|
import dataset
|
2006-04-20 12:27:58 +02:00
|
|
|
#import geneontology
|
|
|
|
#import gostat
|
2006-04-21 12:56:01 +02:00
|
|
|
from scipy import array,randn
|
2006-04-22 18:27:33 +02:00
|
|
|
import cPickle
|
2006-04-17 22:29:29 +02:00
|
|
|
|
|
|
|
class EinarsWorkflow (Workflow):
    """Testing workflow built around Gene Ontology data.

    The constructor registers five stages, in this order: 'load',
    'preprocess', 'go', 'regression' and 'save'.
    """

    name = 'Test Workflow'
    description = 'Gene Ontology Workflow. This workflow currently serves as a general testing workflow.'

    def __init__(self, app):
        """Create the workflow for app and populate all of its stages."""
        Workflow.__init__(self, app)

        # Data loading stage.
        load_stage = Stage('load', 'Load Data')
        for func in (Function('load', 'Load Microarrays'),
                     CelFileImportFunction(),
                     TestDataFunction(),
                     DatasetLoadFunction()):
            load_stage.add_function(func)
        self.add_stage(load_stage)

        # Preprocessing stage.
        preproc_stage = Stage('preprocess', 'Preprocessing')
        preproc_stage.add_function(Function('rma', 'RMA'))
        self.add_stage(preproc_stage)

        # Gene Ontology stage.
        go_stage = Stage('go', 'Gene Ontology Data')
        for func in (LoadAnnotationsFunction(), GODistanceFunction()):
            go_stage.add_function(func)
        self.add_stage(go_stage)

        # Regression stage.
        regression_stage = Stage('regression', 'Regression')
        regression_stage.add_function(Function('pls', 'PLS'))
        self.add_stage(regression_stage)

        # Stage for writing data back to disk.
        save_stage = Stage('save', 'Save Data')
        save_stage.add_function(DatasetSaveFunction())
        self.add_stage(save_stage)

        logger.log('debug', '\tEinar\'s workflow is now active')
|
|
|
|
|
|
|
|
class LoadAnnotationsFunction(Function):
    """Workflow function that reads gene annotations from a text file.

    run() asks the user for a file with a GTK file chooser; the chosen file
    is parsed by load_file() into an Annotations object.
    """

    def __init__(self):
        Function.__init__(self, 'load-go-ann', 'Load Annotations')
        # Annotations produced by the most recent load_file() call, or None
        # if no file has been loaded yet.
        self.annotations = None

    def load_file(self, filename):
        """Parse filename into a new Annotations object.

        Every line is split on the ' \\t' separator.  Lines giving at least
        two fields are stored with the first (stripped) field as the 'genes'
        id and the set of remaining fields as its 'go-terms'; other lines
        are ignored.  The result is stored in self.annotations.
        """
        f = open(filename)
        try:
            self.annotations = Annotations('genes', 'go-terms')
            logger.log('notice', 'Loading annotation file: %s' % filename)

            for line in f:
                # NOTE(review): the separator is a space followed by a tab;
                # confirm this matches the annotation file format.
                val = line.split(' \t')
                if len(val) > 1:
                    val = [v.strip() for v in val]
                    # The original referenced an undefined name here, which
                    # raised NameError on the first usable line.
                    self.annotations.add_annotations('genes', val[0],
                                                     'go-terms', set(val[1:]))
        finally:
            # Release the file handle even if parsing fails.
            f.close()

    def on_response(self, dialog, response):
        """Handle the dialog's 'response' signal; load the file on OK."""
        if response == gtk.RESPONSE_OK:
            filename = dialog.get_filename()
            logger.log('notice', 'Reading file: %s' % filename)
            self.load_file(filename)

    def run(self, data):
        """Show the file chooser and return [self.annotations].

        data is not used.  If the dialog is cancelled, self.annotations keeps
        whatever value it had before (None on first use).
        """
        btns = ('Open', gtk.RESPONSE_OK,
                'Cancel', gtk.RESPONSE_CANCEL)
        dialog = gtk.FileChooserDialog('Open GO Annotation File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        try:
            dialog.run()
        finally:
            # Always tear the dialog down, like the other functions in
            # this file do.
            dialog.destroy()
        return [self.annotations]
|
|
|
|
|
2006-04-19 21:59:55 +02:00
|
|
|
class GODistanceFunction(Function):
    """Workflow function that prepares a gene-by-gene distance matrix.

    Only the (zero-filled) matrix is allocated here; no distances are
    computed yet.
    """

    def __init__(self):
        # NOTE(review): the id 'go_diatance' looks like a typo for
        # 'go_distance'; it is left unchanged because other code may refer
        # to the function by this id.
        Function.__init__(self, 'go_diatance', 'GO Distances')
        self.output = None

    def run(self, data):
        """Return a zero-filled square matrix sized by the genes in data.

        Returns None when data is not an Annotations instance.  Otherwise
        the result has one row and one column per id returned by
        data.get_ids('genes').
        """
        logger.log('debug', 'datatype: %s' % type(data))
        if not isinstance(data, Annotations):
            return None

        logger.log('debug', 'dimensions: %s' % data.dimensions)

        genes = data.get_ids('genes')
        n = len(genes)
        # array((n, n)) would build the two-element vector [n, n]; build an
        # actual n-by-n matrix of zeros instead.
        # With no genes this yields an empty 1-D array.
        gene_distances = array([[0.0] * n for _row in range(n)])

        return gene_distances
|
|
|
|
|
2006-04-21 16:58:42 +02:00
|
|
|
|
|
|
|
class TestDataFunction(Function):
    """Workflow function that produces throw-away data for testing."""

    def __init__(self):
        Function.__init__(self, 'test_data', 'Generate Test Data')

    def run(self, data):
        """Return a Dataset wrapping a 20x30 randn matrix, plus a SinePlot.

        data is not used.
        """
        logger.log('notice', 'Injecting foo test data')
        random_dataset = dataset.Dataset(randn(20, 30))
        sine_plot = plots.SinePlot(None)
        return [random_dataset, sine_plot]
|
2006-04-22 02:17:22 +02:00
|
|
|
|
|
|
|
|
2006-04-22 18:27:33 +02:00
|
|
|
class DatasetLoadFunction(Function):
    """Loader for previously pickled Datasets."""

    def __init__(self):
        Function.__init__(self, 'load_data', 'Load Pickled Dataset')

    def run(self, data):
        """Ask the user for a pickle file and return its content in a list.

        data is not used.  Returns a one-element list holding the unpickled
        object, or None if the dialog is dismissed without choosing a file.
        """
        # The title used to say "Select cel files...", copied from the CEL
        # importer; this dialog selects a pickled data file.
        chooser = gtk.FileChooserDialog(title="Select pickled data file...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                # Pickles written with protocol 2 are binary, so the file
                # must be opened in binary mode.
                pkl_file = open(chooser.get_filename(), "rb")
                try:
                    # SECURITY: unpickling can execute arbitrary code; only
                    # open files that come from a trusted source.
                    return [cPickle.load(pkl_file)]
                finally:
                    pkl_file.close()
            return None
        finally:
            chooser.destroy()
|
|
|
|
|
|
|
|
|
|
|
|
class DatasetSaveFunction(Function):
    """QND way to save data to file for later import to this program."""

    def __init__(self):
        Function.__init__(self, 'save_data', 'Save Pickled Dataset')

    def run(self, data):
        """Pickle data to a file chosen by the user.

        Nothing is written when data is falsy or the dialog is cancelled.
        The suggested file name is data.get_name() + ".pkl".  Always returns
        None.
        """
        if not data:
            logger.log("notice", "No data to save.")
            return

        chooser = gtk.FileChooserDialog(title="Save pickled data...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_SAVE,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_SAVE, gtk.RESPONSE_OK))
        pkl_filter = gtk.FileFilter()
        pkl_filter.set_name("Python pickled data files (*.pkl)")
        pkl_filter.add_pattern("*.[pP][kK][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(pkl_filter)
        chooser.add_filter(all_filter)
        chooser.set_current_name(data.get_name() + ".pkl")

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                filename = chooser.get_filename()
                # Protocol 2 is a binary format: write in binary mode so no
                # newline translation can corrupt the pickle.
                pkl_file = open(filename, "wb")
                try:
                    cPickle.dump(data, pkl_file, protocol=2)
                finally:
                    # Close explicitly so the data is flushed before the
                    # success message is logged.
                    pkl_file.close()
                logger.log("notice", "Saved data to %r." % filename)
        finally:
            chooser.destroy()
|
|
|
|
|
|
|
|
|
2006-04-22 02:17:22 +02:00
|
|
|
class CelFileImportFunction(Function):
    """Loads Affymetrix .CEL-files into matrix."""

    def __init__(self):
        Function.__init__(self, 'cel_import', 'Import Affy')

    def run(self, data):
        """Let the user pick CEL files and read them through R's affy package.

        data is not used.  Returns a one-element list with a Dataset built
        from the expression matrix, its row names ('ids') and its column
        names ('filename').  Returns None if the dialog is cancelled or the
        matrix is falsy.
        """
        # Imported here rather than at module level, so importing this
        # module does not require rpy -- presumably intentional.
        import rpy

        chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        chooser.set_select_multiple(True)
        cel_filter = gtk.FileFilter()
        cel_filter.set_name("Cel Files (*.cel)")
        cel_filter.add_pattern("*.[cC][eE][lL]")
        all_filter = gtk.FileFilter()
        all_filter.set_name("All Files (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(cel_filter)
        chooser.add_filter(all_filter)

        try:
            if chooser.run() == gtk.RESPONSE_OK:
                rpy.r.library("affy")

                # The file names end up inside double-quoted R string
                # literals, so backslashes (e.g. Windows paths) and double
                # quotes must be escaped or they corrupt the R expression.
                quoted = [path.replace('\\', '\\\\').replace('"', '\\"')
                          for path in chooser.get_filenames()]

                silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
                silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(quoted))

                m = rpy.r('m <- E@exprs')

                vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
                rownames = vector_eval('rownames(m)')
                colnames = vector_eval('colnames(m)')

                # We should be nice and clean up after ourselves
                rpy.r.rm(["E", "m"])

                # NOTE(review): this relies on the truth value of whatever
                # rpy returns for the matrix -- confirm it behaves as
                # intended for the array type in use.
                if m:
                    return [dataset.Dataset(m, (('ids', rownames), ('filename', colnames)), "AffyMatrix Data")]
                else:
                    logger.log("notice", "No data loaded from importer.")
        finally:
            chooser.destroy()
|