From fb7128138e3dd1592c250e0c7d2479907e1f1a05 Mon Sep 17 00:00:00 2001 From: tangstad Date: Wed, 3 May 2006 14:27:38 +0000 Subject: [PATCH] Fixed floats in phenotype importer. Added PrintFunction for debug-printing. Added PhenotypeImportFunction to AffyWorkflow. --- test/workflows/affy_workflowtest.py | 4 +-- workflows/affy_workflow.py | 41 +++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/test/workflows/affy_workflowtest.py b/test/workflows/affy_workflowtest.py index 4d214bd..82e5b33 100644 --- a/test/workflows/affy_workflowtest.py +++ b/test/workflows/affy_workflowtest.py @@ -20,13 +20,13 @@ CEL def testFloatData(self): cel_data = """\ CEL\tage -02-05-33\t8 +02-05-33\t8.5 03-07-38\t9 """ dataset = PhenotypeDataset(cel_data) self.assertEquals(['CEL', 'phenotypes'], dataset.get_dim_names()) self.assertEquals(['age'], dataset.get_identifiers('phenotypes')) - self.assertEquals([[8], + self.assertEquals([[8.5], [9]], dataset.asarray().tolist()) def testCategoryData(self): diff --git a/workflows/affy_workflow.py b/workflows/affy_workflow.py index 6ec21b8..cc88c98 100644 --- a/workflows/affy_workflow.py +++ b/workflows/affy_workflow.py @@ -14,17 +14,30 @@ class AffyWorkflow (workflow.Workflow): load = workflow.Stage('load', 'Load Data') load.add_function(CelFileImportFunction()) + load.add_function(PhenotypeImportFunction()) load.add_function(TestDataFunction()) load.add_function(DatasetLoadFunction()) self.add_stage(load) explore = workflow.Stage('explore', 'Explorative analysis') explore.add_function(PCAFunction(self)) + explore.add_function(PrintFunction()) self.add_stage(explore) save = workflow.Stage('save', 'Save Data') save.add_function(DatasetSaveFunction()) self.add_stage(save) + +class PrintFunction(workflow.Function): + def __init__(self): + workflow.Function.__init__(self, 'printer', 'Print Stuff') + + def run(self, data): + dim1, dim2 = data.get_dim_names() + print dim1, dim2 + print "\t", "\t".join(data.get_identifiers(dim2)) + for row in zip(data.get_identifiers(dim1), data.asarray().tolist()): + print "\t".join(map(str, row)) class TestDataFunction(workflow.Function): @@ -146,6 +159,30 @@ class CelFileImportFunction(workflow.Function): chooser.destroy() +class PhenotypeImportFunction(workflow.Function): + def __init__(self): + workflow.Function.__init__(self, 'import_phenotype', 'Import Phenotypes') + + def run(self): + chooser = gtk.FileChooserDialog(title="Select cel files...", parent=None, + action=gtk.FILE_CHOOSER_ACTION_OPEN, + buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, + gtk.STOCK_OPEN, gtk.RESPONSE_OK)) + all_filter = gtk.FileFilter() + all_filter.set_name("Tab separated file (*.*)") + all_filter.add_pattern("*") + chooser.add_filter(all_filter) + + try: + if chooser.run() == gtk.RESPONSE_OK: + text = open(chooser.get_filename()).read() + data = PhenotypeDataset(text) + return [data] + + finally: + chooser.destroy() + + class PCAFunction(workflow.Function): """Generic PCA function.""" def __init__(self, wf): @@ -197,7 +234,7 @@ class PhenotypeDataset(dataset.Dataset): for col_name, column in zip(col_names, columns[1:]): try: - categories[col_name] = map(int, column) + categories[col_name] = map(float, column) phenotypes.append(col_name) except ValueError: # category-data @@ -230,7 +267,7 @@ class PhenotypeDataset(dataset.Dataset): dataset.Dataset.__init__(self, a, identifiers=[('CEL', cel_names), ('phenotypes', phenotypes)], - shape=(len(cel_names),len(phenotypes))) + shape=(len(cel_names),len(phenotypes)), name="Phenotype Data") def get_phenotype_table(self):