Fixed floats in phenotype importer.

Added PrintFunction for debug-printing.
Added PhenotypeImportFunction to AffyWorkflow.
This commit is contained in:
Truls Alexander Tangstad 2006-05-03 14:27:38 +00:00
parent b757da5929
commit fb7128138e
2 changed files with 41 additions and 4 deletions

View File

@ -20,13 +20,13 @@ CEL
def testFloatData(self):
cel_data = """\
CEL\tage
02-05-33\t8
02-05-33\t8.5
03-07-38\t9
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals(['CEL', 'phenotypes'], dataset.get_dim_names())
self.assertEquals(['age'], dataset.get_identifiers('phenotypes'))
self.assertEquals([[8],
self.assertEquals([[8.5],
[9]], dataset.asarray().tolist())
def testCategoryData(self):

View File

@ -14,17 +14,30 @@ class AffyWorkflow (workflow.Workflow):
load = workflow.Stage('load', 'Load Data')
load.add_function(CelFileImportFunction())
load.add_function(PhenotypeImportFunction())
load.add_function(TestDataFunction())
load.add_function(DatasetLoadFunction())
self.add_stage(load)
explore = workflow.Stage('explore', 'Explorative analysis')
explore.add_function(PCAFunction(self))
explore.add_function(PrintFunction())
self.add_stage(explore)
save = workflow.Stage('save', 'Save Data')
save.add_function(DatasetSaveFunction())
self.add_stage(save)
class PrintFunction(workflow.Function):
def __init__(self):
workflow.Function.__init__(self, 'printer', 'Print Stuff')
def run(self, data):
dim1, dim2 = data.get_dim_names()
print dim1, dim2
print "\t", "\t".join(data.get_identifiers(dim2))
for row in zip(data.get_identifiers(dim1), data.asarray().tolist()):
print "\t".join(map(str, row))
class TestDataFunction(workflow.Function):
@ -146,6 +159,30 @@ class CelFileImportFunction(workflow.Function):
chooser.destroy()
class PhenotypeImportFunction(workflow.Function):
    """Workflow function that loads a phenotype table picked in a file dialog."""

    def __init__(self):
        workflow.Function.__init__(self, 'import_phenotype', 'Import Phenotypes')

    def run(self):
        """Ask the user for a file and build a PhenotypeDataset from its text.

        Returns a one-element list holding the new dataset when the dialog is
        confirmed, and None when it is dismissed in any other way.  The dialog
        is destroyed on every path, including when reading or parsing raises.
        """
        # The title names the phenotype file; it previously read
        # "Select cel files...", which does not match what this function imports.
        chooser = gtk.FileChooserDialog(title="Select phenotype file...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))

        # A single catch-all filter: every file name matches the "*" pattern.
        all_filter = gtk.FileFilter()
        all_filter.set_name("Tab separated file (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(all_filter)

        try:
            if chooser.run() != gtk.RESPONSE_OK:
                return None

            # Close the file explicitly instead of relying on garbage
            # collection; try/finally keeps this valid on old interpreters
            # that lack the `with` statement.
            phenotype_file = open(chooser.get_filename())
            try:
                text = phenotype_file.read()
            finally:
                phenotype_file.close()

            return [PhenotypeDataset(text)]
        finally:
            chooser.destroy()
class PCAFunction(workflow.Function):
"""Generic PCA function."""
def __init__(self, wf):
@ -197,7 +234,7 @@ class PhenotypeDataset(dataset.Dataset):
for col_name, column in zip(col_names, columns[1:]):
try:
categories[col_name] = map(int, column)
categories[col_name] = map(float, column)
phenotypes.append(col_name)
except ValueError:
# category-data
@ -230,7 +267,7 @@ class PhenotypeDataset(dataset.Dataset):
dataset.Dataset.__init__(self, a, identifiers=[('CEL', cel_names),
('phenotypes', phenotypes)],
shape=(len(cel_names),len(phenotypes)))
shape=(len(cel_names),len(phenotypes)), name="Phenotype Data")
def get_phenotype_table(self):