Fixed parsing of float values in the phenotype importer (numeric columns are now converted with float instead of int).
Added PrintFunction for debug printing. Added PhenotypeImportFunction to AffyWorkflow.
This commit is contained in:
parent
b757da5929
commit
fb7128138e
|
@ -20,13 +20,13 @@ CEL
|
|||
def testFloatData(self):
|
||||
cel_data = """\
|
||||
CEL\tage
|
||||
02-05-33\t8
|
||||
02-05-33\t8.5
|
||||
03-07-38\t9
|
||||
"""
|
||||
dataset = PhenotypeDataset(cel_data)
|
||||
self.assertEquals(['CEL', 'phenotypes'], dataset.get_dim_names())
|
||||
self.assertEquals(['age'], dataset.get_identifiers('phenotypes'))
|
||||
self.assertEquals([[8],
|
||||
self.assertEquals([[8.5],
|
||||
[9]], dataset.asarray().tolist())
|
||||
|
||||
def testCategoryData(self):
|
||||
|
|
|
@ -14,17 +14,30 @@ class AffyWorkflow (workflow.Workflow):
|
|||
|
||||
load = workflow.Stage('load', 'Load Data')
|
||||
load.add_function(CelFileImportFunction())
|
||||
load.add_function(PhenotypeImportFunction())
|
||||
load.add_function(TestDataFunction())
|
||||
load.add_function(DatasetLoadFunction())
|
||||
self.add_stage(load)
|
||||
|
||||
explore = workflow.Stage('explore', 'Explorative analysis')
|
||||
explore.add_function(PCAFunction(self))
|
||||
explore.add_function(PrintFunction())
|
||||
self.add_stage(explore)
|
||||
|
||||
save = workflow.Stage('save', 'Save Data')
|
||||
save.add_function(DatasetSaveFunction())
|
||||
self.add_stage(save)
|
||||
|
||||
class PrintFunction(workflow.Function):
|
||||
def __init__(self):
|
||||
workflow.Function.__init__(self, 'printer', 'Print Stuff')
|
||||
|
||||
def run(self, data):
|
||||
dim1, dim2 = data.get_dim_names()
|
||||
print dim1, dim2
|
||||
print "\t", "\t".join(data.get_identifiers(dim2))
|
||||
for row in zip(data.get_identifiers(dim1), data.asarray().tolist()):
|
||||
print "\t".join(map(str, row))
|
||||
|
||||
|
||||
class TestDataFunction(workflow.Function):
|
||||
|
@ -146,6 +159,30 @@ class CelFileImportFunction(workflow.Function):
|
|||
chooser.destroy()
|
||||
|
||||
|
||||
class PhenotypeImportFunction(workflow.Function):
    """Workflow function that loads phenotype data from a user-selected file."""

    def __init__(self):
        workflow.Function.__init__(self, 'import_phenotype', 'Import Phenotypes')

    def run(self):
        """Ask the user for a phenotype file and build a dataset from it.

        Returns a one-element list holding the PhenotypeDataset created from
        the file's text when the dialog is confirmed with "Open".  Returns
        None for any other dialog response (e.g. cancel).
        """
        # The title used to read "Select cel files...", copied from the CEL
        # importer; this dialog picks a single phenotype file.
        chooser = gtk.FileChooserDialog(title="Select phenotype file...", parent=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN, gtk.RESPONSE_OK))
        all_filter = gtk.FileFilter()
        all_filter.set_name("Tab separated file (*.*)")
        all_filter.add_pattern("*")
        chooser.add_filter(all_filter)

        try:
            if chooser.run() != gtk.RESPONSE_OK:
                return None

            # Close the file explicitly instead of relying on garbage
            # collection; nested try/finally keeps this valid on old Pythons
            # without the `with` statement.
            phenotype_file = open(chooser.get_filename())
            try:
                text = phenotype_file.read()
            finally:
                phenotype_file.close()

            return [PhenotypeDataset(text)]
        finally:
            # Always tear the dialog down, also when reading or parsing fails.
            chooser.destroy()
|
||||
|
||||
|
||||
class PCAFunction(workflow.Function):
|
||||
"""Generic PCA function."""
|
||||
def __init__(self, wf):
|
||||
|
@ -197,7 +234,7 @@ class PhenotypeDataset(dataset.Dataset):
|
|||
|
||||
for col_name, column in zip(col_names, columns[1:]):
|
||||
try:
|
||||
categories[col_name] = map(int, column)
|
||||
categories[col_name] = map(float, column)
|
||||
phenotypes.append(col_name)
|
||||
except ValueError:
|
||||
# category-data
|
||||
|
@ -230,7 +267,7 @@ class PhenotypeDataset(dataset.Dataset):
|
|||
|
||||
dataset.Dataset.__init__(self, a, identifiers=[('CEL', cel_names),
|
||||
('phenotypes', phenotypes)],
|
||||
shape=(len(cel_names),len(phenotypes)))
|
||||
shape=(len(cel_names),len(phenotypes)), name="Phenotype Data")
|
||||
|
||||
|
||||
def get_phenotype_table(self):
|
||||
|
|
Reference in New Issue