This repository has been archived on 2024-07-04. You can view files and clone it, but cannot push or open issues or pull requests.
laydi/test/workflows/affy_workflowtest.py

122 lines
3.9 KiB
Python
Raw Normal View History

import unittest
import sys
sys.path.append("../..")
from workflows.affy_workflow import PhenotypeDataset
class PhenotypeDatasetTest(unittest.TestCase):
def testEmptyData(self):
# we have a list of cel-files, but no categories
cel_data = """\
CEL
02-05-33
03-07-38
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals(['CEL', 'phenotypes'], dataset.get_dim_names())
self.assertEquals(['02-05-33', '03-07-38'], dataset.get_identifiers('CEL'))
self.assertEquals([], dataset.get_identifiers('phenotypes'))
def testFloatData(self):
cel_data = """\
CEL\tage
02-05-33\t8.5
03-07-38\t9
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals(['CEL', 'phenotypes'], dataset.get_dim_names())
self.assertEquals(['age'], dataset.get_identifiers('phenotypes'))
self.assertEquals([[8.5],
[9]], dataset.asarray().tolist())
def testCategoryData(self):
"""Categories expand to one numeric column for each category choice."""
cel_data = """\
CEL\tsick
02-05-33\tyes
03-07-38\tno
04-93-33\tyes
08-32-33\tmaybe
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals(['CEL', 'phenotypes'], dataset.get_dim_names())
self.assertEquals(['sick-yes', 'sick-no', 'sick-maybe'], dataset.get_identifiers('phenotypes'))
self.assertEquals([[1, 0, 0],
[0, 1, 0],
[1, 0, 0],
[0, 0, 1]], dataset.asarray().tolist())
def testMultipleCategoriesAndFloats(self):
cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals(['sex-F', 'sex-M', 'age', 'infected-I', 'infected-N'],
dataset.get_identifiers('phenotypes'))
self.assertEquals([[1, 0, 8, 1, 0],
[1, 0, 9, 0, 1],
[0, 1, 8, 1, 0]], dataset.asarray().tolist())
def testGetPhenotypeTable(self):
cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals([['CEL', 'sex', 'age', 'infected'],
['02-05-33', 'F', '8', 'I'],
['02-05-34', 'F', '9', 'N'],
['02-05-35', 'M', '8', 'I']], dataset.get_phenotype_table())
2006-05-09 16:13:09 +02:00
# we can also get a sorted list
new_order = ['02-05-35', '02-05-33', '02-05-34']
self.assertEquals([['CEL', 'sex', 'age', 'infected'],
['02-05-35', 'M', '8', 'I'],
['02-05-33', 'F', '8', 'I'],
['02-05-34', 'F', '9', 'N']], dataset.get_phenotype_table(new_order))
def testGetCategories(self):
cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals(set(['F', 'M', 'I', 'N']), set(dataset.get_categories()))
def testGetFactors(self):
cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals(set(["sex", "infected"]), dataset.get_factors(["F", "I"]))
def testGetCategoryVariable(self):
"""Can get set/unset list for given category."""
cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
dataset = PhenotypeDataset(cel_data)
self.assertEquals([1, 1, 0], dataset.get_category_variable("F"))
self.assertEquals([0, 0, 1], dataset.get_category_variable("M"))
self.assertEquals([1, 0, 1], dataset.get_category_variable("I"))
self.assertEquals([0, 1, 0], dataset.get_category_variable("N"))
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()