122 lines
3.9 KiB
Python
122 lines
3.9 KiB
Python
import unittest
|
|
import sys
|
|
|
|
sys.path.append("../..")
|
|
from workflows.affy_workflow import PhenotypeDataset
|
|
|
|
class PhenotypeDatasetTest(unittest.TestCase):
    """Exercise PhenotypeDataset on small tab-separated phenotype tables.

    Every fixture is a text table whose first column is headed ``CEL``.
    The fixture text is written at column 0 so that no source indentation
    ends up inside the string literals.

    ``assertEqual`` is used throughout: the ``assertEquals`` spelling is a
    deprecated alias that no longer exists in Python 3.12 and later.
    """

    def testEmptyData(self):
        """A table with only the CEL column yields no phenotype identifiers."""
        # we have a list of cel-files, but no categories
        cel_data = """\
CEL
02-05-33
03-07-38
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(['CEL', 'phenotypes'], dataset.get_dim_names())
        self.assertEqual(['02-05-33', '03-07-38'],
                         dataset.get_identifiers('CEL'))
        self.assertEqual([], dataset.get_identifiers('phenotypes'))

    def testFloatData(self):
        """A numeric column stays a single column holding its values."""
        cel_data = """\
CEL\tage
02-05-33\t8.5
03-07-38\t9
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(['CEL', 'phenotypes'], dataset.get_dim_names())
        self.assertEqual(['age'], dataset.get_identifiers('phenotypes'))
        self.assertEqual([[8.5],
                          [9]], dataset.asarray().tolist())

    def testCategoryData(self):
        """Categories expand to one numeric column for each category choice."""
        cel_data = """\
CEL\tsick
02-05-33\tyes
03-07-38\tno
04-93-33\tyes
08-32-33\tmaybe
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(['CEL', 'phenotypes'], dataset.get_dim_names())
        # Expected column order follows first appearance: yes, no, maybe.
        self.assertEqual(['sick-yes', 'sick-no', 'sick-maybe'],
                         dataset.get_identifiers('phenotypes'))
        self.assertEqual([[1, 0, 0],
                          [0, 1, 0],
                          [1, 0, 0],
                          [0, 0, 1]], dataset.asarray().tolist())

    def testMultipleCategoriesAndFloats(self):
        """Category and numeric columns can be mixed in one table."""
        cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(
            ['sex-F', 'sex-M', 'age', 'infected-I', 'infected-N'],
            dataset.get_identifiers('phenotypes'))

        self.assertEqual([[1, 0, 8, 1, 0],
                          [1, 0, 9, 0, 1],
                          [0, 1, 8, 1, 0]], dataset.asarray().tolist())

    def testGetPhenotypeTable(self):
        """The original table is returned as rows of strings, header first."""
        cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
        dataset = PhenotypeDataset(cel_data)

        self.assertEqual([['CEL', 'sex', 'age', 'infected'],
                          ['02-05-33', 'F', '8', 'I'],
                          ['02-05-34', 'F', '9', 'N'],
                          ['02-05-35', 'M', '8', 'I']],
                         dataset.get_phenotype_table())

        # we can also get a sorted list
        new_order = ['02-05-35', '02-05-33', '02-05-34']
        self.assertEqual([['CEL', 'sex', 'age', 'infected'],
                          ['02-05-35', 'M', '8', 'I'],
                          ['02-05-33', 'F', '8', 'I'],
                          ['02-05-34', 'F', '9', 'N']],
                         dataset.get_phenotype_table(new_order))

    def testGetCategories(self):
        """All category choices are reported; the numeric column adds none."""
        cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
        dataset = PhenotypeDataset(cel_data)

        # Compared as sets because the order of the result is not under test.
        self.assertEqual(set(['F', 'M', 'I', 'N']),
                         set(dataset.get_categories()))

    def testGetFactors(self):
        """Category choices map back to the columns they came from."""
        cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(set(["sex", "infected"]),
                         dataset.get_factors(["F", "I"]))

    def testGetCategoryVariable(self):
        """Can get set/unset list for given category."""
        cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
        dataset = PhenotypeDataset(cel_data)
        # One 0/1 entry per row of the table, in row order.
        self.assertEqual([1, 1, 0], dataset.get_category_variable("F"))
        self.assertEqual([0, 0, 1], dataset.get_category_variable("M"))
        self.assertEqual([1, 0, 1], dataset.get_category_variable("I"))
        self.assertEqual([0, 1, 0], dataset.get_category_variable("N"))
|
|
|
|
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|