import unittest
import sys

# Make the project root importable when this file is run from its own
# directory.
sys.path.append("../..")

from workflows.affy_workflow import PhenotypeDataset


class PhenotypeDatasetTest(unittest.TestCase):
    # Unit tests for PhenotypeDataset, built from tab-separated text whose
    # first column ('CEL') identifies a CEL file and whose remaining columns
    # are phenotypes.

    # Fixture shared by several tests: two categorical columns (sex,
    # infected) around one numeric column (age).
    MIXED_DATA = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""

    def testEmptyData(self):
        # we have a list of cel-files, but no categories
        cel_data = """\
CEL
02-05-33
03-07-38
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(['CEL', 'phenotypes'], dataset.get_dim_names())
        self.assertEqual(['02-05-33', '03-07-38'],
                         dataset.get_identifiers('CEL'))
        self.assertEqual([], dataset.get_identifiers('phenotypes'))

    def testFloatData(self):
        # a numeric column is expected to stay a single phenotype column
        cel_data = """\
CEL\tage
02-05-33\t8.5
03-07-38\t9
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(['CEL', 'phenotypes'], dataset.get_dim_names())
        self.assertEqual(['age'], dataset.get_identifiers('phenotypes'))
        self.assertEqual([[8.5], [9]], dataset.asarray().tolist())

    def testCategoryData(self):
        """Categories expand to one numeric column for each category choice."""
        cel_data = """\
CEL\tsick
02-05-33\tyes
03-07-38\tno
04-93-33\tyes
08-32-33\tmaybe
"""
        dataset = PhenotypeDataset(cel_data)
        self.assertEqual(['CEL', 'phenotypes'], dataset.get_dim_names())
        # expected column order follows first appearance in the data
        self.assertEqual(['sick-yes', 'sick-no', 'sick-maybe'],
                         dataset.get_identifiers('phenotypes'))
        self.assertEqual([[1, 0, 0],
                          [0, 1, 0],
                          [1, 0, 0],
                          [0, 0, 1]],
                         dataset.asarray().tolist())

    def testMultipleCategoriesAndFloats(self):
        # categorical and numeric columns can be mixed; the expected
        # phenotype columns keep the order of the source columns
        dataset = PhenotypeDataset(self.MIXED_DATA)
        self.assertEqual(['sex-F', 'sex-M', 'age', 'infected-I', 'infected-N'],
                         dataset.get_identifiers('phenotypes'))
        self.assertEqual([[1, 0, 8, 1, 0],
                          [1, 0, 9, 0, 1],
                          [0, 1, 8, 1, 0]],
                         dataset.asarray().tolist())

    def testGetPhenotypeTable(self):
        # the phenotype table is expected to hold the header row followed by
        # the data rows, all as strings
        dataset = PhenotypeDataset(self.MIXED_DATA)
        self.assertEqual([['CEL', 'sex', 'age', 'infected'],
                          ['02-05-33', 'F', '8', 'I'],
                          ['02-05-34', 'F', '9', 'N'],
                          ['02-05-35', 'M', '8', 'I']],
                         dataset.get_phenotype_table())

        # we can also get the rows in a caller-specified order
        new_order = ['02-05-35', '02-05-33', '02-05-34']
        self.assertEqual([['CEL', 'sex', 'age', 'infected'],
                          ['02-05-35', 'M', '8', 'I'],
                          ['02-05-33', 'F', '8', 'I'],
                          ['02-05-34', 'F', '9', 'N']],
                         dataset.get_phenotype_table(new_order))

    def testGetCategories(self):
        # only values from the categorical columns are expected; the
        # numeric 'age' values are not categories
        dataset = PhenotypeDataset(self.MIXED_DATA)
        self.assertEqual(set(['F', 'M', 'I', 'N']),
                         set(dataset.get_categories()))

    def testGetFactors(self):
        # each category value should map back to the column it came from
        dataset = PhenotypeDataset(self.MIXED_DATA)
        self.assertEqual(set(["sex", "infected"]),
                         dataset.get_factors(["F", "I"]))

    def testGetCategoryVariable(self):
        """Can get set/unset list for given category."""
        dataset = PhenotypeDataset(self.MIXED_DATA)
        self.assertEqual([1, 1, 0], dataset.get_category_variable("F"))
        self.assertEqual([0, 0, 1], dataset.get_category_variable("M"))
        self.assertEqual([1, 0, 1], dataset.get_category_variable("I"))
        self.assertEqual([0, 1, 0], dataset.get_category_variable("N"))


if __name__ == '__main__':
    unittest.main()