diff --git a/test/workflows/affy_workflowtest.py b/test/workflows/affy_workflowtest.py index 82e5b33..2b071e2 100644 --- a/test/workflows/affy_workflowtest.py +++ b/test/workflows/affy_workflowtest.py @@ -75,6 +75,17 @@ CEL\tsex\tage\tinfected ['02-05-34', 'F', '9', 'N'], ['02-05-35', 'M', '8', 'I']], dataset.get_phenotype_table()) + def testGetCategories(self): + cel_data = """\ +CEL\tsex\tage\tinfected +02-05-33\tF\t8\tI +02-05-34\tF\t9\tN +02-05-35\tM\t8\tI +""" + dataset = PhenotypeDataset(cel_data) + + self.assertEquals(set(['F', 'M', 'I', 'N']), set(dataset.get_categories())) + if __name__=='__main__': diff --git a/workflows/affy_workflow.py b/workflows/affy_workflow.py index cc88c98..8d7df67 100644 --- a/workflows/affy_workflow.py +++ b/workflows/affy_workflow.py @@ -231,6 +231,7 @@ class PhenotypeDataset(dataset.Dataset): col_names = rows[0][1:] phenotypes = [] categories = {} + self._categories = set() for col_name, column in zip(col_names, columns[1:]): try: @@ -247,8 +248,8 @@ class PhenotypeDataset(dataset.Dataset): entries[entry].append(i) - for key in keys: + self._categories.add(key) z = zeros(len(column)) for i in entries[key]: z[i] = 1 @@ -269,6 +270,14 @@ class PhenotypeDataset(dataset.Dataset): ('phenotypes', phenotypes)], shape=(len(cel_names),len(phenotypes)), name="Phenotype Data") - def get_phenotype_table(self): + """Get string based table of phenotypes as read from file.""" return self._table + + def get_categories(self): + """Get categories of factors. + + If factor 'sick' had possibilities Y/N, and 'sex' M/F, the + categories would be Y, N, M and F. + """ + return self._categories