Added category getter for PhenotypeDataset.

This commit is contained in:
Truls Alexander Tangstad 2006-05-04 13:30:04 +00:00
parent 338778d7b1
commit fafa373e67
2 changed files with 22 additions and 2 deletions

View File

@ -75,6 +75,17 @@ CEL\tsex\tage\tinfected
['02-05-34', 'F', '9', 'N'],
['02-05-35', 'M', '8', 'I']], dataset.get_phenotype_table())
def testGetCategories(self):
    # Verifies that get_categories() reports exactly the labels F, M, I
    # and N for the fixture below; the values of the 'age' column (8, 9)
    # are expected NOT to show up among the categories.
    #
    # The fixture text must stay flush-left: it is the literal file content
    # handed to PhenotypeDataset, and the trailing backslash after the
    # opening quotes suppresses a leading empty line.
    cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
    dataset = PhenotypeDataset(cel_data)
    expected = set(['F', 'M', 'I', 'N'])
    # Wrap the result in set() so the check does not depend on the
    # container type or ordering returned by get_categories().
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12+.
    self.assertEqual(expected, set(dataset.get_categories()))
if __name__=='__main__':

View File

@ -231,6 +231,7 @@ class PhenotypeDataset(dataset.Dataset):
col_names = rows[0][1:]
phenotypes = []
categories = {}
self._categories = set()
for col_name, column in zip(col_names, columns[1:]):
try:
@ -247,8 +248,8 @@ class PhenotypeDataset(dataset.Dataset):
entries[entry].append(i)
for key in keys:
self._categories.add(key)
z = zeros(len(column))
for i in entries[key]:
z[i] = 1
@ -269,6 +270,14 @@ class PhenotypeDataset(dataset.Dataset):
('phenotypes', phenotypes)],
shape=(len(cel_names),len(phenotypes)), name="Phenotype Data")
def get_phenotype_table(self):
    """Return the phenotype table kept on this dataset.

    The value is whatever is stored in ``self._table``: the string-based
    table of phenotypes as it was read from the input. The stored object
    itself is returned, not a copy.
    """
    table = self._table
    return table
def get_categories(self):
    """Return the category labels of all factors in this dataset.

    Example: if the factor 'sick' can be Y or N and the factor 'sex' can
    be M or F, the categories are Y, N, M and F.

    The value is the object stored in ``self._categories`` (not a copy).
    """
    categories = self._categories
    return categories