Implemented PhenotypeDataset with tests.

This commit is contained in:
2006-05-03 14:04:28 +00:00
parent 56a6028547
commit b757da5929
3 changed files with 133 additions and 2 deletions

View File

@@ -1,6 +1,6 @@
import gtk
from system import dataset, logger, plots, workflow
from scipy import randn
from scipy import randn, array, transpose, zeros
import cPickle
@@ -185,3 +185,53 @@ class PCAFunction(workflow.Function):
return [T, P, loading_plot, score_plot]
class PhenotypeDataset(dataset.Dataset):
    """Dataset built from a tab-separated phenotype table.

    The first line of the table is a header: its first cell sits above
    the CEL-name column and is not used, the remaining cells name the
    phenotype columns.  Every following line holds one CEL name followed
    by one value per phenotype column.

    A column whose values all parse as integers becomes a single
    variable named after the column.  Any other column is treated as
    categorical and expanded into one 0/1 indicator variable per
    distinct value, named "<column>-<value>", in order of first
    appearance.
    """

    def __init__(self, string):
        """Parse the table in `string` and initialise the base Dataset.

        string -- the whole table as text; rows are separated by line
                  breaks and cells by tab characters.

        Blank (empty or whitespace-only) lines are ignored.  Raises
        IndexError when no header line or no data line is left.
        """
        # A blank line would split into a single empty cell and, because
        # zip() stops at the shortest row, silently cut away every
        # phenotype column.  Drop such lines before parsing.
        rows = [line.split("\t") for line in string.splitlines()
                if line.strip()]
        self._table = rows

        # Turn the data rows into columns.  list() keeps the result
        # indexable even where zip() returns an iterator.
        # NOTE: zip() truncates to the shortest row, so cells beyond the
        # length of the shortest data row are not used.
        columns = list(zip(*rows[1:]))
        cel_names = columns[0]
        col_names = rows[0][1:]

        phenotypes = []   # variable names, in output column order
        variables = {}    # variable name -> sequence of per-CEL values

        for col_name, column in zip(col_names, columns[1:]):
            try:
                # Convert eagerly so a non-integer cell raises here,
                # inside the try, and not later on first use.
                values = [int(cell) for cell in column]
            except ValueError:
                # Categorical data: remember which rows hold each
                # distinct value, keeping first-appearance order.
                levels = []
                members = {}
                for i, entry in enumerate(column):
                    if entry not in members:
                        levels.append(entry)
                        members[entry] = []
                    members[entry].append(i)

                # One indicator vector per distinct value.
                for level in levels:
                    indicator = zeros(len(column))
                    for i in members[level]:
                        indicator[i] = 1
                    name = "%s-%s" % (col_name, level)
                    phenotypes.append(name)
                    variables[name] = indicator
            else:
                variables[col_name] = values
                phenotypes.append(col_name)

        matrix_data = [variables[name] for name in phenotypes]
        if matrix_data:
            # matrix_data holds one sequence per variable; transpose so
            # that rows correspond to CEL names and columns to variables.
            a = transpose(array(matrix_data))
        else:
            # No phenotype columns at all: pass no array, only the shape.
            a = None

        dataset.Dataset.__init__(self, a,
                                 identifiers=[('CEL', cel_names),
                                              ('phenotypes', phenotypes)],
                                 shape=(len(cel_names), len(phenotypes)))

    def get_phenotype_table(self):
        """Return the parsed table as a list of rows (lists of cell strings).

        The first row is the header line; blank input lines are not
        included.
        """
        return self._table