Implemented PhenotypeDataset with tests.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import gtk
|
||||
from system import dataset, logger, plots, workflow
|
||||
from scipy import randn
|
||||
from scipy import randn, array, transpose, zeros
|
||||
import cPickle
|
||||
|
||||
|
||||
@@ -185,3 +185,53 @@ class PCAFunction(workflow.Function):
|
||||
|
||||
return [T, P, loading_plot, score_plot]
|
||||
|
||||
|
||||
class PhenotypeDataset(dataset.Dataset):
    """Dataset built from a tab-separated phenotype table.

    The first row of the table is a header and the first column holds the
    CEL names.  Every remaining column becomes one or more phenotype
    variables:

      * a column whose entries all parse as integers is used as is, under
        its header name;
      * any other column is treated as categorical and expanded into one
        0/1 indicator variable per distinct value, named
        "<column>-<value>", in order of first appearance.
    """

    def __init__(self, string):
        """Parse the table in string and initialise the Dataset base class.

        string -- the table as text; rows are separated by line breaks and
                  fields by tabs.  Lines that are empty or contain only
                  whitespace are ignored.

        Raises IndexError if the table has no data rows below the header.
        """
        # Skip blank lines: a blank line would become a one-field row and
        # make the zip() below truncate every row to a single column,
        # silently discarding all phenotype columns.
        rows = [line.split("\t") for line in string.splitlines()
                if line.strip()]
        self._table = rows

        # list() keeps the result indexable where zip() returns an iterator.
        columns = list(zip(*rows[1:]))
        cel_names = columns[0]
        col_names = rows[0][1:]

        phenotypes = []    # variable names, in output column order
        categories = {}    # variable name -> sequence of values

        for col_name, column in zip(col_names, columns[1:]):
            try:
                # Built eagerly so a non-integer entry raises right here.
                values = [int(entry) for entry in column]
            except ValueError:
                # Categorical data: record the row indices of each distinct
                # value, remembering the order in which values first appear.
                order = []
                positions = {}
                for i, entry in enumerate(column):
                    if entry not in positions:
                        order.append(entry)
                        positions[entry] = []
                    positions[entry].append(i)

                # One indicator vector per distinct value.
                for entry in order:
                    indicator = zeros(len(column))
                    for i in positions[entry]:
                        indicator[i] = 1
                    name = "%s-%s" % (col_name, entry)
                    phenotypes.append(name)
                    categories[name] = indicator
            else:
                categories[col_name] = values
                phenotypes.append(col_name)

        matrix_data = [categories[name] for name in phenotypes]

        if matrix_data:
            # Variables were collected as rows; transpose so that rows
            # correspond to CEL names and columns to phenotypes.
            a = transpose(array(matrix_data))
        else:
            a = None

        dataset.Dataset.__init__(self, a, identifiers=[('CEL', cel_names),
                                                       ('phenotypes', phenotypes)],
                                 shape=(len(cel_names), len(phenotypes)))

    def get_phenotype_table(self):
        """Return the parsed table as a list of rows (lists of strings).

        The header row is included as the first element.
        """
        return self._table
|
||||
|
Reference in New Issue
Block a user