Made sure the ordering of the matrices is OK.
This commit is contained in:
parent
86f89b6ffe
commit
443b18cea6
|
@ -75,6 +75,13 @@ CEL\tsex\tage\tinfected
|
||||||
['02-05-34', 'F', '9', 'N'],
|
['02-05-34', 'F', '9', 'N'],
|
||||||
['02-05-35', 'M', '8', 'I']], dataset.get_phenotype_table())
|
['02-05-35', 'M', '8', 'I']], dataset.get_phenotype_table())
|
||||||
|
|
||||||
|
# we can also get the table with its rows in a caller-specified order
|
||||||
|
new_order = ['02-05-35', '02-05-33', '02-05-34']
|
||||||
|
self.assertEquals([['CEL', 'sex', 'age', 'infected'],
|
||||||
|
['02-05-35', 'M', '8', 'I'],
|
||||||
|
['02-05-33', 'F', '8', 'I'],
|
||||||
|
['02-05-34', 'F', '9', 'N']], dataset.get_phenotype_table(new_order))
|
||||||
|
|
||||||
def testGetCategories(self):
|
def testGetCategories(self):
|
||||||
cel_data = """\
|
cel_data = """\
|
||||||
CEL\tsex\tage\tinfected
|
CEL\tsex\tage\tinfected
|
||||||
|
@ -110,6 +117,5 @@ CEL\tsex\tage\tinfected
|
||||||
self.assertEquals([1, 0, 1], dataset.get_category_variable("I"))
|
self.assertEquals([1, 0, 1], dataset.get_category_variable("I"))
|
||||||
self.assertEquals([0, 1, 0], dataset.get_category_variable("N"))
|
self.assertEquals([0, 1, 0], dataset.get_category_variable("N"))
|
||||||
|
|
||||||
|
|
||||||
if __name__=='__main__':
|
if __name__=='__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -131,41 +131,41 @@ Example: Y-N, M-F""" % ", ".join(data.get_categories()))
|
||||||
if not factors:
|
if not factors:
|
||||||
logger.log("warning", "nothing to do, no factors")
|
logger.log("warning", "nothing to do, no factors")
|
||||||
|
|
||||||
table = data.get_phenotype_table()
|
table = data.get_phenotype_table([os.path.splitext(f)[0] for f in affy.get_identifiers('filename')])
|
||||||
cn = table[0]
|
cn = table[0]
|
||||||
entries = zip(*table[1:])
|
entries = zip(*table[1:])
|
||||||
rn = entries[0]
|
rn = entries[0]
|
||||||
|
|
||||||
import rpy
|
import rpy
|
||||||
|
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||||
rpy.r.library("limma")
|
rpy.r.library("limma")
|
||||||
rpy.r("a <- matrix('kalle', nrow=%d, ncol=%d)" % (len(rn), len(cn)))
|
silent_eval("a <- matrix('kalle', nrow=%d, ncol=%d)" % (len(rn), len(cn)))
|
||||||
for i, row in enumerate(entries):
|
for i, row in enumerate(entries):
|
||||||
for j, entry in enumerate(row):
|
for j, entry in enumerate(row):
|
||||||
rpy.r("a[%d, %d] <- '%s'" % (j+1, i+1, entry))
|
silent_eval("a[%d, %d] <- '%s'" % (j+1, i+1, entry))
|
||||||
rpy.r.assign("rn", rn)
|
rpy.r.assign("rn", rn)
|
||||||
rpy.r.assign("cn", cn)
|
rpy.r.assign("cn", cn)
|
||||||
rpy.r("rownames(a) <- rn")
|
silent_eval("rownames(a) <- rn")
|
||||||
rpy.r("colnames(a) <- cn")
|
silent_eval("colnames(a) <- cn")
|
||||||
|
|
||||||
unique_categories = list(set(categories))
|
unique_categories = list(set(categories))
|
||||||
|
|
||||||
# compose fancy list of factors for design matrix
|
# compose fancy list of factors for design matrix
|
||||||
rpy.r("design <- matrix(0, nrow=%d, ncol=%d)" % (len(rn), len(unique_categories)))
|
silent_eval("design <- matrix(0, nrow=%d, ncol=%d)" % (len(rn), len(unique_categories)))
|
||||||
for i, category in enumerate(unique_categories):
|
for i, category in enumerate(unique_categories):
|
||||||
for j, value in enumerate(data.get_category_variable(category)):
|
for j, value in enumerate(data.get_category_variable(category)):
|
||||||
rpy.r("design[%d, %d] <- %d" % (j+1, i+1, value))
|
silent_eval("design[%d, %d] <- %d" % (j+1, i+1, value))
|
||||||
|
|
||||||
rpy.r.assign("colnames.design", unique_categories)
|
rpy.r.assign("colnames.design", unique_categories)
|
||||||
rpy.r("colnames(design) <- colnames.design")
|
silent_eval("colnames(design) <- colnames.design")
|
||||||
|
|
||||||
rpy.r.assign("expr", affy.asarray())
|
rpy.r.assign("expr", affy.asarray())
|
||||||
rpy.r("fit <- lmFit(expr, design)")
|
silent_eval("fit <- lmFit(expr, design)")
|
||||||
|
|
||||||
# FIXME: might be a case for code injection...
|
# FIXME: might be a case for code injection...
|
||||||
string = "contrast.matrix <- makeContrasts(%s, levels=design)" % response
|
string = "contrast.matrix <- makeContrasts(%s, levels=design)" % response
|
||||||
rpy.r(string)
|
silent_eval(string)
|
||||||
rpy.r("fit2 <- contrasts.fit(fit, contrast.matrix)")
|
silent_eval("fit2 <- contrasts.fit(fit, contrast.matrix)")
|
||||||
rpy.r("fit2 <- eBayes(fit2)")
|
silent_eval("fit2 <- eBayes(fit2)")
|
||||||
coeff = rpy.r("fit2$coefficients")
|
coeff = rpy.r("fit2$coefficients")
|
||||||
amean = rpy.r("fit2$Amean")
|
amean = rpy.r("fit2$Amean")
|
||||||
padj = rpy.r("p.adjust(fit2$p.value, method='fdr')")
|
padj = rpy.r("p.adjust(fit2$p.value, method='fdr')")
|
||||||
|
@ -187,8 +187,10 @@ Example: Y-N, M-F""" % ", ".join(data.get_categories()))
|
||||||
'contrast', response, response,
|
'contrast', response, response,
|
||||||
name="Vulcano plot")
|
name="Vulcano plot")
|
||||||
|
|
||||||
return [coeff_data, amean_data, padj_data, vulcano_plot]
|
# We should be nice and clean up after ourselves
|
||||||
|
rpy.r("rm(list=ls())")
|
||||||
|
|
||||||
|
return [coeff_data, amean_data, padj_data, vulcano_plot]
|
||||||
|
|
||||||
|
|
||||||
class CelFileImportFunction(workflow.Function):
|
class CelFileImportFunction(workflow.Function):
|
||||||
|
@ -349,9 +351,34 @@ class PhenotypeDataset(dataset.Dataset):
|
||||||
('phenotypes', phenotypes)],
|
('phenotypes', phenotypes)],
|
||||||
shape=(len(cel_names),len(phenotypes)), name="Phenotype Data")
|
shape=(len(cel_names),len(phenotypes)), name="Phenotype Data")
|
||||||
|
|
||||||
def get_phenotype_table(self):
|
def sort_cels(self, cel_names):
|
||||||
|
self._dims = []
|
||||||
|
|
||||||
|
cels = {}
|
||||||
|
for row in self._table[1:]:
|
||||||
|
cels[row[0]] = row[1:]
|
||||||
|
|
||||||
|
new_table = [self._table[0]]
|
||||||
|
for name in cel_names:
|
||||||
|
new_table.append([name] + cels[name])
|
||||||
|
|
||||||
|
self._table = new_table
|
||||||
|
self._set_identifiers([('CEL', cel_names), ('phenotypes', self.get_identifiers('phenotypes'))], self._all_dims)
|
||||||
|
|
||||||
|
def get_phenotype_table(self, cel_order=None):
|
||||||
"""Get string based table of phenotypes as read from file."""
|
"""Get string based table of phenotypes as read from file."""
|
||||||
return self._table
|
if not cel_order:
|
||||||
|
return self._table
|
||||||
|
else:
|
||||||
|
cels = {}
|
||||||
|
for row in self._table[1:]:
|
||||||
|
cels[row[0]] = row[1:]
|
||||||
|
|
||||||
|
new_table = [self._table[0]]
|
||||||
|
for name in cel_order:
|
||||||
|
new_table.append([name] + cels[name])
|
||||||
|
|
||||||
|
return new_table
|
||||||
|
|
||||||
def get_categories(self):
|
def get_categories(self):
|
||||||
"""Get categories of factors.
|
"""Get categories of factors.
|
||||||
|
|
Reference in New Issue