From 443b18cea649d257c1a43d781aa198e5d44b888e Mon Sep 17 00:00:00 2001 From: tangstad Date: Tue, 9 May 2006 14:13:09 +0000 Subject: [PATCH] Made sure ordering of matrices are ok. --- test/workflows/affy_workflowtest.py | 8 +++- workflows/affy_workflow.py | 59 +++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 17 deletions(-) diff --git a/test/workflows/affy_workflowtest.py b/test/workflows/affy_workflowtest.py index 8c6cdb2..dd8f934 100644 --- a/test/workflows/affy_workflowtest.py +++ b/test/workflows/affy_workflowtest.py @@ -75,6 +75,13 @@ CEL\tsex\tage\tinfected ['02-05-34', 'F', '9', 'N'], ['02-05-35', 'M', '8', 'I']], dataset.get_phenotype_table()) + # we can also get a sorted list + new_order = ['02-05-35', '02-05-33', '02-05-34'] + self.assertEquals([['CEL', 'sex', 'age', 'infected'], + ['02-05-35', 'M', '8', 'I'], + ['02-05-33', 'F', '8', 'I'], + ['02-05-34', 'F', '9', 'N']], dataset.get_phenotype_table(new_order)) + def testGetCategories(self): cel_data = """\ CEL\tsex\tage\tinfected @@ -110,6 +117,5 @@ CEL\tsex\tage\tinfected self.assertEquals([1, 0, 1], dataset.get_category_variable("I")) self.assertEquals([0, 1, 0], dataset.get_category_variable("N")) - if __name__=='__main__': unittest.main() diff --git a/workflows/affy_workflow.py b/workflows/affy_workflow.py index 349f209..c60a433 100644 --- a/workflows/affy_workflow.py +++ b/workflows/affy_workflow.py @@ -131,41 +131,41 @@ Example: Y-N, M-F""" % ", ".join(data.get_categories())) if not factors: logger.log("warning", "nothing to do, no factors") - table = data.get_phenotype_table() + table = data.get_phenotype_table([os.path.splitext(f)[0] for f in affy.get_identifiers('filename')]) cn = table[0] entries = zip(*table[1:]) rn = entries[0] import rpy + silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r) rpy.r.library("limma") - rpy.r("a <- matrix('kalle', nrow=%d, ncol=%d)" % (len(rn), len(cn))) + silent_eval("a <- matrix('kalle', nrow=%d, ncol=%d)" % (len(rn), len(cn))) for i, row in enumerate(entries): for j, entry in enumerate(row): - rpy.r("a[%d, %d] <- '%s'" % (j+1, i+1, entry)) + silent_eval("a[%d, %d] <- '%s'" % (j+1, i+1, entry)) rpy.r.assign("rn", rn) rpy.r.assign("cn", cn) - rpy.r("rownames(a) <- rn") - rpy.r("colnames(a) <- cn") + silent_eval("rownames(a) <- rn") + silent_eval("colnames(a) <- cn") unique_categories = list(set(categories)) # compose fancy list of factors for design matrix - rpy.r("design <- matrix(0, nrow=%d, ncol=%d)" % (len(rn), len(unique_categories))) + silent_eval("design <- matrix(0, nrow=%d, ncol=%d)" % (len(rn), len(unique_categories))) for i, category in enumerate(unique_categories): for j, value in enumerate(data.get_category_variable(category)): - rpy.r("design[%d, %d] <- %d" % (j+1, i+1, value)) + silent_eval("design[%d, %d] <- %d" % (j+1, i+1, value)) rpy.r.assign("colnames.design", unique_categories) - rpy.r("colnames(design) <- colnames.design") + silent_eval("colnames(design) <- colnames.design") rpy.r.assign("expr", affy.asarray()) - rpy.r("fit <- lmFit(expr, design)") - + silent_eval("fit <- lmFit(expr, design)") # FIXME: might be a case for code injection... string = "contrast.matrix <- makeContrasts(%s, levels=design)" % response - rpy.r(string) - rpy.r("fit2 <- contrasts.fit(fit, contrast.matrix)") - rpy.r("fit2 <- eBayes(fit2)") + silent_eval(string) + silent_eval("fit2 <- contrasts.fit(fit, contrast.matrix)") + silent_eval("fit2 <- eBayes(fit2)") coeff = rpy.r("fit2$coefficients") amean = rpy.r("fit2$Amean") padj = rpy.r("p.adjust(fit2$p.value, method='fdr')") @@ -187,10 +187,12 @@ Example: Y-N, M-F""" % ", ".join(data.get_categories())) 'contrast', response, response, name="Vulcano plot") + # We should be nice and clean up after ourselves + rpy.r("rm(list=ls())") + return [coeff_data, amean_data, padj_data, vulcano_plot] - class CelFileImportFunction(workflow.Function): """Loads Affymetrics .CEL-files into matrix.""" def __init__(self): @@ -348,10 +350,35 @@ class PhenotypeDataset(dataset.Dataset): dataset.Dataset.__init__(self, a, identifiers=[('CEL', cel_names), ('phenotypes', phenotypes)], shape=(len(cel_names),len(phenotypes)), name="Phenotype Data") + + def sort_cels(self, cel_names): + self._dims = [] + + cels = {} + for row in self._table[1:]: + cels[row[0]] = row[1:] + + new_table = [self._table[0]] + for name in cel_names: + new_table.append([name] + cels[name]) + + self._table = new_table + self._set_identifiers([('CEL', cel_names), ('phenotypes', self.get_identifiers('phenotypes'))], self._all_dims) - def get_phenotype_table(self): + def get_phenotype_table(self, cel_order=None): """Get string based table of phenotypes as read from file.""" - return self._table + if not cel_order: + return self._table + else: + cels = {} + for row in self._table[1:]: + cels[row[0]] = row[1:] + + new_table = [self._table[0]] + for name in cel_order: + new_table.append([name] + cels[name]) + + return new_table def get_categories(self): """Get categories of factors.