Implemented Limma function for Affy workflow.
Extended ScatterPlot to take two datasets and updated code using it.
This commit is contained in:
parent
033d4d5333
commit
5b1af849dc
|
@ -1,5 +1,5 @@
|
||||||
import pygtk
|
import pygtk
|
||||||
pygtk.require('2.0')
|
# pygtk.require('2.0')
|
||||||
import gtk
|
import gtk
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
|
@ -429,18 +429,19 @@ class LinePlot(Plot):
|
||||||
|
|
||||||
|
|
||||||
class ScatterPlot(Plot):
|
class ScatterPlot(Plot):
|
||||||
def __init__(self, dataset, id_dim, sel_dim, id_1, id_2, name="Scatter plot"):
|
def __init__(self, dataset_1, dataset_2, id_dim, sel_dim, id_1, id_2, name="Scatter plot"):
|
||||||
Plot.__init__(self, name)
|
Plot.__init__(self, name)
|
||||||
fig = Figure(figsize=(5,4), dpi=72)
|
fig = Figure(figsize=(5,4), dpi=72)
|
||||||
self.ax = ax = fig.add_subplot(111)
|
self.ax = ax = fig.add_subplot(111)
|
||||||
self.current_dim = id_dim
|
self.current_dim = id_dim
|
||||||
# testing testing
|
# testing testing
|
||||||
self.dataset = dataset
|
self.dataset_1 = dataset_1
|
||||||
x_index = dataset[sel_dim][id_1]
|
|
||||||
y_index = dataset[sel_dim][id_2]
|
|
||||||
|
|
||||||
self.xaxis_data = dataset._array[:,x_index]
|
x_index = dataset_1[sel_dim][id_1]
|
||||||
self.yaxis_data = dataset._array[:,y_index]
|
y_index = dataset_2[sel_dim][id_2]
|
||||||
|
|
||||||
|
self.xaxis_data = dataset_1._array[:,x_index]
|
||||||
|
self.yaxis_data = dataset_2._array[:,y_index]
|
||||||
ax.plot(self.xaxis_data,self.yaxis_data,'og')
|
ax.plot(self.xaxis_data,self.yaxis_data,'og')
|
||||||
ax.set_title(self.get_title())
|
ax.set_title(self.get_title())
|
||||||
ax.set_xlabel("%s - %s" % (sel_dim, id_1))
|
ax.set_xlabel("%s - %s" % (sel_dim, id_1))
|
||||||
|
@ -478,13 +479,13 @@ class ScatterPlot(Plot):
|
||||||
#logger.log('debug','Selection y_start bigger than y_end')
|
#logger.log('debug','Selection y_start bigger than y_end')
|
||||||
index =scipy.nonzero((xdata>x1) & (xdata<x2) & (ydata<y1) & (ydata>y2))
|
index =scipy.nonzero((xdata>x1) & (xdata<x2) & (ydata<y1) & (ydata>y2))
|
||||||
|
|
||||||
ids = self.dataset.get_identifiers(self.current_dim, index)
|
ids = self.dataset_1.get_identifiers(self.current_dim, index)
|
||||||
self.selection_listener(self.current_dim, ids)
|
self.selection_listener(self.current_dim, ids)
|
||||||
|
|
||||||
def selection_changed(self, selection):
|
def selection_changed(self, selection):
|
||||||
ids = selection[self.current_dim] # current identifiers
|
ids = selection[self.current_dim] # current identifiers
|
||||||
|
|
||||||
index = self.dataset.get_indices(self.current_dim, ids)
|
index = self.dataset_1.get_indices(self.current_dim, ids)
|
||||||
xdata_new = scipy.take(self.xaxis_data,index) #take data
|
xdata_new = scipy.take(self.xaxis_data,index) #take data
|
||||||
ydata_new = scipy.take(self.yaxis_data,index)
|
ydata_new = scipy.take(self.yaxis_data,index)
|
||||||
self.ax.clear()
|
self.ax.clear()
|
||||||
|
|
|
@ -86,6 +86,29 @@ CEL\tsex\tage\tinfected
|
||||||
|
|
||||||
self.assertEquals(set(['F', 'M', 'I', 'N']), set(dataset.get_categories()))
|
self.assertEquals(set(['F', 'M', 'I', 'N']), set(dataset.get_categories()))
|
||||||
|
|
||||||
|
def testGetFactors(self):
    """Can map categories back to the set of factors they belong to."""
    cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
    dataset = PhenotypeDataset(cel_data)
    # assertEqual is the canonical spelling; the assertEquals alias is
    # deprecated and no longer exists in recent Python versions.
    self.assertEqual(set(["sex", "infected"]), dataset.get_factors(["F", "I"]))
|
||||||
|
|
||||||
|
def testGetCategoryVariable(self):
    """Can get set/unset list for given category."""
    cel_data = """\
CEL\tsex\tage\tinfected
02-05-33\tF\t8\tI
02-05-34\tF\t9\tN
02-05-35\tM\t8\tI
"""
    dataset = PhenotypeDataset(cel_data)
    # One 0/1 entry per sample row, in file order.
    # assertEqual is the canonical spelling; the assertEquals alias is
    # deprecated and no longer exists in recent Python versions.
    self.assertEqual([1, 1, 0], dataset.get_category_variable("F"))
    self.assertEqual([0, 0, 1], dataset.get_category_variable("M"))
    self.assertEqual([1, 0, 1], dataset.get_category_variable("I"))
    self.assertEqual([0, 1, 0], dataset.get_category_variable("N"))
|
||||||
|
|
||||||
|
|
||||||
if __name__=='__main__':
|
if __name__=='__main__':
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import gtk
|
import gtk
|
||||||
|
import os.path
|
||||||
from system import dataset, logger, plots, workflow, dialogs
|
from system import dataset, logger, plots, workflow, dialogs
|
||||||
from scipy import randn, array, transpose, zeros
|
from scipy import randn, array, transpose, zeros
|
||||||
import cPickle
|
import cPickle
|
||||||
|
@ -17,9 +18,12 @@ class AffyWorkflow (workflow.Workflow):
|
||||||
load.add_function(PhenotypeImportFunction())
|
load.add_function(PhenotypeImportFunction())
|
||||||
load.add_function(TestDataFunction())
|
load.add_function(TestDataFunction())
|
||||||
load.add_function(DatasetLoadFunction())
|
load.add_function(DatasetLoadFunction())
|
||||||
load.add_function(ContrastMatrixGenerateFunction())
|
|
||||||
self.add_stage(load)
|
self.add_stage(load)
|
||||||
|
|
||||||
|
significance = workflow.Stage('significance', 'Significance analysis')
|
||||||
|
significance.add_function(LimmaFunction())
|
||||||
|
self.add_stage(significance)
|
||||||
|
|
||||||
explore = workflow.Stage('explore', 'Explorative analysis')
|
explore = workflow.Stage('explore', 'Explorative analysis')
|
||||||
explore.add_function(PCAFunction(self))
|
explore.add_function(PCAFunction(self))
|
||||||
explore.add_function(PrintFunction())
|
explore.add_function(PrintFunction())
|
||||||
|
@ -106,18 +110,86 @@ class DatasetSaveFunction(workflow.Function):
|
||||||
chooser.destroy()
|
chooser.destroy()
|
||||||
|
|
||||||
|
|
||||||
class ContrastMatrixGenerateFunction(workflow.Function):
|
class LimmaFunction(workflow.Function):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
workflow.Function.__init__(self, 'contrast_create', 'Create contrast matrix')
|
workflow.Function.__init__(self, 'limma', 'Limma')
|
||||||
|
|
||||||
def run(self, data):
|
def run(self, affy, data):
    """Fit a limma linear model for user-selected contrasts.

    Asks the user for a comma-separated list of contrasts, builds a 0/1
    design matrix from the categories named in them, and runs limma's
    lmFit / makeContrasts / contrasts.fit / eBayes through rpy.

    Parameters:
      affy -- dataset whose asarray(), get_dim_names() and
              get_identifiers() are used here; presumably the
              expression data -- TODO confirm
      data -- dataset providing get_categories(), get_factors(),
              get_phenotype_table() and get_category_variable();
              presumably a PhenotypeDataset -- TODO confirm

    Returns a list with the coefficient, average-intensity and adjusted
    p-value datasets, followed by a scatter plot of the coefficients
    against the adjusted p-values.
    """
    # The prompt lists data.get_categories() so the user can see which
    # names may be used in a contrast.
    response = dialogs.get_text('Enter contrasts...', """\
Enter comma-separated list of contrasts.
Available categories: %s

Example: Y-N, M-F""" % ", ".join(data.get_categories()))

    logger.log("notice", "contrasts selected: %s" % response)

    # Break "A-B, C-D" into the individual category names A, B, C, D.
    # The list comprehension is used only for its extend() side effect.
    categories = []
    [categories.extend(s.split("-")) for s in response.split(",")]
    categories = [s.strip() for s in categories]

    factors = data.get_factors(categories)
    if not factors:
        # NOTE(review): only a warning is logged here; execution continues
        # with the model fit below -- confirm whether an early return was
        # intended.
        logger.log("warning", "nothing to do, no factors")

    # table[0] supplies the column names; the remaining rows are
    # transposed so that entries[0] (the first column) supplies the row
    # names.
    # NOTE(review): indexing the zip() result relies on it being a list
    # (Python 2 behaviour).
    table = data.get_phenotype_table()
    cn = table[0]
    entries = zip(*table[1:])
    rn = entries[0]

    import rpy
    rpy.r.library("limma")
    # Copy the phenotype table cell by cell into the R character matrix
    # "a" ('kalle' is only the initial fill value).  Each item of entries
    # is one table column, so i is the column index and j the row index;
    # R indices are 1-based, hence the +1.
    # NOTE(review): "a" is not referenced again by the R calls in this
    # method.
    rpy.r("a <- matrix('kalle', nrow=%d, ncol=%d)" % (len(rn), len(cn)))
    for i, row in enumerate(entries):
        for j, entry in enumerate(row):
            rpy.r("a[%d, %d] <- '%s'" % (j+1, i+1, entry))
    rpy.r.assign("rn", rn)
    rpy.r.assign("cn", cn)
    rpy.r("rownames(a) <- rn")
    rpy.r("colnames(a) <- cn")

    # One design-matrix column per distinct category; going through set()
    # means the column order is arbitrary.
    unique_categories = list(set(categories))

    # compose fancy list of factors for design matrix
    rpy.r("design <- matrix(0, nrow=%d, ncol=%d)" % (len(rn), len(unique_categories)))
    for i, category in enumerate(unique_categories):
        # get_category_variable() yields one value per row, written into
        # column i+1 of the design matrix.
        for j, value in enumerate(data.get_category_variable(category)):
            rpy.r("design[%d, %d] <- %d" % (j+1, i+1, value))

    rpy.r.assign("colnames.design", unique_categories)
    rpy.r("colnames(design) <- colnames.design")

    rpy.r.assign("expr", affy.asarray())
    rpy.r("fit <- lmFit(expr, design)")

    # The raw user input is interpolated into the R expression below.
    # FIXME: might be a case for code injection...
    string = "contrast.matrix <- makeContrasts(%s, levels=design)" % response
    rpy.r(string)
    rpy.r("fit2 <- contrasts.fit(fit, contrast.matrix)")
    rpy.r("fit2 <- eBayes(fit2)")
    coeff = rpy.r("fit2$coefficients")
    amean = rpy.r("fit2$Amean")
    padj = rpy.r("p.adjust(fit2$p.value, method='fdr')")

    # dim_2 is unpacked but not used below.
    dim_1, dim_2 = affy.get_dim_names()

    # NOTE(review): the whole response string is used as the single
    # "contrast" identifier, even when several comma-separated contrasts
    # were entered -- confirm this matches the shape of coeff and padj.
    coeff_data = dataset.Dataset(coeff, [(dim_1, affy.get_identifiers(dim_1)),
                                         ("contrast", [response])],
                                 name="Coefficients")
    # The dimension order here is ("average", dim_1), unlike the two
    # datasets around it, which put dim_1 first.
    amean_data = dataset.Dataset(array(amean), [("average", ["average"]),
                                                (dim_1, affy.get_identifiers(dim_1))],
                                 name="Average Intensity")
    padj_data = dataset.Dataset(padj, [(dim_1, affy.get_identifiers(dim_1)),
                                       ("contrast", [response])],
                                name="Adjusted P-value")

    # x axis: coefficients, y axis: adjusted p-values, both selected by
    # the same "contrast" identifier.
    vulcano_plot = plots.ScatterPlot(coeff_data, padj_data, dim_1,
                                     'contrast', response, response,
                                     name="Vulcano plot")

    return [coeff_data, amean_data, padj_data, vulcano_plot]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class CelFileImportFunction(workflow.Function):
|
class CelFileImportFunction(workflow.Function):
|
||||||
"""Loads Affymetrics .CEL-files into matrix."""
|
"""Loads Affymetrics .CEL-files into matrix."""
|
||||||
|
@ -147,7 +219,6 @@ class CelFileImportFunction(workflow.Function):
|
||||||
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
silent_eval = rpy.with_mode(rpy.NO_CONVERSION, rpy.r)
|
||||||
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
|
silent_eval('E <- ReadAffy(filenames=c("%s"))' % '", "'.join(chooser.get_filenames()))
|
||||||
silent_eval('E <- rma(E)')
|
silent_eval('E <- rma(E)')
|
||||||
|
|
||||||
m = rpy.r('m <- E@exprs')
|
m = rpy.r('m <- E@exprs')
|
||||||
|
|
||||||
vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
|
vector_eval = rpy.with_mode(rpy.VECTOR_CONVERSION, rpy.r)
|
||||||
|
@ -223,9 +294,9 @@ class PCAFunction(workflow.Function):
|
||||||
# cleanup
|
# cleanup
|
||||||
rpy.r.rm(["t", "m"])
|
rpy.r.rm(["t", "m"])
|
||||||
|
|
||||||
loading_plot = plots.ScatterPlot(P, dim_2, component_dim, '1', '2',
|
loading_plot = plots.ScatterPlot(P, P, dim_2, component_dim, '1', '2',
|
||||||
"Loadings")
|
"Loadings")
|
||||||
score_plot = plots.ScatterPlot(T, dim_1,component_dim, '1', '2',
|
score_plot = plots.ScatterPlot(T, T, dim_1,component_dim, '1', '2',
|
||||||
"Scores")
|
"Scores")
|
||||||
|
|
||||||
return [T, P, loading_plot, score_plot]
|
return [T, P, loading_plot, score_plot]
|
||||||
|
@ -239,7 +310,7 @@ class PhenotypeDataset(dataset.Dataset):
|
||||||
col_names = rows[0][1:]
|
col_names = rows[0][1:]
|
||||||
phenotypes = []
|
phenotypes = []
|
||||||
categories = {}
|
categories = {}
|
||||||
self._categories = set()
|
self._categories = {}
|
||||||
|
|
||||||
for col_name, column in zip(col_names, columns[1:]):
|
for col_name, column in zip(col_names, columns[1:]):
|
||||||
try:
|
try:
|
||||||
|
@ -257,7 +328,7 @@ class PhenotypeDataset(dataset.Dataset):
|
||||||
entries[entry].append(i)
|
entries[entry].append(i)
|
||||||
|
|
||||||
for key in keys:
|
for key in keys:
|
||||||
self._categories.add(key)
|
self._categories[key] = col_name
|
||||||
z = zeros(len(column))
|
z = zeros(len(column))
|
||||||
for i in entries[key]:
|
for i in entries[key]:
|
||||||
z[i] = 1
|
z[i] = 1
|
||||||
|
@ -288,4 +359,29 @@ class PhenotypeDataset(dataset.Dataset):
|
||||||
If factor 'sick' had possibilites Y/N, and 'sex' M/F, the
|
If factor 'sick' had possibilites Y/N, and 'sex' M/F, the
|
||||||
categories would be Y, N, M and F.
|
categories would be Y, N, M and F.
|
||||||
"""
|
"""
|
||||||
return self._categories
|
return self._categories.keys()
|
||||||
|
|
||||||
|
def get_factors(self, categories):
    """Return the set of factors that the given categories belong to.

    Every category is looked up in ``self._categories``; a category that
    is not present there raises ``KeyError``.
    """
    factor_of = self._categories
    return set(factor_of[name] for name in categories)
|
||||||
|
|
||||||
|
def get_category_variable(self, category):
    """Return a list of 0/1 flags telling which entries equal *category*.

    The factor owning the category is looked up in ``self._categories``
    (``KeyError`` if the category is unknown).  Every column of the
    phenotype table whose first cell equals that factor contributes one
    flag per remaining cell: 1 where the cell equals *category*, else 0.
    """
    # Still a brute-force scan over every column of the table; good
    # enough until optimization becomes necessary.
    wanted_factor = self._categories[category]
    flags = []
    for column in zip(*self.get_phenotype_table()):
        header, cells = column[0], column[1:]
        if header != wanted_factor:
            continue
        flags.extend(1 if cell == category else 0 for cell in cells)
    return flags
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -248,7 +248,7 @@ class PCAFunction(workflow.Function):
|
||||||
# cleanup
|
# cleanup
|
||||||
rpy.r.rm(["t", "m"])
|
rpy.r.rm(["t", "m"])
|
||||||
|
|
||||||
loading_plot = plots.ScatterPlot(P,'ids','component','1','2', "Loadings")
|
# ScatterPlot takes the x-axis and the y-axis dataset; P is passed for both.
loading_plot = plots.ScatterPlot(P, P, 'ids', 'component', '1', '2', "Loadings")
|
||||||
score_plot = plots.ScatterPlot(T,'filename','component','1','2', "Scores")
|
score_plot = plots.ScatterPlot(T, T,'filename','component','1','2', "Scores")
|
||||||
|
|
||||||
return [T, P, loading_plot, score_plot]
|
return [T, P, loading_plot, score_plot]
|
||||||
|
|
|
@ -102,9 +102,9 @@ class PCAFunction(Function):
|
||||||
#tsq = dataset.Dataset(tsq,[singel_def,data_ids[1])
|
#tsq = dataset.Dataset(tsq,[singel_def,data_ids[1])
|
||||||
|
|
||||||
## plots
|
## plots
|
||||||
loading_plot1 = plots.ScatterPlot(P,'genes','comp','1','2')
|
loading_plot1 = plots.ScatterPlot(P,P,'genes','comp','1','2')
|
||||||
loading_plot2 = plots.ScatterPlot(P,'genes','comp','3','4')
|
loading_plot2 = plots.ScatterPlot(P,P,'genes','comp','3','4')
|
||||||
score_plot = plots.ScatterPlot(T,'samples','comp','1','2')
|
score_plot = plots.ScatterPlot(T,T,'samples','comp','1','2')
|
||||||
|
|
||||||
return [T,P,E,loading_plot1,loading_plot2,score_plot]
|
return [T,P,E,loading_plot1,loading_plot2,score_plot]
|
||||||
|
|
||||||
|
|
Reference in New Issue