current dataset selection fix

This commit is contained in:
2006-04-21 09:23:05 +00:00
parent 7851048fb6
commit fc4e62f799
4 changed files with 36 additions and 15 deletions

View File

@@ -1,9 +1,10 @@
import gtk
import logger
from workflow import *
from scipy import array
from scipy import array,zeros
from data import read_affy_annot,read_mootha,data_dict_to_matrix
import plots
import dataset
class PCAWorkflow(Workflow):
@@ -12,7 +13,7 @@ class PCAWorkflow(Workflow):
self.name = 'PCAs Workflow'
load = Stage('load', 'Load Data')
load.add_function(Function('load_mootha', 'Load'))
load.add_function(LoadMoothaData())
self.add_stage(load)
preproc = Stage('preprocess', 'Preprocessing')
@@ -113,6 +114,7 @@ class LoadMoothaData(Function):
f = open(filename)
logger.log('notice', 'Loading expression file: %s' % filename)
self.file = f
self.filename = filename
def on_response(self, dialog, response):
if response == gtk.RESPONSE_OK:
@@ -122,16 +124,23 @@ class LoadMoothaData(Function):
def run(self, data):
btns = ('Open', gtk.RESPONSE_OK, \
'Cancel', gtk.RESPONSE_CANCEL)
dialog = gtk.FileChooserDialog('Open Affy Annotation File',
dialog = gtk.FileChooserDialog('Open diabetes expression File',
buttons=btns)
dialog.connect('response', self.on_response)
dialog.run()
dialog.destroy()
### Reading and parsing here
d,sample_names = read_mootha(self.file)
x,gene_ids = data_dict_to_matrix(d)
d,sample_names = read_mootha()
n_samps = len(sample_names)
n_genes = len(d.keys())
typecode = 'f'
x = zeros((n_samps,n_genes),typecode)
gene_ids = []
for i,(id,desc) in enumerate(d.items()):
gene_ids.append(id)
x[:,i] = desc[0].astype(typecode)
gene_def = ['genes',gene_ids]
sample_def = ['samples', sample_names]
X = dataset.Dataset(x,[sample_def,gene_def]) # samples x genes
return X
return [X]