current dataset selection fix

2006-04-21 09:23:05 +00:00
parent 7851048fb6
commit fc4e62f799
4 changed files with 36 additions and 15 deletions
--- a/workflows/pca_workflow.py
+++ b/workflows/pca_workflow.py
@@ -1,9 +1,10 @@
 import gtk
 import logger
 from workflow import *
-from scipy import array
+from scipy import array,zeros
 from data import read_affy_annot,read_mootha,data_dict_to_matrix
 import plots
+import dataset

 class PCAWorkflow(Workflow):

@@ -12,7 +13,7 @@ class PCAWorkflow(Workflow):
        self.name = 'PCAs Workflow'

        load = Stage('load', 'Load Data')
-        load.add_function(Function('load_mootha', 'Load'))
+        load.add_function(LoadMoothaData())
        self.add_stage(load)

        preproc = Stage('preprocess', 'Preprocessing')
@@ -113,6 +114,7 @@ class LoadMoothaData(Function):
        f = open(filename)
        logger.log('notice', 'Loading expression file: %s' % filename)
        self.file = f
+        self.filename = filename
        
    def on_response(self, dialog, response):
        if response == gtk.RESPONSE_OK:
@@ -122,16 +124,23 @@ class LoadMoothaData(Function):
    def run(self, data):
        btns = ('Open', gtk.RESPONSE_OK, \
                'Cancel', gtk.RESPONSE_CANCEL)
-        dialog = gtk.FileChooserDialog('Open Affy Annotation File',
+        dialog = gtk.FileChooserDialog('Open diabetes expression File',
                                       buttons=btns)
        dialog.connect('response', self.on_response)
        dialog.run()
        dialog.destroy()

        ### Reading and parsing here
-        d,sample_names = read_mootha(self.file)
-        x,gene_ids = data_dict_to_matrix(d)
+        d,sample_names = read_mootha()
+        n_samps = len(sample_names)
+        n_genes = len(d.keys())
+        typecode = 'f'
+        x = zeros((n_samps,n_genes),typecode)
+        gene_ids = []
+        for i,(id,desc) in enumerate(d.items()):
+            gene_ids.append(id)
+            x[:,i] = desc[0].astype(typecode)
        gene_def = ['genes',gene_ids]
        sample_def = ['samples', sample_names]
        X = dataset.Dataset(x,[sample_def,gene_def]) # samples x genes
-        return X
+        return [X]