diff --git a/system/dataset.py b/system/dataset.py
index d7a7f9f..6e68b4a 100644
--- a/system/dataset.py
+++ b/system/dataset.py
@@ -43,6 +43,11 @@ class Dataset:
             if len(df)!=d and df:
                 raise ValueError,"dim size and identifyer mismatch"
 
+    def __str__(self):
+        """Return the dataset name; never modifies the instance."""
+        # 'Arnar' was the hard-coded placeholder; keep it only as a fallback.
+        return getattr(self, 'name', 'Arnar')
+
     def names(self,axis=0):
         """Returns identifier names of a dimension.
         NB: sorted by values!
diff --git a/workflows/pca_workflow.py b/workflows/pca_workflow.py
index d074e9c..6c6d03d 100644
--- a/workflows/pca_workflow.py
+++ b/workflows/pca_workflow.py
@@ -54,8 +54,28 @@ class LoadAnnotationsFunction(Function):
         dialog.run()
         dialog.destroy()
 
-        ### Reading and aprsing here
+        ### Reading and parsing here
         annot = read_affy_annot(self.file)
+        i_want = 'Pathway'
+        # NOTE(review): `nothing` ('---') is never used; the check below compares
+        # against '--' instead -- confirm which marker the annotation file uses.
+        nothing = '---'
+        # NOTE(review): `data` is not bound in the visible part of this method --
+        # confirm it is in scope here.
+        ids_in_data = set(data.names('genes')) #assuming we have genes
+        # Log when the data holds identifiers missing from the annotation file.
+        if not ids_in_data.issubset(annot.keys()):
+            logger.log('debug','Some identifers in data does not exist in affy file!')
+        # annot exposes keys(), i.e. it is a mapping: iterate over its
+        # (id, description) pairs, not over the bare keys.
+        for affy_id, description in annot.items():
+            if affy_id not in ids_in_data:
+                continue
+            pathways = description[i_want]
+            if pathways[0][0] != '--':
+                # TODO: record the pathway annotation (left unimplemented).
+                pass
+
         return [self.annotations]
 
 class PCAFunction(Function):