Dataset: document the accessors, add get_dim_names(), and make
get_identifiers(dim) return the identifiers along one dimension as a
tuple ordered by their index. PCA workflow: build the T, P and E
datasets from explicit (dim, identifiers) pairs.

diff --git a/system/dataset.py b/system/dataset.py
index d60e198..22f9bd9 100644
--- a/system/dataset.py
+++ b/system/dataset.py
@@ -157,19 +157,25 @@ class Dataset:
             self.has_array = True
 
     def get_name(self):
+        """Returns dataset name"""
         return self._name
+
     def get_all_dims(self):
+        """Returns all dimensions in project"""
         return self._all_dims
 
-    def get_identifiers(self):
-        #return [n for n in self._map.iteritems()]
+    def get_dim_names(self):
+        """Returns dim names"""
+        return [dim for dim in self._dims]
 
-        # ensure correct order
-        # this has correct dims but not identifiers
-        ids = []
-        for dim in self._dims:
-            ids.append((dim,self._map[dim].keys()))
-        return ids
+    def get_identifiers(self,dim):
+        """Returns identifiers along dim, sorted by position (index)"""
+        items = self._map[dim].items()
+        backitems=[ [v[1],v[0]] for v in items]
+        backitems.sort()
+        sorted_ids=[ backitems[i][1] for i in range(0,len(backitems))]
+        return tuple(sorted_ids)
+
 
 class CategoryDataset(Dataset):
     """The category dataset class.
diff --git a/workflows/pca_workflow.py b/workflows/pca_workflow.py
index b29ebbb..66cd099 100644
--- a/workflows/pca_workflow.py
+++ b/workflows/pca_workflow.py
@@ -93,13 +93,12 @@ class PCAFunction(Function):
         singel_def = ('1',('s'))
 
         # pull out input identifiers:
-        data_ids = []
-        for dim in data:
-            data_ids.append((dim,data[dim].keys()))
+        row_ids = data.get_identifiers('genes')
+        col_ids = data.get_identifiers('samples')
 
-        T = dataset.Dataset(T,(data_ids[0],comp_def))
-        P = dataset.Dataset(P,[data_ids[1],comp_def])
-        E = dataset.Dataset(E,data_ids)
+        T = dataset.Dataset(T,[('samples',col_ids) ,comp_def])
+        P = dataset.Dataset(P,[('genes',row_ids),comp_def])
+        E = dataset.Dataset(E,[('samples',col_ids),('genes',row_ids)])
         #tsq = dataset.Dataset(tsq,[singel_def,data_ids[1])
 
         ## plots