diff --git a/system/dataset.py b/system/dataset.py
index 95529ae..568ae39 100644
--- a/system/dataset.py
+++ b/system/dataset.py
@@ -172,13 +172,32 @@ class Dataset:
         """Returns dim names"""
         return [dim for dim in self._dims]
 
-    def get_identifiers(self,dim):
-        """Returns identifiers aling dim, sorted by position (index)"""
+    def get_identifiers(self, dim, indices=None):
+        """Returns identifiers along dim, sorted by position (index).
+
+        If indices is given, only the identifiers at those positions
+        are returned, in the order the indices are listed. The result
+        is a list in both cases.
+        """
         items = self._map[dim].items()
         backitems=[ [v[1],v[0]] for v in items]
         backitems.sort()
         sorted_ids=[ backitems[i][1] for i in range(0,len(backitems))]
-        return tuple(sorted_ids)
+
+        # Identity test: None is a singleton, and "!= None" would invoke
+        # a custom __ne__ (e.g. elementwise on an array of indices).
+        if indices is not None:
+            return [sorted_ids[index] for index in indices]
+        return sorted_ids
+
+    def get_indices(self, dim, idents):
+        """Get indices for identifiers along dimension.
+
+        Returns a list holding the index of each identifier in idents,
+        in the same order as idents.
+        """
+        index_of = self._map[dim]
+        return [index_of[ident] for ident in idents]
 
 
 class CategoryDataset(Dataset):
diff --git a/test/system/datasettest.py b/test/system/datasettest.py
index 2609c95..3add265 100644
--- a/test/system/datasettest.py
+++ b/test/system/datasettest.py
@@ -21,6 +21,24 @@ class DatasetTest(unittest.TestCase):
         assert data['samples']['sample_b']==1
         assert 'gene_c' in data['genes'].keys()
         assert data['genes']['gene_c']==2
+
+    def testLookupIndicesOfIdentifiers(self):
+        data = self.testdata
+        # base case
+        self.assertEqual([0, 1, 2], data.get_indices('genes', ['gene_a', 'gene_b', 'gene_c']))
+        # out-of-order lookup
+        self.assertEqual([2, 0], data.get_indices('genes', ['gene_c', 'gene_a']))
+        # other dimension
+        self.assertEqual([0, 1], data.get_indices('samples', ['sample_a', 'sample_b']))
+
+    def testLookupIdentifiersOfIndices(self):
+        data = self.testdata
+        # base case
+        self.assertEqual(['gene_a', 'gene_b', 'gene_c'], data.get_identifiers('genes', [0, 1, 2]))
+        # out-of-order lookup
+        self.assertEqual(['gene_c', 'gene_a'], data.get_identifiers('genes', [2, 0]))
+        # other dimension
+        self.assertEqual(['sample_a', 'sample_b'], data.get_identifiers('samples', [0, 1]))
 
     #def testExtraction(self):
     #    ids = ['gene_a','gene_b']