go-gene-matrix takes a GO vs. GO distance matrix and a gene-GO mapping file
and makes a gene vs. GO distance matrix based on the shortest distance found between each gene and GO term.
This commit is contained in:
		
							
								
								
									
										80
									
								
								scripts/geneontology/go-gene-matrix
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										80
									
								
								scripts/geneontology/go-gene-matrix
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,80 @@
 | 
			
		||||
#!/usr/bin/python
 | 
			
		||||
 | 
			
		||||
import os, sys
 | 
			
		||||
import getopt
 | 
			
		||||
sys.path.append('../..')
 | 
			
		||||
from fluents import dataset
 | 
			
		||||
import numpy
 | 
			
		||||
 | 
			
		||||
max_val = numpy.inf
 | 
			
		||||
no_nan = False
 | 
			
		||||
 | 
			
		||||
def print_help():
    """Print the usage and option summary for go-gene-matrix to stdout.

    The text starts and ends with a blank line.  Takes no arguments and
    returns None.
    """
    help_lines = (
        "",
        "Usage: go-gene-matrix <go-dist-matrix.ftsv> <gene-go-mapping.txt>",
        "",
        "Description:",
        "    Takes a GO term by GO term distance matrix and a file that",
        "    maps GO terms to genes as input arguments and produces a",
        "    dataset that contains the shortest distances between all",
        "    genes and GO terms.",
        "",
        "Options:",
        "    -h, --help       Show this help text.",
        "    -m, --max-dist   Truncate all distances to this value.",
        "",
    )
    for line in help_lines:
        # A parenthesised single string prints identically whether print is
        # the Python 2 statement or the Python 3 function.
        print(line)
 | 
			
		||||
 | 
			
		||||
def get_parameters():
    """Parse sys.argv and return the positional arguments.

    Recognised options:
        -h, --help          print the help text and exit with status 0
        -m, --max-dist VAL  store VAL, converted to float, in the
                            module-level ``max_val``

    Returns:
        The list of non-option arguments; it always has at least two
        entries.

    Exits with status 1, after printing the help text, when an option is
    unknown or lacks its argument, when the --max-dist value is not a
    number, or when fewer than two positional arguments are given.
    """
    global max_val

    try:
        options, params = getopt.getopt(sys.argv[1:], "hm:",
                                        ["help", "max-dist="])
    except getopt.GetoptError:
        # sys.exc_info() keeps this handler valid on both Python 2 and 3
        # ("except X, e" and "except X as e" are each version specific).
        sys.stderr.write("go-gene-matrix: %s\n" % sys.exc_info()[1])
        print_help()
        sys.exit(1)

    for opt, val in options:
        if opt in ('-h', '--help'):
            print_help()
            sys.exit(0)
        elif opt in ('-m', '--max-dist'):
            # float rather than int: the threshold is compared against
            # matrix entries and stored in a float matrix, so fractional
            # limits such as 2.5 must be accepted.
            try:
                max_val = float(val)
            except ValueError:
                sys.stderr.write(
                    "go-gene-matrix: invalid --max-dist value: %r\n" % val)
                print_help()
                sys.exit(1)

    if len(params) < 2:
        print_help()
        sys.exit(1)

    return params
 | 
			
		||||
 | 
			
		||||
def read_mapping(path):
    """Read a gene-to-GO-term mapping file.

    Every non-blank line is split on whitespace; the first field is taken
    as the gene identifier and the remaining fields as its GO terms.

    Returns:
        A tuple (genes, mapping).  ``genes`` lists the first field of each
        non-blank line in file order (repeats are kept).  ``mapping`` maps
        each gene to the term list of its line; if a gene occurs on several
        lines, the last line wins.
    """
    genes = []
    mapping = {}
    fd = open(path)
    # try/finally guarantees the handle is released without relying on
    # syntax newer than the rest of this script uses.
    try:
        for line in fd:
            fields = line.split()
            if fields:
                mapping[fields[0]] = fields[1:]
                genes.append(fields[0])
    finally:
        fd.close()
    return genes, mapping


def shortest_distances(array, term_index, gene_terms, limit):
    """Return the shortest distance from every GO term to one gene.

    Args:
        array: 2-d distance matrix indexed as array[row, column].
        term_index: mapping from GO term identifier to its index in array;
            iterating it must yield one identifier per matrix row.
        gene_terms: GO term identifiers annotated to the gene.  Terms
            missing from term_index are ignored.
        limit: upper bound for the result; also the value reported when
            none of the gene's terms is known.

    Returns:
        A list with one entry per element of term_index: the smallest
        array[row, column] over the gene's known term columns, or limit if
        that is smaller.
    """
    # Resolve the gene's terms to column indices once instead of once per row.
    columns = [term_index[term] for term in gene_terms if term in term_index]

    distances = []
    # NOTE(review): as in the original code, the enumeration position is used
    # as the row index; this assumes term_index iterates in index order --
    # confirm against the dataset implementation.
    for row, _ in enumerate(term_index):
        shortest = limit
        for col in columns:
            if array[row, col] < shortest:
                shortest = array[row, col]
        distances.append(shortest)
    return distances


if __name__ == '__main__':
    params = get_parameters()

    # Read the GO-by-GO distance dataset, closing the file even on failure.
    fd = open(params[0])
    try:
        ds = dataset.read_ftsv(fd)
        array = ds.asarray()
    finally:
        fd.close()

    # Read the gene -> GO terms mapping.
    genes, mapping = read_mapping(params[1])

    # Build the gene-by-GO matrix: one row per gene, one column per GO term.
    matrix = numpy.zeros((len(genes), ds.shape[0]))
    dim = ds.get_dim_name(0)
    for i, gene in enumerate(genes):
        distances = shortest_distances(array, ds[dim], mapping[gene], max_val)
        matrix[i, :len(distances)] = distances

    out_ds = dataset.Dataset(matrix,
                             (('genes', genes), ('go-terms', ds[dim])),
                             "Gene by GO matrix")
    dataset.write_ftsv(sys.stdout, out_ds)
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user