go-gene-matrix takes a GO vs. GO distance matrix and a gene-go-mapping file
and makes a gene vs. go distance matrix based on the shortest distances found between each gene and go term.
This commit is contained in:
parent
1ab558248c
commit
335efe231a
|
@ -0,0 +1,80 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
import os, sys
|
||||||
|
import getopt
|
||||||
|
sys.path.append('../..')
|
||||||
|
from fluents import dataset
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
max_val = numpy.inf
|
||||||
|
no_nan = False
|
||||||
|
|
||||||
|
def print_help():
|
||||||
|
print
|
||||||
|
print "Usage: go-gene-matrix <go-dist-matrix.ftsv> <gene-go-mapping.txt>"
|
||||||
|
print
|
||||||
|
print "Description:"
|
||||||
|
print " Takes a GO term by GO term distance matrix and a file that"
|
||||||
|
print " maps GO terms to genes as input arguments and produces a"
|
||||||
|
print " dataset that contains the shortest distances between all"
|
||||||
|
print " genes and GO terms."
|
||||||
|
print
|
||||||
|
print "Options:"
|
||||||
|
print " -h, --help Show this help text."
|
||||||
|
print " -m, --max-dist Trunkate all distances to this value."
|
||||||
|
print
|
||||||
|
|
||||||
|
def get_parameters():
|
||||||
|
global max_val
|
||||||
|
short_opts = "hm:"
|
||||||
|
long_opts = ["help", "max-dist="]
|
||||||
|
|
||||||
|
options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
|
||||||
|
for opt, val in options:
|
||||||
|
if opt in ['-h', '--help']:
|
||||||
|
print_help()
|
||||||
|
sys.exit(0)
|
||||||
|
elif opt in ['-m', '--max-dist']:
|
||||||
|
max_val = int(val)
|
||||||
|
|
||||||
|
if len(params) < 2:
|
||||||
|
print_help()
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
params = get_parameters()
|
||||||
|
|
||||||
|
# Read dataset
|
||||||
|
fd = open(params[0])
|
||||||
|
ds = dataset.read_ftsv(fd)
|
||||||
|
array = ds.asarray()
|
||||||
|
fd.close()
|
||||||
|
|
||||||
|
# Read mapping
|
||||||
|
sorted_keys = []
|
||||||
|
mapping = {}
|
||||||
|
fd = open(params[1])
|
||||||
|
lines = fd.readlines()
|
||||||
|
for line in lines:
|
||||||
|
values = line.split()
|
||||||
|
if len(values) > 0:
|
||||||
|
mapping[values[0]] = values[1:]
|
||||||
|
sorted_keys.append(values[0])
|
||||||
|
|
||||||
|
# Create new dataset
|
||||||
|
matrix = numpy.zeros((len(sorted_keys), ds.shape[0]))
|
||||||
|
dim = ds.get_dim_name(0)
|
||||||
|
for i, gene in enumerate(sorted_keys):
|
||||||
|
for j, go in enumerate(ds[dim]):
|
||||||
|
min = max_val
|
||||||
|
for go2 in mapping[gene]:
|
||||||
|
if ds[dim].has_key(go2) and array[j, ds[dim][go2]] < min:
|
||||||
|
min = array[j, ds[dim][go2]]
|
||||||
|
matrix[i, j] = min
|
||||||
|
out_ds = dataset.Dataset(matrix,
|
||||||
|
(('genes', sorted_keys), ('go-terms', ds[dim])),
|
||||||
|
"Gene by GO matrix")
|
||||||
|
dataset.write_ftsv(sys.stdout, out_ds)
|
||||||
|
|
Reference in New Issue