#!/usr/bin/python

import os, sys
import getopt
sys.path.append('../..')
from laydi import dataset
import numpy

max_val = numpy.inf
no_nan = False

def print_help():
    print
    print "Usage: go-gene-matrix <go-dist-matrix.ftsv> <gene-go-mapping.txt>"
    print
    print "Description:"
    print "    Takes a GO term by GO term distance matrix and a file that"
    print "    maps GO terms to genes as input arguments and produces a"
    print "    dataset that contains the shortest distances between all"
    print "    genes and GO terms."
    print
    print "Options:"
    print "    -h, --help       Show this help text."
    print "    -m, --max-dist   Trunkate all distances to this value."
    print

def get_parameters():
    global max_val
    short_opts = "hm:"
    long_opts = ["help", "max-dist="]

    options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    for opt, val in options:
        if opt in ['-h', '--help']:
            print_help()
            sys.exit(0)
        elif opt in ['-m', '--max-dist']:
            max_val = int(val)

    if len(params) < 2:
        print_help()
        sys.exit(1)

    return params

if __name__ == '__main__':
    params = get_parameters()

    # Read dataset
    fd = open(params[0])
    ds = dataset.read_ftsv(fd)
    array = ds.asarray()
    fd.close()

    # Read mapping
    sorted_keys = []
    mapping = {}
    fd = open(params[1])
    lines = fd.readlines()
    for line in lines:
        values = line.split()
        if len(values) > 0:
            mapping[values[0]] = values[1:]
            sorted_keys.append(values[0])

    # Create new dataset
    matrix = numpy.zeros((len(sorted_keys), ds.shape[0]))
    dim = ds.get_dim_name(0)
    for i, gene in enumerate(sorted_keys):
        for j, go in enumerate(ds[dim]):
            min = max_val
            for go2 in mapping[gene]:
                if ds[dim].has_key(go2) and array[j, ds[dim][go2]] < min:
                    min = array[j, ds[dim][go2]]
            matrix[i, j] = min
    out_ds = dataset.Dataset(matrix, 
                             (('genes', sorted_keys), ('go-terms', ds[dim])),
                             "Gene by GO matrix")
    dataset.write_ftsv(sys.stdout, out_ds)