#!/usr/bin/python

import getopt
import os, os.path
import sys

# List of five column-name strings.
# NOTE(review): presumably column headers from the Illumina annotation file;
# nothing in the visible part of this file references this constant -- confirm
# whether it is still needed.
OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "ILMN_Gene", "Definition", ]

def print_help():
    """Write the usage and description text to standard output.

    The text is emitted with ``sys.stdout.write`` instead of the ``print``
    statement so that this function is valid syntax on both Python 2 and
    Python 3; the bytes written are the same as before.
    """
    help_lines = (
        "laydi-mapping-illumina",
        "",
        "Usage: laydi-mapping-illumina <illumina-annotation-file.txt> <from_dim> <to_dim>",
        "",
        "Description:",
        "    Produce mapping files from Illumina text annotation files",
        "    Illumina files can be downloaded from:",
        "    http://www.switchtoi.com/annotationfiles.ilmn",
        "",
        "    NOTE: <from_dim> and <to_dim> are the column names in the illumina text file,",
        "    not laydi dimensions.",
        "",
    )
    # Every entry is one output line, so each one is newline-terminated.
    sys.stdout.write("\n".join(help_lines) + "\n")


def parse_cmdline():
    """Parse ``sys.argv`` and return the positional arguments.

    Returns:
        The list of the three positional arguments, in command-line order
        (annotation file name, from-column name, to-column name).

    Exits via ``sys.exit``:
        * status 0 after printing the help text when ``-h`` or ``--help``
          is given;
        * status 1 after printing the help text when an option is not
          recognised or when the number of positional arguments is not
          exactly three.
    """
    short_opts = "h"
    long_opts = ["help"]
    try:
        options, params = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.GetoptError as err:
        # An unknown option used to escape as a traceback; report it the
        # same way as any other usage error instead.
        sys.stderr.write("%s\n" % err)
        print_help()
        sys.exit(1)

    for key, _ in options:
        if key in ("-h", "--help"):
            print_help()
            sys.exit(0)

    if len(params) != 3:
        print_help()
        sys.exit(1)

    return params


def build_map(fn, from_dim, to_dim):
    """Build a one-to-many mapping between two columns of the [Probes] table.

    The file is scanned for a line that is exactly ``[Probes]`` (ignoring
    surrounding whitespace).  The line after it is read as a tab-separated
    header, and every following line up to the next line starting with
    ``[`` (or end of file) is read as a tab-separated data row.

    Args:
        fn: Path of the annotation text file to read.
        from_dim: Header name of the column that supplies the keys.
        to_dim: Header name of the column that supplies the values.

    Returns:
        A dict mapping each key to the list of values found for it, in file
        order, or ``None`` if the file contains no ``[Probes]`` line.

    Raises:
        ValueError: if ``from_dim`` or ``to_dim`` is not a header column.
        IndexError: if a non-blank data row has too few columns.
    """
    retval = {}
    # The context manager guarantees the file is closed on every exit path.
    with open(fn) as fd:
        for line in fd:
            if line.strip() == "[Probes]":
                break
        else:
            # Reached end of file without seeing the section marker.
            return None

        # next() rather than readline(): on Python 2, readline() must not be
        # mixed with iteration over the same file object.
        cols = [x.strip() for x in next(fd, "").split("\t")]
        for dim in (from_dim, to_dim):
            if dim not in cols:
                raise ValueError(
                    "column %r not found in [Probes] header of %s" % (dim, fn))
        from_col = cols.index(from_dim)
        to_col = cols.index(to_dim)

        for line in fd:
            stripped = line.strip()
            if stripped.startswith("["):
                # Start of the next section ends the probe table.
                break
            if not stripped:
                continue
            # Drop the line terminator first so the last column does not
            # carry a trailing newline into the map.
            fields = line.rstrip("\r\n").split("\t")
            retval.setdefault(fields[from_col], []).append(fields[to_col])

    return retval


def write_map(fd, d, from_dim, to_dim):
    """Write a mapping as text: a four-line header, then one line per key.

    The header is ``# from: <from_dim>``, ``# to: <to_dim>``,
    ``# description: `` and an empty line.  Each following line holds a key
    and its values separated by single spaces.

    Output uses ``fd.write`` instead of ``print >> fd`` so the function is
    valid syntax on both Python 2 and Python 3.

    Args:
        fd: Either a ``str`` path, which is opened for writing and closed
            again here, or an object with a ``write`` method, which is left
            open for the caller.
        d: Dict mapping each key to an iterable of values.
        from_dim: Name written on the ``# from:`` header line.
        to_dim: Name written on the ``# to:`` header line.
    """
    opened_here = False
    if isinstance(fd, str):
        fd = open(fd, "w")
        opened_here = True

    try:
        fd.write("# from: %s\n" % from_dim)
        fd.write("# to: %s\n" % to_dim)
        fd.write("# description: \n")
        fd.write("\n")

        for k, v in d.items():
            fd.write(" ".join([str(k)] + [str(e) for e in v]) + "\n")
    finally:
        # Close only what was opened here, even if a write failed.
        if opened_here:
            fd.close()


# Script entry point: parse the arguments, build the mapping and write it to
# standard output.
if __name__ == '__main__':
    fn, from_dim, to_dim = parse_cmdline()
    m = build_map(fn, from_dim, to_dim)
    if m is None:
        # build_map returns None when the file has no "[Probes]" line;
        # passing that on to write_map would fail on d.items().
        sys.stderr.write("%s: no [Probes] section found\n" % fn)
        sys.exit(1)
    write_map(sys.stdout, m, from_dim, to_dim)

