Script to generate mapping files between identifiers found in tab-separated files.
This commit is contained in:
parent
f1b3009f11
commit
6be624e872
|
@ -0,0 +1,93 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
import getopt
|
||||||
|
import os, os.path
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Presumably column names found in the Illumina annotation file. Nothing in
# the code visible here reads this list.
# NOTE(review): confirm whether it is still needed.
OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "ILMN_Gene", "Definition", ]
|
||||||
|
|
||||||
|
def print_help():
    """Write the usage text for laydi-mapping-illumina to standard output.

    The text is emitted with ``sys.stdout.write`` instead of the ``print``
    statement so the function runs unchanged on both Python 2 and Python 3.
    """
    help_lines = [
        "laydi-mapping-illumina",
        "",
        "Usage: laydi-mapping-illumina <illumina-annotation-file.txt> <from_dim> <to_dim>",
        "",
        "Description:",
        " Produce mapping files from Illumina text annotation files",
        " Illumina files can be downloaded from:",
        " http://www.switchtoi.com/annotationfiles.ilmn",
        "",
        " NOTE: <from_dim> and <to_dim> are the column names in the illumina text file,",
        " not laydi dimensions.",
        "",
    ]
    # Each entry is one output line; the final "\n" terminates the last one.
    sys.stdout.write("\n".join(help_lines) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_cmdline(argv=None):
    """Parse the command line and return the three positional parameters.

    Args:
        argv: Argument list without the program name. When omitted,
            ``sys.argv[1:]`` is used.

    Returns:
        The list of positional parameters, in command-line order:
        annotation file name, from-column name, to-column name.

    Exits:
        With status 0 after printing the help text for ``-h``/``--help``.
        With status 1 when an unknown option is given, or when the number
        of positional parameters is not exactly three.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        options, params = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError as err:
        # Report a bad option as a usage error instead of an uncaught
        # traceback; same exit status as the wrong-argument-count case.
        sys.stderr.write("laydi-mapping-illumina: %s\n" % err)
        sys.stderr.write(
            "Try 'laydi-mapping-illumina --help' for more information.\n")
        sys.exit(1)

    for key, _ in options:
        if key in ("-h", "--help"):
            print_help()
            sys.exit(0)

    if len(params) != 3:
        print_help()
        sys.exit(1)

    return params
|
||||||
|
|
||||||
|
|
||||||
|
def build_map(fn, from_dim, to_dim):
    """Build a one-to-many mapping between two columns of an annotation file.

    The file is scanned for a line consisting of ``[Probes]``. The line after
    it is the tab-separated column header, and the following lines are data
    rows, up to the next line starting with ``[`` or the end of the file.

    Args:
        fn: Path of the annotation text file.
        from_dim: Header name of the column that supplies the keys.
        to_dim: Header name of the column that supplies the values.

    Returns:
        A dict mapping each key to the list of values seen for it, in file
        order, or None when the file has no ``[Probes]`` section.

    Raises:
        ValueError: If ``from_dim`` or ``to_dim`` is not a header column.
        IndexError: If a data row has fewer columns than required.
    """
    # The context manager closes the file on every exit path, including the
    # early return and the exceptions below.
    with open(fn) as fd:
        # Use one iterator throughout; mixing readline() with iteration is
        # unreliable on Python 2 file objects.
        lines = iter(fd)

        # Skip everything up to and including the "[Probes]" marker.
        for line in lines:
            if line.strip() == "[Probes]":
                break
        else:
            return None

        header = next(lines, "")
        cols = [x.strip() for x in header.split("\t")]
        for dim in (from_dim, to_dim):
            if dim not in cols:
                raise ValueError(
                    "column %r not found in [Probes] header" % (dim,))
        from_col = cols.index(from_dim)
        to_col = cols.index(to_dim)

        retval = {}
        for line in lines:
            stripped = line.strip()
            if stripped.startswith("["):
                # Start of the next section: the probe table is finished.
                break
            if not stripped:
                continue
            # Drop the line ending first so the last column's value does not
            # carry a trailing newline into the mapping.
            fields = line.rstrip("\r\n").split("\t")
            retval.setdefault(fields[from_col], []).append(fields[to_col])

    return retval
|
||||||
|
|
||||||
|
|
||||||
|
def write_map(fd, d, from_dim, to_dim):
    """Write a mapping in the text format produced by this script.

    The output starts with three ``#`` header lines (from, to, description)
    and a blank line. Each mapping entry then follows on its own line as the
    key and its values, separated by single spaces.

    Args:
        fd: An object with a ``write`` method, or a path. A path is opened
            for writing here and closed before returning; a caller-supplied
            file object is left open.
        d: Dict mapping each key to a sequence of values.
        from_dim: Name written on the ``# from:`` header line.
        to_dim: Name written on the ``# to:`` header line.
    """
    # Anything without write() is treated as a path. Unlike an
    # isinstance(fd, str) check, this also accepts unicode paths on Python 2.
    opened_here = not hasattr(fd, "write")
    if opened_here:
        fd = open(fd, "w")

    try:
        fd.write("# from: %s\n" % from_dim)
        fd.write("# to: %s\n" % to_dim)
        fd.write("# description: \n")
        fd.write("\n")

        for key, values in d.items():
            row = [str(key)] + [str(value) for value in values]
            fd.write(" ".join(row) + "\n")
    finally:
        # Close only what was opened here, even if a write failed.
        if opened_here:
            fd.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: read the annotation file and write the mapping
    # for the two requested columns to standard output.
    fn, from_dim, to_dim = parse_cmdline()

    try:
        m = build_map(fn, from_dim, to_dim)
    except (IOError, ValueError) as err:
        # Unreadable file or unknown column name: report without a traceback.
        sys.stderr.write("laydi-mapping-illumina: %s\n" % err)
        sys.exit(1)

    # build_map returns None when the file has no [Probes] section; passing
    # that on to write_map would fail with an AttributeError.
    if m is None:
        sys.stderr.write(
            "laydi-mapping-illumina: no [Probes] section found in %s\n" % fn)
        sys.exit(1)

    write_map(sys.stdout, m, from_dim, to_dim)
|
||||||
|
|
Reference in New Issue