Script to generate mapping files between identifiers found in tab-separated files.
This commit is contained in:
parent
f1b3009f11
commit
6be624e872
93
scripts/illumina/laydi-mapping-illumina
Executable file
93
scripts/illumina/laydi-mapping-illumina
Executable file
@ -0,0 +1,93 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import getopt
|
||||
import os, os.path
|
||||
import sys
|
||||
|
||||
# Presumably column names from the Illumina annotation file -- TODO confirm.
# NOTE(review): not referenced by any function visible in this script;
# verify whether it is still needed.
OUTPUT_COLS = ["Array_Address_Id", "Entrez_Gene_ID", "Accession", "ILMN_Gene", "Definition", ]
|
||||
|
||||
def print_help(stream=None):
    """Write the usage and description text of laydi-mapping-illumina.

    Args:
        stream: Writable text stream that receives the help text. Defaults
            to ``sys.stdout``, looked up at call time.
    """
    if stream is None:
        stream = sys.stdout

    # Empty entries produce the blank separator lines of the help text.
    help_lines = [
        "laydi-mapping-illumina",
        "",
        "Usage: laydi-mapping-illumina <illumina-annotation-file.txt> <from_dim> <to_dim>",
        "",
        "Description:",
        " Produce mapping files from Illumina text annotation files",
        " Illumina files can be downloaded from:",
        " http://www.switchtoi.com/annotationfiles.ilmn",
        "",
        " NOTE: <from_dim> and <to_dim> are the column names in the illumina text file,",
        " not laydi dimensions.",
        "",
    ]
    # A single write keeps the function valid on both Python 2 and 3
    # (the print statement only parses on Python 2).
    stream.write("\n".join(help_lines) + "\n")
|
||||
|
||||
|
||||
def parse_cmdline(argv=None):
    """Parse the command line and return the three positional arguments.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]``.

    Returns:
        The list of the three positional arguments, in the order given:
        annotation file name, from-column name, to-column name.

    Raises:
        SystemExit: With status 0 after printing the help text when
            ``-h``/``--help`` is given; with status 1 after printing the
            help text when an option is not recognised or the number of
            positional arguments is not exactly three.
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        options, params = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError as err:
        # An unknown option is a usage error: report it and show the usage
        # text instead of letting the exception escape as a traceback.
        sys.stderr.write("laydi-mapping-illumina: %s\n" % err)
        print_help()
        sys.exit(1)

    for key, _ in options:
        if key in ("-h", "--help"):
            print_help()
            sys.exit(0)

    if len(params) != 3:
        print_help()
        sys.exit(1)

    return params
|
||||
|
||||
|
||||
def build_map(fn, from_dim, to_dim):
    """Build a one-to-many mapping between two columns of an annotation file.

    The file is scanned for a line that consists of ``[Probes]``. The line
    after it is read as the tab-separated header row, and the lines after
    that as tab-separated data rows, up to the next line that starts with
    ``[`` or the end of the file. Blank lines inside the section are skipped.

    Args:
        fn: Path of the annotation text file.
        from_dim: Header name of the column that supplies the keys.
        to_dim: Header name of the column that supplies the values.

    Returns:
        A dict mapping each key to the list of values found for it, in file
        order with duplicates kept, or ``None`` when the file contains no
        ``[Probes]`` line.

    Raises:
        ValueError: If ``from_dim`` or ``to_dim`` is not in the header row.
        IndexError: If a data row has fewer fields than the selected columns
            require.
    """
    with open(fn) as fd:
        # Every read goes through one iterator so that iteration and
        # explicit next() calls never get mixed with readline().
        lines = iter(fd)

        for line in lines:
            if line.strip() == "[Probes]":
                break
        else:
            return None

        header = [name.strip() for name in next(lines, "").split("\t")]
        for dim in (from_dim, to_dim):
            if dim not in header:
                raise ValueError(
                    "column %r not found in [Probes] header of %s" % (dim, fn))
        from_col = header.index(from_dim)
        to_col = header.index(to_dim)

        retval = {}
        for line in lines:
            stripped = line.strip()
            if stripped.startswith("["):
                # The next section header ends the probe table.
                break
            if not stripped:
                continue
            # Drop only the line terminator: otherwise the last column's
            # value would carry a trailing newline into the mapping.
            fields = line.rstrip("\r\n").split("\t")
            retval.setdefault(fields[from_col], []).append(fields[to_col])

    return retval
|
||||
|
||||
|
||||
def write_map(fd, d, from_dim, to_dim):
    """Write a mapping as text: a comment header followed by one row per key.

    The output consists of the lines ``# from: <from_dim>``,
    ``# to: <to_dim>`` and ``# description: ``, one empty line, and then one
    line per key holding the key followed by its values, separated by single
    spaces.

    Args:
        fd: Either an open writable text stream, which is left open, or a
            ``str`` path, which is opened for writing and closed before
            returning.
        d: Dict mapping each key to an iterable of values.
        from_dim: Name written to the ``# from:`` header line.
        to_dim: Name written to the ``# to:`` header line.
    """
    opened_here = isinstance(fd, str)
    if opened_here:
        fd = open(fd, "w")

    try:
        fd.write("# from: %s\n" % from_dim)
        fd.write("# to: %s\n" % to_dim)
        fd.write("# description: \n")
        fd.write("\n")

        for key, values in d.items():
            row = [str(key)]
            row.extend(str(value) for value in values)
            fd.write(" ".join(row) + "\n")
    finally:
        # Close only what this function opened, even when a write fails.
        if opened_here:
            fd.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: read the requested column pair from the annotation
    # file and write the resulting mapping to standard output.
    fn, from_dim, to_dim = parse_cmdline()

    try:
        m = build_map(fn, from_dim, to_dim)
    except (IOError, ValueError) as err:
        # Unreadable file or unknown column name: report it without a
        # traceback.
        sys.stderr.write("laydi-mapping-illumina: %s\n" % err)
        sys.exit(1)

    if m is None:
        # build_map returns None when the file has no [Probes] section;
        # passing that on to write_map would fail with an AttributeError.
        sys.stderr.write(
            "laydi-mapping-illumina: no [Probes] section found in %s\n" % fn)
        sys.exit(1)

    write_map(sys.stdout, m, from_dim, to_dim)
|
||||
|
Reference in New Issue
Block a user