Working version.

This commit is contained in:
Einar Ryeng 2010-09-27 19:45:56 +00:00
parent 3d68d27a56
commit f001d12584

View File

@ -1,6 +1,7 @@
#!/usr/bin/python
import getopt
import numpy
import sys
from laydi import dataset
@ -16,7 +17,7 @@ def parse_options():
def read_illumina_file(fn):
fd = open(fn)
line = fd.readline()
if line != "Illumina Inc. GenomeStudio version 1.7.0":
if line.strip() != "Illumina Inc. GenomeStudio version 1.7.0":
raise Exception("File cannot be recognized as Illumina textual data")
headers = {}
@ -31,15 +32,38 @@ def read_illumina_file(fn):
values = []
line = fd.readline()
while line != "":
values.append[x.strip() for x in line.split('\t')
values.append([x.strip() for x in line.split('\t')])
line = fd.readline()
probe_col = col_headers.find("ProbeID")
probe_col = col_headers.index("ProbeID")
print "probe id column:"
header_cols = []
samples = []
for i, colname in enumerate(col_headers):
if colname.startswith("AVG_Signal-"):
header_cols.append(i)
samples.append(colname.split("-", 1)[1])
print header_cols
print samples
a = numpy.array(values)
m = numpy.array(a[:,header_cols], dtype='d')
print m
probe_ids = list(a[:, probe_col])
print "samples: ", len(samples)
print "probe_ids: ", len(probe_ids)
print "shape: ", m.shape
ds = dataset.Dataset(m.transpose(), [('samples', samples), ('probe-ids', probe_ids)], name="Average Expr.")
dataset.write_ftsv("test.ftsv", ds)
if __name__ == '__main__':
    # Script entry point: the first element returned by parse_options() is
    # used as the path of the Illumina export handed to read_illumina_file().
    # The earlier `fn = params[0]` assignment was dropped because `params`
    # is not defined at module scope and would raise NameError.
    fn = parse_options()[0]
    read_illumina_file(fn)