-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathplotTiles.py
More file actions
executable file
·147 lines (120 loc) · 4.9 KB
/
plotTiles.py
File metadata and controls
executable file
·147 lines (120 loc) · 4.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python
import os
import sys
import argparse
import logging
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
def getOptions():
    """Parse the command-line arguments for this script.

    Arguments are read from ``sys.argv`` by argparse:

    * ``-i/--input``  (required) coordinate table, stored as ``args.input``
    * ``-o/--outdir`` (required) output directory, stored as ``args.out``
    * ``-N``          number of sequences to plot, stored as ``args.num``
                      (integer, default 10)
    * ``-g/--log``    optional log file, stored as ``args.log``

    Returns:
        The argparse namespace holding ``input``, ``out``, ``num`` and ``log``.
    """
    parser = argparse.ArgumentParser(description="Takes coordinate table created by fastqDumpCoords.py and plots the flowcell location for the top N duplicated reads.")
    parser.add_argument("-i", "--input", dest="input", action='store', required=True, help="fastqDumpCoords.py coordinate table [Required]")
    # The script writes a single PDF into this directory, so the help text says PDF.
    parser.add_argument("-o", "--outdir", dest="out", action='store', required=True, help="Directory to output PDF [Required]")
    # type=int lets argparse reject a non-numeric -N with a usage message up
    # front; callers that still wrap the value in int() keep working.
    parser.add_argument("-N", dest="num", action='store', type=int, default=10, required=False, help="Number of sequences to plot [Default 10]")
    parser.add_argument("-g", "--log", dest="log", action='store', required=False, help="Log File")
    args = parser.parse_args()
    return args
def setLogger(fname, loglevel):
    """Set up file-based logging through logging.basicConfig.

    fname    -- path of the file that receives the log records
    loglevel -- logging threshold passed as ``level`` (e.g. logging.INFO)

    Each record is laid out as "<time> - <level name> - <message>".
    """
    record_layout = '%(asctime)s - %(levelname)s - %(message)s'
    settings = {
        'filename': fname,
        'level': loglevel,
        'format': record_layout,
    }
    logging.basicConfig(**settings)
def incZGA2(x):
    """Add one count to the module-level ``zarray`` for a GAIIx tile.

    ``x`` is any mapping-like row exposing a 'tile' entry. Tiles up to 60
    are counted in column 0 at row ``tile - 1``; larger tile numbers are
    counted in column 1 at row ``120 - tile``, so tile 120 lands in row 0
    and tile 61 in row 59.
    """
    tile = x['tile']
    in_first_column = tile <= 60
    row = tile - 1 if in_first_column else 120 - tile
    col = 0 if in_first_column else 1
    zarray[row, col] += 1
def incZHI(x):
    """Add one count to the module-level ``zarray`` for a HiSeq tile.

    ``x`` is any mapping-like row exposing 1-based 'plane', 'tileNum' and
    'swath' entries; each is shifted to a zero-based index and the cell
    ``zarray[plane, tileNum, swath]`` is incremented.
    """
    cell = tuple(x[key] - 1 for key in ('plane', 'tileNum', 'swath'))
    zarray[cell] += 1
def plotFlowGA2(zarray, pp, seq):
    """Draw a single-panel tile heatmap and append it as one page to a PDF.

    Args:
        zarray: 2-D numpy array of counts; rows are drawn top to bottom and
            labelled 1..number of rows, the two columns are labelled
            '1' and '2'.
        pp: open PdfPages object that receives the page.
        seq: text used as the figure title.

    The figure is closed after it has been saved so that repeated calls do
    not accumulate open figures.
    """
    column_labels = list('12')
    # Derive the row labels from the data instead of assuming 60 rows, so
    # the number of labels always matches the number of ticks.
    row_labels = list(range(1, zarray.shape[0] + 1))

    fig, ax1 = plt.subplots()
    try:
        heatmap1 = ax1.pcolor(zarray, cmap=plt.cm.Blues)

        # put the major ticks at the middle of each cell
        ax1.set_xticks(np.arange(zarray.shape[1]) + 0.5, minor=False)
        ax1.set_yticks(np.arange(zarray.shape[0]) + 0.5, minor=False)

        # Change axis labels to look better
        ax1.invert_yaxis()
        # tick_params only accepts 'x', 'y' or 'both' for axis; the previous
        # value 1 is not one of them.
        ax1.tick_params(axis='both', which='major', labelsize=10)
        ax1.set_xticklabels(column_labels, minor=False)
        ax1.set_yticklabels(row_labels, minor=False)

        # add titles
        ax1.set_title("top")

        # Add gradient bar to figure
        fig.colorbar(heatmap1)
        fig.suptitle(seq)

        # Save this figure explicitly rather than whatever pyplot currently
        # considers the active figure.
        pp.savefig(fig)
    finally:
        plt.close(fig)
def plotFlowHI(zarray, pp, rowNum, seq):
    """Draw side-by-side heatmaps of two surfaces and append them to a PDF.

    Args:
        zarray: numpy array of counts indexed as [surface, row, column];
            ``zarray[0]`` is drawn in the panel titled "top" and
            ``zarray[1]`` in the panel titled "bottom".
        pp: open PdfPages object that receives the page.
        rowNum: number of rows; rows are labelled 1..rowNum.
        seq: text used as the figure title.

    Both panels share one colour scale so that the single colourbar is
    valid for either panel. The figure is closed after it has been saved so
    that repeated calls do not accumulate open figures.
    """
    column_labels = list('123')
    row_labels = list(range(1, rowNum + 1))

    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    try:
        # Without a common vmin/vmax each panel is normalised to its own
        # maximum, and the colourbar (built from the top panel) would not
        # describe the bottom panel.
        vmax = max(np.max(zarray[0]), np.max(zarray[1]))
        heatmap1 = ax1.pcolor(zarray[0], cmap=plt.cm.Blues, vmin=0, vmax=vmax)
        ax2.pcolor(zarray[1], cmap=plt.cm.Blues, vmin=0, vmax=vmax)

        # put the major ticks at the middle of each cell
        ax1.set_xticks(np.arange(zarray[0].shape[1]) + 0.5, minor=False)
        ax1.set_yticks(np.arange(zarray[0].shape[0]) + 0.5, minor=False)
        ax2.set_xticks(np.arange(zarray[1].shape[1]) + 0.5, minor=False)
        ax2.set_yticks(np.arange(zarray[1].shape[0]) + 0.5, minor=False)

        # Change axis labels to look better; the y axis is shared, so
        # inverting it on ax1 applies to both panels.
        ax1.invert_yaxis()
        ax1.set_xticklabels(column_labels, minor=False)
        ax1.set_yticklabels(row_labels, minor=False)
        ax2.set_xticklabels(column_labels, minor=False)
        ax2.set_yticklabels(row_labels, minor=False)

        # add titles
        ax1.set_title("top")
        ax2.set_title("bottom")

        # Add gradient bar to figure, taking its space from both panels
        fig.colorbar(heatmap1, ax=[ax1, ax2])
        fig.suptitle(seq)

        # Save this figure explicitly rather than whatever pyplot currently
        # considers the active figure.
        pp.savefig(fig)
    finally:
        plt.close(fig)
def main():
    """ MAIN Function to execute everything

    Reads the coordinate table given with -i, ranks sequences by how often
    they occur, and writes one heatmap page per sequence for the N most
    frequent ones into ``<outdir>/<input basename>.pdf``.

    A sequence whose largest tile number is <= 120 is plotted as a GAIIx
    lane (60 x 2 grid); otherwise the tile number is split into plane,
    swath and tile digits and plotted as a HiSeq lane.
    """
    # incZGA2 / incZHI read the count grid through this module-level name.
    global zarray

    # Turn on Logging if option -g was given
    args = getOptions()
    if args.log:
        setLogger(args.log, logging.INFO)
    else:
        setLogger(os.devnull, logging.INFO)

    fname = os.path.basename(args.input)
    myname = os.path.splitext(fname)[0]
    pname = os.path.join(args.out, myname + '.pdf')

    logging.info("Importing coordinate table.")
    df = pd.read_csv(args.input)
    logging.info("Finished importing coordinate table.")

    logging.info("Summarizing sequence counts.")
    counts = df['sequence'].value_counts()
    logging.info("Finished summarizing sequence counts.")

    # Slice instead of indexing 0..N-1 so that a table with fewer than N
    # distinct sequences plots what is available instead of raising
    # IndexError. A negative N plots nothing, as before.
    topSeqs = counts.index[:max(int(args.num), 0)]

    pp = PdfPages(pname)
    try:
        for seq in topSeqs:
            # Copy so the helper columns added below go onto an independent
            # frame rather than a slice of df.
            subset = df[df['sequence'] == seq].copy()
            maxTileNum = subset['tile'].max()

            if maxTileNum <= 120:
                # This is a GAIIx lane
                zarray = np.zeros((60, 2))
                # Explicit row loop: the counters work by side effect, and
                # DataFrame.apply does not promise exactly one call per row
                # on every pandas version.
                for _, row in subset.iterrows():
                    incZGA2(row)
                plotFlowGA2(zarray, pp, seq)
            else:  # This is a Hiseq lane
                # Split hiseq tile information into parts: first digit is
                # the plane, second digit the swath, last two the tile.
                tileList = subset['tile'].values
                subset['plane'] = np.array([int(str(x)[0]) for x in tileList])
                subset['swath'] = np.array([int(str(x)[1]) for x in tileList])
                subset['tileNum'] = np.array([int(str(x)[-2:]) for x in tileList])

                # Determine if there are 8 rows or 16 rows
                # NOTE(review): this is the largest tile number seen for this
                # sequence only, so pages can differ in height -- confirm
                # whether the lane-wide maximum was intended.
                rowNum = int(subset['tileNum'].max())

                # Build storage array and plot heatmap
                zarray = np.zeros((2, rowNum, 3))
                for _, row in subset.iterrows():
                    incZHI(row)
                plotFlowHI(zarray, pp, rowNum, seq)
    finally:
        # Always finalise the PDF, even if a page fails to plot.
        pp.close()
# Script entry point: run the pipeline only when this file is executed
# directly, then record completion in the log.
if __name__ == "__main__":
    main()
    logging.info("Script complete.")