-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathplot_lambda.py
More file actions
executable file
·112 lines (91 loc) · 3.13 KB
/
plot_lambda.py
File metadata and controls
executable file
·112 lines (91 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python3
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import argparse
def get_color(value):
if value <= 0.7:
return "cadetblue"
elif value <= 1:
return "darkorange"
else:
return "darkred"
# Set up argument parser
parser = argparse.ArgumentParser(
description="This script plots methylation rate in CpG sites for lambda based on MultiQC data from nf-core/methylseq. Use path to lambda control analysis directory as input."
)
parser.add_argument("dir", type=str, help="path to analysis directory")
parser.add_argument("--prefix", type=str, default="", help="project code or flowcell")
args = parser.parse_args()
# Find the paths to the two files from the provided working directory
analysis_dir = args.dir
alignment_stats = os.path.join(
analysis_dir,
"results",
"multiqc",
"bismark",
"multiqc_data",
"mqc_bismark_alignment_1.txt",
)
methylation_stats = os.path.join(
analysis_dir,
"results",
"multiqc",
"bismark",
"multiqc_data",
"multiqc_bismark_methextract.txt",
)
# Check if the files exist
missing_files = []
if not os.path.exists(alignment_stats):
missing_files.append(alignment_stats)
if not os.path.exists(methylation_stats):
missing_files.append(methylation_stats)
if missing_files:
for file in missing_files:
print(f"The file {file} does not exist. Please check the path.")
exit()
data1 = pd.read_csv(
alignment_stats, delimiter="\t", usecols=["Sample", "Aligned Uniquely"]
)
data2 = pd.read_csv(
methylation_stats, delimiter="\t", usecols=["Sample", "percent_cpg_meth"]
)
merged_data = pd.merge(data1, data2, on="Sample", how="left")
merged_data["Sample"] = merged_data["Sample"].str.replace("_1_val_1", "")
merged_data.columns = ["Sample", "Reads", "%mCpG"]
# Plotting
plt.figure(figsize=(10, 6))
colors = merged_data["%mCpG"].apply(get_color)
# Create bar plot
bars = plt.bar(merged_data["Sample"], merged_data["Reads"], color=colors)
# Add horizontal line at y=5000
plt.axhline(y=5000, color="midnightblue", linestyle="--")
# Add labels and title
plt.xlabel("Sample")
plt.ylabel("Reads")
plt.title(f'{args.prefix} Lambda')
# Rotate x-axis labels for better readability
plt.xticks(rotation=90)
# Add %mCpG values on top of the bars if they are over 0.7
for bar, value in zip(bars, merged_data["%mCpG"]):
if value > 0.7:
plt.text(
bar.get_x() + bar.get_width() / 2,
bar.get_height(),
f"{value:.1f} %mCpG",
ha="center",
va="bottom",
)
# Add legend
legend_elements = [
Line2D([0], [0], color="cadetblue", lw=4, label="OK"),
Line2D([0], [0], color="darkorange", lw=4, label="<1.0"),
Line2D([0], [0], color="darkred", lw=4, label="Failed"),
]
plt.legend(handles=legend_elements, loc='center left', bbox_to_anchor=(1.02, 0.5), title="%mCpG QC")
# Save plot to a file in the provided working directory
plt.tight_layout(rect=[0,0,1,1])
plt.savefig(os.path.join(analysis_dir, f'{args.prefix}_lambda.png'))
print(f"The plot has been saved to '{os.path.join(analysis_dir, f'{args.prefix}_lambda.png')}'")