-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfilterUninformative.py
More file actions
179 lines (141 loc) · 4.85 KB
/
filterUninformative.py
File metadata and controls
179 lines (141 loc) · 4.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
#!/usr/bin/env python3
import argparse
import os
import shutil
import sys
def main():
    # Script entry point: parses the command line, collects the names of
    # uninformative loci from the IQ-TREE log file, and moves those loci
    # files into the blacklist directory.
    # Returns:
    #     int: 0 on success; 1 if the log file cannot be read, the log lists
    #     no blacklisted loci, or moving the files failed.
    args = Get_Arguments()
    loci_dir = args.dir
    blacklist_dir = args.blacklist_dir

    # Stop immediately when the log file is missing or unreadable.
    if check_if_file_exists(args.log) == 1:
        return 1

    # Filenames of the bad loci reported in the IQ-TREE log file.
    bad_loci = get_bad_files(args.log)

    # Nothing to move: the log reported no uninformative/invariant loci.
    if not bad_loci:
        print("No blacklisted loci were found in log file. Aborting program.")
        return 1

    parent_of_loci = os.path.abspath(os.path.join(loci_dir, os.pardir))
    print("\n\nSearching for log file in parent directory: "
          + parent_of_loci + "\n")
    print("Directory containing loci files (must be located within parent "
          "directory): " + loci_dir + "\n")

    move_status = move_blacklisted(bad_loci, loci_dir, blacklist_dir)
    return 1 if move_status == 1 else 0
################################################################################
def dir_path(string):
    # Validates that a path names an existing directory (used as the
    # argparse 'type' callable for '--dir').
    # Arguments:
    #     string: Name of directory
    # Returns:
    #     The same string, unchanged, when it is an existing directory.
    # Raises:
    #     NotADirectoryError: if the path is not an existing directory.
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return string
def move_blacklisted(list_of_files, dir, b_dir):
    # Creates blacklist directory and moves blacklisted files to it.
    # Arguments:
    #     list_of_files: filenames of the blacklisted loci, relative to 'dir'
    #     dir: path to the directory that holds the loci files
    #     b_dir: blacklist directory to create; a relative path is resolved
    #            against the working directory at the time of the call
    # Returns:
    #     int: 0 if no errors, 1 if error occurred (blacklist directory
    #     already exists, could not be created, or a file failed to move).
    # Side effect:
    #     The process working directory is changed to 'dir' and is not
    #     restored.
    parent_dir = os.getcwd()  # Working directory before changing into 'dir'

    # Resolve the destination before chdir so that it stays valid afterwards.
    # os.path.join leaves an absolute b_dir untouched; joining with "" adds
    # the trailing separator so the destination is always read as a directory.
    blist_path = os.path.join(parent_dir, b_dir)
    blist_dir = os.path.join(blist_path, "")

    # Refuse to reuse an existing blacklist directory.
    if os.path.exists(b_dir):
        print("Error: Blacklist directory '%s' already exists." % b_dir)
        return 1

    try:
        os.mkdir(b_dir)
    except OSError:
        print("Creation of the directory '%s' failed." % b_dir)
        # Without the destination directory nothing can be moved; report the
        # failure instead of carrying on.
        return 1
    print("Successfully created the blacklist directory\n")

    print("Changing into loci directory", end="...")
    os.chdir(dir)  # Changes directory to argument passed for '--dir'
    print("DONE!\n")

    print("Moving blacklisted loci files to " + blist_path, end="...\n")

    # Move every listed file into the blacklist directory; stop at the first
    # failure.
    for locus_file in list_of_files:
        try:
            shutil.move(locus_file, blist_dir)
        except (OSError, IOError) as e:
            print("Error moving {} to {}: {}".format(locus_file, blist_dir, e))
            return 1

    print("DONE!!!\n")
    print("Moved {} loci files to {} directory\n".format(len(list_of_files), b_dir))

    # Count the regular files left in the loci directory (now the cwd).
    remaining_file_count = sum(
        1 for name in os.listdir(".") if os.path.isfile(name)
    )
    print("{} loci files remain in {}\n".format(remaining_file_count, dir))

    return 0
def get_bad_files(file):
    # Get bad loci from IQ-TREE log file.
    # A line is taken to name a bad locus when it starts with "WARNING: No ";
    # the locus name is the last whitespace-separated token on that line, and
    # ".nex" is appended when the token does not already end with it.
    # Each filename is reported once, in order of first appearance, so a
    # locus mentioned in several warnings is not moved (and counted) twice.
    # Arguments:
    #     file: filename of IQ-TREE log file.
    # Returns:
    #     list: list of blacklisted file names (no duplicates).
    seen = set()
    result_list = []
    with open(file, "r") as fin:
        for line in fin:
            if not line.startswith("WARNING: No "):
                continue
            # split() discards surrounding whitespace, including the newline.
            name = line.split()[-1]
            if not name.endswith(".nex"):
                name = "{}.nex".format(name)
            if name not in seen:
                seen.add(name)
                result_list.append(name)
    return result_list
def check_if_file_exists(filename):
    # Check that a file exists and can be opened for reading.
    # Arguments:
    #     filename: Name of input file
    # Returns:
    #     int: 0 if successful, 1 if errors.
    try:
        # The 'with' block closes the handle only when open() succeeded, so a
        # failed open reaches the handler below and returns 1 cleanly.
        with open(filename, "r"):
            pass
    except IOError:
        print("\nError: The file " + filename + " does not exist or cannot be "
              "read.\n")
        return 1
    return 0
def Get_Arguments():
    # Parse command-line arguments using argparse.
    # Options:
    #     -l/--log            (required) input IQ-TREE log file
    #     -d/--dir            (required) directory containing the loci files;
    #                         validated with dir_path
    #     -b/--blacklist_dir  (optional) blacklist directory name,
    #                         default "blacklist"
    #     -h/--help           show the help menu
    # Returns:
    #     Object containing command-line arguments.

    # add_help=False so that -h/--help can be registered by hand below and
    # listed under the "Optional Arguments" group.
    parser = argparse.ArgumentParser(description="Blacklists phylogenetically "
                                                 "uninformative loci listed in "
                                                 "IQ-TREE log file.",
                                     add_help=False)

    required_args = parser.add_argument_group("Required Arguments")
    optional_args = parser.add_argument_group("Optional Arguments")

    required_args.add_argument("-l", "--log",
                               type=str,
                               required=True,
                               help="Input IQ-TREE log file")
    # The two help fragments are joined with a space so the text reads
    # "... for all loci".
    required_args.add_argument("-d", "--dir",
                               type=dir_path,
                               required=True,
                               help="Path to directory containing files for all "
                                    "loci")
    optional_args.add_argument("-b", "--blacklist_dir",
                               type=str,
                               required=False,
                               default="blacklist",
                               help="Specify name of blacklist directory; "
                                    "default = 'blacklist'")
    optional_args.add_argument("-h", "--help", action="help",
                               help="Displays this help menu")

    args = parser.parse_args()
    return args
################################################################################
# Run main() only when executed as a script, report its status, and hand that
# status to the shell as the process exit code.
if __name__ == "__main__":
    exit_status = main()
    print("Program finished with exit status " + str(exit_status) + "\n")
    sys.exit(exit_status)