hisat-3n/hisat-3n-build

149 lines
4.6 KiB
Plaintext
Raw Normal View History

2025-01-18 13:09:52 +00:00
#!/usr/bin/env python
"""
Copyright 2015, Daehwan Kim <infphilo@gmail.com>
This file is part of HISAT 2.
HISAT 2 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
HISAT 2 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with HISAT 2. If not, see <http://www.gnu.org/licenses/>.
"""
import os
import sys
import inspect
import logging
import re
def build_args():
    """
    Split the command line into wrapper options and program arguments.

    Every element of sys.argv (the program name included) is compared
    against the flags this wrapper handles itself: '--large-index',
    '--debug' and '--verbose'.

    Returns an (options, argv) tuple. options maps each wrapper flag that
    was present to an empty string; argv is a new list holding all the
    other arguments in their original order. sys.argv is left untouched.
    """
    wrapper_flags = ('--large-index', '--debug', '--verbose')
    options = {}
    passthrough = []
    for token in sys.argv:
        if token in wrapper_flags:
            # Handled by the wrapper: record it and keep it out of the
            # argument list returned to the caller.
            options[token] = ""
        else:
            passthrough.append(token)
    return options, passthrough
def _total_file_size(fnames):
    """Return the combined size in bytes of the entries of fnames that exist."""
    total = 0
    for fn in fnames:
        try:
            total += os.path.getsize(fn)
        except OSError:
            # Entries that are missing or cannot be stat'ed add nothing.
            continue
    return total


def _value_after(argv, index):
    """Return argv[index + 1], or None when index is the last position."""
    following = index + 1
    if following < len(argv):
        return argv[following]
    return None


def _build_repeat_index(argv, ex_path):
    """
    Run hisat2-repeat (looked up in ex_path) before the index build.

    The thread count (-p), reference (-f), base change (--base-change) and
    the optional '<int>-<int>' length spec following --repeat-index are
    read from argv. A length spec, when present, is deleted from argv in
    place, so it is not part of the arguments the caller forwards later.

    A failing or missing hisat2-repeat only produces a message on stdout;
    no exception is raised for it.
    """
    # Imported locally because the file's import header does not provide it.
    import subprocess

    # Positional guess: when the first argument is an option, reference and
    # output are taken from the end of the command line; otherwise they are
    # the arguments right after the program name.
    if argv[1].startswith('-'):
        output_name = argv[-1]
    else:
        output_name = argv[2]

    n_threads = "1"
    repeat_length = "100-300"
    repeat_count = "5"
    reference_name = ""
    base_change = ""
    length_index = None

    for i, arg in enumerate(argv):
        value = _value_after(argv, i)
        if value is None:
            # An option in last position has no value to read.
            continue
        if arg == "--base-change":
            base_change = value
        elif arg == "-p":
            n_threads = value
        elif arg == "-f":
            reference_name = value
        elif arg == "--repeat-index" and re.match(r'\d+-\d+', value):
            repeat_length = value
            length_index = i + 1

    if length_index is not None:
        # Delete by position so an equal string elsewhere in argv survives.
        del argv[length_index]

    if not reference_name:
        if argv[1].startswith('-'):
            reference_name = argv[-2]
        else:
            reference_name = argv[1]

    repeat_cmd = [os.path.join(ex_path, "hisat2-repeat"),
                  "-p", n_threads,
                  "--repeat-length", repeat_length,
                  "--repeat-count", repeat_count]
    if base_change:
        # Only forwarded when given: an empty value would otherwise make the
        # option consume the next argument.
        repeat_cmd += ["--base-change", base_change]
    repeat_cmd += ["--3N", reference_name, output_name]

    # Argument list without a shell: file names containing spaces or shell
    # metacharacters reach hisat2-repeat unchanged.
    try:
        status = subprocess.call(repeat_cmd)
    except OSError:
        # The executable is missing or cannot be started.
        status = -1
    if status != 0:
        print("Can not automatically generate repeat database for HISAT-3N. Please manually generate repeat database by using hisat2-repeat.")


def main():
    """
    Choose the hisat2-build binary and replace this process with it.

    The small-index builder (hisat2-build-s) is the default. The large one
    (hisat2-build-l) is used when '--large-index' is given or when the
    comma-separated files named by the second-to-last argument total more
    than small_index_max_size bytes. '--debug' selects the '-debug'
    variants and '--verbose' enables INFO logging.

    When '--repeat-index' is among the arguments, hisat2-repeat is run
    first. '--3N' is then appended, '--wrapper basic-0' is inserted after
    the program name, and the chosen binary is started with os.execv, so
    this function does not return on success.
    """
    logging.basicConfig(level=logging.ERROR,
                        format='%(levelname)s: %(message)s')
    delta = 200
    # Size threshold above which the large-index builder is selected.
    small_index_max_size = 4 * 1024**3 - delta
    build_bin_name = "hisat2-build"
    build_bin_s = "hisat2-build-s"
    build_bin_l = "hisat2-build-l"
    # The binaries are looked up in the directory holding this script.
    curr_script = os.path.realpath(inspect.getsourcefile(main))
    ex_path = os.path.dirname(curr_script)
    build_bin_spec = os.path.join(ex_path, build_bin_s)

    options, argv = build_args()

    if '--verbose' in options:
        logging.getLogger().setLevel(logging.INFO)

    if '--debug' in options:
        build_bin_spec += '-debug'
        build_bin_l += '-debug'

    if '--large-index' in options:
        build_bin_spec = os.path.join(ex_path, build_bin_l)
    elif len(argv) >= 2:
        ref_fnames = argv[-2]
        if _total_file_size(ref_fnames.split(',')) > small_index_max_size:
            build_bin_spec = os.path.join(ex_path, build_bin_l)

    if "--repeat-index" in argv:
        # Build the repeat index first.
        _build_repeat_index(argv, ex_path)

    argv.append("--3N")
    argv[0] = build_bin_name
    argv.insert(1, 'basic-0')
    argv.insert(1, '--wrapper')
    logging.info('Command: %s %s', build_bin_spec, ' '.join(argv[1:]))
    # execv replaces the process without flushing Python's buffers, so
    # anything printed above has to be flushed explicitly first.
    sys.stdout.flush()
    sys.stderr.flush()
    os.execv(build_bin_spec, argv)
# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()