hisat-3n/evaluation/build_indexes.py

59 lines
3.0 KiB
Python
Raw Normal View History

2025-01-18 13:09:52 +00:00
#!/usr/bin/env python
import sys, os
use_message = '''
'''
# GRCh38 release 84
def build_indexes():
    """Build every aligner's index for every evaluation genome.

    Creates ``indexes/`` under the current working directory if it is
    missing and, inside it, one directory per aligner/genome pair: just
    ``<aligner>`` for the genome named ``genome`` and ``<aligner>_<genome>``
    otherwise.  A pair whose directory already exists is skipped, so
    re-running only builds what is missing.  Each build command is echoed
    to stderr and then run through the shell from inside the pair's
    directory; the commands refer to ``../../aligners/bin`` and
    ``../../data`` relative to that directory.

    The working directory in effect at call time is restored before
    returning, including when an exception is raised part-way through.

    Raises:
        AssertionError: if an aligner in the list has no build recipe.
    """
    aligners = ["HISAT2", "HISAT", "Bowtie", "STAR", "GSNAP", "BWA", "minimap2"]
    genomes = ["22_20-21M", "22", "genome"]

    start_dir = os.getcwd()
    if not os.path.exists("indexes"):
        os.mkdir("indexes")
    os.chdir("indexes")
    try:
        index_root = os.getcwd()
        for genome in genomes:
            for aligner in aligners:
                if genome == "genome":
                    index_dir = aligner
                else:
                    index_dir = aligner + "_" + genome
                if os.path.exists(index_dir):
                    continue

                # Resolve the command before creating the directory: an
                # unsupported aligner then fails without leaving behind an
                # empty directory that the next run would skip as "built".
                cmd = _index_command(aligner, genome)

                os.mkdir(index_dir)
                os.chdir(index_dir)
                # sys.stderr.write behaves the same on Python 2 and 3; the
                # Python 2 ``print >> sys.stderr`` statement raises
                # TypeError when executed by Python 3.
                sys.stderr.write(cmd + "\n")
                # NOTE: the exit status is not checked.  The command is a
                # ';' chain, so the status would only reflect its last step.
                os.system(cmd)
                os.chdir(index_root)
    finally:
        os.chdir(start_dir)


def _index_command(aligner, genome):
    """Return the shell command line that builds *aligner*'s index for *genome*.

    Multi-step recipes are joined with ``"; "``, so the shell runs each
    step regardless of whether the previous one succeeded.

    Raises:
        AssertionError: if *aligner* has no recipe.  Raised explicitly
            (rather than via ``assert``) so the check survives ``python -O``.
    """
    # One list of command templates per aligner; %(g)s is the genome name.
    recipes = {
        "HISAT2": [
            "../../aligners/bin/hisat2-build ../../data/%(g)s.fa %(g)s",
            "../../aligners/bin/hisat2-build -p 4 ../../data/%(g)s.fa"
            " --snp ../../data/%(g)s.snp --haplotype ../../data/%(g)s.haplotype"
            " %(g)s_snp",
            "../../aligners/bin/hisat2-build -p 4 ../../data/%(g)s.fa"
            " --ss ../../data/%(g)s.ss --exon ../../data/%(g)s.exon"
            " %(g)s_tran",
            "../../aligners/bin/hisat2-build -p 4 ../../data/%(g)s.fa"
            " --snp ../../data/%(g)s.snp --haplotype ../../data/%(g)s.haplotype"
            " --ss ../../data/%(g)s.ss --exon ../../data/%(g)s.exon"
            " %(g)s_snp_tran",
        ],
        "HISAT": [
            "../../aligners/bin/hisat-build ../../data/%(g)s.fa %(g)s",
            "../../aligners/bin/tophat -G ../../data/%(g)s.gtf"
            " --transcriptome-index=gtf %(g)s",
            "rm -rf tophat_out",
        ],
        "Bowtie": [
            "../../aligners/bin/bowtie-build ../../data/%(g)s.fa %(g)s",
        ],
        "Bowtie2": [
            "../../aligners/bin/bowtie2-build --threads 6 ../../data/%(g)s.fa %(g)s",
        ],
        "STAR": [
            "../../aligners/bin/STAR --runMode genomeGenerate --genomeDir ."
            " --genomeFastaFiles ../../data/%(g)s.fa",
            "mkdir gtf",
            "../../aligners/bin/STAR --runMode genomeGenerate --genomeDir gtf"
            " --genomeFastaFiles ../../data/%(g)s.fa"
            " --sjdbGTFfile ../../data/%(g)s.gtf --sjdbOverhang 99 --runThreadN 4",
        ],
        "GSNAP": [
            "../../aligners/bin/gmap_build -B ../../aligners/bin -D ."
            " -d %(g)s ../../data/%(g)s.fa",
        ],
        "BWA": [
            "../../aligners/bin/bwa index -p %(g)s.fa ../../data/%(g)s.fa",
        ],
        "minimap2": [
            "../../aligners/bin/minimap2 -x sr -d %(g)s.mmi ../../data/%(g)s.fa",
        ],
    }
    if aligner not in recipes:
        raise AssertionError("no index build recipe for aligner %r" % (aligner,))
    return "; ".join(recipes[aligner]) % {"g": genome}
# Run the index build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    build_indexes()