hisat-3n/evaluation/tests/repeat/test_repeat.py
2025-01-18 21:09:52 +08:00

132 lines
3.1 KiB
Python

#!/usr/bin/env python
import sys, os, subprocess, random
from argparse import ArgumentParser, FileType
"""
"""
def reverse_complement(seq):
    """Return the reverse complement of a nucleotide sequence.

    A, C, G and T are swapped for their complementary base with case
    preserved; every other character (for example ``N``) is copied
    through unchanged. The output is the complemented sequence read
    back-to-front.

    Args:
        seq: Nucleotide string; may be empty.

    Returns:
        The reverse-complemented string, the same length as ``seq``.
    """
    complement = {
        'A': 'T', 'a': 't',
        'C': 'G', 'c': 'g',
        'G': 'C', 'g': 'c',
        'T': 'A', 't': 'a',
    }
    # Build the result with a single join: prepending one base at a time
    # re-copies the growing string on every step, which is quadratic for
    # chromosome-sized input.
    return "".join([complement.get(nt, nt) for nt in reversed(seq)])
"""
"""
def read_genome(genome_filename):
    """Load a FASTA file into a dictionary of sequences.

    Each record is keyed by the first whitespace-delimited token that
    follows ``>`` on its header line; the remaining lines up to the next
    header are concatenated into one string. Records whose name or
    sequence is empty are skipped, and a repeated name keeps the last
    record seen. A progress message is written to stderr when done.

    Args:
        genome_filename: Path of the FASTA file to read.

    Returns:
        dict mapping record name to its full sequence string.

    Raises:
        IndexError: If a header line contains no name after ``>``.
    """
    chr_dic = {}
    chr_name, chunks = "", []

    def store_record():
        # Only keep records that have both a name and at least one base.
        sequence = "".join(chunks)
        if chr_name and sequence:
            chr_dic[chr_name] = sequence

    # The context manager closes the file even if parsing raises.
    with open(genome_filename, "r") as genome_file:
        for line in genome_file:
            # Strip only the line terminator. Blindly dropping the last
            # character would eat a real base when the final line of the
            # file has no trailing newline.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                store_record()
                chr_name = line[1:].split()[0]
                chunks = []
            else:
                chunks.append(line)
    store_record()

    # Plain write works on both Python 2 and Python 3.
    sys.stderr.write("genome is loaded\n")
    return chr_dic
"""
"""
def generate_random_seq(seq_len):
    """Return a random DNA string of ``seq_len`` bases.

    Each base is picked from ``"ACGT"`` with one ``random.randint(0, 3)``
    call on the module-level generator, so the output is reproducible
    after ``random.seed(...)``.

    Args:
        seq_len: Number of bases to generate; must be positive.

    Returns:
        A string of length ``seq_len`` over the alphabet A, C, G, T.
    """
    assert seq_len > 0
    # ``range`` exists on both Python 2 and 3 (``xrange`` does not), and a
    # single join avoids rebuilding the string once per base.
    return "".join(["ACGT"[random.randint(0, 3)] for _ in range(seq_len)])
"""
"""
def test_repeat(verbose):
    """Print a synthetic FASTA of near-identical repeat copies to stdout.

    The random generator is seeded with 1, a 500-base backbone is
    generated, and several variants of it are derived (single-base
    substitutions, 2-base deletions, and deletion/insertion pairs).
    Every record is written as four lines: a ``>name`` header, 20 fresh
    random bases, the (variant) sequence, and 20 more fresh random bases.

    Args:
        verbose: Accepted for the command-line interface; not used by
            this function.
    """
    random.seed(1)
    backbone_seq = generate_random_seq(500)

    # Single-base substitutions at offset 50 and at offset 450.
    mm_seq = (backbone_seq[:50]
              + ("A" if backbone_seq[50] != "A" else "C")
              + backbone_seq[51:])
    # NOTE: mm_seq2 is built but not part of the emitted records below.
    mm_seq2 = (backbone_seq[:450]
               + ("A" if backbone_seq[450] != "A" else "C")
               + backbone_seq[451:])

    # Two 2-base deletions each (offsets 50/150 and 350/450).
    del_seq = backbone_seq[:50] + backbone_seq[52:150] + backbone_seq[152:]
    del_seq2 = backbone_seq[:350] + backbone_seq[352:450] + backbone_seq[452:]

    # One 2-base deletion combined with one "AAA" insertion.
    indel_seq = (backbone_seq[:30] + backbone_seq[32:130]
                 + "AAA" + backbone_seq[130:])
    indel_seq2 = (backbone_seq[:30] + "AAA"
                  + backbone_seq[30:130] + backbone_seq[132:])

    seqs = [
        ("bb01", backbone_seq),
        ("bb02", backbone_seq),
        ("bb03", backbone_seq),
        ("bb04", backbone_seq),
        ("bb05", backbone_seq),
        ("mm01", mm_seq),
        ("mm02", mm_seq),
        ("dd01", del_seq),
        ("dd02", del_seq),
        ("dd03", del_seq2),
        ("dd04", del_seq2),
        ("id01", indel_seq),
        ("id02", indel_seq),
        ("id03", indel_seq),
        ("id04", indel_seq),
        ("id05", indel_seq),
        ("id06", indel_seq),
        ("id07", indel_seq2),
    ]

    # Single-argument print(...) emits the same bytes on Python 2 and 3.
    # The flanks are generated in output order so the random stream, and
    # therefore the file contents, stay reproducible.
    for name, seq in seqs:
        print(">%s" % name)
        print(generate_random_seq(20))
        print(seq)
        print(generate_random_seq(20))
"""
"""
if __name__ == "__main__":
    # Script entry point: read the command-line flags, then write the
    # synthetic repeat FASTA to stdout.
    cli = ArgumentParser(description='')
    cli.add_argument(
        '-v', '--verbose',
        action='store_true',
        dest='verbose',
        help='also print some statistics to stderr')
    options = cli.parse_args()
    test_repeat(options.verbose)