hisat-3n/scripts/test/simple_tests.pl

4806 lines
278 KiB
Perl
Raw Normal View History

2025-01-18 13:09:52 +00:00
#!/usr/bin/perl -w
#
# Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
#
# This file is part of Bowtie 2.
#
# Bowtie 2 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Bowtie 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
#
##
# Give simple tests with known results to bowtie2.
#
use strict;
use warnings;
use Getopt::Long;
use FindBin qw($Bin);
use lib $Bin;
use List::Util qw(max min);
use Data::Dumper;
use DNA;
use Clone qw(clone);
use Test::Deep;
my $bowtie2 = "";
my $bowtie2_build = "";
my $skipColor = 1;
GetOptions(
"bowtie2=s" => \$bowtie2,
"bowtie2-build=s" => \$bowtie2_build,
"skip-color" => \$skipColor) || die "Bad options";
if(! -x $bowtie2 || ! -x $bowtie2_build) {
my $bowtie2_dir = `dirname $bowtie2`;
my $bowtie2_exe = `basename $bowtie2`;
my $bowtie2_build_exe = `basename $bowtie2_build`;
chomp($bowtie2_dir);
chomp($bowtie2_exe);
chomp($bowtie2_build_exe);
system("make -C $bowtie2_dir $bowtie2_exe $bowtie2_build_exe") && die;
}
(-x $bowtie2) || die "Cannot run '$bowtie2'";
(-x $bowtie2_build) || die "Cannot run '$bowtie2_build'";
my @cases = (
{ name => "Left-align insertion",
ref => [ "GCGATATCTACGACTGCTACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ],
norc => 1,
reads => [ "ACAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA" ],
# ref: AC-AAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
# read: ACAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
# 0123456789012345678901234567890123456789
cigar => [ "2M1I40M" ],
samoptflags => [ {
"MD:Z:42" => 1,
"YT:Z:UU" => 1,
"NM:i:1" => 1,
"XG:i:1" => 1,
"XO:i:1" => 1,
"AS:i:-8" => 1 } ],
report => "",
args => ""
},
{ name => "Left-align deletion",
ref => [ "GCGATATCTACGACTGCTACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ],
norc => 1,
reads => [ "ACGTACAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA" ],
# ref: ACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
# read: ACGTAC-AAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
# 012345678901234567890123456789012345678
cigar => [ "6M1D39M" ],
samoptflags => [ {
"MD:Z:6^A39" => 1,
"YT:Z:UU" => 1,
"NM:i:1" => 1,
"XG:i:1" => 1,
"XO:i:1" => 1,
"AS:i:-8" => 1 } ],
report => "",
args => ""
},
{ name => "Left-align insertion with mismatch at LHS",
ref => [ "GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ],
norc => 1,
reads => [ "TATCTACGACTGCTACGCCCTAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC" ],
# ref: GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC
# read: TATCTACGACTGCTACGCCC-TAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC
# 01234567890123456789-012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6
cigar => [ "20M1D45M" ],
samoptflags => [ {
"MD:Z:20^A0A44" => 1,
"YT:Z:UU" => 1,
"NM:i:2" => 1,
"XG:i:1" => 1,
"XO:i:1" => 1,
"XM:i:1" => 1,
"AS:i:-14" => 1 } ],
report => "",
args => ""
},
# This won't necessarily pass because the original location of the deletion
# might
#{ name => "Left-align deletion with mismatch at LHS",
# ref => [ "GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ],
# norc => 1,
# reads => [ "TATCTACGACTGCTACGCCAAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC" ],
# # ref: GCGATATCTACGACTGCTACGCCC-AAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC
# # read: TATCTACGACTGCTACGCCAAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC
# # 01234567890123456789-012345678901234567890123456789012345678901234
# # 0 1 0 1 2 3 4
# cigar => [ "20M1I45M" ],
# samoptflags => [ {
# "MD:Z:19C45" => 1,
# "YT:Z:UU" => 1,
# "NM:i:2" => 1,
# "XG:i:1" => 1,
# "XO:i:1" => 1,
# "XM:i:1" => 1,
# "AS:i:-14" => 1 } ],
# report => "",
# args => ""
#},
{ name => "Flags for when mates align non-concordantly, with many alignments for one",
# 012345678
ref => [ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCG" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
# 0 1 2 3 4 5 6 7 8 9 0 1
# 0 1
norc => 1,
mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ],
mate2s => [ "ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA" ],
# 981 1064 1147
# ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA
samflags_map => [{ 981 => (1 | 128), 1064 => (1 | 128), 1147 => (1 | 128), 2 => (1 | 64) }],
report => "",
args => ""
},
{ name => "Flags for when mates align non-concordantly, with many alignments for one",
# 012345678
ref => [ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAG" ],
# 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2
# 0 1 2 3 4 5 6 7 8 9 0 1
# 0 1
norc => 1,
mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ],
mate2s => [ "ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA" ],
tlen_map => [{ 2 => 1021, 981 => -1021 }],
samflags_map => [{ 981 => (1 | 128), 2 => (1 | 64) }],
report => "",
args => ""
},
{ name => "Flags for when mates align non-concordantly, with many alignments for one",
# 012345678
ref => [ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCG" ],
# 01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8
# 0 * 1 * 2 *
# 0 1
norc => 1,
mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ],
mate2s => [ "TCGTCGTGATGCGTCAGCTCGGATAGCCAGTACGTAGCTCGT" ],
# 981 1064 1147
# ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA
samflags_map => [{ 79 => (1 | 2 | 16 | 128), 162 => (1 | 2 | 16 | 128), 245 => (1 | 2 | 16 | 128), 2 => (1 | 2 | 32 | 64) }],
report => "",
args => ""
},
# Checking MD:Z strings for alignment
{ name => "MD:Z 1",
ref => [ "CACGATCGACTTGA"."C"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
reads => [ "CACGATCGACTTGG". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
hits => [ { 0 => 1 } ],
samoptflags => [ {
"AS:i:-14" => 1, # alignment score
"XM:i:1" => 1, # num mismatches
"XO:i:1" => 1, # num gap opens
"XG:i:1" => 1, # num gap extensions
"NM:i:2" => 1, # num edits
"MD:Z:13^A0C39" => 1, # mismatching positions/bases
"YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc)
} ] },
{ name => "MD:Z 2",
ref => [ "CACGATCGACTTGA"."A"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
reads => [ "CACGATCGACTTGG". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
# 0123456789012 012345678901234567890123456789012345678
hits => [ { 0 => 1 } ],
samoptflags => [ {
"AS:i:-14" => 1, # alignment score
"XM:i:1" => 1, # num mismatches
"XO:i:1" => 1, # num gap opens
"XG:i:1" => 1, # num gap extensions
"NM:i:2" => 1, # num edits
"MD:Z:13^A0A39" => 1, # mismatching positions/bases
"YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc)
} ] },
{ name => "MD:Z 3",
ref => [ "CACGATCGACTTGT"."AA"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
reads => [ "CACGATCGACTTGC". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
# 0123456789012 012345678901234567890123456789012345678
hits => [ { 0 => 1 } ],
samoptflags => [ {
"AS:i:-17" => 1, # alignment score
"XM:i:1" => 1, # num mismatches
"XO:i:1" => 1, # num gap opens
"XG:i:2" => 1, # num gap extensions
"NM:i:3" => 1, # num edits
"MD:Z:13^TA0A39" => 1, # mismatching positions/bases
"YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc)
} ] },
{ name => "MD:Z 4",
ref => [ "CACGATCGACTTGN"."NN"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
reads => [ "CACGATCGACTTGC". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ],
# 0123456789012 012345678901234567890123456789012345678
hits => [ { 0 => 1 } ],
samoptflags => [ {
"AS:i:-12" => 1, # alignment score
"XN:i:3" => 1, # num ambiguous ref bases
"XM:i:1" => 1, # num mismatches
"XO:i:1" => 1, # num gap opens
"XG:i:2" => 1, # num gap extensions
"NM:i:3" => 1, # num edits
"MD:Z:13^NN0N39" => 1, # mismatching positions/bases
"YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc)
} ] },
#
# Local alignment
#
# Local alignment for a short perfect hit where hit spans the read
{ name => "Local alignment 1",
ref => [ "TTGT" ],
reads => [ "TTGT" ],
args => "--local --policy \"MIN=L,0.0,0.75\"",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:4=" ],
cigar => [ "4M" ],
samoptflags => [ {
"AS:i:8" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:4" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YP:i:0" => 1, # read aligned repetitively in paired fashion
"YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc)
} ] },
# T T G A T T G A
# T x T x
# T x T
# G x G
# T T
# Local alignment for a short hit where hit is trimmed at one end
{ name => "Local alignment 2",
ref => [ "TTGA" ],
reads => [ "TTGT" ],
args => "--local --policy \"MIN=L,0.0,0.75\\;SEED=0,3\\;IVAL=C,1,0\"",
report => "-a",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:3=1S" ],
cigar => [ "3M1S" ],
samoptflags => [ {
"AS:i:6" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:3" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YP:i:0" => 1, # read aligned repetitively in paired fashion
"YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc)
} ] },
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
# T T G T T C G T T T G T T C G T
# 0 T x
# 1 T x
# 2 G x
# 3 T x
# 4 T x
# 5 C x
# 6 G x
# 7 T x
# 8 T x
# 9 T x
# 0 G x
# 1 T x
# 2 T x
#
# Score=130
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
# T T G T T C G T T T G T T C G T
# 0 T x
# 1 T x
# 2 G x
# 3 T x
# 4 T x
# 5 C x
# 6 G x
# 7 T x
# 8 T
# 9 T
# 0 G
# 1 T
# 2 T
#
# Score=80
# Local alignment for a perfect hit
{ name => "Local alignment 3",
# TTGTTCGT
# TTGTTCGT
ref => [ "TTGTTCGTTTGTTCGT" ],
# 0123456789012345
# TTGTTCGTTTGTT
# TTGTTCGT-----
reads => [ "TTGTTCGTTTGTT" ],
args => "--local -L 8 -i C,1,0 --score-min=C,12",
report => "-a",
hits => [ { 0 => 1, 8 => 1 } ],
flags_map => [{
0 => "XM:0,XP:0,XT:UU,XC:13=",
8 => "XM:0,XP:0,XT:UU,XC:8="
}],
cigar_map => [{
0 => "13M",
8 => "8M5S"
}],
samoptflags_map => [{
0 => { "AS:i:26" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 },
8 => { "AS:i:16" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1 }
}]
},
# 1
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
# T T G T T C G T T T G T T C G T
# 0 T x
# 1 T x
# 2 G x
# 3 T x
# 4 T x
# 5 C x
# 6 G x
# 7 T x
# 8 T
# 9 T
# 10 G
# 1 T
# Local alignment for a hit that should be trimmed from the right end
{ name => "Local alignment 4",
ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGTTTGT" ],
args => "--local --policy \"SEED=0,3\\;IVAL=C,1,0\" --score-min=C,12",
report => "-a",
hits => [ { 0 => 1, 8 => 1 } ],
flags_map => [{
0 => "XM:0,XP:0,XT:UU,XC:12=",
8 => "XM:0,XP:0,XT:UU,XC:8=4S"
}],
cigar_map => [{
0 => "12M",
8 => "8M4S"
}],
samoptflags_map => [{
0 => { "AS:i:24" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:12" => 1 },
8 => { "AS:i:16" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1 }
}]
},
#
# Test some common featuers for the manual. E.g. when more than one
# alignment is reported in -k mode, what order are they reported in? They
# should be in order by alignment score.
#
{ name => "Alignment order -k",
# 012345678
ref => [ "GCGCATGCACATATCANNNNNGCGCATGCACATATCTNNNNNNNNGCGCATGCACATATTTNNNNNNNNNGCGCATGGTGTTATCA" ],
reads => [ "GCGCATGCACATATCA" ],
quals => [ "GOAIYEFGFIWDSFIU" ],
args => "--min-score C,-24,0 -L 4",
report => "-k 4"
},
{ name => "Alignment order -a",
# 012345678
ref => [ "GCGCATGCACATATCANNNNNGCGCATGCACATATCTNNNNNNNNGCGCATGCACATATTTNNNNNNNNNGCGCATGGTGTTATCA" ],
reads => [ "GCGCATGCACATATCA" ],
quals => [ "GOAIYEFGFIWDSFIU" ],
args => "--min-score C,-24,0 -L 4",
report => "-a"
},
#
# What order are mates reported in? Should be reporting in mate1/mate2
# order.
#
{ name => "Mate reporting order, -a",
# 012345678
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNCGGTAATACGGCCATCGCGGCGGCATTACTCGGCGACTGCACGAGCAGATATTGGGGGTCTAATATAACGTCTCATTAAAACGCTCTAGTCAGCTCATTGGCTCTA" ],
mate1s => [ "CTATCATCACGCGGATATT", "GGGGGGGGTCTACCCCTAA", "ATACGGCCATCGCGGCGGCATTACTCGGCG" ],
mate2s => [ "GGGGGGGGTCTACCCCTAA", "CTATCATCACGCGGATATT", "AGCCAATGAGCTGACTAGAGCGTTTT" ],
quals => [ "GOAIYEFGFIWDSFIU" ],
args => "",
report => "-a"
},
{ name => "Mate reporting order, -M 1",
# 012345678
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNCGGTAATACGGCCATCGCGGCGGCATTACTCGGCGACTGCACGAGCAGATATTGGGGGTCTAATATAACGTCTCATTAAAACGCTCTAGTCAGCTCATTGGCTCTA" ],
mate1s => [ "CTATCATCACGCGGATATT", "GGGGGGGGTCTACCCCTAA", "ATACGGCCATCGCGGCGGCATTACTCGGCG" ],
mate2s => [ "GGGGGGGGTCTACCCCTAA", "CTATCATCACGCGGATATT", "AGCCAATGAGCTGACTAGAGCGTTTT" ],
quals => [ "GOAIYEFGFIWDSFIU" ],
args => "",
report => "-M 1"
},
#
# Test dovetailing, containment, and overlapping
#
{ name => "Non-overlapping; no args",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATA" ],
mate2s => [ "CGCATCGACATTAATATCC" ],
pairhits => [{ "1,23" => 1 }],
mate1fw => 1, mate2fw => 1,
report => "-M 1"
},
{ name => "Non-overlapping; --no-discordant",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATA" ],
mate2s => [ "CGCATCGACATTAATATCC" ],
pairhits => [{ "1,23" => 1 }],
mate1fw => 1, mate2fw => 1,
report => "-M 1 --no-discordant"
},
{ name => "Non-overlapping; --no-discordant --no-mixed",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATA" ],
mate2s => [ "CGCATCGACATTAATATCC" ],
pairhits => [{ "1,23" => 1 }],
mate1fw => 1, mate2fw => 1,
report => "-M 1 --no-discordant --no-mixed"
},
{ name => "Non-overlapping; --no-discordant --no-mixed",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATA" ],
mate2s => [ "CGCATCGACATTAATATCC" ],
pairhits => [{ "1,23" => 1 }],
mate1fw => 1, mate2fw => 1,
report => "-M 1 --no-discordant --no-mixed"
},
{ name => "Non-overlapping; --no-dovetail",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATA" ],
mate2s => [ "CGCATCGACATTAATATCC" ],
pairhits => [{ "1,23" => 1 }],
mate1fw => 1, mate2fw => 1,
args => "--no-dovetail",
report => "-M 1"
},
{ name => "Non-overlapping; --un-conc=.tmp.simple_tests.pl",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATA" ],
mate2s => [ "CGCATCGACATTAATATCC" ],
pairhits => [{ "1,23" => 1 }],
mate1fw => 1, mate2fw => 1,
args => "--un-conc=.tmp.simple_tests.pl",
report => "-M 1"
},
{ name => "Non-overlapping; --no-overlap",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATA" ],
mate2s => [ "CGCATCGACATTAATATCC" ],
pairhits => [{ "1,23" => 1 }],
mate1fw => 1, mate2fw => 1,
args => "--no-overlap",
report => "-M 1"
},
{ name => "Overlapping; no args",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATATTA" ],
mate2s => [ "TTAGCGCATCGACATTAATATCC" ],
pairhits => [{ "1,19" => 1 }],
mate1fw => 1, mate2fw => 1,
args => "",
report => "-M 1"
},
{ name => "Overlapping; --no-dovetail",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATATTA" ],
mate2s => [ "TTAGCGCATCGACATTAATATCC" ],
pairhits => [{ "1,19" => 1 }],
mate1fw => 1, mate2fw => 1,
args => "--no-dovetail",
report => "-M 1"
},
{ name => "Overlapping; --no-contain",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATATTA" ],
mate2s => [ "TTAGCGCATCGACATTAATATCC" ],
pairhits => [{ "1,19" => 1 }],
mate1fw => 1, mate2fw => 1,
args => "--no-contain",
report => "-M 1"
},
{ name => "Overlapping; --no-overlap",
ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ],
# 01234567890123456789012345678901234567890123456
mate1s => [ "GCTATCATCACGCGGATATTA" ],
mate2s => [ "TTAGCGCATCGACATTAATATCC" ],
pairhits => [],
mate1fw => 1, mate2fw => 1,
args => "--no-overlap",
report => "-M 1"
},
#
# Test XS:i with quality scaling
#
{ name => "Scoring params 1",
# 012345678
ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTTAATTTTATAAACACCTCGCTGCGGGG" ],
reads => [ "GCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTTAATTTTATAAACACCTC" ],
quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfglduhiuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6 7 8
args => "",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "88M" ],
samoptflags => [ {
"AS:i:0" => 1,
"YT:Z:UU" => 1,
"MD:Z:88" => 1 } ],
},
{ name => "Scoring params 2",
# 012345678
ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ],
reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ],
# * -1 * -6 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3
quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6 7 8
args => "--ignore-quals --score-min C,-40,0 -N 1 -L 20",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "37M4D30M3I14M" ],
# 37M4D30M13I4M
samoptflags => [ {
"AS:i:-38" => 1,
"YT:Z:UU" => 1,
"MD:Z:0G11A24^GACC44" => 1,
"NM:i:9" => 1,
"XM:i:2" => 1,
"XG:i:7" => 1,
"XO:i:2" => 1 } ],
},
{ name => "Scoring params 3",
# 012345678
ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ],
reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ],
# * -1 * -6 **** -5 -3 -3 -3 -3 *** -1 -2 -2 -2
quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6 7 8
args => "--ignore-quals --rfg 1,2 --score-min C,-40,0 -N 1 -L 20",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "37M4D30M3I14M" ],
samoptflags => [ {
"AS:i:-31" => 1,
"YT:Z:UU" => 1,
"MD:Z:0G11A24^GACC44" => 1,
"NM:i:9" => 1,
"XM:i:2" => 1,
"XG:i:7" => 1,
"XO:i:2" => 1 } ],
},
{ name => "Scoring params 4",
# 012345678
ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ],
reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ],
# * -1 * -6 **** -1 -2 -2 -2 -2 *** -5 -3 -3 -3
quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6 7 8
args => "--ignore-quals --rdg 1,2 --score-min C,-40,0 -N 1 -L 20",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "37M4D30M3I14M" ],
samoptflags => [ {
"AS:i:-30" => 1,
"YT:Z:UU" => 1,
"MD:Z:0G11A24^GACC44" => 1,
"NM:i:9" => 1,
"XM:i:2" => 1,
"XG:i:7" => 1,
"XO:i:2" => 1 } ],
},
{ name => "Scoring params 5",
# 012345678
ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ],
reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ],
# * -1 * -8 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3
quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6 7 8
args => "--ignore-quals --mp 8 --score-min C,-40,0 -N 1 -L 20",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "37M4D30M3I14M" ],
samoptflags => [ {
"AS:i:-40" => 1,
"YT:Z:UU" => 1,
"MD:Z:0G11A24^GACC44" => 1,
"NM:i:9" => 1,
"XM:i:2" => 1,
"XG:i:7" => 1,
"XO:i:2" => 1 } ],
},
{ name => "Scoring params 6",
# 012345678
ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ],
reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ],
# * -4 * -6 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3
quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6 7 8
args => "--ignore-quals --np 4 --score-min C,-41,0 -N 1 -L 20",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "37M4D30M3I14M" ],
samoptflags => [ {
"AS:i:-41" => 1,
"YT:Z:UU" => 1,
"MD:Z:0G11A24^GACC44" => 1,
"NM:i:9" => 1,
"XM:i:2" => 1,
"XG:i:7" => 1,
"XO:i:2" => 1 } ],
},
#
# Test XS:i with quality scaling
#
{ name => "Q XS:i 1a",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIIIA" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-5" => 1, "XS:i:-5" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1a ! --mp 3,3",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIII!" ],
args => "-L 6 --mp 3,3 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-3" => 1, "XS:i:-3" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1a ! --mp 3,6",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIII!" ],
args => "-L 6 --mp 6,3 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-3" => 1, "XS:i:-3" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1a I --mp 3,3",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIIII" ],
args => "-L 6 --mp 3,3 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-3" => 1, "XS:i:-3" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1a I --mp 3,6",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIIII" ],
args => "-L 6 --mp 6,3 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-6" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1a --ignore-quals",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIIIA" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-6" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1b",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIII5" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-4" => 1, "XS:i:-4" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1b --ignore-quals",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIII5" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-6" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1c",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIII4" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-3" => 1, "XS:i:-3" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "Q XS:i 1c --ignore-quals",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
quals => [ "IIIIIII4" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-6" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
# One mate aligns. Ensuring that the unmapped mate gets reference
# information filled in from the other mate.
{ ref => [ "CATCGACTGAGACTCGTACGACAATTACGCGCATTATTCGCATCACCAGCGCGGCGCGCGCCCCCTAT" ],
# 01234567890123456789012345678901234567890123456789012345678901234567
# 0 1 2 3 4 5 6
# ATCACCAGCGTTTCGCGCGAAACCTA
mate1s => [ "ATCGACTGAGACTCGTACGACAATTAC" ],
mate2s => [ "TAGGTTTCGCGCGAAACGCTGGTGAT" ],
pairhits_orig => [{ "1,1" => 1}]
},
{ ref => [ "TTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,4,C,1,0",
report => "-M 1"
},
# Testing that DEFAULT is -M 1
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGT" ],
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [
{ "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 }
],
},
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGT" ],
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [
{ "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 }
],
},
#
# Test XS:i
#
{ name => "XS:i 1",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6",
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:-6" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:7A0" => 1,
"NM:i:1" => 1, "XM:i:1" => 1 } ],
},
{ name => "XS:i 2",
ref => [ "TTGTTCGATTGTTCGA" ],
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,7,C,1 --score-min=C,-5",
report => "",
cigar => [ "*" ],
samoptflags => [{ "YT:Z:UU" => 1, "YM:i:0" => 1 }],
},
{ name => "XS:i 3a",
ref => [ "TTGTTCGATTGTTCGT" ],
# TTGTTCGT
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:0" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:8" => 1,
"NM:i:0" => 1, "XM:i:0" => 1 } ],
},
{ name => "XS:i 3b",
ref => [ "TTGTTCGATTGTTCGT" ],
# TTGTTCGT
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6 --seed=52",
report => "-M 1",
hits => [ { 8 => 1 } ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:0" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:8" => 1,
"NM:i:0" => 1, "XM:i:0" => 1 } ],
},
{ name => "XS:i 3c",
ref => [ "TTGTTCGATTGTTCGT" ],
# TTGTTCGT
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,7,C,1 --score-min=C,-6 --seed=53",
report => "-M 2",
hits => [ { 8 => 1 } ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:0" => 1, "XS:i:-6" => 1,
"YM:i:0" => 1, "YT:Z:UU" => 1,
"MD:Z:8" => 1,
"NM:i:0" => 1, "XM:i:0" => 1 } ],
},
{ name => "XS:i 4a",
ref => [ "TTGTTCAATTGTTCGATTGTTCGT" ],
# |||||| ||||||| ||||||||
# TTGTTCGT||||||| ||||||||
# TTGTTCGT||||||||
# TTGTTCGT
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,6,C,1 --score-min=C,-12 --seed=53",
report => "-M 2",
hits => [ { 16 => 1 } ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:0" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:8" => 1,
"NM:i:0" => 1, "XM:i:0" => 1 } ],
},
{ name => "XS:i 4b",
ref => [ "TTGTTCAATTGTTCGATTGTTCGT" ],
# |||||| ||||||| ||||||||
# TTGTTCGT||||||| ||||||||
# TTGTTCGT||||||||
# TTGTTCGT
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,6,C,1 --score-min=C,-12 --seed=54",
report => "-M 3",
hits => [ { 16 => 1 } ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:0" => 1, "XS:i:-6" => 1,
"YM:i:0" => 1, "YT:Z:UU" => 1,
"MD:Z:8" => 1,
"NM:i:0" => 1, "XM:i:0" => 1 } ],
},
{ name => "XS:i 5a",
ref => [ "TTGTTCAATTGTTCGATTGTTCGTTTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAA" ],
# |||||| ||||||| |||||||||||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| ||||||
# TTGTTCGT||||||| ||||||||TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT||||||
# TTGTTCGT|||||||| TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT
# TTGTTCGT
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,6,C,1,1 --score-min=C,-12 --seed=54",
report => "-M 1",
hits => [ { 16 => 1 } ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:0" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:8" => 1,
"NM:i:0" => 1, "XM:i:0" => 1 } ],
},
{ name => "XS:i 5b",
ref => [ "TTGTTCAATTGTTCGATTGTTCGTTTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAA" ],
# |||||| ||||||| |||||||||||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| ||||||
# TTGTTCGT||||||| ||||||||TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT||||||
# TTGTTCGT|||||||| TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT
# TTGTTCGT
reads => [ "TTGTTCGT" ],
args => "--multiseed=0,5,C,1,1 --score-min=C,-12 --seed=55",
report => "-M 1",
hits => [ { 16 => 1 } ],
cigar => [ "8M" ],
samoptflags => [ {
"AS:i:0" => 1, "XS:i:-6" => 1,
"YM:i:1" => 1, "YT:Z:UU" => 1,
"MD:Z:8" => 1,
"NM:i:0" => 1, "XM:i:0" => 1 } ],
},
# Testing BWA-SW-like scoring
#
# a*max{T,c*log(l)} = 1 * max(30, 5.5 * log(56)) = 1 * max(30, 22.139) = 30
#
{ name => "BWA-SW-like 1",
ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ],
# ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
reads => [ "TAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATC" ],
# 01234567890123456789012345678901234567890123456789012345
# 1 2 3 4 5
args => "--bwa-sw-like",
hits => [{ 3 => 1 }],
samoptflags => [{ "AS:i:56" => 1, "NM:i:0" => 1,
"MD:Z:56" => 1, "YT:Z:UU" => 1 }]
},
{ name => "BWA-SW-like 2",
# 0123
ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ],
# |||||||||||||||||||||||||| ||||||||||||||||||||||||||||
reads => [ "TAGATTCCACTACGCTAACCATCGAGTTCTCGTCTCAGAGTTTCGATAGGAAAATC" ],
# 01234567890123456789012345678901234567890123456789012345
# 1 2 3 4 5
args => "--bwa-sw-like -L 18",
hits => [{ 3 => 1 }],
# Tot matches = 54
# Tot penalties = 6
samoptflags => [{ "AS:i:48" => 1, "NM:i:2" => 1, "XM:i:2" => 1,
"MD:Z:26A0A28" => 1, "YT:Z:UU" => 1 }]
},
{ name => "BWA-SW-like 3",
# 0123
ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ],
# |||||||||||||||||||||||||| |||||||||||||||||||||||||||
reads => [ "TAGATTCCACTACGCTAACCATCGAG"."TCGTCTCAGAGTTTCGATAGGAAAATC" ],
# 01234567890123456789012345678901234567890123456789012345
# 1 2 3 4 5
args => "--bwa-sw-like -i C,1,0",
hits => [{ 3 => 1 }],
# Tot matches = 53
# Tot penalties = 11
samoptflags => [{ "AS:i:42" => 1, "NM:i:3" => 1, "XM:i:0" => 1,
"XO:i:1" => 1, "XG:i:3" => 1,
"MD:Z:26^AAC27" => 1, "YT:Z:UU" => 1 }]
},
# Some tricky SAM FLAGS field tests
{ name => "SAM paired-end where both mates align 1",
ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ],
# 012345678901234567890123456789012345678901234567890123456789012345678
# 0 1 2 3 4 5 6
mate1s => [ "CTATCTACGCTTCGGCGTCGGTGA" ],
mate2s => [ "GATTGTCACACACCCGGTCGT" ],
# -----------------------------------------------------
# 01234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5
# 0x1 template having multiple fragments in sequencing
# 0x2 each fragment properly aligned according to the aligner
# 0x4 fragment unmapped
# 0x8 next fragment in the template unmapped
# 0x10 SEQ being reverse complemented
# 0x20 SEQ of the next fragment in the template being reversed
# 0x40 the first fragment in the template
# 0x80 the last fragment in the template
pairhits => [{ "3,35" => 1 }],
norc => 1,
samflags_map => [{ 3 => (1 | 2 | 32 | 64), 35 => (1 | 2 | 16 | 128) }],
tlen_map => [{ 3 => 53, 35 => -53 }] },
{ name => "SAM paired-end where both mates align 2",
ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ],
# 012345678901234567890123456789012345678901234567890123456789012345678
# 0 1 2 3 4 5 6
mate1s => [ "TCACCGACGCCGAAGCGTAGATAG" ],
mate2s => [ "ACGACCGGGTGTGTGACAATC" ],
# -----------------------------------------------------
# 01234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5
# 0x1 template having multiple fragments in sequencing
# 0x2 each fragment properly aligned according to the aligner
# 0x4 fragment unmapped
# 0x8 next fragment in the template unmapped
# 0x10 SEQ being reverse complemented
# 0x20 SEQ of the next fragment in the template being reversed
# 0x40 the first fragment in the template
# 0x80 the last fragment in the template
mate1fw => 0,
mate2fw => 1,
pairhits => [{ "3,35" => 1 }],
norc => 1,
samflags_map => [{ 3 => (1 | 2 | 16 | 64), 35 => (1 | 2 | 32 | 128) }],
tlen_map => [{ 3 => 53, 35 => -53 }] },
{ name => "SAM paired-end where both mates align 3",
ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ],
# 012345678901234567890123456789012345678901234567890123456789012345678
# 0 1 2 3 4 5 6
mate1s => [ "CTATCTACGCTTCGGCGTCGGTGA" ],
mate2s => [ "ACGACCGGGTGTGTGACAATC" ],
# -----------------------------------------------------
# 01234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5
# 0x1 template having multiple fragments in sequencing
# 0x2 each fragment properly aligned according to the aligner
# 0x4 fragment unmapped
# 0x8 next fragment in the template unmapped
# 0x10 SEQ being reverse complemented
# 0x20 SEQ of the next fragment in the template being reversed
# 0x40 the first fragment in the template
# 0x80 the last fragment in the template
mate1fw => 1,
mate2fw => 1,
pairhits => [{ "3,35" => 1 }],
norc => 1,
samflags_map => [{ 3 => (1 | 2 | 64), 35 => (1 | 2 | 128) }],
tlen_map => [{ 3 => 53, 35 => -53 }] },
{ name => "SAM paired-end where mate #1 aligns but mate #2 doesn't",
ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ],
# 012345678901234567890123456789012345678901234567890123456789012345678
# 0 1 2 3 4 5 6
mate1s => [ "CTATCTACGCTTCGGCGTCGGCGA" ],
mate2s => [ "GATTGTCTTTTCCCGGAAAAATCGT" ],
# 0x1 template having multiple fragments in sequencing
# 0x2 each fragment properly aligned according to the aligner
# 0x4 fragment unmapped
# 0x8 next fragment in the template unmapped
# 0x10 SEQ being reverse complemented
# 0x20 SEQ of the next fragment in the template being reversed
# 0x40 the first fragment in the template
# 0x80 the last fragment in the template
pairhits => [{ "*,3" => 1 }],
norc => 1,
samflags_map => [{ 3 => (1 | 8 | 64), "*" => (1 | 4 | 128) }] },
{ name => "SAM paired-end where neither mate aligns",
ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ],
# 012345678901234567890123456789012345678901234567890123456789012345678
# 0 1 2 3 4 5 6
mate1s => [ "CTATATACGAAAAAGCGTCGGCGA" ],
mate2s => [ "GATTGTCTTTTCCCGGAAAAATCGT" ],
# 0x1 template having multiple fragments in sequencing
# 0x2 each fragment properly aligned according to the aligner
# 0x4 fragment unmapped
# 0x8 next fragment in the template unmapped
# 0x10 SEQ being reverse complemented
# 0x20 SEQ of the next fragment in the template being reversed
# 0x40 the first fragment in the template
# 0x80 the last fragment in the template
pairhits => [{ "*,*" => 1 }],
norc => 1,
samoptflags_flagmap => [{
(1 | 4 | 8 | 64) => { "YT:Z:UP" => 1 },
(1 | 4 | 8 | 128) => { "YT:Z:UP" => 1 }
}] },
{ name => "SAM paired-end where both mates align, but discordantly",
ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ],
# 012345678901234567890123456789012345678901234567890123456789012345678
# 0 1 2 3 4 5 6
mate1s => [ "CTATCTACGCTTCGGCGTCGGCGA" ],
mate2s => [ "ACGACCGGGTGTGTGACAATC" ],
# -----------------------------------------------------
# 01234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5
# 0x1 template having multiple fragments in sequencing
# 0x2 each fragment properly aligned according to the aligner
# 0x4 fragment unmapped
# 0x8 next fragment in the template unmapped
# 0x10 SEQ being reverse complemented
# 0x20 SEQ of the next fragment in the template being reversed
# 0x40 the first fragment in the template
# 0x80 the last fragment in the template
pairhits => [{ "3,35" => 1 }],
norc => 1,
samflags_map => [{ 3 => (1 | 64), 35 => (1 | 128) }],
# Which TLEN is right? Depends on criteria for when to infer TLEN. If
# criterion is mates are concordant, then it should be 0 here. If the
# criterion is that both mates align to the same chromosome, should be
# +-53
#tlen_map => [{ 3 => 0, 35 => 0 }] },
tlen_map => [{ 3 => 53, 35 => -53 }] },
{ name => "matchesRef regression 4",
ref => [ "CCGGGTCGTCACGCCCCGCTTGCGTCANGCCCCTCACCCTCCCTTTGTCGGCTCCCACCCCTCCCCATCCGTTGTCCCCGCCCCCGCCCGCCGGGTCGTCACGCCCCGCTTGCGTCANGC",
"GCTCGGAATTCGTGCTCCGNCCCGTACGGTT" ],
#
# NNNNNGA------A-------------------G-NTTT
# ||||||||||||||||||||||||||||||||||
# CCAAT-ATTTTTAATTTCCTCTATTTTTCTCTCGTCTTG
args => "--policy \"NP=Q\\;RDG=46.3220993654702\\;RFG=41.3796024365659\\;MIN=L,5.57015383125426,-3.28597145122829\\;NCEIL=L,0.263054599454459,0.130843661549367\\;SEED=1,29\\;IVAL=L,0.0169183264663712,3.75762168662522\" --overhang --trim5 6",
reads => [ "CTTTGCACCCCTCCCTTGTCGGCTCCCACCCATCCCCATCCGTTGTCCCCGCCCCCGCCCGCCGGTCGTCACTCCCCGTTTGCGTCATGCCCCTCACCCTCCCTTTGTCGGCTCGCACCCCTCCCCATCCGTTGTCCCCGCCCCCGCTCTCGGGGTCTTCACGCCCCGCTTGCTTCATGCCCCTCACTCGCACCCCG" ],
},
{ name => "matchesRef regression 3",
ref => [ "GAAGNTTTTCCAATATTTTTAATTTCCTCTATTTTTCTCTCGTCTTGNTCTAC" ],
#
# NNNNNGA------A-------------------G-NTTT
# ||||||||||||||||||||||||||||||||||
# CCAAT-ATTTTTAATTTCCTCTATTTTTCTCTCGTCTTG
args => "--policy \"MMP=R\\;MIN=L,8.8,-8.1\" --overhang",
reads => [ "CAAGACGAGAGAAAAATAGAGGAAATTAAAAATATTGG" ],
},
{ name => "matchesRef regression 2",
ref => ["GTTGTCGGCAGCTCTGGATATGTGNTCTCGGGTTTATNTCGTTGTCG",
"CCTTGTTNTTAATGCTGCCTGGTTTNG"],
args => "--policy \"RDG=2.02030755427021,2.81949533273331\\;MIN=L,-6.52134769703939,-3.39889659588514\\;IVAL=L,0.127835912101927\" --overhang --trim5 5",
mate1s => ["TCTGGCGGTTGCGAAGGCCCCTGGCGGTTGCTATGTCCTCTGGCGGTTGCGTTGTCGGCAGCTCG"],
mate2s => ["AGAACACATATCCAGAGCTGCCGACAACGAAATGAACCCGAGAGCACAAATCCAGAG"] },
# Regression test for an issue observed once
{ name => "matchesRef regression 1",
# 0 1 2 3 4 5 6 7
# 01234567890123456789012345678901234567890123456789012345678901234567890
ref => [ "AGGTCGACCGAAAGGCCTAGAGGTCGACCGACAATCTGACCATGGGGCGAGGAGCGAGTAC" ],
# ||||||||||||||||||||||||||||||||||||||||||||||||||
reads => [ "AAGGCCTAGAGGTCGACCGACAATCTGACCATGGGGCGAGGAGCGAGTACTGGTCTGGGG" ],
# 012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5
args => "--overhang" },
# 1 discordant alignment and one concordant alignment. Discordant because
# the fragment is too long.
{ name => "Discordant with different chromosomes",
ref => [ "TTTATAAAAATATTTCCCCCCCC",
"CCCCCCTGTCGCTACCGCCCCCCCCCCC" ],
# ATAAAAATAT GTCGCTACCG
# ATAAAAATAT TGTCGCTACC
# 01234567890123456789012
# 0 1 2
# 0123456789012345678901234567
# 0 1 2
mate1s => [ "ATAAAAATAT", "ATAAAAATAT" ],
mate2s => [ "GTCGCTACCG", "TGTCGCTACC" ],
mate1fw => 1,
mate2fw => 1,
args => "-I 0 -X 35",
# Not really any way to flag an alignment as discordant
pairhits => [ { "3,7" => 1 }, { "3,6" => 1 } ],
rnext_map => [ { 3 => 1, 7 => 0 }, { 3 => 1, 6 => 0 } ],
pnext_map => [ { 3 => 7, 7 => 3 }, { 3 => 6, 6 => 3 } ] },
{ name => "Fastq 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\@r0\nCATCGATCAGTATCTG\n+\nIIIIIIIIIIIIIIII",
hits => [{ 2 => 1 }] },
{ name => "Tabbed 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "r0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII",
hits => [{ 2 => 1 }] },
{ name => "Fasta 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => ">r0\nCATCGATCAGTATCTG",
hits => [{ 2 => 1 }] },
{ name => "Qseq 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"X", "Y",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1"),
hits => [{ 2 => 1 }] },
{ name => "Raw 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
raw => "CATCGATCAGTATCTG",
hits => [{ 2 => 1 }] },
# Like Fastq 1 but with extra newline
{ name => "Fastq 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\@r0\nCATCGATCAGTATCTG\n+\nIIIIIIIIIIIIIIII\n",
hits => [{ 2 => 1 }] },
{ name => "Tabbed 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "r0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n",
hits => [{ 2 => 1 }] },
{ name => "Fasta 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => ">r0\nCATCGATCAGTATCTG\n",
hits => [{ 2 => 1 }] },
{ name => "Qseq 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"X", "Y",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n",
hits => [{ 2 => 1 }] },
{ name => "Raw 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
raw => "CATCGATCAGTATCTG\n",
hits => [{ 2 => 1 }] },
# Like Fastq 1 but with many extra newlines
{ name => "Fastq 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }] },
{ name => "Tabbed 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }] },
{ name => "Fasta 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n\n",
hits => [{ 2 => 1 }] },
{ name => "Qseq 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"X", "Y",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n\n",
hits => [{ 2 => 1 }] },
{ name => "Raw 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
raw => "\n\n\nCATCGATCAGTATCTG\n\n",
hits => [{ 2 => 1 }] },
# Quality string length doesn't match (too short by 1)
{ name => "Fastq 4",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n",
should_abort => 1},
{ name => "Tabbed 4",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n",
should_abort => 1},
{ name => "Qseq 4",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"X", "Y",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIII",
"1")."\n\n",
should_abort => 1},
# Name line doesn't start with @
{ name => "Fastq 5",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\n\n\r\nr0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n",
should_abort => 1,
hits => [{ }] },
# Name line doesn't start with >
{ name => "Fasta 5",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => "\n\n\r\nr0\nCATCGATCAGTATCTG\r",
should_abort => 1,
hits => [{ }] },
# Name line doesn't start with @ (2)
{ name => "Fastq 6",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n",
should_abort => 1,
hits => [{ }] },
# Name line doesn't start with > (2)
{ name => "Fasta 6",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => "r0\nCATCGATCAGTATCTG\r",
should_abort => 1,
hits => [{ }] },
# Part of sequence is trimmed
{ name => "Fastq 7",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n",
args => "--trim3 4",
norc => 1,
hits => [{ 2 => 1 }] },
{ name => "Tabbed 7",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n",
args => "--trim3 4",
norc => 1,
hits => [{ 2 => 1 }] },
{ name => "Fasta 7",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => "\n\n\r\n\>r0\nCATCGATCAGTATCTG\r\n",
args => "--trim3 4",
norc => 1,
hits => [{ 2 => 1 }] },
{ name => "Qseq 7",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"X", "Y",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n\n",
args => "--trim3 4",
norc => 1,
hits => [{ 2 => 1 }] },
{ name => "Raw 7",
ref => [ "AGCATCGATCAGTATCTGA" ],
raw => "\n\n\r\nCATCGATCAGTATCTG\r\n",
args => "--trim3 4",
norc => 1,
hits => [{ 2 => 1 }] },
# Whole sequence is trimmed
{ name => "Fastq 8",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n",
args => "--trim5 16",
hits => [{ "*" => 1 }] },
{ name => "Tabbed 8",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n",
args => "--trim5 16",
hits => [{ "*" => 1 }] },
{ name => "Fasta 8",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => "\n\n\r\n\>r0\nCATCGATCAGTATCTG\r\n",
args => "--trim3 16",
hits => [{ "*" => 1 }] },
{ name => "Qseq 8",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"X", "Y",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n\n",
args => "--trim3 16",
hits => [{ "*" => 1 }] },
{ name => "Raw 8",
ref => [ "AGCATCGATCAGTATCTGA" ],
raw => "\n\n\r\nCATCGATCAGTATCTG\r\n",
args => "--trim3 16",
hits => [{ "*" => 1 }] },
# Sequence is skipped
{ name => "Fastq 9",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n",
args => "-s 1",
hits => [{ }] },
{ name => "Tabbed 9",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n",
args => "-s 1",
hits => [{ }] },
{ name => "Fasta 9",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n",
args => "-s 1",
hits => [{ }] },
{ name => "Qseq 9",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"X", "Y",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n\n",
args => "-s 1",
hits => [{ }] },
{ name => "Raw 9",
ref => [ "AGCATCGATCAGTATCTGA" ],
raw => "CATCGATCAGTATCTG\n",
args => "-s 1",
hits => [{ }] },
# Like Fastq 1 but with many extra newlines
{ name => "Fastq multiread 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n".
"\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Tabbed multiread 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n".
"\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Fasta multiread 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\n\n".
"\n\n\r\n>r1\nATCGATCAGTATCTG\n\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Qseq multiread 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"10", "10",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n\n".
join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"12", "15",
"Index",
"0", # Mate
"ATCGATCAGTATCTG",
"IIIIIIIIIIIIIII",
"1")."\n\n",
idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0,
"MachName_RunNum_Lane_Tile_12_15_Index" => 1 },
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Raw multiread 1",
ref => [ "AGCATCGATCAGTATCTGA" ],
raw => "\n\n\r\nCATCGATCAGTATCTG\n\n".
"\n\n\r\nATCGATCAGTATCTG\n\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
# Like Fastq multiread 1 but with -u 1
{ name => "Fastq multiread 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 1",
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n".
"\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }] },
{ name => "Tabbed multiread 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 1",
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n".
"\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }] },
{ name => "Fasta multiread 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 1",
fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n".
"\n\n\r\n>r1\nATCGATCAGTATCTG\r\n",
hits => [{ 2 => 1 }] },
{ name => "Qseq multiread 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 1",
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"10", "10",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n\n".
join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"12", "15",
"Index",
"0", # Mate
"ATCGATCAGTATCTG",
"IIIIIIIIIIIIIII",
"1")."\n\n",
idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0,
"MachName_RunNum_Lane_Tile_12_15_Index" => 1 },
hits => [{ 2 => 1 }] },
{ name => "Raw multiread 2",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 1",
raw => "\n\n\r\nCATCGATCAGTATCTG\r\n".
"\n\n\r\nATCGATCAGTATCTG\r\n",
hits => [{ 2 => 1 }] },
# Like Fastq multiread 1 but with -u 2
{ name => "Fastq multiread 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 2",
fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n".
"\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Tabbed multiread 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 2",
tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n".
"\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Fasta multiread 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 2",
fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n".
"\n\n\r\n>r1\nATCGATCAGTATCTG\r\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Qseq multiread 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 2",
qseq => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"10", "10",
"Index",
"0", # Mate
"CATCGATCAGTATCTG",
"IIIIIIIIIIIIIIII",
"1")."\n\n".
join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"12", "15",
"Index",
"0", # Mate
"ATCGATCAGTATCTG",
"IIIIIIIIIIIIIII",
"1")."\n\n",
idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0,
"MachName_RunNum_Lane_Tile_12_15_Index" => 1 },
hits => [{ 2 => 1 }, { 3 => 1 }] },
{ name => "Raw multiread 3",
ref => [ "AGCATCGATCAGTATCTGA" ],
args => "-u 2",
raw => "\n\n\r\nCATCGATCAGTATCTG\r\n".
"\n\n\r\nATCGATCAGTATCTG\r\n",
hits => [{ 2 => 1 }, { 3 => 1 }] },
# Paired-end reads that should align
{ name => "Fastq paired 1",
ref => [ "AGCATCGATCAAAAACTGA" ],
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n".
"\n\n\@r1\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n",
fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n".
"\n\n\r\n\@r1\nAGCATCGATC\r\n+\n\nIIIIIIIIII",
pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] },
{ name => "Tabbed paired 1",
ref => [ "AGCATCGATCAAAAACTGA" ],
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n".
"\n\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII\n\n",
paired => 1,
pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] },
{ name => "Fasta paired 1",
ref => [ "AGCATCGATCAAAAACTGA" ],
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n".
"\n\n>r1\nTCAGTTTTTGA\r\n",
fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n".
"\n\n\r\n>r1\nAGCATCGATC",
pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] },
{ name => "Qseq paired 1",
ref => [ "AGCATCGATCAAAAACTGA" ],
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
qseq1 => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"10", "10",
"Index",
"1", # Mate
"AGCATCGATC",
"ABCBGACBCB",
"1")."\n\n".
join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"12", "15",
"Index",
"1", # Mate
"TCAGTTTTTGA",
"95849456875",
"1")."\n\n",
qseq2 => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"10", "10",
"Index",
"2", # Mate
"TCAGTTTTTGA",
"ABCBGACBCBA",
"1")."\n\n".
join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"12", "15",
"Index",
"2", # Mate
"AGCATCGATC",
"AGGCBBGCBG",
"1")."\n\n",
idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0,
"MachName_RunNum_Lane_Tile_12_15_Index" => 1 },
pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] },
{ name => "Raw paired 1",
ref => [ "AGCATCGATCAAAAACTGA" ],
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
raw1 => "\n\n\r\nAGCATCGATC\r\n".
"\n\nTCAGTTTTTGA\r\n",
raw2 => "\n\n\r\nTCAGTTTTTGA\n".
"\n\n\r\nAGCATCGATC",
pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] },
# Paired-end reads that should align
{ name => "Fastq paired 2",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-s 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
fastq1 => "\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n".
"\n\n\@r1\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n",
fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n".
"\n\n\r\n\@r1\nAGCATCGATC\r\n+\n\nIIIIIIIIII",
pairhits => [ { }, { "0,8" => 1 } ] },
{ name => "Tabbed paired 2",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-s 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
tabbed => "r0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n".
"\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII",
paired => 1,
pairhits => [ { }, { "0,8" => 1 } ] },
{ name => "Fasta paired 2",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-s 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
fasta1 => ">r0\nAGCATCGATC\r\n".
"\n\n>r1\nTCAGTTTTTGA\n",
fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n".
"\n\n\r\n>r1\nAGCATCGATC",
pairhits => [ { }, { "0,8" => 1 } ] },
{ name => "Qseq paired 1",
ref => [ "AGCATCGATCAAAAACTGA" ],
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
args => "-s 1",
qseq1 => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"10", "10",
"Index",
"1", # Mate
"AGCATCGATC",
"ABCBGACBCB",
"1")."\n\n".
join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"12", "15",
"Index",
"1", # Mate
"TCAGTTTTTGA",
"95849456875",
"1")."\n\n",
qseq2 => "\n\n\n".join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"10", "10",
"Index",
"2", # Mate
"TCAGTTTTTGA",
"ABCBGACBCBA",
"1")."\n\n".
join("\t", "MachName",
"RunNum",
"Lane",
"Tile",
"12", "15",
"Index",
"2", # Mate
"AGCATCGATC",
"AGGCBBGCBG",
"1")."\n\n",
idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0,
"MachName_RunNum_Lane_Tile_12_15_Index" => 1 },
pairhits => [ { }, { "0,8" => 1 } ] },
{ name => "Raw paired 2",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-s 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
raw1 => "AGCATCGATC\r\n".
"\n\nTCAGTTTTTGA\n",
raw2 => "\n\n\r\nTCAGTTTTTGA\n".
"\n\n\r\nAGCATCGATC",
pairhits => [ { }, { "0,8" => 1 } ] },
# Paired-end reads that should align
{ name => "Fastq paired 3",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-u 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n".
"\n\n\@r1\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n",
fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n".
"\n\n\r\n\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII",
pairhits => [ { "0,8" => 1 }, { } ] },
{ name => "Tabbed paired 3",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-u 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n".
"\n\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII",
paired => 1,
pairhits => [ { "0,8" => 1 }, { } ] },
{ name => "Fasta paired 3",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-u 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n".
"\n\n>r1\nTCAGTTTTTGA\r\n",
fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n".
"\n\n\r\n>r1\nAGCATCGATC",
pairhits => [ { "0,8" => 1 }, { } ] },
{ name => "Raw paired 3",
ref => [ "AGCATCGATCAAAAACTGA" ],
args => "-u 1",
# AGCATCGATC
# TCAAAAACTGA
# 0123456789012345678
raw1 => "\n\n\r\nAGCATCGATC\r\n".
"\n\nTCAGTTTTTGA\r\n",
raw2 => "\n\n\r\nTCAGTTTTTGA\n".
"\n\n\r\nAGCATCGATC",
pairhits => [ { "0,8" => 1 }, { } ] },
# Paired-end reads that should align
#{ name => "Fastq paired 4",
# ref => [ "AGCATCGATCAAAAACTGA" ],
# args => "-s 1 -L 4 -i C,1,0",
# # AGCATCGATC
# # TCAAAAACTGA
# # 0123456789012345678
# fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n".
# #"\n\n\@r1\nTC\r\n+\n\nII\n\n".
# "\n\n\@r2\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n",
# fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n".
# #"\n\n\r\n\@r1\nAG\r\n+\nII".
# "\n\@r2\nAGCATCGATC\r\n+\nIIIIIIIIII",
# paired => 1,
# pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ],
# pairhits => [ { "0,8" => 1 } ],
# samoptflags_map => [
# { },
# { "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } },
## { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 },
# 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }]
#},
#{ name => "Tabbed paired 4",
# ref => [ "AGCATCGATCAAAAACTGA" ],
# args => "-s 1 -L 4 -i C,1,0",
# # AGCATCGATC
# # TCAAAAACTGA
# # 0123456789012345678
# tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n".
# "\n\nr1\tTC\tII\tAG\tII".
# "\n\nr2\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII\n\n",
# paired => 1,
# #pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ],
# pairhits => [ { }, { "0,8" => 1 } ],
# samoptflags_map => [
# { },
# #{ "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } },
# { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 },
# 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }]
#},
#{ name => "Fasta paired 4",
# ref => [ "AGCATCGATCAAAAACTGA" ],
# args => "-s 1 -L 4 -i C,1,0",
# # AGCATCGATC
# # TCAAAAACTGA
# # 0123456789012345678
# fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n".
# # "\n\n>r1\nTC\r\n".
# "\n\n>r2\nTCAGTTTTTGA\r\n",
# fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n\n".
# # "\n\n\r\n>r1\nAG".
# "\n>r2\nAGCATCGATC",
# # pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ],
# pairhits => [ { }, { "0,8" => 1 } ],
# samoptflags_map => [
# { },
# #{ "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } },
# { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 },
# 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }]
#},
#{ name => "Raw paired 4",
# ref => [ "AGCATCGATCAAAAACTGA" ],
# args => "-s 1 -L 4 -i C,1,0",
# # AGCATCGATC
# # TCAAAAACTGA
# # 0123456789012345678
# raw1 => "\n\n\r\nAGCATCGATC\r\n".
## "\n\nTC\r\n".
# "\n\nTCAGTTTTTGA\r\n",
# raw2 => "\n\n\r\nTCAGTTTTTGA\n\n".
# "\n\n\r\nAG".
# "\nAGCATCGATC",
# pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ],
# pairhits => [ { }, { "0,8" => 1 } ],
# samoptflags_map => [
# { },
# { "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } },
# { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 },
# 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }]
#},
#
# Check that skipping of empty reads is handled correctly. A read that is
# empty or becomes empty after --trim3/--trim5 are applied should still
# count as a first-class read that gets propagated up into the alignment
# loop. And it should be counted in the -s/-u totals.
#
{ ref => [ "AGCATCGATCAGTATCTGA" ],
reads => [ "", "ATCGATCAGTA" ],
args => "-s 1",
hits => [ {}, { 3 => 1 }] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
mate1s => [ "", "AGCATCGATC" ],
mate2s => [ "", "TCAGATACTG" ],
args => "-s 1",
pairhits => [ {}, { "0,9" => 1 }] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
reads => [ "", "ATCGATCAGTA" ],
args => "-s 2",
hits => [ {}, {} ] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
mate1s => [ "", "AGCATCGATC" ],
mate2s => [ "", "TCAGATACTG" ],
args => "-s 2",
pairhits => [ {}, {} ] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
reads => [ "", "ATCGATCAGTA", "AGTATCTGA" ],
args => "-s 1 -u 1",
hits => [ {}, { 3 => 1 }] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
reads => [ "AC", "ATCGATCAGTA" ],
args => "-s 1 --trim3 2",
norc => 1,
hits => [ {}, { 3 => 1 }] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
reads => [ "AC", "ATCGATCAGTA" ],
args => "-s 1 --trim3 2",
nofw => 1,
hits => [ {}, { 5 => 1 }] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
reads => [ "AC", "ATCGATCAGTA" ],
args => "-s 1 --trim5 2",
nofw => 1,
hits => [ {}, { 3 => 1 }] },
{ ref => [ "AGCATCGATCAGTATCTGA" ],
reads => [ "AC", "ATCGATCAGTA" ],
args => "-s 1 --trim5 2",
norc => 1,
hits => [ {}, { 5 => 1 }] },
#
# Alignment with overhang
#
{ ref => [ "TGC" ],
reads => [ "ATGC" ],
args => "--overhang --policy \"SEED=0,3\\;IVAL=C,1,0\\;NCEIL=L,1,0\"",
hits => [ { 0 => 1 } ],
cigar => [ "1S3M" ],
samoptflags => [
{ "AS:i:-1" => 1, "YT:Z:UU" => 1, "MD:Z:3" => 1, "XN:i:1" => 1 } ]
},
{ ref => [ "TTGTTCGT" ],
reads => [ "TTGTTCG" ],
args => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"",
hits => [ { 0 => 1 } ],
cigar => [ "7M" ],
samoptflags => [ { "AS:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:7" => 1 } ]
},
{ ref => [ "TTGTTCGT" ],
reads => [ "TTGTTCG" ],
args => "",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:7=" ],
cigar => [ "7M" ],
samoptflags => [ { "AS:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:7" => 1 } ]
},
{ ref => [ "TTGTTCGT" ],
reads => [ "TGTTCGT", "TTGTTCG" ],
args => "--overhang",
hits => [ { 1 => 1 }, { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:7=", "XM:0,XP:0,XT:UU,XC:7=" ],
cigar => [ "7M", "7M" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:7" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:7" => 1 }
]
},
{ ref => [ "TTGTTCGT" ],
reads => [ "TGTTCGT", "TTGTTCG" ],
args => "",
hits => [ { 1 => 1 }, { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:7=", "XM:0,XP:0,XT:UU,XC:7=" ],
cigar => [ "7M", "7M" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:7" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:7" => 1 }
]
},
# Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends
{ ref => [ "TTGTTCGT" ],
# TGTTCGT
# GTTCGTA
# ATTGTTC
reads => [ "TGTTCGT", "GTTCGTA", "ATTGTTC" ],
args => "--overhang --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"",
hits => [ { 1 => 1 }, { 2 => 1 }, { 0 => 1 } ],
cigar => [ "7M", "6M1S", "1S6M" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:7" => 1 },
{ "AS:i:-1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6" => 1 },
{ "AS:i:-1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6" => 1 }
]},
# Same as previous case but --overhang not specified
{ ref => [ "TTGTTCGT" ],
reads => [ "TGTTCGT", "TTGTTCG", "GTTCGTA", "ATTGTTC" ],
args => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"",
hits => [ { 1 => 1 }, { 0 => 1 } ], # only the internal hits
cigar => [ "7M", "7M", "*", "*" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:7" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:7" => 1 },
{ "YT:Z:UU" => 1 },
{ "YT:Z:UU" => 1 }
]
},
# A simple case that should align with or without overhang, with or without
# a special NCEIL setting.
{ ref => [ "TTGTTCGT" ],
reads => [ "TTGTTCG" ],
args => "--overhang --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"",
hits => [ { 0 => 1 } ]},
{ ref => [ "TTGTTCGT" ],
reads => [ "TTGTTCG" ],
args => "--overhang",
hits => [ { 0 => 1 } ]},
#
# Testing the various -M/-m/-k/-a alignment modes in both unpaired and
# paired-end modes. Ensuring that SAM optional flags such as YM:i, YP:i
# are set properly in all cases.
#
#
# Paired-end
#
{ name => "P.M.58.G.b Unpaired -M 5 w/ 8 hits global, but mate #1 has just 1",
# 0 1 2 3 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
# 0 1 2 3 4 5 6 7 8 9
# 0 0 0 0 0 0 0 0 0 0
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
args => "-X 1000",
report => "-M 5",
pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1,
"12,486" => 1, "12,552" => 1, "12,723" => 1,
"12,789" => 1, "12,960" => 1 }],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 78 => "28M",
249 => "28M", 315 => "28M",
486 => "28M", 552 => "28M",
723 => "28M", 789 => "28M",
960 => "28M"
}],
samoptflags_map => [ {
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
} ] },
{ name => "P.M.58.L.b Unpaired -M 5 w/ 8 hits local, but mate #1 has just 1",
# 0 1 2 3 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
# 0 1 2 3 4 5 6 7 8 9
# 0 0 0 0 0 0 0 0 0 0
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
args => "-X 1000 --local",
report => "-M 5",
pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1,
"12,486" => 1, "12,552" => 1, "12,723" => 1,
"12,789" => 1, "12,960" => 1 }],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 78 => "28M",
249 => "28M", 315 => "28M",
486 => "28M", 552 => "28M",
723 => "28M", 789 => "28M",
960 => "28M"
}],
samoptflags_map => [ {
12 => { "AS:i:66" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
249 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
486 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
723 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
960 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
} ] },
{ name => "P.k.58.G.b Unpaired -k 5 w/ 8 hits global, but mate #1 has just 1",
# 0 1 2 3 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
# 0 1 2 3 4 5 6 7 8 9
# 0 0 0 0 0 0 0 0 0 0
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
args => "-X 1000",
report => "-k 5",
pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1,
"12,486" => 1, "12,552" => 1, "12,723" => 1,
"12,789" => 1, "12,960" => 1 }],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 78 => "28M",
249 => "28M", 315 => "28M",
486 => "28M", 552 => "28M",
723 => "28M", 789 => "28M",
960 => "28M"
}],
samoptflags_map => [ {
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
} ] },
{ name => "P.k.58.L.b Unpaired -k 5 w/ 8 hits local, but mate #1 has just 1",
# 0 1 2 3 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
# 0 1 2 3 4 5 6 7 8 9
# 0 0 0 0 0 0 0 0 0 0
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
args => "-X 1000 --local",
report => "-k 5",
pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1,
"12,486" => 1, "12,552" => 1, "12,723" => 1,
"12,789" => 1, "12,960" => 1 }],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 78 => "28M",
249 => "28M", 315 => "28M",
486 => "28M", 552 => "28M",
723 => "28M", 789 => "28M",
960 => "28M"
}],
samoptflags_map => [ {
12 => { "AS:i:66" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
249 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
486 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
723 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
960 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
} ] },
{ name => "P.M.22.G. Paired -M 2 w/ 2 paired hit, 2 unpaired hits each, global",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
# 0 1 2 3
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
args => "-X 150",
report => "-M 2",
pairhits => [ { "12,78" => 1, "249,315" => 1 } ],
cigar_map => [{
12 => "33M", 249 => "33M",
78 => "28M", 315 => "28M"
}],
hits_are_superset => [ 1 ],
samoptflags_map => [{
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
}]
},
{ name => "P.M.22.L. Paired -M 2 w/ 2 paired hit, 2 unpaired hits each, local",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
# 0 1 2 3
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
args => "--local -X 150",
report => "-M 2",
pairhits => [ { "12,78" => 1, "249,315" => 1 } ],
cigar_map => [{
12 => "33M", 249 => "33M",
78 => "28M", 315 => "28M"
}],
hits_are_superset => [ 1 ],
samoptflags_map => [{
12 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
249 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
315 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
}]
},
{ name => "P.k.2.G. Paired -k 1 w/ 2 paired hit, 2 unpaired hits each, global",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
# 0 1 2 3
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
args => "-X 150",
report => "-k 1",
pairhits => [ { "12,78" => 1, "249,315" => 1 } ],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 249 => "33M",
78 => "28M", 315 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YT:Z:CP" => 1, "YS:i:0" => 1 },
249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YT:Z:CP" => 1, "YS:i:0" => 1 },
315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YT:Z:CP" => 1, "YS:i:0" => 1 },
}]
},
{ name => "P.k.2.L. Paired -k 1 w/ 2 paired hit, 2 unpaired hits each, local",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
# 0 1 2 3
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
args => "--local -X 150",
report => "-k 1",
pairhits => [ { "12,78" => 1, "249,315" => 1 } ],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 249 => "33M",
78 => "28M", 315 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YT:Z:CP" => 1, "YS:i:66" => 1 },
249 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YT:Z:CP" => 1, "YS:i:56" => 1 },
315 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YT:Z:CP" => 1, "YS:i:66" => 1 },
}]
},
{ name => "P.M.2.G. Paired -M 1 w/ 2 paired hit, 2 unpaired hits each, global",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
# 0 1 2 3
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
report => "-M 1 -X 150",
pairhits => [ { "12,78" => 1, "249,315" => 1 } ],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 249 => "33M",
78 => "28M", 315 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
}]
},
{ name => "P.M.2.L. Paired -M 1 w/ 2 paired hit, 2 unpaired hits each, local",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
# 0 1 2 3
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
report => "-M 1 --local -X 150",
pairhits => [ { "12,78" => 1, "249,315" => 1 } ],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 249 => "33M",
78 => "28M", 315 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
249 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
315 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
}]
},
{ name => "P.k.1.G. Paired -k w/ 1 paired hit, 1 unpaired hit each, global",
# 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5 6 7 8 9 0 1 2
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
report => "-k 1 -X 150",
pairhits => [ { "12,78" => 1 } ],
cigar_map => [{
12 => "33M",
78 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
}]
},
{ name => "P.k.1.L. Paired -k 1 w/ 1 paired hit, 1 unpaired hit each, local",
# 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5 6 7 8 9 0 1 2
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
args => "--local -X 150",
report => "-k 1",
pairhits => [ { "12,78" => 1 } ],
cigar_map => [{
12 => "33M",
78 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
}]
},
{ name => "P.M.1.G. Paired -M w/ 1 paired hit, 1 unpaired hit each, global",
# 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5 6 7 8 9 0 1 2
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
args => "-X 150",
report => "-M 1",
pairhits => [ { "12,78" => 1 } ],
cigar_map => [{
12 => "33M",
78 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
}]
},
{ name => "P.M.1.L. Paired -M w/ 1 paired hit, 1 unpaired hit each, local",
# 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5 6 7 8 9 0 1 2
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
mate1fw => 1, mate2fw => 0,
args => "-X 150 --local",
report => "-M 1",
pairhits => [ { "12,78" => 1 } ],
cigar_map => [{
12 => "33M",
78 => "28M"
}],
samoptflags_map => [{
12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1,
"YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
}]
},
{ name => "P.M.58.G. Unpaired -M 5 w/ 8 hits global",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGA" ],
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6
# 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
args => "-X 150",
report => "-M 5",
pairhits => [ { "12,78" => 1, "249,315" => 1, "486,552" => 1,
"723,789" => 1, "960,1026" => 1, "1197,1263" => 1,
"1434,1500" => 1, "1671,1737" => 1, "1908,1974" => 1,
"2145,2211" => 1, "2382,2448" => 1 } ],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 78 => "28M",
249 => "33M", 315 => "28M",
486 => "33M", 552 => "28M",
723 => "33M", 789 => "28M",
960 => "33M", 1026 => "28M",
1197 => "33M", 1263 => "28M",
1434 => "33M", 1500 => "28M",
1671 => "33M", 1737 => "28M",
1908 => "33M", 1974 => "28M",
2145 => "33M", 2211 => "28M",
2382 => "33M", 2448 => "28M",
}],
samoptflags_map => [ {
12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
78 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1026 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1197 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1263 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1434 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1500 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1671 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1737 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1908 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
1974 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
2145 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
2211 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
2382 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
2448 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 },
} ] },
{ name => "P.M.58.L. Unpaired -M 5 w/ 8 hits local",
# 0 1 2 3 0 1 2 0 1 2 3 0 1 2
# 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTC" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9
# 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1
mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped
args => "--local -X 150",
report => "-M 5",
pairhits => [ { "12,78" => 1, "249,315" => 1, "486,552" => 1,
"723,789" => 1, "960,1026" => 1, "1197,1263" => 1,
"1434,1500" => 1, "1671,1737" => 1 } ],
hits_are_superset => [ 1 ],
cigar_map => [{
12 => "33M", 78 => "28M",
249 => "33M", 315 => "28M",
486 => "33M", 552 => "28M",
723 => "33M", 789 => "28M",
960 => "33M", 1026 => "28M",
1197 => "33M", 1263 => "28M",
1434 => "33M", 1500 => "28M",
1671 => "33M", 1737 => "28M"
}],
samoptflags_map => [ {
12 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
78 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
249 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
486 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
723 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
960 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
1026 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
1197 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
1263 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
1434 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
1500 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
1671 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 },
1737 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1,
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1,
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 },
} ] },
#
# Unpaired
#
{ name => "U.M.1.G. Unpaired -M w/ 1 hit global",
# 0 1 2 3
# 012345678901234567890123456789012
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG
# CAGCGTACGGTATCTAGCTATG
# GGTATCTAGCTATGGGCATCGA
# AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ],
# 01234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5
reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
report => "-M 1",
hits => [ { 12 => 1 } ],
cigar => [ "33M" ],
samoptflags => [{
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:33" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
}]
},
{ name => "U.M.1.L. Unpaired -M w/ 1 hit local",
# 0 1 2 3
# 012345678901234567890123456789012
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ],
# 01234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5
reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
args => "--local",
report => "-M 1",
hits => [ { 12 => 1 } ],
cigar => [ "33M" ],
samoptflags => [{
"AS:i:66" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:33" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
}]
},
{ name => "U.k.1.G. Unpaired -k 1 w/ 1 hit global",
# 0 1 2 3
# 012345678901234567890123456789012
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ],
# 01234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5
reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
report => "-k 1",
hits => [ { 12 => 1 } ],
cigar => [ "33M" ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:33" => 1, # mismatching positions/bases
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.M.1.L. Unpaired -m w/ 1 hit local",
# 0 1 2 3
# 012345678901234567890123456789012
# CAGCGTACGGTATCTAGCTATGGGCATCGATCG
ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ],
# 01234567890123456789012345678901234567890123456789012345
# 0 1 2 3 4 5
reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ],
args => "--local",
report => "-k 1",
hits => [ { 12 => 1 } ],
cigar => [ "33M" ],
samoptflags => [ {
"AS:i:66" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:33" => 1, # mismatching positions/bases
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.M.2.G. Unpaired -M 1 w/ 2 hit global",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
args => "",
report => "-M 1",
hits => [ { 6 => 1, 48 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:1" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.M.2.L. Unpaired -M 1 w/ 2 hit local",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
args => "--local",
report => "-M 1",
hits => [ { 6 => 1, 48 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:60" => 1, # alignment score
"XS:i:60" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:1" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.k.2.G. Unpaired -k 1 w/ 2 hit global",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
report => "-k 1",
hits => [ { 6 => 1, 48 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.k.2.L. Unpaired -k 1 w/ 2 hit local",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
report => "-k 1",
hits => [ { 6 => 1, 48 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.M.22.G. Unpaired -M 2 w/ 2 hit global",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
report => "-M 2",
hits => [ { 6 => 1, 48 => 1 } ],
cigar => [ "30M" ],
hits_are_superset => [ 1 ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.M.22.L. Unpaired -M 2 w/ 2 hit local",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
report => "-M 2 --local",
hits => [ { 6 => 1, 48 => 1 } ],
cigar => [ "30M" ],
hits_are_superset => [ 1 ],
samoptflags => [ {
"AS:i:60" => 1, # alignment score
"XS:i:60" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.k.22.G. Unpaired -k 2 w/ 2 hit global",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
report => "-k 2",
hits => [ { 6 => 1, 48 => 1 } ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.k.22.L. Unpaired -k 2 w/ 2 hit local",
# 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234
# 0 1 2 3 4 5 6 7 8
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
args => "--local",
report => "-k 2",
hits => [ { 6 => 1, 48 => 1 } ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:60" => 1, # alignment score
"XS:i:60" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.M.58.G. Unpaired -M 5 w/ 8 hits global",
# 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
# 0 1 2 3
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
args => "-X 150",
report => "-M 5",
hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:1" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.M.58.L. Unpaired -M 5 w/ 8 hits global",
# 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
# 0 1 2 3
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
args => "--local",
report => "-M 5",
hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:60" => 1, # alignment score
"XS:i:60" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:1" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.k.58.G. Unpaired -k 5 w/ 8 hits global",
# 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
# 0 1 2 3
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
report => "-k 5",
hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:0" => 1, # alignment score
"XS:i:0" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
{ name => "U.k.58.L. Unpaired -k 5 w/ 8 hits local",
# 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2
# 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789
# AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA
ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ],
# 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
# 0 1 2 3
reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ],
args => "--local",
report => "-k 5",
hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ],
hits_are_superset => [ 1 ],
cigar => [ "30M" ],
samoptflags => [ {
"AS:i:60" => 1, # alignment score
"XS:i:60" => 1, # suboptimal alignment score
"XN:i:0" => 1, # num ambiguous ref bases
"XM:i:0" => 1, # num mismatches
"XO:i:0" => 1, # num gap opens
"XG:i:0" => 1, # num gap extensions
"NM:i:0" => 1, # num edits
"MD:Z:30" => 1, # mismatching positions/bases
"YM:i:0" => 1, # read aligned repetitively in unpaired fashion
"YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion
} ] },
# Following cases depend on this being the case:
#
# static const float DEFAULT_CEIL_CONST = 3.0f;
# static const float DEFAULT_CEIL_LINEAR = 3.0f;
# Just enough budget for hits, so it should align
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCAT" ], # budget = 3 + 8 * 3 = 27
args => "-L 6 -i C,1,0 --policy \"MMP=C27\\;MIN=L,-3,-3\\;RDG=25,15\\;RFG=25,15\"", # penalty = 27
report => "-a",
hits => [ { 0 => 1, 8 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:6=1X1=" ],
cigar => [ "8M" ],
samoptflags => [ { "AS:i:-27" => 1, "XS:i:-27" => 1, "NM:i:1" => 1,
"XM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6G1" => 1 } ] },
# Not quite enough budget for hits, so it should NOT align
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCAT" ], # budget = 3 + 8 * 3 = 27
args => "-L 6 -i C,1,0 --policy \"MMP=C28\\;MIN=L,-3,-3\\;RDG=25,15\\;RFG=25,15\"", # penalty = 28
report => "-a",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ],
samoptflags => [ { "YT:Z:UU" => 1 } ] },
# Check that using a seed of length 1 with 1-mismatch doesn't crash.
# Perhaps we should disallow it though?
{ ref => [ "AAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCC" ],
reads => [ "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC", "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC" ],
names => [ "r1", "r1", "r1", "r1", "r2", "r2", "r2", "r2", "r3", "r3", "r3", "r3", "r4", "r4", "r4", "r4" ],
args => "--policy \"SEED=1,1\"",
check_random => 1,
report => "-k 1" },
#
# Gap penalties
#
# Alignment with 1 read gap
{ name => "Gap penalties 1",
ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCTTTGTT" ], # budget = 3 + 12 * 3 = 39
args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=29,10\\;RFG=25,15\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:6=1D6=" ],
cigar => [ "6M1D6M" ],
samoptflags => [{
"AS:i:-39" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1,
"YT:Z:UU" => 1, "MD:Z:6^G6" => 1 }]
},
# Alignment with 1 read gap, but not enough budget
{ name => "Gap penalties 2",
ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCTTTGTT" ], # budget = 3 + 12 * 3 = 39
args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=30,10\\;RFG=25,15\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ],
samoptflags => [{ "YT:Z:UU" => 1 }]
},
# Alignment with 1 reference gap
{ name => "Gap penalties 3",
ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45
args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=30,15\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:7=1I6=" ],
cigar => [ "7M1I6M" ],
samoptflags => [{ "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1,
"XG:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 }]
},
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
# T T G T T C G T T T G T T C G T
# 0 1 1 0 2 3 1 0 0 1 1 0 2 3 1
# 0 T x
# 0 0 x
# 1 T x
# 1 1 x
# 2 G x
# 2 1 x
# 3 T x
# 3 0 x
# 4 T x
# 4 2 x
# 5 C x
# 5 3 x
# 6 G
# 6 2 x
# 7 A
# 7 3 x
# 8 T x
# 8 0 x
# 9 T x
# 9 0 x
# 0 T x
# 0 1 x
# 1 G x
# 1 1 x
# 2 T x
# 2 0 x
# 3 T
#
# Alignment with 1 reference gap, but not enough budget
{ name => "Gap penalties 4",
ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45
args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=30,16\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ],
samoptflags => [ { "YT:Z:UU" => 1 } ] },
# Alignment with 1 reference gap, but not enough budget
{ name => "Gap penalties 5",
ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45
args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=31,15\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ],
samoptflags => [ { "YT:Z:UU" => 1 } ] },
# Alignment with 1 reference gap and 1 read gap
{ name => "Gap penalties 6",
ref => [ "ATTGTTCGTTTGTTCGTA" ],
reads => [ "ATTGTTGTTTGATTCGTA" ], # budget = 3 + 18 * 3 = 57
args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=19,10\\;RFG=18,10\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:6=1D5=1I6=" ],
cigar => [ "6M1D5M1I6M" ] },
# Alignment with 1 reference gap and 1 read gap, but not enough budget
{ name => "Gap penalties 7",
ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTGTTTGATTCGT" ], # budget = 3 + 16 * 3 = 51
args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=16,10\\;RFG=16,10\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ] },
# Experiment with N filtering
{ name => "N filtering 1",
ref => [ "GAGACTTTATACGCATCGAACTATCGCTCTA" ],
reads => [ "ATACGCATCGAAC" ],
# 0123456789012345678901234567890
# 1 2 3
args => "--policy \"NCEIL=L,0,0\"",
report => "-a",
hits => [ { 8 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:13=" ] },
{ name => "N filtering 2",
ref => [ "GAGACTTTATNCGCATCGAACTATCGCTCTA" ],
reads => [ "ATACGCATCGAAC" ],
# 0123456789012345678901234567890
# 1 2 3
args => "--policy \"NCEIL=L,0,0\"",
report => "-a",
hits => [ { "*" => 1 } ] },
{ name => "N filtering 3",
ref => [ "GAGACTTTATACGCATCGAANTATCGCTCTA" ],
reads => [ "ATACGCATCGAAC" ],
# 0123456789012345678901234567890
# 1 2 3
args => "--policy \"NCEIL=L,0,0\"",
report => "-a",
hits => [ { "*" => 1 } ] },
{ name => "N filtering 4",
ref => [ "GAGACTTTNTACGCATCGAACTATCGCTCTA" ],
reads => [ "ATACGCATCGAAC" ],
# 0123456789012345678901234567890
# 1 2 3
args => "--policy \"NCEIL=L,0,0\"",
report => "-a",
hits => [ { "*" => 1 } ] },
{ name => "N filtering 5",
ref => [ "GAGACTTTATNCGCATCGAACTATCGCTCTA" ],
reads => [ "ATACGCATCGAAC" ],
# 0123456789012345678901234567890
# 1 2 3
args => "--policy \"NCEIL=L,0,0.1\\;SEED=0,10\\;IVAL=C,1,0\"",
report => "-a",
hits => [ { 8 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:2=1X10=" ] },
{ name => "N filtering 6",
ref => [ "GAGACTTTNTACGCATCGAANTATCGCTCTA" ],
reads => [ "ATACGCATCGAAC" ],
# 0123456789012345678901234567890
# 1 2 3
args => "--policy \"NCEIL=L,0,0.1\\;SEED=0,10\\;IVAL=C,1,0\"",
report => "-a",
hits => [ { "*" => 1 } ] },
# No discordant alignment because one mate is repetitive.
# Alignment with 1 reference gap
{ ref => [ "TTTTGTTCGTTTG" ],
reads => [ "TTTTGTTCGATTTG" ], # budget = 3 + 14 * 3 = 45
args => "--policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=25,15\\;RFG=25,20\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:9=1I4=" ],
cigar => [ "9M1I4M" ],
samoptflags => [
{ "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1,
"YT:Z:UU" => 1, "MD:Z:13" => 1 },
]
},
# TTGTTCGTTTGTT
# Tx
# T x
# G x
# T x
# T x
# C x
# G x
# A x
# T x
# T x
# T x
# G x
# T x
# T x
# Alignment with 1 reference gap
{ ref => [ "TTGTTCGTTTGTT" ],
reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45
args => "--policy \"SEED=0,3\\;IVAL=C,1,0\\;MMP=C30\\;RDG=25,15\\;RFG=25,20\\;MIN=L,-3,-3\"",
report => "-a",
hits => [ { 0 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:7=1I6=" ],
cigar => [ "7M1I6M" ],
samoptflags => [
{ "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1,
"YT:Z:UU" => 1, "MD:Z:13" => 1 },
]
},
{ ref => [ "ACNCA" ],
reads => [ "CA" ],
args => "",
report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"",
hits => [ { 3 => 1 } ],
edits => [ ],
flags => [ "XM:0,XP:0,XT:UU,XC:2=" ],
cigar => [ "2M" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
]
},
{ name => "N ceil = 0, 2 legit hits (1)",
ref => [ "ACNCA" ],
reads => [ "AC" ],
args => "",
report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"",
hits => [ { 0 => 1 } ],
edits => [ ],
flags => [ ] },
{ name => "N ceil = 0, 2 legit hits (2)",
ref => [ "ACNCANNNNNNNNCGNNNNNNNNCG" ],
# 0123456789012345678901234
# 0 1 2
reads => [ "CG" ],
args => "",
report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"",
hits => [ { 13 => 2, 23 => 2 } ],
edits => [ ],
cigar => [ "2M", "2M" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
]
},
{ ref => [ "ACNCANNNNNNAACGNNNNNNNACGAANNNNCGAAAN" ],
# 0123456789012345678901234567890123456
# 0 1 2 3
reads => [ "CG" ],
args => "",
report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"",
hits => [ { 13 => 2, 23 => 2, 31 => 2 } ],
edits => [ ],
flags => [ "XM:0,XP:0,XT:UU,XC:2=",
"XM:0,XP:0,XT:UU,XC:2=",
"XM:0,XP:0,XT:UU,XC:2=" ],
cigar => [ "2M", "2M", "2M" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
]
},
{ ref => [ "ACNCANNNNNNAACGNNNNNNNACGAANNNNCGAAAN" ],
# 0123456789012345678901234567890123456
# 0 1 2 3
reads => [ "CG" ],
args => "",
report => "-a --policy \"SEED=0,1\\;IVAL=C,1,0\\;NCEIL=L,0,0\"",
hits => [ { 13 => 2, 23 => 2, 31 => 2 } ],
edits => [ ],
flags => [ "XM:0,XP:0,XT:UU,XC:2=",
"XM:0,XP:0,XT:UU,XC:2=",
"XM:0,XP:0,XT:UU,XC:2=" ],
cigar => [ "2M", "2M", "2M" ],
samoptflags => [
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
{ "YT:Z:UU" => 1, "MD:Z:2" => 1 },
]
},
#
# Alignment involving ambiguous reference character
#
# First read has non-compatible unambiguous charcacter (G for Y),
# second read has compatible one
{ ref => [ "TTGTTYGT" ],
reads => [ "TTGTTGGT", "TTGTTCGT" ],
args => "",
report => "-a --policy \"SEED=0,5\\;IVAL=C,1,0\\;NCEIL=L,2,0\"",
hits => [ { 0 => 1 }, { 0 => 1 } ],
norc => 1,
edits => [ "5:N>G", "5:N>C" ],
flags => [ "XM:0,XP:0,XT:UU,XC:5=1X2=", "XM:0,XP:0,XT:UU,XC:5=1X2=" ],
cigar => [ "8M", "8M" ],
samoptflags => [
{ "AS:i:-1" => 1, "NM:i:1" => 1, "XM:i:1" => 1, "XN:i:1" => 1,
"YT:Z:UU" => 1, "MD:Z:5N2" => 1 },
{ "AS:i:-1" => 1, "NM:i:1" => 1, "XM:i:1" => 1, "XN:i:1" => 1,
"YT:Z:UU" => 1, "MD:Z:5N2" => 1 },
]
},
#
# Alignment with multi-character read gap
#
# Relatively small example with a read gap extend
{ ref => [ "ATAACCTTCG" ],
reads => [ "ATAATTCG" ], # 3 * 19 + 3 = 60
# ^
# 4:CC>-
args => "",
report => "-a --overhang --gbar 3 --policy \"MMP=C30\\;RDG=5,5\\;SEED=0,4\\;IVAL=C,1,0\\;RFG=25,20\\;MIN=L,-3,-3\"",
hits => [ { 0 => 1 } ],
edits => [ "4:CC>-" ],
flags => [ "XM:0,XP:0,XT:UU,XC:4=2D4=" ],
cigar => [ "4M2D4M" ],
samoptflags => [
{ "AS:i:-15" => 1, "NM:i:2" => 1,
"XO:i:1" => 1, "XG:i:2" => 3, "YT:Z:UU" => 1, "MD:Z:4^CC4" => 1 }
]
},
# Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends
{ ref => [ "ATATGCCCCATGCCCCCCTCCG" ],
reads => [ "ATATGCCCCCCCCCCTCCG" ], # 3 * 19 + 3 = 60
# ^
# 9:ATG>-
args => "--policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=5,5\\;RFG=25,15\\;MIN=L,-3,-3\"",
hits => [ { 0 => 1 } ],
edits => [ "9:ATG>-" ],
norc => 1,
flags => [ "XM:0,XP:0,XT:UU,XC:9=3D10=" ],
cigar => [ "9M3D10M" ],
samoptflags => [
{ "AS:i:-20" => 1, "NM:i:3" => 1,
"XO:i:1" => 1, "XG:i:3" => 3, "YT:Z:UU" => 1, "MD:Z:9^ATG10" => 1 }
]
},
# Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends
{ ref => [ "ATATGCCCCATGCCCCCCTCCG" ],
reads => [ "CGGAGGGGGGGGGGCATAT" ],
# ATATGCCCCCCCCCCTCCG
# ^
# 10:GTA>-
args => "",
report => "-a --overhang --policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=5,5\\;RFG=25,20\\;MIN=L,-3,-3\"",
hits => [ { 0 => 1 } ],
edits => [ "10:GTA>-" ],
norc => 1,
flags => [ "XM:0,XP:0,XT:UU,XC:9=3D10=" ],
cigar => [ "9M3D10M" ],
samoptflags => [
{ "AS:i:-20" => 1, "NM:i:3" => 1,
"XO:i:1" => 1, "XG:i:3" => 3, "YT:Z:UU" => 1, "MD:Z:9^ATG10" => 1 }
]
},
# 1 discordant alignment and one concordant alignment. Discordant because
# the fragment is too long.
{ name => "Simple paired-end 13",
ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCTGTCGCTACCGCCCCCCCCCCC" ],
# 012345678901234567890123456789012345678901234567890
# 0 1 2 3 4 5
# ATAAAAATAT GTCGCTACCG
# ATAAAAATAT TGTCGCTACC
# ATAAAAATAT CTGTCGCTAC
# ATAAAAATAT CCTGTCGCTA
# TAAAAATATT GTCGCTACCG
# TAAAAATATT TGTCGCTACC
# TAAAAATATT CTGTCGCTAC
# TAAAAATATT CCTGTCGCTA
# 012345678901234567890123456789012345678901234567890
# 0 1 2 3 4 5
# -----------------------------------
# 012345678901234567890123456789012345678901234567
# 0 1 2 3 4
mate1s => [ "ATAAAAATAT", "ATAAAAATAT", "ATAAAAATAT", "ATAAAAATAT",
"TAAAAATATT", "TAAAAATATT", "TAAAAATATT", "TAAAAATATT", ],
mate2s => [ "GTCGCTACCG", "TGTCGCTACC", "CTGTCGCTAC", "CCTGTCGCTA",
"GTCGCTACCG", "TGTCGCTACC", "CTGTCGCTAC", "CCTGTCGCTA" ],
mate1fw => 1, mate2fw => 1,
args => "-I 0 -X 35",
# Not really any way to flag an alignment as discordant
pairhits => [ { "3,30" => 1 }, { "3,29" => 1 }, { "3,28" => 1 }, { "3,27" => 1 },
{ "4,30" => 1 }, { "4,29" => 1 }, { "4,28" => 1 }, { "4,27" => 1 } ],
flags => [ "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:DP,XC:10=",
"XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=",
"XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=",
"XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=" ] },
# 1 discordant alignment and one concordant alignment. Discordant because
# the fragment is too long.
{ name => "Simple paired-end 12",
ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCGGGCCCGCCCGCCCCCCCCCCC" ],
# ATAAAAATAT GGCCCGCCCG
# ATAAAAATAT CCGGGCCCGC
# 012345678901234567890123456789012345678901234567890
# 0 1 2 3 4 5
# -------------------------------------
# 012345678901234567890123456789012345678901234567
mate1s => [ "ATAAAAATAT", "ATAAAAATAT" ],
mate2s => [ "GGCCCGCCCG", "CCGGGCCCGC" ],
mate1fw => 1, mate2fw => 1,
args => "-I 0 -X 36",
# Not really any way to flag an alignment as discordant
pairhits => [ { "3,30" => 1 }, { "3,27" => 1 } ],
flags => [ "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=" ] },
# 1 discordant alignment. Discordant because the fragment is too long.
{ name => "Simple paired-end 11",
ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCCCGATCGCCCGCCCCCCCCCCC" ],
# ATAAAAATAT CGATCGCCCG
# 012345678901234567890123456789012345678901234567890
# 0 1 2 3 4 5
# -------------------------------------
# 012345678901234567890123456789012345678901234567
mate1s => [ "ATAAAAATAT" ],
mate2s => [ "CGATCGCCCG" ],
mate1fw => 1, mate2fw => 1,
args => "-I 0 -X 36",
# Not really any way to flag an alignment as discordant
pairhits => [ { "3,30" => 1 } ],
flags => [ "XM:0,XP:0,XT:DP,XC:10=" ] },
# 1 discordant alignment. Discordant because the fragment is too short.
{ name => "Simple paired-end 10",
ref => [ "TTTATAAAAATATTTCCCCCCGATCGCCCGCCCCCCCCCCC" ],
# ATAAAAATAT CGATCGCCCG
# 01234567890123456789012345678901234567890
# 0 1 2 3 4
# ---------------------------
# 012345678901234567890123456
mate1s => [ "ATAAAAATAT" ],
mate2s => [ "CGATCGCCCG" ],
mate1fw => 1, mate2fw => 1,
args => "-I 28 -X 80",
# Not really any way to flag an alignment as discordant
pairhits => [ { "3,20" => 1 } ],
flags => [ "XM:0,XP:0,XT:DP,XC:10=" ] },
# Like 6, but with -M limit
{ name => "Simple paired-end 9",
ref => [ "CCCATATATATATCCTCCCATATATATATCCCTCCCCATATATATATCCCTTTTCCTTTCGCGCGCGCGTTTCCCCCCCCC" ],
# ATATATATAT ATATATATAT ATATATATAT CGCGCGCGCG
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890
# 0 1 2 3 4 5 6 7 8
mate1s => [ "ATATATATAT" ],
mate2s => [ "CGCGCGCGCG" ],
mate1fw => 1, mate2fw => 0,
args => "-I 0 -X 80",
report => "-M 2",
lines => 2,
pairhits => [ { "3,59" => 1, "19,59" => 1, "37,59" => 1 } ],
hits_are_superset => [ 1 ],
flags => [ "XM:1,XP:1,XT:CP,XC:10=", "XM:1,XP:1,XT:CP,XC:10=" ] },
# Like 6, but without -m limit
{ name => "Simple paired-end 8",
ref => [ "CCCATATATATATCCTCCCATATATATATCCCTTCCCATATATATATCCCTTTTTTTTTCGCGCGCGCGTTTCCCCCCCCC" ],
# ATATATATAT ATATATATAT ATATATATAT CGCGCGCGCG
# 012345678901234567890123456789012345678901234567890123456789012345678901234567890
# 0 1 2 3 4 5 6 7 8
mate1s => [ "ATATATATAT" ],
mate2s => [ "CGCGCGCGCG" ],
mate1fw => 1, mate2fw => 0,
args => "-I 0 -X 80",
pairhits => [ { "3,59" => 1, "19,59" => 1, "37,59" => 1 } ],
flags => [ "XM:0,XP:0,XT:CP,XC:10=" ] },
# Paired-end read, but only one mate aligns
{ name => "Simple paired-end 2; no --no-mixed",
ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCCCTTCGCGCGCGCGTTTCCCCC" ],
# ATATATATAT CGCGCGCGCG
# 01234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5
mate1s => [ "ATATATATAT" ],
mate2s => [ "CCCCCGGGGG" ],
mate1fw => 1, mate2fw => 1,
args => "-I 0 -X 50 --nofw",
nofw => 1,
pairhits => [ { "*,3" => 1 } ],
flags_map => [ { 3 => "XM:0,XP:0,XT:UP,XC:10=",
"*" => "XM:0,XP:0,XT:UP" } ],
cigar_map => [{
3 => "10M",
"*" => "*"
}],
samoptflags_map => [{
3 => {
"MD:Z:10" => 1, # mismatching positions/bases
"YT:Z:UP" => 1, # type of alignment (concordant/discordant/etc)
},
"*" => {
"YT:Z:UP" => 1, # type of alignment (concordant/discordant/etc)
}
}]
},
{ name => "Simple paired-end 2; --no-mixed",
ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCTTTTCGCGCGCGCGTTTCCCCC" ],
# ATATATATAT CGCGCGCGCG
# 01234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5
mate1s => [ "ATATATATAT" ],
mate2s => [ "CCCCCGGGGG" ],
mate1fw => 1, mate2fw => 1,
args => "-I 0 -X 50 --no-mixed",
pairhits => [ { "*,*" => 1 } ] },
# Simple paired-end alignment
{ name => "Simple paired-end 1",
ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCTTTTCGCGCGCGCGTTTTCCCC" ],
# ATATATATAT CGCGCGCGCG
# 01234567890123456789012345678901234567890123456789012
# 0 1 2 3 4 5
mate1s => [ "ATATATATAT" ],
mate2s => [ "CGCGCGCGCG" ],
mate1fw => 1, mate2fw => 1,
args => "-I 0 -X 50",
pairhits => [ { "3,35" => 1 } ],
flags => [ "XM:0,XP:0,XT:CP,XC:10=" ],
cigar_map => [{
3 => "10M",
35 => "10M"
}],
samoptflags_map => [{
3 => {
"MD:Z:10" => 1, # mismatching positions/bases
"YT:Z:CP" => 1, # type of alignment (concordant/discordant/etc)
},
35 => {
"MD:Z:10" => 1, # mismatching positions/bases
"YT:Z:CP" => 1, # type of alignment (concordant/discordant/etc)
}
}]
},
# Check that pseudo-random generation is always the same for
# same-sequence, same-name reads
{ ref => [ "AAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCC" ],
reads => [ "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC", "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC" ],
names => [ "r1", "r1", "r1", "r1", "r2", "r2", "r2", "r2", "r3", "r3", "r3", "r3", "r4", "r4", "r4", "r4" ],
args => "",
check_random => 1,
report => "-k 1" },
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTGTTCGT" ],
report => "-M 1",
hits => [ { 0 => 1, 8 => 1 } ],
flags => [ "XM:1,XP:0,XT:UU,XC:8=" ],
hits_are_superset => [ 1 ],
cigar => [ "8M" ],
samoptflags => [
{ "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 }
],
},
# Read 3 overhangs right end
{ ref => [ "TTGTTCGT" ],
reads => [ "GTTCGTA" ],
args => "--overhang --policy \"SEED=0,3\\;IVAL=C,1,0\\;NCEIL=L,2,0\"",
hits => [ { 2 => 1 } ],
flags => [ "XM:0,XP:0,XT:UU,XC:6=1X" ] },
# Mess with arguments
# Default should be 1-mismatch, so this shouldn't align
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTATTAGT" ],
args => "",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ],
samoptflags => [{ "YT:Z:UU" => 1 }],
},
# Shouldn't align with 0 mismatches either
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTATTAGT" ],
args => "--policy SEED=0",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ],
samoptflags => [{ "YT:Z:UU" => 1 }],
},
# Should align with 0 mismatches if we can wedge a seed into the 2
# matching characters between the two mismatches. Here we fail to
# wedge a length-3 seed in (there's no room)
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTATTAGT" ],
args => "--policy \"SEED=0,3\\;IVAL=C,1,0\\;MMP=C1\"",
hits => [ { "*" => 1 } ],
flags => [ "XM:0,XP:0,XT:UU" ],
cigar => [ "*" ],
samoptflags => [{ "YT:Z:UU" => 1 }],
},
# Should align with 0 mismatches if we can wedge a seed into the 2
# matching characters between the two mismatches. Here we wedge a
# length-2 seed in
{ ref => [ "TTGTTCGTTTGTTCGT" ],
reads => [ "TTATTAGT" ],
args => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;MMP=C1\"",
#
# TTGTTCGTTTGTTCGT TTGTTCGTTTGTTCGT TTGTTCGTTTGTTCGT
# || || || || | | || ||
# TTATTAGT TTATTAGT TTATTAGT
#
# TTGTTCGTTTGTTCGT TTGTTCGTTTGTTCGT TTGTTCGTTTGTTCGT
# || | || | || || ||
# TTATTAGT TTATTAGT TTATTAGT
#
hits => [ { 0 => 1, 3 => 1, 4 => 1,
5 => 1, 7 => 1, 8 => 1} ],
flag_map => [ { 0 => "XM:0,XP:0,XT:UU,XC:2=1X2=1X2=",
3 => "XM:0,XP:0,XT:UU,XC:2=2X1=3X",
4 => "XM:0,XP:0,XT:UU,XC:1=2X2=1X2=",
5 => "XM:0,XP:0,XT:UU,XC:3X2=2X1=",
7 => "XM:0,XP:0,XT:UU,XC:2=2X1=3X",
8 => "XM:0,XP:0,XT:UU,XC:2=1X2=1X2="} ],
cigar_map => [ { 0 => "8M", 3 => "8M", 4 => "8M",
5 => "8M", 7 => "8M", 8 => "8M" } ],
samoptflags_map => [{
0 => { "AS:i:-2" => 1, "XS:i:-2" => 1, "NM:i:2" => 1, "XM:i:2" => 1,
"YT:Z:UU" => 1, "MD:Z:2G2C2" => 1 },
3 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1, "XM:i:5" => 1,
"YT:Z:UU" => 1, "MD:Z:2C0G1T0T0G0" => 1 },
4 => { "AS:i:-3" => 1, "XS:i:-2" => 1, "NM:i:3" => 1, "XM:i:3" => 1,
"YT:Z:UU" => 1, "MD:Z:1C0G2T2" => 1 },
5 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1, "XM:i:5" => 1,
"YT:Z:UU" => 1, "MD:Z:0C0G0T2G0T1" => 1 },
7 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1, "XM:i:5" => 1,
"YT:Z:UU" => 1, "MD:Z:2T0G1T0C0G0" => 1 },
8 => { "AS:i:-2" => 1, "XS:i:-2" => 1, "NM:i:2" => 1, "XM:i:2" => 1,
"YT:Z:UU" => 1, "MD:Z:2G2C2" => 1 },
}],
},
);
##
# Take a list of reference sequences and write them to a temporary
# FASTA file of the given name.
#
sub writeFasta($$) {
my ($l, $fa) = @_;
open(FA, ">$fa") || die "Could not open $fa for writing";
my $idx = 0;
for(my $i = 0; $i < scalar(@$l); $i++) {
print FA ">$idx\n".$l->[$i]."\n";
$idx++;
}
close(FA);
}
##
# Take a lists of named reads/mates and write them to appropriate
# files.
#
sub writeReads($$$$$$$$$) {
my (
$reads,
$quals,
$mate1s,
$qual1s,
$mate2s,
$qual2s,
$names,
$fq1,
$fq2) = @_;
open(FQ1, ">$fq1") || die "Could not open '$fq1' for writing";
open(FQ2, ">$fq2") || die "Could not open '$fq2' for writing";
my $pe = (defined($mate1s) && $mate1s ne "");
if($pe) {
for (0..scalar(@$mate1s)-1) {
my $m1 = $mate1s->[$_];
my $m2 = $mate2s->[$_];
my $q1 = $qual1s->[$_];
my $q2 = $qual2s->[$_];
my $nm = $names->[$_];
defined($m1) || die;
defined($m2) || die;
$q1 = $q1 || ("I" x length($m1));
$q2 = $q2 || ("I" x length($m2));
$nm = $nm || "r$_";
print FQ1 "\@$nm/1\n$m1\n+\n$q1\n";
print FQ2 "\@$nm/2\n$m2\n+\n$q2\n";
}
} else {
for (0..scalar(@$reads)-1) {
my $read = $reads->[$_];
defined($read) || die;
my $qual = $quals->[$_];
my $nm = $names->[$_];
$qual = $qual || ("I" x length($read));
$nm = $nm || "r$_";
print FQ1 "\@$nm\n$read\n+\n$qual\n";
}
}
close(FQ1);
close(FQ2);
}
##
# Run bowtie2 with given arguments
#
sub runbowtie2($$$$$$$$$$$$$$$$$$$$$) {
my (
$do_build,
$args,
$color,
$fa,
$reportargs, #5
$read_file_format,
$read_file,
$mate1_file,
$mate2_file,
$reads,
$quals,
$mate1s,
$qual1s,
$mate2s,
$qual2s,
$names,
$ls,
$rawls,
$header_ls,
$raw_header_ls,
$should_abort) = @_;
$args .= " --quiet";
$reportargs = "-a" unless defined($reportargs);
$args .= " -C" if $color;
$args .= " $reportargs";
# Write the reference to a fasta file
print "References:\n";
open(FA, $fa) || die;
while(<FA>) { print $_; }
close(FA);
if($do_build) {
my $build_args = ($color ? "-C" : "");
my $cmd = "$bowtie2_build --quiet --sanity $build_args $fa .simple_tests.tmp";
print "$cmd\n";
system($cmd);
($? == 0) || die "Bad exitlevel from bowtie2-build: $?";
}
my $pe = (defined($mate1s) && $mate1s ne "");
$pe = $pe || (defined($mate1_file));
my $mate1arg;
my $mate2arg;
my $readarg;
my $formatarg = "-c";
my ($readstr, $m1str, $m2str) = (undef, undef, undef);
$readstr = join(",", @$reads) if defined($reads);
$m1str = join(",", @$mate1s) if defined($mate1s);
$m2str = join(",", @$mate2s) if defined($mate2s);
if(defined($read_file) || defined($mate1_file)) {
defined($read_file_format) || die;
my $ext = "";
if($read_file_format eq "fastq") {
$formatarg = "-q";
$ext = ".fq";
} elsif($read_file_format eq "tabbed") {
$formatarg = "--12";
$ext = ".tab";
} elsif($read_file_format eq "fasta") {
$formatarg = "-f";
$ext = ".fa";
} elsif($read_file_format eq "qseq") {
$formatarg = "--qseq";
$ext = "_qseq.txt";
} elsif($read_file_format eq "raw") {
$formatarg = "-r";
$ext = ".raw";
} else {
die "Bad format: $read_file_format";
}
if(defined($read_file)) {
# Unpaired
open(RD, ">.simple_tests$ext") || die;
print RD $read_file;
close(RD);
$readarg = ".simple_tests$ext";
} else {
defined($mate1_file) || die;
defined($mate2_file) || die;
# Paired
open(M1, ">.simple_tests.1$ext") || die;
print M1 $mate1_file;
close(M1);
open(M2, ">.simple_tests.2$ext") || die;
print M2 $mate2_file;
close(M2);
$mate1arg = ".simple_tests.1$ext";
$mate2arg = ".simple_tests.2$ext";
}
} else {
writeReads(
$reads,
$quals,
$mate1s,
$qual1s,
$mate2s,
$qual2s,
$names,
".simple_tests.1.fq",
".simple_tests.2.fq");
$mate1arg = ".simple_tests.1.fq";
$mate2arg = ".simple_tests.2.fq";
$formatarg = "-q";
$readarg = $mate1arg;
}
my $cmd;
if($pe) {
# Paired-end case
$cmd = "$bowtie2 --debug $args .simple_tests.tmp $formatarg -1 $mate1arg -2 $mate2arg";
} else {
# Unpaired case
$cmd = "$bowtie2 --debug $args .simple_tests.tmp $formatarg $readarg";
}
print "$cmd\n";
open(BT, "$cmd |") || die "Could not open pipe '$cmd |'";
while(<BT>) {
print $_;
chomp;
if(substr($_, 0, 1) eq "@") {
push @$header_ls, [ split(/\t/, $_, -1) ];
push @$raw_header_ls, $_;
} else {
push @$ls, [ split(/\t/, $_, -1) ];
push @$rawls, $_;
}
}
close(BT);
($? == 0 || $should_abort) || die "bowtie2 aborted with exitlevel $?\n";
($? != 0 || !$should_abort) || die "bowtie2 failed to abort!\n";
}
##
# Compare a hash ref of expected SAM flags with a hash ref of observed SAM
# flags.
#
sub matchSamOptionalFlags($$) {
my ($flags, $ex_flags) = @_;
my %ex = ();
for(keys %$ex_flags) {
my ($nm, $ty, $vl) = split(/:/, $_);
defined($vl) || die "Could not parse optional flag field \"$_\"";
($ex{$nm}{ty}, $ex{$nm}{vl}) = ($ty, $vl);
}
for(keys %$flags) {
my ($ex_ty, $ex_vl);
if(defined($ex{$_})) {
($ex_ty, $ex_vl) = ($ex{$_}{ty}, $ex{$_}{vl});
} else {
($ex_ty, $ex_vl) = ("i", "0");
}
defined($ex_ty) || die;
defined($ex_vl) || die;
my ($ty, $vl) = ($flags->{$_}{ty}, $flags->{$_}{vl});
defined($ty) || die;
defined($vl) || die;
$ex_ty eq $ty ||
die "Expected SAM optional flag $_ to have type $ex_ty, had $ty";
$ex_vl eq $vl ||
die "Expected SAM optional flag $_ to have value $ex_vl, had $vl";
}
return 1;
}
my $tmpfafn = ".simple_tests.pl.fa";
my $last_ref = undef;
for (my $ci = 0; $ci < scalar(@cases); $ci++) {
my $c = $cases[$ci];
last unless defined($c);
# If there's any skipping of cases to be done, do it here prior to the
# eq_deeply check
my $color = 0;
$color = $c->{color} if defined($c->{color});
next if ($color && $skipColor);
my $do_build = 0;
unless(defined($last_ref) && eq_deeply($c->{ref}, $last_ref)) {
writeFasta($c->{ref}, $tmpfafn);
$do_build = 1;
}
$last_ref = $c->{ref};
# For each set of arguments...
my $case_args = $c->{args};
$case_args = "" unless defined($case_args);
my $first = 1; # did we build the index yet?
# Forward, then reverse-complemented
my $fwlo = ($c->{nofw} ? 1 : 0);
my $fwhi = ($c->{norc} ? 0 : 1);
for(my $fwi = $fwlo; $fwi <= $fwhi; $fwi++) {
my $fw = ($fwi == 0);
my $sam = 1;
my $reads = $c->{reads};
my $quals = $c->{quals};
my $m1s = $c->{mate1s};
my $q1s = $c->{qual1s};
my $m2s = $c->{mate2s};
my $q2s = $c->{qual2s};
my $read_file = undef;
my $mate1_file = undef;
my $mate2_file = undef;
$read_file = $c->{fastq} if defined($c->{fastq});
$read_file = $c->{tabbed} if defined($c->{tabbed});
$read_file = $c->{fasta} if defined($c->{fasta});
$read_file = $c->{qseq} if defined($c->{qseq});
$read_file = $c->{raw} if defined($c->{raw});
$mate1_file = $c->{fastq1} if defined($c->{fastq1});
$mate1_file = $c->{tabbed1} if defined($c->{tabbed1});
$mate1_file = $c->{fasta1} if defined($c->{fasta1});
$mate1_file = $c->{qseq1} if defined($c->{qseq1});
$mate1_file = $c->{raw1} if defined($c->{raw1});
$mate2_file = $c->{fastq2} if defined($c->{fastq2});
$mate2_file = $c->{tabbed2} if defined($c->{tabbed2});
$mate2_file = $c->{fasta2} if defined($c->{fasta2});
$mate2_file = $c->{qseq2} if defined($c->{qseq2});
$mate2_file = $c->{raw2} if defined($c->{raw2});
my $read_file_format = undef;
if(!defined($reads) && !defined($m1s) && !defined($m2s)) {
defined($read_file) || defined($mate1_file) || die;
$read_file_format = "fastq" if defined($c->{fastq}) || defined($c->{fastq1});
$read_file_format = "tabbed" if defined($c->{tabbed}) || defined($c->{tabbed});
$read_file_format = "fasta" if defined($c->{fasta}) || defined($c->{fasta1});
$read_file_format = "qseq" if defined($c->{qseq}) || defined($c->{qseq1});
$read_file_format = "raw" if defined($c->{raw}) || defined($c->{raw1});
next unless $fw;
}
# Run bowtie2
my @lines = ();
my @rawlines = ();
my @header_lines = ();
my @header_rawlines = ();
print $c->{name}." " if defined($c->{name});
print "(fw:".($fw ? 1 : 0).", sam:$sam)\n";
my $mate1fw = 1;
my $mate2fw = 0;
$mate1fw = $c->{mate1fw} if defined($c->{mate1fw});
$mate2fw = $c->{mate2fw} if defined($c->{mate2fw});
if(!$fw) {
# Reverse-complement the reads
my @s = (); @s = @$reads if defined($reads);
my @q = (); @q = @$quals if defined($quals);
# Reverse-complement mates and switch mate1 with mate2
my @m1 = (); @m1 = @$m1s if defined($m1s);
my @m2 = (); @m2 = @$m2s if defined($m2s);
my @q1 = (); @q1 = @$q1s if defined($q1s);
my @q2 = (); @q2 = @$q2s if defined($q2s);
for(0..scalar(@s)-1) {
$s[$_] = DNA::revcomp($s[$_], $color);
$q[$_] = reverse $q[$_] if $_ < scalar(@q);
}
if($mate1fw == $mate2fw) {
for(0..$#m1) { $m1[$_] = DNA::revcomp($m1[$_], $color); }
for(0..$#m2) { $m2[$_] = DNA::revcomp($m2[$_], $color); }
for(0..$#q1) { $q1[$_] = reverse $q1[$_]; }
for(0..$#q2) { $q2[$_] = reverse $q2[$_]; }
}
$reads = \@s if defined($reads);
$quals = \@q if defined($quals);
$m1s = \@m2 if defined($m1s);
$q1s = \@q2 if defined($q1s);
$m2s = \@m1 if defined($m2s);
$q2s = \@q1 if defined($q2s);
}
my $a = $case_args;
if(defined($m2s)) {
$a .= " --";
$a .= ($mate1fw ? "f" : "r");
$a .= ($mate2fw ? "f" : "r");
}
runbowtie2(
$do_build && $first,
"$a",
$color,
$tmpfafn,
$c->{report},
$read_file_format, # formate of read/mate files
$read_file, # read file
$mate1_file, # mate #1 file
$mate2_file, # mate #2 file
$reads, # read list
$quals, # quality list
$m1s, # mate #1 sequence list
$q1s, # mate #1 quality list
$m2s, # mate #2 sequence list
$q2s, # mate #2 quality list
$c->{names},
\@lines,
\@rawlines,
\@header_lines,
\@header_rawlines,
$c->{should_abort});
$first = 0;
my $pe = defined($c->{mate1s}) && $c->{mate1s} ne "";
$pe = $pe || defined($mate1_file);
$pe = $pe || $c->{paired};
my ($lastchr, $lastoff, $lastoff_orig) = ("", -1, -1);
# Keep temporary copies of hits and pairhits so that we can
# restore for the next orientation
my $hitstmp = [];
$hitstmp = clone($c->{hits}) if defined($c->{hits});
my $pairhitstmp = [];
$pairhitstmp = clone($c->{pairhits}) if defined($c->{pairhits});
my $pairhits_orig_tmp = [];
$pairhits_orig_tmp = clone($c->{pairhits_orig}) if defined($c->{pairhits_orig});
# Record map from already-seen read name, read sequence and
# quality to the place on the reference where it's reported.
# This allows us to check that the pseudo-random generator
# isn't mistakenly yielding different alignments for identical
# reads.
my %seenNameSeqQual = ();
if(defined($c->{lines})) {
my $l = scalar(@lines);
$l == $c->{lines} || die "Expected $c->{lines} lines, got $l";
}
for my $li (0 .. scalar(@lines)-1) {
my $l = $lines[$li];
my ($readname, $orient, $chr, $off_orig, $off, $seq, $qual, $mapq,
$oms, $editstr, $flagstr, $samflags, $cigar, $rnext, $pnext,
$tlen);
my %samoptflags = ();
if($sam) {
scalar(@$l) >= 11 ||
die "Bad number of fields; expected at least 11 got ".
scalar(@$l).":\n$rawlines[$li]\n";
($readname, $samflags, $chr, $off) = @$l[0..3];
($seq, $qual) = @$l[9..10];
$orient = ((($samflags >> 4) & 1) == 0) ? "+" : "-";
$mapq = $l->[4]; # mapping quality
$cigar = $l->[5]; # CIGAR string
$rnext = $l->[6]; # ref seq of next frag in template
$pnext = $l->[7]; # position of next frag in template
$tlen = $l->[8]; # template length
if($pnext == 0) { $pnext = "*"; } else { $pnext--; }
for(my $m = 11; $m < scalar(@$l); $m++) {
next if $l->[$m] eq "";
my ($nm, $ty, $vl) = split(/:/, $l->[$m]);
defined($vl) ||
die "Could not parse optional flag field $m: ".
"\"$l->[$m]\"";
$samoptflags{$nm}{ty} = $ty;
$samoptflags{$nm}{vl} = $vl;
}
if($off > 0) { $off--; }
else { $off = "*"; }
$off_orig = $off;
$off = "*" if $cigar eq "*";
} else {
scalar(@$l) == 9 ||
die "Bad number of fields; expected 9 got ".
scalar(@$l).":\n$rawlines[$li]\n";
($readname, $orient, $chr, $off, $seq,
$qual, $oms, $editstr, $flagstr) = @$l;
$off_orig = $off;
}
if($c->{check_random}) {
my $rsqKey = "$readname\t$orient\t$seq\t$qual";
my $rsqVal = "$chr\t$off";
if(defined($seenNameSeqQual{$rsqKey})) {
$seenNameSeqQual{$rsqKey} eq $rsqVal ||
die "Two hits for read/seq/qual:\n$rsqKey\n".
"had different alignments:\n".
"$seenNameSeqQual{$rsqKey}\n$rsqVal\n";
}
$seenNameSeqQual{$rsqKey} = $rsqVal;
}
$readname ne "" || die "readname was blank:\n".Dumper($c);
my $rdi = $readname;
$rdi = substr($rdi, 1) if substr($rdi, 0, 1) eq "r";
my $mate = 0;
if($readname =~ /\//) {
($rdi, $mate) = split(/\//, $readname);
defined($rdi) || die;
}
$rdi = $c->{idx_map}{$rdi} if defined($c->{idx_map}{$rdi});
$rdi ne "" || die "rdi was blank:\nreadname=$readname\n".Dumper($c);
if($rdi != int($rdi)) {
# Read name has non-numeric characters. Figure out
# what number it is by scanning the names list.
my $found = 0;
for(my $i = 0; $i < scalar(@{$c->{names}}); $i++) {
if($c->{names}->[$i] eq $readname) {
$rdi = $i;
$found = 1;
last;
}
}
$found || die "No specified name matched reported name $readname";
}
# Check that the sequence printed in the alignment is sane
if($color) {
# It's a decoded nucleotide sequence
my $dseq = $c->{dec_seq}->[$rdi];
if(defined($dseq)) {
$seq eq $dseq || die "Expected decoded sequence '$seq' from alignment to match '$dseq'";
}
my $dqual = $c->{dec_qual}->[$rdi];
if(defined($dqual)) {
$qual eq $dqual || die "Expected decoded qualities '$qual' from alignment to match '$dqual'";
}
} else {
}
# Make simply-named copies of some portions of the test case
# 'hits'
my %hits = ();
%hits = %{$c->{hits}->[$rdi]} if
defined($c->{hits}->[$rdi]);
# 'flags'
my $flags = undef;
$flags = $c->{flags}->[$rdi] if
defined($c->{flags}->[$rdi]);
# 'samflags'
my $ex_samflags = undef;
$ex_samflags = $c->{ex_samflags}->[$rdi] if
defined($c->{ex_samflags}->[$rdi]);
# 'samflags_map'
my $ex_samflags_map = undef;
$ex_samflags_map = $c->{samflags_map}->[$rdi] if
defined($c->{samflags_map}->[$rdi]);
# 'samoptflags'
my $ex_samoptflags = undef;
$ex_samoptflags = $c->{samoptflags}->[$rdi] if
defined($c->{samoptflags}->[$rdi]);
# 'cigar'
my $ex_cigar = undef;
$ex_cigar = $c->{cigar}->[$rdi] if
defined($c->{cigar}->[$rdi]);
# 'cigar_map'
my $ex_cigar_map = undef;
$ex_cigar_map = $c->{cigar_map}->[$rdi] if
defined($c->{cigar_map}->[$rdi]);
# 'mapq_hi' - boolean indicating whether mapq is hi/lo
my $ex_mapq_hi = undef;
$ex_mapq_hi = $c->{mapq_hi}->[$rdi] if
defined($c->{mapq_hi}->[$rdi]);
# 'mapq'
my $ex_mapq = undef;
$ex_mapq = $c->{mapq}->[$rdi] if
defined($c->{mapq}->[$rdi]);
# 'mapq_map'
my $ex_mapq_map = undef;
$ex_mapq_map = $c->{mapq_map}->[$rdi] if
defined($c->{mapq_map}->[$rdi]);
# 'rnext_map'
my $ex_rnext_map = undef;
$ex_rnext_map = $c->{rnext_map}->[$rdi] if
defined($c->{rnext_map}) && defined($c->{rnext_map}->[$rdi]);
# 'pnext_map'
my $ex_pnext_map = undef;
$ex_pnext_map = $c->{pnext_map}->[$rdi] if
defined($c->{pnext_map}) && defined($c->{pnext_map}->[$rdi]);
# 'tlen_map'
my $ex_tlen_map = undef;
$ex_tlen_map = $c->{tlen_map}->[$rdi] if
defined($c->{tlen_map}) && defined($c->{tlen_map}->[$rdi]);
# 'flags_fw'
my $flags_fw = undef;
$flags_fw = $c->{flags_fw}->[$rdi] if
defined($c->{flags_fw}->[$rdi]);
# 'flags_rc'
my $flags_rc = undef;
$flags_rc = $c->{flags_rc}->[$rdi] if
defined($c->{flags_rc}->[$rdi]);
# 'pairhits'
my %pairhits = ();
%pairhits = %{$c->{pairhits}->[$rdi]} if
defined($c->{pairhits}->[$rdi]);
# 'pairhits_orig'
my %pairhits_orig = ();
%pairhits_orig = %{$c->{pairhits_orig}->[$rdi]} if
defined($c->{pairhits_orig}->[$rdi]);
# 'pairflags'
my %pairflags = ();
%pairflags = %{$c->{pairflags}->[$rdi]} if
defined($c->{pairflags}->[$rdi]);
# 'hits_are_superset'
my $hits_are_superset = 0;
$hits_are_superset = $c->{hits_are_superset}->[$rdi] if
defined($ci);
# edits
my $ex_edits = undef;
$ex_edits = $c->{edits}->[$rdi] if
defined($c->{edits}->[$rdi]);
if(!$sam) {
# Bowtie flags
if(defined($flags)) {
$flagstr eq $flags ||
die "Expected flags=\"$flags\", got \"$flagstr\"";
}
if(defined($flags_fw) && $fw) {
$flagstr eq $flags_fw ||
die "Expected flags=\"$flags_fw\", got \"$flagstr\"";
}
if(defined($flags_rc) && !$fw) {
$flagstr eq $flags_rc ||
die "Expected flags=\"$flags_rc\", got \"$flagstr\"";
}
if(defined($c->{flag_map})) {
if(defined($c->{flag_map}->[$rdi]->{$off})) {
$flagstr eq $c->{flag_map}->[$rdi]->{$off} ||
die "Expected flags=\"$c->{flag_map}->[$rdi]->{$off}\"".
" at offset $off, got \"$flagstr\"";
}
}
}
if($sam) {
# SAM flags
if(defined($ex_samflags)) {
$samflags eq $ex_samflags ||
die "Expected flags $ex_samflags, got $samflags";
}
if(defined($ex_samflags_map)) {
if(defined($c->{samflags_map}->[$rdi]->{$off})) {
my $ex = $c->{samflags_map}->[$rdi]->{$off};
$samflags eq $ex || die
"Expected FLAGS value $ex at offset $off, got $samflags"
} else {
die "Expected to see alignment with offset $off parsing samflags_map";
}
}
# CIGAR string
if(defined($ex_cigar)) {
$cigar eq $ex_cigar ||
die "Expected CIGAR string $ex_cigar, got $cigar";
}
if(defined($ex_cigar_map)) {
if(defined($c->{cigar_map}->[$rdi]->{$off})) {
my $ex = $c->{cigar_map}->[$rdi]->{$off};
$cigar eq $ex || die
"Expected CIGAR string $ex at offset $off, got $cigar"
} else {
die "Expected to see alignment with offset $off parsing cigar_map";
}
}
# MAPQ
if(defined($ex_mapq)) {
$mapq eq $ex_mapq ||
die "Expected MAPQ $ex_mapq, got $mapq";
}
if(defined($ex_mapq_map)) {
if(defined($c->{mapq_map}->[$rdi]->{$off})) {
my $ex = $c->{mapq_map}->[$rdi]->{$off};
$mapq eq $ex || die
"Expected MAPQ string $ex at offset $off, got $mapq"
} else {
die "Expected to see alignment with offset $off parsing mapq_map";
}
}
# MAPQ
if(defined($ex_mapq_hi)) {
if($ex_mapq_hi == 0) {
$mapq < 20 || die "Expected MAPQ < 20, got $mapq";
} else {
$mapq >= 20 || die "Expected MAPQ >= 20, got $mapq";
}
}
if(defined($ex_mapq_map)) {
if(defined($c->{mapq_map}->[$rdi]->{$off})) {
my $ex = $c->{mapq_map}->[$rdi]->{$off};
$mapq eq $ex || die
"Expected MAPQ string $ex at offset $off, got $mapq"
} else {
die "Expected to see alignment with offset $off parsing mapq_map";
}
}
# SAM optional flags
if(defined($ex_samoptflags)) {
matchSamOptionalFlags(\%samoptflags, $ex_samoptflags);
}
if(defined($c->{samoptflags_map})) {
if(defined($c->{samoptflags_map}->[$rdi]->{$off})) {
matchSamOptionalFlags(
\%samoptflags,
$c->{samoptflags_map}->[$rdi]->{$off});
} else {
die "Expected to see alignment with offset $off parsing samoptflags_map";
}
}
if(defined($c->{samoptflags_flagmap})) {
if(defined($c->{samoptflags_flagmap}->[$rdi]->{$samflags})) {
matchSamOptionalFlags(
\%samoptflags,
$c->{samoptflags_flagmap}->[$rdi]->{$samflags});
} else {
die "Expected to see alignment with flag $samflags parsing samoptflags_flagmap";
}
}
# RNEXT map
if(defined($c->{rnext_map})) {
if(defined($c->{rnext_map}->[$rdi]->{$off})) {
my $ex = $c->{rnext_map}->[$rdi]->{$off};
$rnext eq $ex || die
"Expected RNEXT '$ex' at offset $off, got '$rnext'"
} else {
die "Expected to see alignment with offset $off parsing rnext_map".Dumper($c);
}
}
# PNEXT map
if(defined($c->{pnext_map})) {
if(defined($c->{pnext_map}->[$rdi]->{$off})) {
my $ex = $c->{pnext_map}->[$rdi]->{$off};
$pnext eq $ex || die
"Expected PNEXT '$ex' at offset $off, got '$pnext'"
} else {
die "Expected to see alignment with offset $off parsing pnext_map";
}
}
# TLEN map
if(defined($c->{tlen_map})) {
if(defined($c->{tlen_map}->[$rdi]->{$off})) {
my $ex = $c->{tlen_map}->[$rdi]->{$off};
$tlen eq $ex || die
"Expected TLEN '$ex' at offset $off, got '$tlen'"
} else {
die "Expected to see alignment with offset $off parsing tlen_map";
}
}
}
if($pe && $lastchr ne "") {
my $offkey_orig = $lastoff.",".$off_orig;
$offkey_orig = $off_orig.",".$lastoff_orig if $off_orig eq "*";
my $offkey = $lastoff.",".$off;
$offkey = $off.",".$lastoff if $off eq "*";
if($lastoff ne "*" && $off ne "*") {
$offkey = min($lastoff, $off).",".max($lastoff, $off);
}
if(defined($c->{pairhits}->[$rdi])) {
defined($pairhits{$offkey}) ||
die "No such paired off as $offkey in pairhits list: ".Dumper(\%pairhits)."\n";
$c->{pairhits}->[$rdi]->{$offkey}--;
delete $c->{pairhits}->[$rdi]->{$offkey} if $c->{pairhits}->[$rdi]->{$offkey} == 0;
%pairhits = %{$c->{pairhits}->[$rdi]};
}
if(defined($c->{pairhits_orig}->[$rdi])) {
defined($pairhits_orig{$offkey_orig}) ||
die "No such paired off as $offkey in pairhits_orig list: ".Dumper(\%pairhits_orig)."\n";
$c->{pairhits_orig}->[$rdi]->{$offkey_orig}--;
delete $c->{pairhits_orig}->[$rdi]->{$offkey_orig} if $c->{pairhits_orig}->[$rdi]->{$offkey_orig} == 0;
%pairhits_orig = %{$c->{pairhits_orig}->[$rdi]};
}
($lastchr, $lastoff, $lastoff_orig) = ("", -1, -1);
} elsif($pe) {
# Found an unpaired alignment from aligning a pair
my $foundSe =
defined($c->{pairhits}->[$rdi]) &&
$c->{pairhits}->[$rdi]->{$off};
if($foundSe) {
$c->{pairhits}->[$rdi]->{$off}--;
delete $c->{pairhits}->[$rdi]->{$off}
if $c->{pairhits}->[$rdi]->{$off} == 0;
%pairhits = %{$c->{pairhits}->[$rdi]};
} else {
($lastchr, $lastoff) = ($chr, $off);
}
# Found an unpaired alignment from aligning a pair
$foundSe =
defined($c->{pairhits_orig}->[$rdi]) &&
$c->{pairhits_orig}->[$rdi]->{$off_orig};
if($foundSe) {
$c->{pairhits_orig}->[$rdi]->{$off_orig}--;
delete $c->{pairhits_orig}->[$rdi]->{$off_orig}
if $c->{pairhits_orig}->[$rdi]->{$off_orig} == 0;
%pairhits_orig = %{$c->{pairhits_orig}->[$rdi]};
} else {
($lastchr, $lastoff, $lastoff_orig) = ($chr, $off, $off_orig);
}
} else {
if(defined($c->{hits}->[$rdi])) {
defined($hits{$off}) ||
die "No such off as $off in hits list: ".Dumper(\%hits)."\n";
$c->{hits}->[$rdi]->{$off}--;
delete $c->{hits}->[$rdi]->{$off} if $c->{hits}->[$rdi]->{$off} == 0;
%hits = %{$c->{hits}->[$rdi]};
}
}
if(!$sam && defined($ex_edits)) {
my $eds = $l->[7];
$eds eq $ex_edits ||
die "For edit string, expected \"$ex_edits\" got \"$eds\"\n";
}
}
# Go through all the per-read
my $klim = 0;
$klim = scalar(@{$c->{hits}}) if defined($c->{hits});
$klim = max($klim, scalar(@{$c->{pairhits}})) if defined($c->{pairhits});
for (my $k = 0; $k < $klim; $k++) {
# For each read
my %hits = %{$c->{hits}->[$k]} if defined($c->{hits}->[$k]);
my %pairhits = %{$c->{pairhits}->[$k]} if defined($c->{pairhits}->[$k]);
my %pairhits_orig = %{$c->{pairhits_orig}->[$k]} if defined($c->{pairhits_orig}->[$k]);
my $hits_are_superset = $c->{hits_are_superset}->[$k];
# Check if there are any hits left over
my $hitsLeft = scalar(keys %hits);
if($hitsLeft != 0 && !$hits_are_superset) {
print Dumper(\%hits);
die "Had $hitsLeft hit(s) left over at position $k";
}
my $pairhitsLeft = scalar(keys %pairhits);
if($pairhitsLeft != 0 && !$hits_are_superset) {
print Dumper(\%pairhits);
die "Had $pairhitsLeft hit(s) left over at position $k";
}
my $pairhits_orig_Left = scalar(keys %pairhits_orig);
if($pairhits_orig_Left != 0 && !$hits_are_superset) {
print Dumper(\%pairhits_orig);
die "Had $pairhits_orig_Left hit(s) left over at position $k";
}
}
$c->{hits} = $hitstmp;
$c->{pairhits} = $pairhitstmp;
$c->{pairhits_orig} = $pairhits_orig_tmp;
}
$last_ref = undef if $first;
}
print "PASSED\n";