#!/usr/bin/perl -w

#
# Copyright 2011, Ben Langmead
#
# This file is part of Bowtie 2.
#
# Bowtie 2 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Bowtie 2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Bowtie 2.  If not, see <https://www.gnu.org/licenses/>.
#

##
# Give simple tests with known results to bowtie2.
#
# Command-line options:
#   --bowtie2 <path>        path to the bowtie2 executable
#   --bowtie2-build <path>  path to the bowtie2-build executable
#   --skip-color            sets $skipColor to 1
#
# If either executable is not present/executable, the script tries to build
# both with "make" in the directory of the --bowtie2 path, then insists that
# both can be run.
#

use strict;
use warnings;
use Getopt::Long;
use File::Basename qw(basename dirname);
use FindBin qw($Bin);
use lib $Bin;
use List::Util qw(max min);
use Data::Dumper;
use DNA;
use Clone qw(clone);
use Test::Deep;

my $bowtie2       = "";
my $bowtie2_build = "";
# NOTE(review): the default is already 1 and "skip-color" is a plain boolean
# flag, so passing --skip-color does not change the value -- confirm whether a
# negatable option ("skip-color!") was intended.
my $skipColor     = 1;

GetOptions(
    "bowtie2=s"       => \$bowtie2,
    "bowtie2-build=s" => \$bowtie2_build,
    "skip-color"      => \$skipColor) || die "Bad options";

# Both paths are required.  Without this check an omitted option used to
# surface only as a confusing failure of the "make" invocation below.
length($bowtie2)       || die "Must specify --bowtie2 <path>";
length($bowtie2_build) || die "Must specify --bowtie2-build <path>";

if(! -x $bowtie2 || ! -x $bowtie2_build) {
    # Split the paths in-process rather than shelling out to dirname/basename,
    # so paths with spaces or shell metacharacters are handled correctly.
    my $bowtie2_dir       = dirname($bowtie2);
    my $bowtie2_exe       = basename($bowtie2);
    my $bowtie2_build_exe = basename($bowtie2_build);
    # Both targets are built in the directory of the --bowtie2 path.
    # List-form system() bypasses the shell entirely.
    my @make_cmd = ("make", "-C", $bowtie2_dir, $bowtie2_exe, $bowtie2_build_exe);
    my $status = system(@make_cmd);
    $status == 0 || die "Could not build executables with '@make_cmd' (wait status $status)";
}

(-x $bowtie2)       || die "Cannot run '$bowtie2'";
(-x $bowtie2_build) || die "Cannot run '$bowtie2_build'";

my @cases = (

    { name   => "Left-align insertion",
      ref    => [ "GCGATATCTACGACTGCTACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ],
      norc   => 1,
      reads  => [ "ACAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA" ],
      #  ref: AC-AAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
      # read: ACAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
      #          0123456789012345678901234567890123456789
      cigar  => [ "2M1I40M" ],
      samoptflags => [ {
          "MD:Z:42"  => 1,
          "YT:Z:UU"  => 1,
          "NM:i:1"   => 1,
          "XG:i:1"   => 1,
          "XO:i:1"   => 1,
          "AS:i:-8"  => 1 } ],
      report => "",
      args   => ""
    },

    { name   => "Left-align deletion",
      ref    => [ "GCGATATCTACGACTGCTACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ],
      norc   => 1,
      reads  => [ "ACGTACAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA" ],
      #  ref: ACGTACAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
      # read: ACGTAC-AAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGA
      #              012345678901234567890123456789012345678
      cigar  => [ "6M1D39M" ],
      samoptflags => [ {
          "MD:Z:6^A39" => 1,
          "YT:Z:UU"    => 1,
          "NM:i:1"     => 1,
          "XG:i:1"     => 1,
          "XO:i:1"     => 1,
          "AS:i:-8"    => 1 } ],
      report => "",
      args   => ""
    },

    { name   => "Left-align insertion with mismatch at LHS",
      ref    => [ "GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ],
      norc   => 1,
      reads  => [ "TATCTACGACTGCTACGCCCTAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC" ],
      #  ref: GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC
      # read:     TATCTACGACTGCTACGCCC-TAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC
      #           01234567890123456789-012345678901234567890123456789012345678901234
      #           0         1          2         3         4         5         6
      cigar  => [ "20M1D45M" ],
      samoptflags => [ {
          "MD:Z:20^A0A44" => 1,
          "YT:Z:UU"       => 1,
          "NM:i:2"
=> 1, "XG:i:1" => 1, "XO:i:1" => 1, "XM:i:1" => 1, "AS:i:-14" => 1 } ], report => "", args => "" }, # This won't necessarily pass because the original location of the deletion # might #{ name => "Left-align deletion with mismatch at LHS", # ref => [ "GCGATATCTACGACTGCTACGCCCAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC" ], # norc => 1, # reads => [ "TATCTACGACTGCTACGCCAAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC" ], # # ref: GCGATATCTACGACTGCTACGCCC-AAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGACAGC # # read: TATCTACGACTGCTACGCCAAAAAAAAAAAAAAAAGTGTTTACGTTGCTAGACTCGATCGATCTGAC # # 01234567890123456789-012345678901234567890123456789012345678901234 # # 0 1 0 1 2 3 4 # cigar => [ "20M1I45M" ], # samoptflags => [ { # "MD:Z:19C45" => 1, # "YT:Z:UU" => 1, # "NM:i:2" => 1, # "XG:i:1" => 1, # "XO:i:1" => 1, # "XM:i:1" => 1, # "AS:i:-14" => 1 } ], # report => "", # args => "" #}, { name => "Flags for when mates align non-concordantly, with many alignments for one", # 012345678 ref => [ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTAT
CCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCG" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 # 0 1 2 3 4 5 6 7 8 9 0 1 # 0 1 norc => 1, mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ], mate2s => [ "ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA" ], # 981 1064 1147 # ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA samflags_map => [{ 981 => (1 | 128), 1064 => (1 | 128), 1147 
=> (1 | 128), 2 => (1 | 64) }], report => "", args => "" }, { name => "Flags for when mates align non-concordantly, with many alignments for one", # 012345678 ref => [ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAG" ], # 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 # 0 1 2 3 4 5 6 7 8 9 0 1 # 0 1 norc => 1, mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ], mate2s => [ "ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA" ], tlen_map => [{ 2 => 1021, 981 => -1021 }], samflags_map => [{ 981 => (1 | 128), 2 => (1 | 64) }], report => "", args => "" }, { name => "Flags for when mates align non-concordantly, with many alignments for one", # 012345678 ref => [ "CAGCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGATAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCGAGGATAGATCGCTCGCCTGACCTATATCGCTCGCGATTACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGATCG" ], # 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 # 0 * 1 * 2 * # 0 1 norc => 1, mate1s => [ "GCGGCTAGCTATCGATCGTCCGGCAGCTATCATTATGA" ], mate2s => [ "TCGTCGTGATGCGTCAGCTCGGATAGCCAGTACGTAGCTCGT" ], # 981 1064 1147 # ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA ACGAGCTACGTACTGGCTATCCGAGCTGACGCATCACGACGA samflags_map => [{ 79 => (1 | 2 | 16 | 128), 162 => (1 | 2 | 16 | 128), 245 => (1 | 2 | 16 | 128), 2 => (1 | 2 | 32 | 64) }], report => "", args => "" }, # Checking MD:Z strings for alignment { name => "MD:Z 1", ref => [ "CACGATCGACTTGA"."C"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], reads => [ "CACGATCGACTTGG". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], hits => [ { 0 => 1 } ], samoptflags => [ { "AS:i:-14" => 1, # alignment score "XM:i:1" => 1, # num mismatches "XO:i:1" => 1, # num gap opens "XG:i:1" => 1, # num gap extensions "NM:i:2" => 1, # num edits "MD:Z:13^A0C39" => 1, # mismatching positions/bases "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) } ] }, { name => "MD:Z 2", ref => [ "CACGATCGACTTGA"."A"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], reads => [ "CACGATCGACTTGG". 
"TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], # 0123456789012 012345678901234567890123456789012345678 hits => [ { 0 => 1 } ], samoptflags => [ { "AS:i:-14" => 1, # alignment score "XM:i:1" => 1, # num mismatches "XO:i:1" => 1, # num gap opens "XG:i:1" => 1, # num gap extensions "NM:i:2" => 1, # num edits "MD:Z:13^A0A39" => 1, # mismatching positions/bases "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) } ] }, { name => "MD:Z 3", ref => [ "CACGATCGACTTGT"."AA"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], reads => [ "CACGATCGACTTGC". "TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], # 0123456789012 012345678901234567890123456789012345678 hits => [ { 0 => 1 } ], samoptflags => [ { "AS:i:-17" => 1, # alignment score "XM:i:1" => 1, # num mismatches "XO:i:1" => 1, # num gap opens "XG:i:2" => 1, # num gap extensions "NM:i:3" => 1, # num edits "MD:Z:13^TA0A39" => 1, # mismatching positions/bases "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) } ] }, { name => "MD:Z 4", ref => [ "CACGATCGACTTGN"."NN"."TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], reads => [ "CACGATCGACTTGC". 
"TCATCGACGCTATCATTAATATATATAAGCCCGCATCTA" ], # 0123456789012 012345678901234567890123456789012345678 hits => [ { 0 => 1 } ], samoptflags => [ { "AS:i:-12" => 1, # alignment score "XN:i:3" => 1, # num ambiguous ref bases "XM:i:1" => 1, # num mismatches "XO:i:1" => 1, # num gap opens "XG:i:2" => 1, # num gap extensions "NM:i:3" => 1, # num edits "MD:Z:13^NN0N39" => 1, # mismatching positions/bases "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) } ] }, # # Local alignment # # Local alignment for a short perfect hit where hit spans the read { name => "Local alignment 1", ref => [ "TTGT" ], reads => [ "TTGT" ], args => "--local --policy \"MIN=L,0.0,0.75\"", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:4=" ], cigar => [ "4M" ], samoptflags => [ { "AS:i:8" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:4" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YP:i:0" => 1, # read aligned repetitively in paired fashion "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) } ] }, # T T G A T T G A # T x T x # T x T # G x G # T T # Local alignment for a short hit where hit is trimmed at one end { name => "Local alignment 2", ref => [ "TTGA" ], reads => [ "TTGT" ], args => "--local --policy \"MIN=L,0.0,0.75\\;SEED=0,3\\;IVAL=C,1,0\"", report => "-a", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:3=1S" ], cigar => [ "3M1S" ], samoptflags => [ { "AS:i:6" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:3" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YP:i:0" 
=> 1, # read aligned repetitively in paired fashion "YT:Z:UU" => 1, # type of alignment (concordant/discordant/etc) } ] }, # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 # T T G T T C G T T T G T T C G T # 0 T x # 1 T x # 2 G x # 3 T x # 4 T x # 5 C x # 6 G x # 7 T x # 8 T x # 9 T x # 0 G x # 1 T x # 2 T x # # Score=130 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 # T T G T T C G T T T G T T C G T # 0 T x # 1 T x # 2 G x # 3 T x # 4 T x # 5 C x # 6 G x # 7 T x # 8 T # 9 T # 0 G # 1 T # 2 T # # Score=80 # Local alignment for a perfect hit { name => "Local alignment 3", # TTGTTCGT # TTGTTCGT ref => [ "TTGTTCGTTTGTTCGT" ], # 0123456789012345 # TTGTTCGTTTGTT # TTGTTCGT----- reads => [ "TTGTTCGTTTGTT" ], args => "--local -L 8 -i C,1,0 --score-min=C,12", report => "-a", hits => [ { 0 => 1, 8 => 1 } ], flags_map => [{ 0 => "XM:0,XP:0,XT:UU,XC:13=", 8 => "XM:0,XP:0,XT:UU,XC:8=" }], cigar_map => [{ 0 => "13M", 8 => "8M5S" }], samoptflags_map => [{ 0 => { "AS:i:26" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 }, 8 => { "AS:i:16" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1 } }] }, # 1 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 # T T G T T C G T T T G T T C G T # 0 T x # 1 T x # 2 G x # 3 T x # 4 T x # 5 C x # 6 G x # 7 T x # 8 T # 9 T # 10 G # 1 T # Local alignment for a hit that should be trimmed from the right end { name => "Local alignment 4", ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGTTTGT" ], args => "--local --policy \"SEED=0,3\\;IVAL=C,1,0\" --score-min=C,12", report => "-a", hits => [ { 0 => 1, 8 => 1 } ], flags_map => [{ 0 => "XM:0,XP:0,XT:UU,XC:12=", 8 => "XM:0,XP:0,XT:UU,XC:8=4S" }], cigar_map => [{ 0 => "12M", 8 => "8M4S" }], samoptflags_map => [{ 0 => { "AS:i:24" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:12" => 1 }, 8 => { "AS:i:16" => 1, "XS:i:16" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1 } }] }, # # Test some common featuers for the manual. E.g. when more than one # alignment is reported in -k mode, what order are they reported in? 
They # should be in order by alignment score. # { name => "Alignment order -k", # 012345678 ref => [ "GCGCATGCACATATCANNNNNGCGCATGCACATATCTNNNNNNNNGCGCATGCACATATTTNNNNNNNNNGCGCATGGTGTTATCA" ], reads => [ "GCGCATGCACATATCA" ], quals => [ "GOAIYEFGFIWDSFIU" ], args => "--min-score C,-24,0 -L 4", report => "-k 4" }, { name => "Alignment order -a", # 012345678 ref => [ "GCGCATGCACATATCANNNNNGCGCATGCACATATCTNNNNNNNNGCGCATGCACATATTTNNNNNNNNNGCGCATGGTGTTATCA" ], reads => [ "GCGCATGCACATATCA" ], quals => [ "GOAIYEFGFIWDSFIU" ], args => "--min-score C,-24,0 -L 4", report => "-a" }, # # What order are mates reported in? Should be reporting in mate1/mate2 # order. # { name => "Mate reporting order, -a", # 012345678 ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNCGGTAATACGGCCATCGCGGCGGCATTACTCGGCGACTGCACGAGCAGATATTGGGGGTCTAATATAACGTCTCATTAAAACGCTCTAGTCAGCTCATTGGCTCTA" ], mate1s => [ "CTATCATCACGCGGATATT", "GGGGGGGGTCTACCCCTAA", "ATACGGCCATCGCGGCGGCATTACTCGGCG" ], mate2s => [ "GGGGGGGGTCTACCCCTAA", "CTATCATCACGCGGATATT", "AGCCAATGAGCTGACTAGAGCGTTTT" ], quals => [ "GOAIYEFGFIWDSFIU" ], args => "", report => "-a" }, { name => "Mate reporting order, -M 1", # 012345678 ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNNNAGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAAATAGACGACTCGATCGCGGATTAGGGGTAGACCCCCCCCCGACTNNNNNNNNCGGTAATACGGCCATCGCGGCGGCATTACTCGGCGACTGCACGAGCAGATATTGGGGGTCTAATATAACGTCTCATTAAAACGCTCTAGTCAGCTCATTGGCTCTA" ], mate1s => [ "CTATCATCACGCGGATATT", "GGGGGGGGTCTACCCCTAA", "ATACGGCCATCGCGGCGGCATTACTCGGCG" ], mate2s => [ "GGGGGGGGTCTACCCCTAA", 
"CTATCATCACGCGGATATT", "AGCCAATGAGCTGACTAGAGCGTTTT" ], quals => [ "GOAIYEFGFIWDSFIU" ], args => "", report => "-M 1" }, # # Test dovetailing, containment, and overlapping # { name => "Non-overlapping; no args", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATA" ], mate2s => [ "CGCATCGACATTAATATCC" ], pairhits => [{ "1,23" => 1 }], mate1fw => 1, mate2fw => 1, report => "-M 1" }, { name => "Non-overlapping; --no-discordant", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATA" ], mate2s => [ "CGCATCGACATTAATATCC" ], pairhits => [{ "1,23" => 1 }], mate1fw => 1, mate2fw => 1, report => "-M 1 --no-discordant" }, { name => "Non-overlapping; --no-discordant --no-mixed", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATA" ], mate2s => [ "CGCATCGACATTAATATCC" ], pairhits => [{ "1,23" => 1 }], mate1fw => 1, mate2fw => 1, report => "-M 1 --no-discordant --no-mixed" }, { name => "Non-overlapping; --no-discordant --no-mixed", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATA" ], mate2s => [ "CGCATCGACATTAATATCC" ], pairhits => [{ "1,23" => 1 }], mate1fw => 1, mate2fw => 1, report => "-M 1 --no-discordant --no-mixed" }, { name => "Non-overlapping; --no-dovetail", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATA" ], mate2s => [ "CGCATCGACATTAATATCC" ], pairhits => [{ "1,23" => 1 }], mate1fw => 1, mate2fw => 1, args => "--no-dovetail", report => "-M 1" }, { name => "Non-overlapping; --un-conc=.tmp.simple_tests.pl", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 
01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATA" ], mate2s => [ "CGCATCGACATTAATATCC" ], pairhits => [{ "1,23" => 1 }], mate1fw => 1, mate2fw => 1, args => "--un-conc=.tmp.simple_tests.pl", report => "-M 1" }, { name => "Non-overlapping; --no-overlap", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATA" ], mate2s => [ "CGCATCGACATTAATATCC" ], pairhits => [{ "1,23" => 1 }], mate1fw => 1, mate2fw => 1, args => "--no-overlap", report => "-M 1" }, { name => "Overlapping; no args", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATATTA" ], mate2s => [ "TTAGCGCATCGACATTAATATCC" ], pairhits => [{ "1,19" => 1 }], mate1fw => 1, mate2fw => 1, args => "", report => "-M 1" }, { name => "Overlapping; --no-dovetail", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATATTA" ], mate2s => [ "TTAGCGCATCGACATTAATATCC" ], pairhits => [{ "1,19" => 1 }], mate1fw => 1, mate2fw => 1, args => "--no-dovetail", report => "-M 1" }, { name => "Overlapping; --no-contain", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATATTA" ], mate2s => [ "TTAGCGCATCGACATTAATATCC" ], pairhits => [{ "1,19" => 1 }], mate1fw => 1, mate2fw => 1, args => "--no-contain", report => "-M 1" }, { name => "Overlapping; --no-overlap", ref => [ "AGCTATCATCACGCGGATATTAGCGCATCGACATTAATATCCCCAAA" ], # 01234567890123456789012345678901234567890123456 mate1s => [ "GCTATCATCACGCGGATATTA" ], mate2s => [ "TTAGCGCATCGACATTAATATCC" ], pairhits => [], mate1fw => 1, mate2fw => 1, args => "--no-overlap", report => "-M 1" }, # # Test XS:i with quality scaling # { name => "Scoring params 1", # 012345678 ref => [ 
"ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTTAATTTTATAAACACCTCGCTGCGGGG" ], reads => [ "GCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTTAATTTTATAAACACCTC" ], quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfglduhiuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 7 8 args => "", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "88M" ], samoptflags => [ { "AS:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:88" => 1 } ], }, { name => "Scoring params 2", # 012345678 ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], # * -1 * -6 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3 quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 7 8 args => "--ignore-quals --score-min C,-40,0 -N 1 -L 20", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "37M4D30M3I14M" ], # 37M4D30M13I4M samoptflags => [ { "AS:i:-38" => 1, "YT:Z:UU" => 1, "MD:Z:0G11A24^GACC44" => 1, "NM:i:9" => 1, "XM:i:2" => 1, "XG:i:7" => 1, "XO:i:2" => 1 } ], }, { name => "Scoring params 3", # 012345678 ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], # * -1 * -6 **** -5 -3 -3 -3 -3 *** -1 -2 -2 -2 quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". 
"iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 7 8 args => "--ignore-quals --rfg 1,2 --score-min C,-40,0 -N 1 -L 20", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "37M4D30M3I14M" ], samoptflags => [ { "AS:i:-31" => 1, "YT:Z:UU" => 1, "MD:Z:0G11A24^GACC44" => 1, "NM:i:9" => 1, "XM:i:2" => 1, "XG:i:7" => 1, "XO:i:2" => 1 } ], }, { name => "Scoring params 4", # 012345678 ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], # * -1 * -6 **** -1 -2 -2 -2 -2 *** -5 -3 -3 -3 quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 7 8 args => "--ignore-quals --rdg 1,2 --score-min C,-40,0 -N 1 -L 20", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "37M4D30M3I14M" ], samoptflags => [ { "AS:i:-30" => 1, "YT:Z:UU" => 1, "MD:Z:0G11A24^GACC44" => 1, "NM:i:9" => 1, "XM:i:2" => 1, "XG:i:7" => 1, "XO:i:2" => 1 } ], }, { name => "Scoring params 5", # 012345678 ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], # * -1 * -8 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3 quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". 
"iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 7 8 args => "--ignore-quals --mp 8 --score-min C,-40,0 -N 1 -L 20", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "37M4D30M3I14M" ], samoptflags => [ { "AS:i:-40" => 1, "YT:Z:UU" => 1, "MD:Z:0G11A24^GACC44" => 1, "NM:i:9" => 1, "XM:i:2" => 1, "XG:i:7" => 1, "XO:i:2" => 1 } ], }, { name => "Scoring params 6", # 012345678 ref => [ "ACTATTGCGCGCATGCACATATCAATTAAGCCGTCTCTCTAAAGAGACCCCAATCTCGCGCGCTAGACGTCAGTAGTTT"."TTTATAAACACCTCGCTGCGGGG" ], reads => [ "NCGCATGCACATtTCAATTAAGCCGTCTCTCTAAAGA". "CCAATCTCGCGCGCTAGACGTCAGTAGTTTAAATTTATAAACACCTC" ], # * -4 * -6 **** -5 -3 -3 -3 -3 *** -5 -3 -3 -3 quals => [ "GOAIYEFGFIWDSFIUYWEHRIWQWLFNSLDKkjdfg". "iuevhsiuqkAUHFIUEHGIUDJFHSKseuweyriwfskdgbiuuhh" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 7 8 args => "--ignore-quals --np 4 --score-min C,-41,0 -N 1 -L 20", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "37M4D30M3I14M" ], samoptflags => [ { "AS:i:-41" => 1, "YT:Z:UU" => 1, "MD:Z:0G11A24^GACC44" => 1, "NM:i:9" => 1, "XM:i:2" => 1, "XG:i:7" => 1, "XO:i:2" => 1 } ], }, # # Test XS:i with quality scaling # { name => "Q XS:i 1a", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIIIA" ], args => "--multiseed=0,7,C,1 --score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-5" => 1, "XS:i:-5" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1a ! --mp 3,3", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIII!" 
], args => "-L 6 --mp 3,3 --score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-3" => 1, "XS:i:-3" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1a ! --mp 3,6", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIII!" ], args => "-L 6 --mp 6,3 --score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-3" => 1, "XS:i:-3" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1a I --mp 3,3", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIIII" ], args => "-L 6 --mp 3,3 --score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-3" => 1, "XS:i:-3" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1a I --mp 3,6", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIIII" ], args => "-L 6 --mp 6,3 --score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-6" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1a --ignore-quals", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIIIA" ], args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-6" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1b", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIII5" ], args => "--multiseed=0,7,C,1 
--score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-4" => 1, "XS:i:-4" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1b --ignore-quals", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIII5" ], args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-6" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1c", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIII4" ], args => "--multiseed=0,7,C,1 --score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-3" => 1, "XS:i:-3" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "Q XS:i 1c --ignore-quals", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], quals => [ "IIIIIII4" ], args => "--multiseed=0,7,C,1 --score-min=C,-6 --ignore-quals", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-6" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, # One mate aligns. Ensuring that the unmapped mate gets reference # information filled in from the other mate. 
{ ref => [ "CATCGACTGAGACTCGTACGACAATTACGCGCATTATTCGCATCACCAGCGCGGCGCGCGCCCCCTAT" ], # 01234567890123456789012345678901234567890123456789012345678901234567 # 0 1 2 3 4 5 6 # ATCACCAGCGTTTCGCGCGAAACCTA mate1s => [ "ATCGACTGAGACTCGTACGACAATTAC" ], mate2s => [ "TAGGTTTCGCGCGAAACGCTGGTGAT" ], pairhits_orig => [{ "1,1" => 1}] }, { ref => [ "TTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGT" ], args => "--multiseed=0,4,C,1,0", report => "-M 1" }, # Testing that DEFAULT is -M 1 { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGT" ], report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 } ], }, { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGT" ], report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 } ], }, # # Test XS:i # { name => "XS:i 1", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], args => "--multiseed=0,7,C,1 --score-min=C,-6", report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-6" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:7A0" => 1, "NM:i:1" => 1, "XM:i:1" => 1 } ], }, { name => "XS:i 2", ref => [ "TTGTTCGATTGTTCGA" ], reads => [ "TTGTTCGT" ], args => "--multiseed=0,7,C,1 --score-min=C,-5", report => "", cigar => [ "*" ], 
samoptflags => [{ "YT:Z:UU" => 1, "YM:i:0" => 1 }], }, { name => "XS:i 3a", ref => [ "TTGTTCGATTGTTCGT" ], # TTGTTCGT reads => [ "TTGTTCGT" ], args => "--multiseed=0,7,C,1 --score-min=C,-6", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "8M" ], samoptflags => [ { "AS:i:0" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "NM:i:0" => 1, "XM:i:0" => 1 } ], }, { name => "XS:i 3b", ref => [ "TTGTTCGATTGTTCGT" ], # TTGTTCGT reads => [ "TTGTTCGT" ], args => "--multiseed=0,7,C,1 --score-min=C,-6 --seed=52", report => "-M 1", hits => [ { 8 => 1 } ], cigar => [ "8M" ], samoptflags => [ { "AS:i:0" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "NM:i:0" => 1, "XM:i:0" => 1 } ], }, { name => "XS:i 3c", ref => [ "TTGTTCGATTGTTCGT" ], # TTGTTCGT reads => [ "TTGTTCGT" ], args => "--multiseed=0,7,C,1 --score-min=C,-6 --seed=53", report => "-M 2", hits => [ { 8 => 1 } ], cigar => [ "8M" ], samoptflags => [ { "AS:i:0" => 1, "XS:i:-6" => 1, "YM:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "NM:i:0" => 1, "XM:i:0" => 1 } ], }, { name => "XS:i 4a", ref => [ "TTGTTCAATTGTTCGATTGTTCGT" ], # |||||| ||||||| |||||||| # TTGTTCGT||||||| |||||||| # TTGTTCGT|||||||| # TTGTTCGT reads => [ "TTGTTCGT" ], args => "--multiseed=0,6,C,1 --score-min=C,-12 --seed=53", report => "-M 2", hits => [ { 16 => 1 } ], cigar => [ "8M" ], samoptflags => [ { "AS:i:0" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "NM:i:0" => 1, "XM:i:0" => 1 } ], }, { name => "XS:i 4b", ref => [ "TTGTTCAATTGTTCGATTGTTCGT" ], # |||||| ||||||| |||||||| # TTGTTCGT||||||| |||||||| # TTGTTCGT|||||||| # TTGTTCGT reads => [ "TTGTTCGT" ], args => "--multiseed=0,6,C,1 --score-min=C,-12 --seed=54", report => "-M 3", hits => [ { 16 => 1 } ], cigar => [ "8M" ], samoptflags => [ { "AS:i:0" => 1, "XS:i:-6" => 1, "YM:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "NM:i:0" => 1, "XM:i:0" => 1 } ], }, { name => "XS:i 5a", ref => [ 
"TTGTTCAATTGTTCGATTGTTCGTTTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAA" ], # |||||| ||||||| |||||||||||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| # TTGTTCGT||||||| ||||||||TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| # TTGTTCGT|||||||| TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT # TTGTTCGT reads => [ "TTGTTCGT" ], args => "--multiseed=0,6,C,1,1 --score-min=C,-12 --seed=54", report => "-M 1", hits => [ { 16 => 1 } ], cigar => [ "8M" ], samoptflags => [ { "AS:i:0" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "NM:i:0" => 1, "XM:i:0" => 1 } ], }, { name => "XS:i 5b", ref => [ "TTGTTCAATTGTTCGATTGTTCGTTTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAATTGTTCAA" ], # |||||| ||||||| |||||||||||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| |||||| # TTGTTCGT||||||| ||||||||TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| TTGTTCGT|||||| # TTGTTCGT|||||||| TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT TTGTTCGT # TTGTTCGT reads => [ "TTGTTCGT" ], args => "--multiseed=0,5,C,1,1 --score-min=C,-12 --seed=55", report => "-M 1", hits => [ { 16 => 1 } ], cigar => [ "8M" ], samoptflags => [ { "AS:i:0" => 1, "XS:i:-6" => 1, "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "NM:i:0" => 1, "XM:i:0" => 1 } ], }, # Testing BWA-SW-like scoring # # a*max{T,c*log(l)} = 1 * max(30, 5.5 * log(56)) = 1 * max(30, 22.139) = 30 # { name => "BWA-SW-like 1", ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ], # |||||||||||||||||||||||||||||||||||||||||||||||||||||||| reads => [ "TAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATC" ], # 01234567890123456789012345678901234567890123456789012345 # 1 2 3 4 5 args => "--bwa-sw-like", hits => [{ 3 => 1 }], samoptflags => [{ "AS:i:56" => 1, "NM:i:0" => 1, "MD:Z:56" => 1, 
"YT:Z:UU" => 1 }] }, { name => "BWA-SW-like 2", # 0123 ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ], # |||||||||||||||||||||||||| |||||||||||||||||||||||||||| reads => [ "TAGATTCCACTACGCTAACCATCGAGTTCTCGTCTCAGAGTTTCGATAGGAAAATC" ], # 01234567890123456789012345678901234567890123456789012345 # 1 2 3 4 5 args => "--bwa-sw-like -L 18", hits => [{ 3 => 1 }], # Tot matches = 54 # Tot penalties = 6 samoptflags => [{ "AS:i:48" => 1, "NM:i:2" => 1, "XM:i:2" => 1, "MD:Z:26A0A28" => 1, "YT:Z:UU" => 1 }] }, { name => "BWA-SW-like 3", # 0123 ref => [ "GTTTAGATTCCACTACGCTAACCATCGAGAACTCGTCTCAGAGTTTCGATAGGAAAATCTGCGA" ], # |||||||||||||||||||||||||| ||||||||||||||||||||||||||| reads => [ "TAGATTCCACTACGCTAACCATCGAG"."TCGTCTCAGAGTTTCGATAGGAAAATC" ], # 01234567890123456789012345678901234567890123456789012345 # 1 2 3 4 5 args => "--bwa-sw-like -i C,1,0", hits => [{ 3 => 1 }], # Tot matches = 53 # Tot penalties = 11 samoptflags => [{ "AS:i:42" => 1, "NM:i:3" => 1, "XM:i:0" => 1, "XO:i:1" => 1, "XG:i:3" => 1, "MD:Z:26^AAC27" => 1, "YT:Z:UU" => 1 }] }, # Some tricky SAM FLAGS field tests { name => "SAM paired-end where both mates align 1", ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], # 012345678901234567890123456789012345678901234567890123456789012345678 # 0 1 2 3 4 5 6 mate1s => [ "CTATCTACGCTTCGGCGTCGGTGA" ], mate2s => [ "GATTGTCACACACCCGGTCGT" ], # ----------------------------------------------------- # 01234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 # 0x1 template having multiple fragments in sequencing # 0x2 each fragment properly aligned according to the aligner # 0x4 fragment unmapped # 0x8 next fragment in the template unmapped # 0x10 SEQ being reverse complemented # 0x20 SEQ of the next fragment in the template being reversed # 0x40 the first fragment in the template # 0x80 the last fragment in the template pairhits => [{ "3,35" => 1 }], norc => 1, samflags_map => [{ 3 => (1 | 2 | 32 | 
64), 35 => (1 | 2 | 16 | 128) }], tlen_map => [{ 3 => 53, 35 => -53 }] }, { name => "SAM paired-end where both mates align 2", ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], # 012345678901234567890123456789012345678901234567890123456789012345678 # 0 1 2 3 4 5 6 mate1s => [ "TCACCGACGCCGAAGCGTAGATAG" ], mate2s => [ "ACGACCGGGTGTGTGACAATC" ], # ----------------------------------------------------- # 01234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 # 0x1 template having multiple fragments in sequencing # 0x2 each fragment properly aligned according to the aligner # 0x4 fragment unmapped # 0x8 next fragment in the template unmapped # 0x10 SEQ being reverse complemented # 0x20 SEQ of the next fragment in the template being reversed # 0x40 the first fragment in the template # 0x80 the last fragment in the template mate1fw => 0, mate2fw => 1, pairhits => [{ "3,35" => 1 }], norc => 1, samflags_map => [{ 3 => (1 | 2 | 16 | 64), 35 => (1 | 2 | 32 | 128) }], tlen_map => [{ 3 => 53, 35 => -53 }] }, { name => "SAM paired-end where both mates align 3", ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], # 012345678901234567890123456789012345678901234567890123456789012345678 # 0 1 2 3 4 5 6 mate1s => [ "CTATCTACGCTTCGGCGTCGGTGA" ], mate2s => [ "ACGACCGGGTGTGTGACAATC" ], # ----------------------------------------------------- # 01234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 # 0x1 template having multiple fragments in sequencing # 0x2 each fragment properly aligned according to the aligner # 0x4 fragment unmapped # 0x8 next fragment in the template unmapped # 0x10 SEQ being reverse complemented # 0x20 SEQ of the next fragment in the template being reversed # 0x40 the first fragment in the template # 0x80 the last fragment in the template mate1fw => 1, mate2fw => 1, pairhits => [{ "3,35" => 1 }], norc => 1, samflags_map => [{ 3 => (1 | 2 | 64), 35 => (1 | 2 | 128) }], 
tlen_map => [{ 3 => 53, 35 => -53 }] }, { name => "SAM paired-end where mate #1 aligns but mate #2 doesn't", ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], # 012345678901234567890123456789012345678901234567890123456789012345678 # 0 1 2 3 4 5 6 mate1s => [ "CTATCTACGCTTCGGCGTCGGCGA" ], mate2s => [ "GATTGTCTTTTCCCGGAAAAATCGT" ], # 0x1 template having multiple fragments in sequencing # 0x2 each fragment properly aligned according to the aligner # 0x4 fragment unmapped # 0x8 next fragment in the template unmapped # 0x10 SEQ being reverse complemented # 0x20 SEQ of the next fragment in the template being reversed # 0x40 the first fragment in the template # 0x80 the last fragment in the template pairhits => [{ "*,3" => 1 }], norc => 1, samflags_map => [{ 3 => (1 | 8 | 64), "*" => (1 | 4 | 128) }] }, { name => "SAM paired-end where neither mate aligns", ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], # 012345678901234567890123456789012345678901234567890123456789012345678 # 0 1 2 3 4 5 6 mate1s => [ "CTATATACGAAAAAGCGTCGGCGA" ], mate2s => [ "GATTGTCTTTTCCCGGAAAAATCGT" ], # 0x1 template having multiple fragments in sequencing # 0x2 each fragment properly aligned according to the aligner # 0x4 fragment unmapped # 0x8 next fragment in the template unmapped # 0x10 SEQ being reverse complemented # 0x20 SEQ of the next fragment in the template being reversed # 0x40 the first fragment in the template # 0x80 the last fragment in the template pairhits => [{ "*,*" => 1 }], norc => 1, samoptflags_flagmap => [{ (1 | 4 | 8 | 64) => { "YT:Z:UP" => 1 }, (1 | 4 | 8 | 128) => { "YT:Z:UP" => 1 } }] }, { name => "SAM paired-end where both mates align, but discordantly", ref => [ "GCACTATCTACGCTTCGGCGTCGGCGAAAAAACGCACGACCGGGTGTGTGACAATCATATATAGCGCGC" ], # 012345678901234567890123456789012345678901234567890123456789012345678 # 0 1 2 3 4 5 6 mate1s => [ "CTATCTACGCTTCGGCGTCGGCGA" ], mate2s => [ "ACGACCGGGTGTGTGACAATC" ], # 
----------------------------------------------------- # 01234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 # 0x1 template having multiple fragments in sequencing # 0x2 each fragment properly aligned according to the aligner # 0x4 fragment unmapped # 0x8 next fragment in the template unmapped # 0x10 SEQ being reverse complemented # 0x20 SEQ of the next fragment in the template being reversed # 0x40 the first fragment in the template # 0x80 the last fragment in the template pairhits => [{ "3,35" => 1 }], norc => 1, samflags_map => [{ 3 => (1 | 64), 35 => (1 | 128) }], # Which TLEN is right? Depends on criteria for when to infer TLEN. If # criterion is mates are concordant, then it should be 0 here. If the # criterion is that both mates align to the same chromosome, should be # +-53 #tlen_map => [{ 3 => 0, 35 => 0 }] }, tlen_map => [{ 3 => 53, 35 => -53 }] }, { name => "matchesRef regression 4", ref => [ "CCGGGTCGTCACGCCCCGCTTGCGTCANGCCCCTCACCCTCCCTTTGTCGGCTCCCACCCCTCCCCATCCGTTGTCCCCGCCCCCGCCCGCCGGGTCGTCACGCCCCGCTTGCGTCANGC", "GCTCGGAATTCGTGCTCCGNCCCGTACGGTT" ], # # NNNNNGA------A-------------------G-NTTT # |||||||||||||||||||||||||||||||||| # CCAAT-ATTTTTAATTTCCTCTATTTTTCTCTCGTCTTG args => "--policy \"NP=Q\\;RDG=46.3220993654702\\;RFG=41.3796024365659\\;MIN=L,5.57015383125426,-3.28597145122829\\;NCEIL=L,0.263054599454459,0.130843661549367\\;SEED=1,29\\;IVAL=L,0.0169183264663712,3.75762168662522\" --overhang --trim5 6", reads => [ "CTTTGCACCCCTCCCTTGTCGGCTCCCACCCATCCCCATCCGTTGTCCCCGCCCCCGCCCGCCGGTCGTCACTCCCCGTTTGCGTCATGCCCCTCACCCTCCCTTTGTCGGCTCGCACCCCTCCCCATCCGTTGTCCCCGCCCCCGCTCTCGGGGTCTTCACGCCCCGCTTGCTTCATGCCCCTCACTCGCACCCCG" ], }, { name => "matchesRef regression 3", ref => [ "GAAGNTTTTCCAATATTTTTAATTTCCTCTATTTTTCTCTCGTCTTGNTCTAC" ], # # NNNNNGA------A-------------------G-NTTT # |||||||||||||||||||||||||||||||||| # CCAAT-ATTTTTAATTTCCTCTATTTTTCTCTCGTCTTG args => "--policy \"MMP=R\\;MIN=L,8.8,-8.1\" --overhang", reads => [ 
"CAAGACGAGAGAAAAATAGAGGAAATTAAAAATATTGG" ], }, { name => "matchesRef regression 2", ref => ["GTTGTCGGCAGCTCTGGATATGTGNTCTCGGGTTTATNTCGTTGTCG", "CCTTGTTNTTAATGCTGCCTGGTTTNG"], args => "--policy \"RDG=2.02030755427021,2.81949533273331\\;MIN=L,-6.52134769703939,-3.39889659588514\\;IVAL=L,0.127835912101927\" --overhang --trim5 5", mate1s => ["TCTGGCGGTTGCGAAGGCCCCTGGCGGTTGCTATGTCCTCTGGCGGTTGCGTTGTCGGCAGCTCG"], mate2s => ["AGAACACATATCCAGAGCTGCCGACAACGAAATGAACCCGAGAGCACAAATCCAGAG"] }, # Regression test for an issue observed once { name => "matchesRef regression 1", # 0 1 2 3 4 5 6 7 # 01234567890123456789012345678901234567890123456789012345678901234567890 ref => [ "AGGTCGACCGAAAGGCCTAGAGGTCGACCGACAATCTGACCATGGGGCGAGGAGCGAGTAC" ], # |||||||||||||||||||||||||||||||||||||||||||||||||| reads => [ "AAGGCCTAGAGGTCGACCGACAATCTGACCATGGGGCGAGGAGCGAGTACTGGTCTGGGG" ], # 012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 args => "--overhang" }, # 1 discordant alignment and one concordant alignment. Discordant because # the fragment is too long. 
{ name => "Discordant with different chromosomes",
  ref  => [ "TTTATAAAAATATTTCCCCCCCC",
            "CCCCCCTGTCGCTACCGCCCCCCCCCCC" ],
  # Mate 1 of both pairs matches the first reference at offset 3:
  #    TTTATAAAAATATTTCCCCCCCC
  #       ATAAAAATAT
  #    01234567890123456789012
  #    0         1         2
  # Mate 2 matches the second reference at offset 7 (pair 0) and
  # offset 6 (pair 1):
  #    CCCCCCTGTCGCTACCGCCCCCCCCCCC
  #           GTCGCTACCG
  #          TGTCGCTACC
  #    0123456789012345678901234567
  #    0         1         2
  mate1s  => [ "ATAAAAATAT", "ATAAAAATAT" ],
  mate2s  => [ "GTCGCTACCG", "TGTCGCTACC" ],
  mate1fw => 1,
  mate2fw => 1,
  args    => "-I 0 -X 35",
  # Not really any way to flag an alignment as discordant
  pairhits  => [ { "3,7" => 1 }, { "3,6" => 1 } ],
  rnext_map => [ { 3 => 1, 7 => 0 }, { 3 => 1, 6 => 0 } ],
  pnext_map => [ { 3 => 7, 7 => 3 }, { 3 => 6, 6 => 3 } ] },

# Input-format tests, group 1: the same 16-base read supplied through each
# supported input key (fastq, tabbed, fasta, qseq, raw) with no trailing
# newline.  The read matches the reference at offset 2 in every case.
{ name  => "Fastq 1",
  ref   => [ "AGCATCGATCAGTATCTGA" ],
  fastq => "\@r0\nCATCGATCAGTATCTG\n+\nIIIIIIIIIIIIIIII",
  hits  => [{ 2 => 1 }] },

{ name   => "Tabbed 1",
  ref    => [ "AGCATCGATCAGTATCTGA" ],
  tabbed => "r0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII",
  hits   => [{ 2 => 1 }] },

{ name  => "Fasta 1",
  ref   => [ "AGCATCGATCAGTATCTGA" ],
  fasta => ">r0\nCATCGATCAGTATCTG",
  hits  => [{ 2 => 1 }] },

{ name => "Qseq 1",
  ref  => [ "AGCATCGATCAGTATCTGA" ],
  qseq => join("\t", "MachName",
                     "RunNum",
                     "Lane",
                     "Tile",
                     "X", "Y",
                     "Index",
                     "0", # Mate
                     "CATCGATCAGTATCTG",
                     "IIIIIIIIIIIIIIII",
                     "1"),
  hits => [{ 2 => 1 }] },

{ name => "Raw 1",
  ref  => [ "AGCATCGATCAGTATCTGA" ],
  raw  => "CATCGATCAGTATCTG",
  hits => [{ 2 => 1 }] },

# Group 2: like group 1, but each input ends with one extra newline.
{ name  => "Fastq 2",
  ref   => [ "AGCATCGATCAGTATCTGA" ],
  fastq => "\@r0\nCATCGATCAGTATCTG\n+\nIIIIIIIIIIIIIIII\n",
  hits  => [{ 2 => 1 }] },

# Named "Tabbed 2" so that every case in this group carries the group
# number and no two cases share a name.
{ name   => "Tabbed 2",
  ref    => [ "AGCATCGATCAGTATCTGA" ],
  tabbed => "r0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n",
  hits   => [{ 2 => 1 }] },

{ name  => "Fasta 2",
  ref   => [ "AGCATCGATCAGTATCTGA" ],
  fasta => ">r0\nCATCGATCAGTATCTG\n",
  hits  => [{ 2 => 1 }] },

{ name => "Qseq 2",
  ref  => [ "AGCATCGATCAGTATCTGA" ],
  qseq => join("\t", "MachName",
                     "RunNum",
                     "Lane",
                     "Tile",
                     "X", "Y",
                     "Index",
                     "0", # Mate
                     "CATCGATCAGTATCTG",
                     "IIIIIIIIIIIIIIII",
                     "1")."\n",
  hits => [{ 2 => 1 }] },

{ name => "Raw 2",
  ref  => [ "AGCATCGATCAGTATCTGA" ],
  raw  =>
"CATCGATCAGTATCTG\n", hits => [{ 2 => 1 }] }, # Like Fastq 1 but with many extra newlines { name => "Fastq 3", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }] }, { name => "Tabbed 3", ref => [ "AGCATCGATCAGTATCTGA" ], tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }] }, { name => "Fasta 3", ref => [ "AGCATCGATCAGTATCTGA" ], fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n\n", hits => [{ 2 => 1 }] }, { name => "Qseq 3", ref => [ "AGCATCGATCAGTATCTGA" ], qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "X", "Y", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIIII", "1")."\n\n", hits => [{ 2 => 1 }] }, { name => "Raw 3", ref => [ "AGCATCGATCAGTATCTGA" ], raw => "\n\n\nCATCGATCAGTATCTG\n\n", hits => [{ 2 => 1 }] }, # Quality string length doesn't match (too short by 1) { name => "Fastq 4", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", should_abort => 1}, { name => "Tabbed 4", ref => [ "AGCATCGATCAGTATCTGA" ], tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", should_abort => 1}, { name => "Qseq 4", ref => [ "AGCATCGATCAGTATCTGA" ], qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "X", "Y", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIII", "1")."\n\n", should_abort => 1}, # Name line doesn't start with @ { name => "Fastq 5", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "\n\n\r\nr0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", should_abort => 1, hits => [{ }] }, # Name line doesn't start with > { name => "Fasta 5", ref => [ "AGCATCGATCAGTATCTGA" ], fasta => "\n\n\r\nr0\nCATCGATCAGTATCTG\r", should_abort => 1, hits => [{ }] }, # Name line doesn't start with @ (2) { name => "Fastq 6", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", should_abort => 1, hits => [{ }] }, # Name line doesn't start with > 
(2) { name => "Fasta 6", ref => [ "AGCATCGATCAGTATCTGA" ], fasta => "r0\nCATCGATCAGTATCTG\r", should_abort => 1, hits => [{ }] }, # Part of sequence is trimmed { name => "Fastq 7", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", args => "--trim3 4", norc => 1, hits => [{ 2 => 1 }] }, { name => "Tabbed 7", ref => [ "AGCATCGATCAGTATCTGA" ], tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", args => "--trim3 4", norc => 1, hits => [{ 2 => 1 }] }, { name => "Fasta 7", ref => [ "AGCATCGATCAGTATCTGA" ], fasta => "\n\n\r\n\>r0\nCATCGATCAGTATCTG\r\n", args => "--trim3 4", norc => 1, hits => [{ 2 => 1 }] }, { name => "Qseq 7", ref => [ "AGCATCGATCAGTATCTGA" ], qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "X", "Y", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIIII", "1")."\n\n", args => "--trim3 4", norc => 1, hits => [{ 2 => 1 }] }, { name => "Raw 7", ref => [ "AGCATCGATCAGTATCTGA" ], raw => "\n\n\r\nCATCGATCAGTATCTG\r\n", args => "--trim3 4", norc => 1, hits => [{ 2 => 1 }] }, # Whole sequence is trimmed { name => "Fastq 8", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", args => "--trim5 16", hits => [{ "*" => 1 }] }, { name => "Tabbed 8", ref => [ "AGCATCGATCAGTATCTGA" ], tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", args => "--trim5 16", hits => [{ "*" => 1 }] }, { name => "Fasta 8", ref => [ "AGCATCGATCAGTATCTGA" ], fasta => "\n\n\r\n\>r0\nCATCGATCAGTATCTG\r\n", args => "--trim3 16", hits => [{ "*" => 1 }] }, { name => "Qseq 8", ref => [ "AGCATCGATCAGTATCTGA" ], qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "X", "Y", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIIII", "1")."\n\n", args => "--trim3 16", hits => [{ "*" => 1 }] }, { name => "Raw 8", ref => [ "AGCATCGATCAGTATCTGA" ], raw => "\n\n\r\nCATCGATCAGTATCTG\r\n", args => "--trim3 16", hits => [{ "*" => 1 }] }, # 
Sequence is skipped { name => "Fastq 9", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n", args => "-s 1", hits => [{ }] }, { name => "Tabbed 9", ref => [ "AGCATCGATCAGTATCTGA" ], tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n", args => "-s 1", hits => [{ }] }, { name => "Fasta 9", ref => [ "AGCATCGATCAGTATCTGA" ], fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n", args => "-s 1", hits => [{ }] }, { name => "Qseq 9", ref => [ "AGCATCGATCAGTATCTGA" ], qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "X", "Y", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIIII", "1")."\n\n", args => "-s 1", hits => [{ }] }, { name => "Raw 9", ref => [ "AGCATCGATCAGTATCTGA" ], raw => "CATCGATCAGTATCTG\n", args => "-s 1", hits => [{ }] }, # Like Fastq 1 but with many extra newlines { name => "Fastq multiread 1", ref => [ "AGCATCGATCAGTATCTGA" ], fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n". "\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Tabbed multiread 1", ref => [ "AGCATCGATCAGTATCTGA" ], tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n". "\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Fasta multiread 1", ref => [ "AGCATCGATCAGTATCTGA" ], fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\n\n". "\n\n\r\n>r1\nATCGATCAGTATCTG\n\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Qseq multiread 1", ref => [ "AGCATCGATCAGTATCTGA" ], qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "10", "10", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIIII", "1")."\n\n". 
join("\t", "MachName", "RunNum", "Lane", "Tile", "12", "15", "Index", "0", # Mate "ATCGATCAGTATCTG", "IIIIIIIIIIIIIII", "1")."\n\n", idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Raw multiread 1", ref => [ "AGCATCGATCAGTATCTGA" ], raw => "\n\n\r\nCATCGATCAGTATCTG\n\n". "\n\n\r\nATCGATCAGTATCTG\n\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, # Like Fastq multiread 1 but with -u 1 { name => "Fastq multiread 2", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 1", fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n". "\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }] }, { name => "Tabbed multiread 2", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 1", tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n". "\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }] }, { name => "Fasta multiread 2", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 1", fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n". "\n\n\r\n>r1\nATCGATCAGTATCTG\r\n", hits => [{ 2 => 1 }] }, { name => "Qseq multiread 2", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 1", qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "10", "10", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIIII", "1")."\n\n". join("\t", "MachName", "RunNum", "Lane", "Tile", "12", "15", "Index", "0", # Mate "ATCGATCAGTATCTG", "IIIIIIIIIIIIIII", "1")."\n\n", idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, hits => [{ 2 => 1 }] }, { name => "Raw multiread 2", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 1", raw => "\n\n\r\nCATCGATCAGTATCTG\r\n". "\n\n\r\nATCGATCAGTATCTG\r\n", hits => [{ 2 => 1 }] }, # Like Fastq multiread 1 but with -u 2 { name => "Fastq multiread 3", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 2", fastq => "\n\n\r\n\@r0\nCATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIIII\n\n". 
"\n\n\r\n\@r1\nATCGATCAGTATCTG\r\n+\n\nIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Tabbed multiread 3", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 2", tabbed => "\n\n\r\nr0\tCATCGATCAGTATCTG\tIIIIIIIIIIIIIIII\n\n". "\n\n\r\nr1\tATCGATCAGTATCTG\tIIIIIIIIIIIIIII\n\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Fasta multiread 3", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 2", fasta => "\n\n\r\n>r0\nCATCGATCAGTATCTG\r\n". "\n\n\r\n>r1\nATCGATCAGTATCTG\r\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Qseq multiread 3", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 2", qseq => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "10", "10", "Index", "0", # Mate "CATCGATCAGTATCTG", "IIIIIIIIIIIIIIII", "1")."\n\n". join("\t", "MachName", "RunNum", "Lane", "Tile", "12", "15", "Index", "0", # Mate "ATCGATCAGTATCTG", "IIIIIIIIIIIIIII", "1")."\n\n", idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, hits => [{ 2 => 1 }, { 3 => 1 }] }, { name => "Raw multiread 3", ref => [ "AGCATCGATCAGTATCTGA" ], args => "-u 2", raw => "\n\n\r\nCATCGATCAGTATCTG\r\n". "\n\n\r\nATCGATCAGTATCTG\r\n", hits => [{ 2 => 1 }, { 3 => 1 }] }, # Paired-end reads that should align { name => "Fastq paired 1", ref => [ "AGCATCGATCAAAAACTGA" ], # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". "\n\n\@r1\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n", fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". "\n\n\r\n\@r1\nAGCATCGATC\r\n+\n\nIIIIIIIIII", pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, { name => "Tabbed paired 1", ref => [ "AGCATCGATCAAAAACTGA" ], # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". 
"\n\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII\n\n", paired => 1, pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, { name => "Fasta paired 1", ref => [ "AGCATCGATCAAAAACTGA" ], # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n". "\n\n>r1\nTCAGTTTTTGA\r\n", fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n". "\n\n\r\n>r1\nAGCATCGATC", pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, { name => "Qseq paired 1", ref => [ "AGCATCGATCAAAAACTGA" ], # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 qseq1 => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "10", "10", "Index", "1", # Mate "AGCATCGATC", "ABCBGACBCB", "1")."\n\n". join("\t", "MachName", "RunNum", "Lane", "Tile", "12", "15", "Index", "1", # Mate "TCAGTTTTTGA", "95849456875", "1")."\n\n", qseq2 => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "10", "10", "Index", "2", # Mate "TCAGTTTTTGA", "ABCBGACBCBA", "1")."\n\n". join("\t", "MachName", "RunNum", "Lane", "Tile", "12", "15", "Index", "2", # Mate "AGCATCGATC", "AGGCBBGCBG", "1")."\n\n", idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, { name => "Raw paired 1", ref => [ "AGCATCGATCAAAAACTGA" ], # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 raw1 => "\n\n\r\nAGCATCGATC\r\n". "\n\nTCAGTTTTTGA\r\n", raw2 => "\n\n\r\nTCAGTTTTTGA\n". "\n\n\r\nAGCATCGATC", pairhits => [ { "0,8" => 1 }, { "0,8" => 1 } ] }, # Paired-end reads that should align { name => "Fastq paired 2", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-s 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 fastq1 => "\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". "\n\n\@r1\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n", fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". 
"\n\n\r\n\@r1\nAGCATCGATC\r\n+\n\nIIIIIIIIII", pairhits => [ { }, { "0,8" => 1 } ] }, { name => "Tabbed paired 2", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-s 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 tabbed => "r0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". "\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII", paired => 1, pairhits => [ { }, { "0,8" => 1 } ] }, { name => "Fasta paired 2", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-s 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 fasta1 => ">r0\nAGCATCGATC\r\n". "\n\n>r1\nTCAGTTTTTGA\n", fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n". "\n\n\r\n>r1\nAGCATCGATC", pairhits => [ { }, { "0,8" => 1 } ] }, { name => "Qseq paired 1", ref => [ "AGCATCGATCAAAAACTGA" ], # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 args => "-s 1", qseq1 => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "10", "10", "Index", "1", # Mate "AGCATCGATC", "ABCBGACBCB", "1")."\n\n". join("\t", "MachName", "RunNum", "Lane", "Tile", "12", "15", "Index", "1", # Mate "TCAGTTTTTGA", "95849456875", "1")."\n\n", qseq2 => "\n\n\n".join("\t", "MachName", "RunNum", "Lane", "Tile", "10", "10", "Index", "2", # Mate "TCAGTTTTTGA", "ABCBGACBCBA", "1")."\n\n". join("\t", "MachName", "RunNum", "Lane", "Tile", "12", "15", "Index", "2", # Mate "AGCATCGATC", "AGGCBBGCBG", "1")."\n\n", idx_map => { "MachName_RunNum_Lane_Tile_10_10_Index" => 0, "MachName_RunNum_Lane_Tile_12_15_Index" => 1 }, pairhits => [ { }, { "0,8" => 1 } ] }, { name => "Raw paired 2", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-s 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 raw1 => "AGCATCGATC\r\n". "\n\nTCAGTTTTTGA\n", raw2 => "\n\n\r\nTCAGTTTTTGA\n". "\n\n\r\nAGCATCGATC", pairhits => [ { }, { "0,8" => 1 } ] }, # Paired-end reads that should align { name => "Fastq paired 3", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-u 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". 
"\n\n\@r1\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n", fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". "\n\n\r\n\@r1\nAGCATCGATC\r\n+\nIIIIIIIIII", pairhits => [ { "0,8" => 1 }, { } ] }, { name => "Tabbed paired 3", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-u 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". "\n\nr1\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII", paired => 1, pairhits => [ { "0,8" => 1 }, { } ] }, { name => "Fasta paired 3", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-u 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n". "\n\n>r1\nTCAGTTTTTGA\r\n", fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n". "\n\n\r\n>r1\nAGCATCGATC", pairhits => [ { "0,8" => 1 }, { } ] }, { name => "Raw paired 3", ref => [ "AGCATCGATCAAAAACTGA" ], args => "-u 1", # AGCATCGATC # TCAAAAACTGA # 0123456789012345678 raw1 => "\n\n\r\nAGCATCGATC\r\n". "\n\nTCAGTTTTTGA\r\n", raw2 => "\n\n\r\nTCAGTTTTTGA\n". "\n\n\r\nAGCATCGATC", pairhits => [ { "0,8" => 1 }, { } ] }, # Paired-end reads that should align #{ name => "Fastq paired 4", # ref => [ "AGCATCGATCAAAAACTGA" ], # args => "-s 1 -L 4 -i C,1,0", # # AGCATCGATC # # TCAAAAACTGA # # 0123456789012345678 # fastq1 => "\n\n\r\n\@r0\nAGCATCGATC\r\n+\n\nIIIIIIIIII\n\n". # #"\n\n\@r1\nTC\r\n+\n\nII\n\n". # "\n\n\@r2\nTCAGTTTTTGA\r\n+\n\nIIIIIIIIIII\n\n", # fastq2 => "\n\n\r\n\@r0\nTCAGTTTTTGA\n+\n\nIIIIIIIIIII\n\n". # #"\n\n\r\n\@r1\nAG\r\n+\nII". 
# "\n\@r2\nAGCATCGATC\r\n+\nIIIIIIIIII", # paired => 1, # pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], # pairhits => [ { "0,8" => 1 } ], # samoptflags_map => [ # { }, # { "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, ## { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, # 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] #}, #{ name => "Tabbed paired 4", # ref => [ "AGCATCGATCAAAAACTGA" ], # args => "-s 1 -L 4 -i C,1,0", # # AGCATCGATC # # TCAAAAACTGA # # 0123456789012345678 # tabbed => "\n\n\r\nr0\tAGCATCGATC\tIIIIIIIIII\tTCAGTTTTTGA\tIIIIIIIIIII\n\n". # "\n\nr1\tTC\tII\tAG\tII". # "\n\nr2\tTCAGTTTTTGA\tIIIIIIIIIII\tAGCATCGATC\tIIIIIIIIII\n\n", # paired => 1, # #pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], # pairhits => [ { }, { "0,8" => 1 } ], # samoptflags_map => [ # { }, # #{ "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, # { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, # 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] #}, #{ name => "Fasta paired 4", # ref => [ "AGCATCGATCAAAAACTGA" ], # args => "-s 1 -L 4 -i C,1,0", # # AGCATCGATC # # TCAAAAACTGA # # 0123456789012345678 # fasta1 => "\n\n\r\n>r0\nAGCATCGATC\r\n". # # "\n\n>r1\nTC\r\n". # "\n\n>r2\nTCAGTTTTTGA\r\n", # fasta2 => "\n\n\r\n>r0\nTCAGTTTTTGA\n\n". # # "\n\n\r\n>r1\nAG". # "\n>r2\nAGCATCGATC", # # pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], # pairhits => [ { }, { "0,8" => 1 } ], # samoptflags_map => [ # { }, # #{ "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, # { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, # 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] #}, #{ name => "Raw paired 4", # ref => [ "AGCATCGATCAAAAACTGA" ], # args => "-s 1 -L 4 -i C,1,0", # # AGCATCGATC # # TCAAAAACTGA # # 0123456789012345678 # raw1 => "\n\n\r\nAGCATCGATC\r\n". ## "\n\nTC\r\n". # "\n\nTCAGTTTTTGA\r\n", # raw2 => "\n\n\r\nTCAGTTTTTGA\n\n". # "\n\n\r\nAG". 
# "\nAGCATCGATC", # pairhits => [ { }, { "*,*" => 1 }, { "0,8" => 1 } ], # pairhits => [ { }, { "0,8" => 1 } ], # samoptflags_map => [ # { }, # { "*" => { "YT:Z:UP" => 1, "YF:Z:LN" => 1 } }, # { 0 => { "MD:Z:10" => 1, "YT:Z:CP" => 1 }, # 8 => { "MD:Z:11" => 1, "YT:Z:CP" => 1 } }] #}, # # Check that skipping of empty reads is handled correctly. A read that is # empty or becomes empty after --trim3/--trim5 are applied should still # count as a first-class read that gets propagated up into the alignment # loop. And it should be counted in the -s/-u totals. # { ref => [ "AGCATCGATCAGTATCTGA" ], reads => [ "", "ATCGATCAGTA" ], args => "-s 1", hits => [ {}, { 3 => 1 }] }, { ref => [ "AGCATCGATCAGTATCTGA" ], mate1s => [ "", "AGCATCGATC" ], mate2s => [ "", "TCAGATACTG" ], args => "-s 1", pairhits => [ {}, { "0,9" => 1 }] }, { ref => [ "AGCATCGATCAGTATCTGA" ], reads => [ "", "ATCGATCAGTA" ], args => "-s 2", hits => [ {}, {} ] }, { ref => [ "AGCATCGATCAGTATCTGA" ], mate1s => [ "", "AGCATCGATC" ], mate2s => [ "", "TCAGATACTG" ], args => "-s 2", pairhits => [ {}, {} ] }, { ref => [ "AGCATCGATCAGTATCTGA" ], reads => [ "", "ATCGATCAGTA", "AGTATCTGA" ], args => "-s 1 -u 1", hits => [ {}, { 3 => 1 }] }, { ref => [ "AGCATCGATCAGTATCTGA" ], reads => [ "AC", "ATCGATCAGTA" ], args => "-s 1 --trim3 2", norc => 1, hits => [ {}, { 3 => 1 }] }, { ref => [ "AGCATCGATCAGTATCTGA" ], reads => [ "AC", "ATCGATCAGTA" ], args => "-s 1 --trim3 2", nofw => 1, hits => [ {}, { 5 => 1 }] }, { ref => [ "AGCATCGATCAGTATCTGA" ], reads => [ "AC", "ATCGATCAGTA" ], args => "-s 1 --trim5 2", nofw => 1, hits => [ {}, { 3 => 1 }] }, { ref => [ "AGCATCGATCAGTATCTGA" ], reads => [ "AC", "ATCGATCAGTA" ], args => "-s 1 --trim5 2", norc => 1, hits => [ {}, { 5 => 1 }] }, # # Alignment with overhang # { ref => [ "TGC" ], reads => [ "ATGC" ], args => "--overhang --policy \"SEED=0,3\\;IVAL=C,1,0\\;NCEIL=L,1,0\"", hits => [ { 0 => 1 } ], cigar => [ "1S3M" ], samoptflags => [ { "AS:i:-1" => 1, "YT:Z:UU" => 1, "MD:Z:3" 
=> 1, "XN:i:1" => 1 } ] }, { ref => [ "TTGTTCGT" ], reads => [ "TTGTTCG" ], args => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", hits => [ { 0 => 1 } ], cigar => [ "7M" ], samoptflags => [ { "AS:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:7" => 1 } ] }, { ref => [ "TTGTTCGT" ], reads => [ "TTGTTCG" ], args => "", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:7=" ], cigar => [ "7M" ], samoptflags => [ { "AS:i:0" => 1, "YT:Z:UU" => 1, "MD:Z:7" => 1 } ] }, { ref => [ "TTGTTCGT" ], reads => [ "TGTTCGT", "TTGTTCG" ], args => "--overhang", hits => [ { 1 => 1 }, { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:7=", "XM:0,XP:0,XT:UU,XC:7=" ], cigar => [ "7M", "7M" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, { "YT:Z:UU" => 1, "MD:Z:7" => 1 } ] }, { ref => [ "TTGTTCGT" ], reads => [ "TGTTCGT", "TTGTTCG" ], args => "", hits => [ { 1 => 1 }, { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:7=", "XM:0,XP:0,XT:UU,XC:7=" ], cigar => [ "7M", "7M" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, { "YT:Z:UU" => 1, "MD:Z:7" => 1 } ] }, # Read 1 doesn't overhang; reads 2 and 3 overhang opposite ends of the reference { ref => [ "TTGTTCGT" ], # TGTTCGT # GTTCGTA # ATTGTTC reads => [ "TGTTCGT", "GTTCGTA", "ATTGTTC" ], args => "--overhang --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", hits => [ { 1 => 1 }, { 2 => 1 }, { 0 => 1 } ], cigar => [ "7M", "6M1S", "1S6M" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, { "AS:i:-1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6" => 1 }, { "AS:i:-1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6" => 1 } ]}, # Same as previous case but --overhang not specified { ref => [ "TTGTTCGT" ], reads => [ "TGTTCGT", "TTGTTCG", "GTTCGTA", "ATTGTTC" ], args => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", hits => [ { 1 => 1 }, { 0 => 1 } ], # only the internal hits cigar => [ "7M", "7M", "*", "*" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, { "YT:Z:UU" => 1, "MD:Z:7" => 1 }, { "YT:Z:UU" => 1 }, { "YT:Z:UU" => 1 } ] }, # A simple 
case that should align with or without overhang, with or without # a special NCEIL setting. { ref => [ "TTGTTCGT" ], reads => [ "TTGTTCG" ], args => "--overhang --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", hits => [ { 0 => 1 } ]}, { ref => [ "TTGTTCGT" ], reads => [ "TTGTTCG" ], args => "--overhang", hits => [ { 0 => 1 } ]}, # # Testing the various -M/-m/-k/-a alignment modes in both unpaired and # paired-end modes. Ensuring that SAM optional flags such as YM:i, YP:i # are set properly in all cases. # # # Paired-end # { name => "P.M.58.G.b Unpaired -M 5 w/ 8 hits global, but mate #1 has just 1", # 0 1 2 3 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ 
"AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 # 0 1 2 3 4 5 6 7 8 9 # 0 0 0 0 0 0 0 0 0 0 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped args => "-X 1000", report => "-M 5", pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, "12,486" => 1, "12,552" => 1, "12,723" => 1, "12,789" => 1, "12,960" => 1 }], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 78 => "28M", 249 => "28M", 315 => "28M", 486 => "28M", 552 => "28M", 723 => "28M", 789 => "28M", 960 => "28M" }], samoptflags_map => [ { 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, 
"XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, } ] }, { name => "P.M.58.L.b Unpaired -M 5 w/ 8 hits local, but mate #1 has just 1", # 0 1 2 3 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ref => 
[ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 # 0 1 2 3 4 5 6 7 8 9 # 0 0 0 0 0 0 0 0 0 0 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped args => "-X 1000 --local", report => "-M 5", pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, "12,486" => 1, "12,552" => 1, "12,723" => 1, "12,789" => 1, "12,960" => 1 }], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 78 => "28M", 249 => "28M", 315 => "28M", 486 => "28M", 552 => "28M", 723 => "28M", 789 => "28M", 960 => "28M" }], samoptflags_map => [ { 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, 
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 249 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 486 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 723 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 960 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, } ] }, { name => "P.k.58.G.b Unpaired -k 5 w/ 8 hits global, but mate #1 has just 1", # 0 1 2 3 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG 
ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 # 0 1 2 3 4 5 6 7 8 9 # 0 0 0 0 0 0 0 0 0 0 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped args => "-X 1000", report => "-k 5", pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, "12,486" => 1, "12,552" => 1, "12,723" => 1, "12,789" => 1, "12,960" => 1 }], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 78 => "28M", 249 => "28M", 315 => "28M", 486 => "28M", 552 => "28M", 723 => "28M", 789 => "28M", 960 => "28M" }], samoptflags_map => [ { 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, 
"XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, } ] }, { name => "P.k.58.L.b Unpaired -k 5 w/ 8 hits local, but mate #1 has just 1", # 0 1 2 3 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ACACACCCCTATAGCTCGGAGCTGACTG ref => 
[ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACACACACCCCTATAGCTCGGAGCTGACTGGATCGACGACGT" ], # 
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 # 0 1 2 3 4 5 6 7 8 9 # 0 0 0 0 0 0 0 0 0 0 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped args => "-X 1000 --local", report => "-k 5", pairhits => [{ "12,78" => 1, "12,249" => 1, "12,315" => 1, "12,486" => 1, "12,552" => 1, "12,723" => 1, "12,789" => 1, "12,960" => 1 }], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 78 => "28M", 249 => "28M", 315 => "28M", 486 => "28M", 552 => "28M", 723 => "28M", 789 => "28M", 960 => "28M" }], samoptflags_map => [ { 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, 
"XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 249 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 486 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 723 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 960 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, } ] }, { name => "P.M.22.G. 
Paired -M 2 w/ 2 paired hit, 2 unpaired hits each, global", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 # 0 1 2 3 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, args => "-X 150", report => "-M 2", pairhits => [ { "12,78" => 1, "249,315" => 1 } ], cigar_map => [{ 12 => "33M", 249 => "33M", 78 => "28M", 315 => "28M" }], hits_are_superset => [ 1 ], samoptflags_map => [{ 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, }] }, 
{ name => "P.M.22.L. Paired -M 2 w/ 2 paired hit, 2 unpaired hits each, local", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 # 0 1 2 3 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, args => "--local -X 150", report => "-M 2", pairhits => [ { "12,78" => 1, "249,315" => 1 } ], cigar_map => [{ 12 => "33M", 249 => "33M", 78 => "28M", 315 => "28M" }], hits_are_superset => [ 1 ], samoptflags_map => [{ 12 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 249 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, 
"YT:Z:CP" => 1, "YS:i:66" => 1 }, }] }, { name => "P.k.2.G. Paired -k 1 w/ 2 paired hit, 2 unpaired hits each, global", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 # 0 1 2 3 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, args => "-X 150", report => "-k 1", pairhits => [ { "12,78" => 1, "249,315" => 1 } ], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 249 => "33M", 78 => "28M", 315 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, }] }, { name => "P.k.2.L. 
Paired -k 1 w/ 2 paired hit, 2 unpaired hits each, local", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 # 0 1 2 3 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, args => "--local -X 150", report => "-k 1", pairhits => [ { "12,78" => 1, "249,315" => 1 } ], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 249 => "33M", 78 => "28M", 315 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 249 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 315 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, }] }, { name => "P.M.2.G. 
Paired -M 1 w/ 2 paired hit, 2 unpaired hits each, global", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 # 0 1 2 3 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, report => "-M 1 -X 150", pairhits => [ { "12,78" => 1, "249,315" => 1 } ], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 249 => "33M", 78 => "28M", 315 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, }] }, { name => 
"P.M.2.L. Paired -M 1 w/ 2 paired hit, 2 unpaired hits each, local", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 # 0 1 2 3 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, report => "-M 1 --local -X 150", pairhits => [ { "12,78" => 1, "249,315" => 1 } ], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 249 => "33M", 78 => "28M", 315 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 249 => { "AS:i:66" => 1, "XS:i:66" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, 
"YS:i:66" => 1 }, }] }, { name => "P.k.1.G. Paired -k w/ 1 paired hit, 1 unpaired hit each, global", # 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 6 7 8 9 0 1 2 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, report => "-k 1 -X 150", pairhits => [ { "12,78" => 1 } ], cigar_map => [{ 12 => "33M", 78 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, }] }, { name => "P.k.1.L. 
Paired -k 1 w/ 1 paired hit, 1 unpaired hit each, local", # 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 6 7 8 9 0 1 2 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, args => "--local -X 150", report => "-k 1", pairhits => [ { "12,78" => 1 } ], cigar_map => [{ 12 => "33M", 78 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, }] }, { name => "P.M.1.G. 
Paired -M w/ 1 paired hit, 1 unpaired hit each, global", # 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 6 7 8 9 0 1 2 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, args => "-X 150", report => "-M 1", pairhits => [ { "12,78" => 1 } ], cigar_map => [{ 12 => "33M", 78 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, }] }, { name => "P.M.1.L. 
Paired -M w/ 1 paired hit, 1 unpaired hit each, local", # 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGTATCGA" ], # 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 6 7 8 9 0 1 2 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped mate1fw => 1, mate2fw => 0, args => "-X 150 --local", report => "-M 1", pairhits => [ { "12,78" => 1 } ], cigar_map => [{ 12 => "33M", 78 => "28M" }], samoptflags_map => [{ 12 => { "AS:i:66" => 1, "XS:i:0" => 1, "MD:Z:33" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:0" => 1, "MD:Z:28" => 1, "YM:i:0" => 1, "YP:i:0" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, }] }, { name => "P.M.58.G. 
Unpaired -M 5 w/ 8 hits global", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTC
ACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGA" ], # 
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 # 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped args => "-X 150", report => "-M 5", pairhits => [ { "12,78" => 1, "249,315" => 1, "486,552" => 1, "723,789" => 1, "960,1026" => 1, "1197,1263" => 1, "1434,1500" => 1, "1671,1737" => 1, "1908,1974" => 1, "2145,2211" => 1, "2382,2448" => 1 } ], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 78 => "28M", 249 => "33M", 315 => "28M", 486 => "33M", 552 => "28M", 723 => "33M", 789 => "28M", 960 => "33M", 1026 => "28M", 1197 => "33M", 1263 => "28M", 1434 => "33M", 1500 => "28M", 1671 => "33M", 1737 => "28M", 1908 => "33M", 1974 => "28M", 2145 => "33M", 2211 => "28M", 2382 => "33M", 2448 => "28M", }], samoptflags_map => [ { 12 => { 
"AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 78 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 249 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 315 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 486 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 552 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 723 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 789 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 960 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1026 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1197 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" 
=> 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1263 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1434 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1500 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1671 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1737 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1908 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 1974 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 2145 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 2211 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 2382 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, 
"YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, 2448 => { "AS:i:0" => 1, "XS:i:0" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:0" => 1 }, } ] }, { name => "P.M.58.L. Unpaired -M 5 w/ 8 hits local", # 0 1 2 3 0 1 2 0 1 2 3 0 1 2 # 012345678901234567890123456789012 0123456789012345678901234567 012345678901234567890123456789012 0123456789012345678901234567 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG CAGCGTACGGTATCTAGCTATGGGCATCGATCG ACACACCCCTATAGCTCGGAGCTGACTG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGT
ATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGAGCGGTATCTACAGCCACTCATCACACACCCCTATAGCTCGGAGCTGACTGGGTTACTGGGGGGGATGCGTATCGACTATCGACAATATGACGCGTCGGTCACCCCATAATATGCAAAAATTATAGCTCACGACGCGTACTAATAGAAAACGCGCTATCAGCCTCCGACGCGGCGGTATCGAAGACGCAGTC" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 # 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 mate1s => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], mate2s => [ "CAGTCAGCTCCGAGCTATAGGGGTGTGT" ], # rev comped args => "--local -X 150", report => "-M 5", pairhits => [ { "12,78" => 1, "249,315" => 1, "486,552" => 1, "723,789" => 1, "960,1026" => 1, "1197,1263" => 1, "1434,1500" => 1, "1671,1737" => 1 } ], hits_are_superset => [ 1 ], cigar_map => [{ 12 => "33M", 78 => "28M", 249 => "33M", 315 => "28M", 486 => "33M", 552 => "28M", 723 => "33M", 789 => "28M", 960 => "33M", 1026 => "28M", 1197 => "33M", 1263 => "28M", 1434 => "33M", 1500 => "28M", 1671 => "33M", 1737 => "28M" }], samoptflags_map => [ { 12 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, 
"MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 78 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 249 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 315 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 486 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 552 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 723 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 789 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 960 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 1026 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 1197 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, 
"YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 1263 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 1434 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 1500 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, 1671 => { "AS:i:66" => 1, "XS:i:66" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:33" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:56" => 1 }, 1737 => { "AS:i:56" => 1, "XS:i:56" => 1, "XN:i:0" => 1, "XM:i:0" => 1, "XO:i:0" => 1, "XG:i:0" => 1, "NM:i:0" => 1, "MD:Z:28" => 1, "YM:i:1" => 1, "YP:i:1" => 1, "YT:Z:CP" => 1, "YS:i:66" => 1 }, } ] }, # # Unpaired # { name => "U.M.1.G. 
Unpaired -M w/ 1 hit global", # 0 1 2 3 # 012345678901234567890123456789012 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG # CAGCGTACGGTATCTAGCTATG # GGTATCTAGCTATGGGCATCGA # AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], # 01234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], report => "-M 1", hits => [ { 12 => 1 } ], cigar => [ "33M" ], samoptflags => [{ "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:33" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion }] }, { name => "U.M.1.L. Unpaired -M w/ 1 hit local", # 0 1 2 3 # 012345678901234567890123456789012 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], # 01234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], args => "--local", report => "-M 1", hits => [ { 12 => 1 } ], cigar => [ "33M" ], samoptflags => [{ "AS:i:66" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:33" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion }] }, { name => "U.k.1.G. 
Unpaired -k 1 w/ 1 hit global", # 0 1 2 3 # 012345678901234567890123456789012 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], # 01234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], report => "-k 1", hits => [ { 12 => 1 } ], cigar => [ "33M" ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:33" => 1, # mismatching positions/bases "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.M.1.L. Unpaired -m w/ 1 hit local", # 0 1 2 3 # 012345678901234567890123456789012 # CAGCGTACGGTATCTAGCTATGGGCATCGATCG ref => [ "AGACGCAGTCACCAGCGTACGGTATCTAGCTATGGGCATCGATCGACGACGTACGA" ], # 01234567890123456789012345678901234567890123456789012345 # 0 1 2 3 4 5 reads => [ "CAGCGTACGGTATCTAGCTATGGGCATCGATCG" ], args => "--local", report => "-k 1", hits => [ { 12 => 1 } ], cigar => [ "33M" ], samoptflags => [ { "AS:i:66" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:33" => 1, # mismatching positions/bases "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.M.2.G. 
Unpaired -M 1 w/ 2 hit global", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], args => "", report => "-M 1", hits => [ { 6 => 1, 48 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:1" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.M.2.L. Unpaired -M 1 w/ 2 hit local", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], args => "--local", report => "-M 1", hits => [ { 6 => 1, 48 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:60" => 1, # alignment score "XS:i:60" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:1" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => 
"U.k.2.G. Unpaired -k 1 w/ 2 hit global", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], report => "-k 1", hits => [ { 6 => 1, 48 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.k.2.L. Unpaired -k 1 w/ 2 hit local", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], report => "-k 1", hits => [ { 6 => 1, 48 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.M.22.G. 
Unpaired -M 2 w/ 2 hit global", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], report => "-M 2", hits => [ { 6 => 1, 48 => 1 } ], cigar => [ "30M" ], hits_are_superset => [ 1 ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.M.22.L. Unpaired -M 2 w/ 2 hit local", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], report => "-M 2 --local", hits => [ { 6 => 1, 48 => 1 } ], cigar => [ "30M" ], hits_are_superset => [ 1 ], samoptflags => [ { "AS:i:60" => 1, # alignment score "XS:i:60" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.k.22.G. 
Unpaired -k 2 w/ 2 hit global", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], report => "-k 2", hits => [ { 6 => 1, 48 => 1 } ], cigar => [ "30M" ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.k.22.L. Unpaired -k 2 w/ 2 hit local", # 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234 # 0 1 2 3 4 5 6 7 8 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], args => "--local", report => "-k 2", hits => [ { 6 => 1, 48 => 1 } ], cigar => [ "30M" ], samoptflags => [ { "AS:i:60" => 1, # alignment score "XS:i:60" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.M.58.G. 
Unpaired -M 5 w/ 8 hits global", # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 # 0 1 2 3 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], args => "-X 150", report => "-M 5", hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:1" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in 
unpaired fashion } ] }, { name => "U.M.58.L. Unpaired -M 5 w/ 8 hits global", # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 # 0 1 2 3 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], args => "--local", report => "-M 5", hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:60" => 1, # alignment score "XS:i:60" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:1" => 1, # read aligned repetitively in unpaired 
fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.k.58.G. Unpaired -k 5 w/ 8 hits global", # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 # 0 1 2 3 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], report => "-k 5", hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:0" => 1, # alignment score "XS:i:0" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # mismatching positions/bases "YM:i:0" => 1, # read 
aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, { name => "U.k.58.L. Unpaired -k 5 w/ 8 hits local", # 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 0 1 2 # 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 012345678901234567890123456789 # AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA AGATTACGGATCTACGATTCGAGTCGGTCA ref => [ "AGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGAAGACGCAGATTACGGATCTACGATTCGAGTCGGTCAGTCACCAGCGTAAGATTACGGATCTACGATTCGAGTCGGTCAAGTGCGA" ], # 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 # 0 1 2 3 reads => [ "AGATTACGGATCTACGATTCGAGTCGGTCA" ], args => "--local", report => "-k 5", hits => [ { 6 => 1, 48 => 1, 91 => 1, 133 => 1, 176 => 1, 218 => 1, 261 => 1, 303 => 1 } ], hits_are_superset => [ 1 ], cigar => [ "30M" ], samoptflags => [ { "AS:i:60" => 1, # alignment score "XS:i:60" => 1, # suboptimal alignment score "XN:i:0" => 1, # num ambiguous ref bases "XM:i:0" => 1, # num mismatches "XO:i:0" => 1, # num gap opens "XG:i:0" => 1, # num gap extensions "NM:i:0" => 1, # num edits "MD:Z:30" => 1, # 
mismatching positions/bases "YM:i:0" => 1, # read aligned repetitively in unpaired fashion "YT:Z:UU" => 1, # unpaired read aligned in unpaired fashion } ] }, # Following cases depend on this being the case: # # static const float DEFAULT_CEIL_CONST = 3.0f; # static const float DEFAULT_CEIL_LINEAR = 3.0f; # Just enough budget for hits, so it should align { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCAT" ], # budget = 3 + 8 * 3 = 27 args => "-L 6 -i C,1,0 --policy \"MMP=C27\\;MIN=L,-3,-3\\;RDG=25,15\\;RFG=25,15\"", # penalty = 27 report => "-a", hits => [ { 0 => 1, 8 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:6=1X1=" ], cigar => [ "8M" ], samoptflags => [ { "AS:i:-27" => 1, "XS:i:-27" => 1, "NM:i:1" => 1, "XM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6G1" => 1 } ] }, # Not quite enough budget for hits, so it should NOT align { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCAT" ], # budget = 3 + 8 * 3 = 27 args => "-L 6 -i C,1,0 --policy \"MMP=C28\\;MIN=L,-3,-3\\;RDG=25,15\\;RFG=25,15\"", # penalty = 28 report => "-a", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ], samoptflags => [ { "YT:Z:UU" => 1 } ] }, # Check that using a seed of length 1 with 1-mismatch doesn't crash. # Perhaps we should disallow it though? 
{ ref => [ "AAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCC" ], reads => [ "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC", "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC" ], names => [ "r1", "r1", "r1", "r1", "r2", "r2", "r2", "r2", "r3", "r3", "r3", "r3", "r4", "r4", "r4", "r4" ], args => "--policy \"SEED=1,1\"", check_random => 1, report => "-k 1" }, # # Gap penalties # # Alignment with 1 read gap { name => "Gap penalties 1", ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCTTTGTT" ], # budget = 3 + 12 * 3 = 39 args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=29,10\\;RFG=25,15\\;MIN=L,-3,-3\"", report => "-a", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:6=1D6=" ], cigar => [ "6M1D6M" ], samoptflags => [{ "AS:i:-39" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:6^G6" => 1 }] }, # Alignment with 1 read gap, but not enough budget { name => "Gap penalties 2", ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCTTTGTT" ], # budget = 3 + 12 * 3 = 39 args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=30,10\\;RFG=25,15\\;MIN=L,-3,-3\"", report => "-a", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ], samoptflags => [{ "YT:Z:UU" => 1 }] }, # Alignment with 1 reference gap { name => "Gap penalties 3", ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=30,15\\;MIN=L,-3,-3\"", report => "-a", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:7=1I6=" ], cigar => [ "7M1I6M" ], samoptflags => [{ "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 }] }, # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 # T T G T T C G T T T G T T C G T # 0 1 1 0 2 3 1 0 0 1 1 0 2 3 1 # 0 T x # 0 0 x # 1 T x # 1 1 x # 2 G x # 2 1 x # 3 T x # 3 0 x # 4 T x # 4 2 x # 5 C x # 5 3 x # 6 G # 6 2 x # 7 A # 7 3 x # 8 T x # 8 0 x # 9 T x # 9 0 x # 0 T x # 0 1 x # 1 G x # 1 1 x # 2 T x # 2 0 
x # 3 T # # Alignment with 1 reference gap, but not enough budget { name => "Gap penalties 4", ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=30,16\\;MIN=L,-3,-3\"", report => "-a", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ], samoptflags => [ { "YT:Z:UU" => 1 } ] }, # Alignment with 1 reference gap, but not enough budget { name => "Gap penalties 5", ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=25,15\\;RFG=31,15\\;MIN=L,-3,-3\"", report => "-a", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ], samoptflags => [ { "YT:Z:UU" => 1 } ] }, # Alignment with 1 reference gap and 1 read gap { name => "Gap penalties 6", ref => [ "ATTGTTCGTTTGTTCGTA" ], reads => [ "ATTGTTGTTTGATTCGTA" ], # budget = 3 + 18 * 3 = 57 args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=19,10\\;RFG=18,10\\;MIN=L,-3,-3\"", report => "-a", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:6=1D5=1I6=" ], cigar => [ "6M1D5M1I6M" ] }, # Alignment with 1 reference gap and 1 read gap, but not enough budget { name => "Gap penalties 7", ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTGTTTGATTCGT" ], # budget = 3 + 16 * 3 = 51 args => "--policy \"MMP=C30\\;SEED=0,3\\;IVAL=C,1,0\\;RDG=16,10\\;RFG=16,10\\;MIN=L,-3,-3\"", report => "-a", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ] }, # Experiment with N filtering { name => "N filtering 1", ref => [ "GAGACTTTATACGCATCGAACTATCGCTCTA" ], reads => [ "ATACGCATCGAAC" ], # 0123456789012345678901234567890 # 1 2 3 args => "--policy \"NCEIL=L,0,0\"", report => "-a", hits => [ { 8 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:13=" ] }, { name => "N filtering 2", ref => [ "GAGACTTTATNCGCATCGAACTATCGCTCTA" ], reads => [ "ATACGCATCGAAC" ], # 
0123456789012345678901234567890 # 1 2 3 args => "--policy \"NCEIL=L,0,0\"", report => "-a", hits => [ { "*" => 1 } ] }, { name => "N filtering 3", ref => [ "GAGACTTTATACGCATCGAANTATCGCTCTA" ], reads => [ "ATACGCATCGAAC" ], # 0123456789012345678901234567890 # 1 2 3 args => "--policy \"NCEIL=L,0,0\"", report => "-a", hits => [ { "*" => 1 } ] }, { name => "N filtering 4", ref => [ "GAGACTTTNTACGCATCGAACTATCGCTCTA" ], reads => [ "ATACGCATCGAAC" ], # 0123456789012345678901234567890 # 1 2 3 args => "--policy \"NCEIL=L,0,0\"", report => "-a", hits => [ { "*" => 1 } ] }, { name => "N filtering 5", ref => [ "GAGACTTTATNCGCATCGAACTATCGCTCTA" ], reads => [ "ATACGCATCGAAC" ], # 0123456789012345678901234567890 # 1 2 3 args => "--policy \"NCEIL=L,0,0.1\\;SEED=0,10\\;IVAL=C,1,0\"", report => "-a", hits => [ { 8 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:2=1X10=" ] }, { name => "N filtering 6", ref => [ "GAGACTTTNTACGCATCGAANTATCGCTCTA" ], reads => [ "ATACGCATCGAAC" ], # 0123456789012345678901234567890 # 1 2 3 args => "--policy \"NCEIL=L,0,0.1\\;SEED=0,10\\;IVAL=C,1,0\"", report => "-a", hits => [ { "*" => 1 } ] }, # No discordant alignment because one mate is repetitive. 
# Alignment with 1 reference gap { ref => [ "TTTTGTTCGTTTG" ], reads => [ "TTTTGTTCGATTTG" ], # budget = 3 + 14 * 3 = 45 args => "--policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=25,15\\;RFG=25,20\\;MIN=L,-3,-3\"", report => "-a", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:9=1I4=" ], cigar => [ "9M1I4M" ], samoptflags => [ { "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 }, ] }, # TTGTTCGTTTGTT # Tx # T x # G x # T x # T x # C x # G x # A x # T x # T x # T x # G x # T x # T x # Alignment with 1 reference gap { ref => [ "TTGTTCGTTTGTT" ], reads => [ "TTGTTCGATTTGTT" ], # budget = 3 + 14 * 3 = 45 args => "--policy \"SEED=0,3\\;IVAL=C,1,0\\;MMP=C30\\;RDG=25,15\\;RFG=25,20\\;MIN=L,-3,-3\"", report => "-a", hits => [ { 0 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:7=1I6=" ], cigar => [ "7M1I6M" ], samoptflags => [ { "AS:i:-45" => 1, "NM:i:1" => 1, "XO:i:1" => 1, "XG:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:13" => 1 }, ] }, { ref => [ "ACNCA" ], reads => [ "CA" ], args => "", report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", hits => [ { 3 => 1 } ], edits => [ ], flags => [ "XM:0,XP:0,XT:UU,XC:2=" ], cigar => [ "2M" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, ] }, { name => "N ceil = 0, 2 legit hits (1)", ref => [ "ACNCA" ], reads => [ "AC" ], args => "", report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", hits => [ { 0 => 1 } ], edits => [ ], flags => [ ] }, { name => "N ceil = 0, 2 legit hits (2)", ref => [ "ACNCANNNNNNNNCGNNNNNNNNCG" ], # 0123456789012345678901234 # 0 1 2 reads => [ "CG" ], args => "", report => "-a --policy \"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", hits => [ { 13 => 2, 23 => 2 } ], edits => [ ], cigar => [ "2M", "2M" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, ] }, { ref => [ "ACNCANNNNNNAACGNNNNNNNACGAANNNNCGAAAN" ], # 0123456789012345678901234567890123456 # 0 1 2 3 reads => [ "CG" ], args => "", report => "-a --policy 
\"SEED=0,2\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", hits => [ { 13 => 2, 23 => 2, 31 => 2 } ], edits => [ ], flags => [ "XM:0,XP:0,XT:UU,XC:2=", "XM:0,XP:0,XT:UU,XC:2=", "XM:0,XP:0,XT:UU,XC:2=" ], cigar => [ "2M", "2M", "2M" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, ] }, { ref => [ "ACNCANNNNNNAACGNNNNNNNACGAANNNNCGAAAN" ], # 0123456789012345678901234567890123456 # 0 1 2 3 reads => [ "CG" ], args => "", report => "-a --policy \"SEED=0,1\\;IVAL=C,1,0\\;NCEIL=L,0,0\"", hits => [ { 13 => 2, 23 => 2, 31 => 2 } ], edits => [ ], flags => [ "XM:0,XP:0,XT:UU,XC:2=", "XM:0,XP:0,XT:UU,XC:2=", "XM:0,XP:0,XT:UU,XC:2=" ], cigar => [ "2M", "2M", "2M" ], samoptflags => [ { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, { "YT:Z:UU" => 1, "MD:Z:2" => 1 }, ] }, # # Alignment involving ambiguous reference character # # First read has non-compatible unambiguous charcacter (G for Y), # second read has compatible one { ref => [ "TTGTTYGT" ], reads => [ "TTGTTGGT", "TTGTTCGT" ], args => "", report => "-a --policy \"SEED=0,5\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", hits => [ { 0 => 1 }, { 0 => 1 } ], norc => 1, edits => [ "5:N>G", "5:N>C" ], flags => [ "XM:0,XP:0,XT:UU,XC:5=1X2=", "XM:0,XP:0,XT:UU,XC:5=1X2=" ], cigar => [ "8M", "8M" ], samoptflags => [ { "AS:i:-1" => 1, "NM:i:1" => 1, "XM:i:1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:5N2" => 1 }, { "AS:i:-1" => 1, "NM:i:1" => 1, "XM:i:1" => 1, "XN:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:5N2" => 1 }, ] }, # # Alignment with multi-character read gap # # Relatively small example with a read gap extend { ref => [ "ATAACCTTCG" ], reads => [ "ATAATTCG" ], # 3 * 19 + 3 = 60 # ^ # 4:CC>- args => "", report => "-a --overhang --gbar 3 --policy \"MMP=C30\\;RDG=5,5\\;SEED=0,4\\;IVAL=C,1,0\\;RFG=25,20\\;MIN=L,-3,-3\"", hits => [ { 0 => 1 } ], edits => [ "4:CC>-" ], flags => [ "XM:0,XP:0,XT:UU,XC:4=2D4=" ], cigar => [ "4M2D4M" ], samoptflags => [ { "AS:i:-15" => 1, "NM:i:2" 
=> 1, "XO:i:1" => 1, "XG:i:2" => 3, "YT:Z:UU" => 1, "MD:Z:4^CC4" => 1 } ] }, # Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends { ref => [ "ATATGCCCCATGCCCCCCTCCG" ], reads => [ "ATATGCCCCCCCCCCTCCG" ], # 3 * 19 + 3 = 60 # ^ # 9:ATG>- args => "--policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=5,5\\;RFG=25,15\\;MIN=L,-3,-3\"", hits => [ { 0 => 1 } ], edits => [ "9:ATG>-" ], norc => 1, flags => [ "XM:0,XP:0,XT:UU,XC:9=3D10=" ], cigar => [ "9M3D10M" ], samoptflags => [ { "AS:i:-20" => 1, "NM:i:3" => 1, "XO:i:1" => 1, "XG:i:3" => 3, "YT:Z:UU" => 1, "MD:Z:9^ATG10" => 1 } ] }, # Reads 1 and 2 don't have overhang, reads 3 and 4 overhang opposite ends { ref => [ "ATATGCCCCATGCCCCCCTCCG" ], reads => [ "CGGAGGGGGGGGGGCATAT" ], # ATATGCCCCCCCCCCTCCG # ^ # 10:GTA>- args => "", report => "-a --overhang --policy \"SEED=0,8\\;IVAL=C,1,0\\;MMP=C30\\;RDG=5,5\\;RFG=25,20\\;MIN=L,-3,-3\"", hits => [ { 0 => 1 } ], edits => [ "10:GTA>-" ], norc => 1, flags => [ "XM:0,XP:0,XT:UU,XC:9=3D10=" ], cigar => [ "9M3D10M" ], samoptflags => [ { "AS:i:-20" => 1, "NM:i:3" => 1, "XO:i:1" => 1, "XG:i:3" => 3, "YT:Z:UU" => 1, "MD:Z:9^ATG10" => 1 } ] }, # 1 discordant alignment and one concordant alignment. Discordant because # the fragment is too long. 
{ name => "Simple paired-end 13", ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCTGTCGCTACCGCCCCCCCCCCC" ], # 012345678901234567890123456789012345678901234567890 # 0 1 2 3 4 5 # ATAAAAATAT GTCGCTACCG # ATAAAAATAT TGTCGCTACC # ATAAAAATAT CTGTCGCTAC # ATAAAAATAT CCTGTCGCTA # TAAAAATATT GTCGCTACCG # TAAAAATATT TGTCGCTACC # TAAAAATATT CTGTCGCTAC # TAAAAATATT CCTGTCGCTA # 012345678901234567890123456789012345678901234567890 # 0 1 2 3 4 5 # ----------------------------------- # 012345678901234567890123456789012345678901234567 # 0 1 2 3 4 mate1s => [ "ATAAAAATAT", "ATAAAAATAT", "ATAAAAATAT", "ATAAAAATAT", "TAAAAATATT", "TAAAAATATT", "TAAAAATATT", "TAAAAATATT", ], mate2s => [ "GTCGCTACCG", "TGTCGCTACC", "CTGTCGCTAC", "CCTGTCGCTA", "GTCGCTACCG", "TGTCGCTACC", "CTGTCGCTAC", "CCTGTCGCTA" ], mate1fw => 1, mate2fw => 1, args => "-I 0 -X 35", # Not really any way to flag an alignment as discordant pairhits => [ { "3,30" => 1 }, { "3,29" => 1 }, { "3,28" => 1 }, { "3,27" => 1 }, { "4,30" => 1 }, { "4,29" => 1 }, { "4,28" => 1 }, { "4,27" => 1 } ], flags => [ "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=" ] }, # 1 discordant alignment and one concordant alignment. Discordant because # the fragment is too long. 
{ name => "Simple paired-end 12", ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCGGGCCCGCCCGCCCCCCCCCCC" ], # ATAAAAATAT GGCCCGCCCG # ATAAAAATAT CCGGGCCCGC # 012345678901234567890123456789012345678901234567890 # 0 1 2 3 4 5 # ------------------------------------- # 012345678901234567890123456789012345678901234567 mate1s => [ "ATAAAAATAT", "ATAAAAATAT" ], mate2s => [ "GGCCCGCCCG", "CCGGGCCCGC" ], mate1fw => 1, mate2fw => 1, args => "-I 0 -X 36", # Not really any way to flag an alignment as discordant pairhits => [ { "3,30" => 1 }, { "3,27" => 1 } ], flags => [ "XM:0,XP:0,XT:DP,XC:10=", "XM:0,XP:0,XT:CP,XC:10=" ] }, # 1 discordant alignment. Discordant because the fragment is too long. { name => "Simple paired-end 11", ref => [ "TTTATAAAAATATTTCCCCCCCCCCCCCCCCGATCGCCCGCCCCCCCCCCC" ], # ATAAAAATAT CGATCGCCCG # 012345678901234567890123456789012345678901234567890 # 0 1 2 3 4 5 # ------------------------------------- # 012345678901234567890123456789012345678901234567 mate1s => [ "ATAAAAATAT" ], mate2s => [ "CGATCGCCCG" ], mate1fw => 1, mate2fw => 1, args => "-I 0 -X 36", # Not really any way to flag an alignment as discordant pairhits => [ { "3,30" => 1 } ], flags => [ "XM:0,XP:0,XT:DP,XC:10=" ] }, # 1 discordant alignment. Discordant because the fragment is too short. 
{ name => "Simple paired-end 10", ref => [ "TTTATAAAAATATTTCCCCCCGATCGCCCGCCCCCCCCCCC" ], # ATAAAAATAT CGATCGCCCG # 01234567890123456789012345678901234567890 # 0 1 2 3 4 # --------------------------- # 012345678901234567890123456 mate1s => [ "ATAAAAATAT" ], mate2s => [ "CGATCGCCCG" ], mate1fw => 1, mate2fw => 1, args => "-I 28 -X 80", # Not really any way to flag an alignment as discordant pairhits => [ { "3,20" => 1 } ], flags => [ "XM:0,XP:0,XT:DP,XC:10=" ] }, # Like 6, but with -M limit { name => "Simple paired-end 9", ref => [ "CCCATATATATATCCTCCCATATATATATCCCTCCCCATATATATATCCCTTTTCCTTTCGCGCGCGCGTTTCCCCCCCCC" ], # ATATATATAT ATATATATAT ATATATATAT CGCGCGCGCG # 012345678901234567890123456789012345678901234567890123456789012345678901234567890 # 0 1 2 3 4 5 6 7 8 mate1s => [ "ATATATATAT" ], mate2s => [ "CGCGCGCGCG" ], mate1fw => 1, mate2fw => 0, args => "-I 0 -X 80", report => "-M 2", lines => 2, pairhits => [ { "3,59" => 1, "19,59" => 1, "37,59" => 1 } ], hits_are_superset => [ 1 ], flags => [ "XM:1,XP:1,XT:CP,XC:10=", "XM:1,XP:1,XT:CP,XC:10=" ] }, # Like 6, but without -m limit { name => "Simple paired-end 8", ref => [ "CCCATATATATATCCTCCCATATATATATCCCTTCCCATATATATATCCCTTTTTTTTTCGCGCGCGCGTTTCCCCCCCCC" ], # ATATATATAT ATATATATAT ATATATATAT CGCGCGCGCG # 012345678901234567890123456789012345678901234567890123456789012345678901234567890 # 0 1 2 3 4 5 6 7 8 mate1s => [ "ATATATATAT" ], mate2s => [ "CGCGCGCGCG" ], mate1fw => 1, mate2fw => 0, args => "-I 0 -X 80", pairhits => [ { "3,59" => 1, "19,59" => 1, "37,59" => 1 } ], flags => [ "XM:0,XP:0,XT:CP,XC:10=" ] }, # Paired-end read, but only one mate aligns { name => "Simple paired-end 2; no --no-mixed", ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCCCTTCGCGCGCGCGTTTCCCCC" ], # ATATATATAT CGCGCGCGCG # 01234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 mate1s => [ "ATATATATAT" ], mate2s => [ "CCCCCGGGGG" ], mate1fw => 1, mate2fw => 1, args => "-I 0 -X 50 --nofw", nofw => 1, pairhits => [ { "*,3" => 1 } ], 
flags_map => [ { 3 => "XM:0,XP:0,XT:UP,XC:10=", "*" => "XM:0,XP:0,XT:UP" } ], cigar_map => [{ 3 => "10M", "*" => "*" }], samoptflags_map => [{ 3 => { "MD:Z:10" => 1, # mismatching positions/bases "YT:Z:UP" => 1, # type of alignment (concordant/discordant/etc) }, "*" => { "YT:Z:UP" => 1, # type of alignment (concordant/discordant/etc) } }] }, { name => "Simple paired-end 2; --no-mixed", ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCTTTTCGCGCGCGCGTTTCCCCC" ], # ATATATATAT CGCGCGCGCG # 01234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 mate1s => [ "ATATATATAT" ], mate2s => [ "CCCCCGGGGG" ], mate1fw => 1, mate2fw => 1, args => "-I 0 -X 50 --no-mixed", pairhits => [ { "*,*" => 1 } ] }, # Simple paired-end alignment { name => "Simple paired-end 1", ref => [ "CCCATATATATATCCCTTTTTTTCCCCCCCCTTTTCGCGCGCGCGTTTTCCCC" ], # ATATATATAT CGCGCGCGCG # 01234567890123456789012345678901234567890123456789012 # 0 1 2 3 4 5 mate1s => [ "ATATATATAT" ], mate2s => [ "CGCGCGCGCG" ], mate1fw => 1, mate2fw => 1, args => "-I 0 -X 50", pairhits => [ { "3,35" => 1 } ], flags => [ "XM:0,XP:0,XT:CP,XC:10=" ], cigar_map => [{ 3 => "10M", 35 => "10M" }], samoptflags_map => [{ 3 => { "MD:Z:10" => 1, # mismatching positions/bases "YT:Z:CP" => 1, # type of alignment (concordant/discordant/etc) }, 35 => { "MD:Z:10" => 1, # mismatching positions/bases "YT:Z:CP" => 1, # type of alignment (concordant/discordant/etc) } }] }, # Check that pseudo-random generation is always the same for # same-sequence, same-name reads { ref => [ "AAAAAAAAAAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCCCCCCCCC" ], reads => [ "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC", "AA", "AA", "AA", "AA", "CC", "CC", "CC", "CC" ], names => [ "r1", "r1", "r1", "r1", "r2", "r2", "r2", "r2", "r3", "r3", "r3", "r3", "r4", "r4", "r4", "r4" ], args => "", check_random => 1, report => "-k 1" }, { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTGTTCGT" ], report => "-M 1", hits => [ { 0 => 1, 8 => 1 } ], flags => [ "XM:1,XP:0,XT:UU,XC:8=" ], 
hits_are_superset => [ 1 ], cigar => [ "8M" ], samoptflags => [ { "YM:i:1" => 1, "YT:Z:UU" => 1, "MD:Z:8" => 1, "YM:i:1" => 1 } ], }, # Read 3 overhangs right end { ref => [ "TTGTTCGT" ], reads => [ "GTTCGTA" ], args => "--overhang --policy \"SEED=0,3\\;IVAL=C,1,0\\;NCEIL=L,2,0\"", hits => [ { 2 => 1 } ], flags => [ "XM:0,XP:0,XT:UU,XC:6=1X" ] }, # Mess with arguments # Default should be 1-mismatch, so this shouldn't align { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTATTAGT" ], args => "", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ], samoptflags => [{ "YT:Z:UU" => 1 }], }, # Shouldn't align with 0 mismatches either { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTATTAGT" ], args => "--policy SEED=0", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ], samoptflags => [{ "YT:Z:UU" => 1 }], }, # Should align with 0 mismatches if we can wedge a seed into the 2 # matching characters between the two mismatches. Here we fail to # wedge a length-3 seed in (there's no room) { ref => [ "TTGTTCGTTTGTTCGT" ], reads => [ "TTATTAGT" ], args => "--policy \"SEED=0,3\\;IVAL=C,1,0\\;MMP=C1\"", hits => [ { "*" => 1 } ], flags => [ "XM:0,XP:0,XT:UU" ], cigar => [ "*" ], samoptflags => [{ "YT:Z:UU" => 1 }], }, # Should align with 0 mismatches if we can wedge a seed into the 2 # matching characters between the two mismatches. 
# Here we wedge a length-2 seed in
    { ref   => [ "TTGTTCGTTTGTTCGT" ],
      reads => [ "TTATTAGT" ],
      args  => "--policy \"SEED=0,2\\;IVAL=C,1,0\\;MMP=C1\"",
      #
      # Expected alignments at offsets 0, 3 and 4:
      #
      # TTGTTCGTTTGTTCGT    TTGTTCGTTTGTTCGT    TTGTTCGTTTGTTCGT
      # || || ||               ||  |                |  || ||
      # TTATTAGT               TTATTAGT             TTATTAGT
      #
      # Expected alignments at offsets 5, 7 and 8:
      #
      # TTGTTCGTTTGTTCGT    TTGTTCGTTTGTTCGT    TTGTTCGTTTGTTCGT
      #         ||  |              ||  |                || || ||
      #      TTATTAGT              TTATTAGT             TTATTAGT
      #
      hits => [ { 0 => 1, 3 => 1, 4 => 1, 5 => 1, 7 => 1, 8 => 1} ],
      flag_map => [ {
        0 => "XM:0,XP:0,XT:UU,XC:2=1X2=1X2=",
        3 => "XM:0,XP:0,XT:UU,XC:2=2X1=3X",
        4 => "XM:0,XP:0,XT:UU,XC:1=2X2=1X2=",
        5 => "XM:0,XP:0,XT:UU,XC:3X2=2X1=",
        7 => "XM:0,XP:0,XT:UU,XC:2=2X1=3X",
        8 => "XM:0,XP:0,XT:UU,XC:2=1X2=1X2="} ],
      cigar_map => [ { 0 => "8M", 3 => "8M", 4 => "8M", 5 => "8M", 7 => "8M", 8 => "8M" } ],
      samoptflags_map => [{
        0 => { "AS:i:-2" => 1, "XS:i:-2" => 1, "NM:i:2" => 1,
               "XM:i:2" => 1, "YT:Z:UU" => 1, "MD:Z:2G2C2" => 1 },
        3 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1,
               "XM:i:5" => 1, "YT:Z:UU" => 1, "MD:Z:2C0G1T0T0G0" => 1 },
        4 => { "AS:i:-3" => 1, "XS:i:-2" => 1, "NM:i:3" => 1,
               "XM:i:3" => 1, "YT:Z:UU" => 1, "MD:Z:1C0G2T2" => 1 },
        5 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1,
               "XM:i:5" => 1, "YT:Z:UU" => 1, "MD:Z:0C0G0T2G0T1" => 1 },
        7 => { "AS:i:-5" => 1, "XS:i:-2" => 1, "NM:i:5" => 1,
               "XM:i:5" => 1, "YT:Z:UU" => 1, "MD:Z:2T0G1T0C0G0" => 1 },
        8 => { "AS:i:-2" => 1, "XS:i:-2" => 1, "NM:i:2" => 1,
               "XM:i:2" => 1, "YT:Z:UU" => 1, "MD:Z:2G2C2" => 1 },
      }],
    },
);

##
# Take a list of reference sequences and write them to a temporary
# FASTA file of the given name.
#
# $refs is an array ref of sequence strings; $fa is the output path.
# Each sequence is written as a two-line record whose name is its
# 0-based index in $refs.  Dies if the file cannot be opened or closed.
#
sub writeFasta($$) {
    my ($refs, $fa) = @_;
    open(my $fa_fh, '>', $fa)
        || die "Could not open $fa for writing: $!";
    for my $i (0 .. scalar(@$refs) - 1) {
        print {$fa_fh} ">$i\n" . $refs->[$i] . "\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($fa_fh) || die "Could not close $fa: $!";
}

##
# Take lists of named reads/mates and write them to appropriate
# files.
#
# Arguments, in order: unpaired read sequences and qualities, mate #1
# sequences and qualities, mate #2 sequences and qualities, read names
# (all array refs), then the paths of the two FASTQ files to write.
# A missing/empty quality string defaults to a run of "I"s as long as the
# read, and a missing/empty name defaults to "r<index>".  Both output files
# are always created; the second stays empty when the input is unpaired.
#
sub writeReads($$$$$$$$$) {
    my ($reads, $quals, $mate1s, $qual1s, $mate2s, $qual2s, $names,
        $fq1, $fq2) = @_;
    open(my $fq1_fh, '>', $fq1) || die "Could not open '$fq1' for writing";
    open(my $fq2_fh, '>', $fq2) || die "Could not open '$fq2' for writing";
    my $pe = (defined($mate1s) && $mate1s ne "");
    if($pe) {
        for my $i (0 .. scalar(@$mate1s) - 1) {
            my $m1 = $mate1s->[$i];
            my $m2 = $mate2s->[$i];
            my $q1 = $qual1s->[$i];
            my $q2 = $qual2s->[$i];
            my $nm = $names->[$i];
            defined($m1) || die;
            defined($m2) || die;
            $q1 ||= ("I" x length($m1));
            $q2 ||= ("I" x length($m2));
            $nm ||= "r$i";
            print {$fq1_fh} "\@$nm/1\n$m1\n+\n$q1\n";
            print {$fq2_fh} "\@$nm/2\n$m2\n+\n$q2\n";
        }
    } else {
        for my $i (0 .. scalar(@$reads) - 1) {
            my $read = $reads->[$i];
            defined($read) || die;
            my $qual = $quals->[$i];
            my $nm   = $names->[$i];
            $qual ||= ("I" x length($read));
            $nm   ||= "r$i";
            print {$fq1_fh} "\@$nm\n$read\n+\n$qual\n";
        }
    }
    close($fq1_fh) || die "Could not close '$fq1': $!";
    close($fq2_fh) || die "Could not close '$fq2': $!";
}

##
# Run bowtie2 with given arguments
#
# Optionally (re)builds the index with bowtie2-build, writes the reads to
# temporary files (either from the in-memory lists or from the literal file
# contents passed in $read_file / $mate1_file / $mate2_file), runs bowtie2
# and collects its output.  Output lines starting with "@" are appended to
# @$header_ls (tab-split) and @$raw_header_ls (raw); all other lines are
# appended to @$ls (tab-split) and @$rawls (raw).
#
# Dies if bowtie2-build fails, if bowtie2 exits non-zero while
# $should_abort is false, or if it exits zero while $should_abort is true.
#
sub runbowtie2($$$$$$$$$$$$$$$$$$$$$) {
    my (
        $do_build,
        $args,
        $color,
        $fa,
        $reportargs,        #5
        $read_file_format,
        $read_file,
        $mate1_file,
        $mate2_file,
        $reads,             #10
        $quals,
        $mate1s,
        $qual1s,
        $mate2s,
        $qual2s,            #15
        $names,
        $ls,
        $rawls,
        $header_ls,
        $raw_header_ls,     #20
        $should_abort) = @_;
    $args .= " --quiet";
    $reportargs = "-a" unless defined($reportargs);
    $args .= " -C" if $color;
    $args .= " $reportargs";
    # Echo the reference FASTA file so that it shows up in the test log
    print "References:\n";
    open(my $fa_fh, '<', $fa) || die;
    while(my $fa_line = <$fa_fh>) {
        print $fa_line;
    }
    close($fa_fh);
    if($do_build) {
        my $build_args = ($color ? "-C" : "");
        my $cmd = "$bowtie2_build --quiet --sanity $build_args $fa .simple_tests.tmp";
        print "$cmd\n";
        system($cmd);
        ($? == 0) || die "Bad exitlevel from bowtie2-build: $?";
    }
    my $pe = (defined($mate1s) && $mate1s ne "");
    $pe = $pe || (defined($mate1_file));
    my $mate1arg;
    my $mate2arg;
    my $readarg;
    my $formatarg = "-c";
    if(defined($read_file) || defined($mate1_file)) {
        # Reads were given as literal file contents in a named format;
        # map the format to bowtie2's option and a file extension
        defined($read_file_format) || die;
        my %format_info = (
            fastq  => [ "-q",     ".fq"       ],
            tabbed => [ "--12",   ".tab"      ],
            fasta  => [ "-f",     ".fa"       ],
            qseq   => [ "--qseq", "_qseq.txt" ],
            raw    => [ "-r",     ".raw"      ],
        );
        exists($format_info{$read_file_format})
            || die "Bad format: $read_file_format";
        my $ext;
        ($formatarg, $ext) = @{ $format_info{$read_file_format} };
        if(defined($read_file)) {
            # Unpaired
            $readarg = ".simple_tests$ext";
            open(my $rd_fh, '>', $readarg) || die;
            print {$rd_fh} $read_file;
            close($rd_fh) || die;
        } else {
            defined($mate1_file) || die;
            defined($mate2_file) || die;
            # Paired
            $mate1arg = ".simple_tests.1$ext";
            $mate2arg = ".simple_tests.2$ext";
            open(my $m1_fh, '>', $mate1arg) || die;
            print {$m1_fh} $mate1_file;
            close($m1_fh) || die;
            open(my $m2_fh, '>', $mate2arg) || die;
            print {$m2_fh} $mate2_file;
            close($m2_fh) || die;
        }
    } else {
        # Reads were given as in-memory lists; always goes through FASTQ
        writeReads(
            $reads,
            $quals,
            $mate1s,
            $qual1s,
            $mate2s,
            $qual2s,
            $names,
            ".simple_tests.1.fq",
            ".simple_tests.2.fq");
        $mate1arg = ".simple_tests.1.fq";
        $mate2arg = ".simple_tests.2.fq";
        $formatarg = "-q";
        $readarg = $mate1arg;
    }
    my $cmd;
    if($pe) {
        # Paired-end case
        $cmd = "$bowtie2 --debug $args .simple_tests.tmp $formatarg -1 $mate1arg -2 $mate2arg";
    } else {
        # Unpaired case
        $cmd = "$bowtie2 --debug $args .simple_tests.tmp $formatarg $readarg";
    }
    print "$cmd\n";
    # Single-string command: still goes through the shell, which the
    # quoted/escaped --policy arguments in the test cases rely on
    open(my $bt_fh, '-|', $cmd) || die "Could not open pipe '$cmd |'";
    while(my $line = <$bt_fh>) {
        print $line;
        chomp($line);
        if(substr($line, 0, 1) eq "@") {
            push @$header_ls, [ split(/\t/, $line, -1) ];
            push @$raw_header_ls, $line;
        } else {
            push @$ls, [ split(/\t/, $line, -1) ];
            push @$rawls, $line;
        }
    }
    # Deliberately unchecked: close returns false when bowtie2 exits
    # non-zero, and that case is judged against $should_abort via $? below
    close($bt_fh);
    ($? == 0 ||  $should_abort) || die "bowtie2 aborted with exitlevel $?\n";
    ($? != 0 || !$should_abort) || die "bowtie2 failed to abort!\n";
}

##
# Compare a hash ref of expected SAM flags with a hash ref of observed SAM
# flags.
#
# $flags maps an observed tag name to { ty => TYPE, vl => VALUE }.
# $ex_flags is a hash ref whose keys are "NAME:TYPE:VALUE" strings.  Every
# observed tag is checked; a tag with no expectation is expected to be
# "i" / "0".  Expected tags that were not observed are not checked.
# Returns 1 on success and dies on the first mismatch.
#
sub matchSamOptionalFlags($$) {
    my ($flags, $ex_flags) = @_;
    my %ex = ();
    for my $spec (keys %$ex_flags) {
        my ($nm, $ty, $vl) = split(/:/, $spec);
        defined($vl) || die "Could not parse optional flag field \"$spec\"";
        $ex{$nm}{ty} = $ty;
        $ex{$nm}{vl} = $vl;
    }
    for my $name (keys %$flags) {
        my ($ex_ty, $ex_vl) = ("i", "0");
        if(defined($ex{$name})) {
            ($ex_ty, $ex_vl) = ($ex{$name}{ty}, $ex{$name}{vl});
        }
        defined($ex_ty) || die;
        defined($ex_vl) || die;
        my $ty = $flags->{$name}{ty};
        my $vl = $flags->{$name}{vl};
        defined($ty) || die;
        defined($vl) || die;
        $ex_ty eq $ty ||
            die "Expected SAM optional flag $name to have type $ex_ty, had $ty";
        $ex_vl eq $vl ||
            die "Expected SAM optional flag $name to have value $ex_vl, had $vl";
    }
    return 1;
}

##
# Main driver: run every test case, forward and then reverse-complemented
# (subject to the case's nofw/norc settings), and check bowtie2's output
# against the expectations recorded in the case.
#
my $tmpfafn = ".simple_tests.pl.fa";
my $last_ref = undef;
for my $c (@cases) {
    last unless defined($c);
    # If there's any skipping of cases to be done, do it here prior to the
    # eq_deeply check
    my $color = 0;
    $color = $c->{color} if defined($c->{color});
    next if ($color && $skipColor);
    # Only rebuild the index when the reference differs from the last case
    my $do_build = 0;
    unless(defined($last_ref) && eq_deeply($c->{ref}, $last_ref)) {
        writeFasta($c->{ref}, $tmpfafn);
        $do_build = 1;
    }
    $last_ref = $c->{ref};
    # For each set of arguments...
    my $case_args = $c->{args};
    $case_args = "" unless defined($case_args);
    my $first = 1; # true until bowtie2 has been run once for this case
    # Forward, then reverse-complemented
    my $fwlo = ($c->{nofw} ? 1 : 0);
    my $fwhi = ($c->{norc} ? 0 : 1);
    for my $fwi ($fwlo .. $fwhi) {
        my $fw = ($fwi == 0);
        my $sam = 1;

        my $reads = $c->{reads};
        my $quals = $c->{quals};
        my $m1s   = $c->{mate1s};
        my $q1s   = $c->{qual1s};
        my $m2s   = $c->{mate2s};
        my $q2s   = $c->{qual2s};

        # Literal read-file contents, if the case supplies any.  When a
        # case defines several formats, the last one in this list wins.
        my @file_formats = qw(fastq tabbed fasta qseq raw);
        my $read_file  = undef;
        my $mate1_file = undef;
        my $mate2_file = undef;
        for my $fmt (@file_formats) {
            $read_file  = $c->{$fmt}      if defined($c->{$fmt});
            $mate1_file = $c->{"${fmt}1"} if defined($c->{"${fmt}1"});
            $mate2_file = $c->{"${fmt}2"} if defined($c->{"${fmt}2"});
        }

        my $read_file_format = undef;
        if(!defined($reads) && !defined($m1s) && !defined($m2s)) {
            defined($read_file) || defined($mate1_file) || die;
            for my $fmt (@file_formats) {
                $read_file_format = $fmt
                    if defined($c->{$fmt}) || defined($c->{"${fmt}1"});
            }
            # File contents are not reverse-complemented, so only run
            # the forward orientation
            next unless $fw;
        }

        # Run bowtie2
        my @lines = ();
        my @rawlines = ();
        my @header_lines = ();
        my @header_rawlines = ();
        print $c->{name}." " if defined($c->{name});
        print "(fw:".($fw ? 1 : 0).", sam:$sam)\n";
        my $mate1fw = 1;
        my $mate2fw = 0;
        $mate1fw = $c->{mate1fw} if defined($c->{mate1fw});
        $mate2fw = $c->{mate2fw} if defined($c->{mate2fw});
        if(!$fw) {
            # Reverse-complement the reads
            my @s = ();
            @s = @$reads if defined($reads);
            my @q = ();
            @q = @$quals if defined($quals);
            # Reverse-complement mates and switch mate1 with mate2
            my @m1 = ();
            @m1 = @$m1s if defined($m1s);
            my @m2 = ();
            @m2 = @$m2s if defined($m2s);
            my @q1 = ();
            @q1 = @$q1s if defined($q1s);
            my @q2 = ();
            @q2 = @$q2s if defined($q2s);
            for my $i (0 .. $#s) {
                $s[$i] = DNA::revcomp($s[$i], $color);
                $q[$i] = reverse $q[$i] if $i < scalar(@q);
            }
            if($mate1fw == $mate2fw) {
                for my $i (0 .. $#m1) { $m1[$i] = DNA::revcomp($m1[$i], $color); }
                for my $i (0 .. $#m2) { $m2[$i] = DNA::revcomp($m2[$i], $color); }
                for my $i (0 .. $#q1) { $q1[$i] = reverse $q1[$i]; }
                for my $i (0 .. $#q2) { $q2[$i] = reverse $q2[$i]; }
            }
            $reads = \@s if defined($reads);
            $quals = \@q if defined($quals);
            $m1s = \@m2 if defined($m1s);
            $q1s = \@q2 if defined($q1s);
            $m2s = \@m1 if defined($m2s);
            $q2s = \@q1 if defined($q2s);
        }
        my $bt2_args = $case_args;
        if(defined($m2s)) {
            # Tell bowtie2 the expected mate orientations (--ff/--fr/...)
            $bt2_args .= " --";
            $bt2_args .= ($mate1fw ? "f" : "r");
            $bt2_args .= ($mate2fw ? "f" : "r");
        }
        runbowtie2(
            $do_build && $first,
            "$bt2_args",
            $color,
            $tmpfafn,
            $c->{report},
            $read_file_format, # format of read/mate files
            $read_file,        # read file
            $mate1_file,       # mate #1 file
            $mate2_file,       # mate #2 file
            $reads,            # read list
            $quals,            # quality list
            $m1s,              # mate #1 sequence list
            $q1s,              # mate #1 quality list
            $m2s,              # mate #2 sequence list
            $q2s,              # mate #2 quality list
            $c->{names},
            \@lines,
            \@rawlines,
            \@header_lines,
            \@header_rawlines,
            $c->{should_abort});
        $first = 0;
        my $pe = defined($c->{mate1s}) && $c->{mate1s} ne "";
        $pe = $pe || defined($mate1_file);
        $pe = $pe || $c->{paired};
        my ($lastchr, $lastoff, $lastoff_orig) = ("", -1, -1);
        # Keep temporary copies of hits and pairhits so that we can
        # restore for the next orientation
        my $hitstmp = [];
        $hitstmp = clone($c->{hits}) if defined($c->{hits});
        my $pairhitstmp = [];
        $pairhitstmp = clone($c->{pairhits}) if defined($c->{pairhits});
        my $pairhits_orig_tmp = [];
        $pairhits_orig_tmp = clone($c->{pairhits_orig}) if defined($c->{pairhits_orig});
        # Record map from already-seen read name, read sequence and
        # quality to the place on the reference where it's reported.
        # This allows us to check that the pseudo-random generator
        # isn't mistakenly yielding different alignments for identical
        # reads.
        my %seenNameSeqQual = ();
        if(defined($c->{lines})) {
            my $nlines = scalar(@lines);
            $nlines == $c->{lines} || die "Expected $c->{lines} lines, got $nlines";
        }
        for my $li (0 .. $#lines) {
            my $l = $lines[$li];
            my ($readname, $orient, $chr, $off_orig, $off, $seq, $qual, $mapq,
                $oms, $editstr, $flagstr, $samflags, $cigar, $rnext, $pnext,
                $tlen);
            my %samoptflags = ();
            if($sam) {
                scalar(@$l) >= 11 ||
                    die "Bad number of fields; expected at least 11 got ".
                        scalar(@$l).":\n$rawlines[$li]\n";
                ($readname, $samflags, $chr, $off) = @$l[0..3];
                ($seq, $qual) = @$l[9..10];
                # Bit 0x10 of FLAG set means the read aligned reverse-strand
                $orient = ((($samflags >> 4) & 1) == 0) ? "+" : "-";
                $mapq  = $l->[4]; # mapping quality
                $cigar = $l->[5]; # CIGAR string
                $rnext = $l->[6]; # ref seq of next frag in template
                $pnext = $l->[7]; # position of next frag in template
                $tlen  = $l->[8]; # template length
                # Convert 1-based PNEXT to 0-based; 0 means "none"
                if($pnext == 0) {
                    $pnext = "*";
                } else {
                    $pnext--;
                }
                for(my $m = 11; $m < scalar(@$l); $m++) {
                    next if $l->[$m] eq "";
                    my ($nm, $ty, $vl) = split(/:/, $l->[$m]);
                    defined($vl) ||
                        die "Could not parse optional flag field $m: ".
                            "\"$l->[$m]\"";
                    $samoptflags{$nm}{ty} = $ty;
                    $samoptflags{$nm}{vl} = $vl;
                }
                # Convert 1-based POS to 0-based; 0 means "none"
                if($off > 0) {
                    $off--;
                } else {
                    $off = "*";
                }
                $off_orig = $off;
                $off = "*" if $cigar eq "*";
            } else {
                scalar(@$l) == 9 ||
                    die "Bad number of fields; expected 9 got ".
                        scalar(@$l).":\n$rawlines[$li]\n";
                ($readname, $orient, $chr, $off, $seq, $qual, $oms, $editstr,
                 $flagstr) = @$l;
                $off_orig = $off;
            }
            if($c->{check_random}) {
                my $rsqKey = "$readname\t$orient\t$seq\t$qual";
                my $rsqVal = "$chr\t$off";
                if(defined($seenNameSeqQual{$rsqKey})) {
                    $seenNameSeqQual{$rsqKey} eq $rsqVal ||
                        die "Two hits for read/seq/qual:\n$rsqKey\n".
                            "had different alignments:\n".
                            "$seenNameSeqQual{$rsqKey}\n$rsqVal\n";
                }
                $seenNameSeqQual{$rsqKey} = $rsqVal;
            }
            # Work out which read (index into the case's per-read lists)
            # this alignment belongs to
            $readname ne "" || die "readname was blank:\n".Dumper($c);
            my $rdi = $readname;
            $rdi = substr($rdi, 1) if substr($rdi, 0, 1) eq "r";
            if($readname =~ /\//) {
                # NOTE(review): this splits the unstripped name, so a
                # leading "r" is kept here -- confirm that is intended
                ($rdi) = split(/\//, $readname);
                defined($rdi) || die;
            }
            $rdi = $c->{idx_map}{$rdi} if defined($c->{idx_map}{$rdi});
            $rdi ne "" || die "rdi was blank:\nreadname=$readname\n".Dumper($c);
            if($rdi != int($rdi)) {
                # Read name has non-numeric characters.  Figure out
                # what number it is by scanning the names list.
                my $found = 0;
                for(my $i = 0; $i < scalar(@{$c->{names}}); $i++) {
                    if($c->{names}->[$i] eq $readname) {
                        $rdi = $i;
                        $found = 1;
                        last;
                    }
                }
                $found || die "No specified name matched reported name $readname";
            }
            # Check that the sequence printed in the alignment is sane
            if($color) {
                # It's a decoded nucleotide sequence
                my $dseq = $c->{dec_seq}->[$rdi];
                if(defined($dseq)) {
                    $seq eq $dseq ||
                        die "Expected decoded sequence '$seq' from alignment to match '$dseq'";
                }
                my $dqual = $c->{dec_qual}->[$rdi];
                if(defined($dqual)) {
                    $qual eq $dqual ||
                        die "Expected decoded qualities '$qual' from alignment to match '$dqual'";
                }
            }
            # Make simply-named copies of some portions of the test case
            # 'hits'
            my %hits = ();
            %hits = %{$c->{hits}->[$rdi]} if defined($c->{hits}->[$rdi]);
            # 'flags'
            my $flags = $c->{flags}->[$rdi];
            # 'samflags'
            # NOTE(review): read from key 'ex_samflags', not 'samflags' --
            # confirm against the test cases
            my $ex_samflags = $c->{ex_samflags}->[$rdi];
            # 'samflags_map'
            my $ex_samflags_map = $c->{samflags_map}->[$rdi];
            # 'samoptflags'
            my $ex_samoptflags = $c->{samoptflags}->[$rdi];
            # 'cigar'
            my $ex_cigar = $c->{cigar}->[$rdi];
            # 'cigar_map'
            my $ex_cigar_map = $c->{cigar_map}->[$rdi];
            # 'mapq_hi' - boolean indicating whether mapq is hi/lo
            my $ex_mapq_hi = $c->{mapq_hi}->[$rdi];
            # 'mapq'
            my $ex_mapq = $c->{mapq}->[$rdi];
            # 'mapq_map'
            my $ex_mapq_map = $c->{mapq_map}->[$rdi];
            # 'flags_fw'
            my $flags_fw = $c->{flags_fw}->[$rdi];
            # 'flags_rc'
            my $flags_rc = $c->{flags_rc}->[$rdi];
            # 'pairhits'
            my %pairhits = ();
            %pairhits = %{$c->{pairhits}->[$rdi]} if defined($c->{pairhits}->[$rdi]);
            # 'pairhits_orig'
            my %pairhits_orig = ();
            %pairhits_orig = %{$c->{pairhits_orig}->[$rdi]} if defined($c->{pairhits_orig}->[$rdi]);
            # edits
            my $ex_edits = $c->{edits}->[$rdi];
            if(!$sam) {
                # Bowtie flags
                if(defined($flags)) {
                    $flagstr eq $flags ||
                        die "Expected flags=\"$flags\", got \"$flagstr\"";
                }
                if(defined($flags_fw) && $fw) {
                    $flagstr eq $flags_fw ||
                        die "Expected flags=\"$flags_fw\", got \"$flagstr\"";
                }
                if(defined($flags_rc) && !$fw) {
                    $flagstr eq $flags_rc ||
                        die "Expected flags=\"$flags_rc\", got \"$flagstr\"";
                }
                if(defined($c->{flag_map})) {
                    if(defined($c->{flag_map}->[$rdi]->{$off})) {
                        $flagstr eq $c->{flag_map}->[$rdi]->{$off} ||
                            die "Expected flags=\"$c->{flag_map}->[$rdi]->{$off}\"".
                                " at offset $off, got \"$flagstr\"";
                    }
                }
            }
            if($sam) {
                # SAM flags
                if(defined($ex_samflags)) {
                    $samflags eq $ex_samflags ||
                        die "Expected flags $ex_samflags, got $samflags";
                }
                if(defined($ex_samflags_map)) {
                    if(defined($c->{samflags_map}->[$rdi]->{$off})) {
                        my $ex = $c->{samflags_map}->[$rdi]->{$off};
                        $samflags eq $ex ||
                            die "Expected FLAGS value $ex at offset $off, got $samflags"
                    } else {
                        die "Expected to see alignment with offset $off parsing samflags_map";
                    }
                }
                # CIGAR string
                if(defined($ex_cigar)) {
                    $cigar eq $ex_cigar ||
                        die "Expected CIGAR string $ex_cigar, got $cigar";
                }
                if(defined($ex_cigar_map)) {
                    if(defined($c->{cigar_map}->[$rdi]->{$off})) {
                        my $ex = $c->{cigar_map}->[$rdi]->{$off};
                        $cigar eq $ex ||
                            die "Expected CIGAR string $ex at offset $off, got $cigar"
                    } else {
                        die "Expected to see alignment with offset $off parsing cigar_map";
                    }
                }
                # MAPQ
                if(defined($ex_mapq)) {
                    $mapq eq $ex_mapq ||
                        die "Expected MAPQ $ex_mapq, got $mapq";
                }
                if(defined($ex_mapq_map)) {
                    if(defined($c->{mapq_map}->[$rdi]->{$off})) {
                        my $ex = $c->{mapq_map}->[$rdi]->{$off};
                        $mapq eq $ex ||
                            die "Expected MAPQ string $ex at offset $off, got $mapq"
                    } else {
                        die "Expected to see alignment with offset $off parsing mapq_map";
                    }
                }
                # MAPQ high/low
                if(defined($ex_mapq_hi)) {
                    if($ex_mapq_hi == 0) {
                        $mapq < 20 || die "Expected MAPQ < 20, got $mapq";
                    } else {
                        $mapq >= 20 || die "Expected MAPQ >= 20, got $mapq";
                    }
                }
                # SAM optional flags
                if(defined($ex_samoptflags)) {
                    matchSamOptionalFlags(\%samoptflags, $ex_samoptflags);
                }
                if(defined($c->{samoptflags_map})) {
                    if(defined($c->{samoptflags_map}->[$rdi]->{$off})) {
                        matchSamOptionalFlags(
                            \%samoptflags,
                            $c->{samoptflags_map}->[$rdi]->{$off});
                    } else {
                        die "Expected to see alignment with offset $off parsing samoptflags_map";
                    }
                }
                if(defined($c->{samoptflags_flagmap})) {
                    if(defined($c->{samoptflags_flagmap}->[$rdi]->{$samflags})) {
                        matchSamOptionalFlags(
                            \%samoptflags,
                            $c->{samoptflags_flagmap}->[$rdi]->{$samflags});
                    } else {
                        die "Expected to see alignment with flag $samflags parsing samoptflags_flagmap";
                    }
                }
                # RNEXT map
                if(defined($c->{rnext_map})) {
                    if(defined($c->{rnext_map}->[$rdi]->{$off})) {
                        my $ex = $c->{rnext_map}->[$rdi]->{$off};
                        $rnext eq $ex ||
                            die "Expected RNEXT '$ex' at offset $off, got '$rnext'"
                    } else {
                        die "Expected to see alignment with offset $off parsing rnext_map".Dumper($c);
                    }
                }
                # PNEXT map
                if(defined($c->{pnext_map})) {
                    if(defined($c->{pnext_map}->[$rdi]->{$off})) {
                        my $ex = $c->{pnext_map}->[$rdi]->{$off};
                        $pnext eq $ex ||
                            die "Expected PNEXT '$ex' at offset $off, got '$pnext'"
                    } else {
                        die "Expected to see alignment with offset $off parsing pnext_map";
                    }
                }
                # TLEN map
                if(defined($c->{tlen_map})) {
                    if(defined($c->{tlen_map}->[$rdi]->{$off})) {
                        my $ex = $c->{tlen_map}->[$rdi]->{$off};
                        $tlen eq $ex ||
                            die "Expected TLEN '$ex' at offset $off, got '$tlen'"
                    } else {
                        die "Expected to see alignment with offset $off parsing tlen_map";
                    }
                }
            }
            if($pe && $lastchr ne "") {
                # Second line of a pair: build the "off1,off2" key and
                # tick it off the expected pair hits
                # NOTE(review): the first form below uses $lastoff rather
                # than $lastoff_orig -- confirm that is intended
                my $offkey_orig = $lastoff.",".$off_orig;
                $offkey_orig = $off_orig.",".$lastoff_orig if $off_orig eq "*";

                my $offkey = $lastoff.",".$off;
                $offkey = $off.",".$lastoff if $off eq "*";

                if($lastoff ne "*" && $off ne "*") {
                    $offkey = min($lastoff, $off).",".max($lastoff, $off);
                }
                if(defined($c->{pairhits}->[$rdi])) {
                    defined($pairhits{$offkey}) ||
                        die "No such paired off as $offkey in pairhits list: ".Dumper(\%pairhits)."\n";
                    $c->{pairhits}->[$rdi]->{$offkey}--;
                    delete $c->{pairhits}->[$rdi]->{$offkey} if $c->{pairhits}->[$rdi]->{$offkey} == 0;
                    %pairhits = %{$c->{pairhits}->[$rdi]};
                }
                if(defined($c->{pairhits_orig}->[$rdi])) {
                    defined($pairhits_orig{$offkey_orig}) ||
                        die "No such paired off as $offkey_orig in pairhits_orig list: ".Dumper(\%pairhits_orig)."\n";
                    $c->{pairhits_orig}->[$rdi]->{$offkey_orig}--;
                    delete $c->{pairhits_orig}->[$rdi]->{$offkey_orig} if $c->{pairhits_orig}->[$rdi]->{$offkey_orig} == 0;
                    %pairhits_orig = %{$c->{pairhits_orig}->[$rdi]};
                }
                ($lastchr, $lastoff, $lastoff_orig) = ("", -1, -1);
            } elsif($pe) {
                # Found an unpaired alignment from aligning a pair
                my $foundSe =
                    defined($c->{pairhits}->[$rdi]) &&
                    $c->{pairhits}->[$rdi]->{$off};
                if($foundSe) {
                    $c->{pairhits}->[$rdi]->{$off}--;
                    delete $c->{pairhits}->[$rdi]->{$off}
                        if $c->{pairhits}->[$rdi]->{$off} == 0;
                    %pairhits = %{$c->{pairhits}->[$rdi]};
                } else {
                    ($lastchr, $lastoff) = ($chr, $off);
                }
                # Found an unpaired alignment from aligning a pair
                $foundSe =
                    defined($c->{pairhits_orig}->[$rdi]) &&
                    $c->{pairhits_orig}->[$rdi]->{$off_orig};
                if($foundSe) {
                    $c->{pairhits_orig}->[$rdi]->{$off_orig}--;
                    delete $c->{pairhits_orig}->[$rdi]->{$off_orig}
                        if $c->{pairhits_orig}->[$rdi]->{$off_orig} == 0;
                    %pairhits_orig = %{$c->{pairhits_orig}->[$rdi]};
                } else {
                    ($lastchr, $lastoff, $lastoff_orig) = ($chr, $off, $off_orig);
                }
            } else {
                if(defined($c->{hits}->[$rdi])) {
                    defined($hits{$off}) ||
                        die "No such off as $off in hits list: ".Dumper(\%hits)."\n";
                    $c->{hits}->[$rdi]->{$off}--;
                    delete $c->{hits}->[$rdi]->{$off} if $c->{hits}->[$rdi]->{$off} == 0;
                    %hits = %{$c->{hits}->[$rdi]};
                }
            }
            if(!$sam && defined($ex_edits)) {
                my $eds = $l->[7];
                $eds eq $ex_edits ||
                    die "For edit string, expected \"$ex_edits\" got \"$eds\"\n";
            }
        }
        # Go through all the per-read expectations and make sure that no
        # expected hit is left unobserved (unless hits_are_superset is set)
        my $klim = 0;
        $klim = scalar(@{$c->{hits}}) if defined($c->{hits});
        $klim = max($klim, scalar(@{$c->{pairhits}})) if defined($c->{pairhits});
        for (my $k = 0; $k < $klim; $k++) {
            # For each read
            my %hits =
                defined($c->{hits}->[$k]) ? %{$c->{hits}->[$k]} : ();
            my %pairhits =
                defined($c->{pairhits}->[$k]) ? %{$c->{pairhits}->[$k]} : ();
            my %pairhits_orig =
                defined($c->{pairhits_orig}->[$k]) ? %{$c->{pairhits_orig}->[$k]} : ();
            my $hits_are_superset = $c->{hits_are_superset}->[$k];
            # Check if there are any hits left over
            my $hitsLeft = scalar(keys %hits);
            if($hitsLeft != 0 && !$hits_are_superset) {
                print Dumper(\%hits);
                die "Had $hitsLeft hit(s) left over at position $k";
            }
            my $pairhitsLeft = scalar(keys %pairhits);
            if($pairhitsLeft != 0 && !$hits_are_superset) {
                print Dumper(\%pairhits);
                die "Had $pairhitsLeft hit(s) left over at position $k";
            }
            my $pairhits_orig_Left = scalar(keys %pairhits_orig);
            if($pairhits_orig_Left != 0 && !$hits_are_superset) {
                print Dumper(\%pairhits_orig);
                die "Had $pairhits_orig_Left hit(s) left over at position $k";
            }
        }

        # Restore the expectations consumed above for the next orientation
        $c->{hits} = $hitstmp;
        $c->{pairhits} = $pairhitstmp;
        $c->{pairhits_orig} = $pairhits_orig_tmp;
    }
    # If bowtie2 never ran for this case, no index was built for its
    # reference; forget it so that the next case rebuilds
    $last_ref = undef if $first;
}
print "PASSED\n";