/* * Copyright 2011, Ben Langmead * * This file is part of Bowtie 2. * * Bowtie 2 is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Bowtie 2 is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Bowtie 2. If not, see . */ #include // -- BTL remove -- //#include //#include // -- -- #include "aligner_sw.h" #include "aligner_result.h" #include "search_globals.h" #include "scoring.h" #include "mask.h" /** * Initialize with a new read. */ void SwAligner::initRead( const BTDnaString& rdfw, // forward read sequence const BTDnaString& rdrc, // revcomp read sequence const BTString& qufw, // forward read qualities const BTString& qurc, // reverse read qualities size_t rdi, // offset of first read char to align size_t rdf, // offset of last read char to align const Scoring& sc) // scoring scheme { assert_gt(rdf, rdi); int nceil = sc.nCeil.f((double)rdfw.length()); rdfw_ = &rdfw; // read sequence rdrc_ = &rdrc; // read sequence qufw_ = &qufw; // read qualities qurc_ = &qurc; // read qualities rdi_ = rdi; // offset of first read char to align rdf_ = rdf; // offset of last read char to align sc_ = ≻ // scoring scheme nceil_ = nceil; // max # Ns allowed in ref portion of aln readSse16_ = false; // true -> sse16 from now on for this read initedRead_ = true; #ifndef NO_SSE sseU8fwBuilt_ = false; // built fw query profile, 8-bit score sseU8rcBuilt_ = false; // built rc query profile, 8-bit score sseI16fwBuilt_ = false; // built fw query profile, 16-bit score sseI16rcBuilt_ = false; // built rc query profile, 16-bit score #endif } /** * Initialize with a new alignment problem. */ void SwAligner::initRef( bool fw, // whether to forward or revcomp read is aligning TRefId refidx, // id of reference aligned against const DPRect& rect, // DP rectangle char *rf, // reference sequence size_t rfi, // offset of first reference char to align to size_t rff, // offset of last reference char to align to TRefOff reflen, // length of reference sequence const Scoring& sc, // scoring scheme TAlScore minsc, // minimum score bool enable8, // use 8-bit SSE if possible? size_t cminlen, // minimum length for using checkpointing scheme size_t cpow2, // interval b/t checkpointed diags; 1 << this bool doTri, // triangular mini-fills? bool extend) // is this a seed extension? { size_t readGaps = sc.maxReadGaps(minsc, rdfw_->length()); size_t refGaps = sc.maxRefGaps(minsc, rdfw_->length()); assert_geq(readGaps, 0); assert_geq(refGaps, 0); assert_gt(rff, rfi); rdgap_ = readGaps; // max # gaps in read rfgap_ = refGaps; // max # gaps in reference state_ = STATE_INITED; fw_ = fw; // orientation rd_ = fw ? rdfw_ : rdrc_; // read sequence qu_ = fw ? qufw_ : qurc_; // quality sequence refidx_ = refidx; // id of reference aligned against rf_ = rf; // reference sequence rfi_ = rfi; // offset of first reference char to align to rff_ = rff; // offset of last reference char to align to reflen_ = reflen; // length of entire reference sequence rect_ = ▭ // DP rectangle minsc_ = minsc; // minimum score cural_ = 0; // idx of next alignment to give out initedRef_ = true; // indicate we've initialized the ref portion enable8_ = enable8; // use 8-bit SSE if possible? extend_ = extend; // true iff this is a seed extension cperMinlen_ = cminlen; // reads shorter than this won't use checkpointer cperPerPow2_ = cpow2; // interval b/t checkpointed diags; 1 << this cperEf_ = true; // whether to checkpoint H, E, and F cperTri_ = doTri; // triangular mini-fills? bter_.initRef( fw_ ? rdfw_->buf() : // in: read sequence rdrc_->buf(), fw_ ? qufw_->buf() : // in: quality sequence qurc_->buf(), // daehwan // rd_->length(), // in: read sequence length rdf_ - rdi_, rf_ + rfi_, // in: reference sequence rff_ - rfi_, // in: in-rectangle reference sequence length reflen, // in: total reference sequence length refidx_, // in: reference id rfi_, // in: reference offset fw_, // in: orientation rect_, // in: DP rectangle &cper_, // in: checkpointer *sc_, // in: scoring scheme nceil_); // in: N ceiling } /** * Given a read, an alignment orientation, a range of characters in a referece * sequence, and a bit-encoded version of the reference, set up and execute the * corresponding dynamic programming problem. * * The caller has already narrowed down the relevant portion of the reference * using, e.g., the location of a seed hit, or the range of possible fragment * lengths if we're searching for the opposite mate in a pair. */ void SwAligner::initRef( bool fw, // whether to forward or revcomp read is aligning TRefId refidx, // reference aligned against const DPRect& rect, // DP rectangle const BitPairReference& refs, // Reference strings TRefOff reflen, // length of reference sequence const Scoring& sc, // scoring scheme TAlScore minsc, // minimum score bool enable8, // use 8-bit SSE if possible? size_t cminlen, // minimum length for using checkpointing scheme size_t cpow2, // interval b/t checkpointed diags; 1 << this bool doTri, // triangular mini-fills? bool extend, // true iff this is a seed extension size_t upto, // count the number of Ns up to this offset size_t& nsUpto) // output: the number of Ns up to 'upto' { TRefOff rfi = rect.refl; TRefOff rff = rect.refr + 1; assert_gt(rff, rfi); // Capture an extra reference character outside the rectangle so that we // can check matches in the next column over to the right rff++; // rflen = full length of the reference substring to consider, including // overhang off the boundaries of the reference sequence const size_t rflen = (size_t)(rff - rfi); // Figure the number of Ns we're going to add to either side size_t leftNs = (rfi >= 0 ? 0 : (size_t)std::abs(static_cast(rfi))); leftNs = min(leftNs, rflen); size_t rightNs = (rff <= (TRefOff)reflen ? 0 : (size_t)std::abs(static_cast(rff - reflen))); rightNs = min(rightNs, rflen); // rflenInner = length of just the portion that doesn't overhang ref ends assert_geq(rflen, leftNs + rightNs); const size_t rflenInner = rflen - (leftNs + rightNs); #ifndef NDEBUG bool haveRfbuf2 = false; EList rfbuf2(rflen); // This is really slow, so only do it some of the time if((rand() % 10) == 0) { TRefOff rfii = rfi; for(size_t i = 0; i < rflen; i++) { if(rfii < 0 || (TRefOff)rfii >= reflen) { rfbuf2.push_back(4); } else { rfbuf2.push_back(refs.getBase(refidx, (uint32_t)rfii)); } rfii++; } haveRfbuf2 = true; } #endif // rfbuf_ = uint32_t list large enough to accommodate both the reference // sequence and any Ns we might add to either side. rfwbuf_.resize((rflen + 16) / 4); int offset = refs.getStretch( rfwbuf_.ptr(), // buffer to store words in refidx, // which reference (rfi < 0) ? 0 : (size_t)rfi, // starting offset (can't be < 0) rflenInner // length to grab (exclude overhang) ASSERT_ONLY(, tmp_destU32_));// for BitPairReference::getStretch() assert_leq(offset, 16); rf_ = (char*)rfwbuf_.ptr() + offset; // Shift ref chars away from 0 so we can stick Ns at the beginning if(leftNs > 0) { // Slide everyone down for(size_t i = rflenInner; i > 0; i--) { rf_[i+leftNs-1] = rf_[i-1]; } // Add Ns for(size_t i = 0; i < leftNs; i++) { rf_[i] = 4; } } if(rightNs > 0) { // Add Ns to the end for(size_t i = 0; i < rightNs; i++) { rf_[i + leftNs + rflenInner] = 4; } } #ifndef NDEBUG // Sanity check reference characters for(size_t i = 0; i < rflen; i++) { assert(!haveRfbuf2 || rf_[i] == rfbuf2[i]); assert_range(0, 4, (int)rf_[i]); } #endif // Count Ns and convert reference characters into A/C/G/T masks. Ambiguous // nucleotides (IUPAC codes) have more than one mask bit set. If a // reference scanner was provided, use it to opportunistically resolve seed // hits. nsUpto = 0; for(size_t i = 0; i < rflen; i++) { // rf_[i] gets mask version of refence char, with N=16 if(i < upto && rf_[i] > 3) { nsUpto++; } rf_[i] = (1 << rf_[i]); } // Correct for having captured an extra reference character rff--; initRef( fw, // whether to forward or revcomp read is aligning refidx, // id of reference aligned against rect, // DP rectangle rf_, // reference sequence, wrapped up in BTString object 0, // use the whole thing (size_t)(rff - rfi), // ditto reflen, // reference length sc, // scoring scheme minsc, // minimum score enable8, // use 8-bit SSE if possible? cminlen, // minimum length for using checkpointing scheme cpow2, // interval b/t checkpointed diags; 1 << this doTri, // triangular mini-fills? extend); // true iff this is a seed extension } /** * Given a read, an alignment orientation, a range of characters in a referece * sequence, and a bit-encoded version of the reference, set up and execute the * corresponding ungapped alignment problem. There can only be one solution. * * The caller has already narrowed down the relevant portion of the reference * using, e.g., the location of a seed hit, or the range of possible fragment * lengths if we're searching for the opposite mate in a pair. */ int SwAligner::ungappedAlign( const BTDnaString& rd, // read sequence (could be RC) const BTString& qu, // qual sequence (could be rev) const Coord& coord, // coordinate aligned to const BitPairReference& refs, // Reference strings size_t reflen, // length of reference sequence const Scoring& sc, // scoring scheme bool ohang, // allow overhang? TAlScore minsc, // minimum score SwResult& res) // put alignment result here { const size_t len = rd.length(); int nceil = sc.nCeil.f((double)len); int ns = 0; TRefOff rfi = coord.off(); TRefOff rff = rfi + (TRefOff)len; TRefId refidx = coord.ref(); assert_gt(rff, rfi); // Figure the number of Ns we're going to add to either side size_t leftNs = 0; if(rfi < 0) { if(ohang) { leftNs = (size_t)(-rfi); } else { return 0; } } size_t rightNs = 0; if(rff > (TRefOff)reflen) { if(ohang) { rightNs = (size_t)(rff - (TRefOff)reflen); } else { return 0; } } if((leftNs + rightNs) > (size_t)nceil) { return 0; } // rflenInner = length of just the portion that doesn't overhang ref ends assert_geq(len, leftNs + rightNs); const size_t rflenInner = len - (leftNs + rightNs); #ifndef NDEBUG bool haveRfbuf2 = false; EList rfbuf2(len); // This is really slow, so only do it some of the time if((rand() % 10) == 0) { TRefOff rfii = rfi; for(size_t i = 0; i < len; i++) { if(rfii < 0 || (size_t)rfii >= reflen) { rfbuf2.push_back(4); } else { rfbuf2.push_back(refs.getBase(refidx, (uint32_t)rfii)); } rfii++; } haveRfbuf2 = true; } #endif // rfbuf_ = uint32_t list large enough to accommodate both the reference // sequence and any Ns we might add to either side. rfwbuf_.resize((len + 16) / 4); int offset = refs.getStretch( rfwbuf_.ptr(), // buffer to store words in refidx, // which reference (rfi < 0) ? 0 : (size_t)rfi, // starting offset (can't be < 0) rflenInner // length to grab (exclude overhang) ASSERT_ONLY(, tmp_destU32_));// for BitPairReference::getStretch() assert_leq(offset, 16); rf_ = (char*)rfwbuf_.ptr() + offset; // Shift ref chars away from 0 so we can stick Ns at the beginning if(leftNs > 0) { // Slide everyone down for(size_t i = rflenInner; i > 0; i--) { rf_[i+leftNs-1] = rf_[i-1]; } // Add Ns for(size_t i = 0; i < leftNs; i++) { rf_[i] = 4; } } if(rightNs > 0) { // Add Ns to the end for(size_t i = 0; i < rightNs; i++) { rf_[i + leftNs + rflenInner] = 4; } } #ifndef NDEBUG // Sanity check reference characters for(size_t i = 0; i < len; i++) { assert(!haveRfbuf2 || rf_[i] == rfbuf2[i]); assert_range(0, 4, (int)rf_[i]); } #endif // Count Ns and convert reference characters into A/C/G/T masks. Ambiguous // nucleotides (IUPAC codes) have more than one mask bit set. If a // reference scanner was provided, use it to opportunistically resolve seed // hits. TAlScore score = 0; res.alres.reset(); size_t rowi = 0; size_t rowf = len-1; if(sc.monotone) { for(size_t i = 0; i < len; i++) { // rf_[i] gets mask version of refence char, with N=16 assert_geq(qu[i], 33); score += sc.score(rd[i], (int)(1 << rf_[i]), qu[i] - 33, ns); assert_leq(score, 0); if(score < minsc || ns > nceil) { // Fell below threshold return 0; } } // Got a result! Fill in the rest of the result object. } else { // Definitely ways to short-circuit this. E.g. if diff between cur // score and minsc can't be met by matches. TAlScore floorsc = 0; TAlScore scoreMax = floorsc; size_t lastfloor = 0; rowi = MAX_SIZE_T; size_t sols = 0; for(size_t i = 0; i < len; i++) { score += sc.score(rd[i], (int)(1 << rf_[i]), qu[i] - 33, ns); if(score >= minsc && score >= scoreMax) { scoreMax = score; rowf = i; if(rowi != lastfloor) { rowi = lastfloor; sols++; } } if(score <= floorsc) { score = floorsc; lastfloor = i+1; } } if(ns > nceil || scoreMax < minsc) { // Too many Ns return 0; } if(sols > 1) { // >1 distinct solution in this diag; defer to DP aligner return -1; } score = scoreMax; // Got a result! Fill in the rest of the result object. } // Now fill in the edits res.alres.setScore(AlnScore(score, ns, 0)); assert_geq(rowf, rowi); EList& ned = res.alres.ned(); size_t refns = 0; ASSERT_ONLY(BTDnaString refstr); for(size_t i = rowi; i <= rowf; i++) { ASSERT_ONLY(refstr.append((int)rf_[i])); if(rf_[i] > 3 || rd[i] != rf_[i]) { // Add edit Edit e((int)i, mask2dna[1 << (int)rf_[i]], "ACGTN"[(int)rd[i]], EDIT_TYPE_MM); ned.push_back(e); if(rf_[i] > 3) { refns++; } } } assert(Edit::repOk(ned, rd)); bool fw = coord.fw(); assert_leq(rowf, len-1); size_t trimEnd = (len-1) - rowf; res.alres.setShape( coord.ref(), // ref id coord.off()+rowi, // 0-based ref offset reflen, // length of reference sequence aligned to fw, // aligned to Watson? len, // read length 0, // read ID true, // pretrim soft? 0, // pretrim 5' end 0, // pretrim 3' end true, // alignment trim soft? fw ? rowi : trimEnd, // alignment trim 5' end fw ? trimEnd : rowi); // alignment trim 3' end res.alres.setRefNs(refns); assert(res.repOk()); #ifndef NDEBUG BTDnaString editstr; Edit::toRef(rd, ned, editstr, true, rowi, trimEnd); if(refstr != editstr) { cerr << "Decoded nucleotides and edits don't match reference:" << endl; cerr << " score: " << res.alres.score().score() << endl; cerr << " edits: "; Edit::print(cerr, ned); cerr << endl; cerr << " decoded nucs: " << rd << endl; cerr << " edited nucs: " << editstr << endl; cerr << " reference nucs: " << refstr << endl; assert(0); } #endif if(!fw) { // All edits are currently w/r/t upstream end; if read aligned to Crick // strand, invert them to be w/r/t 5' end instead. res.alres.invertEdits(); } return 1; } /** * Align read 'rd' to reference using read & reference information given * last time init() was called. */ bool SwAligner::align( RandomSource& rnd, // source of pseudo-randoms TAlScore& best) // best alignment score observed in DP matrix { assert(initedRef() && initedRead()); assert_eq(STATE_INITED, state_); state_ = STATE_ALIGNED; // Reset solutions lists btncand_.clear(); btncanddone_.clear(); btncanddoneSucc_ = btncanddoneFail_ = 0; best = std::numeric_limits::min(); sse8succ_ = sse16succ_ = false; int flag = 0; size_t rdlen = rdf_ - rdi_; bool checkpointed = rdlen >= cperMinlen_; bool gathered = false; // Did gathering happen along with alignment? if(sc_->monotone) { // End-to-end if(enable8_ && !readSse16_ && minsc_ >= -254) { // 8-bit end-to-end if(checkpointed) { best = alignGatherEE8(flag, false); if(flag == 0) { gathered = true; } } else { best = alignNucleotidesEnd2EndSseU8(flag, false); #ifndef NDEBUG int flagtmp = 0; TAlScore besttmp = alignGatherEE8(flagtmp, true); // debug assert_eq(flagtmp, flag); assert_eq(besttmp, best); #endif } sse8succ_ = (flag == 0); #ifndef NDEBUG { int flag2 = 0; TAlScore best2 = alignNucleotidesEnd2EndSseI16(flag2, true); { int flagtmp = 0; TAlScore besttmp = alignGatherEE16(flagtmp, true); assert_eq(flagtmp, flag2); assert(flag2 != 0 || best2 == besttmp); } assert(flag < 0 || best == best2); sse16succ_ = (flag2 == 0); } #endif /*ndef NDEBUG*/ } else { // 16-bit end-to-end if(checkpointed) { best = alignGatherEE16(flag, false); if(flag == 0) { gathered = true; } } else { best = alignNucleotidesEnd2EndSseI16(flag, false); #ifndef NDEBUG int flagtmp = 0; TAlScore besttmp = alignGatherEE16(flagtmp, true); assert_eq(flagtmp, flag); assert_eq(besttmp, best); #endif } sse16succ_ = (flag == 0); } } else { // Local flag = -2; if(enable8_ && !readSse16_) { // 8-bit local if(checkpointed) { best = alignGatherLoc8(flag, false); if(flag == 0) { gathered = true; } } else { best = alignNucleotidesLocalSseU8(flag, false); #ifndef NDEBUG int flagtmp = 0; TAlScore besttmp = alignGatherLoc8(flagtmp, true); assert_eq(flag, flagtmp); assert_eq(best, besttmp); #endif } } if(flag == -2) { // 16-bit local flag = 0; if(checkpointed) { best = alignNucleotidesLocalSseI16(flag, false); best = alignGatherLoc16(flag, false); if(flag == 0) { gathered = true; } } else { best = alignNucleotidesLocalSseI16(flag, false); #ifndef NDEBUG int flagtmp = 0; TAlScore besttmp = alignGatherLoc16(flagtmp, true); assert_eq(flag, flagtmp); assert_eq(best, besttmp); #endif } sse16succ_ = (flag == 0); } else { sse8succ_ = (flag == 0); #ifndef NDEBUG int flag2 = 0; TAlScore best2 = alignNucleotidesLocalSseI16(flag2, true); { int flagtmp = 0; TAlScore besttmp = alignGatherLoc16(flagtmp, true); assert_eq(flag2, flagtmp); assert(flag2 != 0 || best2 == besttmp); } assert(flag2 < 0 || best == best2); sse16succ_ = (flag2 == 0); #endif /*ndef NDEBUG*/ } } #ifndef NDEBUG if(!checkpointed && (rand() & 15) == 0 && sse8succ_ && sse16succ_) { SSEData& d8 = fw_ ? sseU8fw_ : sseU8rc_; SSEData& d16 = fw_ ? sseI16fw_ : sseI16rc_; assert_eq(d8.mat_.nrow(), d16.mat_.nrow()); assert_eq(d8.mat_.ncol(), d16.mat_.ncol()); for(size_t i = 0; i < d8.mat_.nrow(); i++) { for(size_t j = 0; j < colstop_; j++) { int h8 = d8.mat_.helt(i, j); int h16 = d16.mat_.helt(i, j); int e8 = d8.mat_.eelt(i, j); int e16 = d16.mat_.eelt(i, j); int f8 = d8.mat_.felt(i, j); int f16 = d16.mat_.felt(i, j); TAlScore h8s = (sc_->monotone ? (h8 - 0xff ) : h8); TAlScore h16s = (sc_->monotone ? (h16 - 0x7fff) : (h16 + 0x8000)); TAlScore e8s = (sc_->monotone ? (e8 - 0xff ) : e8); TAlScore e16s = (sc_->monotone ? (e16 - 0x7fff) : (e16 + 0x8000)); TAlScore f8s = (sc_->monotone ? (f8 - 0xff ) : f8); TAlScore f16s = (sc_->monotone ? (f16 - 0x7fff) : (f16 + 0x8000)); if(h8s < minsc_) { h8s = minsc_ - 1; } if(h16s < minsc_) { h16s = minsc_ - 1; } if(e8s < minsc_) { e8s = minsc_ - 1; } if(e16s < minsc_) { e16s = minsc_ - 1; } if(f8s < minsc_) { f8s = minsc_ - 1; } if(f16s < minsc_) { f16s = minsc_ - 1; } if((h8 != 0 || (int16_t)h16 != (int16_t)0x8000) && h8 > 0) { assert_eq(h8s, h16s); } if((e8 != 0 || (int16_t)e16 != (int16_t)0x8000) && e8 > 0) { assert_eq(e8s, e16s); } if((f8 != 0 || (int16_t)f16 != (int16_t)0x8000) && f8 > 0) { assert_eq(f8s, f16s); } } } } #endif assert(repOk()); cural_ = 0; if(best == MIN_I64 || best < minsc_) { return false; } if(!gathered) { // Look for solutions using SSE matrix assert(sse8succ_ || sse16succ_); if(sc_->monotone) { if(sse8succ_) { gatherCellsNucleotidesEnd2EndSseU8(best); #ifndef NDEBUG if(sse16succ_) { cand_tmp_ = btncand_; gatherCellsNucleotidesEnd2EndSseI16(best); cand_tmp_.sort(); btncand_.sort(); assert(cand_tmp_ == btncand_); } #endif /*ndef NDEBUG*/ } else { gatherCellsNucleotidesEnd2EndSseI16(best); } } else { if(sse8succ_) { gatherCellsNucleotidesLocalSseU8(best); #ifndef NDEBUG if(sse16succ_) { cand_tmp_ = btncand_; gatherCellsNucleotidesLocalSseI16(best); cand_tmp_.sort(); btncand_.sort(); assert(cand_tmp_ == btncand_); } #endif /*ndef NDEBUG*/ } else { gatherCellsNucleotidesLocalSseI16(best); } } } if(!btncand_.empty()) { btncand_.sort(); } return !btncand_.empty(); } /** * Populate the given SwResult with information about the "next best" * alignment if there is one. If there isn't one, false is returned. Note * that false might be returned even though a call to done() would have * returned false. */ bool SwAligner::nextAlignment( SwResult& res, TAlScore minsc, RandomSource& rnd) { assert(initedRead() && initedRef()); assert_eq(STATE_ALIGNED, state_); assert(repOk()); if(done()) { res.reset(); return false; } assert(!done()); size_t off = 0, nbts = 0; assert_lt(cural_, btncand_.size()); assert(res.repOk()); // For each candidate cell that we should try to backtrack from... const size_t candsz = btncand_.size(); size_t SQ = dpRows() >> 4; if(SQ == 0) SQ = 1; size_t rdlen = rdf_ - rdi_; bool checkpointed = rdlen >= cperMinlen_; while(cural_ < candsz) { // Doing 'continue' anywhere in here simply causes us to move on to the // next candidate if(btncand_[cural_].score < minsc) { btncand_[cural_].fate = BT_CAND_FATE_FILT_SCORE; nbtfiltsc_++; cural_++; continue; } nbts = 0; assert(sse8succ_ || sse16succ_); size_t row = btncand_[cural_].row; size_t col = btncand_[cural_].col; assert_lt(row, dpRows()); assert_lt((TRefOff)col, rff_-rfi_); if(sse16succ_) { SSEData& d = fw_ ? sseI16fw_ : sseI16rc_; if(!checkpointed && d.mat_.reset_[row] && d.mat_.reportedThrough(row, col)) { // Skipping this candidate because a previous candidate already // moved through this cell btncand_[cural_].fate = BT_CAND_FATE_FILT_START; //cerr << " skipped becuase starting cell was covered" << endl; nbtfiltst_++; cural_++; continue; } } else if(sse8succ_) { SSEData& d = fw_ ? sseU8fw_ : sseU8rc_; if(!checkpointed && d.mat_.reset_[row] && d.mat_.reportedThrough(row, col)) { // Skipping this candidate because a previous candidate already // moved through this cell btncand_[cural_].fate = BT_CAND_FATE_FILT_START; //cerr << " skipped becuase starting cell was covered" << endl; nbtfiltst_++; cural_++; continue; } } if(sc_->monotone) { bool ret = false; if(sse8succ_) { uint32_t reseed = rnd.nextU32() + 1; rnd.init(reseed); res.reset(); if(checkpointed) { size_t maxiter = MAX_SIZE_T; size_t niter = 0; ret = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter, // max # extensions to try niter, // # extensions tried rnd); // random gen, to choose among equal paths } else { ret = backtraceNucleotidesEnd2EndSseU8( btncand_[cural_].score, // in: expected score res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin nbts, // out: # backtracks row, // start in this rectangle row col, // start in this rectangle column rnd); // random gen, to choose among equal paths } #ifndef NDEBUG // if(...) statement here should check not whether the primary // alignment was checkpointed, but whether a checkpointed // alignment was done at all. if(!checkpointed) { SwResult res2; res2.alres = res.alres; res2.alres.reset(); size_t maxiter2 = MAX_SIZE_T; size_t niter2 = 0; bool ret2 = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res2, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter2, // max # extensions to try niter2, // # extensions tried rnd); // random gen, to choose among equal paths // After the first alignment, there's no guarantee we'll // get the same answer from both backtrackers because of // differences in how they handle marking cells as // reported-through. assert(cural_ > 0 || !ret || ret == ret2); assert(cural_ > 0 || !ret || res.alres == res2.alres); } if(sse16succ_ && !checkpointed) { SwResult res2; res2.alres = res.alres; res2.alres.reset(); size_t off2, nbts2 = 0; rnd.init(reseed); bool ret2 = backtraceNucleotidesEnd2EndSseI16( btncand_[cural_].score, // in: expected score res2, // out: store results (edits and scores) here off2, // out: store diagonal projection of origin nbts2, // out: # backtracks row, // start in this rectangle row col, // start in this rectangle column rnd); // random gen, to choose among equal paths assert_eq(ret, ret2); assert_eq(nbts, nbts2); assert(!ret || res2.alres.score() == res.alres.score()); #if 0 if(!checkpointed && (rand() & 15) == 0) { // Check that same cells are reported through SSEData& d8 = fw_ ? sseU8fw_ : sseU8rc_; SSEData& d16 = fw_ ? sseI16fw_ : sseI16rc_; for(size_t i = d8.mat_.nrow(); i > 0; i--) { for(size_t j = 0; j < d8.mat_.ncol(); j++) { assert_eq(d8.mat_.reportedThrough(i-1, j), d16.mat_.reportedThrough(i-1, j)); } } } #endif } #endif rnd.init(reseed+1); // debug/release pseudo-randoms in lock step } else if(sse16succ_) { uint32_t reseed = rnd.nextU32() + 1; res.reset(); if(checkpointed) { size_t maxiter = MAX_SIZE_T; size_t niter = 0; ret = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter, // max # extensions to try niter, // # extensions tried rnd); // random gen, to choose among equal paths } else { ret = backtraceNucleotidesEnd2EndSseI16( btncand_[cural_].score, // in: expected score res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin nbts, // out: # backtracks row, // start in this rectangle row col, // start in this rectangle column rnd); // random gen, to choose among equal paths } #ifndef NDEBUG // if(...) statement here should check not whether the primary // alignment was checkpointed, but whether a checkpointed // alignment was done at all. if(!checkpointed) { SwResult res2; size_t maxiter2 = MAX_SIZE_T; size_t niter2 = 0; bool ret2 = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res2, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter2, // max # extensions to try niter2, // # extensions tried rnd); // random gen, to choose among equal paths // After the first alignment, there's no guarantee we'll // get the same answer from both backtrackers because of // differences in how they handle marking cells as // reported-through. assert(cural_ > 0 || !ret || ret == ret2); assert(cural_ > 0 || !ret || res.alres == res2.alres); } #endif rnd.init(reseed); // debug/release pseudo-randoms in lock step } if(ret) { btncand_[cural_].fate = BT_CAND_FATE_SUCCEEDED; break; } else { btncand_[cural_].fate = BT_CAND_FATE_FAILED; } } else { // Local alignment // Check if this solution is "dominated" by a prior one. // Domination is a heuristic designed to eliminate the vast // majority of valid-but-redundant candidates lying in the // "penumbra" of a high-scoring alignment. bool dom = false; { size_t donesz = btncanddone_.size(); const size_t col = btncand_[cural_].col; const size_t row = btncand_[cural_].row; for(size_t i = 0; i < donesz; i++) { assert_gt(btncanddone_[i].fate, 0); size_t colhi = col, rowhi = row; size_t rowlo = btncanddone_[i].row; size_t collo = btncanddone_[i].col; if(colhi < collo) swap(colhi, collo); if(rowhi < rowlo) swap(rowhi, rowlo); if(colhi - collo <= SQ && rowhi - rowlo <= SQ) { // Skipping this candidate because it's "dominated" by // a previous candidate dom = true; break; } } } if(dom) { btncand_[cural_].fate = BT_CAND_FATE_FILT_DOMINATED; nbtfiltdo_++; cural_++; continue; } bool ret = false; if(sse8succ_) { uint32_t reseed = rnd.nextU32() + 1; res.reset(); rnd.init(reseed); if(checkpointed) { size_t maxiter = MAX_SIZE_T; size_t niter = 0; ret = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter, // max # extensions to try niter, // # extensions tried rnd); // random gen, to choose among equal paths } else { ret = backtraceNucleotidesLocalSseU8( btncand_[cural_].score, // in: expected score res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin nbts, // out: # backtracks row, // start in this rectangle row col, // start in this rectangle column rnd); // random gen, to choose among equal paths } #ifndef NDEBUG // if(...) statement here should check not whether the primary // alignment was checkpointed, but whether a checkpointed // alignment was done at all. if(!checkpointed) { SwResult res2; size_t maxiter2 = MAX_SIZE_T; size_t niter2 = 0; bool ret2 = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res2, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter2, // max # extensions to try niter2, // # extensions tried rnd); // random gen, to choose among equal paths // After the first alignment, there's no guarantee we'll // get the same answer from both backtrackers because of // differences in how they handle marking cells as // reported-through. assert(cural_ > 0 || !ret || ret == ret2); assert(cural_ > 0 || !ret || res.alres == res2.alres); } if(!checkpointed && sse16succ_) { SwResult res2; size_t off2, nbts2 = 0; rnd.init(reseed); // same b/t backtrace calls bool ret2 = backtraceNucleotidesLocalSseI16( btncand_[cural_].score, // in: expected score res2, // out: store results (edits and scores) here off2, // out: store diagonal projection of origin nbts2, // out: # backtracks row, // start in this rectangle row col, // start in this rectangle column rnd); // random gen, to choose among equal paths assert_eq(ret, ret2); assert_eq(nbts, nbts2); assert(!ret || res2.alres.score() == res.alres.score()); #if 0 if(!checkpointed && (rand() & 15) == 0) { // Check that same cells are reported through SSEData& d8 = fw_ ? sseU8fw_ : sseU8rc_; SSEData& d16 = fw_ ? sseI16fw_ : sseI16rc_; for(size_t i = d8.mat_.nrow(); i > 0; i--) { for(size_t j = 0; j < d8.mat_.ncol(); j++) { assert_eq(d8.mat_.reportedThrough(i-1, j), d16.mat_.reportedThrough(i-1, j)); } } } #endif } #endif rnd.init(reseed+1); // debug/release pseudo-randoms in lock step } else if(sse16succ_) { uint32_t reseed = rnd.nextU32() + 1; res.reset(); if(checkpointed) { size_t maxiter = MAX_SIZE_T; size_t niter = 0; ret = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter, // max # extensions to try niter, // # extensions tried rnd); // random gen, to choose among equal paths } else { ret = backtraceNucleotidesLocalSseI16( btncand_[cural_].score, // in: expected score res, // out: store results (edits and scores) here off, // out: store diagonal projection of origin nbts, // out: # backtracks row, // start in this rectangle row col, // start in this rectangle column rnd); // random gen, to choose among equal paths } #ifndef NDEBUG // if(...) statement here should check not whether the primary // alignment was checkpointed, but whether a checkpointed // alignment was done at all. if(!checkpointed) { SwResult res2; size_t maxiter2 = MAX_SIZE_T; size_t niter2 = 0; bool ret2 = backtrace( btncand_[cural_].score, // in: expected score true, // in: use mini-fill? true, // in: use checkpoints? res2, // out: store results (edits and scores) here off, // out: store diagonal projection of origin row, // start in this rectangle row col, // start in this rectangle column maxiter2, // max # extensions to try niter2, // # extensions tried rnd); // random gen, to choose among equal paths // After the first alignment, there's no guarantee we'll // get the same answer from both backtrackers because of // differences in how they handle marking cells as // reported-through. assert(cural_ > 0 || !ret || ret == ret2); assert(cural_ > 0 || !ret || res.alres == res2.alres); } #endif rnd.init(reseed); // same b/t backtrace calls } if(ret) { btncand_[cural_].fate = BT_CAND_FATE_SUCCEEDED; btncanddone_.push_back(btncand_[cural_]); btncanddoneSucc_++; assert(res.repOk()); break; } else { btncand_[cural_].fate = BT_CAND_FATE_FAILED; btncanddone_.push_back(btncand_[cural_]); btncanddoneFail_++; } } cural_++; } // while(cural_ < btncand_.size()) if(cural_ == btncand_.size()) { assert(res.repOk()); return false; } assert(!res.alres.empty()); assert(res.repOk()); if(!fw_) { // All edits are currently w/r/t upstream end; if read aligned // to Crick strand, we need to invert them so that they're // w/r/t the read's 5' end instead. res.alres.invertEdits(); } cural_++; assert(res.repOk()); return true; } #ifdef MAIN_ALIGNER_SW #include #include #include #include "scoring.h" #include "aligner_seed_policy.h" int gGapBarrier; int gSnpPhred; static int bonusMatchType; // how to reward matches static int bonusMatch; // constant if match bonus is a constant static int penMmcType; // how to penalize mismatches static int penMmc; // constant if mm pelanty is a constant static int penNType; // how to penalize Ns in the read static int penN; // constant if N pelanty is a constant static bool nPairCat; // true -> concatenate mates before N filter static int penRdExConst; // constant coeff for cost of gap in read static int penRfExConst; // constant coeff for cost of gap in ref static int penRdExLinear; // linear coeff for cost of gap in read static int penRfExLinear; // linear coeff for cost of gap in ref static float costMinConst; // constant coeff for min score w/r/t read len static float costMinLinear; // linear coeff for min score w/r/t read len static float costFloorConst; // constant coeff for score floor w/r/t read len static float costFloorLinear;// linear coeff for score floor w/r/t read len static float nCeilConst; // constant coeff for N ceiling w/r/t read len static float nCeilLinear; // linear coeff for N ceiling w/r/t read len static bool nCatPair; // concat mates before applying N filter? static int multiseedMms; // mismatches permitted in a multiseed seed static int multiseedLen; // length of multiseed seeds static int multiseedIvalType; static float multiseedIvalA; static float multiseedIvalB; static float posmin; static float posfrac; static float rowmult; enum { ARG_TESTS = 256 }; static const char *short_opts = "s:m:r:d:i:"; static struct option long_opts[] = { {(char*)"snppen", required_argument, 0, 's'}, {(char*)"misspen", required_argument, 0, 'm'}, {(char*)"seed", required_argument, 0, 'r'}, {(char*)"align-policy", no_argument, 0, 'A'}, {(char*)"test", no_argument, 0, ARG_TESTS}, }; static void printUsage(ostream& os) { os << "Usage: aligner_sw

[options]*" << endl; os << "Options:" << endl; os << " -s/--snppen penalty incurred by SNP; used for decoding" << endl; os << " -m/--misspen quality to use for read chars" << endl; os << " -r/-seed seed for pseudo-random generator" << endl; } /** * Parse a T from a string 's' */ template T parse(const char *s) { T tmp; stringstream ss(s); ss >> tmp; return tmp; } static EList stbuf, enbuf; static BTDnaString btread; static BTString btqual; static BTString btref; static BTString btref2; static BTDnaString readrc; static BTString qualrc; /** * Helper function for running a case consisting of a read (sequence * and quality), a reference string, and an offset that anchors the 0th * character of the read to a reference position. */ static void doTestCase( SwAligner& al, const BTDnaString& read, const BTString& qual, const BTString& refin, TRefOff off, EList *en, const Scoring& sc, TAlScore minsc, SwResult& res, bool nsInclusive, bool filterns, uint32_t seed) { RandomSource rnd(seed); btref2 = refin; assert_eq(read.length(), qual.length()); size_t nrow = read.length(); TRefOff rfi, rff; // Calculate the largest possible number of read and reference gaps given // 'minsc' and 'pens' size_t maxgaps; size_t padi, padf; { int readGaps = sc.maxReadGaps(minsc, read.length()); int refGaps = sc.maxRefGaps(minsc, read.length()); assert_geq(readGaps, 0); assert_geq(refGaps, 0); int maxGaps = max(readGaps, refGaps); padi = 2 * maxGaps; padf = maxGaps; maxgaps = (size_t)maxGaps; } size_t nceil = (size_t)sc.nCeil.f((double)read.length()); size_t width = 1 + padi + padf; rfi = off; off = 0; // Pad the beginning of the reference with Ns if necessary if(rfi < padi) { size_t beginpad = (size_t)(padi - rfi); for(size_t i = 0; i < beginpad; i++) { btref2.insert('N', 0); off--; } rfi = 0; } else { rfi -= padi; } assert_geq(rfi, 0); // Pad the end of the reference with Ns if necessary while(rfi + nrow + padi + padf > btref2.length()) { btref2.append('N'); } rff = rfi + nrow + padi + padf; // Convert reference string to masks for(size_t i = 0; i < btref2.length(); i++) { if(toupper(btref2[i]) == 'N' && !nsInclusive) { btref2.set(16, i); } else { int num = 0; int alts[] = {4, 4, 4, 4}; decodeNuc(toupper(btref2[i]), num, alts); assert_leq(num, 4); assert_gt(num, 0); btref2.set(0, i); for(int j = 0; j < num; j++) { btref2.set(btref2[i] | (1 << alts[j]), i); } } } bool fw = true; uint32_t refidx = 0; size_t solwidth = width; if(maxgaps >= solwidth) { solwidth = 0; } else { solwidth -= maxgaps; } if(en == NULL) { enbuf.resize(solwidth); enbuf.fill(true); en = &enbuf; } assert_geq(rfi, 0); assert_gt(rff, rfi); readrc = read; qualrc = qual; al.initRead( read, // read sequence readrc, qual, // read qualities qualrc, 0, // offset of first character within 'read' to consider read.length(), // offset of last char (exclusive) in 'read' to consider floorsc); // local-alignment score floor al.initRef( fw, // 'read' is forward version of read? refidx, // id of reference aligned to off, // offset of upstream ref char aligned against btref2.wbuf(), // reference sequence (masks) rfi, // offset of first char in 'ref' to consider rff, // offset of last char (exclusive) in 'ref' to consider width, // # bands to do (width of parallelogram) solwidth, // # rightmost cols where solns can end sc, // scoring scheme minsc, // minimum score for valid alignment maxgaps, // max of max # read gaps, ref gaps 0, // amount to truncate on left-hand side en); // mask indicating which columns we can end in if(filterns) { al.filter((int)nceil); } al.align(rnd); } /** * Another interface for running a case. */ static void doTestCase2( SwAligner& al, const char *read, const char *qual, const char *refin, TRefOff off, const Scoring& sc, float costMinConst, float costMinLinear, SwResult& res, bool nsInclusive = false, bool filterns = false, uint32_t seed = 0) { btread.install(read, true); TAlScore minsc = (TAlScore)(Scoring::linearFunc( btread.length(), costMinConst, costMinLinear)); TAlScore floorsc = (TAlScore)(Scoring::linearFunc( btread.length(), costFloorConst, costFloorLinear)); btqual.install(qual); btref.install(refin); doTestCase( al, btread, btqual, btref, off, NULL, sc, minsc, floorsc, res, nsInclusive, filterns, seed ); } /** * Another interface for running a case. */ static void doTestCase3( SwAligner& al, const char *read, const char *qual, const char *refin, TRefOff off, Scoring& sc, float costMinConst, float costMinLinear, float nCeilConst, float nCeilLinear, SwResult& res, bool nsInclusive = false, bool filterns = false, uint32_t seed = 0) { btread.install(read, true); // Calculate the penalty ceiling for the read TAlScore minsc = (TAlScore)(Scoring::linearFunc( btread.length(), costMinConst, costMinLinear)); TAlScore floorsc = (TAlScore)(Scoring::linearFunc( btread.length(), costFloorConst, costFloorLinear)); btqual.install(qual); btref.install(refin); sc.nCeil.setType(SIMPLE_FUNC_LINEAR); sc.nCeil.setConst(costMinConst); sc.nCeil.setCoeff(costMinLinear); doTestCase( al, btread, btqual, btref, off, NULL, sc, minsc, floorsc, res, nsInclusive, filterns, seed ); } /** * Another interface for running a case. Like doTestCase3 but caller specifies * st_ and en_ lists. */ static void doTestCase4( SwAligner& al, const char *read, const char *qual, const char *refin, TRefOff off, EList& en, Scoring& sc, float costMinConst, float costMinLinear, float nCeilConst, float nCeilLinear, SwResult& res, bool nsInclusive = false, bool filterns = false, uint32_t seed = 0) { btread.install(read, true); // Calculate the penalty ceiling for the read TAlScore minsc = (TAlScore)(Scoring::linearFunc( btread.length(), costMinConst, costMinLinear)); TAlScore floorsc = (TAlScore)(Scoring::linearFunc( btread.length(), costFloorConst, costFloorLinear)); btqual.install(qual); btref.install(refin); sc.nCeil.setType(SIMPLE_FUNC_LINEAR); sc.nCeil.setConst(costMinConst); sc.nCeil.setCoeff(costMinLinear); doTestCase( al, btread, btqual, btref, off, &en, sc, minsc, floorsc, res, nsInclusive, filterns, seed ); } /** * Do a set of unit tests. */ static void doTests() { bonusMatchType = DEFAULT_MATCH_BONUS_TYPE; bonusMatch = DEFAULT_MATCH_BONUS; penMmcType = DEFAULT_MM_PENALTY_TYPE; penMmc = DEFAULT_MM_PENALTY; penSnp = DEFAULT_SNP_PENALTY; penNType = DEFAULT_N_PENALTY_TYPE; penN = DEFAULT_N_PENALTY; nPairCat = DEFAULT_N_CAT_PAIR; penRdExConst = DEFAULT_READ_GAP_CONST; penRfExConst = DEFAULT_REF_GAP_CONST; penRdExLinear = DEFAULT_READ_GAP_LINEAR; penRfExLinear = DEFAULT_REF_GAP_LINEAR; costMinConst = DEFAULT_MIN_CONST; costMinLinear = DEFAULT_MIN_LINEAR; costFloorConst = DEFAULT_FLOOR_CONST; costFloorLinear = DEFAULT_FLOOR_LINEAR; nCeilConst = 1.0f; // constant factor in N ceil w/r/t read len nCeilLinear = 0.1f; // coeff of linear term in N ceil w/r/t read len multiseedMms = DEFAULT_SEEDMMS; multiseedLen = DEFAULT_SEEDLEN; // Set up penalities Scoring sc( bonusMatch, penMmcType, // how to penalize mismatches 30, // constant if mm pelanty is a constant 30, // penalty for decoded SNP costMinConst, // constant factor in N ceiling w/r/t read length costMinLinear, // coeff of linear term in N ceiling w/r/t read length costFloorConst, // constant factor in N ceiling w/r/t read length costFloorLinear, // coeff of linear term in N ceiling w/r/t read length nCeilConst, // constant factor in N ceiling w/r/t read length nCeilLinear, // coeff of linear term in N ceiling w/r/t read length penNType, // how to penalize Ns in the read penN, // constant if N pelanty is a constant nPairCat, // true -> concatenate mates before N filtering 25, // constant coeff for cost of gap in read 25, // constant coeff for cost of gap in ref 15, // linear coeff for cost of gap in read 15, // linear coeff for cost of gap in ref 1, // # rows at top/bot can only be entered diagonally -1, // min row idx to backtrace from; -1 = no limit false // sort results first by row then by score? ); // Set up alternative penalities Scoring sc2( bonusMatch, COST_MODEL_QUAL, // how to penalize mismatches 30, // constant if mm pelanty is a constant 30, // penalty for decoded SNP costMinConst, // constant factor in N ceiling w/r/t read length costMinLinear, // coeff of linear term in N ceiling w/r/t read length costFloorConst, // constant factor in N ceiling w/r/t read length costFloorLinear, // coeff of linear term in N ceiling w/r/t read length 1.0f, // constant factor in N ceiling w/r/t read length 1.0f, // coeff of linear term in N ceiling w/r/t read length penNType, // how to penalize Ns in the read penN, // constant if N pelanty is a constant nPairCat, // true -> concatenate mates before N filtering 25, // constant coeff for cost of gap in read 25, // constant coeff for cost of gap in ref 15, // linear coeff for cost of gap in read 15, // linear coeff for cost of gap in ref 1, // # rows at top/bot can only be entered diagonally -1, // min row idx to backtrace from; -1 = no limit false // sort results first by row then by score? ); SwResult res; // // Basic nucleotide-space tests // cerr << "Running tests..." << endl; int tests = 1; bool nIncl = false; bool nfilter = false; SwAligner al; RandomSource rnd(73); for(int i = 0; i < 3; i++) { cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", exact)..."; sc.rdGapConst = 40; sc.rfGapConst = 40; sc.rdGapLinear = 15; sc.rfGapLinear = 15; // A C G T A C G T // H E F H E F H E F H E F H E F H E F H E F H E F // A 0 lo lo -30 lo lo -30 lo lo -30 lo lo 0 lo lo -30 lo lo-30 lo lo-30 lo lo // C -30 lo -55 0 -85 -85 -55 -55 -85 // G -30 lo -70 -55 -85 -55 0 -100-100 // T -30 lo -85 -60 -85 -70 -55-100 -55 // A 0 lo -85 -55 -55 -85 -70 -70 -70 // C -30 lo -55 0 -85-100 -55 -55 -85 // G -30 lo -70 -55 -85 -55 0 -100-100 // T -30 lo -85 -60 -85 -70 -55-100 -55 doTestCase2( al, "ACGTACGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 0); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1mm allowed by minsc)..."; sc.setMmPen(COST_MODEL_CONSTANT, 30); //sc.setMatchBonus(10); doTestCase2( al, "ACGTTCGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -30); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1mm allowed by minsc, check qual 1)..."; doTestCase2( al, "ACGTTCGT", // read "ABCDEFGH", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc2, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); size_t lo, hi; if(i == 0) { lo = 0; hi = 1; } else if(i == 1) { lo = 1; hi = 2; } else { lo = 2; hi = 3; } for(size_t j = lo; j < hi; j++) { al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(j*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -36); assert_eq(res.alres.score().ns(), 0); res.reset(); } al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1mm allowed by minsc, check qual 2)..."; doTestCase2( al, "ACGAACGT", // read "ABCDEFGH", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc2, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -35); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); assert(res.empty()); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1mm allowed by minsc, check qual )..."; assert(res.empty()); doTestCase2( al, "TCGTACGT", // read "ABCDEFGH", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc2, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -32); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); assert(res.empty()); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1mm at the beginning, allowed by minsc)..."; doTestCase2( al, "CCGTACGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -30); assert_eq(res.alres.score().ns(), 0); assert_eq(1, res.alres.ned().size()); assert_eq(0, res.alres.aed().size()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 n in read, allowed)..."; doTestCase3( al, "ACGTNCGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling 1.0f, // allow 1 N 0.0f, // allow 1 N res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -1); assert_eq(res.alres.score().ns(), 1); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 2 n in read, allowed)..."; doTestCase3( al, "ACGNNCGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling 2.0f, // const coeff for N ceiling 0.0f, // linear coeff for N ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -2); assert_eq(res.alres.score().ns(), 2); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 2 n in read, 1 at beginning, allowed)..."; doTestCase2( al, "NCGTNCGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -2); assert_eq(res.alres.score().ns(), 2); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 n in ref, allowed)..."; doTestCase2( al, "ACGTACGT", // read "IIIIIIII", // qual "ACGTNCGTACGTANGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), -1); assert_eq(res.alres.score().ns(), 1); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1mm disallowed by minsc)..."; doTestCase2( al, "ACGTTCGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -10.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); // Read gap with equal read and ref gap penalties cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", read gap allowed by minsc)..."; assert(res.empty()); sc.rfGapConst = 25; sc.rdGapConst = 25; sc.rfGapLinear = 15; sc.rdGapLinear = 15; doTestCase2( al, "ACGTCGT", // read "IIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -40); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", read gap disallowed by minsc)..."; sc.rfGapConst = 25; sc.rdGapConst = 25; sc.rfGapLinear = 15; sc.rdGapLinear = 15; doTestCase2( al, "ACGTCGT", // read "IIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); cerr << "PASSED" << endl; // Ref gap with equal read and ref gap penalties cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", ref gap allowed by minsc)..."; doTestCase2( al, "ACGTAACGT", // read "IIIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -40); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", read gap disallowed by gap barrier)..."; sc.rfGapConst = 25; sc.rdGapConst = 25; sc.rfGapLinear = 15; sc.rdGapLinear = 15; sc.gapbar = 4; doTestCase2( al, "ACGTCGT", // read "IIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns sc.gapbar = 1; al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); cerr << "PASSED" << endl; // Ref gap with equal read and ref gap penalties cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", ref gap allowed by minsc, gapbar=3)..."; sc.gapbar = 3; doTestCase2( al, "ACGTAACGT", // read "IIIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns sc.gapbar = 1; assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -40); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; // Ref gap with equal read and ref gap penalties cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", ref gap allowed by minsc, gapbar=4)..."; sc.gapbar = 4; doTestCase2( al, "ACGTAACGT", // read "IIIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns sc.gapbar = 1; assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -40); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", ref gap disallowed by minsc)..."; doTestCase2( al, "ACGTAACGT", // read "IIIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); assert(al.done()); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", ref gap disallowed by gap barrier)..."; sc.gapbar = 5; doTestCase2( al, "ACGTAACGT", // read "IIIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns sc.gapbar = 1; al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); assert(al.done()); cerr << "PASSED" << endl; // Read gap with one read gap and zero ref gaps allowed cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 read gap, ref gaps disallowed by minsc)..."; sc.rfGapConst = 35; sc.rdGapConst = 25; sc.rfGapLinear = 20; sc.rdGapLinear = 10; doTestCase2( al, "ACGTCGT", // read "IIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -35); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", gaps disallowed by minsc)..."; sc.rfGapConst = 25; sc.rdGapConst = 25; sc.rfGapLinear = 10; sc.rdGapLinear = 10; doTestCase2( al, "ACGTCGT", // read "IIIIIII", // qual "ACGTACGTACGTACGT", // ref i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); assert(res.empty()); cerr << "PASSED" << endl; // Ref gap with one ref gap and zero read gaps allowed sc.rfGapConst = 25; sc.rdGapConst = 35; sc.rfGapLinear = 12; sc.rdGapLinear = 22; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 ref gap, read gaps disallowed by minsc)..."; assert(res.empty()); doTestCase2( al, "ACGTAACGT", "IIIIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -37); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", gaps disallowed by minsc)..."; doTestCase2( al, "ACGTAACGT", "IIIIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); cerr << "PASSED" << endl; // Read gap with one read gap and two ref gaps allowed sc.rfGapConst = 20; sc.rdGapConst = 25; sc.rfGapLinear = 10; sc.rdGapLinear = 15; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 read gap, 2 ref gaps allowed by minsc)..."; doTestCase2( al, "ACGTCGT", "IIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -40); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", gaps disallowed by minsc)..."; doTestCase2( al, "ACGTCGT", "IIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); cerr << "PASSED" << endl; // Ref gap with one ref gap and two read gaps allowed sc.rfGapConst = 25; sc.rdGapConst = 11; // if this were 10, we'd have ties sc.rfGapLinear = 15; sc.rdGapLinear = 10; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 ref gap, 2 read gaps allowed by minsc)..."; doTestCase2( al, "ACGTAACGT", "IIIIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -40); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", gaps disallowed by minsc)..."; doTestCase2( al, "ACGTAACGT", "IIIIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(res.empty()); res.reset(); assert(al.done()); cerr << "PASSED" << endl; // Read gap with two read gaps and two ref gaps allowed sc.rfGapConst = 15; sc.rdGapConst = 15; sc.rfGapLinear = 10; sc.rdGapLinear = 10; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 2 ref gaps, 2 read gaps allowed by minsc)..."; doTestCase3( al, "ACGTCGT", "IIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -40.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling 1.0, // const coeff for N ceiling 0.0, // linear coeff for N ceiling res, // result nIncl, // Ns inclusive (not mismatches) true); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); if(!res.empty()) { //al.printResultStacked(res, cerr); cerr << endl; } assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -25); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); // The following alignment is possible when i == 2: // ACGTACGTACGTACGTN // A x // C x // G x // T x // C x // G x // T x assert(i == 2 || res.empty()); res.reset(); cerr << "PASSED" << endl; sc.rfGapConst = 10; sc.rdGapConst = 10; sc.rfGapLinear = 10; sc.rdGapLinear = 10; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 ref gap, 1 read gap allowed by minsc)..."; doTestCase2( al, "ACGTCGT", "IIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -20); assert_eq(res.alres.score().ns(), 0); res.reset(); cerr << "PASSED" << endl; // Ref gap with two ref gaps and zero read gaps allowed sc.rfGapConst = 15; sc.rdGapConst = 15; sc.rfGapLinear = 5; sc.rdGapLinear = 5; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 2 ref gaps, 2 read gaps allowed by minsc)..."; // Careful: it might be possible for the read to align with overhang // instead of with a gap doTestCase3( al, "ACGTAACGT", "IIIIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -35.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling 1.0f, // needed to avoid overhang alignments 0.0f, // needed to avoid overhang alignments res, // result nIncl, // Ns inclusive (not mismatches) true); // filter Ns if(i == 0) { lo = 0; hi = 1; } else if(i == 1) { lo = 1; hi = 2; } else { lo = 2; hi = 3; } for(size_t j = lo; j < hi; j++) { al.nextAlignment(res, rnd); assert(!res.empty()); //al.printResultStacked(res, cerr); cerr << endl; assert(res.alres.refoff() == 0 || res.alres.refoff() == 4 || res.alres.refoff() == 8); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -20); assert_eq(res.alres.score().ns(), 0); res.reset(); } al.nextAlignment(res, rnd); //assert(res.empty()); //res.reset(); cerr << "PASSED" << endl; sc.rfGapConst = 25; sc.rdGapConst = 25; sc.rfGapLinear = 4; sc.rdGapLinear = 4; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1 ref gap, 1 read gap allowed by minsc)..."; doTestCase2( al, "ACGTAACGT", "IIIIIIIII", "ACGTACGTACGTACGT", i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 1); assert_eq(res.alres.score().score(), -29); assert_eq(res.alres.score().ns(), 0); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", short read)..."; doTestCase2( al, "A", "I", "AAAAAAAAAAAA", i*4, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 0); assert_eq(res.alres.score().ns(), 0); res.reset(); cerr << "PASSED" << endl; if(i == 0) { cerr << " Test " << tests++ << " (nuc space, offset 0, short read & ref)..."; doTestCase2( al, "A", "I", "A", 0, // off sc, // scoring scheme -30.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 0); assert_eq(res.alres.score().ns(), 0); res.reset(); cerr << "PASSED" << endl; } cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", short read, many allowed gaps)..."; doTestCase2( al, "A", "I", "AAAAAAAAAAAA", i*4, // off sc, // scoring scheme -150.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 0); assert_eq(res.alres.score().ns(), 0); res.reset(); cerr << "PASSED" << endl; if(i == 0) { cerr << " Test " << tests++ << " (nuc space, offset 0, short read & ref, " << "many allowed gaps)..."; doTestCase2( al, "A", "I", "A", 0, // off sc, // scoring scheme -150.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 0); assert_eq(res.alres.score().ns(), 0); res.reset(); cerr << "PASSED" << endl; } } // A test case where a valid alignment with a worse score should be // accepted over a valid alignment with a better score but too many // Ns cerr << " Test " << tests++ << " (N ceiling 1)..."; sc.mmcostType = COST_MODEL_CONSTANT; sc.mmcost = 10; sc.snp = 30; sc.nCeilConst = 0.0f; sc.nCeilLinear = 0.0f; sc.rfGapConst = 10; sc.rdGapLinear = 10; sc.rfGapConst = 10; sc.rfGapLinear = 10; sc.setNPen(COST_MODEL_CONSTANT, 2); sc.gapbar = 1; // No Ns allowed, so this hit should be filtered doTestCase2( al, "ACGTACGT", // read seq "IIIIIIII", // read quals "NCGTACGT", // ref seq 0, // offset sc, // scoring scheme -25.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result false, // ns are in inclusive true, // nfilter 0); al.nextAlignment(res, rnd); assert(res.empty()); cerr << "PASSED" << endl; res.reset(); // 1 N allowed, so this hit should stand cerr << " Test " << tests++ << " (N ceiling 2)..."; doTestCase3( al, "ACGTACGT", // read seq "IIIIIIII", // read quals "NCGTACGT", // ref seq 0, // offset sc, // scoring scheme -25.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling 1.0f, // constant coefficient for # Ns allowed 0.0f, // linear coefficient for # Ns allowed res, // result false, // ns are in inclusive false, // nfilter - NOTE: FILTER OFF 0); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(0, res.alres.score().gaps()); assert_eq(-2, res.alres.score().score()); assert_eq(1, res.alres.score().ns()); cerr << "PASSED" << endl; res.reset(); // 1 N allowed, but we set st_ such that this hit should not stand for(size_t i = 0; i < 2; i++) { cerr << " Test " << tests++ << " (N ceiling 2 with st_ override)..."; EList en; en.resize(3); en.fill(true); if(i == 1) { en[1] = false; } sc.rfGapConst = 10; sc.rdGapLinear = 10; sc.rfGapConst = 10; sc.rfGapLinear = 10; doTestCase4( al, "ACGTACGT", // read seq "IIIIIIII", // read quals "NCGTACGT", // ref seq 0, // offset en, // rectangle columns where solution can end sc, // scoring scheme -25.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling 1.0f, // constant coefficient for # Ns allowed 0.0f, // linear coefficient for # Ns allowed res, // result false, // ns are in inclusive false, // nfilter - NOTE: FILTER OFF 0); al.nextAlignment(res, rnd); if(i > 0) { assert(res.empty()); } else { assert(!res.empty()); } cerr << "PASSED" << endl; res.reset(); } // No Ns allowed, so this hit should be filtered cerr << " Test " << tests++ << " (N ceiling 3)..."; sc.nCeilConst = 1.0f; sc.nCeilLinear = 0.0f; doTestCase2( al, "ACGTACGT", // read seq "IIIIIIII", // read quals "NCGTACGT", // ref seq 0, // offset sc, // scoring scheme -25.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result false, // ns are in inclusive true, // nfilter - NOTE: FILTER ON 0); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(0, res.alres.score().gaps()); assert_eq(-2, res.alres.score().score()); assert_eq(1, res.alres.score().ns()); cerr << "PASSED" << endl; res.reset(); // No Ns allowed, so this hit should be filtered cerr << " Test " << tests++ << " (redundant alignment elimination 1)..."; sc.nCeilConst = 1.0f; sc.nCeilLinear = 0.0f; sc.rfGapConst = 25; sc.rdGapLinear = 15; sc.rfGapConst = 25; sc.rfGapLinear = 15; doTestCase2( al, // 1 2 3 4 // 01234567890123456789012345678901234567890123456 "AGGCTATGCCTCTGACGCGATATCGGCGCCCACTTCAGAGCTAACCG", "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII", "TTTTTTTTAGGCTATGCCTCTGACGCGATATCGGCGCCCACTTCAGAGCTAACCGTTTTTTT", // 01234567890123456789012345678901234567890123456789012345678901 // 1 2 3 4 5 6 8, // offset sc, // scoring scheme -25.0f, // const coeff for cost ceiling -5.0f, // linear coeff for cost ceiling res, // result false, // ns are in inclusive true, // nfilter - NOTE: FILTER ON 0); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(8, res.alres.refoff()); assert_eq(47, res.alres.refExtent()); assert_eq(0, res.alres.score().gaps()); assert_eq(0, res.alres.score().score()); assert_eq(0, res.alres.score().ns()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); cerr << "PASSED" << endl; res.reset(); } /** * Do a set of unit tests for local alignment. */ static void doLocalTests() { bonusMatchType = DEFAULT_MATCH_BONUS_TYPE; bonusMatch = DEFAULT_MATCH_BONUS_LOCAL; penMmcType = DEFAULT_MM_PENALTY_TYPE; penMmc = DEFAULT_MM_PENALTY; penSnp = DEFAULT_SNP_PENALTY; penNType = DEFAULT_N_PENALTY_TYPE; penN = DEFAULT_N_PENALTY; nPairCat = DEFAULT_N_CAT_PAIR; penRdExConst = DEFAULT_READ_GAP_CONST; penRfExConst = DEFAULT_REF_GAP_CONST; penRdExLinear = DEFAULT_READ_GAP_LINEAR; penRfExLinear = DEFAULT_REF_GAP_LINEAR; costMinConst = DEFAULT_MIN_CONST_LOCAL; costMinLinear = DEFAULT_MIN_LINEAR_LOCAL; costFloorConst = DEFAULT_FLOOR_CONST_LOCAL; costFloorLinear = DEFAULT_FLOOR_LINEAR_LOCAL; nCeilConst = 1.0f; // constant factor in N ceil w/r/t read len nCeilLinear = 0.1f; // coeff of linear term in N ceil w/r/t read len multiseedMms = DEFAULT_SEEDMMS; multiseedLen = DEFAULT_SEEDLEN; // Set up penalities Scoring sc( 10, penMmcType, // how to penalize mismatches 30, // constant if mm pelanty is a constant penSnp, // penalty for decoded SNP costMinConst, // constant factor in N ceiling w/r/t read length costMinLinear, // coeff of linear term in N ceiling w/r/t read length costFloorConst, // constant factor in N ceiling w/r/t read length costFloorLinear, // coeff of linear term in N ceiling w/r/t read length nCeilConst, // constant factor in N ceiling w/r/t read length nCeilLinear, // coeff of linear term in N ceiling w/r/t read length penNType, // how to penalize Ns in the read penN, // constant if N pelanty is a constant nPairCat, // true -> concatenate mates before N filtering 25, // constant coeff for cost of gap in read 25, // constant coeff for cost of gap in ref 15, // linear coeff for cost of gap in read 15, // linear coeff for cost of gap in ref 1, // # rows at top/bot can only be entered diagonally -1, // min row idx to backtrace from; -1 = no limit false // sort results first by row then by score? ); SwResult res; // // Basic nucleotide-space tests // cerr << "Running local tests..." << endl; int tests = 1; bool nIncl = false; bool nfilter = false; SwAligner al; RandomSource rnd(73); for(int i = 0; i < 3; i++) { cerr << " Test " << tests++ << " (short nuc space, offset " << (i*4) << ", exact)..."; sc.rdGapConst = 40; sc.rfGapConst = 40; doTestCase2( al, "ACGT", // read "IIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme 0.0f, // const coeff for cost ceiling 8.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(4, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 40); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); cerr << "PASSED" << endl; // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 // A C G T A C G T A C G T A C G T // 0 C // 1 C x // 2 G x // 3 T x cerr << " Test " << tests++ << " (short nuc space, offset " << (i*4) << ", 1mm)..."; sc.rdGapConst = 40; sc.rfGapConst = 40; doTestCase2( al, "CCGT", // read "IIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme 0.0f, // const coeff for cost ceiling 7.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4+1, res.alres.refoff()); assert_eq(3, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 30); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (short nuc space, offset " << (i*4) << ", 1mm)..."; sc.rdGapConst = 40; sc.rfGapConst = 40; doTestCase2( al, "ACGA", // read "IIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme 0.0f, // const coeff for cost ceiling 7.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(3, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 30); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); cerr << "PASSED" << endl; if(i == 0) { cerr << " Test " << tests++ << " (short nuc space, offset " << (i*4) << ", 1mm, match bonus=20)..."; sc.rdGapConst = 40; sc.rfGapConst = 40; sc.setMatchBonus(20); doTestCase2( al, "TTGT", // read "IIII", // qual "TTGA", // ref in i*4, // off sc, // scoring scheme 25.0f, // const coeff for cost ceiling 0.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(3, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 60); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); sc.setMatchBonus(10); cerr << "PASSED" << endl; } cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", exact)..."; sc.rdGapConst = 40; sc.rfGapConst = 40; doTestCase2( al, "ACGTACGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme 0.0f, // const coeff for cost ceiling 8.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 80); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); assert(res.empty()); assert(al.done()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (long nuc space, offset " << (i*8) << ", exact)..."; sc.rdGapConst = 40; sc.rfGapConst = 40; doTestCase2( al, "ACGTACGTACGTACGTACGTA", // read "IIIIIIIIIIIIIIIIIIIII", // qual "ACGTACGTACGTACGTACGTACGTACGTACGTACGTA", // ref in // ACGTACGTACGTACGTACGT // ACGTACGTACGTACGTACGT // ACGTACGTACGTACGTACGT i*8, // off sc, // scoring scheme 0.0f, // const coeff for cost ceiling 8.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*8, res.alres.refoff()); assert_eq(21, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 210); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); //assert(res.empty()); //assert(al.done()); res.reset(); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (nuc space, offset " << (i*4) << ", 1mm allowed by minsc)..."; doTestCase2( al, "ACGTTCGT", // read "IIIIIIII", // qual "ACGTACGTACGTACGT", // ref in i*4, // off sc, // scoring scheme 0.0f, // const coeff for cost ceiling 5.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*4, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 40); assert_eq(res.alres.score().ns(), 0); res.reset(); al.nextAlignment(res, rnd); //assert(res.empty()); //assert(al.done()); cerr << "PASSED" << endl; cerr << " Test " << tests++ << " (long nuc space, offset " << (i*8) << ", 6mm allowed by minsc)..."; sc.rdGapConst = 50; sc.rfGapConst = 50; sc.rdGapLinear = 45; sc.rfGapLinear = 45; doTestCase2( al, "ACGTACGATGCATCGTACGTA", // read "IIIIIIIIIIIIIIIIIIIII", // qual "ACGTACGTACGTACGTACGTACGTACGTACGTACGTA", // ref in // ACGTACGTACGTACGTACGT // ACGTACGTACGTACGTACGT // ACGTACGTACGTACGTACGT i*8, // off sc, // scoring scheme 0.0f, // const coeff for cost ceiling 1.0f, // linear coeff for cost ceiling res, // result nIncl, // Ns inclusive (not mismatches) nfilter); // filter Ns assert(!al.done()); al.nextAlignment(res, rnd); assert(!res.empty()); assert_eq(i*8 + 13, res.alres.refoff()); assert_eq(8, res.alres.refExtent()); assert_eq(res.alres.score().gaps(), 0); assert_eq(res.alres.score().score(), 80); assert_eq(res.alres.score().ns(), 0); assert(res.alres.ned().empty()); assert(res.alres.aed().empty()); res.reset(); al.nextAlignment(res, rnd); res.reset(); cerr << "PASSED" << endl; } } int main(int argc, char **argv) { int option_index = 0; int next_option; unsigned seed = 0; gGapBarrier = 1; gSnpPhred = 30; bool nsInclusive = false; bool nfilter = false; bonusMatchType = DEFAULT_MATCH_BONUS_TYPE; bonusMatch = DEFAULT_MATCH_BONUS; penMmcType = DEFAULT_MM_PENALTY_TYPE; penMmc = DEFAULT_MM_PENALTY; penSnp = DEFAULT_SNP_PENALTY; penNType = DEFAULT_N_PENALTY_TYPE; penN = DEFAULT_N_PENALTY; penRdExConst = DEFAULT_READ_GAP_CONST; penRfExConst = DEFAULT_REF_GAP_CONST; penRdExLinear = DEFAULT_READ_GAP_LINEAR; penRfExLinear = DEFAULT_REF_GAP_LINEAR; costMinConst = DEFAULT_MIN_CONST; costMinLinear = DEFAULT_MIN_LINEAR; costFloorConst = DEFAULT_FLOOR_CONST; costFloorLinear = DEFAULT_FLOOR_LINEAR; nCeilConst = 1.0f; // constant factor in N ceiling w/r/t read length nCeilLinear = 1.0f; // coeff of linear term in N ceiling w/r/t read length nCatPair = false; multiseedMms = DEFAULT_SEEDMMS; multiseedLen = DEFAULT_SEEDLEN; multiseedIvalType = DEFAULT_IVAL; multiseedIvalA = DEFAULT_IVAL_A; multiseedIvalB = DEFAULT_IVAL_B; mhits = 1; do { next_option = getopt_long(argc, argv, short_opts, long_opts, &option_index); switch (next_option) { case 's': gSnpPhred = parse(optarg); break; case 'r': seed = parse(optarg); break; case ARG_TESTS: { doTests(); cout << "PASSED end-to-ends" << endl; doLocalTests(); cout << "PASSED locals" << endl; return 0; } case 'A': { bool localAlign = false; bool noisyHpolymer = false; bool ignoreQuals = false; SeedAlignmentPolicy::parseString( optarg, localAlign, noisyHpolymer, ignoreQuals, bonusMatchType, bonusMatch, penMmcType, penMmc, penNType, penN, penRdExConst, penRfExConst, penRdExLinear, penRfExLinear, costMinConst, costMinLinear, costFloorConst, costFloorLinear, nCeilConst, nCeilLinear, nCatPair, multiseedMms, multiseedLen, multiseedIvalType, multiseedIvalA, multiseedIvalB, posmin); break; } case -1: break; default: { cerr << "Unknown option: " << (char)next_option << endl; printUsage(cerr); exit(1); } } } while(next_option != -1); srand(seed); if(argc - optind < 4) { cerr << "Need at least 4 arguments" << endl; printUsage(cerr); exit(1); } BTDnaString read; BTString ref, qual; // Get read read.installChars(argv[optind]); // Get qualities qual.install(argv[optind+1]); assert_eq(read.length(), qual.length()); // Get reference ref.install(argv[optind+2]); // Get reference offset size_t off = parse(argv[optind+3]); // Set up penalities Scoring sc( false, // local alignment? false, // bad homopolymer? bonusMatchType, bonusMatch, penMmcType, // how to penalize mismatches penMmc, // constant if mm pelanty is a constant costMinConst, costMinLinear, costFloorConst, costFloorLinear, nCeilConst, // N ceiling constant coefficient nCeilLinear, // N ceiling linear coefficient penNType, // how to penalize Ns in the read penN, // constant if N pelanty is a constant nCatPair, // true -> concatenate mates before N filtering penRdExConst, // constant cost of extending a gap in the read penRfExConst, // constant cost of extending a gap in the reference penRdExLinear, // coeff of linear term for cost of gap extension in read penRfExLinear // coeff of linear term for cost of gap extension in ref ); // Calculate the penalty ceiling for the read TAlScore minsc = Scoring::linearFunc( read.length(), costMinConst, costMinLinear); TAlScore floorsc = Scoring::linearFunc( read.length(), costFloorConst, costFloorLinear); SwResult res; SwAligner al; doTestCase( al, read, qual, ref, off, NULL, sc, minsc, res, nsInclusive, nfilter, seed); } #endif /*MAIN_ALIGNER_SW*/