/* * Copyright 2011, Ben Langmead * * This file is part of Bowtie 2. * * Bowtie 2 is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Bowtie 2 is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Bowtie 2. If not, see . */ #ifndef ALIGNER_SW_NUC_H_ #define ALIGNER_SW_NUC_H_ #include #include "aligner_sw_common.h" #include "aligner_result.h" /** * Encapsulates a backtrace stack frame. Includes enough information that we * can "pop" back up to this frame and choose to make a different backtracking * decision. The information included is: * * 1. The mask at the decision point. When we first move through the mask and * when we backtrack to it, we're careful to mask out the bit corresponding * to the path we're taking. When we move through it after removing the * last bit from the mask, we're careful to pop it from the stack. * 2. The sizes of the edit lists. When we backtrack, we resize the lists back * down to these sizes to get rid of any edits introduced since the branch * point. */ struct DpNucFrame { /** * Initialize a new DpNucFrame stack frame. */ void init( size_t nedsz_, size_t aedsz_, size_t celsz_, size_t row_, size_t col_, size_t gaps_, size_t readGaps_, size_t refGaps_, AlnScore score_, int ct_) { nedsz = nedsz_; aedsz = aedsz_; celsz = celsz_; row = row_; col = col_; gaps = gaps_; readGaps = readGaps_; refGaps = refGaps_; score = score_; ct = ct_; } size_t nedsz; // size of the nucleotide edit list at branch (before // adding the branch edit) size_t aedsz; // size of ambiguous nucleotide edit list at branch size_t celsz; // size of cell-traversed list at branch size_t row; // row of cell where branch occurred size_t col; // column of cell where branch occurred size_t gaps; // number of gaps before branch occurred size_t readGaps; // number of read gaps before branch occurred size_t refGaps; // number of ref gaps before branch occurred AlnScore score; // score where branch occurred int ct; // table type (oall, rdgap or rfgap) }; enum { BT_CAND_FATE_SUCCEEDED = 1, BT_CAND_FATE_FAILED, BT_CAND_FATE_FILT_START, // skipped b/c starting cell already explored BT_CAND_FATE_FILT_DOMINATED, // skipped b/c it was dominated BT_CAND_FATE_FILT_SCORE // skipped b/c score not interesting anymore }; /** * Encapsulates a cell that we might want to backtrace from. */ struct DpBtCandidate { DpBtCandidate() { reset(); } DpBtCandidate(size_t row_, size_t col_, TAlScore score_) { init(row_, col_, score_); } void reset() { init(0, 0, 0); } void init(size_t row_, size_t col_, TAlScore score_) { row = row_; col = col_; score = score_; // 0 = invalid; this should be set later according to what happens // before / during the backtrace fate = 0; } /** * Return true iff this candidate is (heuristically) dominated by the given * candidate. We say that candidate A dominates candidate B if (a) B is * somewhere in the N x N square that extends up and to the left of A, * where N is an arbitrary number like 20, and (b) B's score is <= than * A's. */ inline bool dominatedBy(const DpBtCandidate& o) { const size_t SQ = 40; size_t rowhi = row; size_t rowlo = o.row; if(rowhi < rowlo) swap(rowhi, rowlo); size_t colhi = col; size_t collo = o.col; if(colhi < collo) swap(colhi, collo); return (colhi - collo) <= SQ && (rowhi - rowlo) <= SQ; } /** * Return true if this candidate is "greater than" (should be considered * later than) the given candidate. */ bool operator>(const DpBtCandidate& o) const { if(score < o.score) return true; if(score > o.score) return false; if(row < o.row ) return true; if(row > o.row ) return false; if(col < o.col ) return true; if(col > o.col ) return false; return false; } /** * Return true if this candidate is "less than" (should be considered * sooner than) the given candidate. */ bool operator<(const DpBtCandidate& o) const { if(score > o.score) return true; if(score < o.score) return false; if(row > o.row ) return true; if(row < o.row ) return false; if(col > o.col ) return true; if(col < o.col ) return false; return false; } /** * Return true if this candidate equals the given candidate. */ bool operator==(const DpBtCandidate& o) const { return row == o.row && col == o.col && score == o.score; } bool operator>=(const DpBtCandidate& o) const { return !((*this) < o); } bool operator<=(const DpBtCandidate& o) const { return !((*this) > o); } #ifndef NDEBUG /** * Check internal consistency. */ bool repOk() const { assert(VALID_SCORE(score)); return true; } #endif size_t row; // cell row size_t col; // cell column w/r/t LHS of rectangle TAlScore score; // score fo alignment int fate; // flag indicating whether we succeeded, failed, skipped }; template class NBest { public: NBest() { nelt_ = nbest_ = n_ = 0; } bool inited() const { return nelt_ > 0; } void init(size_t nelt, size_t nbest) { nelt_ = nelt; nbest_ = nbest; elts_.resize(nelt * nbest); ncur_.resize(nelt); ncur_.fill(0); n_ = 0; } /** * Add a new result to bin 'elt'. Where it gets prioritized in the list of * results in that bin depends on the result of operator>. */ bool add(size_t elt, const T& o) { assert_lt(elt, nelt_); const size_t ncur = ncur_[elt]; assert_leq(ncur, nbest_); n_++; for(size_t i = 0; i < nbest_ && i <= ncur; i++) { if(o > elts_[nbest_ * elt + i] || i >= ncur) { // Insert it here // Move everyone from here on down by one slot for(int j = (int)ncur; j > (int)i; j--) { if(j < (int)nbest_) { elts_[nbest_ * elt + j] = elts_[nbest_ * elt + j - 1]; } } elts_[nbest_ * elt + i] = o; if(ncur < nbest_) { ncur_[elt]++; } return true; } } return false; } /** * Return true iff there are no solutions. */ bool empty() const { return n_ == 0; } /** * Dump all the items in our payload into the given EList. */ template void dump(TList& l) const { if(empty()) return; for(size_t i = 0; i < nelt_; i++) { assert_leq(ncur_[i], nbest_); for(size_t j = 0; j < ncur_[i]; j++) { l.push_back(elts_[i * nbest_ + j]); } } } protected: size_t nelt_; size_t nbest_; EList elts_; EList ncur_; size_t n_; // total # results added }; #endif /*def ALIGNER_SW_NUC_H_*/