hisat-3n/aligner_sw.h

/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * aligner_sw.h
 *
 * Classes and routines for solving dynamic programming problems in aid of read
 * alignment.  Goals include the ability to handle:
 *
 * - Both read alignment, where the query must align end-to-end, and local
 *   alignment, where we seek a high-scoring alignment that need not involve
 *   the entire query.
 * - Situations where: (a) we've found a seed hit and are trying to extend it
 *   into a larger hit, (b) we've found an alignment for one mate of a pair and
 *   are trying to find a nearby alignment for the other mate, (c) we're
 *   aligning against an entire reference sequence.
 * - Caller-specified indicators for what columns of the dynamic programming
 *   matrix we are allowed to start in or end in.
 *
 * TODO:
 *
 * - A slicker way to filter out alignments that violate a ceiling placed on
 *   the number of Ns permitted in the reference portion of the alignment.
 *   Right now we accomplish this by masking out ending columns that correspond
 *   to *ungapped* alignments with too many Ns.  This results in false
 *   positives and false negatives for gapped alignments.  The margin of error
 *   (# of Ns by which we might miscount) is bounded by the number of gaps.
 */

/**
 *  |-maxgaps-|
 *  ***********oooooooooooooooooooooo    -
 *   ***********ooooooooooooooooooooo    |
 *    ***********oooooooooooooooooooo    |
 *     ***********ooooooooooooooooooo    |
 *      ***********oooooooooooooooooo    |
 *       ***********ooooooooooooooooo read len
 *        ***********oooooooooooooooo    |
 *         ***********ooooooooooooooo    |
 *          ***********oooooooooooooo    |
 *           ***********ooooooooooooo    |
 *            ***********oooooooooooo    -
 *            |-maxgaps-|
 *  |-readlen-|
 *  |-------skip--------|
 */

#ifndef ALIGNER_SW_H_
#define ALIGNER_SW_H_

#define INLINE_CUPS

#include <stdint.h>
#include <iostream>
#include <limits>
#include "threading.h"
#include <emmintrin.h>
#include "aligner_sw_common.h"
#include "aligner_sw_nuc.h"
#include "ds.h"
#include "aligner_seed.h"
#include "reference.h"
#include "random_source.h"
#include "mem_ids.h"
#include "aligner_result.h"
#include "mask.h"
#include "dp_framer.h"
#include "aligner_swsse.h"
#include "aligner_bt.h"

#define QUAL2(d, f) sc_->mm((int)(*rd_)[rdi_ + d], \
							(int)  rf_ [rfi_ + f], \
							(int)(*qu_)[rdi_ + d] - 33)
#define QUAL(d)     sc_->mm((int)(*rd_)[rdi_ + d], \
							(int)(*qu_)[rdi_ + d] - 33)
#define N_SNP_PEN(c) (((int)rf_[rfi_ + c] > 15) ? sc_->n(30) : sc_->penSnp)

/**
 * SwAligner
 * =========
 *
 * Ensapsulates facilities for alignment using dynamic programming.  Handles
 * alignment of nucleotide reads against known reference nucleotides.
 *
 * The class is stateful.  First the user must call init() to initialize the
 * object with details regarding the dynamic programming problem to be solved.
 * Next, the user calls align() to fill the dynamic programming matrix and
 * calculate summaries describing the solutions.  Finally the user calls 
 * nextAlignment(...), perhaps repeatedly, to populate the SwResult object with
 * the next result.  Results are dispensend in best-to-worst, left-to-right
 * order.
 *
 * The class expects the read string, quality string, and reference string
 * provided by the caller live at least until the user is finished aligning and
 * obtaining alignments from this object.
 *
 * There is a design tradeoff between hiding/exposing details of the genome and
 * its strands to the SwAligner.  In a sense, a better design is to hide
 * details such as the id of the reference sequence aligned to, or whether
 * we're aligning the read in its original forward orientation or its reverse
 * complement.  But this means that any alignment results returned by SwAligner
 * have to be extended to include those details before they're useful to the
 * caller.  We opt for messy but expedient - the reference id and orientation
 * of the read are given to SwAligner, remembered, and used to populate
 * SwResults.
 *
 * LOCAL VS GLOBAL
 *
 * The dynamic programming aligner supports both local and global alignment,
 * and one option in between.  To implement global alignment, the aligner (a)
 * allows negative scores (i.e. doesn't necessarily clamp them up to 0), (b)
 * checks in rows other than the last row for acceptable solutions, and (c)
 * optionally adds a bonus to the score for matches.
 * 
 * For global alignment, we:
 *
 * (a) Allow negative scores
 * (b) Check only in the last row
 * (c) Either add a bonus for matches or not (doesn't matter)
 *
 * For local alignment, we:
 *
 * (a) Clamp scores to 0
 * (b) Check in any row for a sufficiently high score
 * (c) Add a bonus for matches
 *
 * An in-between solution is to allow alignments to be curtailed on the
 * right-hand side if a better score can be achieved thereby, but not on the
 * left.  For this, we:
 *
 * (a) Allow negative scores
 * (b) Check in any row for a sufficiently high score
 * (c) Either add a bonus for matches or not (doesn't matter)
 *
 * REDUNDANT ALIGNMENTS
 *
 * When are two alignments distinct and when are they redundant (not distinct)?
 * At one extreme, we might say the best alignment from any given dynamic
 * programming problem is redundant with all other alignments from that
 # problem.  At the other extreme, we might say that any two alignments with
 * distinct starting points and edits are distinct.  The former is probably too
 * conservative for mate-finding DP problems.  The latter is certainly too
 * permissive, since two alignments that differ only in how gaps are arranged
 * should not be considered distinct.
 *
 * Some in-between solutions are:
 *
 * (a) If two alignments share an end point on either end, they are redundant.
 *     Otherwise, they are distinct.
 * (b) If two alignments share *both* end points, they are redundant.
 * (c) If two alignments share any cells in the DP table, they are redundant.
 * (d) 2 alignments are redundant if either end within N poss of each other
 * (e) Like (d) but both instead of either
 * (f, g) Like d, e, but where N is tied to maxgaps somehow
 *
 * Why not (a)?  One reason is that it's possible for two alignments to have
 * different start & end positions but share many cells.  Consider alignments 1
 * and 2 below; their end-points are labeled.
 *
 *  1 2
 *  \ \
 *    -\
 *      \
 *       \
 *        \
 *        -\
 *        \ \
 *        1 2
 *
 * 1 and 2 are distinct according to (a) but they share many cells in common.
 *
 * Why not (f, g)?  It fixes the problem with (a) above by forcing the
 * alignments to be spread so far that they can't possibly share diagonal cells
 * in common
 */
class SwAligner {

	typedef std::pair<size_t, size_t> SizeTPair;

	// States that the aligner can be in
	enum {
		STATE_UNINIT,  // init() hasn't been called yet
		STATE_INITED,  // init() has been called, but not align()
		STATE_ALIGNED, // align() has been called
	};
	
	const static size_t ALPHA_SIZE = 5;

public:

	explicit SwAligner() :
		sseU8fw_(DP_CAT),
		sseU8rc_(DP_CAT),
		sseI16fw_(DP_CAT),
		sseI16rc_(DP_CAT),
		state_(STATE_UNINIT),
		initedRead_(false),
		readSse16_(false),
		initedRef_(false),
		rfwbuf_(DP_CAT),
		btnstack_(DP_CAT),
		btcells_(DP_CAT),
		btdiag_(),
		btncand_(DP_CAT),
		btncanddone_(DP_CAT),
		btncanddoneSucc_(0),
		btncanddoneFail_(0),
		cper_(),
		cperMinlen_(),
		cperPerPow2_(),
		cperEf_(),
		cperTri_(),
		colstop_(0),
		lastsolcol_(0),
		cural_(0)
		ASSERT_ONLY(, cand_tmp_(DP_CAT))
	{ }

	/**
	 * Prepare the dynamic programming driver with a new read and a new scoring
	 * scheme.
	 */
	void initRead(
		const BTDnaString& rdfw, // read sequence for fw read
		const BTDnaString& rdrc, // read sequence for rc read
		const BTString& qufw,    // read qualities for fw read
		const BTString& qurc,    // read qualities for rc read
		size_t rdi,              // offset of first read char to align
		size_t rdf,              // offset of last read char to align
		const Scoring& sc);      // scoring scheme
	
	/**
	 * Initialize with a new alignment problem.
	 */
	void initRef(
		bool fw,               // whether to forward or revcomp read is aligning
		TRefId refidx,         // id of reference aligned against
		const DPRect& rect,    // DP rectangle
		char *rf,              // reference sequence
		size_t rfi,            // offset of first reference char to align to
		size_t rff,            // offset of last reference char to align to
		TRefOff reflen,        // length of reference sequence
		const Scoring& sc,     // scoring scheme
		TAlScore minsc,        // minimum score
		bool enable8,          // use 8-bit SSE if possible?
		size_t cminlen,        // minimum length for using checkpointing scheme
		size_t cpow2,          // interval b/t checkpointed diags; 1 << this
		bool doTri,            // triangular mini-fills?
		bool extend);          // true iff this is a seed extension

	/**
	 * Given a read, an alignment orientation, a range of characters in a
	 * referece sequence, and a bit-encoded version of the reference,
	 * execute the corresponding dynamic programming problem.
	 *
	 * Here we expect that the caller has already narrowed down the relevant
	 * portion of the reference (e.g. using a seed hit) and all we do is
	 * banded dynamic programming in the vicinity of that portion.  This is not
	 * the function to call if we are trying to solve the whole alignment
	 * problem with dynamic programming (that is TODO).
	 *
	 * Returns true if an alignment was found, false otherwise.
	 */
	void initRef(
		bool fw,               // whether to forward or revcomp read aligned
		TRefId refidx,         // reference aligned against
		const DPRect& rect,    // DP rectangle
		const BitPairReference& refs, // Reference strings
		TRefOff reflen,        // length of reference sequence
		const Scoring& sc,     // scoring scheme
		TAlScore minsc,        // minimum alignment score
		bool enable8,          // use 8-bit SSE if possible?
		size_t cminlen,        // minimum length for using checkpointing scheme
		size_t cpow2,          // interval b/t checkpointed diags; 1 << this
		bool doTri,            // triangular mini-fills?
		bool extend,           // true iff this is a seed extension
		size_t  upto,          // count the number of Ns up to this offset
		size_t& nsUpto);       // output: the number of Ns up to 'upto'

	/**
	 * Given a read, an alignment orientation, a range of characters in a
	 * referece sequence, and a bit-encoded version of the reference, set up
	 * and execute the corresponding ungapped alignment problem.  There can
	 * only be one solution.
	 *
	 * The caller has already narrowed down the relevant portion of the
	 * reference using, e.g., the location of a seed hit, or the range of
	 * possible fragment lengths if we're searching for the opposite mate in a
	 * pair.
	 */
	int ungappedAlign(
		const BTDnaString&      rd,     // read sequence (could be RC)
		const BTString&         qu,     // qual sequence (could be rev)
		const Coord&            coord,  // coordinate aligned to
		const BitPairReference& refs,   // Reference strings
		size_t                  reflen, // length of reference sequence
		const Scoring&          sc,     // scoring scheme
		bool                    ohang,  // allow overhang?
		TAlScore                minsc,  // minimum score
		SwResult&               res);   // put alignment result here

	/**
	 * Align read 'rd' to reference using read & reference information given
	 * last time init() was called.  Uses dynamic programming.
	 */
	bool align(RandomSource& rnd, TAlScore& best);
	
	/**
	 * Populate the given SwResult with information about the "next best"
	 * alignment if there is one.  If there isn't one, false is returned.  Note
	 * that false might be returned even though a call to done() would have
	 * returned false.
	 */
	bool nextAlignment(
		SwResult& res,
		TAlScore minsc,
		RandomSource& rnd);
	
	/**
	 * Print out an alignment result as an ASCII DP table.
	 */
	void printResultStacked(
		const SwResult& res,
		std::ostream& os)
	{
		res.alres.printStacked(*rd_, os);
	}
	
	/**
	 * Return true iff there are no more solution cells to backtace from.
	 * Note that this may return false in situations where there are actually
	 * no more solutions, but that hasn't been discovered yet.
	 */
	bool done() const {
		assert(initedRead() && initedRef());
		return cural_ == btncand_.size();
	}

	/**
	 * Return true iff this SwAligner has been initialized with a read to align.
	 */
	inline bool initedRef() const { return initedRef_; }

	/**
	 * Return true iff this SwAligner has been initialized with a reference to
	 * align against.
	 */
	inline bool initedRead() const { return initedRead_; }
	
	/**
	 * Reset, signaling that we're done with this dynamic programming problem
	 * and won't be asking for any more alignments.
	 */
	inline void reset() { initedRef_ = initedRead_ = false; }

#ifndef NDEBUG
	/**
	 * Check that aligner is internally consistent.
	 */
	bool repOk() const {
		assert_gt(dpRows(), 0);
		// Check btncand_
		for(size_t i = 0; i < btncand_.size(); i++) {
			assert(btncand_[i].repOk());
			assert_geq(btncand_[i].score, minsc_);
		}
		return true;
	}
#endif
	
	/**
	 * Return the number of alignments given out so far by nextAlignment().
	 */
	size_t numAlignmentsReported() const { return cural_; }

	/**
	 * Merge tallies in the counters related to filling the DP table.
	 */
	void merge(
		SSEMetrics& sseU8ExtendMet,
		SSEMetrics& sseU8MateMet,
		SSEMetrics& sseI16ExtendMet,
		SSEMetrics& sseI16MateMet,
		uint64_t&   nbtfiltst,
		uint64_t&   nbtfiltsc,
		uint64_t&   nbtfiltdo)
	{
		sseU8ExtendMet.merge(sseU8ExtendMet_);
		sseU8MateMet.merge(sseU8MateMet_);
		sseI16ExtendMet.merge(sseI16ExtendMet_);
		sseI16MateMet.merge(sseI16MateMet_);
		nbtfiltst += nbtfiltst_;
		nbtfiltsc += nbtfiltsc_;
		nbtfiltdo += nbtfiltdo_;
	}
	
	/**
	 * Reset all the counters related to filling in the DP table to 0.
	 */
	void resetCounters() {
		sseU8ExtendMet_.reset();
		sseU8MateMet_.reset();
		sseI16ExtendMet_.reset();
		sseI16MateMet_.reset();
		nbtfiltst_ = nbtfiltsc_ = nbtfiltdo_ = 0;
	}
	
	/**
	 * Return the size of the DP problem.
	 */
	size_t size() const {
		return dpRows() * (rff_ - rfi_);
	}

protected:
	
	/**
	 * Return the number of rows that will be in the dynamic programming table.
	 */
	inline size_t dpRows() const {
		assert(initedRead_);
		return rdf_ - rdi_;
	}

	/**
	 * Align nucleotides from read 'rd' to the reference string 'rf' using
	 * vector instructions.  Return the score of the best alignment found, or
	 * the minimum integer if an alignment could not be found.  Flag is set to
	 * 0 if an alignment is found, -1 if no valid alignment is found, or -2 if
	 * the score saturated at any point during alignment.
	 */
	TAlScore alignNucleotidesEnd2EndSseU8(  // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignNucleotidesLocalSseU8(    // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignNucleotidesEnd2EndSseI16( // signed 16-bit elements
		int& flag, bool debug);
	TAlScore alignNucleotidesLocalSseI16(   // signed 16-bit elements
		int& flag, bool debug);
	
	/**
	 * Aligns by filling a dynamic programming matrix with the SSE-accelerated,
	 * banded DP approach of Farrar.  As it goes, it determines which cells we
	 * might backtrace from and tallies the best (highest-scoring) N backtrace
	 * candidate cells per diagonal.  Also returns the alignment score of the best
	 * alignment in the matrix.
	 *
	 * This routine does *not* maintain a matrix holding the entire matrix worth of
	 * scores, nor does it maintain any other dense O(mn) data structure, as this
	 * would quickly exhaust memory for queries longer than about 10,000 kb.
	 * Instead, in the fill stage it maintains two columns worth of scores at a
	 * time (current/previous, or right/left) - these take O(m) space.  When
	 * finished with the current column, it determines which cells from the
	 * previous column, if any, are candidates we might backtrace from to find a
	 * full alignment.  A candidate cell has a score that rises above the threshold
	 * and isn't improved upon by a match in the next column.  The best N
	 * candidates per diagonal are stored in a O(m + n) data structure.
	 */
	TAlScore alignGatherEE8(                // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignGatherLoc8(               // unsigned 8-bit elements
		int& flag, bool debug);
	TAlScore alignGatherEE16(               // signed 16-bit elements
		int& flag, bool debug);
	TAlScore alignGatherLoc16(              // signed 16-bit elements
		int& flag, bool debug);
	
	/**
	 * Build query profile look up tables for the read.  The query profile look
	 * up table is organized as a 1D array indexed by [i][j] where i is the
	 * reference character in the current DP column (0=A, 1=C, etc), and j is
	 * the segment of the query we're currently working on.
	 */
	void buildQueryProfileEnd2EndSseU8(bool fw);
	void buildQueryProfileLocalSseU8(bool fw);

	/**
	 * Build query profile look up tables for the read.  The query profile look
	 * up table is organized as a 1D array indexed by [i][j] where i is the
	 * reference character in the current DP column (0=A, 1=C, etc), and j is
	 * the segment of the query we're currently working on.
	 */
	void buildQueryProfileEnd2EndSseI16(bool fw);
	void buildQueryProfileLocalSseI16(bool fw);
	
	bool gatherCellsNucleotidesLocalSseU8(TAlScore best);
	bool gatherCellsNucleotidesEnd2EndSseU8(TAlScore best);

	bool gatherCellsNucleotidesLocalSseI16(TAlScore best);
	bool gatherCellsNucleotidesEnd2EndSseI16(TAlScore best);

	bool backtraceNucleotidesLocalSseU8(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtraceNucleotidesLocalSseI16(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtraceNucleotidesEnd2EndSseU8(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtraceNucleotidesEnd2EndSseI16(
		TAlScore       escore, // in: expected score
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t&        nbts,   // out: # backtracks
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		RandomSource&  rand);  // random gen, to choose among equal paths

	bool backtrace(
		TAlScore       escore, // in: expected score
		bool           fill,   // in: use mini-fill?
		bool           usecp,  // in: use checkpoints?
		SwResult&      res,    // out: store results (edits and scores) here
		size_t&        off,    // out: store diagonal projection of origin
		size_t         row,    // start in this rectangle row
		size_t         col,    // start in this rectangle column
		size_t         maxiter,// max # extensions to try
		size_t&        niter,  // # extensions tried
		RandomSource&  rnd)    // random gen, to choose among equal paths
	{
		bter_.initBt(
			escore,              // in: alignment score
			row,                 // in: start in this row
			col,                 // in: start in this column
			fill,                // in: use mini-fill?
			usecp,               // in: use checkpoints?
			cperTri_,            // in: triangle-shaped mini-fills?
			rnd);                // in: random gen, to choose among equal paths
		assert(bter_.inited());
		size_t nrej = 0;
		if(bter_.emptySolution()) {
			return false;
		} else {
			return bter_.nextAlignment(maxiter, res, off, nrej, niter, rnd);
		}
	}

	const BTDnaString  *rd_;     // read sequence
	const BTString     *qu_;     // read qualities
	const BTDnaString  *rdfw_;   // read sequence for fw read
	const BTDnaString  *rdrc_;   // read sequence for rc read
	const BTString     *qufw_;   // read qualities for fw read
	const BTString     *qurc_;   // read qualities for rc read
	TReadOff            rdi_;    // offset of first read char to align
	TReadOff            rdf_;    // offset of last read char to align
	bool                fw_;     // true iff read sequence is original fw read
	TRefId              refidx_; // id of reference aligned against
	TRefOff             reflen_; // length of entire reference sequence
	const DPRect*       rect_;   // DP rectangle
	char               *rf_;     // reference sequence
	TRefOff             rfi_;    // offset of first ref char to align to
	TRefOff             rff_;    // offset of last ref char to align to (excl)
	size_t              rdgap_;  // max # gaps in read
	size_t              rfgap_;  // max # gaps in reference
	bool                enable8_;// enable 8-bit sse
	bool                extend_; // true iff this is a seed-extend problem
	const Scoring      *sc_;     // penalties for edit types
	TAlScore            minsc_;  // penalty ceiling for valid alignments
	int                 nceil_;  // max # Ns allowed in ref portion of aln

	bool                sse8succ_;  // whether 8-bit worked
	bool                sse16succ_; // whether 16-bit worked
	SSEData             sseU8fw_;   // buf for fw query, 8-bit score
	SSEData             sseU8rc_;   // buf for rc query, 8-bit score
	SSEData             sseI16fw_;  // buf for fw query, 16-bit score
	SSEData             sseI16rc_;  // buf for rc query, 16-bit score
	bool                sseU8fwBuilt_;   // built fw query profile, 8-bit score
	bool                sseU8rcBuilt_;   // built rc query profile, 8-bit score
	bool                sseI16fwBuilt_;  // built fw query profile, 16-bit score
	bool                sseI16rcBuilt_;  // built rc query profile, 16-bit score

	SSEMetrics			sseU8ExtendMet_;
	SSEMetrics			sseU8MateMet_;
	SSEMetrics			sseI16ExtendMet_;
	SSEMetrics			sseI16MateMet_;

	int                 state_;        // state
	bool                initedRead_;   // true iff initialized with initRead
	bool                readSse16_;    // true -> sse16 from now on for read
	bool                initedRef_;    // true iff initialized with initRef
	EList<uint32_t>     rfwbuf_;       // buffer for wordized ref stretches
	
	EList<DpNucFrame>    btnstack_;    // backtrace stack for nucleotides
	EList<SizeTPair>     btcells_;     // cells involved in current backtrace

	NBest<DpBtCandidate> btdiag_;      // per-diagonal backtrace candidates
	EList<DpBtCandidate> btncand_;     // cells we might backtrace from
	EList<DpBtCandidate> btncanddone_; // candidates that we investigated
	size_t              btncanddoneSucc_; // # investigated and succeeded
	size_t              btncanddoneFail_; // # investigated and failed
	
	BtBranchTracer       bter_;        // backtracer
	
	Checkpointer         cper_;        // structure for saving checkpoint cells
	size_t               cperMinlen_;  // minimum length for using checkpointer
	size_t               cperPerPow2_; // checkpoint every 1 << perpow2 diags (& next)
	bool                 cperEf_;      // store E and F in addition to H?
	bool                 cperTri_;     // checkpoint for triangular mini-fills?
	
	size_t              colstop_;      // bailed on DP loop after this many cols
	size_t              lastsolcol_;   // last DP col with valid cell
	size_t              cural_;        // index of next alignment to be given
	
	uint64_t nbtfiltst_; // # candidates filtered b/c starting cell was seen
	uint64_t nbtfiltsc_; // # candidates filtered b/c score uninteresting
	uint64_t nbtfiltdo_; // # candidates filtered b/c dominated by other cell
	
	ASSERT_ONLY(SStringExpandable<uint32_t> tmp_destU32_);
	ASSERT_ONLY(BTDnaString tmp_editstr_, tmp_refstr_);
	ASSERT_ONLY(EList<DpBtCandidate> cand_tmp_);
};

#endif /*ALIGNER_SW_H_*/