hisat-3n/aligner_cache.h

1014 lines
26 KiB
C
Raw Normal View History

2025-01-18 13:09:52 +00:00
/*
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
*
* This file is part of Bowtie 2.
*
* Bowtie 2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Bowtie 2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ALIGNER_CACHE_H_
#define ALIGNER_CACHE_H_
/**
* CACHING
*
* By caching the results of some alignment sub-problems, we hope to
* enable a "fast path" for read alignment whereby answers are mostly
* looked up rather than calculated from scratch. This is particularly
* effective when the input is sorted or otherwise grouped in a way
* that brings together reads with (at least some) seed sequences in
* common.
*
* But the cache is also where results are held, regardless of whether
* the results are maintained & re-used across reads.
*
* The cache consists of two linked portions:
*
* 1. A multimap from seed strings (i.e. read substrings) to reference strings
* that are within some edit distance (roughly speaking). This is the "seed
* multimap".
*
* Key: Read substring (2-bit-per-base encoded + length)
* Value: Set of reference substrings (i.e. keys into the suffix
* array multimap).
*
* 2. A multimap from reference strings to the corresponding elements of the
* suffix array. Elements are filled in with reference-offset info as it's
* calculated. This is the "suffix array multimap"
*
* Key: Reference substring (2-bit-per-base encoded + length)
* Value: (a) top from BWT, (b) length of range, (c) offset of first
* range element in the SA range list (the "salist")
*
* For both multimaps, we use a combo Red-Black tree and EList. The payload in
* the Red-Black tree nodes points to a range in the EList.
*/
#include <iostream>
#include "ds.h"
#include "read.h"
#include "threading.h"
#include "mem_ids.h"
#include "simple_func.h"
#include "btypes.h"
// Page size handed to the cache's Pool, RedBlack maps and PList containers
// below (as a constructor argument or template parameter).
// NOTE(review): presumably a size in bytes (16 KiB) -- confirm against ds.h.
#define CACHE_PAGE_SZ (16 * 1024)
// Slice over a paged list of TIndexOffU values; SATuple stores its reference
// offsets in one of these.
typedef PListSlice<TIndexOffU, CACHE_PAGE_SZ> TSlice;
/**
 * Key for the query multimap: a read substring packed two bits per base
 * into a 64-bit word, together with its length.
 *
 * A key is "cacheable" only if the substring is at most 32 characters long
 * and contains no character with code 4; otherwise 'len' is set to the
 * sentinel 0xffffffff.
 */
struct QKey {

	/**
	 * Initialize invalid QKey.
	 */
	QKey() { reset(); }

	/**
	 * Initialize QKey from DNA string.  In assert-enabled builds 'tmp' is
	 * scratch space used to verify the encoding round-trips.
	 */
	QKey(const BTDnaString& s ASSERT_ONLY(, BTDnaString& tmp)) {
		init(s ASSERT_ONLY(, tmp));
	}

	/**
	 * Initialize QKey from DNA string.  Rightmost character is placed in the
	 * least significant bitpair.
	 *
	 * Returns true iff the string was cacheable.  When it returns false,
	 * 'len' is 0xffffffff; if the failure was caused by a character with
	 * code 4, 'seq' still holds the characters packed before it.
	 */
	bool init(
		const BTDnaString& s
		ASSERT_ONLY(, BTDnaString& tmp))
	{
		seq = 0;
		len = (uint32_t)s.length();
		ASSERT_ONLY(tmp.clear());
		if(len > 32) {
			// Too long to fit in 64 bits at 2 bits per base
			len = 0xffffffff;
			return false; // wasn't cacheable
		}
		// Rightmost char of 's' goes in the least significant bitpair
		for(size_t i = 0; i < 32 && i < s.length(); i++) {
			const int c = (int)s.get(i);
			assert_range(0, 4, c);
			if(c == 4) {
				// Code 4 can't be represented in two bits
				len = 0xffffffff;
				return false; // wasn't cacheable
			}
			seq = (seq << 2) | (uint64_t)c;
		}
		ASSERT_ONLY(toString(tmp));
		assert(sstr_eq(tmp, s));
		assert_leq(len, 32);
		return true; // was cacheable
	}

	/**
	 * Convert this key to a DNA string.  The key itself is not modified,
	 * so this may be called on a const QKey.
	 */
	void toString(BTDnaString& s) const {
		s.resize(len);
		uint64_t sq = seq;
		// Unpack from the least significant bitpair (rightmost char) leftward
		for(int i = (int)(len - 1); i >= 0; i--) {
			s.set((uint32_t)(sq & 3), i);
			sq >>= 2;
		}
	}

	/**
	 * Return true iff the read substring is cacheable.
	 */
	bool cacheable() const { return len != 0xffffffff; }

	/**
	 * Reset to uninitialized state.
	 */
	void reset() { seq = 0; len = 0xffffffff; }

	/**
	 * True -> my key is less than the given key.  Keys are ordered by
	 * packed sequence first, then by length.
	 */
	bool operator<(const QKey& o) const {
		return seq < o.seq || (seq == o.seq && len < o.len);
	}

	/**
	 * True -> my key is greater than the given key.
	 */
	bool operator>(const QKey& o) const {
		return !(*this < o || *this == o);
	}

	/**
	 * True -> my key is equal to the given key.
	 */
	bool operator==(const QKey& o) const {
		return seq == o.seq && len == o.len;
	}

	/**
	 * True -> my key is not equal to the given key.
	 */
	bool operator!=(const QKey& o) const {
		return !(*this == o);
	}

#ifndef NDEBUG
	/**
	 * Check that this is a valid, initialized QKey.
	 */
	bool repOk() const {
		return len != 0xffffffff;
	}
#endif

	uint64_t seq; // sequence, 2 bits per base
	uint32_t len; // length of sequence, or 0xffffffff if invalid/uncacheable
};
template <typename index_t>
class AlignmentCache;

/**
 * Payload for the query multimap.  Describes a contiguous run of entries
 * in the reference-substring list (the "qlist"): where the run starts, how
 * many reference substrings it holds, and how many elements those
 * substrings account for in total.
 */
template <typename index_t>
class QVal {

public:

	/**
	 * Construct a QVal in the invalid state (same state reset() yields).
	 */
	QVal() :
		i_(0),
		rangen_((index_t)OFF_MASK),
		eltn_((index_t)OFF_MASK)
	{ }

	/**
	 * Offset into the qlist of the first reference substring.
	 */
	index_t offset() const {
		return i_;
	}

	/**
	 * Number of reference substrings associated with the read substring.
	 */
	index_t numRanges() const {
		assert(valid());
		return rangen_;
	}

	/**
	 * Total number of elements over all associated reference substrings.
	 */
	index_t numElts() const {
		assert(valid());
		return eltn_;
	}

	/**
	 * True iff no reference substrings are associated with the read
	 * substring.
	 */
	bool empty() const {
		assert(valid());
		return rangen_ == 0;
	}

	/**
	 * True iff the QVal has been initialized (range count isn't the
	 * OFF_MASK sentinel).
	 */
	bool valid() const {
		return !(rangen_ == (index_t)OFF_MASK);
	}

	/**
	 * Put the QVal back in the invalid state.
	 */
	void reset() {
		i_      = 0;
		eltn_   = (index_t)OFF_MASK;
		rangen_ = eltn_;
	}

	/**
	 * Set all three fields of the QVal.
	 */
	void init(index_t i, index_t ranges, index_t elts) {
		i_      = i;
		rangen_ = ranges;
		eltn_   = elts;
	}

	/**
	 * Account for one more range holding 'numElts' elements.
	 */
	void addRange(index_t numElts) {
		++rangen_;
		eltn_ = eltn_ + numElts;
	}

#ifndef NDEBUG
	/**
	 * Check that this QVal is internally consistent and consistent
	 * with the contents of the given cache.
	 */
	bool repOk(const AlignmentCache<index_t>& ac) const;
#endif

protected:

	index_t i_;      // idx of first elt in qlist
	index_t rangen_; // # ranges (= # associated reference substrings)
	index_t eltn_;   // # elements (total)
};
/**
 * Key for the suffix array multimap: the reference substring and its
 * length.  Same as QKey so I typedef it.
 */
typedef QKey SAKey;

/**
 * Payload for the suffix array multimap: (a) the top element of the
 * range in BWT (and in BWT'), (b) the offset of the first elt in the
 * salist, (c) length of the range.
 */
template <typename index_t>
struct SAVal {

	/**
	 * Construct an invalid SAVal: tops and index are value-initialized and
	 * the length is set to the OFF_MASK sentinel.
	 */
	SAVal() : topf(), topb(), i(), len(OFF_MASK) { }

	/**
	 * Return true iff the SAVal is valid.  Const-qualified so it can be
	 * called through the const references handed out by the cache.
	 */
	bool valid() const { return len != (index_t)OFF_MASK; }

#ifndef NDEBUG
	/**
	 * Check that this SAVal is internally consistent and consistent
	 * with the contents of the given cache.
	 */
	bool repOk(const AlignmentCache<index_t>& ac) const;
#endif

	/**
	 * Initialize the SAVal.
	 *
	 * tf: top in BWT, tb: top in BWT', ii: index of first element in the
	 * salist, ln: length of the range.
	 */
	void init(
		index_t tf,
		index_t tb,
		index_t ii,
		index_t ln)
	{
		topf = tf;
		topb = tb;
		i = ii;
		len = ln;
	}

	index_t topf; // top in BWT
	index_t topb; // top in BWT'
	index_t i;    // idx of first elt in salist
	index_t len;  // length of range
};
/**
 * One data structure that encapsulates all of the cached information
 * associated with a particular reference substring.  This is useful
 * for summarizing what info should be added to the cache for a partial
 * alignment.
 */
template <typename index_t>
class SATuple {

public:

	/**
	 * Construct an empty (reset) tuple.
	 */
	SATuple() { reset(); }

	/**
	 * Construct a tuple from a key, the two range tops and a slice of
	 * offsets.
	 */
	SATuple(SAKey k, index_t tf, index_t tb, TSlice o) {
		init(k, tf, tb, o);
	}

	/**
	 * Set every field of the tuple.
	 */
	void init(SAKey k, index_t tf, index_t tb, TSlice o) {
		key = k; topf = tf; topb = tb; offs = o;
	}

	/**
	 * Initialize this SATuple from a subrange of the SATuple 'src'.  The
	 * BWT' top of a subrange isn't known, so it is set to OFF_MASK.
	 */
	void init(const SATuple& src, index_t first, index_t last) {
		assert_neq((index_t)OFF_MASK, src.topb);
		key = src.key;
		topf = (index_t)(src.topf + first);
		topb = (index_t)OFF_MASK; // unknown!
		offs.init(src.offs, first, last);
	}

#ifndef NDEBUG
	/**
	 * Check that this SATuple is internally consistent and that its
	 * PListSlice is consistent with its backing PList.
	 */
	bool repOk() const {
		assert(offs.repOk());
		return true;
	}
#endif

	/**
	 * Function for ordering SATuples.  This is used when prioritizing which to
	 * explore first when extending seed hits into full alignments.  Smaller
	 * ranges get higher priority and we use 'top' to break ties, though any
	 * way of breaking a tie would be fine.
	 */
	bool operator<(const SATuple& o) const {
		if(offs.size() != o.offs.size()) {
			return offs.size() < o.offs.size();
		}
		return topf < o.topf;
	}

	/**
	 * Mirror image of operator<: larger ranges first, ties broken on topf.
	 */
	bool operator>(const SATuple& o) const {
		if(offs.size() != o.offs.size()) {
			return offs.size() > o.offs.size();
		}
		return topf > o.topf;
	}

	/**
	 * True iff key, both tops and the offset slice all match.
	 */
	bool operator==(const SATuple& o) const {
		return key == o.key && topf == o.topf && topb == o.topb && offs == o.offs;
	}

	/**
	 * Return the tuple to its empty state.  The key is reset as well so
	 * that a reused tuple doesn't keep a stale key (which would make two
	 * reset tuples compare unequal).
	 */
	void reset() {
		key.reset();
		topf = topb = (index_t)OFF_MASK;
		offs.reset();
	}

	/**
	 * Set the length to be at most the original length.
	 */
	void setLength(index_t nlen) {
		assert_leq(nlen, offs.size());
		offs.setLength(nlen);
	}

	/**
	 * Return the number of times this reference substring occurs in the
	 * reference, which is also the size of the 'offs' TSlice.
	 */
	index_t size() const { return (index_t)offs.size(); }

	// bot/length of SA range equals offs.size()
	SAKey   key;  // sequence key
	index_t topf; // top in BWT index
	index_t topb; // top in BWT' index
	TSlice  offs; // offsets
};
/**
* Encapsulate the data structures and routines that constitute a
* particular cache, i.e., a particular stratum of the cache system,
* which might comprise many strata.
*
* Each thread has a "current-read" AlignmentCache which is used to
* build and store subproblem results as alignment is performed. When
* we're finished with a read, we might copy the cached results for
* that read (and perhaps a bundle of other recently-aligned reads) to
* a higher-level "across-read" cache. Higher-level caches may or may
* not be shared among threads.
*
* A cache consists chiefly of two multimaps, each implemented as a
* Red-Black tree map backed by an EList. A 'version' counter is
* incremented every time the cache is cleared.
*/
template <typename index_t>
class AlignmentCache {
typedef RedBlackNode<QKey, QVal<index_t> > QNode;
typedef RedBlackNode<SAKey, SAVal<index_t> > SANode;
typedef PList<SAKey, CACHE_PAGE_SZ> TQList;
typedef PList<index_t, CACHE_PAGE_SZ> TSAList;
public:
AlignmentCache(
uint64_t bytes,
bool shared) :
pool_(bytes, CACHE_PAGE_SZ, CA_CAT),
qmap_(CACHE_PAGE_SZ, CA_CAT),
qlist_(CA_CAT),
samap_(CACHE_PAGE_SZ, CA_CAT),
salist_(CA_CAT),
shared_(shared),
mutex_m(),
version_(0)
{
}
/**
* Given a QVal, populate the given EList of SATuples with records
* describing all of the cached information about the QVal's
* reference substrings.
*/
template <int S>
void queryQval(
const QVal<index_t>& qv,
EList<SATuple<index_t>, S>& satups,
index_t& nrange,
index_t& nelt,
bool getLock = true)
{
ThreadSafe ts(lockPtr(), shared_ && getLock);
assert(qv.repOk(*this));
const index_t refi = qv.offset();
const index_t reff = refi + qv.numRanges();
// For each reference sequence sufficiently similar to the
// query sequence in the QKey...
for(index_t i = refi; i < reff; i++) {
// Get corresponding SAKey, containing similar reference
// sequence & length
SAKey sak = qlist_.get(i);
// Shouldn't have identical keys in qlist_
assert(i == refi || qlist_.get(i) != qlist_.get(i-1));
// Get corresponding SANode
SANode *n = samap_.lookup(sak);
assert(n != NULL);
const SAVal<index_t>& sav = n->payload;
assert(sav.repOk(*this));
if(sav.len > 0) {
nrange++;
satups.expand();
satups.back().init(sak, sav.topf, sav.topb, TSlice(salist_, sav.i, sav.len));
nelt += sav.len;
#ifndef NDEBUG
// Shouldn't add consecutive identical entries too satups
if(i > refi) {
const SATuple<index_t> b1 = satups.back();
const SATuple<index_t> b2 = satups[satups.size()-2];
assert(b1.key != b2.key || b1.topf != b2.topf || b1.offs != b2.offs);
}
#endif
}
}
}
/**
* Return true iff the cache has no entries in it.
*/
bool empty() const {
bool ret = qmap_.empty();
assert(!ret || qlist_.empty());
assert(!ret || samap_.empty());
assert(!ret || salist_.empty());
return ret;
}
/**
* Add a new query key ('qk'), usually a 2-bit encoded substring of
* the read) as the key in a new Red-Black node in the qmap and
* return a pointer to the node's QVal.
*
* The expectation is that the caller is about to set about finding
* associated reference substrings, and that there will be future
* calls to addOnTheFly to add associations to reference substrings
* found.
*/
QVal<index_t>* add(
const QKey& qk,
bool *added,
bool getLock = true)
{
ThreadSafe ts(lockPtr(), shared_ && getLock);
assert(qk.cacheable());
QNode *n = qmap_.add(pool(), qk, added);
return (n != NULL ? &n->payload : NULL);
}
/**
* Add a new association between a read sequnce ('seq') and a
* reference sequence ('')
*/
bool addOnTheFly(
QVal<index_t>& qv, // qval that points to the range of reference substrings
const SAKey& sak, // the key holding the reference substring
index_t topf, // top range elt in BWT index
index_t botf, // bottom range elt in BWT index
index_t topb, // top range elt in BWT' index
index_t botb, // bottom range elt in BWT' index
bool getLock = true);
/**
* Clear the cache, i.e. turn it over. All HitGens referring to
* ranges in this cache will become invalid and the corresponding
* reads will have to be re-aligned.
*/
void clear(bool getLock = true) {
ThreadSafe ts(lockPtr(), shared_ && getLock);
pool_.clear();
qmap_.clear();
qlist_.clear();
samap_.clear();
salist_.clear();
version_++;
}
/**
* Return the number of keys in the query multimap.
*/
index_t qNumKeys() const { return (index_t)qmap_.size(); }
/**
* Return the number of keys in the suffix array multimap.
*/
index_t saNumKeys() const { return (index_t)samap_.size(); }
/**
* Return the number of elements in the reference substring list.
*/
index_t qSize() const { return (index_t)qlist_.size(); }
/**
* Return the number of elements in the SA range list.
*/
index_t saSize() const { return (index_t)salist_.size(); }
/**
* Return the pool.
*/
Pool& pool() { return pool_; }
/**
* Return the lock object.
*/
MUTEX_T& lock() {
return mutex_m;
}
/**
* Return a const pointer to the lock object. This allows us to
* write const member functions that grab the lock.
*/
MUTEX_T* lockPtr() const {
return const_cast<MUTEX_T*>(&mutex_m);
}
/**
* Return true iff this cache is shared among threads.
*/
bool shared() const { return shared_; }
/**
* Return the current "version" of the cache, i.e. the total number
* of times it has turned over since its creation.
*/
uint32_t version() const { return version_; }
protected:
Pool pool_; // dispenses memory pages
RedBlack<QKey, QVal<index_t> > qmap_; // map from query substrings to reference substrings
TQList qlist_; // list of reference substrings
RedBlack<SAKey, SAVal<index_t> > samap_; // map from reference substrings to SA ranges
TSAList salist_; // list of SA ranges
bool shared_; // true -> this cache is global
MUTEX_T mutex_m; // mutex used for syncronization in case the the cache is shared.
uint32_t version_; // cache version
};
/**
 * Interface used to query and update a set of caches: the current-read
 * cache plus a thread-local unsynchronized cache and a shared
 * synchronized cache.  The local and shared caches can be NULL; the
 * current-read cache must not be.
 */
template <typename index_t>
class AlignmentCacheIface {

public:

	/**
	 * Bind the interface to its caches.  'current' must be non-NULL.
	 */
	AlignmentCacheIface(
		AlignmentCache<index_t> *current,
		AlignmentCache<index_t> *local,
		AlignmentCache<index_t> *shared) :
		qk_(),
		qv_(NULL),
		cacheable_(false),
		rangen_(0),
		eltsn_(0),
		current_(current),
		local_(local),
		shared_(shared)
	{
		assert(current_ != NULL);
	}

	// NOTE: the disabled section below predates the index_t template
	// parameter and is retained for reference only.
#if 0
	/**
	 * Query the relevant set of caches, looking for a QVal to go with
	 * the provided QKey.  If the QVal is found in a cache other than
	 * the current-read cache, it is copied into the current-read cache
	 * first and the QVal pointer for the current-read cache is
	 * returned.  This function never returns a pointer from any cache
	 * other than the current-read cache.  If the QVal could not be
	 * found in any cache OR if the QVal was found in a cache other
	 * than the current-read cache but could not be copied into the
	 * current-read cache, NULL is returned.
	 */
	QVal* queryCopy(const QKey& qk, bool getLock = true) {
		assert(qk.cacheable());
		AlignmentCache* caches[3] = { current_, local_, shared_ };
		for(int i = 0; i < 3; i++) {
			if(caches[i] == NULL) continue;
			QVal* qv = caches[i]->query(qk, getLock);
			if(qv != NULL) {
				if(i == 0) return qv;
				if(!current_->copy(qk, *qv, *caches[i], getLock)) {
					// Exhausted memory in the current cache while
					// attempting to copy in the qk
					return NULL;
				}
				QVal* curqv = current_->query(qk, getLock);
				assert(curqv != NULL);
				return curqv;
			}
		}
		return NULL;
	}

	/**
	 * Query the relevant set of caches, looking for a QVal to go with
	 * the provided QKey.  If a QVal is found and which is non-NULL,
	 * *which is set to 0 if the qval was found in the current-read
	 * cache, 1 if it was found in the local across-read cache, and 2
	 * if it was found in the shared across-read cache.
	 */
	inline QVal* query(
		const QKey& qk,
		AlignmentCache** which,
		bool getLock = true)
	{
		assert(qk.cacheable());
		AlignmentCache* caches[3] = { current_, local_, shared_ };
		for(int i = 0; i < 3; i++) {
			if(caches[i] == NULL) continue;
			QVal* qv = caches[i]->query(qk, getLock);
			if(qv != NULL) {
				if(which != NULL) *which = caches[i];
				return qv;
			}
		}
		return NULL;
	}
#endif

	/**
	 * This function is called whenever we start to align a new read or
	 * read substring.  We make key for it and store the key in qk_.
	 * If the sequence is uncacheable, we don't actually add it to the
	 * map (an internal buffer QVal is used instead) but the
	 * corresponding reference substrings are still added to the qlist_.
	 *
	 * The cache lookup that used to report "found in cache" is disabled
	 * (see the commented-out code below), so 'qual' and 'qv' are currently
	 * not used and the function only ever returns:
	 *  -1 if the current-read cache could not supply a QVal (out of memory)
	 *   0 otherwise; the caller needs to search for the sequence
	 */
	int beginAlign(
		const BTDnaString& seq,
		const BTString& qual,
		QVal<index_t>& qv,              // out: filled in if we find it in the cache
		bool getLock = true)
	{
		assert(repOk());
		qk_.init(seq ASSERT_ONLY(, tmpdnastr_));
		//if(qk_.cacheable() && (qv_ = current_->query(qk_, getLock)) != NULL) {
		//	// qv_ holds the answer
		//	assert(qv_->valid());
		//	qv = *qv_;
		//	resetRead();
		//	return 1; // found in cache
		//} else
		if(qk_.cacheable()) {
			// Get (or create) the QNode for this key in the current-read
			// cache's Red-Black map
			qv_ = current_->add(qk_, &cacheable_, getLock);
		} else {
			// 'seq' isn't cacheable; tally results in the scratch QVal
			// without adding anything to the map
			qv_ = &qvbuf_;
		}
		if(qv_ == NULL) {
			resetRead();
			return -1; // Out of memory
		}
		qv_->reset();
		return 0; // Need to search for it
	}

	ASSERT_ONLY(BTDnaString tmpdnastr_);

	/**
	 * Called when is finished aligning a read (and so is finished
	 * adding associated reference strings).  Returns a copy of the
	 * final QVal object and resets the alignment state of the
	 * current-read cache.  Must only be called while aligning(), i.e.
	 * after a beginAlign() that did not return -1.
	 *
	 * Committing the result to the next cache up in the hierarchy is
	 * currently disabled (see the #if 0 section).
	 */
	QVal<index_t> finishAlign(bool getLock = true) {
		// qv_ is dereferenced below, so there must be an alignment in
		// progress
		assert(aligning());
		if(!qv_->valid()) {
			// Nothing was added; report an empty (but valid) result
			qv_->init(0, 0, 0);
		}
		// Copy this pointer because we're about to reset the qv_ field
		// to NULL
		QVal<index_t>* qv = qv_;
		// Commit the contents of the current-read cache to the next
		// cache up in the hierarchy.
		// If qk is cacheable, then it must be in the cache
#if 0
		if(qk_.cacheable()) {
			AlignmentCache* caches[3] = { current_, local_, shared_ };
			ASSERT_ONLY(AlignmentCache* which);
			ASSERT_ONLY(QVal* qv2 = query(qk_, &which, true));
			assert(qv2 == qv);
			assert(which == current_);
			for(int i = 1; i < 3; i++) {
				if(caches[i] != NULL) {
					// Copy this key/value pair to the to the higher
					// level cache and, if its memory is exhausted,
					// clear the cache and try again.
					caches[i]->clearCopy(qk_, *qv_, *current_, getLock);
					break;
				}
			}
		}
#endif
		// Reset the state in this iface in preparation for the next
		// alignment.
		resetRead();
		assert(repOk());
		return *qv;
	}

	/**
	 * A call to this member indicates that the caller has finished
	 * with the last read (if any) and is ready to work on the next.
	 * The current-read cache is cleared and the per-read state reset.
	 */
	void nextRead() {
		current_->clear();
		resetRead();
		assert(!aligning());
	}

	/**
	 * Return true iff we're in the middle of aligning a sequence.
	 */
	bool aligning() const {
		return qv_ != NULL;
	}

	/**
	 * Clears every non-NULL cache: current-read, local and shared.
	 */
	void clear() {
		if(current_ != NULL) current_->clear();
		if(local_   != NULL) local_->clear();
		if(shared_  != NULL) shared_->clear();
	}

	/**
	 * Add an alignment to the running list of alignments being
	 * compiled for the current read in the current-read cache.  On
	 * success the per-read range and element tallies are updated and
	 * true is returned; false means the cache reported failure.
	 */
	bool addOnTheFly(
		const BTDnaString& rfseq, // reference sequence close to read seq
		index_t topf,             // top in BWT index
		index_t botf,             // bot in BWT index
		index_t topb,             // top in BWT' index
		index_t botb,             // bot in BWT' index
		bool getLock = true)      // true -> lock is not held by caller
	{
		assert(aligning());
		assert(repOk());
		ASSERT_ONLY(BTDnaString tmp);
		SAKey sak(rfseq ASSERT_ONLY(, tmp));
		//assert(sak.cacheable());
		if(current_->addOnTheFly((*qv_), sak, topf, botf, topb, botb, getLock)) {
			rangen_++;
			eltsn_ += (botf-topf);
			return true;
		}
		return false;
	}

	/**
	 * Given a QVal, populate the given EList of SATuples with records
	 * describing all of the cached information about the QVal's
	 * reference substrings.  Forwards to the current-read cache.
	 */
	template<int S>
	void queryQval(
		const QVal<index_t>& qv,
		EList<SATuple<index_t>, S>& satups,
		index_t& nrange,
		index_t& nelt,
		bool getLock = true)
	{
		current_->queryQval(qv, satups, nrange, nelt, getLock);
	}

	/**
	 * Return a pointer to the current-read cache object.
	 */
	const AlignmentCache<index_t>* currentCache() const { return current_; }

	/**
	 * Number of ranges added since the current alignment job began.
	 */
	index_t curNumRanges() const { return rangen_; }

	/**
	 * Number of elements added since the current alignment job began.
	 */
	index_t curNumElts() const { return eltsn_; }

#ifndef NDEBUG
	/**
	 * Check that AlignmentCacheIface is internally consistent.
	 */
	bool repOk() const {
		assert(current_ != NULL);
		assert_geq(eltsn_, rangen_);
		if(qv_ == NULL) {
			assert_eq(0, rangen_);
			assert_eq(0, eltsn_);
		}
		return true;
	}
#endif

	/**
	 * Return the alignment cache for the current read.
	 */
	const AlignmentCache<index_t>& current() const {
		return *current_;
	}

protected:

	/**
	 * Reset fields encoding info about the in-process read.
	 */
	void resetRead() {
		cacheable_ = false;
		rangen_ = eltsn_ = 0;
		qv_ = NULL;
	}

	QKey qk_;              // key representation for current read substring
	QVal<index_t> *qv_;    // pointer to value representation for current read substring
	QVal<index_t> qvbuf_;  // buffer for when key is uncacheable but we need a qv
	bool cacheable_;       // true iff the read substring currently being aligned is cacheable

	index_t rangen_; // number of ranges since last alignment job began
	index_t eltsn_;  // number of elements since last alignment job began

	AlignmentCache<index_t> *current_; // cache dedicated to the current read
	AlignmentCache<index_t> *local_;   // local, unsynchronized cache
	AlignmentCache<index_t> *shared_;  // shared, synchronized cache
};
#ifndef NDEBUG
/**
 * Debug-only sanity check.  When the QVal has at least one range, its
 * run [i_, i_ + rangen_) must lie within the qlist of cache 'ac'; in all
 * cases the element tally must be at least the range tally.  Always
 * returns true so it can be wrapped in assert().
 */
template <typename index_t>
bool QVal<index_t>::repOk(const AlignmentCache<index_t>& ac) const {
	const bool hasRanges = (rangen_ > 0);
	if(hasRanges) {
		const index_t qsz = ac.qSize();
		assert_lt(i_, qsz);
		assert_leq(i_ + rangen_, qsz);
	}
	assert_geq(eltn_, rangen_);
	return true;
}
#endif
#ifndef NDEBUG
/**
 * Debug-only sanity check.  A non-empty range must start inside the
 * salist of cache 'ac', and the range [i, i + len) must not run past its
 * end.  Always returns true so it can be wrapped in assert().
 */
template <typename index_t>
bool SAVal<index_t>::repOk(const AlignmentCache<index_t>& ac) const {
	const index_t sasz = ac.saSize();
	if(len != 0) {
		assert(i < sasz);
	}
	assert_leq(i + len, sasz);
	return true;
}
#endif
/**
 * Add a new association between the read sequence represented by 'qv'
 * and the reference sequence held in 'sak'.
 *
 * The key is appended to the qlist and tallied in 'qv'.  If the key is
 * new to the suffix array map, a node is created for it and
 * (botf - topf) placeholder entries are appended to the salist, to be
 * filled in with reference offsets later.
 *
 * 'botb' is accepted for symmetry with the other range bounds but is not
 * used here.
 *
 * Returns true on success, false if pool memory was exhausted.  If the
 * salist runs out part-way, the new node's 'len' is trimmed to the number
 * of placeholders actually added.
 */
template <typename index_t>
bool AlignmentCache<index_t>::addOnTheFly(
	QVal<index_t>& qv, // qval that points to the range of reference substrings
	const SAKey& sak,  // the key holding the reference substring
	index_t topf,      // top range elt in BWT index
	index_t botf,      // bottom range elt in BWT index
	index_t topb,      // top range elt in BWT' index
	index_t botb,      // bottom range elt in BWT' index
	bool getLock)
{
	ThreadSafe ts(lockPtr(), shared_ && getLock);
	bool added = true;
	const index_t nelts = botf - topf; // size of the SA range
	// If this is the first reference sequence we're associating with
	// the query sequence, initialize the QVal.
	if(!qv.valid()) {
		qv.init((index_t)qlist_.size(), 0, 0);
	}
	if(!qlist_.add(pool(), sak)) {
		return false; // Exhausted pool memory
	}
	// Tally the range only once its key is actually in the qlist, so that
	// a failed add doesn't leave 'qv' claiming an entry that isn't there.
	qv.addRange(nelts); // update tally for # ranges and # elts
#ifndef NDEBUG
	// No two consecutive keys in this QVal's run should be identical
	for(index_t i = qv.offset() + 1; i < qlist_.size(); i++) {
		assert(qlist_.get(i) != qlist_.get(i-1));
	}
#endif
	assert_eq(qv.offset() + qv.numRanges(), qlist_.size());
	SANode *s = samap_.add(pool(), sak, &added);
	if(s == NULL) {
		return false; // Exhausted pool memory
	}
	assert(s->key.repOk());
	if(added) {
		s->payload.i    = (index_t)salist_.size();
		s->payload.len  = nelts;
		s->payload.topf = topf;
		s->payload.topb = topb;
		// Reserve one placeholder slot per element of the SA range
		for(index_t j = 0; j < nelts; j++) {
			if(!salist_.add(pool(), (index_t)0xffffffff)) {
				// Change the payload's len field to what was really added.
				// Keep the full width of index_t; a 32-bit cast would
				// truncate when index_t is 64 bits wide.
				s->payload.len = j;
				return false; // Exhausted pool memory
			}
		}
		assert(s->payload.repOk(*this));
	}
	return true;
}
#endif /*ALIGNER_CACHE_H_*/