hisat-3n/group_walk.h
2025-01-18 21:09:52 +08:00

1625 lines
56 KiB
C++

/*
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
*
* This file is part of Bowtie 2.
*
* Bowtie 2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Bowtie 2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* group_walk.h
*
* Classes and routines for walking a set of BW ranges backwards from the edge
* of a seed hit with the goal of resolving the offset of each row in each
* range. Here "offset" means offset into the concatenated string of all
* references. The main class is 'GroupWalk' and an important helper is
* 'GWState'.
*
* For each combination of seed offset and orientation, there is an associated
* QVal. Each QVal describes a (possibly empty) set of suffix array ranges.
* Call these "seed range sets." Each range in the set is "backed" by a range
* of the salist, represented as a PListSlice. Such a range is the origin of a
* walk.
*
* When an offset is resolved, it is entered into the salist via the
* PListSlice. Note that other routines in this same thread might also be
* setting elements of the salist, so routines here should expect that elements
* can go from unresolved to resolved at any time.
*
* What bookkeeping do we have to do as we walk? Before the first step, we
* convert the initial QVal into a list of SATuples; the SATuples are our link
* to the correpsonding ranges in the suffix array. The list of SATuples is
* then converted to a list of GWState objects; these keep track of where we
* are in our walk (e.g. what 'top' and 'bot' are, how many steps have we gone,
* etc) as well as how the elements in the current range correspond to elements
* from the original range.
*
* The user asks the GroupWalk to resolve another offset by calling advance().
* advance() can be called in various ways:
*
* (a) The user can request that the GroupWalk proceed until a
* *particular* element is resolved, then return that resolved
* element. Other elements may be resolved along the way, but
* those results are buffered and may be dispensed in future calls
* to advance().
*
* (b) The user can request that the GroupWalk select an as-yet-
* unreported element at random and and proceed until that element
* is resolved and report it. Again, other elements may be
* resolved along the way but they are buffered.
*
* (c) The user can request that the GroupWalk resolve elements in a
* particular BW range (with a particular offset and orientation)
* in an order of its choosing. The GroupWalk in this case
* attempts to resolve as many offsets as possible as quickly as
* possible, and returns them as soon as they're found. The res_
* buffer is used in this case.
*
* (d) Like (c) but resolving elements at a paritcular offset and
* orientation instead of at a specific BW range. The res_ buffer
* is used in this case, since there's a chance that the
*
* There are simple ways to heuristically reduce the problem size while
* maintaining randomness. For instance, the user put a ceiling on the
* number of elements that we walk from any given seed offset or range.
* We can then trim away random subranges to reduce the size of the
* problem. There is no need for the caller to do this for us.
*/
#ifndef GROUP_WALK_H_
#define GROUP_WALK_H_
#include <stdint.h>
#include <limits>
#include "ds.h"
#include "gfm.h"
#include "read.h"
#include "reference.h"
#include "mem_ids.h"
/**
* Encapsulate an SA range and an associated list of slots where the resolved
* offsets can be placed.
*/
template<typename T, typename index_t>
class SARangeWithOffs {
public:
SARangeWithOffs() { reset(); };
SARangeWithOffs(
index_t tf,
index_t bf,
index_t ntf,
index_t nbf,
const EList<pair<index_t, index_t> >& n_iedge_count,
size_t len,
const T& o) {
init(tf, bf, ntf, nbf, n_iedge_count, len, o);
}
void init(
index_t tf,
index_t bf,
index_t ntf,
index_t nbf,
const EList<pair<index_t, index_t> >& n_iedge_count,
size_t len_,
const T& o) {
topf = tf;
botf = bf;
assert_lt(topf, botf);
node_top = ntf;
node_bot = nbf;
assert_leq(node_bot - node_top, botf - topf);
node_iedge_count = n_iedge_count;
len = len_,
offs = o;
}
/**
* Reset to uninitialized state.
*/
void reset() { topf = (index_t)INDEX_MAX; }
/**
* Return true if this is initialized.
*/
bool inited() const {
return topf != (index_t)INDEX_MAX;
}
/**
* Return the number of times this reference substring occurs in the
* reference, which is also the size of the 'offs' TSlice.
*/
size_t size() const { return offs.size(); }
index_t topf; // top in GBWT index
index_t botf;
index_t node_top; // top node
index_t node_bot;
EList<pair<index_t, index_t> > node_iedge_count;
size_t len; // length of the reference sequence involved
T offs; // offsets
};
/**
* A group of per-thread state that can be shared between all the GroupWalks
* used in that thread.
*/
template <typename index_t>
struct GroupWalkState {
GroupWalkState(int cat) : map(cat) {
masks[0].setCat(cat);
masks[1].setCat(cat);
masks[2].setCat(cat);
masks[3].setCat(cat);
}
EList<bool> masks[4]; // temporary list for masks; used in GWState
EList<index_t, 16> map; // temporary list of GWState maps
};
/**
* Encapsulates counters that encode how much work the walk-left logic
* has done.
*/
struct WalkMetrics {
WalkMetrics() {
reset();
}
/**
* Sum each across this object and 'm'. This is the only safe way
* to update a WalkMetrics shared by many threads.
*/
void merge(const WalkMetrics& m, bool getLock = false) {
ThreadSafe ts(&mutex_m, getLock);
bwops += m.bwops;
branches += m.branches;
resolves += m.resolves;
refresolves += m.refresolves;
reports += m.reports;
}
/**
* Set all to 0.
*/
void reset() {
bwops = branches = resolves = refresolves = reports = 0;
}
uint64_t bwops; // Burrows-Wheeler operations
uint64_t branches; // BW range branch-offs
uint64_t resolves; // # offs resolved with BW walk-left
uint64_t refresolves; // # resolutions caused by reference scanning
uint64_t reports; // # offs reported (1 can be reported many times)
MUTEX_T mutex_m;
};
/**
* Coordinates for a BW element that the GroupWalk might resolve.
*/
template <typename index_t>
struct GWElt {
GWElt() { reset(); }
/**
* Reset GWElt to uninitialized state.
*/
void reset() {
offidx = range = elt = len = (index_t)OFF_MASK;
fw = false;
}
/**
* Initialize this WalkResult.
*/
void init(
index_t oi,
bool f,
index_t r,
index_t e,
index_t l)
{
offidx = oi;
fw = f;
range = r;
elt = e;
len = l;
}
/**
* Return true iff this GWElt and the given GWElt refer to the same
* element.
*/
bool operator==(const GWElt& o) const {
return offidx == o.offidx &&
fw == o.fw &&
range == o.range &&
elt == o.elt &&
len == o.len;
}
/**
* Return true iff this GWElt and the given GWElt refer to
* different elements.
*/
bool operator!=(const GWElt& o) const {
return !(*this == o);
}
index_t offidx; // seed offset index
bool fw; // strand
index_t range; // range
index_t elt; // element
index_t len; // length
};
/**
* A record encapsulating the result of looking up one BW element in
* the Bowtie index.
*/
template <typename index_t>
struct WalkResult {
WalkResult() { reset(); }
/**
* Reset GWElt to uninitialized state.
*/
void reset() {
elt.reset();
bwrow = toff = (index_t)OFF_MASK;
}
/**
* Initialize this WalkResult.
*/
void init(
index_t oi, // seed offset index
bool f, // strand
index_t r, // range
index_t e, // element
index_t bwr, // BW row
index_t len, // length
index_t to) // text offset
{
elt.init(oi, f, r, e, len);
bwrow = bwr;
toff = to;
}
GWElt<index_t> elt; // element resolved
index_t bwrow; // SA row resolved
index_t toff; // resolved offset from SA sample
};
/**
* A GW hit encapsulates an SATuple describing a reference substring
* in the cache, along with a bool indicating whether each element of
* the hit has been reported yet.
*/
template<typename index_t, typename T>
class GWHit {
public:
GWHit() :
fmap(0, GW_CAT),
offidx((index_t)OFF_MASK),
fw(false),
range((index_t)OFF_MASK),
len((index_t)OFF_MASK),
reported_(0, GW_CAT),
nrep_(0)
{
assert(repOkBasic());
}
/**
* Initialize with a new SA range. Resolve the done vector so that
* there's one bool per suffix array element.
*/
void init(
SARangeWithOffs<T, index_t>& sa,
index_t oi,
bool f,
index_t r)
{
nrep_ = 0;
offidx = oi;
fw = f;
range = r;
len = (index_t)sa.len;
reported_.resize(sa.offs.size());
reported_.fill(false);
fmap.resize(sa.offs.size());
fmap.fill(make_pair((index_t)OFF_MASK, (index_t)OFF_MASK));
}
/**
* Clear contents of sat and done.
*/
void reset() {
reported_.clear();
fmap.clear();
nrep_ = 0;
offidx = (index_t)OFF_MASK;
fw = false;
range = (index_t)OFF_MASK;
len = (index_t)OFF_MASK;
}
#ifndef NDEBUG
/**
* Check that GWHit is internally consistent. If a pointer to an
* EList of GWStates is given, we assume that it is the EList
* corresponding to this GWHit and check whether the forward and
* reverse mappings match up for the as-yet-unresolved elements.
*/
bool repOk(const SARangeWithOffs<T, index_t>& sa) const {
assert_eq(reported_.size(), sa.offs.size());
assert_eq(fmap.size(), sa.offs.size());
// Shouldn't be any repeats among as-yet-unresolveds
size_t nrep = 0;
for(size_t i = 0; i < fmap.size(); i++) {
if(reported_[i]) nrep++;
if(sa.offs[i] != (index_t)OFF_MASK) {
continue;
}
for(size_t j = i+1; j < fmap.size(); j++) {
if(sa.offs[j] != (index_t)OFF_MASK) {
continue;
}
assert(fmap[i] != fmap[j]);
}
}
assert_eq(nrep_, nrep);
return true;
}
/**
* Return true iff this GWHit is not obviously corrupt.
*/
bool repOkBasic() {
return true;
}
#endif
/**
* Set the ith element to be reported.
*/
void setReported(index_t i) {
assert(!reported_[i]);
assert_lt(i, reported_.size());
reported_[i] = true;
nrep_++;
}
/**
* Return true iff element i has been reported.
*/
bool reported(index_t i) const {
assert_lt(i, reported_.size());
return reported_[i];
}
/**
* Return true iff all elements have been reported.
*/
bool done() const {
assert_leq(nrep_, reported_.size());
return nrep_ == reported_.size();
}
EList<std::pair<index_t, index_t>, 16> fmap; // forward map; to GWState & elt
index_t offidx; // offset idx
bool fw; // orientation
index_t range; // original range index
index_t len; // length of hit
protected:
EList<bool, 16> reported_; // per-elt bool indicating whether it's been reported
index_t nrep_;
};
/**
* Encapsulates the progress made along a particular path from the original
* range.
*/
template<typename index_t, typename T>
class GWState {
public:
GWState() : map_(0, GW_CAT) {
reset(); assert(repOkBasic());
}
/**
* Initialize this GWState with new gfm, top, bot, step, and sat.
*
* We assume map is already set up.
*
* Returns true iff at least one elt was resolved.
*/
template<int S>
pair<int, int> init(
const GFM<index_t>& gfm, // index to walk left in
const BitPairReference& ref, // bitpair-encoded reference
SARangeWithOffs<T, index_t>& sa, // SA range with offsets
EList<GWState, S>& sts, // EList of GWStates for range being advanced
GWHit<index_t, T>& hit, // Corresponding hit structure
index_t range, // which range is this?
bool reportList, // if true, "report" resolved offsets immediately by adding them to 'res' list
EList<WalkResult<index_t>, 16>* res, // EList where resolved offsets should be appended
index_t tp, // top of range at this step
index_t bt, // bot of range at this step
index_t n_tp, // node at top
index_t n_bt, // node at bot
const EList<pair<index_t, index_t> >& n_iedge_count,
index_t st, // # steps taken to get to this step
WalkMetrics& met)
{
assert_gt(bt, tp);
assert_gt(n_bt, n_tp);
assert_geq(bt - tp, n_bt - n_tp);
assert_lt(range, sts.size());
top = tp;
bot = bt;
node_top = n_tp;
node_bot = n_bt;
node_iedge_count = n_iedge_count;
step = st;
assert(!inited_);
ASSERT_ONLY(inited_ = true);
ASSERT_ONLY(lastStep_ = step-1);
return init(gfm, ref, sa, sts, hit, range, reportList, res, met);
}
/**
* Initialize this GWState.
*
* We assume map is already set up, and that 'step' is equal to the
* number of steps taken to get to the new top/bot pair *currently*
* in the top and bot fields.
*
* Returns a pair of numbers, the first being the number of
* resolved but unreported offsets found during this advance, the
* second being the number of as-yet-unresolved offsets.
*/
template<int S>
pair<int, int> init(
const GFM<index_t>& gfm, // forward Bowtie index
const BitPairReference& ref, // bitpair-encoded reference
SARangeWithOffs<T, index_t>& sa, // SA range with offsets
EList<GWState, S>& st, // EList of GWStates for advancing range
GWHit<index_t, T>& hit, // Corresponding hit structure
index_t range, // range being inited
bool reportList, // report resolutions, adding to 'res' list?
EList<WalkResult<index_t>, 16>* res, // EList to append resolutions
WalkMetrics& met) // update these metrics
{
assert(inited_);
assert_eq(step, lastStep_+1);
ASSERT_ONLY(lastStep_++);
assert_leq((index_t)step, gfm.gh().len());
assert_lt(range, st.size());
pair<int, int> ret = make_pair(0, 0);
index_t trimBegin = 0, trimEnd = 0;
bool empty = true; // assume all resolved until proven otherwise
// Commit new information, if any, to the PListSlide. Also,
// trim and check if we're done.
assert_eq(node_bot - node_top, map_.size());
ASSERT_ONLY(index_t num_orig_iedges = 0, orig_e = 0);
index_t num_iedges = 0, e = 0;
for(size_t i = mapi_; i < map_.size(); i++) {
bool resolved = (off((index_t)i, sa) != (index_t)OFF_MASK);
if(!resolved) {
#ifndef NDEBUG
while(orig_e < sa.node_iedge_count.size()) {
if(map((index_t)i) <= sa.node_iedge_count[orig_e].first) {
break;
}
num_orig_iedges += sa.node_iedge_count[orig_e].second;
orig_e++;
}
#endif
while(e < node_iedge_count.size()) {
if(i <= node_iedge_count[e].first) {
break;
}
num_iedges += node_iedge_count[e].second;
e++;
}
// Elt not resolved yet; try to resolve it now
index_t bwrow = (index_t)(top + i + num_iedges);
index_t node = (index_t)(node_top + i);
index_t toff = gfm.tryOffset(bwrow, node);
ASSERT_ONLY(index_t origBwRow = sa.topf + map((index_t)i) + num_orig_iedges);
ASSERT_ONLY(index_t origNode = sa.node_top + map((index_t)i));
assert_eq(bwrow, gfm.walkLeft(origBwRow, step));
if(toff != (index_t)OFF_MASK) {
// Yes, toff was resolvable
assert_eq(toff, gfm.getOffset(bwrow, node));
met.resolves++;
toff += step;
assert_eq(toff, gfm.getOffset(origBwRow, origNode));
setOff((index_t)i, toff, sa, met);
if(!reportList) ret.first++;
#if 0
// used to be #ifndef NDEBUG, but since we no longer require that the reference
// string info be included, this is no longer relevant.
// Sanity check that the reference characters under this
// hit match the seed characters in hit.satup->key.seq.
// This is NOT a check that we associated the exact right
// text offset with the BW row. This is an important
// distinction because when resolved offsets are filled in
// via refernce scanning, they are not necessarily the
// exact right text offsets to associate with the
// respective BW rows but they WILL all be correct w/r/t
// the reference sequence underneath, which is what really
// matters here.
index_t tidx = (index_t)OFF_MASK, tof, tlen;
bool straddled = false;
gfm.joinedToTextOff(
hit.len, // length of seed
toff, // offset in joined reference string
tidx, // reference sequence id
tof, // offset in reference coordinates
tlen, // length of reference sequence
true, // don't reject straddlers
straddled);
if(tidx != (index_t)OFF_MASK &&
hit.satup->key.seq != std::numeric_limits<uint64_t>::max())
{
// key: 2-bit characters packed into a 64-bit word with
// the least significant bitpair corresponding to the
// rightmost character on the Watson reference strand.
uint64_t key = hit.satup->key.seq;
for(int64_t j = tof + hit.len-1; j >= tof; j--) {
// Get next reference base to the left
int c = ref.getBase(tidx, j);
assert_range(0, 3, c);
// Must equal least significant bitpair of key
if(c != (int)(key & 3)) {
// Oops; when we jump to the piece of the
// reference where the seed hit is, it doesn't
// match the seed hit. Before dying, check
// whether we have the right spot in the joined
// reference string
SString<char> jref;
gfm.restore(jref);
uint64_t key2 = hit.satup->key.seq;
for(int64_t k = toff + hit.len-1; k >= toff; k--) {
int c = jref[k];
assert_range(0, 3, c);
assert_eq(c, (int)(key2 & 3));
key2 >>= 2;
}
assert(false);
}
key >>= 2;
}
}
#endif
}
}
// Is the element resolved? We ask this regardless of how it was
// resolved (whether this function did it just now, whether it did
// it a while ago, or whether some other function outside GroupWalk
// did it).
if(off((index_t)i, sa) != (index_t)OFF_MASK) {
if(reportList && !hit.reported(map((index_t)i))) {
// Report it
index_t toff = off((index_t)i, sa);
assert(res != NULL);
res->expand();
index_t origBwRow = sa.topf + map((index_t)i);
res->back().init(
hit.offidx, // offset idx
hit.fw, // orientation
hit.range, // original range index
map((index_t)i), // original element offset
origBwRow, // BW row resolved
hit.len, // hit length
toff); // text offset
hit.setReported(map((index_t)i));
met.reports++;
}
// Offset resolved
if(empty) {
// Haven't seen a non-empty entry yet, so we
// can trim this from the beginning.
trimBegin++;
} else {
trimEnd++;
}
} else {
// Offset not yet resolved
ret.second++;
trimEnd = 0;
empty = false;
// Set the forward map in the corresponding GWHit
// object to point to the appropriate element of our
// range
assert_geq(i, mapi_);
index_t bmap = map((index_t)i);
hit.fmap[bmap].first = range;
hit.fmap[bmap].second = (index_t)i;
#ifndef NDEBUG
for(size_t j = 0; j < bmap; j++) {
if(sa.offs[j] == (index_t)OFF_MASK &&
hit.fmap[j].first == range)
{
assert_neq(i, hit.fmap[j].second);
}
}
#endif
}
}
// Trim from beginning
assert_geq(trimBegin, 0);
mapi_ += trimBegin;
if(trimBegin > 0) {
top += trimBegin;
index_t e = 0;
for(; e < node_iedge_count.size(); e++) {
if(node_iedge_count[e].first >= trimBegin) break;
assert_geq(top, node_iedge_count[e].second);
top += node_iedge_count[e].second;
}
if(e > 0) node_iedge_count.erase(0, e);
for(e = 0; e < node_iedge_count.size(); e++) {
assert_geq(node_iedge_count[e].first, trimBegin);
node_iedge_count[e].first -= trimBegin;
}
}
node_top += trimBegin;
if(trimEnd > 0) {
// Trim from end
map_.resize(map_.size() - trimEnd);
bot -= trimEnd;
index_t node_range = node_bot - node_top;
while(node_iedge_count.size() > 0) {
if(node_iedge_count.back().first < (node_range - trimEnd)) break;
assert_geq(bot, node_iedge_count.back().second);
bot -= node_iedge_count.back().second;
node_iedge_count.pop_back();
}
}
node_bot -= trimEnd;
#ifndef NDEBUG
assert_leq(node_top, node_bot);
index_t num_nodes = node_bot - node_top;
index_t add = 0;
for(index_t e = 0; e < node_iedge_count.size(); e++) {
assert_lt(node_iedge_count[e].first, num_nodes);
add += node_iedge_count[e].second;
}
assert_eq(bot - top, num_nodes + add);
#endif
if(empty) {
assert(done());
#ifndef NDEBUG
// If range is done, all elements from map should be
// resolved
for(size_t i = mapi_; i < map_.size(); i++) {
assert_neq((index_t)OFF_MASK, off((index_t)i, sa));
}
// If this range is done, then it should be the case that
// all elements in the corresponding GWHit that point to
// this range are resolved.
for(size_t i = 0; i < hit.fmap.size(); i++) {
if(sa.offs[i] == (index_t)OFF_MASK) {
assert_neq(range, hit.fmap[i].first);
}
}
#endif
return ret;
} else {
assert(!done());
}
// Is there a dollar sign in the middle of the range?
tmp_zOffs.clear();
for(index_t i = 0; i < gfm._zOffs.size(); i++) {
#ifndef NDEBUG
if(i > 0) {
assert_lt(gfm._zOffs[i-1], gfm._zOffs[i]);
}
#endif
assert_neq(top, gfm._zOffs[i]);
// assert_neq(bot-1, gfm._zOffs[i]);
if(gfm._zOffs[i] > top && gfm._zOffs[i] < bot) {
tmp_zOffs.push_back(gfm._zOffs[i]);
}
}
// Yes, the dollar sign is in the middle of this range. We
// must split it into the two ranges on either side of the
// dollar. Let 'bot' and 'top' delimit the portion of the
// range prior to the dollar.
if(tmp_zOffs.size() > 0) {
tmp_gbwt_to_node.clear();
index_t n = 0, e = 0;
for(index_t r = 0; r < (bot - top); r++) {
tmp_gbwt_to_node.push_back(n);
if(e < node_iedge_count.size()) {
assert_leq(n, node_iedge_count[e].first);
if(n == node_iedge_count[e].first) {
for(index_t a = 0; a < node_iedge_count[e].second; a++) {
tmp_gbwt_to_node.push_back(n);
r++;
}
e++;
}
}
n++;
}
assert_eq(bot - top, tmp_gbwt_to_node.size());
for(index_t i = 0; i < tmp_zOffs.size(); i++) {
// Note: might be able to do additional trimming off the end.
// Create a new range for the portion after the dollar.
index_t new_top = tmp_zOffs[i] + 1;
if(i + 1 < tmp_zOffs.size() && new_top == tmp_zOffs[i+1]) {
continue;
}
assert_leq(new_top - top, tmp_gbwt_to_node.size());
if(new_top - top == tmp_gbwt_to_node.size()) {
break;
}
index_t new_node_top = tmp_gbwt_to_node[new_top - top] + node_top;
assert_lt(new_node_top, node_bot);
index_t new_bot;
if(i + 1 < tmp_zOffs.size()) {
new_bot = tmp_zOffs[i+1];
} else {
new_bot = bot;
}
index_t new_node_bot = node_bot;
if(new_bot - top < tmp_gbwt_to_node.size()) {
new_node_bot = node_top + tmp_gbwt_to_node[new_bot - top];
if(new_bot - top > 0 &&
tmp_gbwt_to_node[new_bot - top] == tmp_gbwt_to_node[new_bot - top - 1]) {
new_node_bot++;
}
}
tmp_node_iedge_count.clear();
if(new_top >= new_bot) continue;
for(index_t j = new_top - top; j + 1 < new_bot - top;) {
index_t n = tmp_gbwt_to_node[j];
index_t j2 = j + 1;
while(j2 < new_bot - top) {
if(n != tmp_gbwt_to_node[j2]) {
break;
}
j2++;
}
if(j + 1 < j2) {
tmp_node_iedge_count.expand();
assert_lt(node_top, new_node_top);
tmp_node_iedge_count.back().first = n - (new_node_top - node_top);
tmp_node_iedge_count.back().second = j2 - j - 1;
}
j = j2;
}
st.expand();
st.back().reset();
st.back().initMap(new_node_bot - new_node_top);
for(index_t j = new_node_top; j < new_node_bot; j++) {
st.back().map_[j - new_node_top] = map(j - node_top + mapi_);
}
st.back().init(
gfm,
ref,
sa,
st,
hit,
(index_t)st.size()-1,
reportList,
res,
new_top,
new_bot,
new_node_top,
new_node_bot,
tmp_node_iedge_count,
step,
met);
}
assert_eq((index_t)map_.size(), node_bot - node_top + mapi_);
bot = tmp_zOffs[0];
assert_lt(bot - top, tmp_gbwt_to_node.size());
node_bot = tmp_gbwt_to_node[bot - top - 1] + node_top + 1;
map_.resize(node_bot - node_top + mapi_);
index_t width = node_bot - node_top;
for(index_t e = 0; e < node_iedge_count.size(); e++) {
if(node_iedge_count[e].first >= node_bot - node_top) {
node_iedge_count.resize(e);
break;
}
width += node_iedge_count[e].second;
}
if(width != bot - top) {
assert_eq(width, bot - top + 1);
assert_gt(node_iedge_count.size(), 0);
assert_gt(node_iedge_count.back().second, 0);
node_iedge_count.back().second -= 1;
if(node_iedge_count.back().second == 0) {
node_iedge_count.resize(node_iedge_count.size()- 1);
}
}
}
assert_gt(bot, top);
// Prepare SideLocus's for next step
if(bot-top > 1) {
SideLocus<index_t>::initFromTopBot(top, bot, gfm.gh(), gfm.gfm(), tloc, bloc);
assert(tloc.valid()); assert(tloc.repOk(gfm.gh()));
assert(bloc.valid()); assert(bloc.repOk(gfm.gh()));
} else {
tloc.initFromRow(top, gfm.gh(), gfm.gfm());
assert(tloc.valid()); assert(tloc.repOk(gfm.gh()));
bloc.invalidate();
}
return ret;
}
#ifndef NDEBUG
/**
* Check if this GWP is internally consistent.
*/
bool repOk(
const GFM<index_t>& gfm,
GWHit<index_t, T>& hit,
index_t range) const
{
assert(done() || bot > top);
assert(doneResolving(hit) || (tloc.valid() && tloc.repOk(gfm.gh())));
assert(doneResolving(hit) || bot == top+1 || (bloc.valid() && bloc.repOk(gfm.gh())));
assert_eq(map_.size()-mapi_, bot-top);
// Make sure that 'done' is compatible with whether we have >=
// 1 elements left to resolve.
int left = 0;
for(size_t i = mapi_; i < map_.size(); i++) {
ASSERT_ONLY(index_t row = (index_t)(top + i - mapi_));
ASSERT_ONLY(index_t origRow = hit.satup->topf + map(i));
assert(step == 0 || row != origRow);
assert_eq(row, gfm.walkLeft(origRow, step));
assert_lt(map_[i], hit.satup->offs.size());
if(off(i, hit) == (index_t)OFF_MASK) left++;
}
assert(repOkMapRepeats());
assert(repOkMapInclusive(hit, range));
return true;
}
/**
* Return true iff this GWState is not obviously corrupt.
*/
bool repOkBasic() {
assert_geq(bot, top);
return true;
}
/**
* Check that the fmap elements pointed to by our map_ include all
* of the fmap elements that point to this range.
*/
bool repOkMapInclusive(GWHit<index_t, T>& hit, index_t range) const {
for(size_t i = 0; i < hit.fmap.size(); i++) {
if(hit.satup->offs[i] == (index_t)OFF_MASK) {
if(range == hit.fmap[i].first) {
ASSERT_ONLY(bool found = false);
for(size_t j = mapi_; j < map_.size(); j++) {
if(map(j) == i) {
ASSERT_ONLY(found = true);
break;
}
}
assert(found);
}
}
}
return true;
}
/**
* Check that no two elements in map_ are the same.
*/
bool repOkMapRepeats() const {
for(size_t i = mapi_; i < map_.size(); i++) {
for(size_t j = i+1; j < map_.size(); j++) {
assert_neq(map_[i], map_[j]);
}
}
return true;
}
#endif
/**
* Return the offset currently assigned to the ith element. If it
* has not yet been resolved, return 0xffffffff.
*/
index_t off(
index_t i,
const SARangeWithOffs<T, index_t>& sa)
{
assert_geq(i, mapi_);
assert_lt(i, map_.size());
assert_lt(map_[i], sa.offs.size());
return sa.offs.get(map_[i]);
}
/**
* Return the offset of the element within the original range's
* PListSlice that the ith element of this range corresponds to.
*/
index_t map(index_t i) const {
assert_geq(i, mapi_);
assert_lt(i, map_.size());
return map_[i];
}
/**
* Return the offset of the first untrimmed offset in the map.
*/
index_t mapi() const {
return mapi_;
}
/**
* Return number of active elements in the range being tracked by
* this GWState.
*/
index_t size() const {
return (index_t)(map_.size() - mapi_);
}
/**
* Return true iff all elements in this leaf range have been
* resolved.
*/
bool done() const {
return size() == 0;
}
/**
* Set the PListSlice element that corresponds to the ith element
* of 'map' to the specified offset.
*/
void setOff(
index_t i,
index_t off,
SARangeWithOffs<T, index_t>& sa,
WalkMetrics& met)
{
assert_lt(i + mapi_, map_.size());
assert_lt(map_[i + mapi_], sa.offs.size());
size_t saoff = map_[i + mapi_];
sa.offs[saoff] = off;
assert_eq(off, sa.offs[saoff]);
}
/**
* Advance this GWState by one step (i.e. one BW operation). In
* the event of a "split", more elements are added to the EList
* 'st', which must have room for at least 3 more elements without
* needing another expansion. If an expansion of 'st' is
* triggered, this GWState object becomes invalid.
*
* Returns a pair of numbers, the first being the number of
* resolved but unreported offsets found during this advance, the
* second being the number of as-yet-unresolved offsets.
*/
template <int S>
pair<int, int> advance(
const GFM<index_t>& gfm, // the forward Bowtie index, for stepping left
const BitPairReference& ref, // bitpair-encoded reference
SARangeWithOffs<T, index_t>& sa, // SA range with offsets
GWHit<index_t, T>& hit, // the associated GWHit object
index_t range, // which range is this?
bool reportList, // if true, "report" resolved offsets immediately by adding them to 'res' list
EList<WalkResult<index_t>, 16>* res, // EList where resolved offsets should be appended
EList<GWState, S>& st, // EList of GWStates for range being advanced
GroupWalkState<index_t>& gws, // temporary storage for masks
WalkMetrics& met,
PerReadMetrics& prm)
{
ASSERT_ONLY(index_t origTop = top);
ASSERT_ONLY(index_t origBot = bot);
assert_geq(step, 0);
assert_eq(step, lastStep_);
// assert_geq(st.capacity(), st.size() + 4);
assert(tloc.valid()); assert(tloc.repOk(gfm.gh()));
assert_eq(node_bot-node_top, (index_t)(map_.size()-mapi_));
pair<int, int> ret = make_pair(0, 0);
assert_eq(top, tloc.toBWRow(gfm.gh()));
if(bot - top > 1) {
bool first = true;
ASSERT_ONLY(index_t sum = 0);
index_t newtop = 0, newbot = 0;
index_t new_node_top = 0, new_node_bot = 0;
gws.map.clear();
// Still multiple elements being tracked
index_t curtop = top, curbot = bot;
index_t cur_node_top = node_top, cur_node_bot = node_bot;
for(index_t e = 0; e < node_iedge_count.size() + 1; e++) {
if(e >= node_iedge_count.size()) {
if(e > 0) {
curtop = curbot + node_iedge_count[e-1].second;
curbot = bot;
if(curtop >= curbot) {
assert_eq(curtop, curbot);
break;
}
cur_node_top = cur_node_bot;
cur_node_bot = node_bot;
}
} else {
if(e > 0) {
curtop = curbot + node_iedge_count[e-1].second;
assert_lt(node_iedge_count[e-1].first, node_iedge_count[e].first);
curbot = curtop + (node_iedge_count[e].first - node_iedge_count[e-1].first);
cur_node_top = cur_node_bot;
} else {
curbot = curtop + node_iedge_count[e].first + 1;
}
cur_node_bot = node_top + node_iedge_count[e].first + 1;
}
assert_lt(curtop, curbot);
index_t upto[4], in[4];
upto[0] = in[0] = upto[1] = in[1] =
upto[2] = in[2] = upto[3] = in[3] = 0;
// assert_eq(bot, bloc.toBWRow(gfm.gh()));
met.bwops++;
prm.nExFmops++;
// Assert that there's not a dollar sign in the middle of
// this range
#ifndef NDEBUG
for(index_t i = 0; i < gfm._zOffs.size(); i++) {
assert(curbot <= gfm._zOffs[i] || curtop > gfm._zOffs[i]);
}
#endif
SideLocus<index_t> curtloc, curbloc;
SideLocus<index_t>::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc);
gfm.mapLFRange(curtloc, curbloc, curbot-curtop, upto, in, gws.masks);
#ifndef NDEBUG
for(int i = 0; i < 4; i++) {
assert_eq(curbot-curtop, (index_t)(gws.masks[i].size()));
}
#endif
for(int i = 0; i < 4; i++) {
if(in[i] > 0) {
// Non-empty range resulted
if(first) {
// For the first one,
first = false;
pair<index_t, index_t> range, node_range;
backup_node_iedge_count.clear();
SideLocus<index_t>::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc);
range = gfm.mapGLF(curtloc, curbloc, i, &node_range, &backup_node_iedge_count, cur_node_bot - cur_node_top);
newtop = range.first;
newbot = range.second;
new_node_top = node_range.first;
new_node_bot = node_range.second;
// Range narrowed so we have to look at the masks
for(size_t j = 0; j < gws.masks[i].size(); j++) {
assert_lt(j+mapi_+(cur_node_top - node_top), map_.size());
if(gws.masks[i][j]) {
gws.map.push_back(map_[j+mapi_+(cur_node_top - node_top)]);
assert(gws.map.size() <= 1 || gws.map.back() != gws.map[gws.map.size()-2]);
#if 0
// If this element is not yet resolved,
// then check that it really is the
// expected number of steps to the left
// of the corresponding element in the
// root range
assert_lt(gws.map.back(), sa.size());
if(sa.offs[gws.map.back()] == (index_t)OFF_MASK) {
assert_eq(newtop + gws.map.size() - 1,
gfm.walkLeft(sa.topf + gws.map.back(), step+1));
}
#endif
}
}
assert_lt(new_node_top, new_node_bot);
if(new_node_bot - new_node_top < gws.map.size()) {
assert_eq(curbot - curtop, cur_node_bot - cur_node_top);
SideLocus<index_t> tmptloc, tmpbloc;
pair<index_t, index_t> tmp_node_range;
index_t j1 = 0, j2 = 0;
for(index_t c = 0; c < gws.masks[i].size(); c++) {
if(gws.masks[i][c]) {
j1 = c;
break;
}
}
for(index_t j = 0; j + 1 < gws.map.size(); j++) {
for(index_t c = j1 + 1; c < gws.masks[i].size(); c++) {
if(gws.masks[i][c]) {
j2 = c;
break;
}
}
assert_lt(j1, j2);
SideLocus<index_t>::initFromTopBot(curtop + j1, curtop + j2 + 1, gfm.gh(), gfm.gfm(), tmptloc, tmpbloc);
gfm.mapGLF(tmptloc, tmpbloc, i, &tmp_node_range);
assert_gt(tmp_node_range.second - tmp_node_range.first, 0);
if(tmp_node_range.second - tmp_node_range.first == 1) {
index_t jmap = gws.map[j];
assert_lt(jmap, sa.offs.size());
sa.offs[jmap] = gws.map[j];
gws.map[j] = (index_t)OFF_MASK;
}
j1 = j2;
j2 = 0;
}
for(index_t j = 0; j < gws.map.size();) {
if(gws.map[j] == (index_t)OFF_MASK) {
gws.map.erase(j);
} else j++;
}
#ifndef NDEBUG
for(index_t j = 0; j < gws.map.size(); j++) {
assert_neq(gws.map[j], (index_t)OFF_MASK);
}
#endif
}
assert_eq(new_node_bot - new_node_top, (index_t)(gws.map.size()));
} else {
// For each beyond the first, create a new
// GWState and add it to the GWState list.
// NOTE: this can cause the underlying list to
// be expanded which in turn might leave 'st'
// pointing to bad memory.
st.expand();
st.back().reset();
tmp_node_iedge_count.clear();
pair<index_t, index_t> range, node_range;
SideLocus<index_t>::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc);
range = gfm.mapGLF(curtloc, curbloc, i, &node_range, &tmp_node_iedge_count, cur_node_bot - cur_node_top);
assert_geq(range.second - range.first, node_range.second - node_range.first);
index_t ntop = range.first;
index_t nbot = range.second;
st.back().mapi_ = 0;
st.back().map_.clear();
met.branches++;
// Range narrowed so we have to look at the masks
for(size_t j = 0; j < gws.masks[i].size(); j++) {
if(gws.masks[i][j]) st.back().map_.push_back(map_[j+mapi_+(cur_node_top - node_top)]);
}
assert_lt(node_range.first, node_range.second);
if(node_range.second - node_range.first < st.back().map_.size()) {
assert_eq(curbot - curtop, cur_node_bot - cur_node_top);
SideLocus<index_t> tmptloc, tmpbloc;
pair<index_t, index_t> tmp_node_range;
index_t j1 = 0, j2 = 0;
for(index_t c = 0; c < gws.masks[i].size(); c++) {
if(gws.masks[i][c]) {
j1 = c;
break;
}
}
for(index_t j = 0; j + 1 < st.back().map_.size(); j++) {
for(index_t c = j1 + 1; c < gws.masks[i].size(); c++) {
if(gws.masks[i][c]) {
j2 = c;
break;
}
}
assert_lt(j1, j2);
SideLocus<index_t>::initFromTopBot(curtop + j1, curtop + j2 + 1, gfm.gh(), gfm.gfm(), tmptloc, tmpbloc);
gfm.mapGLF(tmptloc, tmpbloc, i, &tmp_node_range);
assert_gt(tmp_node_range.second - tmp_node_range.first, 0);
if(tmp_node_range.second - tmp_node_range.first == 1) {
index_t jmap = st.back().map_[j];
assert_lt(jmap, sa.offs.size());
sa.offs[jmap] = st.back().map_[j];
st.back().map_[j] = (index_t)OFF_MASK;
}
j1 = j2;
j2 = 0;
}
for(index_t j = 0; j < st.back().map_.size();) {
if(st.back().map_[j] == (index_t)OFF_MASK) {
st.back().map_.erase(j);
} else j++;
}
#ifndef NDEBUG
for(index_t j = 0; j < st.back().map_.size(); j++) {
assert_neq(st.back().map_[j], (index_t)OFF_MASK);
}
#endif
}
assert_eq(node_range.second - node_range.first, st.back().map_.size());
pair<int, int> rret =
st.back().init(
gfm, // forward Bowtie index
ref, // bitpair-encodede reference
sa, // SA range with offsets
st, // EList of all GWStates associated with original range
hit, // associated GWHit object
(index_t)st.size()-1, // range offset
reportList, // if true, report hits to 'res' list
res, // report hits here if reportList is true
ntop, // BW top of new range
nbot, // BW bot of new range
node_range.first,
node_range.second,
tmp_node_iedge_count,
step+1, // # steps taken to get to this new range
met); // update these metrics
ret.first += rret.first;
ret.second += rret.second;
}
ASSERT_ONLY(sum += in[i]);
}
}
}
mapi_ = 0;
// assert_eq(new_node_bot-new_node_top, sum);
assert_gt(newbot, newtop);
assert(top != newtop || bot != newbot);
//assert(!(newtop < top && newbot > top));
top = newtop;
bot = newbot;
node_top = new_node_top;
node_bot = new_node_bot;
node_iedge_count = backup_node_iedge_count;
backup_node_iedge_count.clear();
if(!gws.map.empty()) {
map_ = gws.map;
}
//assert(repOkMapRepeats());
//assert(repOkMapInclusive(hit, range));
assert_eq(node_bot-node_top, (index_t)map_.size());
} else {
// Down to one element
assert_eq(bot, top+1);
assert_eq(1, map_.size()-mapi_);
// Sets top, returns char walked through (which we ignore)
ASSERT_ONLY(index_t oldtop = top);
met.bwops++;
prm.nExFmops++;
pair<index_t, index_t> node_range(0, 0);
pair<index_t, index_t> range = gfm.mapGLF1(top, tloc, &node_range);
top = range.first;
assert_neq(top, oldtop);
bot = top+1;
node_top = node_range.first;
node_bot = node_range.second;
if(mapi_ > 0) {
map_[0] = map_[mapi_];
mapi_ = 0;
}
map_.resize(1);
}
assert(top != origTop || bot != origBot);
step++;
assert_gt(step, 0);
assert_leq((index_t)step, gfm.gh().len());
pair<int, int> rret =
init<S>(
gfm, // forward GFM index
ref, // bitpair-encodede reference
sa, // SA range with offsets
st, // EList of all GWStates associated with original range
hit, // associated GWHit object
range, // range offset
reportList, // if true, report hits to 'res' list
res, // report hits here if reportList is true
met); // update these metrics
ret.first += rret.first;
ret.second += rret.second;
return ret;
}
/**
* Clear all state in preparation for the next walk.
*/
void reset() {
top = bot = node_top = node_bot = step = mapi_ = 0;
ASSERT_ONLY(lastStep_ = -1);
ASSERT_ONLY(inited_ = false);
tloc.invalidate();
bloc.invalidate();
map_.clear();
node_iedge_count.clear();
backup_node_iedge_count.clear();
tmp_node_iedge_count.clear();
}
/**
* Resize the map_ field to the given size.
*/
void initMap(size_t newsz) {
mapi_ = 0;
map_.resize(newsz);
for(size_t i = 0; i < newsz; i++) {
map_[i] = (index_t)i;
}
}
/**
* Return true iff all rows corresponding to this GWState have been
* resolved and reported.
*/
bool doneReporting(const GWHit<index_t, T>& hit) const {
for(size_t i = mapi_; i < map_.size(); i++) {
if(!hit.reported(map(i))) return false;
}
return true;
}
/**
* Return true iff all rows corresponding to this GWState have been
* resolved (but not necessarily reported).
*/
bool doneResolving(const SARangeWithOffs<T, index_t>& sa) const {
for(size_t i = mapi_; i < map_.size(); i++) {
if(sa.offs[map((index_t)i)] == (index_t)OFF_MASK) return false;
}
return true;
}
SideLocus<index_t> tloc; // SideLocus for top
SideLocus<index_t> bloc; // SideLocus for bottom
index_t top; // top elt of range in GBWT
index_t bot; // bot elt of range in GBWT
index_t node_top;
index_t node_bot;
EList<pair<index_t, index_t> > node_iedge_count;
int step; // how many steps have we walked to the left so far
// temporary
EList<pair<index_t, index_t> > backup_node_iedge_count;
EList<pair<index_t, index_t> > tmp_node_iedge_count;
EList<index_t> tmp_zOffs;
EList<index_t> tmp_gbwt_to_node;
protected:
ASSERT_ONLY(bool inited_);
ASSERT_ONLY(int lastStep_);
EList<index_t, 16> map_; // which elts in range 'range' we're tracking
index_t mapi_; // first untrimmed element of map
};
template<typename index_t, typename T, int S>
class GroupWalk2S {
public:
typedef EList<GWState<index_t, T>, S> TStateV;
GroupWalk2S() : st_(8, GW_CAT) {
reset();
}
/**
* Reset the GroupWalk in preparation for the next SeedResults.
*/
void reset() {
elt_ = rep_ = 0;
ASSERT_ONLY(inited_ = false);
}
/**
* Initialize a new group walk w/r/t a QVal object.
*/
void init(
const GFM<index_t>& gfmFw, // forward Bowtie index for walking left
const BitPairReference& ref, // bitpair-encoded reference
SARangeWithOffs<T, index_t>& sa, // SA range with offsets
RandomSource& rnd, // pseudo-random generator for sampling rows
WalkMetrics& met) // update metrics here
{
reset();
#ifndef NDEBUG
inited_ = true;
#endif
// Init GWHit
hit_.init(sa, 0, false, 0);
// Init corresponding GWState
st_.resize(1);
st_.back().reset();
assert(st_.back().repOkBasic());
index_t top = sa.topf;
index_t bot = sa.botf;
index_t node_top = sa.node_top;
index_t node_bot = (index_t)(node_top + sa.size());
st_.back().initMap(sa.size());
st_.ensure(4);
st_.back().init(
gfmFw, // Bowtie index
ref, // bitpair-encoded reference
sa, // SA range with offsets
st_, // EList<GWState>
hit_, // GWHit
0, // range 0
false, // put resolved elements into res_?
NULL, // put resolved elements here
top, // GBW row at top
bot, // GBW row at bot
node_top, // node at top
node_bot, // node at bot
sa.node_iedge_count,
0, // # steps taken
met); // update metrics here
elt_ += sa.size();
assert(hit_.repOk(sa));
}
//
// ELEMENT-BASED
//
/**
* Advance the GroupWalk until all elements have been resolved.
*/
void resolveAll(WalkMetrics& met, PerReadMetrics& prm) {
WalkResult<index_t> res; // ignore results for now
for(size_t i = 0; i < elt_; i++) {
advanceElement((index_t)i, res, met, prm);
}
}
/**
* Advance the GroupWalk until the specified element has been
* resolved.
*/
bool advanceElement(
index_t elt, // element within the range
const GFM<index_t>& gfmFw, // forward Bowtie index for walking left
const BitPairReference& ref, // bitpair-encoded reference
SARangeWithOffs<T, index_t>& sa, // SA range with offsets
GroupWalkState<index_t>& gws, // GroupWalk state; scratch space
WalkResult<index_t>& res, // put the result here
WalkMetrics& met, // metrics
PerReadMetrics& prm) // per-read metrics
{
assert(inited_);
assert(!done());
assert(hit_.repOk(sa));
assert_lt(elt, sa.size()); // elt must fall within range
// Until we've resolved our element of interest...
while(sa.offs[elt] == (index_t)OFF_MASK) {
// Get the GWState that contains our element of interest
size_t range = hit_.fmap[elt].first;
assert_lt(range, st_.size());
st_.ensure(st_[range].node_bot - st_[range].node_top);
// st_.ensure(4);
GWState<index_t, T>& st = st_[range];
assert(!st.doneResolving(sa));
// Returns a pair of numbers, the first being the number of
// resolved but unreported offsets found during this advance, the
// second being the number of as-yet-unresolved offsets.
st.advance(
gfmFw,
ref,
sa,
hit_,
(index_t)range,
false,
NULL,
st_,
gws,
met,
prm);
assert(sa.offs[elt] != (index_t)OFF_MASK ||
!st_[hit_.fmap[elt].first].doneResolving(sa));
}
assert_neq((index_t)OFF_MASK, sa.offs[elt]);
// Report it!
if(!hit_.reported(elt)) {
hit_.setReported(elt);
}
met.reports++;
res.init(
0, // seed offset
false, // orientation
0, // range
elt, // element
sa.topf + elt, // bw row
(index_t)sa.len, // length of hit
sa.offs[elt]); // resolved text offset
rep_++;
return true;
}
/**
* Return true iff all elements have been resolved and reported.
*/
bool done() const { return rep_ == elt_; }
#ifndef NDEBUG
/**
* Check that GroupWalk is internally consistent.
*/
bool repOk(const SARangeWithOffs<T, index_t>& sa) const {
assert(hit_.repOk(sa));
assert_leq(rep_, elt_);
// This is a lot of work
size_t resolved = 0, reported = 0;
// For each element
const size_t sz = sa.size();
for(size_t m = 0; m < sz; m++) {
// Is it resolved?
if(sa.offs[m] != (index_t)OFF_MASK) {
resolved++;
} else {
assert(!hit_.reported(m));
}
// Is it reported?
if(hit_.reported(m)) {
reported++;
}
assert_geq(resolved, reported);
}
assert_geq(resolved, reported);
assert_eq(rep_, reported);
assert_eq(elt_, sz);
return true;
}
#endif
/**
* Return the number of BW elements that we can resolve.
*/
index_t numElts() const { return elt_; }
/**
* Return the size occupied by this GroupWalk and all its constituent
* objects.
*/
size_t totalSizeBytes() const {
return 2 * sizeof(size_t) + st_.totalSizeBytes() + sizeof(GWHit<index_t, T>);
}
/**
* Return the capacity of this GroupWalk and all its constituent objects.
*/
size_t totalCapacityBytes() const {
return 2 * sizeof(size_t) + st_.totalCapacityBytes() + sizeof(GWHit<index_t, T>);
}
#ifndef NDEBUG
bool initialized() const { return inited_; }
#endif
protected:
ASSERT_ONLY(bool inited_); // initialized?
index_t elt_; // # BW elements under the control of the GropuWalk
index_t rep_; // # BW elements reported
// For each orientation and seed offset, keep a GWState object that
// holds the state of the walk so far.
TStateV st_;
// For each orientation and seed offset, keep an EList of GWHit.
GWHit<index_t, T> hit_;
};
#endif /*GROUP_WALK_H_*/