hisat-3n/group_walk.h

/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * group_walk.h
 *
 * Classes and routines for walking a set of BW ranges backwards from the edge
 * of a seed hit with the goal of resolving the offset of each row in each
 * range.  Here "offset" means offset into the concatenated string of all
 * references.  The main class is 'GroupWalk' and an important helper is
 * 'GWState'.
 *
 * For each combination of seed offset and orientation, there is an associated
 * QVal.  Each QVal describes a (possibly empty) set of suffix array ranges.
 * Call these "seed range sets."  Each range in the set is "backed" by a range
 * of the salist, represented as a PListSlice. Such a range is the origin of a
 * walk.
 *
 * When an offset is resolved, it is entered into the salist via the
 * PListSlice.  Note that other routines in this same thread might also be
 * setting elements of the salist, so routines here should expect that elements
 * can go from unresolved to resolved at any time.
 *
 * What bookkeeping do we have to do as we walk?  Before the first step, we
 * convert the initial QVal into a list of SATuples; the SATuples are our link
 * to the corresponding ranges in the suffix array.  The list of SATuples is
 * then converted to a list of GWState objects; these keep track of where we
 * are in our walk (e.g. what 'top' and 'bot' are, how many steps have we gone,
 * etc) as well as how the elements in the current range correspond to elements
 * from the original range.
 *
 * The user asks the GroupWalk to resolve another offset by calling advance().
 * advance() can be called in various ways:
 *
 * (a) The user can request that the GroupWalk proceed until a
 *     *particular* element is resolved, then return that resolved
 *     element.  Other elements may be resolved along the way, but
 *     those results are buffered and may be dispensed in future calls
 *     to advance().
 *
 * (b) The user can request that the GroupWalk select an as-yet-
 *     unreported element at random, proceed until that element is
 *     resolved, and report it.  Again, other elements may be
 *     resolved along the way but they are buffered.
 *
 * (c) The user can request that the GroupWalk resolve elements in a
 *     particular BW range (with a particular offset and orientation)
 *     in an order of its choosing.  The GroupWalk in this case
 *     attempts to resolve as many offsets as possible as quickly as
 *     possible, and returns them as soon as they're found.  The res_
 *     buffer is used in this case.
 *
 * (d) Like (c) but resolving elements at a particular offset and
 *     orientation instead of at a specific BW range.  The res_ buffer
 *     is used in this case as well.  (The original rationale was left
 *     unfinished: "since there's a chance that the ...".)
 *
 * There are simple ways to heuristically reduce the problem size while
 * maintaining randomness.  For instance, the user can put a ceiling on the
 * number of elements that we walk from any given seed offset or range.
 * We can then trim away random subranges to reduce the size of the
 * problem.  There is no need for the caller to do this for us.
 */

#ifndef GROUP_WALK_H_
#define GROUP_WALK_H_

#include <stdint.h>
#include <limits>
#include "ds.h"
#include "gfm.h"
#include "read.h"
#include "reference.h"
#include "mem_ids.h"

/**
 * Encapsulate an SA range and an associated list of slots where the resolved
 * offsets can be placed.
 *
 * The range is described twice: as a row range [topf, botf) in the index and
 * as a node range [node_top, node_bot).  init() asserts that the node range
 * is never wider than the row range; node_iedge_count holds
 * (node index, count) pairs that the walking code uses to convert between
 * the two.
 */
template<typename T, typename index_t>
class SARangeWithOffs {

public:

	/**
	 * Construct in the uninitialized state (inited() returns false).
	 */
	SARangeWithOffs() { reset(); }

	/**
	 * Construct and initialize in one go; see init() for the parameters.
	 */
	SARangeWithOffs(
		index_t tf,
		index_t bf,
		index_t ntf,
		index_t nbf,
		const EList<pair<index_t, index_t> >& n_iedge_count,
		size_t len,
		const T& o)
	{
		init(tf, bf, ntf, nbf, n_iedge_count, len, o);
	}

	/**
	 * Initialize every field.
	 *
	 * tf/bf:          top and bottom row of the range; tf must be < bf.
	 * ntf/nbf:        top and bottom node of the range.
	 * n_iedge_count:  (node index, count) pairs; copied into this object.
	 * len_:           length of the reference sequence involved.
	 * o:              slots that will receive the resolved offsets; copied.
	 */
	void init(
		index_t tf,
		index_t bf,
		index_t ntf,
		index_t nbf,
		const EList<pair<index_t, index_t> >& n_iedge_count,
		size_t len_,
		const T& o)
	{
		topf = tf;
		botf = bf;
		assert_lt(topf, botf);
		node_top = ntf;
		node_bot = nbf;
		assert_leq(node_bot - node_top, botf - topf);
		node_iedge_count = n_iedge_count;
		// Each assignment is its own statement (the original chained the
		// last two with an accidental comma operator).
		len = len_;
		offs = o;
	}

	/**
	 * Reset to uninitialized state.  Only topf is touched; the other fields
	 * keep whatever values they had.
	 */
	void reset() { topf = (index_t)INDEX_MAX; }

	/**
	 * Return true if this is initialized.
	 */
	bool inited() const {
		return topf != (index_t)INDEX_MAX;
	}

	/**
	 * Return the number of times this reference substring occurs in the
	 * reference, which is also the size of the 'offs' TSlice.
	 */
	size_t size() const { return offs.size(); }

	index_t topf;      // top in GBWT index; INDEX_MAX while uninitialized
	index_t botf;      // bottom in GBWT index
	index_t node_top;  // top node
	index_t node_bot;  // bottom node
	EList<pair<index_t, index_t> > node_iedge_count; // (node index, count) pairs
	size_t     len;        // length of the reference sequence involved
	T          offs;       // offsets
};

/**
 * A group of per-thread state that can be shared between all the GroupWalks
 * used in that thread.
 */
template <typename index_t>
struct GroupWalkState {

	GroupWalkState(int cat) : map(cat) {
		masks[0].setCat(cat);
		masks[1].setCat(cat);
		masks[2].setCat(cat);
		masks[3].setCat(cat);
	}

	EList<bool> masks[4];      // temporary list for masks; used in GWState
	EList<index_t, 16> map;   // temporary list of GWState maps
};

/**
 * Counters recording how much work the walk-left logic has performed.
 */
struct WalkMetrics {

	/**
	 * Start with every counter at zero.
	 */
	WalkMetrics() {
		reset();
	}

	/**
	 * Add each counter of 'm' into the matching counter of this object.
	 * This is the only safe way to update a WalkMetrics shared by many
	 * threads; 'getLock' is forwarded to the ThreadSafe guard built on
	 * mutex_m (presumably the lock is only taken when it is true).
	 */
	void merge(const WalkMetrics& m, bool getLock = false) {
		ThreadSafe ts(&mutex_m, getLock);
		bwops       += m.bwops;
		branches    += m.branches;
		resolves    += m.resolves;
		refresolves += m.refresolves;
		reports     += m.reports;
	}

	/**
	 * Zero every counter.  The mutex is left alone.
	 */
	void reset() {
		bwops       = 0;
		branches    = 0;
		resolves    = 0;
		refresolves = 0;
		reports     = 0;
	}

	uint64_t bwops;       // Burrows-Wheeler operations
	uint64_t branches;    // BW range branch-offs
	uint64_t resolves;    // # offs resolved with BW walk-left
	uint64_t refresolves; // # resolutions caused by reference scanning
	uint64_t reports;     // # offs reported (1 can be reported many times)
	MUTEX_T mutex_m;
};

/**
 * Coordinates identifying one BW element that the GroupWalk might resolve:
 * seed offset index, strand, range, element within the range, and length.
 */
template <typename index_t>
struct GWElt {

	GWElt() { reset(); }

	/**
	 * Put this GWElt back into the uninitialized state: every index field
	 * becomes OFF_MASK and the strand flag becomes false.
	 */
	void reset() {
		const index_t unset = (index_t)OFF_MASK;
		offidx = unset;
		range  = unset;
		elt    = unset;
		len    = unset;
		fw     = false;
	}

	/**
	 * Initialize this GWElt from its five coordinates.
	 */
	void init(
		index_t oi,
		bool f,
		index_t r,
		index_t e,
		index_t l)
	{
		offidx = oi;
		fw     = f;
		range  = r;
		elt    = e;
		len    = l;
	}

	/**
	 * Return true iff this GWElt and the given GWElt refer to the same
	 * element, i.e. all five fields agree.
	 */
	bool operator==(const GWElt& o) const {
		if(offidx != o.offidx) return false;
		if(fw != o.fw)         return false;
		if(range != o.range)   return false;
		if(elt != o.elt)       return false;
		return len == o.len;
	}

	/**
	 * Return true iff this GWElt and the given GWElt refer to
	 * different elements.
	 */
	bool operator!=(const GWElt& o) const {
		return !operator==(o);
	}

	index_t offidx; // seed offset index
	bool    fw;     // strand
	index_t range;  // range
	index_t elt;    // element
	index_t len;    // length
};

/**
 * The outcome of looking up one BW element in the index: which element it
 * was, which row was resolved, and the text offset that was found.
 */
template <typename index_t>
struct WalkResult {

	WalkResult() { reset(); }

	/**
	 * Put this WalkResult back into the uninitialized state.
	 */
	void reset() {
		elt.reset();
		bwrow = (index_t)OFF_MASK;
		toff  = (index_t)OFF_MASK;
	}

	/**
	 * Initialize this WalkResult.
	 */
	void init(
		index_t oi,  // seed offset index
		bool f,       // strand
		index_t r,   // range
		index_t e,   // element
		index_t bwr, // BW row
		index_t len, // length
		index_t to)  // text offset
	{
		// The first four coordinates plus the length describe the element
		elt.init(oi, f, r, e, len);
		toff  = to;
		bwrow = bwr;
	}

	GWElt<index_t> elt;   // element resolved
	index_t        bwrow; // SA row resolved
	index_t        toff;  // resolved offset from SA sample
};

/**
 * A GW hit describes one original SA range that is being resolved: the seed
 * offset index, strand, range index and length it belongs to, a forward map
 * from each original element to the (GWState, element) pair currently
 * tracking it, and a per-element flag recording whether the element has been
 * reported yet.
 */
template<typename index_t, typename T>
class GWHit {

public:
	GWHit() :
		fmap(0, GW_CAT),
		offidx((index_t)OFF_MASK),
		fw(false),
		range((index_t)OFF_MASK),
		len((index_t)OFF_MASK),
		reported_(0, GW_CAT),
		nrep_(0)
	{
		assert(repOkBasic());
	}

	/**
	 * Initialize with a new SA range.  Resize the reported_ and fmap lists
	 * so that there's one entry per suffix array element; every element
	 * starts out unreported and with an unset (OFF_MASK, OFF_MASK) forward
	 * mapping.
	 *
	 * sa: range being tracked; only sa.len and sa.offs.size() are read.
	 * oi: seed offset index.
	 * f:  orientation.
	 * r:  original range index.
	 */
	void init(
		SARangeWithOffs<T, index_t>& sa,
		index_t oi,
		bool f,
		index_t r)
	{
		nrep_ = 0;
		offidx = oi;
		fw = f;
		range = r;
		len = (index_t)sa.len;
		reported_.resize(sa.offs.size());
		reported_.fill(false);
		fmap.resize(sa.offs.size());
		fmap.fill(make_pair((index_t)OFF_MASK, (index_t)OFF_MASK));
	}

	/**
	 * Clear the reported_ and fmap lists and return every scalar field to
	 * its uninitialized value.
	 */
	void reset() {
		reported_.clear();
		fmap.clear();
		nrep_ = 0;
		offidx = (index_t)OFF_MASK;
		fw = false;
		range = (index_t)OFF_MASK;
		len = (index_t)OFF_MASK;
	}

#ifndef NDEBUG
	/**
	 * Check that GWHit is internally consistent with the given SA range:
	 * both lists have one entry per element of sa.offs, no two unresolved
	 * elements share the same forward mapping, and nrep_ matches the number
	 * of set reported_ flags.
	 */
	bool repOk(const SARangeWithOffs<T, index_t>& sa) const {
		assert_eq(reported_.size(), sa.offs.size());
		assert_eq(fmap.size(), sa.offs.size());
		// Shouldn't be any repeats among as-yet-unresolveds
		size_t nrep = 0;
		for(size_t i = 0; i < fmap.size(); i++) {
			if(reported_[i]) nrep++;
			if(sa.offs[i] != (index_t)OFF_MASK) {
				continue;
			}
			for(size_t j = i+1; j < fmap.size(); j++) {
				if(sa.offs[j] != (index_t)OFF_MASK) {
					continue;
				}
				assert(fmap[i] != fmap[j]);
			}
		}
		assert_eq(nrep_, nrep);
		return true;
	}

	/**
	 * Return true iff this GWHit is not obviously corrupt.  Currently there
	 * is nothing to check, so this always returns true.
	 */
	bool repOkBasic() {
		return true;
	}
#endif

	/**
	 * Set the ith element to be reported.  The element must not have been
	 * reported already.
	 */
	void setReported(index_t i) {
		// Validate the index before it is used to read reported_
		assert_lt(i, reported_.size());
		assert(!reported_[i]);
		reported_[i] = true;
		nrep_++;
	}

	/**
	 * Return true iff element i has been reported.
	 */
	bool reported(index_t i) const {
		assert_lt(i, reported_.size());
		return reported_[i];
	}

	/**
	 * Return true iff all elements have been reported.
	 */
	bool done() const {
		assert_leq(nrep_, reported_.size());
		return nrep_ == reported_.size();
	}

	EList<std::pair<index_t, index_t>, 16> fmap; // forward map; to GWState & elt
	index_t offidx; // offset idx
	bool fw;         // orientation
	index_t range;  // original range index
	index_t len;    // length of hit

protected:

	EList<bool, 16> reported_; // per-elt bool indicating whether it's been reported
	index_t nrep_;             // number of elements with reported_ set
};

/**
 * Encapsulates the progress made along a particular path from the original
 * range.
 */
template<typename index_t, typename T>
class GWState {

public:

	/**
	 * Construct an empty state: build the map list, then reset() all other
	 * fields.
	 */
	GWState() :
		map_(0, GW_CAT)
	{
		reset();
		assert(repOkBasic());
	}

	/**
	 * Point this GWState at a new range, described by its row bounds, its
	 * node bounds, its per-node extra-row counts and the number of steps
	 * taken so far, then delegate to the main init() overload.
	 *
	 * We assume map is already set up.
	 *
	 * Returns the pair produced by the main init() overload: the number of
	 * resolved but unreported offsets found, and the number of offsets that
	 * are still unresolved.
	 */
	template<int S>
	pair<int, int> init(
		const GFM<index_t>& gfm,      // index to walk left in
		const BitPairReference& ref,  // bitpair-encoded reference
		SARangeWithOffs<T, index_t>& sa,       // SA range with offsets
		EList<GWState, S>& sts,       // EList of GWStates for range being advanced
		GWHit<index_t, T>& hit,       // Corresponding hit structure
		index_t range,                // which range is this?
		bool reportList,              // if true, "report" resolved offsets immediately by adding them to 'res' list
		EList<WalkResult<index_t>, 16>* res,   // EList where resolved offsets should be appended
		index_t tp,                   // top of range at this step
		index_t bt,                   // bot of range at this step
		index_t n_tp,                 // node at top
		index_t n_bt,                 // node at bot
		const EList<pair<index_t, index_t> >& n_iedge_count,
		index_t st,                   // # steps taken to get to this step
		WalkMetrics& met)
	{
		// Validate the incoming range before any state is overwritten
		assert_gt(bt, tp);
		assert_gt(n_bt, n_tp);
		assert_geq(bt - tp, n_bt - n_tp);
		assert_lt(range, sts.size());
		assert(!inited_);

		// Row range and node range
		top = tp;
		bot = bt;
		node_top = n_tp;
		node_bot = n_bt;
		node_iedge_count = n_iedge_count;

		// Step bookkeeping; the main overload asserts step == lastStep_+1
		step = st;
		ASSERT_ONLY(inited_ = true);
		ASSERT_ONLY(lastStep_ = step-1);

		pair<int, int> result =
			init(gfm, ref, sa, sts, hit, range, reportList, res, met);
		return result;
	}

	/**
	 * Initialize this GWState.
	 *
	 * We assume map is already set up, and that 'step' is equal to the
	 * number of steps taken to get to the new top/bot pair *currently*
	 * in the top and bot fields.
	 *
	 * The work is done in this order:
	 *  1. For every element still tracked in map_, try to resolve its
	 *     offset with gfm.tryOffset(); resolved offsets are written into
	 *     'sa' via setOff().  If 'reportList' is set, resolved elements
	 *     that have not been reported yet are appended to '*res'.
	 *  2. Resolved elements at the beginning and end of the range are
	 *     trimmed away, adjusting top/bot, node_top/node_bot,
	 *     node_iedge_count, map_ and mapi_.
	 *  3. If nothing is left unresolved, return.
	 *  4. If any row listed in gfm._zOffs falls strictly inside
	 *     (top, bot), the range is split: new GWStates for the pieces
	 *     after each such row are appended to 'st' and initialized, and
	 *     this state shrinks to the piece before the first one.
	 *  5. tloc/bloc are set up for the next step.
	 *
	 * 'ref' is only referenced by a sanity check that is currently
	 * compiled out with #if 0.
	 *
	 * NOTE(review): step 4 calls st.expand() and keeps using this object
	 * afterwards; the advance() documentation in this class says an
	 * expansion of 'st' invalidates the GWState, so presumably callers
	 * must guarantee enough spare capacity -- confirm.
	 *
	 * Returns a pair of numbers, the first being the number of
	 * resolved but unreported offsets found during this advance (only
	 * counted when 'reportList' is false), the second being the number
	 * of as-yet-unresolved offsets.
	 */
	template<int S>
	pair<int, int> init(
		const GFM<index_t>& gfm,      // forward Bowtie index
		const BitPairReference& ref,  // bitpair-encoded reference
		SARangeWithOffs<T, index_t>& sa,       // SA range with offsets
		EList<GWState, S>& st,        // EList of GWStates for advancing range
		GWHit<index_t, T>& hit,       // Corresponding hit structure
		index_t range,                // range being inited
		bool reportList,              // report resolutions, adding to 'res' list?
		EList<WalkResult<index_t>, 16>* res,   // EList to append resolutions
		WalkMetrics& met)             // update these metrics
	{
		assert(inited_);
		assert_eq(step, lastStep_+1);
		ASSERT_ONLY(lastStep_++);
		assert_leq((index_t)step, gfm.gh().len());
		assert_lt(range, st.size());
        pair<int, int> ret = make_pair(0, 0);
		index_t trimBegin = 0, trimEnd = 0;
		bool empty = true; // assume all resolved until proven otherwise
		// Commit new information, if any, to the PListSlice.  Also,
		// trim and check if we're done.
		// On entry the map is expected to hold exactly one entry per node
        assert_eq(node_bot - node_top, map_.size());
        ASSERT_ONLY(index_t num_orig_iedges = 0, orig_e = 0);
		// num_iedges: running sum of node_iedge_count[..].second over the
		// entries whose .first lies before the current element; e is the
		// cursor into node_iedge_count
        index_t num_iedges = 0, e = 0;
		for(size_t i = mapi_; i < map_.size(); i++) {
			bool resolved = (off((index_t)i, sa) != (index_t)OFF_MASK);
			if(!resolved) {
#ifndef NDEBUG
				// Debug only: same accumulation against the original
				// range, to reconstruct the original row for the checks below
                while(orig_e < sa.node_iedge_count.size()) {
                    if(map((index_t)i) <= sa.node_iedge_count[orig_e].first) {
                        break;
                    }
                    num_orig_iedges += sa.node_iedge_count[orig_e].second;
                    orig_e++;
                }
#endif
				// Add in the counts of all entries located before element i
                while(e < node_iedge_count.size()) {
                    if(i <= node_iedge_count[e].first) {
                        break;
                    }
                    num_iedges += node_iedge_count[e].second;
                    e++;
                }
				// Elt not resolved yet; try to resolve it now
				index_t bwrow = (index_t)(top + i + num_iedges);
                index_t node = (index_t)(node_top + i);
				index_t toff = gfm.tryOffset(bwrow, node);
                ASSERT_ONLY(index_t origBwRow = sa.topf + map((index_t)i) + num_orig_iedges);
                ASSERT_ONLY(index_t origNode = sa.node_top + map((index_t)i));
				assert_eq(bwrow, gfm.walkLeft(origBwRow, step));
				if(toff != (index_t)OFF_MASK) {
					// Yes, toff was resolvable
					assert_eq(toff, gfm.getOffset(bwrow, node));
					met.resolves++;
					// toff belongs to the row reached after 'step' steps;
					// adding 'step' gives the offset of the original row
					// (checked by the assertion below)
					toff += step;
                    assert_eq(toff, gfm.getOffset(origBwRow, origNode));
					setOff((index_t)i, toff, sa, met);
					// When reporting inline the element is reported below
					// instead of being counted as resolved-but-unreported
					if(!reportList) ret.first++;
#if 0
// used to be #ifndef NDEBUG, but since we no longer require that the reference
// string info be included, this is no longer relevant.

					// Sanity check that the reference characters under this
					// hit match the seed characters in hit.satup->key.seq.
					// This is NOT a check that we associated the exact right
					// text offset with the BW row.  This is an important
					// distinction because when resolved offsets are filled in
					// via reference scanning, they are not necessarily the
					// exact right text offsets to associate with the
					// respective BW rows but they WILL all be correct w/r/t
					// the reference sequence underneath, which is what really
					// matters here.
					index_t tidx = (index_t)OFF_MASK, tof, tlen;
					bool straddled = false;
					gfm.joinedToTextOff(
						hit.len, // length of seed
						toff,    // offset in joined reference string
						tidx,    // reference sequence id
						tof,     // offset in reference coordinates
						tlen,    // length of reference sequence
						true,    // don't reject straddlers
						straddled);
					if(tidx != (index_t)OFF_MASK &&
					   hit.satup->key.seq != std::numeric_limits<uint64_t>::max())
					{
						// key: 2-bit characters packed into a 64-bit word with
						// the least significant bitpair corresponding to the
						// rightmost character on the Watson reference strand.
						uint64_t key = hit.satup->key.seq;
						for(int64_t j = tof + hit.len-1; j >= tof; j--) {
							// Get next reference base to the left
							int c = ref.getBase(tidx, j);
							assert_range(0, 3, c);
							// Must equal least significant bitpair of key
							if(c != (int)(key & 3)) {
								// Oops; when we jump to the piece of the
								// reference where the seed hit is, it doesn't
								// match the seed hit.  Before dying, check
								// whether we have the right spot in the joined
								// reference string
								SString<char> jref;
								gfm.restore(jref);
								uint64_t key2 = hit.satup->key.seq;
								for(int64_t k = toff + hit.len-1; k >= toff; k--) {
									int c = jref[k];
									assert_range(0, 3, c);
									assert_eq(c, (int)(key2 & 3));
									key2 >>= 2;
								}
								assert(false);
							}
							key >>= 2;
						}
					}
#endif
				}
			}
			// Is the element resolved?  We ask this regardless of how it was
			// resolved (whether this function did it just now, whether it did
			// it a while ago, or whether some other function outside GroupWalk
			// did it).
			if(off((index_t)i, sa) != (index_t)OFF_MASK) {
				if(reportList && !hit.reported(map((index_t)i))) {
					// Report it
					index_t toff = off((index_t)i, sa);
					assert(res != NULL);
					res->expand();
					// NOTE(review): unlike the debug-only origBwRow above,
					// this one does not add any node_iedge_count
					// contribution -- confirm that is intended
					index_t origBwRow = sa.topf + map((index_t)i);
					res->back().init(
						hit.offidx, // offset idx
						hit.fw,     // orientation
						hit.range,  // original range index
						map((index_t)i),     // original element offset
						origBwRow,  // BW row resolved
						hit.len,    // hit length
						toff);      // text offset
					hit.setReported(map((index_t)i));
					met.reports++;
				}
				// Offset resolved
				if(empty) {
					// Haven't seen a non-empty entry yet, so we
					// can trim this from the beginning.
					trimBegin++;
				} else {
					// Counts the run of resolved elements since the last
					// unresolved one; reset to 0 whenever an unresolved
					// element is seen, so it ends up as the trailing run
					trimEnd++;
				}
			} else {
				// Offset not yet resolved
				ret.second++;
				trimEnd = 0;
				empty = false;
				// Set the forward map in the corresponding GWHit
				// object to point to the appropriate element of our
				// range
				assert_geq(i, mapi_);
				index_t bmap = map((index_t)i);
				hit.fmap[bmap].first = range;
				hit.fmap[bmap].second = (index_t)i;
#ifndef NDEBUG
				// No earlier unresolved element of the original range may
				// point at this same (range, i) slot
				for(size_t j = 0; j < bmap; j++) {
					if(sa.offs[j] == (index_t)OFF_MASK &&
					   hit.fmap[j].first == range)
					{
						assert_neq(i, hit.fmap[j].second);
					}
				}
#endif
			}
		}

		// Trim from beginning
		assert_geq(trimBegin, 0);
		mapi_ += trimBegin;
        if(trimBegin > 0) {
			// Skip one row per trimmed node...
            top += trimBegin;
            index_t e = 0;
			// ...plus the extra rows of every entry that sits on a trimmed node
            for(; e < node_iedge_count.size(); e++) {
                if(node_iedge_count[e].first >= trimBegin) break;
                assert_geq(top, node_iedge_count[e].second);
                top += node_iedge_count[e].second;
            }
            if(e > 0) node_iedge_count.erase(0, e);
			// Re-base the surviving entries onto the new first node
            for(e = 0; e < node_iedge_count.size(); e++) {
                assert_geq(node_iedge_count[e].first, trimBegin);
                node_iedge_count[e].first -= trimBegin;
            }
        }

        node_top += trimBegin;
		if(trimEnd > 0) {
			// Trim from end
			map_.resize(map_.size() - trimEnd);
            bot -= trimEnd;
			// node_top has already been advanced, so this is the node count
			// after the front trim but before the back trim
            index_t node_range = node_bot - node_top;
			// Drop the entries sitting on trimmed tail nodes, together with
			// their extra rows
            while(node_iedge_count.size() > 0) {
                if(node_iedge_count.back().first < (node_range - trimEnd)) break;
                assert_geq(bot, node_iedge_count.back().second);
                bot -= node_iedge_count.back().second;
                node_iedge_count.pop_back();
            }
		}
        node_bot -= trimEnd;
#ifndef NDEBUG
		// Row range width must equal node count plus all remaining extra rows
        assert_leq(node_top, node_bot);
        index_t num_nodes = node_bot - node_top;
        index_t add = 0;
        for(index_t e = 0; e < node_iedge_count.size(); e++) {
            assert_lt(node_iedge_count[e].first, num_nodes);
            add += node_iedge_count[e].second;
        }
        assert_eq(bot - top, num_nodes + add);

#endif
		if(empty) {
			assert(done());
#ifndef NDEBUG
			// If range is done, all elements from map should be
			// resolved
			for(size_t i = mapi_; i < map_.size(); i++) {
				assert_neq((index_t)OFF_MASK, off((index_t)i, sa));
			}
			// If this range is done, then it should be the case that
			// all elements in the corresponding GWHit that point to
			// this range are resolved.
			for(size_t i = 0; i < hit.fmap.size(); i++) {
				if(sa.offs[i] == (index_t)OFF_MASK) {
					assert_neq(range, hit.fmap[i].first);
				}
			}
#endif
			return ret;
		} else {
			assert(!done());
		}
		// Is there a dollar sign in the middle of the range?
		// Collect every gfm._zOffs row that lies strictly inside (top, bot)
        tmp_zOffs.clear();
        for(index_t i = 0; i < gfm._zOffs.size(); i++) {
#ifndef NDEBUG
            if(i > 0) {
                assert_lt(gfm._zOffs[i-1], gfm._zOffs[i]);
            }
#endif
            assert_neq(top, gfm._zOffs[i]);
            // assert_neq(bot-1, gfm._zOffs[i]);
            if(gfm._zOffs[i] > top && gfm._zOffs[i] < bot) {
                tmp_zOffs.push_back(gfm._zOffs[i]);
            }
        }

        // Yes, the dollar sign is in the middle of this range.  We
        // must split it into the two ranges on either side of the
        // dollar.  Let 'bot' and 'top' delimit the portion of the
        // range prior to the dollar.
        if(tmp_zOffs.size() > 0) {
			// Build a lookup from row (relative to top) to node (relative
			// to node_top): each node owns one row plus the extra rows
			// listed for it in node_iedge_count
            tmp_gbwt_to_node.clear();
            index_t n = 0, e = 0;
            for(index_t r = 0; r < (bot - top); r++) {
                tmp_gbwt_to_node.push_back(n);
                if(e < node_iedge_count.size()) {
                    assert_leq(n, node_iedge_count[e].first);
                    if(n == node_iedge_count[e].first) {
                        for(index_t a = 0; a < node_iedge_count[e].second; a++) {
                            tmp_gbwt_to_node.push_back(n);
                            r++;
                        }
                        e++;
                    }
                }
                n++;
            }
            assert_eq(bot - top, tmp_gbwt_to_node.size());
			// Spawn one new GWState for the rows following each collected row
            for(index_t i = 0; i < tmp_zOffs.size(); i++) {
                // Note: might be able to do additional trimming off the end.
                // Create a new range for the portion after the dollar.
                index_t new_top = tmp_zOffs[i] + 1;
				// The next collected row follows immediately: nothing in between
                if(i + 1 < tmp_zOffs.size() && new_top == tmp_zOffs[i+1]) {
                    continue;
                }
                assert_leq(new_top - top, tmp_gbwt_to_node.size());
				// The collected row was the last row of the range
                if(new_top - top == tmp_gbwt_to_node.size()) {
                    break;
                }
                index_t new_node_top = tmp_gbwt_to_node[new_top - top] + node_top;
                assert_lt(new_node_top, node_bot);
				// The piece ends at the next collected row, or at bot
                index_t new_bot;
                if(i + 1 < tmp_zOffs.size()) {
                    new_bot = tmp_zOffs[i+1];
                } else {
                    new_bot = bot;
                }
                index_t new_node_bot = node_bot;
                if(new_bot - top < tmp_gbwt_to_node.size()) {
                    new_node_bot = node_top + tmp_gbwt_to_node[new_bot - top];
					// new_bot falls inside a node's run of rows, so that node
					// still belongs to this piece
                    if(new_bot - top > 0 &&
                       tmp_gbwt_to_node[new_bot - top] == tmp_gbwt_to_node[new_bot - top - 1]) {
                        new_node_bot++;
                    }
                }
                tmp_node_iedge_count.clear();
                if(new_top >= new_bot) continue;
				// Rebuild the (node, extra rows) list for the new piece by
				// finding runs of rows that map to the same node
                for(index_t j = new_top - top; j + 1 < new_bot - top;) {
                    index_t n = tmp_gbwt_to_node[j];
                    index_t j2 = j + 1;
                    while(j2 < new_bot - top) {
                        if(n != tmp_gbwt_to_node[j2]) {
                            break;
                        }
                        j2++;
                    }
                    if(j + 1 < j2) {
                        tmp_node_iedge_count.expand();
                        assert_lt(node_top, new_node_top);
                        tmp_node_iedge_count.back().first = n - (new_node_top - node_top);
                        tmp_node_iedge_count.back().second = j2 - j - 1;
                    }
                    j = j2;
                }
				// Append the new state and copy over the slice of our map
				// that covers its nodes
                st.expand();
                st.back().reset();
                st.back().initMap(new_node_bot - new_node_top);
                for(index_t j = new_node_top; j < new_node_bot; j++) {
                    st.back().map_[j - new_node_top] = map(j - node_top + mapi_);
                }
                st.back().init(
                               gfm,
                               ref,
                               sa,
                               st,
                               hit,
                               (index_t)st.size()-1,
                               reportList,
                               res,
                               new_top,
                               new_bot,
                               new_node_top,
                               new_node_bot,
                               tmp_node_iedge_count,
                               step,
                               met);
            }
			// Shrink this state to the rows before the first collected row
            assert_eq((index_t)map_.size(), node_bot - node_top + mapi_);
            bot = tmp_zOffs[0];
            assert_lt(bot - top, tmp_gbwt_to_node.size());
            node_bot = tmp_gbwt_to_node[bot - top - 1] + node_top + 1;
            map_.resize(node_bot - node_top + mapi_);
			// Drop entries on nodes that no longer belong to this state and
			// recompute the row width implied by what is left
            index_t width = node_bot - node_top;
            for(index_t e = 0; e < node_iedge_count.size(); e++) {
                if(node_iedge_count[e].first >= node_bot - node_top) {
                    node_iedge_count.resize(e);
                    break;
                }
                width += node_iedge_count[e].second;
            }
			// The cut may fall inside the last node's run of rows; in that
			// case the implied width is one too large and the last entry
			// gives up one extra row
            if(width != bot - top) {
                assert_eq(width, bot - top + 1);
                assert_gt(node_iedge_count.size(), 0);
                assert_gt(node_iedge_count.back().second, 0);
                node_iedge_count.back().second -= 1;
                if(node_iedge_count.back().second == 0) {
                    node_iedge_count.resize(node_iedge_count.size()- 1);
                }
            }
        }
		assert_gt(bot, top);
		// Prepare SideLocus's for next step
		if(bot-top > 1) {
			SideLocus<index_t>::initFromTopBot(top, bot, gfm.gh(), gfm.gfm(), tloc, bloc);
			assert(tloc.valid()); assert(tloc.repOk(gfm.gh()));
			assert(bloc.valid()); assert(bloc.repOk(gfm.gh()));
		} else {
			// Single row left: only tloc is needed
			tloc.initFromRow(top, gfm.gh(), gfm.gfm());
			assert(tloc.valid()); assert(tloc.repOk(gfm.gh()));
			bloc.invalidate();
		}
		return ret;
	}

#ifndef NDEBUG
	/**
	 * Check if this GWP is internally consistent.
	 *
	 * Asserts that a non-done state has bot > top, that tloc/bloc are
	 * valid while resolving is still in progress, that the number of
	 * active map entries equals bot-top, that every tracked row is the
	 * original row walked left 'step' times, and that the map has no
	 * repeats and covers every fmap entry pointing at this range.
	 *
	 * NOTE(review): this checker looks stale relative to the rest of the
	 * file.  It reads hit.satup, but the GWHit class declared in this
	 * file has no 'satup' member; it calls off(i, hit) although off()
	 * takes a SARangeWithOffs; and it is const while off() is not.  It
	 * also still equates map size with bot-top, whereas init() asserts
	 * bot-top == node count plus extra rows.  As a member of a class
	 * template it presumably only compiles because nothing instantiates
	 * it -- confirm before calling.
	 */
	bool repOk(
		const GFM<index_t>& gfm,
		GWHit<index_t, T>& hit,
		index_t range) const
	{
		assert(done() || bot > top);
		assert(doneResolving(hit) || (tloc.valid() && tloc.repOk(gfm.gh())));
		assert(doneResolving(hit) || bot == top+1 || (bloc.valid() && bloc.repOk(gfm.gh())));
		assert_eq(map_.size()-mapi_, bot-top);
		// Make sure that 'done' is compatible with whether we have >=
		// 1 elements left to resolve.
		// NOTE(review): 'left' is counted but never compared to anything
		int left = 0;
		for(size_t i = mapi_; i < map_.size(); i++) {
			ASSERT_ONLY(index_t row = (index_t)(top + i - mapi_));
			ASSERT_ONLY(index_t origRow = hit.satup->topf + map(i));
			assert(step == 0 || row != origRow);
			assert_eq(row, gfm.walkLeft(origRow, step));
			assert_lt(map_[i], hit.satup->offs.size());
			if(off(i, hit) == (index_t)OFF_MASK) left++;
		}
		assert(repOkMapRepeats());
		assert(repOkMapInclusive(hit, range));
		return true;
	}

	/**
	 * Cheap sanity check: the row range must not be inverted (bot >= top).
	 * Always returns true so that it can be wrapped in assert().
	 */
	bool repOkBasic() {
		assert_geq(bot, top);
		return true;
	}

	/**
	 * Check that the fmap elements pointed to by our map_ include all
	 * of the fmap elements that point to this range.
	 *
	 * For every unresolved element of the original range whose forward
	 * map names 'range', some active entry of map_ must map back to it.
	 *
	 * NOTE(review): reads hit.satup, but the GWHit class declared in this
	 * file has no 'satup' member, so this presumably only compiles
	 * because nothing instantiates it -- confirm before calling.
	 */
	bool repOkMapInclusive(GWHit<index_t, T>& hit, index_t range) const {
		for(size_t i = 0; i < hit.fmap.size(); i++) {
			// Only unresolved elements are expected to be tracked
			if(hit.satup->offs[i] == (index_t)OFF_MASK) {
				if(range == hit.fmap[i].first) {
					ASSERT_ONLY(bool found = false);
					// Linear search of the active part of the map
					for(size_t j = mapi_; j < map_.size(); j++) {
						if(map(j) == i) {
							ASSERT_ONLY(found = true);
							break;
						}
					}
					assert(found);
				}
			}
		}
		return true;
	}

	/**
	 * Check that the active part of map_ (from mapi_ onwards) holds no
	 * value twice.  Always returns true so it can be wrapped in assert().
	 */
	bool repOkMapRepeats() const {
		size_t a = mapi_;
		while(a < map_.size()) {
			// Compare entry a against every later entry
			size_t b = a + 1;
			while(b < map_.size()) {
				assert_neq(map_[a], map_[b]);
				b++;
			}
			a++;
		}
		return true;
	}
#endif

	/**
	 * Return the offset currently recorded in 'sa' for the element at
	 * position 'i' of map_.  An element that has not been resolved yet
	 * holds (index_t)OFF_MASK, which is what gets returned in that case.
	 *
	 * 'i' is an absolute index into map_ and must satisfy
	 * mapi_ <= i < map_.size(); note that this differs from setOff(),
	 * whose index is relative to mapi_.
	 *
	 * The method only reads state, so it is const-qualified and may be
	 * called on a const GWState.
	 */
	index_t off(
				index_t i,
				const SARangeWithOffs<T, index_t>& sa) const
	{
		assert_geq(i, mapi_);
		assert_lt(i, map_.size());
		assert_lt(map_[i], sa.offs.size());
		return sa.offs.get(map_[i]);
	}

	/**
	 * Translate position 'i' of map_ (an absolute index, at least mapi_)
	 * into the index of the corresponding element in the original range.
	 */
	index_t map(index_t i) const {
		assert_geq(i, mapi_);
		assert_lt(i, map_.size());
		const index_t origElt = map_[i];
		return origElt;
	}

	/**
	 * Return the index of the first map_ entry that has not been
	 * trimmed away.
	 */
	index_t mapi() const {
		const index_t firstActive = mapi_;
		return firstActive;
	}

	/**
	 * Return how many elements of the tracked range are still active,
	 * i.e. the number of map_ entries from mapi_ onward.
	 */
	index_t size() const {
		const size_t total = map_.size();
		return (index_t)(total - mapi_);
	}

	/**
	 * Return true iff all elements in this leaf range have been
	 * resolved.
	 */
	bool done() const {
		return size() == 0;
	}

	/**
	 * Record 'off' as the offset of the ith active element.  'i' is
	 * relative to mapi_; the map_ entry at i + mapi_ names the slot of
	 * sa.offs that receives the value.  'met' is accepted but not used.
	 */
	void setOff(
		index_t i,
		index_t off,
		SARangeWithOffs<T, index_t>& sa,
		WalkMetrics& met)
	{
		const index_t mapIdx = i + mapi_;
		assert_lt(mapIdx, map_.size());
		assert_lt(map_[mapIdx], sa.offs.size());
		const size_t slot = map_[mapIdx];
		sa.offs[slot] = off;
		// Read the value back to make sure it was stored intact
		assert_eq(off, sa.offs[slot]);
	}

	/**
	 * Advance this GWState by one step (i.e. one BW operation).  In
	 * the event of a "split", more elements are added to the EList
	 * 'st', which must have room for at least 3 more elements without
	 * needing another expansion.  If an expansion of 'st' is
	 * triggered, this GWState object becomes invalid.
	 *
	 * NOTE(review): the capacity assertion at the top of the body is
	 * commented out, so the "room for more elements" precondition is not
	 * checked here; it relies on the caller reserving space beforehand.
	 *
	 * Two cases are handled:
	 *  - bot - top > 1: the range is cut into sub-ranges according to
	 *    node_iedge_count; each sub-range is mapped through all four
	 *    characters.  The first non-empty result becomes the new range
	 *    of this GWState; every further non-empty result is appended to
	 *    'st' as a new GWState and initialized via init().
	 *  - bot - top == 1: the single row is stepped with gfm.mapGLF1().
	 * In both cases 'step' is incremented and init<S>() is called on
	 * this object afterwards.
	 *
	 * Returns a pair of numbers, the first being the number of
	 * resolved but unreported offsets found during this advance, the
	 * second being the number of as-yet-unresolved offsets.  Both are
	 * accumulated from the pairs returned by the init() calls made here.
	 */
	template <int S>
	pair<int, int> advance(
		const GFM<index_t>& gfm,     // the forward Bowtie index, for stepping left
		const BitPairReference& ref, // bitpair-encoded reference
		SARangeWithOffs<T, index_t>& sa,      // SA range with offsets
		GWHit<index_t, T>& hit,      // the associated GWHit object
		index_t range,               // which range is this?
		bool reportList,             // if true, "report" resolved offsets immediately by adding them to 'res' list
		EList<WalkResult<index_t>, 16>* res,  // EList where resolved offsets should be appended
		EList<GWState, S>& st,       // EList of GWStates for range being advanced
		GroupWalkState<index_t>& gws,         // temporary storage for masks
		WalkMetrics& met,
		PerReadMetrics& prm)
	{
		// Remember the incoming range so we can assert that it changed
		ASSERT_ONLY(index_t origTop = top);
		ASSERT_ONLY(index_t origBot = bot);
		assert_geq(step, 0);
		assert_eq(step, lastStep_);
		// assert_geq(st.capacity(), st.size() + 4);
		assert(tloc.valid()); assert(tloc.repOk(gfm.gh()));
		// The node-level range must have one node per active element
		assert_eq(node_bot-node_top, (index_t)(map_.size()-mapi_));
		pair<int, int> ret = make_pair(0, 0);
		assert_eq(top, tloc.toBWRow(gfm.gh()));
		if(bot - top > 1) {
            // 'first' is true until the first non-empty outgoing range has
            // been claimed as the continuation of this GWState
            bool first = true;
            ASSERT_ONLY(index_t sum = 0);
            // Range (BW rows and nodes) this GWState will continue with
            index_t newtop = 0, newbot = 0;
            index_t new_node_top = 0, new_node_bot = 0;
            gws.map.clear();
			// Still multiple elements being tracked
            index_t curtop = top, curbot = bot;
            index_t cur_node_top = node_top, cur_node_bot = node_bot;
            // Visit one sub-range per node_iedge_count entry, plus a final
            // sub-range that runs up to 'bot' / 'node_bot'
            for(index_t e = 0; e < node_iedge_count.size() + 1; e++) {
                if(e >= node_iedge_count.size()) {
                    // Trailing sub-range after the last recorded entry
                    if(e > 0) {
                        curtop = curbot + node_iedge_count[e-1].second;
                        curbot = bot;
                        if(curtop >= curbot) {
                            // Nothing left after the last entry
                            assert_eq(curtop, curbot);
                            break;
                        }
                        cur_node_top = cur_node_bot;
                        cur_node_bot = node_bot;
                    }
                } else {
                    if(e > 0) {
                        // Skip the rows counted by the previous entry's
                        // '.second', then take as many rows as the distance
                        // between the two entries' '.first' values
                        curtop = curbot + node_iedge_count[e-1].second;
                        assert_lt(node_iedge_count[e-1].first, node_iedge_count[e].first);
                        curbot = curtop + (node_iedge_count[e].first - node_iedge_count[e-1].first);
                        cur_node_top = cur_node_bot;
                    } else {
                        curbot = curtop + node_iedge_count[e].first + 1;
                    }
                    cur_node_bot = node_top + node_iedge_count[e].first + 1;
                }
                assert_lt(curtop, curbot);
                // Per-character outputs of mapLFRange; in[i] > 0 is treated
                // below as "character i yields a non-empty range"
                index_t upto[4], in[4];
                upto[0] = in[0] = upto[1] = in[1] =
                upto[2] = in[2] = upto[3] = in[3] = 0;
                // assert_eq(bot, bloc.toBWRow(gfm.gh()));
                met.bwops++;
                prm.nExFmops++;
                // Assert that there's not a dollar sign in the middle of
                // this range
#ifndef NDEBUG
                for(index_t i = 0; i < gfm._zOffs.size(); i++) {
                    assert(curbot <= gfm._zOffs[i] || curtop > gfm._zOffs[i]);
                }
#endif
                SideLocus<index_t> curtloc, curbloc;
                SideLocus<index_t>::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc);
                gfm.mapLFRange(curtloc, curbloc, curbot-curtop, upto, in, gws.masks);
#ifndef NDEBUG
                // Each mask must have one entry per row of the sub-range
                for(int i = 0; i < 4; i++) {
                    assert_eq(curbot-curtop, (index_t)(gws.masks[i].size()));
                }
#endif

                for(int i = 0; i < 4; i++) {
                    if(in[i] > 0) {
                        // Non-empty range resulted
                        if(first) {
                            // For the first one,
                            first = false;
                            // NOTE: this local 'range' shadows the 'range'
                            // parameter within this scope only
                            pair<index_t, index_t> range, node_range;
                            backup_node_iedge_count.clear();
                            SideLocus<index_t>::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc);
                            range = gfm.mapGLF(curtloc, curbloc, i, &node_range, &backup_node_iedge_count, cur_node_bot - cur_node_top);
                            newtop = range.first;
                            newbot = range.second;
                            new_node_top = node_range.first;
                            new_node_bot = node_range.second;
                            // Range narrowed so we have to look at the masks
                            for(size_t j = 0; j < gws.masks[i].size(); j++) {
                                assert_lt(j+mapi_+(cur_node_top - node_top), map_.size());
                                if(gws.masks[i][j]) {
                                    // Row j of this sub-range follows character
                                    // i; carry its element over to the new map
                                    gws.map.push_back(map_[j+mapi_+(cur_node_top - node_top)]);
                                    assert(gws.map.size() <= 1 || gws.map.back() != gws.map[gws.map.size()-2]);
#if 0
                                    // If this element is not yet resolved,
                                    // then check that it really is the
                                    // expected number of steps to the left
                                    // of the corresponding element in the
                                    // root range
                                    assert_lt(gws.map.back(), sa.size());
                                    if(sa.offs[gws.map.back()] == (index_t)OFF_MASK) {
                                        assert_eq(newtop + gws.map.size() - 1,
                                                  gfm.walkLeft(sa.topf + gws.map.back(), step+1));
                                    }
#endif
                                }
                            }
                            assert_lt(new_node_top, new_node_bot);
                            // More elements were carried over than there are
                            // nodes in the new range: drop elements until the
                            // two counts agree
                            if(new_node_bot - new_node_top < gws.map.size()) {
                                assert_eq(curbot - curtop, cur_node_bot - cur_node_top);
                                SideLocus<index_t> tmptloc, tmpbloc;
                                pair<index_t, index_t> tmp_node_range;
                                // j1/j2: positions of two consecutive set bits
                                // in the mask for character i
                                index_t j1 = 0, j2 = 0;
                                for(index_t c = 0; c < gws.masks[i].size(); c++) {
                                    if(gws.masks[i][c]) {
                                        j1 = c;
                                        break;
                                    }
                                }
                                for(index_t j = 0; j + 1 < gws.map.size(); j++) {
                                    for(index_t c = j1 + 1; c < gws.masks[i].size(); c++) {
                                        if(gws.masks[i][c]) {
                                            j2 = c;
                                            break;
                                        }
                                    }
                                    assert_lt(j1, j2);
                                    // Map just the rows from j1 through j2 and
                                    // see how many nodes they land on
                                    SideLocus<index_t>::initFromTopBot(curtop + j1, curtop + j2 + 1, gfm.gh(), gfm.gfm(), tmptloc, tmpbloc);
                                    gfm.mapGLF(tmptloc, tmpbloc, i, &tmp_node_range);
                                    assert_gt(tmp_node_range.second - tmp_node_range.first, 0);
                                    if(tmp_node_range.second - tmp_node_range.first == 1) {
                                        // Both rows land on a single node: mark
                                        // element j for removal from the map.
                                        // NOTE(review): the value stored into
                                        // sa.offs is the element's own index
                                        // (gws.map[j]), not a text offset;
                                        // confirm this is intended.
                                        index_t jmap = gws.map[j];
                                        assert_lt(jmap, sa.offs.size());
                                        sa.offs[jmap] = gws.map[j];
                                        gws.map[j] = (index_t)OFF_MASK;
                                    }
                                    j1 = j2;
                                    j2 = 0;
                                }
                                // Compact the map by erasing the marked entries
                                for(index_t j = 0; j < gws.map.size();) {
                                    if(gws.map[j] == (index_t)OFF_MASK) {
                                        gws.map.erase(j);
                                    } else j++;
                                }
#ifndef NDEBUG
                                for(index_t j = 0; j < gws.map.size(); j++) {
                                    assert_neq(gws.map[j], (index_t)OFF_MASK);
                                }
#endif
                            }
                            assert_eq(new_node_bot - new_node_top, (index_t)(gws.map.size()));
                        } else {
                            // For each beyond the first, create a new
                            // GWState and add it to the GWState list.
                            // NOTE: this can cause the underlying list to
                            // be expanded which in turn might leave 'st'
                            // pointing to bad memory.
                            st.expand();
                            st.back().reset();
                            tmp_node_iedge_count.clear();
                            pair<index_t, index_t> range, node_range;
                            SideLocus<index_t>::initFromTopBot(curtop, curbot, gfm.gh(), gfm.gfm(), curtloc, curbloc);
                            range = gfm.mapGLF(curtloc, curbloc, i, &node_range, &tmp_node_iedge_count, cur_node_bot - cur_node_top);
                            assert_geq(range.second - range.first, node_range.second - node_range.first);
                            index_t ntop = range.first;
                            index_t nbot = range.second;
                            st.back().mapi_ = 0;
                            st.back().map_.clear();
                            met.branches++;
                            // Range narrowed so we have to look at the masks
                            for(size_t j = 0; j < gws.masks[i].size(); j++) {
                                if(gws.masks[i][j]) st.back().map_.push_back(map_[j+mapi_+(cur_node_top - node_top)]);
                            }
                            assert_lt(node_range.first, node_range.second);
                            // Same reconciliation as in the 'first' branch,
                            // applied to the new GWState's map_
                            if(node_range.second - node_range.first < st.back().map_.size()) {
                                assert_eq(curbot - curtop, cur_node_bot - cur_node_top);
                                SideLocus<index_t> tmptloc, tmpbloc;
                                pair<index_t, index_t> tmp_node_range;
                                index_t j1 = 0, j2 = 0;
                                for(index_t c = 0; c < gws.masks[i].size(); c++) {
                                    if(gws.masks[i][c]) {
                                        j1 = c;
                                        break;
                                    }
                                }
                                for(index_t j = 0; j + 1 < st.back().map_.size(); j++) {
                                    for(index_t c = j1 + 1; c < gws.masks[i].size(); c++) {
                                        if(gws.masks[i][c]) {
                                            j2 = c;
                                            break;
                                        }
                                    }
                                    assert_lt(j1, j2);
                                    SideLocus<index_t>::initFromTopBot(curtop + j1, curtop + j2 + 1, gfm.gh(), gfm.gfm(), tmptloc, tmpbloc);
                                    gfm.mapGLF(tmptloc, tmpbloc, i, &tmp_node_range);
                                    assert_gt(tmp_node_range.second - tmp_node_range.first, 0);
                                    if(tmp_node_range.second - tmp_node_range.first == 1) {
                                        // NOTE(review): as above, sa.offs
                                        // receives the element index itself
                                        index_t jmap = st.back().map_[j];
                                        assert_lt(jmap, sa.offs.size());
                                        sa.offs[jmap] = st.back().map_[j];
                                        st.back().map_[j] = (index_t)OFF_MASK;
                                    }
                                    j1 = j2;
                                    j2 = 0;
                                }
                                for(index_t j = 0; j < st.back().map_.size();) {
                                    if(st.back().map_[j] == (index_t)OFF_MASK) {
                                        st.back().map_.erase(j);
                                    } else j++;
                                }
#ifndef NDEBUG
                                for(index_t j = 0; j < st.back().map_.size(); j++) {
                                    assert_neq(st.back().map_[j], (index_t)OFF_MASK);
                                }
#endif
                            }
                            assert_eq(node_range.second - node_range.first, st.back().map_.size());
                            // Initialize the new GWState one step further
                            // along and fold its counts into our result
                            pair<int, int> rret =
                            st.back().init(
                                           gfm,         // forward Bowtie index
                                           ref,         // bitpair-encodede reference
                                           sa,          // SA range with offsets
                                           st,          // EList of all GWStates associated with original range
                                           hit,         // associated GWHit object
                                           (index_t)st.size()-1, // range offset
                                           reportList,  // if true, report hits to 'res' list
                                           res,         // report hits here if reportList is true
                                           ntop,        // BW top of new range
                                           nbot,        // BW bot of new range
                                           node_range.first,
                                           node_range.second,
                                           tmp_node_iedge_count,
                                           step+1,      // # steps taken to get to this new range
                                           met);        // update these metrics
                            ret.first += rret.first;
                            ret.second += rret.second;
                        }
                        ASSERT_ONLY(sum += in[i]);
                    }
                }
            }
            // Adopt the continuation range computed above
            mapi_ = 0;
            // assert_eq(new_node_bot-new_node_top, sum);
            assert_gt(newbot, newtop);
            assert(top != newtop || bot != newbot);
            //assert(!(newtop < top && newbot > top));
            top = newtop;
            bot = newbot;
            node_top = new_node_top;
            node_bot = new_node_bot;
            node_iedge_count = backup_node_iedge_count;
            backup_node_iedge_count.clear();
            // map_ is only replaced when at least one element was carried over
            if(!gws.map.empty()) {
                map_ = gws.map;
            }
            //assert(repOkMapRepeats());
            //assert(repOkMapInclusive(hit, range));
            assert_eq(node_bot-node_top, (index_t)map_.size());
        } else {
            // Down to one element
			assert_eq(bot, top+1);
			assert_eq(1, map_.size()-mapi_);
			// Sets top, returns char walked through (which we ignore)
			ASSERT_ONLY(index_t oldtop = top);
			met.bwops++;
			prm.nExFmops++;
            pair<index_t, index_t> node_range(0, 0);
			pair<index_t, index_t> range = gfm.mapGLF1(top, tloc, &node_range);
            top = range.first;
			assert_neq(top, oldtop);
			bot = top+1;
            node_top = node_range.first;
            node_bot = node_range.second;
			// Move the lone active element to the front and drop the rest
			if(mapi_ > 0) {
				map_[0] = map_[mapi_];
				mapi_ = 0;
			}
			map_.resize(1);
		}
		assert(top != origTop || bot != origBot);
		step++;
		assert_gt(step, 0);
		assert_leq((index_t)step, gfm.gh().len());
		// Re-initialize this GWState for its new range and fold the counts
		// it reports into our result
		pair<int, int> rret =
		init<S>(
			gfm,        // forward GFM index
			ref,        // bitpair-encodede reference
			sa,         // SA range with offsets
			st,         // EList of all GWStates associated with original range
			hit,        // associated GWHit object
			range,      // range offset
			reportList, // if true, report hits to 'res' list
			res,        // report hits here if reportList is true
			met);       // update these metrics
		ret.first += rret.first;
		ret.second += rret.second;
		return ret;
	}

	/**
	 * Return this GWState to a blank state so it can be reused for the
	 * next walk: locators are invalidated, the map and the edge-count
	 * lists are emptied, and all range/step bookkeeping is zeroed.
	 */
	void reset() {
		// Locators and per-range containers
		tloc.invalidate();
		bloc.invalidate();
		map_.clear();
		node_iedge_count.clear();
		backup_node_iedge_count.clear();
		tmp_node_iedge_count.clear();
		// Scalar bookkeeping
		mapi_ = 0;
		step = 0;
		top = 0;
		bot = 0;
		node_top = 0;
		node_bot = 0;
		ASSERT_ONLY(lastStep_ = -1);
		ASSERT_ONLY(inited_ = false);
	}

	/**
	 * Size map_ to 'newsz' entries and fill it with the identity mapping
	 * (entry k holds k), with no entries trimmed.
	 */
	void initMap(size_t newsz) {
		map_.resize(newsz);
		mapi_ = 0;
		size_t k = 0;
		while(k < newsz) {
			map_[k] = (index_t)k;
			k++;
		}
	}

	/**
	 * Return true iff all rows corresponding to this GWState have been
	 * resolved and reported.
	 */
	bool doneReporting(const GWHit<index_t, T>& hit) const {
		for(size_t i = mapi_; i < map_.size(); i++) {
			if(!hit.reported(map(i))) return false;
		}
		return true;
	}

	/**
	 * Return true iff all rows corresponding to this GWState have been
	 * resolved (but not necessarily reported).
	 */
	bool doneResolving(const SARangeWithOffs<T, index_t>& sa) const {
		for(size_t i = mapi_; i < map_.size(); i++) {
			if(sa.offs[map((index_t)i)] == (index_t)OFF_MASK) return false;
		}
		return true;
	}

	SideLocus<index_t> tloc;      // SideLocus for top
	SideLocus<index_t> bloc;      // SideLocus for bottom
	index_t            top;       // top elt of range in GBWT
	index_t            bot;       // bot elt of range in GBWT
    // Node-level range kept alongside [top, bot); advance() asserts that
    // node_bot - node_top equals the number of active map_ entries
    index_t            node_top;
    index_t            node_bot;
    // Pairs that advance() uses to cut [top, bot) into sub-ranges.
    // NOTE(review): presumably produced by GFM::mapGLF; confirm the exact
    // meaning of .first / .second against that function.
    EList<pair<index_t, index_t> > node_iedge_count;
	int                step;      // how many steps have we walked to the left so far

    // temporary
    // Filled by mapGLF in advance() for the range this GWState continues
    // with, then copied into node_iedge_count
    EList<pair<index_t, index_t> > backup_node_iedge_count;
    // Filled by mapGLF in advance() for each newly spawned GWState
    EList<pair<index_t, index_t> > tmp_node_iedge_count;

    // Scratch lists; their use is not visible in this part of the file
    // (TODO confirm where they are read and written)
    EList<index_t>                 tmp_zOffs;
    EList<index_t>                 tmp_gbwt_to_node;

protected:

	ASSERT_ONLY(bool inited_);   // debug builds only; cleared by reset()
	ASSERT_ONLY(int lastStep_);  // debug builds only; advance() asserts step == lastStep_
	EList<index_t, 16> map_; // which elts in range 'range' we're tracking
	index_t mapi_;           // first untrimmed element of map
};

/**
 * Drives the walk-left resolution of every element of a single SA range
 * (a SARangeWithOffs).  It owns one GWHit describing the range and a list
 * of GWState objects, one per sub-range that the walk has split into so
 * far.  Callers initialize it with init() and then request elements one
 * at a time with advanceElement().
 *
 * Template parameters:
 *   index_t  integer type used for BW rows, offsets and counts
 *   T        forwarded to SARangeWithOffs / GWHit / GWState
 *   S        second template argument of the EList that holds the GWStates
 */
template<typename index_t, typename T, int S>
class GroupWalk2S {
public:
	typedef EList<GWState<index_t, T>, S> TStateV;

	GroupWalk2S() : st_(8, GW_CAT) {
		reset();
	}

	/**
	 * Reset the GroupWalk in preparation for the next SeedResults.
	 * Only the element/report counters are cleared here; st_ and hit_
	 * are re-initialized by init().
	 */
	void reset() {
		elt_ = rep_ = 0;
		ASSERT_ONLY(inited_ = false);
	}

	/**
	 * Initialize a new group walk over the SA range 'sa'.  A single
	 * GWState covering the whole range is created and initialized at
	 * step 0.  'rnd' is accepted but not used.
	 */
	void init(
		const GFM<index_t>& gfmFw,   // forward Bowtie index for walking left
		const BitPairReference& ref, // bitpair-encoded reference
		SARangeWithOffs<T, index_t>& sa,      // SA range with offsets
		RandomSource& rnd,           // pseudo-random generator for sampling rows
		WalkMetrics& met)            // update metrics here
	{
		reset();
#ifndef NDEBUG
		inited_ = true;
#endif
		// Init GWHit
		hit_.init(sa, 0, false, 0);
		// Init corresponding GWState
		st_.resize(1);
		st_.back().reset();
		assert(st_.back().repOkBasic());
		index_t top = sa.topf;
		index_t bot = sa.botf;
		index_t node_top = sa.node_top;
		index_t node_bot = (index_t)(node_top + sa.size());
		// Start with the identity mapping over all elements of the range
		st_.back().initMap(sa.size());
		st_.ensure(4);
		st_.back().init(
			gfmFw,              // Bowtie index
			ref,                // bitpair-encoded reference
			sa,                 // SA range with offsets
			st_,                // EList<GWState>
			hit_,               // GWHit
			0,                  // range 0
			false,              // put resolved elements into res_?
			NULL,               // put resolved elements here
			top,                // GBW row at top
			bot,                // GBW row at bot
			node_top,           // node at top
			node_bot,           // node at bot
			sa.node_iedge_count,
			0,                  // # steps taken
			met);               // update metrics here
		elt_ += sa.size();
		assert(hit_.repOk(sa));
	}

	//
	// ELEMENT-BASED
	//

	/**
	 * Advance the GroupWalk until all elements have been resolved.
	 *
	 * NOTE(review): this legacy overload calls advanceElement() with only
	 * four arguments, which matches no advanceElement() declared in this
	 * class, so it cannot be instantiated.  It is kept solely so that the
	 * set of declared signatures is unchanged; use the overload below,
	 * which takes the index, reference, SA range and scratch state that
	 * advanceElement() needs.
	 */
	void resolveAll(WalkMetrics& met, PerReadMetrics& prm) {
		WalkResult<index_t> res; // ignore results for now
		for(size_t i = 0; i < elt_; i++) {
			advanceElement((index_t)i, res, met, prm);
		}
	}

	/**
	 * Advance the GroupWalk until every element of 'sa' has been resolved
	 * and reported.  Elements that were already reported are skipped.
	 * The individual WalkResults are discarded; the resolved offsets end
	 * up in sa.offs.
	 */
	void resolveAll(
		const GFM<index_t>& gfmFw,    // forward Bowtie index for walking left
		const BitPairReference& ref,  // bitpair-encoded reference
		SARangeWithOffs<T, index_t>& sa,       // SA range with offsets
		GroupWalkState<index_t>& gws, // GroupWalk state; scratch space
		WalkMetrics& met,             // metrics
		PerReadMetrics& prm)          // per-read metrics
	{
		WalkResult<index_t> res; // results are discarded
		for(index_t i = 0; i < elt_; i++) {
			// Skipping reported elements keeps advanceElement()'s
			// !done() precondition intact
			if(hit_.reported(i)) continue;
			advanceElement(i, gfmFw, ref, sa, gws, res, met, prm);
		}
	}

	/**
	 * Advance the GroupWalk until the specified element has been
	 * resolved, then report it through 'res'.
	 *
	 * The GWState that currently owns 'elt' is advanced repeatedly until
	 * sa.offs[elt] is no longer OFF_MASK.  The element is then marked as
	 * reported in the GWHit.  rep_ is incremented only the first time an
	 * element is reported, so it always equals the number of reported
	 * elements (the invariant repOk() asserts) and done() stays reliable
	 * even if the same element is requested more than once.
	 *
	 * Always returns true.
	 */
	bool advanceElement(
		index_t elt,                  // element within the range
		const GFM<index_t>& gfmFw,    // forward Bowtie index for walking left
		const BitPairReference& ref,  // bitpair-encoded reference
		SARangeWithOffs<T, index_t>& sa,       // SA range with offsets
		GroupWalkState<index_t>& gws, // GroupWalk state; scratch space
		WalkResult<index_t>& res,     // put the result here
		WalkMetrics& met,             // metrics
		PerReadMetrics& prm)          // per-read metrics
	{
		assert(inited_);
		assert(!done());
		assert(hit_.repOk(sa));
		assert_lt(elt, sa.size()); // elt must fall within range
		// Until we've resolved our element of interest...
		while(sa.offs[elt] == (index_t)OFF_MASK) {
			// Get the GWState that contains our element of interest
			size_t range = hit_.fmap[elt].first;
			assert_lt(range, st_.size());
			// Reserve room up front: advance() may append new GWStates to
			// st_, and a reallocation would invalidate the reference
			// taken just below
			st_.ensure(st_[range].node_bot - st_[range].node_top);
			// st_.ensure(4);
			GWState<index_t, T>& st = st_[range];
			assert(!st.doneResolving(sa));
			// Returns a pair of numbers, the first being the number of
			// resolved but unreported offsets found during this advance, the
			// second being the number of as-yet-unresolved offsets.
			// (The pair is not needed here and is ignored.)
			st.advance(
				gfmFw,
				ref,
				sa,
				hit_,
				(index_t)range,
				false,
				NULL,
				st_,
				gws,
				met,
				prm);
			assert(sa.offs[elt] != (index_t)OFF_MASK ||
			       !st_[hit_.fmap[elt].first].doneResolving(sa));
		}
		assert_neq((index_t)OFF_MASK, sa.offs[elt]);
		// Report it!  Count the element only the first time it is reported.
		if(!hit_.reported(elt)) {
			hit_.setReported(elt);
			rep_++;
		}
		met.reports++;
		res.init(
			0,              // seed offset
			false,          // orientation
			0,              // range
			elt,            // element
			sa.topf + elt,  // bw row
			(index_t)sa.len, // length of hit
			sa.offs[elt]);  // resolved text offset
		return true;
	}

	/**
	 * Return true iff all elements have been resolved and reported.
	 */
	bool done() const { return rep_ == elt_; }

#ifndef NDEBUG
	/**
	 * Check that GroupWalk is internally consistent: no unresolved
	 * element is marked reported, rep_ equals the number of reported
	 * elements, and elt_ equals the size of 'sa'.
	 */
	bool repOk(const SARangeWithOffs<T, index_t>& sa) const {
		assert(hit_.repOk(sa));
		assert_leq(rep_, elt_);
		// This is a lot of work
		size_t resolved = 0, reported = 0;
		// For each element
		const size_t sz = sa.size();
		for(size_t m = 0; m < sz; m++) {
			// Is it resolved?
			if(sa.offs[m] != (index_t)OFF_MASK) {
				resolved++;
			} else {
				assert(!hit_.reported(m));
			}
			// Is it reported?
			if(hit_.reported(m)) {
				reported++;
			}
			assert_geq(resolved, reported);
		}
		assert_geq(resolved, reported);
		assert_eq(rep_, reported);
		assert_eq(elt_, sz);
		return true;
	}
#endif

	/**
	 * Return the number of BW elements that we can resolve.
	 */
	index_t numElts() const { return elt_; }

	/**
	 * Return the size occupied by this GroupWalk and all its constituent
	 * objects.
	 */
	size_t totalSizeBytes() const {
		return 2 * sizeof(size_t) + st_.totalSizeBytes() + sizeof(GWHit<index_t, T>);
	}
	/**
	 * Return the capacity of this GroupWalk and all its constituent objects.
	 */
	size_t totalCapacityBytes() const {
		return 2 * sizeof(size_t) + st_.totalCapacityBytes() + sizeof(GWHit<index_t, T>);
	}

#ifndef NDEBUG
	bool initialized() const { return inited_; }
#endif

protected:

	ASSERT_ONLY(bool inited_);    // initialized?

	index_t elt_;    // # BW elements under the control of the GroupWalk
	index_t rep_;    // # distinct BW elements reported

	// For each orientation and seed offset, keep a GWState object that
	// holds the state of the walk so far.
	TStateV st_;

	// For each orientation and seed offset, keep an EList of GWHit.
	GWHit<index_t, T> hit_;
};

#endif /*GROUP_WALK_H_*/