1238 lines
38 KiB
C++
1238 lines
38 KiB
C++
/*
|
|
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
|
|
*
|
|
* This file is part of Bowtie 2.
|
|
*
|
|
* Bowtie 2 is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Bowtie 2 is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#ifndef MULTIKEY_QSORT_H_
|
|
#define MULTIKEY_QSORT_H_
|
|
|
|
#include <iostream>
|
|
#include "sequence_io.h"
|
|
#include "alphabet.h"
|
|
#include "assert_helpers.h"
|
|
#include "diff_sample.h"
|
|
#include "sstring.h"
|
|
#include "btypes.h"
|
|
|
|
using namespace std;
|
|
|
|
/**
|
|
* Swap elements a and b in s
|
|
*/
|
|
template <typename TStr, typename TPos>
|
|
static inline void swap(TStr& s, size_t slen, TPos a, TPos b) {
|
|
assert_lt(a, slen);
|
|
assert_lt(b, slen);
|
|
swap(s[a], s[b]);
|
|
}
|
|
|
|
/**
|
|
* Swap elements a and b in array s
|
|
*/
|
|
template <typename TVal, typename TPos>
|
|
static inline void swap(TVal* s, size_t slen, TPos a, TPos b) {
|
|
assert_lt(a, slen);
|
|
assert_lt(b, slen);
|
|
swap(s[a], s[b]);
|
|
}
|
|
|
|
/**
|
|
* Helper macro for swapping elements a and b in s. Does some additional
|
|
* sainty checking w/r/t begin and end (which are parameters to the sorting
|
|
* routines below).
|
|
*/
|
|
#define SWAP(s, a, b) { \
|
|
assert_geq(a, begin); \
|
|
assert_geq(b, begin); \
|
|
assert_lt(a, end); \
|
|
assert_lt(b, end); \
|
|
swap(s, slen, a, b); \
|
|
}
|
|
|
|
/**
|
|
* Helper macro for swapping the same pair of elements a and b in two different
|
|
* strings s and s2. This is a helpful variant if, for example, the caller
|
|
* would like to see how their input was permuted by the sort routine (in that
|
|
* case, the caller would let s2 be an array s2[] where s2 is the same length
|
|
* as s and s2[i] = i).
|
|
*/
|
|
#define SWAP2(s, s2, a, b) { \
|
|
SWAP(s, a, b); \
|
|
swap(s2, slen, a, b); \
|
|
}
|
|
|
|
#define SWAP1(s, s2, a, b) { \
|
|
SWAP(s, a, b); \
|
|
}
|
|
|
|
/**
|
|
* Helper macro that swaps a range of elements [i, i+n) with another
|
|
* range [j, j+n) in s.
|
|
*/
|
|
#define VECSWAP(s, i, j, n) { \
|
|
if(n > 0) { vecswap(s, slen, i, j, n, begin, end); } \
|
|
}
|
|
|
|
/**
|
|
* Helper macro that swaps a range of elements [i, i+n) with another
|
|
* range [j, j+n) both in s and s2.
|
|
*/
|
|
#define VECSWAP2(s, s2, i, j, n) { \
|
|
if(n > 0) { vecswap2(s, slen, s2, i, j, n, begin, end); } \
|
|
}
|
|
|
|
/**
|
|
* Helper function that swaps a range of elements [i, i+n) with another
|
|
* range [j, j+n) in s. begin and end represent the current range under
|
|
* consideration by the caller (one of the recursive multikey_quicksort
|
|
* routines below).
|
|
*/
|
|
template <typename TStr, typename TPos>
|
|
static inline void vecswap(TStr& s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) {
|
|
assert_geq(i, begin);
|
|
assert_geq(j, begin);
|
|
assert_lt(i, end);
|
|
assert_lt(j, end);
|
|
while(n-- > 0) {
|
|
assert_geq(n, 0);
|
|
TPos a = i+n;
|
|
TPos b = j+n;
|
|
assert_geq(a, begin);
|
|
assert_geq(b, begin);
|
|
assert_lt(a, end);
|
|
assert_lt(b, end);
|
|
swap(s, slen, a, b);
|
|
}
|
|
}
|
|
|
|
template <typename TVal, typename TPos>
|
|
static inline void vecswap(TVal *s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) {
|
|
assert_geq(i, begin);
|
|
assert_geq(j, begin);
|
|
assert_lt(i, end);
|
|
assert_lt(j, end);
|
|
while(n-- > 0) {
|
|
assert_geq(n, 0);
|
|
TPos a = i+n;
|
|
TPos b = j+n;
|
|
assert_geq(a, begin);
|
|
assert_geq(b, begin);
|
|
assert_lt(a, end);
|
|
assert_lt(b, end);
|
|
swap(s, slen, a, b);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Helper function that swaps a range of elements [i, i+n) with another range
|
|
* [j, j+n) both in s and s2. begin and end represent the current range under
|
|
* consideration by the caller (one of the recursive multikey_quicksort
|
|
* routines below).
|
|
*/
|
|
template <typename TStr, typename TPos>
|
|
static inline void vecswap2(
|
|
TStr& s,
|
|
size_t slen,
|
|
TStr& s2,
|
|
TPos i,
|
|
TPos j,
|
|
TPos n,
|
|
TPos begin,
|
|
TPos end)
|
|
{
|
|
assert_geq(i, begin);
|
|
assert_geq(j, begin);
|
|
assert_lt(i, end);
|
|
assert_lt(j, end);
|
|
while(n-- > 0) {
|
|
assert_geq(n, 0);
|
|
TPos a = i+n;
|
|
TPos b = j+n;
|
|
assert_geq(a, begin);
|
|
assert_geq(b, begin);
|
|
assert_lt(a, end);
|
|
assert_lt(b, end);
|
|
swap(s, slen, a, b);
|
|
swap(s2, slen, a, b);
|
|
}
|
|
}
|
|
|
|
template <typename TVal, typename TPos>
|
|
static inline void vecswap2(TVal* s, size_t slen, TVal* s2, TPos i, TPos j, TPos n, TPos begin, TPos end) {
|
|
assert_geq(i, begin);
|
|
assert_geq(j, begin);
|
|
assert_lt(i, end);
|
|
assert_lt(j, end);
|
|
while(n-- > 0) {
|
|
assert_geq(n, 0);
|
|
TPos a = i+n;
|
|
TPos b = j+n;
|
|
assert_geq(a, begin);
|
|
assert_geq(b, begin);
|
|
assert_lt(a, end);
|
|
assert_lt(b, end);
|
|
swap(s, slen, a, b);
|
|
swap(s2, slen, a, b);
|
|
}
|
|
}
|
|
|
|
/// Retrieve an int-ized version of the ath character of string s, or,
|
|
/// if a goes off the end of s, return a (user-specified) int greater
|
|
/// than any TAlphabet character - 'hi'.
|
|
#define CHAR_AT(ss, aa) ((length(s[ss]) > aa) ? (int)(s[ss][aa]) : hi)
|
|
|
|
/// Retrieve an int-ized version of the ath character of string s, or,
|
|
/// if a goes off the end of s, return a (user-specified) int greater
|
|
/// than any TAlphabet character - 'hi'.
|
|
#define CHAR_AT_SUF(si, off) \
|
|
(((off + s[si]) < hlen) ? ((int)(host[off + s[si]])) : (hi))
|
|
|
|
/// Retrieve an int-ized version of the ath character of string s, or,
|
|
/// if a goes off the end of s, return a (user-specified) int greater
|
|
/// than any TAlphabet character - 'hi'.
|
|
|
|
#define CHAR_AT_SUF_U8(si, off) char_at_suf_u8(host, hlen, s, si, off, hi)
|
|
|
|
// Note that CHOOSE_AND_SWAP_RANDOM_PIVOT is unused
|
|
#define CHOOSE_AND_SWAP_RANDOM_PIVOT(sw, ch) { \
|
|
/* Note: rand() didn't really cut it here; it seemed to run out of */ \
|
|
/* randomness and, after a time, returned the same thing over and */ \
|
|
/* over again */ \
|
|
a = (rand() % n) + begin; /* choose pivot between begin and end */ \
|
|
assert_lt(a, end); assert_geq(a, begin); \
|
|
sw(s, s2, begin, a); /* move pivot to beginning */ \
|
|
}
|
|
|
|
/**
|
|
* Ad-hoc DNA-centric way of choose a pretty good pivot without using
|
|
* the pseudo-random number generator. We try to get a 1 or 2 if
|
|
* possible, since they'll split things more evenly than a 0 or 4. We
|
|
* also avoid swapping in the event that we choose the first element.
|
|
*/
|
|
#define CHOOSE_AND_SWAP_SMART_PIVOT(sw, ch) { \
|
|
a = begin; /* choose first elt */ \
|
|
/* now try to find a better elt */ \
|
|
if(n >= 5) { /* n is the difference between begin and end */ \
|
|
if (ch(begin+1, depth) == 1 || ch(begin+1, depth) == 2) a = begin+1; \
|
|
else if(ch(begin+2, depth) == 1 || ch(begin+2, depth) == 2) a = begin+2; \
|
|
else if(ch(begin+3, depth) == 1 || ch(begin+3, depth) == 2) a = begin+3; \
|
|
else if(ch(begin+4, depth) == 1 || ch(begin+4, depth) == 2) a = begin+4; \
|
|
if(a != begin) sw(s, s2, begin, a); /* move pivot to beginning */ \
|
|
} \
|
|
/* the element at [begin] now holds the pivot value */ \
|
|
}
|
|
|
|
#define CHOOSE_AND_SWAP_PIVOT CHOOSE_AND_SWAP_SMART_PIVOT
|
|
|
|
#ifndef NDEBUG
|
|
|
|
/**
|
|
* Assert that the range of chars at depth 'depth' in strings 'begin'
|
|
* to 'end' in string-of-suffix-offsets s is parititioned properly
|
|
* according to the ternary paritioning strategy of Bentley and McIlroy
|
|
* (*prior to* swapping the = regions to the center)
|
|
*/
|
|
template<typename THost>
|
|
bool assertPartitionedSuf(
|
|
const THost& host,
|
|
TIndexOffU *s,
|
|
size_t slen,
|
|
int hi,
|
|
int pivot,
|
|
size_t begin,
|
|
size_t end,
|
|
size_t depth)
|
|
{
|
|
size_t hlen = host.length();
|
|
int state = 0; // 0 -> 1st = section, 1 -> < section, 2 -> > section, 3 -> 2nd = section
|
|
for(size_t i = begin; i < end; i++) {
|
|
switch(state) {
|
|
case 0:
|
|
if (CHAR_AT_SUF(i, depth) < pivot) { state = 1; break; }
|
|
else if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; }
|
|
assert_eq(CHAR_AT_SUF(i, depth), pivot); break;
|
|
case 1:
|
|
if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; }
|
|
else if (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; }
|
|
assert_lt(CHAR_AT_SUF(i, depth), pivot); break;
|
|
case 2:
|
|
if (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; }
|
|
assert_gt(CHAR_AT_SUF(i, depth), pivot); break;
|
|
case 3:
|
|
assert_eq(CHAR_AT_SUF(i, depth), pivot); break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Assert that the range of chars at depth 'depth' in strings 'begin'
|
|
* to 'end' in string-of-suffix-offsets s is parititioned properly
|
|
* according to the ternary paritioning strategy of Bentley and McIlroy
|
|
* (*after* swapping the = regions to the center)
|
|
*/
|
|
template<typename THost>
|
|
bool assertPartitionedSuf2(
|
|
const THost& host,
|
|
TIndexOffU *s,
|
|
size_t slen,
|
|
int hi,
|
|
int pivot,
|
|
size_t begin,
|
|
size_t end,
|
|
size_t depth)
|
|
{
|
|
size_t hlen = host.length();
|
|
int state = 0; // 0 -> < section, 1 -> = section, 2 -> > section
|
|
for(size_t i = begin; i < end; i++) {
|
|
switch(state) {
|
|
case 0:
|
|
if (CHAR_AT_SUF(i, depth) == pivot) { state = 1; break; }
|
|
else if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; }
|
|
assert_lt(CHAR_AT_SUF(i, depth), pivot); break;
|
|
case 1:
|
|
if (CHAR_AT_SUF(i, depth) > pivot) { state = 2; break; }
|
|
assert_eq(CHAR_AT_SUF(i, depth), pivot); break;
|
|
case 2:
|
|
assert_gt(CHAR_AT_SUF(i, depth), pivot); break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* Assert that string s of suffix offsets into string 'host' is a seemingly
|
|
* legitimate suffix-offset list (at this time, we just check that it doesn't
|
|
* list any suffix twice).
|
|
*/
|
|
static inline void sanityCheckInputSufs(TIndexOffU *s, size_t slen) {
|
|
assert_gt(slen, 0);
|
|
for(size_t i = 0; i < slen; i++) {
|
|
// Actually, it's convenient to allow the caller to provide
|
|
// suffix offsets thare are off the end of the host string.
|
|
// See, e.g., build() in diff_sample.cpp.
|
|
//assert_lt(s[i], length(host));
|
|
for(size_t j = i+1; j < slen; j++) {
|
|
assert_neq(s[i], s[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Assert that the string s of suffix offsets into 'host' really are in
|
|
* lexicographical order up to depth 'upto'.
|
|
*/
|
|
template <typename T>
|
|
void sanityCheckOrderedSufs(
|
|
const T& host,
|
|
size_t hlen,
|
|
TIndexOffU *s,
|
|
size_t slen,
|
|
size_t upto,
|
|
size_t lower = 0,
|
|
size_t upper = OFF_MASK)
|
|
{
|
|
assert_lt(s[0], hlen);
|
|
upper = min<size_t>(upper, slen-1);
|
|
for(size_t i = lower; i < upper; i++) {
|
|
// Allow s[i+t] to point off the end of the string; this is
|
|
// convenient for some callers
|
|
if(s[i+1] >= hlen) continue;
|
|
#ifndef NDEBUG
|
|
if(upto == OFF_MASK) {
|
|
assert(sstr_suf_lt(host, s[i], hlen, host, s[i+1], hlen, false));
|
|
} else {
|
|
if(sstr_suf_upto_lt(host, s[i], host, s[i+1], upto, false)) {
|
|
// operator > treats shorter strings as
|
|
// lexicographically smaller, but we want to opposite
|
|
//assert(isPrefix(suffix(host, s[i+1]), suffix(host, s[i])));
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Main multikey quicksort function for suffixes. Based on Bentley &
|
|
* Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for
|
|
* Sorting and Searching Strings". That algorithm has been extended in
|
|
* three ways:
|
|
*
|
|
* 1. Deal with keys of different lengths by checking bounds and
|
|
* considering off-the-end values to be 'hi' (b/c our goal is the
|
|
* BWT transform, we're biased toward considring prefixes as
|
|
* lexicographically *greater* than their extensions).
|
|
* 2. The multikey_qsort_suffixes version takes a single host string
|
|
* and a list of suffix offsets as input. This reduces memory
|
|
* footprint compared to an approach that treats its input
|
|
* generically as a set of strings (not necessarily suffixes), thus
|
|
* requiring that we store at least two integers worth of
|
|
* information for each string.
|
|
* 3. Sorting functions take an extra "upto" parameter that upper-
|
|
* bounds the depth to which the function sorts.
|
|
*
|
|
* TODO: Consult a tie-breaker (like a difference cover sample) if two
|
|
* keys share a long prefix.
|
|
*/
|
|
template<typename T>
|
|
void mkeyQSortSuf(
|
|
const T& host,
|
|
size_t hlen,
|
|
TIndexOffU *s,
|
|
size_t slen,
|
|
int hi,
|
|
size_t begin,
|
|
size_t end,
|
|
size_t depth,
|
|
size_t upto = OFF_MASK)
|
|
{
|
|
// Helper for making the recursive call; sanity-checks arguments to
|
|
// make sure that the problem actually got smaller.
|
|
#define MQS_RECURSE_SUF(nbegin, nend, ndepth) { \
|
|
assert(nbegin > begin || nend < end || ndepth > depth); \
|
|
if(ndepth < upto) { /* don't exceed depth of 'upto' */ \
|
|
mkeyQSortSuf(host, hlen, s, slen, hi, nbegin, nend, ndepth, upto); \
|
|
} \
|
|
}
|
|
assert_leq(begin, slen);
|
|
assert_leq(end, slen);
|
|
size_t a, b, c, d, /*e,*/ r;
|
|
size_t n = end - begin;
|
|
if(n <= 1) return; // 1-element list already sorted
|
|
CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF); // pick pivot, swap it into [begin]
|
|
int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value
|
|
#ifndef NDEBUG
|
|
{
|
|
bool stillInBounds = false;
|
|
for(size_t i = begin; i < end; i++) {
|
|
if(depth < (hlen-s[i])) {
|
|
stillInBounds = true;
|
|
break;
|
|
} else { /* already fell off this suffix */ }
|
|
}
|
|
assert(stillInBounds); // >=1 suffix must still be in bounds
|
|
}
|
|
#endif
|
|
a = b = begin;
|
|
c = d = end-1;
|
|
while(true) {
|
|
// Invariant: everything before a is = pivot, everything
|
|
// between a and b is <
|
|
int bc = 0; // shouldn't have to init but gcc on Mac complains
|
|
while(b <= c && v >= (bc = CHAR_AT_SUF(b, depth))) {
|
|
if(v == bc) {
|
|
SWAP(s, a, b); a++;
|
|
}
|
|
b++;
|
|
}
|
|
// Invariant: everything after d is = pivot, everything
|
|
// between c and d is >
|
|
int cc = 0; // shouldn't have to init but gcc on Mac complains
|
|
while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) {
|
|
if(v == cc) {
|
|
SWAP(s, c, d); d--;
|
|
}
|
|
c--;
|
|
}
|
|
if(b > c) break;
|
|
SWAP(s, b, c);
|
|
b++;
|
|
c--;
|
|
}
|
|
assert(a > begin || c < end-1); // there was at least one =s
|
|
assert_lt(d-c, n); // they can't all have been > pivot
|
|
assert_lt(b-a, n); // they can't all have been < pivot
|
|
assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth)); // check pre-=-swap invariant
|
|
r = min(a-begin, b-a); VECSWAP(s, begin, b-r, r); // swap left = to center
|
|
r = min(d-c, end-d-1); VECSWAP(s, b, end-r, r); // swap right = to center
|
|
assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant
|
|
r = b-a; // r <- # of <'s
|
|
if(r > 0) {
|
|
MQS_RECURSE_SUF(begin, begin + r, depth); // recurse on <'s
|
|
}
|
|
// Do not recurse on ='s if the pivot was the off-the-end value;
|
|
// they're already fully sorted
|
|
if(v != hi) {
|
|
MQS_RECURSE_SUF(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s
|
|
}
|
|
r = d-c; // r <- # of >'s excluding those exhausted
|
|
if(r > 0 && v < hi-1) {
|
|
MQS_RECURSE_SUF(end-r, end, depth); // recurse on >'s
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Toplevel function for multikey quicksort over suffixes.
|
|
*/
|
|
template<typename T>
|
|
void mkeyQSortSuf(
|
|
const T& host,
|
|
TIndexOffU *s,
|
|
size_t slen,
|
|
int hi,
|
|
bool verbose = false,
|
|
bool sanityCheck = false,
|
|
size_t upto = OFF_MASK)
|
|
{
|
|
size_t hlen = host.length();
|
|
assert_gt(slen, 0);
|
|
if(sanityCheck) sanityCheckInputSufs(s, slen);
|
|
mkeyQSortSuf(host, hlen, s, slen, hi, (size_t)0, slen, (size_t)0, upto);
|
|
if(sanityCheck) sanityCheckOrderedSufs(host, hlen, s, slen, upto);
|
|
}
|
|
|
|
/**
|
|
* Just like mkeyQSortSuf but all swaps are applied to s2 as well as s.
|
|
* This is a helpful variant if, for example, the caller would like to
|
|
* see how their input was permuted by the sort routine (in that case,
|
|
* the caller would let s2 be an array s2[] where s2 is the same length
|
|
* as s and s2[i] = i).
|
|
*/
|
|
struct QSortRange {
|
|
size_t begin;
|
|
size_t end;
|
|
size_t depth;
|
|
};
|
|
template<typename T>
|
|
void mkeyQSortSuf2(
|
|
const T& host,
|
|
size_t hlen,
|
|
TIndexOffU *s,
|
|
size_t slen,
|
|
TIndexOffU *s2,
|
|
int hi,
|
|
size_t _begin,
|
|
size_t _end,
|
|
size_t _depth,
|
|
size_t upto = OFF_MASK,
|
|
EList<size_t>* boundaries = NULL)
|
|
{
|
|
ELList<QSortRange, 3, 1024> block_list;
|
|
while(true) {
|
|
size_t begin = 0, end = 0, depth = 0;
|
|
if(block_list.size() == 0) {
|
|
begin = _begin;
|
|
end = _end;
|
|
depth = _depth;
|
|
} else {
|
|
if(block_list.back().size() > 0) {
|
|
begin = block_list.back()[0].begin;
|
|
end = block_list.back()[0].end;
|
|
depth = block_list.back()[0].depth;
|
|
block_list.back().erase(0);
|
|
} else {
|
|
block_list.resize(block_list.size() - 1);
|
|
if(block_list.size() == 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if(depth == upto) {
|
|
if(boundaries != NULL) {
|
|
(*boundaries).push_back(end);
|
|
}
|
|
continue;
|
|
}
|
|
assert_leq(begin, slen);
|
|
assert_leq(end, slen);
|
|
size_t a, b, c, d, /*e,*/ r;
|
|
size_t n = end - begin;
|
|
if(n <= 1) { // 1-element list already sorted
|
|
if(n == 1 && boundaries != NULL) {
|
|
boundaries->push_back(end);
|
|
}
|
|
continue;
|
|
}
|
|
CHOOSE_AND_SWAP_PIVOT(SWAP2, CHAR_AT_SUF); // pick pivot, swap it into [begin]
|
|
int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value
|
|
#ifndef NDEBUG
|
|
{
|
|
bool stillInBounds = false;
|
|
for(size_t i = begin; i < end; i++) {
|
|
if(depth < (hlen-s[i])) {
|
|
stillInBounds = true;
|
|
break;
|
|
} else { /* already fell off this suffix */ }
|
|
}
|
|
assert(stillInBounds); // >=1 suffix must still be in bounds
|
|
}
|
|
#endif
|
|
a = b = begin;
|
|
c = d = /*e =*/ end-1;
|
|
while(true) {
|
|
// Invariant: everything before a is = pivot, everything
|
|
// between a and b is <
|
|
int bc = 0; // shouldn't have to init but gcc on Mac complains
|
|
while(b <= c && v >= (bc = CHAR_AT_SUF(b, depth))) {
|
|
if(v == bc) {
|
|
SWAP2(s, s2, a, b); a++;
|
|
}
|
|
b++;
|
|
}
|
|
// Invariant: everything after d is = pivot, everything
|
|
// between c and d is >
|
|
int cc = 0; // shouldn't have to init but gcc on Mac complains
|
|
while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) {
|
|
if(v == cc) {
|
|
SWAP2(s, s2, c, d); d--; /*e--;*/
|
|
}
|
|
//else if(c == e && v == hi) e--;
|
|
c--;
|
|
}
|
|
if(b > c) break;
|
|
SWAP2(s, s2, b, c);
|
|
b++;
|
|
c--;
|
|
}
|
|
assert(a > begin || c < end-1); // there was at least one =s
|
|
assert_lt(/*e*/d-c, n); // they can't all have been > pivot
|
|
assert_lt(b-a, n); // they can't all have been < pivot
|
|
assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth)); // check pre-=-swap invariant
|
|
r = min(a-begin, b-a); VECSWAP2(s, s2, begin, b-r, r); // swap left = to center
|
|
r = min(d-c, end-d-1); VECSWAP2(s, s2, b, end-r, r); // swap right = to center
|
|
assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant
|
|
r = b-a; // r <- # of <'s
|
|
block_list.expand();
|
|
block_list.back().clear();
|
|
if(r > 0) { // recurse on <'s
|
|
block_list.back().expand();
|
|
block_list.back().back().begin = begin;
|
|
block_list.back().back().end = begin + r;
|
|
block_list.back().back().depth = depth;
|
|
}
|
|
// Do not recurse on ='s if the pivot was the off-the-end value;
|
|
// they're already fully sorted
|
|
//if(v != hi) { // recurse on ='s
|
|
block_list.back().expand();
|
|
block_list.back().back().begin = begin + r;
|
|
block_list.back().back().end = begin + r + (a-begin) + (end-d-1);
|
|
block_list.back().back().depth = depth + 1;
|
|
//}
|
|
r = d-c; // r <- # of >'s excluding those exhausted
|
|
if(r > 0 /*&& v < hi-1*/) { // recurse on >'s
|
|
block_list.back().expand();
|
|
block_list.back().back().begin = end - r;
|
|
block_list.back().back().end = end;
|
|
block_list.back().back().depth = depth;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Toplevel function for multikey quicksort over suffixes with double
|
|
* swapping.
|
|
*/
|
|
template<typename T>
|
|
void mkeyQSortSuf2(
|
|
const T& host,
|
|
TIndexOffU *s,
|
|
size_t slen,
|
|
TIndexOffU *s2,
|
|
int hi,
|
|
bool verbose = false,
|
|
bool sanityCheck = false,
|
|
size_t upto = OFF_MASK,
|
|
EList<size_t>* boundaries = NULL)
|
|
{
|
|
size_t hlen = host.length();
|
|
if(sanityCheck) sanityCheckInputSufs(s, slen);
|
|
TIndexOffU *sOrig = NULL;
|
|
if(sanityCheck) {
|
|
sOrig = new TIndexOffU[slen];
|
|
memcpy(sOrig, s, OFF_SIZE * slen);
|
|
}
|
|
mkeyQSortSuf2(host, hlen, s, slen, s2, hi, (size_t)0, slen, (size_t)0, upto, boundaries);
|
|
if(sanityCheck) {
|
|
sanityCheckOrderedSufs(host, hlen, s, slen, upto);
|
|
for(size_t i = 0; i < slen; i++) {
|
|
assert_eq(s[i], sOrig[s2[i]]);
|
|
}
|
|
delete[] sOrig;
|
|
}
|
|
}
|
|
|
|
// Ugly but necessary; otherwise the compiler chokes dramatically on
|
|
// the DifferenceCoverSample<> template args to the next few functions
|
|
template <typename T>
|
|
class DifferenceCoverSample;
|
|
|
|
/**
|
|
* Constant time
|
|
*/
|
|
template<typename T1, typename T2> inline
|
|
bool sufDcLt(
|
|
const T1& host,
|
|
const T2& s1,
|
|
const T2& s2,
|
|
const DifferenceCoverSample<T1>& dc,
|
|
bool sanityCheck = false)
|
|
{
|
|
size_t diff = dc.tieBreakOff(s1, s2);
|
|
ASSERT_ONLY(size_t hlen = host.length());
|
|
assert_lt(diff, dc.v());
|
|
assert_lt(diff, hlen-s1);
|
|
assert_lt(diff, hlen-s2);
|
|
if(sanityCheck) {
|
|
for(size_t i = 0; i < diff; i++) {
|
|
assert_eq(host[s1+i], host[s2+i]);
|
|
}
|
|
}
|
|
bool ret = dc.breakTie(s1+diff, s2+diff) < 0;
|
|
#ifndef NDEBUG
|
|
if(sanityCheck && ret != sstr_suf_lt(host, s1, hlen, host, s2, hlen, false)) {
|
|
assert(false);
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* k log(k)
|
|
*/
|
|
template<typename T> inline
|
|
void qsortSufDc(
|
|
const T& host,
|
|
size_t hlen,
|
|
TIndexOffU* s,
|
|
size_t slen,
|
|
const DifferenceCoverSample<T>& dc,
|
|
size_t begin,
|
|
size_t end,
|
|
bool sanityCheck = false)
|
|
{
|
|
assert_leq(end, slen);
|
|
assert_lt(begin, slen);
|
|
assert_gt(end, begin);
|
|
size_t n = end - begin;
|
|
if(n <= 1) return; // 1-element list already sorted
|
|
// Note: rand() didn't really cut it here; it seemed to run out of
|
|
// randomness and, after a time, returned the same thing over and
|
|
// over again
|
|
size_t a = (rand() % n) + begin; // choose pivot between begin and end
|
|
assert_lt(a, end);
|
|
assert_geq(a, begin);
|
|
SWAP(s, end-1, a); // move pivot to end
|
|
size_t cur = 0;
|
|
for(size_t i = begin; i < end-1; i++) {
|
|
if(sufDcLt(host, s[i], s[end-1], dc, sanityCheck)) {
|
|
if(sanityCheck)
|
|
assert(dollarLt(suffix(host, s[i]), suffix(host, s[end-1])));
|
|
assert_lt(begin + cur, end-1);
|
|
SWAP(s, i, begin + cur);
|
|
cur++;
|
|
}
|
|
}
|
|
// Put pivot into place
|
|
assert_lt(cur, end-begin);
|
|
SWAP(s, end-1, begin+cur);
|
|
if(begin+cur > begin) qsortSufDc(host, hlen, s, slen, dc, begin, begin+cur);
|
|
if(end > begin+cur+1) qsortSufDc(host, hlen, s, slen, dc, begin+cur+1, end);
|
|
}
|
|
|
|
/**
|
|
* Toplevel function for multikey quicksort over suffixes.
|
|
*/
|
|
template<typename T1, typename T2>
|
|
void mkeyQSortSufDcU8(
|
|
const T1& host1,
|
|
const T2& host,
|
|
size_t hlen,
|
|
TIndexOffU* s,
|
|
size_t slen,
|
|
const DifferenceCoverSample<T1>& dc,
|
|
int hi,
|
|
bool verbose = false,
|
|
bool sanityCheck = false)
|
|
{
|
|
if(sanityCheck) sanityCheckInputSufs(s, slen);
|
|
mkeyQSortSufDcU8(host1, host, hlen, s, slen, dc, hi, 0, slen, 0, sanityCheck);
|
|
if(sanityCheck) sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK);
|
|
}
|
|
|
|
/**
|
|
* Return a boolean indicating whether s1 < s2 using the difference
|
|
* cover to break the tie.
|
|
*/
|
|
template<typename T1, typename T2> inline
|
|
bool sufDcLtU8(
|
|
const T1& host1,
|
|
const T2& host,
|
|
size_t hlen,
|
|
size_t s1,
|
|
size_t s2,
|
|
const DifferenceCoverSample<T1>& dc,
|
|
bool sanityCheck = false)
|
|
{
|
|
hlen += 0;
|
|
size_t diff = dc.tieBreakOff((TIndexOffU)s1, (TIndexOffU)s2);
|
|
assert_lt(diff, dc.v());
|
|
assert_lt(diff, hlen-s1);
|
|
assert_lt(diff, hlen-s2);
|
|
if(sanityCheck) {
|
|
for(size_t i = 0; i < diff; i++) {
|
|
assert_eq(host[s1+i], host1[s2+i]);
|
|
}
|
|
}
|
|
bool ret = dc.breakTie((TIndexOffU)(s1+diff), (TIndexOffU)(s2+diff)) < 0;
|
|
// Sanity-check return value using dollarLt
|
|
#ifndef NDEBUG
|
|
bool ret2 = sstr_suf_lt(host1, s1, hlen, host, s2, hlen, false);
|
|
assert(!sanityCheck || ret == ret2);
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* k log(k)
|
|
*/
|
|
template<typename T1, typename T2> inline
|
|
void qsortSufDcU8(
|
|
const T1& host1,
|
|
const T2& host,
|
|
size_t hlen,
|
|
TIndexOffU* s,
|
|
size_t slen,
|
|
const DifferenceCoverSample<T1>& dc,
|
|
size_t begin,
|
|
size_t end,
|
|
bool sanityCheck = false)
|
|
{
|
|
assert_leq(end, slen);
|
|
assert_lt(begin, slen);
|
|
assert_gt(end, begin);
|
|
size_t n = end - begin;
|
|
if(n <= 1) return; // 1-element list already sorted
|
|
// Note: rand() didn't really cut it here; it seemed to run out of
|
|
// randomness and, after a time, returned the same thing over and
|
|
// over again
|
|
size_t a = (rand() % n) + begin; // choose pivot between begin and end
|
|
assert_lt(a, end);
|
|
assert_geq(a, begin);
|
|
SWAP(s, end-1, a); // move pivot to end
|
|
size_t cur = 0;
|
|
for(size_t i = begin; i < end-1; i++) {
|
|
if(sufDcLtU8(host1, host, hlen, s[i], s[end-1], dc, sanityCheck)) {
|
|
#ifndef NDEBUG
|
|
if(sanityCheck) {
|
|
assert(sstr_suf_lt(host1, s[i], hlen, host1, s[end-1], hlen, false));
|
|
}
|
|
assert_lt(begin + cur, end-1);
|
|
#endif
|
|
SWAP(s, i, begin + cur);
|
|
cur++;
|
|
}
|
|
}
|
|
// Put pivot into place
|
|
assert_lt(cur, end-begin);
|
|
SWAP(s, end-1, begin+cur);
|
|
if(begin+cur > begin) qsortSufDcU8(host1, host, hlen, s, slen, dc, begin, begin+cur);
|
|
if(end > begin+cur+1) qsortSufDcU8(host1, host, hlen, s, slen, dc, begin+cur+1, end);
|
|
}
|
|
|
|
#define BUCKET_SORT_CUTOFF (4 * 1024 * 1024)
|
|
#define SELECTION_SORT_CUTOFF 6
|
|
|
|
/**
|
|
* Straightforwardly obtain a uint8_t-ized version of t[off]. This
|
|
* works fine as long as TStr is not packed.
|
|
*/
|
|
template<typename TStr>
|
|
inline uint8_t get_uint8(const TStr& t, size_t off) {
|
|
return t[off];
|
|
}
|
|
|
|
/**
|
|
* For incomprehensible generic-programming reasons, getting a uint8_t
|
|
* version of a character in a packed String<> requires casting first
|
|
* to Dna then to uint8_t.
|
|
*/
|
|
template<>
|
|
inline uint8_t get_uint8<S2bDnaString>(const S2bDnaString& t, size_t off) {
|
|
return (uint8_t)t[off];
|
|
}
|
|
|
|
/**
|
|
* Return character at offset 'off' from the 'si'th suffix in the array
|
|
* 's' of suffixes. If the character is out-of-bounds, return hi.
|
|
*/
|
|
template<typename TStr>
|
|
static inline int char_at_suf_u8(
|
|
const TStr& host,
|
|
size_t hlen,
|
|
TIndexOffU* s,
|
|
size_t si,
|
|
size_t off,
|
|
uint8_t hi)
|
|
{
|
|
return ((off+s[si]) < hlen) ? get_uint8(host, off+s[si]) : (hi);
|
|
}
|
|
|
|
template<typename T1, typename T2>
|
|
static void selectionSortSufDcU8(
|
|
const T1& host1,
|
|
const T2& host,
|
|
size_t hlen,
|
|
TIndexOffU* s,
|
|
size_t slen,
|
|
const DifferenceCoverSample<T1>& dc,
|
|
uint8_t hi,
|
|
size_t begin,
|
|
size_t end,
|
|
size_t depth,
|
|
bool sanityCheck = false)
|
|
{
|
|
#define ASSERT_SUF_LT(l, r) \
|
|
if(sanityCheck && \
|
|
!sstr_suf_lt(host1, s[l], hlen, host1, s[r], hlen, false)) { \
|
|
assert(false); \
|
|
}
|
|
|
|
assert_gt(end, begin+1);
|
|
assert_leq(end-begin, SELECTION_SORT_CUTOFF);
|
|
assert_eq(hi, 4);
|
|
size_t v = dc.v();
|
|
if(end == begin+2) {
|
|
size_t off = dc.tieBreakOff(s[begin], s[begin+1]);
|
|
if(off + s[begin] >= hlen ||
|
|
off + s[begin+1] >= hlen)
|
|
{
|
|
off = OFF_MASK;
|
|
}
|
|
if(off != OFF_MASK) {
|
|
if(off < depth) {
|
|
qsortSufDcU8<T1,T2>(host1, host, hlen, s, slen, dc,
|
|
begin, end, sanityCheck);
|
|
// It's helpful for debugging if we call this here
|
|
if(sanityCheck) {
|
|
sanityCheckOrderedSufs(host1, hlen, s, slen,
|
|
OFF_MASK, begin, end);
|
|
}
|
|
return;
|
|
}
|
|
v = off - depth + 1;
|
|
}
|
|
}
|
|
assert_leq(v, dc.v());
|
|
size_t lim = v;
|
|
assert_geq(lim, 0);
|
|
for(size_t i = begin; i < end-1; i++) {
|
|
size_t targ = i;
|
|
size_t targoff = depth + s[i];
|
|
for(size_t j = i+1; j < end; j++) {
|
|
assert_neq(j, targ);
|
|
size_t joff = depth + s[j];
|
|
size_t k;
|
|
for(k = 0; k <= lim; k++) {
|
|
assert_neq(j, targ);
|
|
uint8_t jc = (k + joff < hlen) ? get_uint8(host, k + joff) : hi;
|
|
uint8_t tc = (k + targoff < hlen) ? get_uint8(host, k + targoff) : hi;
|
|
assert(jc != hi || tc != hi);
|
|
if(jc > tc) {
|
|
// the jth suffix is greater than the current
|
|
// smallest suffix
|
|
ASSERT_SUF_LT(targ, j);
|
|
break;
|
|
} else if(jc < tc) {
|
|
// the jth suffix is less than the current smallest
|
|
// suffix, so update smallest to be j
|
|
ASSERT_SUF_LT(j, targ);
|
|
targ = j;
|
|
targoff = joff;
|
|
break;
|
|
} else if(k == lim) {
|
|
// Check whether either string ends immediately
|
|
// after this character
|
|
assert_leq(k + joff + 1, hlen);
|
|
assert_leq(k + targoff + 1, hlen);
|
|
if(k + joff + 1 == hlen) {
|
|
// targ < j
|
|
assert_neq(k + targoff + 1, hlen);
|
|
ASSERT_SUF_LT(targ, j);
|
|
break;
|
|
} else if(k + targoff + 1 == hlen) {
|
|
// j < targ
|
|
ASSERT_SUF_LT(j, targ);
|
|
targ = j;
|
|
targoff = joff;
|
|
break;
|
|
}
|
|
} else {
|
|
// They're equal so far, keep going
|
|
}
|
|
}
|
|
// The jth suffix was equal to the current smallest suffix
|
|
// up to the difference-cover period, so disambiguate with
|
|
// difference cover
|
|
if(k == lim+1) {
|
|
assert_neq(j, targ);
|
|
if(sufDcLtU8(host1, host, hlen, s[j], s[targ], dc, sanityCheck)) {
|
|
// j < targ
|
|
assert(!sufDcLtU8(host1, host, hlen, s[targ], s[j], dc, sanityCheck));
|
|
ASSERT_SUF_LT(j, targ);
|
|
targ = j;
|
|
targoff = joff;
|
|
} else {
|
|
assert(sufDcLtU8(host1, host, hlen, s[targ], s[j], dc, sanityCheck));
|
|
ASSERT_SUF_LT(targ, j); // !
|
|
}
|
|
}
|
|
}
|
|
if(i != targ) {
|
|
ASSERT_SUF_LT(targ, i);
|
|
// swap i and targ
|
|
TIndexOffU tmp = s[i];
|
|
s[i] = s[targ];
|
|
s[targ] = tmp;
|
|
}
|
|
for(size_t j = i+1; j < end; j++) {
|
|
ASSERT_SUF_LT(i, j);
|
|
}
|
|
}
|
|
if(sanityCheck) {
|
|
sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end);
|
|
}
|
|
}
|
|
|
|
template<typename T1, typename T2>
|
|
static void bucketSortSufDcU8(
|
|
const T1& host1,
|
|
const T2& host,
|
|
size_t hlen,
|
|
TIndexOffU* s,
|
|
size_t slen,
|
|
const DifferenceCoverSample<T1>& dc,
|
|
uint8_t hi,
|
|
size_t _begin,
|
|
size_t _end,
|
|
size_t _depth,
|
|
bool sanityCheck = false)
|
|
{
|
|
// 5 64-element buckets for bucket-sorting A, C, G, T, $
|
|
TIndexOffU* bkts[4];
|
|
for(size_t i = 0; i < 4; i++) {
|
|
bkts[i] = new TIndexOffU[4 * 1024 * 1024];
|
|
}
|
|
ELList<size_t, 5, 1024> block_list;
|
|
bool first = true;
|
|
while(true) {
|
|
size_t begin = 0, end = 0;
|
|
if(first) {
|
|
begin = _begin;
|
|
end = _end;
|
|
first = false;
|
|
} else {
|
|
if(block_list.size() == 0) {
|
|
break;
|
|
}
|
|
if(block_list.back().size() > 1) {
|
|
end = block_list.back().back(); block_list.back().pop_back();
|
|
begin = block_list.back().back();
|
|
} else {
|
|
block_list.resize(block_list.size() - 1);
|
|
if(block_list.size() == 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
size_t depth = block_list.size() + _depth;
|
|
assert_leq(end-begin, BUCKET_SORT_CUTOFF);
|
|
assert_eq(hi, 4);
|
|
if(end <= begin + 1) { // 1-element list already sorted
|
|
continue;
|
|
}
|
|
if(depth > dc.v()) {
|
|
// Quicksort the remaining suffixes using difference cover
|
|
// for constant-time comparisons; this is O(k*log(k)) where
|
|
// k=(end-begin)
|
|
qsortSufDcU8<T1,T2>(host1, host, hlen, s, slen, dc, begin, end, sanityCheck);
|
|
continue;
|
|
}
|
|
if(end-begin <= SELECTION_SORT_CUTOFF) {
|
|
// Bucket sort remaining items
|
|
selectionSortSufDcU8(host1, host, hlen, s, slen, dc, hi,
|
|
begin, end, depth, sanityCheck);
|
|
if(sanityCheck) {
|
|
sanityCheckOrderedSufs(host1, hlen, s, slen,
|
|
OFF_MASK, begin, end);
|
|
}
|
|
continue;
|
|
}
|
|
size_t cnts[] = { 0, 0, 0, 0, 0 };
|
|
for(size_t i = begin; i < end; i++) {
|
|
size_t off = depth + s[i];
|
|
uint8_t c = (off < hlen) ? get_uint8(host, off) : hi;
|
|
assert_leq(c, 4);
|
|
if(c == 0) {
|
|
s[begin + cnts[0]++] = s[i];
|
|
} else {
|
|
bkts[c-1][cnts[c]++] = s[i];
|
|
}
|
|
}
|
|
assert_eq(cnts[0] + cnts[1] + cnts[2] + cnts[3] + cnts[4], end - begin);
|
|
size_t cur = begin + cnts[0];
|
|
if(cnts[1] > 0) { memcpy(&s[cur], bkts[0], cnts[1] << (OFF_SIZE/4 + 1)); cur += cnts[1]; }
|
|
if(cnts[2] > 0) { memcpy(&s[cur], bkts[1], cnts[2] << (OFF_SIZE/4 + 1)); cur += cnts[2]; }
|
|
if(cnts[3] > 0) { memcpy(&s[cur], bkts[2], cnts[3] << (OFF_SIZE/4 + 1)); cur += cnts[3]; }
|
|
if(cnts[4] > 0) { memcpy(&s[cur], bkts[3], cnts[4] << (OFF_SIZE/4 + 1)); }
|
|
// This frame is now totally finished with bkts[][], so recursive
|
|
// callees can safely clobber it; we're not done with cnts[], but
|
|
// that's local to the stack frame.
|
|
block_list.expand();
|
|
block_list.back().clear();
|
|
block_list.back().push_back(begin);
|
|
for(size_t i = 0; i < 4; i++) {
|
|
if(cnts[i] > 0) {
|
|
block_list.back().push_back(block_list.back().back() + cnts[i]);
|
|
}
|
|
}
|
|
}
|
|
// Done
|
|
|
|
for(size_t i = 0; i < 4; i++) {
|
|
delete [] bkts[i];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Main multikey quicksort function for suffixes. Based on Bentley &
|
|
* Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for
|
|
* Sorting and Searching Strings". That algorithm has been extended in
|
|
* three ways:
|
|
*
|
|
* 1. Deal with keys of different lengths by checking bounds and
|
|
* considering off-the-end values to be 'hi' (b/c our goal is the
|
|
* BWT transform, we're biased toward considring prefixes as
|
|
* lexicographically *greater* than their extensions).
|
|
* 2. The multikey_qsort_suffixes version takes a single host string
|
|
* and a list of suffix offsets as input. This reduces memory
|
|
* footprint compared to an approach that treats its input
|
|
* generically as a set of strings (not necessarily suffixes), thus
|
|
* requiring that we store at least two integers worth of
|
|
* information for each string.
|
|
* 3. Sorting functions take an extra "upto" parameter that upper-
|
|
* bounds the depth to which the function sorts.
|
|
*/
|
|
template<typename T1, typename T2>
|
|
void mkeyQSortSufDcU8(
|
|
const T1& host1,
|
|
const T2& host,
|
|
size_t hlen,
|
|
TIndexOffU* s,
|
|
size_t slen,
|
|
const DifferenceCoverSample<T1>& dc,
|
|
int hi,
|
|
size_t begin,
|
|
size_t end,
|
|
size_t depth,
|
|
bool sanityCheck = false)
|
|
{
|
|
// Helper for making the recursive call; sanity-checks arguments to
|
|
// make sure that the problem actually got smaller.
|
|
#define MQS_RECURSE_SUF_DC_U8(nbegin, nend, ndepth) { \
|
|
assert(nbegin > begin || nend < end || ndepth > depth); \
|
|
mkeyQSortSufDcU8(host1, host, hlen, s, slen, dc, hi, nbegin, nend, ndepth, sanityCheck); \
|
|
}
|
|
assert_leq(begin, slen);
|
|
assert_leq(end, slen);
|
|
size_t n = end - begin;
|
|
if(n <= 1) return; // 1-element list already sorted
|
|
if(depth > dc.v()) {
|
|
// Quicksort the remaining suffixes using difference cover
|
|
// for constant-time comparisons; this is O(k*log(k)) where
|
|
// k=(end-begin)
|
|
qsortSufDcU8<T1,T2>(host1, host, hlen, s, slen, dc, begin, end, sanityCheck);
|
|
if(sanityCheck) {
|
|
sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end);
|
|
}
|
|
return;
|
|
}
|
|
if(n <= BUCKET_SORT_CUTOFF) {
|
|
// Bucket sort remaining items
|
|
bucketSortSufDcU8(host1, host, hlen, s, slen, dc,
|
|
(uint8_t)hi, begin, end, depth, sanityCheck);
|
|
if(sanityCheck) {
|
|
sanityCheckOrderedSufs(host1, hlen, s, slen, OFF_MASK, begin, end);
|
|
}
|
|
return;
|
|
}
|
|
size_t a, b, c, d, r;
|
|
CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF_U8); // choose pivot, swap to begin
|
|
int v = CHAR_AT_SUF_U8(begin, depth); // v <- pivot value
|
|
#ifndef NDEBUG
|
|
{
|
|
bool stillInBounds = false;
|
|
for(size_t i = begin; i < end; i++) {
|
|
if(depth < (hlen-s[i])) {
|
|
stillInBounds = true;
|
|
break;
|
|
} else { /* already fell off this suffix */ }
|
|
}
|
|
assert(stillInBounds); // >=1 suffix must still be in bounds
|
|
}
|
|
#endif
|
|
a = b = begin;
|
|
c = d = end-1;
|
|
while(true) {
|
|
// Invariant: everything before a is = pivot, everything
|
|
// between a and b is <
|
|
int bc = 0; // shouldn't have to init but gcc on Mac complains
|
|
while(b <= c && v >= (bc = CHAR_AT_SUF_U8(b, depth))) {
|
|
if(v == bc) {
|
|
SWAP(s, a, b); a++;
|
|
}
|
|
b++;
|
|
}
|
|
// Invariant: everything after d is = pivot, everything
|
|
// between c and d is >
|
|
int cc = 0; // shouldn't have to init but gcc on Mac complains
|
|
//bool hiLatch = true;
|
|
while(b <= c && v <= (cc = CHAR_AT_SUF_U8(c, depth))) {
|
|
if(v == cc) {
|
|
SWAP(s, c, d); d--;
|
|
}
|
|
//else if(hiLatch && cc == hi) { }
|
|
c--;
|
|
}
|
|
if(b > c) break;
|
|
SWAP(s, b, c);
|
|
b++;
|
|
c--;
|
|
}
|
|
assert(a > begin || c < end-1); // there was at least one =s
|
|
assert_lt(d-c, n); // they can't all have been > pivot
|
|
assert_lt(b-a, n); // they can't all have been < pivot
|
|
r = min(a-begin, b-a); VECSWAP(s, begin, b-r, r); // swap left = to center
|
|
r = min(d-c, end-d-1); VECSWAP(s, b, end-r, r); // swap right = to center
|
|
r = b-a; // r <- # of <'s
|
|
if(r > 0) {
|
|
MQS_RECURSE_SUF_DC_U8(begin, begin + r, depth); // recurse on <'s
|
|
}
|
|
// Do not recurse on ='s if the pivot was the off-the-end value;
|
|
// they're already fully sorted
|
|
if(v != hi) {
|
|
MQS_RECURSE_SUF_DC_U8(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s
|
|
}
|
|
r = d-c; // r <- # of >'s excluding those exhausted
|
|
if(r > 0 && v < hi-1) {
|
|
MQS_RECURSE_SUF_DC_U8(end-r, end, depth); // recurse on >'s
|
|
}
|
|
}
|
|
|
|
|
|
#endif /*MULTIKEY_QSORT_H_*/
|