222 lines
5.8 KiB
C
222 lines
5.8 KiB
C
|
/*
|
||
|
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
|
||
|
*
|
||
|
* This file is part of Bowtie 2.
|
||
|
*
|
||
|
* Bowtie 2 is free software: you can redistribute it and/or modify
|
||
|
* it under the terms of the GNU General Public License as published by
|
||
|
* the Free Software Foundation, either version 3 of the License, or
|
||
|
* (at your option) any later version.
|
||
|
*
|
||
|
* Bowtie 2 is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
* GNU General Public License for more details.
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License
|
||
|
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
|
||
|
#ifndef RANDOM_UTIL_H_
|
||
|
#define RANDOM_UTIL_H_
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include "random_source.h"
|
||
|
#include "ds.h"
|
||
|
|
||
|
/**
|
||
|
* Return a random integer in [1, N]. Each time it's called it samples again
|
||
|
* without replacement. done() indicates when all elements have been given
|
||
|
* out.
|
||
|
*/
|
||
|
class Random1toN {
|
||
|
|
||
|
typedef uint32_t T;
|
||
|
|
||
|
public:
|
||
|
|
||
|
// A set with fewer than this many elements should kick us into swap-list
|
||
|
// mode immediately. Otherwise we start in seen-list mode and then
|
||
|
// possibly proceed to swap-list mode later.
|
||
|
static const size_t SWAPLIST_THRESH;
|
||
|
|
||
|
// Convert seen-list to swap-list after this many entries in the seen-list.
|
||
|
static const size_t CONVERSION_THRESH;
|
||
|
|
||
|
// Convert seen-list to swap-list after this (this times n_) many entries
|
||
|
// in the seen-list.
|
||
|
static const float CONVERSION_FRAC;
|
||
|
|
||
|
Random1toN(int cat = 0) :
|
||
|
sz_(0), n_(0), cur_(0),
|
||
|
list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat),
|
||
|
thresh_(0) {}
|
||
|
|
||
|
Random1toN(size_t n, int cat = 0) :
|
||
|
sz_(0), n_(n), cur_(0),
|
||
|
list_(SWAPLIST_THRESH, cat), seen_(CONVERSION_THRESH, cat),
|
||
|
thresh_(0) {}
|
||
|
|
||
|
/**
|
||
|
* Initialize the set of pseudo-randoms to be given out without replacement.
|
||
|
*/
|
||
|
void init(size_t n, bool withoutReplacement) {
|
||
|
sz_ = n_ = n;
|
||
|
converted_ = false;
|
||
|
swaplist_ = n < SWAPLIST_THRESH || withoutReplacement;
|
||
|
cur_ = 0;
|
||
|
list_.clear();
|
||
|
seen_.clear();
|
||
|
thresh_ = std::max(CONVERSION_THRESH, (size_t)(CONVERSION_FRAC * n));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reset in preparation for giving out a fresh collection of pseudo-randoms
|
||
|
* without replacement.
|
||
|
*/
|
||
|
void reset() {
|
||
|
sz_ = n_ = cur_ = 0; swaplist_ = converted_ = false;
|
||
|
list_.clear(); seen_.clear();
|
||
|
thresh_ = 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Get next pseudo-random element without replacement.
|
||
|
*/
|
||
|
T next(RandomSource& rnd) {
|
||
|
assert(!done());
|
||
|
if(cur_ == 0 && !converted_) {
|
||
|
// This is the first call to next()
|
||
|
if(n_ == 1) {
|
||
|
// Trivial case: set of 1
|
||
|
cur_ = 1;
|
||
|
return 0;
|
||
|
}
|
||
|
if(swaplist_) {
|
||
|
// The set is small, so we go immediately to the random
|
||
|
// swapping list
|
||
|
list_.resize(n_);
|
||
|
for(size_t i = 0; i < n_; i++) {
|
||
|
list_[i] = (T)i;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if(swaplist_) {
|
||
|
// Get next pseudo-random using the swap-list
|
||
|
size_t r = cur_ + (rnd.nextU32() % (n_ - cur_));
|
||
|
if(r != cur_) {
|
||
|
std::swap(list_[cur_], list_[r]);
|
||
|
}
|
||
|
return list_[cur_++];
|
||
|
} else {
|
||
|
assert(!converted_);
|
||
|
// Get next pseudo-random but reject it if it's in the seen-list
|
||
|
bool again = true;
|
||
|
T rn = 0;
|
||
|
size_t seenSz = seen_.size();
|
||
|
while(again) {
|
||
|
rn = rnd.nextU32() % (T)n_;
|
||
|
again = false;
|
||
|
for(size_t i = 0; i < seenSz; i++) {
|
||
|
if(seen_[i] == rn) {
|
||
|
again = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Add it to the seen-list
|
||
|
seen_.push_back(rn);
|
||
|
cur_++;
|
||
|
assert_leq(cur_, n_);
|
||
|
// Move on to using the swap-list?
|
||
|
assert_gt(thresh_, 0);
|
||
|
if(seen_.size() >= thresh_ && cur_ < n_) {
|
||
|
// Add all elements not already in the seen list to the
|
||
|
// swap-list
|
||
|
assert(!seen_.empty());
|
||
|
seen_.sort();
|
||
|
list_.resize(n_ - cur_);
|
||
|
size_t prev = 0;
|
||
|
size_t cur = 0;
|
||
|
for(size_t i = 0; i <= seenSz; i++) {
|
||
|
// Add all the elements between the previous element and
|
||
|
// this one
|
||
|
for(size_t j = prev; j < seen_[i]; j++) {
|
||
|
list_[cur++] = (T)j;
|
||
|
}
|
||
|
prev = seen_[i]+1;
|
||
|
}
|
||
|
for(size_t j = prev; j < n_; j++) {
|
||
|
list_[cur++] = (T)j;
|
||
|
}
|
||
|
assert_eq(cur, n_ - cur_);
|
||
|
seen_.clear();
|
||
|
cur_ = 0;
|
||
|
n_ = list_.size();
|
||
|
converted_ = true;
|
||
|
swaplist_ = true;
|
||
|
}
|
||
|
return rn;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the generator was initialized.
|
||
|
*/
|
||
|
bool inited() const { return n_ > 0; }
|
||
|
|
||
|
/**
|
||
|
* Set so that there are no pseudo-randoms remaining.
|
||
|
*/
|
||
|
void setDone() { assert(inited()); cur_ = n_; }
|
||
|
|
||
|
/**
|
||
|
* Return true iff all pseudo-randoms have already been given out.
|
||
|
*/
|
||
|
bool done() const { return inited() && cur_ >= n_; }
|
||
|
|
||
|
/**
|
||
|
* Return the total number of pseudo-randoms we are initialized to give
|
||
|
* out, including ones already given out.
|
||
|
*/
|
||
|
size_t size() const { return n_; }
|
||
|
|
||
|
/**
|
||
|
* Return the number of pseudo-randoms left to give out.
|
||
|
*/
|
||
|
size_t left() const { return n_ - cur_; }
|
||
|
|
||
|
/**
|
||
|
* Return the total size occupued by the Descent driver and all its
|
||
|
* constituent parts.
|
||
|
*/
|
||
|
size_t totalSizeBytes() const {
|
||
|
return list_.totalSizeBytes() +
|
||
|
seen_.totalSizeBytes();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the total capacity of the Descent driver and all its constituent
|
||
|
* parts.
|
||
|
*/
|
||
|
size_t totalCapacityBytes() const {
|
||
|
return list_.totalCapacityBytes() +
|
||
|
seen_.totalCapacityBytes();
|
||
|
}
|
||
|
|
||
|
protected:
|
||
|
|
||
|
size_t sz_; // domain to pick elts from
|
||
|
size_t n_; // number of elements in active list
|
||
|
bool swaplist_; // if small, use swapping
|
||
|
bool converted_; // true iff seen-list was converted to swap-list
|
||
|
size_t cur_; // # times next() was called
|
||
|
EList<T> list_; // pseudo-random swapping list
|
||
|
EList<T> seen_; // prior to swaplist_ mode, list of
|
||
|
// pseudo-randoms given out
|
||
|
size_t thresh_; // conversion threshold for this instantiation, which
|
||
|
// depends both on CONVERSION_THRESH and on n_
|
||
|
};
|
||
|
|
||
|
#endif
|