/* * Copyright 2011, Ben Langmead * * This file is part of Bowtie 2. * * Bowtie 2 is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Bowtie 2 is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Bowtie 2. If not, see . */ #ifndef IVAL_LIST_H_ #define IVAL_LIST_H_ #include "ds.h" #include "ref_coord.h" #include /** * Encapsulates the "union" of a collection of intervals. Intervals are stored * in a sorted list. Intervals can be added but not removed. Supports just * one type of query for now: locusPresent(). */ class EIvalMergeList { public: static const size_t DEFAULT_UNSORTED_SZ = 16; explicit EIvalMergeList(int cat = 0) : sorted_(cat), sortedLhs_(cat), unsorted_(cat), unsortedSz_(DEFAULT_UNSORTED_SZ) { } explicit EIvalMergeList(size_t unsortedSz, int cat = 0) : sorted_(cat), sortedLhs_(cat), unsorted_(cat), unsortedSz_(unsortedSz) { } /** * Set the maximum size of the unsorted list. */ void setUnsortedSize(size_t usz) { unsortedSz_ = usz; } /** * Add a new interval to the list. */ void add(const Interval& i) { assert_leq(unsorted_.size(), unsortedSz_); if(unsorted_.size() < unsortedSz_) { unsorted_.push_back(i); } if(unsorted_.size() == unsortedSz_) { flush(); } } /** * Move all unsorted interval information into the sorted list and re-sort. * Merge overlapping intervals. */ void flush() { for(size_t i = 0; i < unsorted_.size(); i++) { sorted_.push_back(unsorted_[i]); } sorted_.sort(); merge(); sortedLhs_.clear(); for(size_t i = 0; i < sorted_.size(); i++) { sortedLhs_.push_back(sorted_[i].upstream()); } assert(sortedLhs_.sorted()); unsorted_.clear(); } #ifndef NDEBUG /** * Check that this interval list is internally consistent. */ bool repOk() const { assert_eq(sorted_.size(), sortedLhs_.size()); return true; } #endif /** * Remove all ranges from the list. */ void reset() { clear(); } /** * Remove all ranges from the list. */ void clear() { sorted_.clear(); sortedLhs_.clear(); unsorted_.clear(); } /** * Return true iff this locus is present in one of the intervals in the * list. */ bool locusPresent(const Coord& loc) const { return locusPresentUnsorted(loc) || locusPresentSorted(loc); } /** * Return the number of intervals added since the last call to reset() or * clear(). */ size_t size() const { return sorted_.size() + unsorted_.size(); } /** * Return true iff list is empty. */ bool empty() const { return sorted_.empty() && unsorted_.empty(); } protected: /** * Go through the sorted interval list and merge adjacent entries that * overlap. */ void merge() { size_t nmerged = 0; for(size_t i = 1; i < sorted_.size(); i++) { if(sorted_[i-1].downstream() >= sorted_[i].upstream()) { nmerged++; assert_leq(sorted_[i-1].upstream(), sorted_[i].upstream()); Coord up = std::min(sorted_[i-1].upstream(), sorted_[i].upstream()); Coord dn = std::max(sorted_[i-1].downstream(), sorted_[i].downstream()); sorted_[i].setUpstream(up); sorted_[i].setLength(dn.off() - up.off()); sorted_[i-1].reset(); } } sorted_.sort(); assert_lt(nmerged, sorted_.size()); sorted_.resize(sorted_.size()-nmerged); #ifndef NDEBUG for(size_t i = 0; i < sorted_.size(); i++) { assert(sorted_[i].inited()); } #endif } /** * Return true iff the given locus is present in one of the intervals in * the sorted list. */ bool locusPresentSorted(const Coord& loc) const { assert(repOk()); if(sorted_.empty()) { return false; } size_t beg = sortedLhs_.bsearchLoBound(loc); if(beg == sortedLhs_.size() || sortedLhs_[beg] > loc) { // Check element before if(beg == 0) { return false; } return sorted_[beg-1].contains(loc); } else { assert_eq(loc, sortedLhs_[beg]); return true; } } /** * Return true iff the given locus is present in one of the intervals in * the unsorted list. */ bool locusPresentUnsorted(const Coord& loc) const { for(size_t i = 0; i < unsorted_.size(); i++) { if(unsorted_[i].contains(loc)) { return true; } } return false; } EList sorted_; // LHS, RHS sorted EList sortedLhs_; // LHS, index into sorted_, sorted EList unsorted_; // unsorted size_t unsortedSz_; // max allowed size of unsorted_ }; /** * Binned version of the above. We bin using the low bits of the reference * sequence. */ class EIvalMergeListBinned { public: static const size_t NBIN = 7; explicit EIvalMergeListBinned(int cat = 0) : bins_(1 << NBIN, cat) { bins_.resize(1 << NBIN); } explicit EIvalMergeListBinned( size_t unsortedSz, int cat = 0) : bins_(1 << NBIN, cat) { bins_.resize(1 << NBIN); for(size_t i = 0; i < (1 << NBIN); i++) { bins_[i].setUnsortedSize(unsortedSz); } } /** * Add a new interval to the list. */ void add(const Interval& i) { size_t bin = i.ref() & ~(0xffffffff << NBIN); assert_lt(bin, bins_.size()); bins_[bin].add(i); } #ifndef NDEBUG /** * Check that this interval list is internally consistent. */ bool repOk() const { for(size_t i = 0; i < bins_.size(); i++) { assert(bins_[i].repOk()); } return true; } #endif /** * Remove all ranges from the list. */ void reset() { clear(); } /** * Remove all ranges from the list. */ void clear() { for(size_t i = 0; i < bins_.size(); i++) { bins_[i].clear(); } } /** * Return true iff this locus is present in one of the intervals in the * list. */ bool locusPresent(const Coord& loc) const { size_t bin = loc.ref() & ~(0xffffffff << NBIN); assert_lt(bin, bins_.size()); return bins_[bin].locusPresent(loc); } /** * Return the number of intervals added since the last call to reset() or * clear(). */ size_t size() const { // TODO: Keep track of size size_t sz = 0; for(size_t i = 0; i < bins_.size(); i++) { sz += bins_[i].size(); } return sz; } /** * Return true iff list is empty. */ bool empty() const { return size() == 0; } protected: EList bins_; }; #endif /*ndef IVAL_LIST_H_*/