/*
 * Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
 *
 * This file is part of Bowtie 2.
 *
 * Bowtie 2 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Bowtie 2 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bowtie 2.  If not, see <http://www.gnu.org/licenses/>.
 */
|
||
|
|
||
|
#ifndef SSTRING_H_
|
||
|
#define SSTRING_H_
|
||
|
|
||
|
#include <string.h>
|
||
|
#include <iostream>
|
||
|
#include "assert_helpers.h"
|
||
|
#include "alphabet.h"
|
||
|
#include "random_source.h"
|
||
|
|
||
|
/**
 * Four kinds of strings defined here:
 *
 * SString:
 *   A fixed-length string using heap memory with size set at construction time
 *   or when install() member is called.
 *
 * S2bDnaString:
 *   Like SString, but stores a list of uint32_t words where each word is
 *   divided into 16 2-bit slots interpreted as holding one A/C/G/T nucleotide
 *   each.
 *
 * TODO: S3bDnaString allowing N.  S4bDnaString allowing nucleotide masks.
 *
 * SStringExpandable:
 *   A string using heap memory where the size of the backing store is
 *   automatically resized as needed.  Supports operations like append, insert,
 *   erase, etc.
 *
 * SStringFixed:
 *   A fixed-length string using stack memory where size is set at compile
 *   time.
 *
 * All string classes have some extra facilities that make it easy to print the
 * string, including when the string uses an encoded alphabet.  See toZBuf()
 * and toZBufXForm().
 *
 * Global comparison template functions (sstr_eq, sstr_lt, sstr_gt and their
 * relatives) are supplied.  They are capable of doing lexicographical
 * comparisons between any of the kinds of strings defined here.
 */
|
||
|
|
||
|
/**
 * Length trait used by the sstr_* comparison functions.  The primary template
 * handles any type that exposes a length() member; the specializations below
 * handle zero-terminated character buffers by calling strlen().
 */
template<typename T>
class Class_sstr_len {
public:
    /** Return s.length(). */
    static inline size_t sstr_len(const T& s) {
        return s.length();
    }
};

/**
 * Zero-terminated array of const char, for callers that name the type
 * explicitly as 'const char[N]'.
 */
template<unsigned N>
class Class_sstr_len<const char[N]> {
public:
    /** Return the number of characters before the first terminator. */
    static inline size_t sstr_len(const char s[N]) {
        return strlen(s);
    }
};

/**
 * Zero-terminated array of char.  When a string literal is passed to a
 * function template taking 'const T&', T is deduced as 'char[N]' (the const
 * belongs to the parameter, not to T), so this is the specialization that
 * literal arguments actually select.  Without it they would fall through to
 * the primary template and fail to compile on s.length().
 */
template<unsigned N>
class Class_sstr_len<char[N]> {
public:
    /** Return the number of characters before the first terminator. */
    static inline size_t sstr_len(const char s[N]) {
        return strlen(s);
    }
};

/**
 * Pointer to a zero-terminated buffer of const char.
 */
template<>
class Class_sstr_len<const char *> {
public:
    /** Return the number of characters before the first terminator. */
    static inline size_t sstr_len(const char *s) {
        return strlen(s);
    }
};

/**
 * Pointer to a zero-terminated buffer of const unsigned char.
 */
template<>
class Class_sstr_len<const unsigned char *> {
public:
    /** Return the number of bytes before the first zero byte. */
    static inline size_t sstr_len(const unsigned char *s) {
        return strlen((const char *)s);
    }
};
|
||
|
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_eq(const T1& s1, const T2& s2) {
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
|
||
|
if(len1 != len2) return false;
|
||
|
for(size_t i = 0; i < len1; i++) {
|
||
|
if(s1[i] != s2[i]) return false;
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/**
 * Return true iff s1 and s2 differ, i.e. the negation of sstr_eq().
 */
template<typename T1, typename T2>
static inline bool sstr_neq(const T1& s1, const T2& s2) {
    if(sstr_eq(s1, s2)) {
        return false;
    }
    return true;
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given suffix of s1 is equal to the given suffix of s2 up
|
||
|
* to upto characters.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_suf_upto_eq(
|
||
|
const T1& s1, size_t suf1,
|
||
|
const T2& s2, size_t suf2,
|
||
|
size_t upto,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
|
||
|
if(len1 > upto) len1 = upto;
|
||
|
if(len2 > upto) len2 = upto;
|
||
|
if(len1 != len2) return false;
|
||
|
for(size_t i = 0; i < len1; i++) {
|
||
|
if(s1[suf1+i] != s2[suf2+i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/**
 * Return true iff the given suffix of s1 and the given suffix of s2 differ
 * within the first 'upto' characters, i.e. the negation of
 * sstr_suf_upto_eq() for the same arguments.
 */
template<typename T1, typename T2>
static inline bool sstr_suf_upto_neq(
    const T1& s1, size_t suf1,
    const T2& s2, size_t suf2,
    size_t upto,
    bool endlt = true)
{
    if(sstr_suf_upto_eq(s1, suf1, s2, suf2, upto, endlt)) {
        return false;
    }
    return true;
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff s1 is less than s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_lt(const T1& s1, const T2& s2, bool endlt = true) {
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] < s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] > s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return false;
|
||
|
return (len1 < len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given suffix of s1 is less than the given suffix of s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_suf_lt(
|
||
|
const T1& s1, size_t suf1,
|
||
|
const T2& s2, size_t suf2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[suf1+i] < s2[suf2+i]) {
|
||
|
return true;
|
||
|
} else if(s1[suf1+i] > s2[suf2+i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return false;
|
||
|
return (len1 < len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given suffix of s1 is less than the given suffix of s2.
|
||
|
* Treat s1 and s2 as though they have lengths len1/len2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_suf_lt(
|
||
|
const T1& s1, size_t suf1, size_t len1,
|
||
|
const T2& s2, size_t suf2, size_t len2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(suf1, len1);
|
||
|
assert_leq(suf2, len2);
|
||
|
size_t left1 = len1 - suf1;
|
||
|
size_t left2 = len2 - suf2;
|
||
|
size_t minleft = (left1 < left2 ? left1 : left2);
|
||
|
for(size_t i = 0; i < minleft; i++) {
|
||
|
if(s1[suf1+i] < s2[suf2+i]) {
|
||
|
return true;
|
||
|
} else if(s1[suf1+i] > s2[suf2+i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(left1 == left2) return false;
|
||
|
return (left1 < left2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given suffix of s1 is less than the given suffix of s2
|
||
|
* up to upto characters.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_suf_upto_lt(
|
||
|
const T1& s1, size_t suf1,
|
||
|
const T2& s2, size_t suf2,
|
||
|
size_t upto,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
|
||
|
if(len1 > upto) len1 = upto;
|
||
|
if(len2 > upto) len2 = upto;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[suf1+i] < s2[suf2+i]) {
|
||
|
return true;
|
||
|
} else if(s1[suf1+i] > s2[suf2+i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return false;
|
||
|
return (len1 < len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given prefix of s1 is less than the given prefix of s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_pre_lt(
|
||
|
const T1& s1, size_t pre1,
|
||
|
const T2& s2, size_t pre2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = pre1;
|
||
|
size_t len2 = pre2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] < s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] > s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return false;
|
||
|
return (len1 < len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff s1 is less than or equal to s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_leq(const T1& s1, const T2& s2, bool endlt = true) {
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] < s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] > s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return true;
|
||
|
return (len1 < len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given suffix of s1 is less than or equal to the given
|
||
|
* suffix of s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_suf_leq(
|
||
|
const T1& s1, size_t suf1,
|
||
|
const T2& s2, size_t suf2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[suf1+i] < s2[suf2+i]) {
|
||
|
return true;
|
||
|
} else if(s1[suf1+i] > s2[suf2+i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return true;
|
||
|
return (len1 < len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given prefix of s1 is less than or equal to the given
|
||
|
* prefix of s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_pre_leq(
|
||
|
const T1& s1, size_t pre1,
|
||
|
const T2& s2, size_t pre2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = pre1;
|
||
|
size_t len2 = pre2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] < s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] > s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return true;
|
||
|
return (len1 < len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff s1 is greater than s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_gt(const T1& s1, const T2& s2, bool endlt = true) {
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] > s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] < s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return false;
|
||
|
return (len1 > len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given suffix of s1 is greater than the given suffix of
|
||
|
* s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_suf_gt(
|
||
|
const T1& s1, size_t suf1,
|
||
|
const T2& s2, size_t suf2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[suf1+i] > s2[suf2+i]) {
|
||
|
return true;
|
||
|
} else if(s1[suf1+i] < s2[suf2+i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return false;
|
||
|
return (len1 > len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given prefix of s1 is greater than the given prefix of
|
||
|
* s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_pre_gt(
|
||
|
const T1& s1, size_t pre1,
|
||
|
const T2& s2, size_t pre2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = pre1;
|
||
|
size_t len2 = pre2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] > s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] < s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return false;
|
||
|
return (len1 > len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff s1 is greater than or equal to s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_geq(const T1& s1, const T2& s2, bool endlt = true) {
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] > s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] < s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return true;
|
||
|
return (len1 > len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given suffix of s1 is greater than or equal to the given
|
||
|
* suffix of s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_suf_geq(
|
||
|
const T1& s1, size_t suf1,
|
||
|
const T2& s2, size_t suf2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
|
||
|
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[suf1+i] > s2[suf2+i]) {
|
||
|
return true;
|
||
|
} else if(s1[suf1+i] < s2[suf2+i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return true;
|
||
|
return (len1 > len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the given prefix of s1 is greater than or equal to the given
|
||
|
* prefix of s2.
|
||
|
*/
|
||
|
template<typename T1, typename T2>
|
||
|
static inline bool sstr_pre_geq(
|
||
|
const T1& s1, size_t pre1,
|
||
|
const T2& s2, size_t pre2,
|
||
|
bool endlt = true)
|
||
|
{
|
||
|
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
|
||
|
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
|
||
|
size_t len1 = pre1;
|
||
|
size_t len2 = pre2;
|
||
|
size_t minlen = (len1 < len2 ? len1 : len2);
|
||
|
for(size_t i = 0; i < minlen; i++) {
|
||
|
if(s1[i] > s2[i]) {
|
||
|
return true;
|
||
|
} else if(s1[i] < s2[i]) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
if(len1 == len2) return true;
|
||
|
return (len1 > len2) == endlt;
|
||
|
}
|
||
|
|
||
|
/**
 * Return a zero-terminated C string for 's' by asking it for its toZBuf()
 * buffer.  The returned pointer is whatever toZBuf() hands back.
 */
template<typename T>
static inline const char * sstr_to_cstr(const T& s) {
    const char *zbuf = s.toZBuf();
    return zbuf;
}
|
||
|
|
||
|
template<>
|
||
|
inline const char * sstr_to_cstr<std::basic_string<char> >(
|
||
|
const std::basic_string<char>& s)
|
||
|
{
|
||
|
return s.c_str();
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Simple string class with backing memory whose size is managed by the user
|
||
|
* using the constructor and install() member function. No behind-the-scenes
|
||
|
* reallocation or copying takes place.
|
||
|
*/
|
||
|
template<typename T>
|
||
|
class SString {
|
||
|
|
||
|
public:
|
||
|
|
||
|
explicit SString() :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{ }
|
||
|
|
||
|
explicit SString(size_t sz) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
resize(sz);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from another SStringExpandable.
|
||
|
*/
|
||
|
SString(const SString<T>& o) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
*this = o;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from a std::basic_string of the
|
||
|
* appropriate type.
|
||
|
*/
|
||
|
explicit SString(const std::basic_string<T>& str) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
install(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from an array and size.
|
||
|
*/
|
||
|
explicit SString(const T* b, size_t sz) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
install(b, sz);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from a zero-terminated array.
|
||
|
*/
|
||
|
explicit SString(const T* b) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Destroy the expandable string object.
|
||
|
*/
|
||
|
virtual ~SString() {
|
||
|
if(cs_ != NULL) {
|
||
|
delete[] cs_;
|
||
|
cs_ = NULL;
|
||
|
}
|
||
|
if(printcs_ != NULL) {
|
||
|
delete[] printcs_;
|
||
|
printcs_ = NULL;
|
||
|
}
|
||
|
len_ = 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment to other SString.
|
||
|
*/
|
||
|
SString<T>& operator=(const SString<T>& o) {
|
||
|
install(o.cs_, o.len_);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment to other SString.
|
||
|
*/
|
||
|
SString<T>& operator=(const std::basic_string<T>& o) {
|
||
|
install(o);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Resizes the string without preserving its contents.
|
||
|
*/
|
||
|
void resize(size_t sz) {
|
||
|
if(cs_ != NULL) {
|
||
|
delete cs_;
|
||
|
cs_ = NULL;
|
||
|
}
|
||
|
if(printcs_ != NULL) {
|
||
|
delete printcs_;
|
||
|
printcs_ = NULL;
|
||
|
}
|
||
|
if(sz != 0) {
|
||
|
cs_ = new T[sz+1];
|
||
|
}
|
||
|
len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse version of the read.
|
||
|
*/
|
||
|
T windowGet(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, len_ - depth);
|
||
|
return fw ? cs_[depth+i] : cs_[depth+len-i-1];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse-complement version of the read.
|
||
|
*/
|
||
|
void windowGet(
|
||
|
T& ret,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_leq(len, len_ - depth);
|
||
|
ret.resize(len);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
ret.set(fw ? cs_[depth+i] : cs_[depth+len-i-1], i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
inline void set(int c, size_t idx) {
|
||
|
assert_lt(idx, len_);
|
||
|
cs_[idx] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
inline const T& operator[](size_t i) const {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve mutable version of element i.
|
||
|
*/
|
||
|
inline T& operator[](size_t i) {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
inline const T& get(size_t i) const {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not
|
||
|
* operator=.
|
||
|
*/
|
||
|
virtual void install(const T* b, size_t sz) {
|
||
|
if(sz == 0) return;
|
||
|
resize(sz);
|
||
|
memcpy(cs_, b, sz * sizeof(T));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not
|
||
|
* operator=.
|
||
|
*/
|
||
|
virtual void install(const std::basic_string<T>& b) {
|
||
|
size_t sz = b.length();
|
||
|
if(sz == 0) return;
|
||
|
resize(sz);
|
||
|
memcpy(cs_, b.c_str(), sz * sizeof(T));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy all bytes from zero-terminated buffer 'b' into this string.
|
||
|
*/
|
||
|
void install(const T* b) {
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const char* b, size_t sz) {
|
||
|
if(sz == 0) return;
|
||
|
resize(sz);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
cs_[i] = b[sz-i-1];
|
||
|
}
|
||
|
len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const SString<T>& b) {
|
||
|
installReverse(b.cs_, b.len_);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are equal.
|
||
|
*/
|
||
|
bool operator==(const SString<T>& o) {
|
||
|
return sstr_eq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are not equal.
|
||
|
*/
|
||
|
bool operator!=(const SString<T>& o) {
|
||
|
return sstr_neq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than given string.
|
||
|
*/
|
||
|
bool operator<(const SString<T>& o) {
|
||
|
return sstr_lt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than given string.
|
||
|
*/
|
||
|
bool operator>(const SString<T>& o) {
|
||
|
return sstr_gt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than or equal to given string.
|
||
|
*/
|
||
|
bool operator<=(const SString<T>& o) {
|
||
|
return sstr_leq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than or equal to given string.
|
||
|
*/
|
||
|
bool operator>=(const SString<T>& o) {
|
||
|
return sstr_geq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse the buffer in place.
|
||
|
*/
|
||
|
void reverse() {
|
||
|
for(size_t i = 0; i < (len_ >> 1); i++) {
|
||
|
T tmp = get(i);
|
||
|
set(get(len_-i-1), i);
|
||
|
set(tmp, len_-i-1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse the buffer in place.
|
||
|
*/
|
||
|
void reverseComplement(int* rcmap) {
|
||
|
size_t mid = len_ >> 1;
|
||
|
for(size_t i = 0; i < (len_ >> 1); i++) {
|
||
|
T tmp = get(i);
|
||
|
set(rcmap[get(len_-i-1)], i);
|
||
|
set(rcmap[tmp], len_-i-1);
|
||
|
}
|
||
|
if (len_ % 2) {
|
||
|
set(rcmap[get(mid)], mid);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse a substring of the buffer in place.
|
||
|
*/
|
||
|
void reverseWindow(size_t off, size_t len) {
|
||
|
assert_leq(off, len_);
|
||
|
assert_leq(off + len, len_);
|
||
|
size_t mid = len >> 1;
|
||
|
for(size_t i = 0; i < mid; i++) {
|
||
|
T tmp = get(off+i);
|
||
|
set(get(off+len-i-1), off+i);
|
||
|
set(tmp, off+len-i-1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set the first len elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(size_t len, const T& el) {
|
||
|
assert_leq(len, len_);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
set(el, i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set all elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(const T& el) {
|
||
|
fill(len_, el);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the length of the string.
|
||
|
*/
|
||
|
inline size_t length() const { return len_; }
|
||
|
|
||
|
/**
|
||
|
* Clear the buffer.
|
||
|
*/
|
||
|
void clear() { len_ = 0; }
|
||
|
|
||
|
/**
|
||
|
* Return true iff the buffer is empty.
|
||
|
*/
|
||
|
inline bool empty() const { return len_ == 0; }
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
const char* toZBufXForm(const char *xform) const {
|
||
|
ASSERT_ONLY(size_t xformElts = strlen(xform));
|
||
|
// Lazily allocate space for print buffer
|
||
|
if(printcs_ == NULL) {
|
||
|
const_cast<char*&>(printcs_) = new char[len_+1];
|
||
|
}
|
||
|
char* printcs = const_cast<char*>(printcs_);
|
||
|
assert(printcs != NULL);
|
||
|
for(size_t i = 0; i < len_; i++) {
|
||
|
assert_lt(cs_[i], (int)xformElts);
|
||
|
printcs[i] = xform[cs_[i]];
|
||
|
}
|
||
|
printcs[len_] = 0;
|
||
|
return printcs_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
virtual const T* toZBuf() const {
|
||
|
const_cast<T*>(cs_)[len_] = 0;
|
||
|
return cs_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return a const version of the raw buffer.
|
||
|
*/
|
||
|
const T* buf() const { return cs_; }
|
||
|
|
||
|
/**
|
||
|
* Return a writeable version of the raw buffer.
|
||
|
*/
|
||
|
T* wbuf() { return cs_; }
|
||
|
|
||
|
protected:
|
||
|
|
||
|
T *cs_; // +1 so that we have the option of dropping in a terminating "\0"
|
||
|
char *printcs_; // +1 so that we have the option of dropping in a terminating "\0"
|
||
|
size_t len_; // # elements
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Simple string class with backing memory whose size is managed by the user
|
||
|
* using the constructor and install() member function. No behind-the-scenes
|
||
|
* reallocation or copying takes place.
|
||
|
*/
|
||
|
class S2bDnaString {
|
||
|
|
||
|
public:
|
||
|
|
||
|
explicit S2bDnaString() :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{ }
|
||
|
|
||
|
explicit S2bDnaString(size_t sz) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
resize(sz);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy another object of the same class.
|
||
|
*/
|
||
|
S2bDnaString(const S2bDnaString& o) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
*this = o;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from a std::basic_string of the
|
||
|
* appropriate type.
|
||
|
*/
|
||
|
explicit S2bDnaString(
|
||
|
const std::basic_string<char>& str,
|
||
|
bool chars = false,
|
||
|
bool colors = false) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
if(chars) {
|
||
|
if(colors) {
|
||
|
installColors(str.c_str(), str.length());
|
||
|
} else {
|
||
|
installChars(str.c_str(), str.length());
|
||
|
}
|
||
|
} else {
|
||
|
install(str.c_str(), str.length());
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from an array and size.
|
||
|
*/
|
||
|
explicit S2bDnaString(
|
||
|
const char* b,
|
||
|
size_t sz,
|
||
|
bool chars = false,
|
||
|
bool colors = false) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
if(chars) {
|
||
|
if(colors) {
|
||
|
installColors(b, sz);
|
||
|
} else {
|
||
|
installChars(b, sz);
|
||
|
}
|
||
|
} else {
|
||
|
install(b, sz);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a zero-terminated string.
|
||
|
*/
|
||
|
explicit S2bDnaString(
|
||
|
const char* b,
|
||
|
bool chars = false,
|
||
|
bool colors = false) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0)
|
||
|
{
|
||
|
if(chars) {
|
||
|
if(colors) {
|
||
|
installColors(b, strlen(b));
|
||
|
} else {
|
||
|
installChars(b, strlen(b));
|
||
|
}
|
||
|
} else {
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Destroy the expandable string object.
|
||
|
*/
|
||
|
virtual ~S2bDnaString() {
|
||
|
if(cs_ != NULL) {
|
||
|
delete[] cs_;
|
||
|
cs_ = NULL;
|
||
|
}
|
||
|
if(printcs_ != NULL) {
|
||
|
delete[] printcs_;
|
||
|
printcs_ = NULL;
|
||
|
}
|
||
|
len_ = 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment to other SString.
|
||
|
*/
|
||
|
template<typename T>
|
||
|
S2bDnaString& operator=(const T& o) {
|
||
|
install(o.c_str(), o.length());
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment from a std::basic_string
|
||
|
*/
|
||
|
template<typename T>
|
||
|
S2bDnaString& operator=(const std::basic_string<char>& o) {
|
||
|
install(o);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Resizes the string without preserving its contents.
|
||
|
*/
|
||
|
void resize(size_t sz) {
|
||
|
if(cs_ != NULL) {
|
||
|
delete cs_;
|
||
|
cs_ = NULL;
|
||
|
}
|
||
|
if(printcs_ != NULL) {
|
||
|
delete printcs_;
|
||
|
printcs_ = NULL;
|
||
|
}
|
||
|
len_ = sz;
|
||
|
if(sz != 0) {
|
||
|
cs_ = new uint32_t[nwords()];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return DNA character corresponding to element 'idx'.
|
||
|
*/
|
||
|
char toChar(size_t idx) const {
|
||
|
int c = (int)get(idx);
|
||
|
assert_range(0, 3, c);
|
||
|
return "ACGT"[c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return color character corresponding to element 'idx'.
|
||
|
*/
|
||
|
char toColor(size_t idx) const {
|
||
|
int c = (int)get(idx);
|
||
|
assert_range(0, 3, c);
|
||
|
return "0123"[c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse version of the read.
|
||
|
*/
|
||
|
char windowGet(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, len_ - depth);
|
||
|
return fw ? get(depth+i) : get(depth+len-i-1);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse-complement version of the read.
|
||
|
*/
|
||
|
template<typename T>
|
||
|
void windowGet(
|
||
|
T& ret,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_leq(len, len_ - depth);
|
||
|
ret.resize(len);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
ret.set((fw ? get(depth+i) : get(depth+len-i-1)), i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return length in 32-bit words.
|
||
|
*/
|
||
|
size_t nwords() const {
|
||
|
return (len_ + 15) >> 4;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void set(int c, size_t idx) {
|
||
|
assert_lt(idx, len_);
|
||
|
assert_range(0, 3, c);
|
||
|
size_t word = idx >> 4;
|
||
|
size_t bpoff = (idx & 15) << 1;
|
||
|
cs_[word] = cs_[word] & ~(uint32_t)(3 << bpoff);
|
||
|
cs_[word] = cs_[word] | (uint32_t)(c << bpoff);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set character at index 'idx' to DNA char 'c'.
|
||
|
*/
|
||
|
void setChar(int c, size_t idx) {
|
||
|
assert_in(toupper(c), "ACGT");
|
||
|
int bp = asc2dna[c];
|
||
|
set(bp, idx);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set character at index 'idx' to color char 'c'.
|
||
|
*/
|
||
|
void setColor(int c, size_t idx) {
|
||
|
assert_in(toupper(c), "0123");
|
||
|
int co = asc2col[c];
|
||
|
set(co, idx);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set the ith 32-bit word to given word.
|
||
|
*/
|
||
|
void setWord(uint32_t w, size_t i) {
|
||
|
assert_lt(i, nwords());
|
||
|
cs_[i] = w;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
char operator[](size_t i) const {
|
||
|
assert_lt(i, len_);
|
||
|
return get(i);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
char get(size_t i) const {
|
||
|
assert_lt(i, len_);
|
||
|
size_t word = i >> 4;
|
||
|
size_t bpoff = (i & 15) << 1;
|
||
|
return (char)((cs_[word] >> bpoff) & 3);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy packed words from string 'b' into this packed string.
|
||
|
*/
|
||
|
void install(const uint32_t* b, size_t sz) {
|
||
|
if(sz == 0) return;
|
||
|
resize(sz);
|
||
|
memcpy(cs_, b, sizeof(uint32_t)*nwords());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters encoded as integers from buffer 'b' into this
|
||
|
* packed string.
|
||
|
*/
|
||
|
void install(const char* b, size_t sz) {
|
||
|
if(sz == 0) return;
|
||
|
resize(sz);
|
||
|
size_t wordi = 0;
|
||
|
for(size_t i = 0; i < sz; i += 16) {
|
||
|
uint32_t word = 0;
|
||
|
for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
|
||
|
uint32_t bp = (int)b[i+j];
|
||
|
uint32_t shift = (uint32_t)j << 1;
|
||
|
assert_range(0, 3, (int)bp);
|
||
|
word |= (bp << shift);
|
||
|
}
|
||
|
cs_[wordi++] = word;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void installChars(const char* b, size_t sz) {
|
||
|
if(sz == 0) return;
|
||
|
resize(sz);
|
||
|
size_t wordi = 0;
|
||
|
for(size_t i = 0; i < sz; i += 16) {
|
||
|
uint32_t word = 0;
|
||
|
for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
|
||
|
char c = b[i+j];
|
||
|
assert_in(toupper(c), "ACGT");
|
||
|
int bp = asc2dna[(int)c];
|
||
|
assert_range(0, 3, (int)bp);
|
||
|
uint32_t shift = (uint32_t)j << 1;
|
||
|
word |= (bp << shift);
|
||
|
}
|
||
|
cs_[wordi++] = word;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' color characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void installColors(const char* b, size_t sz) {
|
||
|
if(sz == 0) return;
|
||
|
resize(sz);
|
||
|
size_t wordi = 0;
|
||
|
for(size_t i = 0; i < sz; i += 16) {
|
||
|
uint32_t word = 0;
|
||
|
for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
|
||
|
char c = b[i+j];
|
||
|
assert_in(c, "0123");
|
||
|
int bp = asc2col[(int)c];
|
||
|
assert_range(0, 3, (int)bp);
|
||
|
uint32_t shift = (uint32_t)j << 1;
|
||
|
word |= (bp << shift);
|
||
|
}
|
||
|
cs_[wordi++] = word;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void install(const char* b) {
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void installChars(const char* b) {
|
||
|
installChars(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void installColors(const char* b) {
|
||
|
installColors(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void install(const std::basic_string<char>& b) {
|
||
|
install(b.c_str(), b.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void installChars(const std::basic_string<char>& b) {
|
||
|
installChars(b.c_str(), b.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
|
||
|
*/
|
||
|
void installColors(const std::basic_string<char>& b) {
|
||
|
installColors(b.c_str(), b.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const char* b, size_t sz) {
|
||
|
resize(sz);
|
||
|
if(sz == 0) return;
|
||
|
size_t wordi = 0;
|
||
|
size_t bpi = 0;
|
||
|
cs_[0] = 0;
|
||
|
for(size_t i =sz; i > 0; i--) {
|
||
|
assert_range(0, 3, (int)b[i-1]);
|
||
|
cs_[wordi] |= ((int)b[i-1] << (bpi<<1));
|
||
|
if(bpi == 15) {
|
||
|
wordi++;
|
||
|
cs_[wordi] = 0;
|
||
|
bpi = 0;
|
||
|
} else bpi++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy all chars from buffer of DNA characters 'b' into this string,
|
||
|
* reversing them in the process.
|
||
|
*/
|
||
|
void installReverse(const char* b) {
|
||
|
installReverse(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer of DNA characters 'b' into this string,
|
||
|
* reversing them in the process.
|
||
|
*/
|
||
|
void installReverseChars(const char* b, size_t sz) {
|
||
|
resize(sz);
|
||
|
if(sz == 0) return;
|
||
|
size_t wordi = 0;
|
||
|
size_t bpi = 0;
|
||
|
cs_[0] = 0;
|
||
|
for(size_t i =sz; i > 0; i--) {
|
||
|
char c = b[i-1];
|
||
|
assert_in(toupper(c), "ACGT");
|
||
|
int bp = asc2dna[(int)c];
|
||
|
assert_range(0, 3, bp);
|
||
|
cs_[wordi] |= (bp << (bpi<<1));
|
||
|
if(bpi == 15) {
|
||
|
wordi++;
|
||
|
cs_[wordi] = 0;
|
||
|
bpi = 0;
|
||
|
} else bpi++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy all chars from buffer of DNA characters 'b' into this string,
|
||
|
* reversing them in the process.
|
||
|
*/
|
||
|
void installReverseChars(const char* b) {
|
||
|
installReverseChars(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer of color characters 'b' into this string,
|
||
|
* reversing them in the process.
|
||
|
*/
|
||
|
void installReverseColors(const char* b, size_t sz) {
|
||
|
resize(sz);
|
||
|
if(sz == 0) return;
|
||
|
size_t wordi = 0;
|
||
|
size_t bpi = 0;
|
||
|
cs_[0] = 0;
|
||
|
for(size_t i =sz; i > 0; i--) {
|
||
|
char c = b[i-1];
|
||
|
assert_in(c, "0123");
|
||
|
int bp = asc2col[(int)c];
|
||
|
assert_range(0, 3, bp);
|
||
|
cs_[wordi] |= (bp << (bpi<<1));
|
||
|
if(bpi == 15) {
|
||
|
wordi++;
|
||
|
cs_[wordi] = 0;
|
||
|
bpi = 0;
|
||
|
} else bpi++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy all chars from buffer of color characters 'b' into this string,
|
||
|
* reversing them in the process.
|
||
|
*/
|
||
|
void installReverseColors(const char* b) {
|
||
|
installReverseColors(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const S2bDnaString& b) {
|
||
|
resize(b.len_);
|
||
|
if(b.len_ == 0) return;
|
||
|
size_t wordi = 0;
|
||
|
size_t bpi = 0;
|
||
|
size_t wordb = b.nwords()-1;
|
||
|
size_t bpb = (b.len_-1) & 15;
|
||
|
cs_[0] = 0;
|
||
|
for(size_t i = b.len_; i > 0; i--) {
|
||
|
int bbp = (int)((b[wordb] >> (bpb << 1)) & 3);
|
||
|
assert_range(0, 3, bbp);
|
||
|
cs_[wordi] |= (bbp << (bpi << 1));
|
||
|
if(bpi == 15) {
|
||
|
wordi++;
|
||
|
cs_[wordi] = 0;
|
||
|
bpi = 0;
|
||
|
} else bpi++;
|
||
|
if(bpb == 0) {
|
||
|
wordb--;
|
||
|
bpi = 15;
|
||
|
} else bpi--;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are equal.
|
||
|
*/
|
||
|
bool operator==(const S2bDnaString& o) {
|
||
|
return sstr_eq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are not equal.
|
||
|
*/
|
||
|
bool operator!=(const S2bDnaString& o) {
|
||
|
return sstr_neq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than given string.
|
||
|
*/
|
||
|
bool operator<(const S2bDnaString& o) {
|
||
|
return sstr_lt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than given string.
|
||
|
*/
|
||
|
bool operator>(const S2bDnaString& o) {
|
||
|
return sstr_gt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than or equal to given string.
|
||
|
*/
|
||
|
bool operator<=(const S2bDnaString& o) {
|
||
|
return sstr_leq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than or equal to given string.
|
||
|
*/
|
||
|
bool operator>=(const S2bDnaString& o) {
|
||
|
return sstr_geq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse the 2-bit encoded DNA string in-place.
|
||
|
*/
|
||
|
void reverse() {
|
||
|
if(len_ <= 1) return;
|
||
|
size_t wordf = nwords()-1;
|
||
|
size_t bpf = (len_-1) & 15;
|
||
|
size_t wordi = 0;
|
||
|
size_t bpi = 0;
|
||
|
while(wordf > wordi || (wordf == wordi && bpf > bpi)) {
|
||
|
int f = (cs_[wordf] >> (bpf << 1)) & 3;
|
||
|
int i = (cs_[wordi] >> (bpi << 1)) & 3;
|
||
|
cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1));
|
||
|
cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1));
|
||
|
cs_[wordf] |= (uint32_t)(i << (bpf << 1));
|
||
|
cs_[wordi] |= (uint32_t)(f << (bpi << 1));
|
||
|
if(bpf == 0) {
|
||
|
bpf = 15;
|
||
|
wordf--;
|
||
|
} else bpf--;
|
||
|
if(bpi == 15) {
|
||
|
bpi = 0;
|
||
|
wordi++;
|
||
|
} else bpi++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse a substring of the buffer in place.
|
||
|
*/
|
||
|
void reverseWindow(size_t off, size_t len) {
|
||
|
assert_leq(off, len_);
|
||
|
assert_leq(off+len, len_);
|
||
|
if(len <= 1) return;
|
||
|
size_t wordf = (off+len-1) >> 4;
|
||
|
size_t bpf = (off+len-1) & 15;
|
||
|
size_t wordi = (off ) >> 4;
|
||
|
size_t bpi = (off ) & 15;
|
||
|
while(wordf > wordi || (wordf == wordi && bpf > bpi)) {
|
||
|
int f = (cs_[wordf] >> (bpf << 1)) & 3;
|
||
|
int i = (cs_[wordi] >> (bpi << 1)) & 3;
|
||
|
cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1));
|
||
|
cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1));
|
||
|
cs_[wordf] |= (uint32_t)(i << (bpf << 1));
|
||
|
cs_[wordi] |= (uint32_t)(f << (bpi << 1));
|
||
|
if(bpf == 0) {
|
||
|
bpf = 15;
|
||
|
wordf--;
|
||
|
} else bpf--;
|
||
|
if(bpi == 15) {
|
||
|
bpi = 0;
|
||
|
wordi++;
|
||
|
} else bpi++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Set the first len elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(size_t len, char el) {
|
||
|
assert_leq(len, len_);
|
||
|
assert_range(0, 3, (int)el);
|
||
|
size_t word = 0;
|
||
|
if(len > 32) {
|
||
|
// Copy el throughout block
|
||
|
uint32_t bl = (uint32_t)el;
|
||
|
bl |= (bl << 2);
|
||
|
bl |= (bl << 4);
|
||
|
bl |= (bl << 8);
|
||
|
bl |= (bl << 16);
|
||
|
// Fill with blocks
|
||
|
size_t blen = len >> 4;
|
||
|
for(; word < blen; word++) {
|
||
|
cs_[word] = bl;
|
||
|
}
|
||
|
len = len & 15;
|
||
|
}
|
||
|
size_t bp = 0;
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
cs_[word] &= ~(uint32_t)(3 << (bp << 1));
|
||
|
cs_[word] |= (uint32_t)(el << (bp << 1));
|
||
|
if(bp == 15) {
|
||
|
word++;
|
||
|
bp = 0;
|
||
|
} else bp++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set all elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(char el) {
|
||
|
fill(len_, el);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the ith character in the window defined by fw, color, depth and
|
||
|
* len.
|
||
|
*/
|
||
|
char windowGetDna(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, len_ - depth);
|
||
|
if(fw) {
|
||
|
return get(depth+i);
|
||
|
} else {
|
||
|
return
|
||
|
color ?
|
||
|
get(depth+len-i-1) :
|
||
|
compDna(get(depth+len-i-1));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Fill the given DNA buffer with the substring specified by fw,
|
||
|
* color, depth and len.
|
||
|
*/
|
||
|
template<typename T>
|
||
|
void windowGetDna(
|
||
|
T& buf,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_leq(len, len_ - depth);
|
||
|
buf.resize(len);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
buf.set(
|
||
|
(fw ?
|
||
|
get(depth+i) :
|
||
|
(color ?
|
||
|
get(depth+len-i-1) :
|
||
|
compDna(get(depth+len-i-1)))), i);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the length of the string.
|
||
|
*/
|
||
|
inline size_t length() const { return len_; }
|
||
|
|
||
|
/**
|
||
|
* Clear the buffer.
|
||
|
*/
|
||
|
void clear() { len_ = 0; }
|
||
|
|
||
|
/**
|
||
|
* Return true iff the buffer is empty.
|
||
|
*/
|
||
|
inline bool empty() const { return len_ == 0; }
|
||
|
|
||
|
/**
|
||
|
* Return a const version of the raw buffer.
|
||
|
*/
|
||
|
const uint32_t* buf() const { return cs_; }
|
||
|
|
||
|
/**
|
||
|
* Return a writeable version of the raw buffer.
|
||
|
*/
|
||
|
uint32_t* wbuf() { return cs_; }
|
||
|
|
||
|
/**
|
||
|
* Note: the size of the string once it's stored in the print buffer is 4
|
||
|
* times as large as the string as stored in compact 2-bit-per-char words.
|
||
|
*/
|
||
|
const char* toZBuf() const {
|
||
|
if(printcs_ == NULL) {
|
||
|
const_cast<char*&>(printcs_) = new char[len_+1];
|
||
|
}
|
||
|
char *printcs = const_cast<char*>(printcs_);
|
||
|
size_t word = 0, bp = 0;
|
||
|
for(size_t i = 0; i < len_; i++) {
|
||
|
int c = (cs_[word] >> (bp << 1)) & 3;
|
||
|
printcs[i] = "ACGT"[c];
|
||
|
if(bp == 15) {
|
||
|
word++;
|
||
|
bp = 0;
|
||
|
} else bp++;
|
||
|
}
|
||
|
printcs[len_] = '\0';
|
||
|
return printcs_;
|
||
|
}
|
||
|
|
||
|
protected:
|
||
|
|
||
|
uint32_t *cs_; // 2-bit packed words
|
||
|
char *printcs_;
|
||
|
size_t len_; // # elements
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Simple string class with backing memory that automatically expands as needed.
|
||
|
*/
|
||
|
template<typename T, int S = 1024, int M = 2>
|
||
|
class SStringExpandable {
|
||
|
|
||
|
public:
|
||
|
|
||
|
explicit SStringExpandable() :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0),
|
||
|
sz_(0)
|
||
|
{ }
|
||
|
|
||
|
explicit SStringExpandable(size_t sz) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0),
|
||
|
sz_(0)
|
||
|
{
|
||
|
expandNoCopy(sz);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from another SStringExpandable.
|
||
|
*/
|
||
|
SStringExpandable(const SStringExpandable<T, S>& o) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0),
|
||
|
sz_(0)
|
||
|
{
|
||
|
*this = o;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from a std::basic_string of the
|
||
|
* appropriate type.
|
||
|
*/
|
||
|
explicit SStringExpandable(const std::basic_string<T>& str) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0),
|
||
|
sz_(0)
|
||
|
{
|
||
|
install(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from an array and size.
|
||
|
*/
|
||
|
explicit SStringExpandable(const T* b, size_t sz) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0),
|
||
|
sz_(0)
|
||
|
{
|
||
|
install(b, sz);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringExpandable from a zero-terminated array.
|
||
|
*/
|
||
|
explicit SStringExpandable(const T* b) :
|
||
|
cs_(NULL),
|
||
|
printcs_(NULL),
|
||
|
len_(0),
|
||
|
sz_(0)
|
||
|
{
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Destroy the expandable string object.
|
||
|
*/
|
||
|
virtual ~SStringExpandable() {
|
||
|
if(cs_ != NULL) {
|
||
|
delete[] cs_;
|
||
|
cs_ = NULL;
|
||
|
}
|
||
|
if(printcs_ != NULL) {
|
||
|
delete[] printcs_;
|
||
|
printcs_ = NULL;
|
||
|
}
|
||
|
sz_ = len_ = 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse-complement version of the read.
|
||
|
*/
|
||
|
T windowGet(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, len_ - depth);
|
||
|
return fw ? cs_[depth+i] : cs_[depth+len-i-1];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse-complement version of the read.
|
||
|
*/
|
||
|
void windowGet(
|
||
|
T& ret,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_leq(len, len_ - depth);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment to other SStringFixed.
|
||
|
*/
|
||
|
SStringExpandable<T,S>& operator=(const SStringExpandable<T,S>& o) {
|
||
|
install(o.cs_, o.len_);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment from a std::basic_string
|
||
|
*/
|
||
|
SStringExpandable<T,S>& operator=(const std::basic_string<T>& o) {
|
||
|
install(o.c_str(), o.length());
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Insert char c before position 'idx'; slide subsequent chars down.
|
||
|
*/
|
||
|
void insert(const T& c, size_t idx) {
|
||
|
assert_lt(idx, len_);
|
||
|
if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M);
|
||
|
len_++;
|
||
|
// Move everyone down by 1
|
||
|
// len_ is the *new* length
|
||
|
for(size_t i = len_; i > idx+1; i--) {
|
||
|
cs_[i-1] = cs_[i-2];
|
||
|
}
|
||
|
cs_[idx] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void set(int c, size_t idx) {
|
||
|
assert_lt(idx, len_);
|
||
|
cs_[idx] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append char c.
|
||
|
*/
|
||
|
void append(const T& c) {
|
||
|
if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M);
|
||
|
cs_[len_++] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Delete char at position 'idx'; slide subsequent chars up.
|
||
|
*/
|
||
|
void remove(size_t idx) {
|
||
|
assert_lt(idx, len_);
|
||
|
assert_gt(len_, 0);
|
||
|
for(size_t i = idx; i < len_-1; i++) {
|
||
|
cs_[i] = cs_[i+1];
|
||
|
}
|
||
|
len_--;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
const T& operator[](size_t i) const {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve mutable version of element i.
|
||
|
*/
|
||
|
T& operator[](size_t i) {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
const T& get(size_t i) const {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
virtual void install(const T* b, size_t sz) {
|
||
|
if(sz_ < sz) expandNoCopy((sz + S) * M);
|
||
|
memcpy(cs_, b, sz * sizeof(T));
|
||
|
len_ = sz;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Copy all bytes from zero-terminated buffer 'b' into this string.
|
||
|
*/
|
||
|
void install(const T* b) { install(b, strlen(b)); }
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const char* b, size_t sz) {
|
||
|
if(sz_ < sz) expandNoCopy((sz + S) * M);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
cs_[i] = b[sz-i-1];
|
||
|
}
|
||
|
len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const SStringExpandable<T, S>& b) {
|
||
|
if(sz_ < b.len_) expandNoCopy((b.len_ + S) * M);
|
||
|
for(size_t i = 0; i < b.len_; i++) {
|
||
|
cs_[i] = b.cs_[b.len_ - i - 1];
|
||
|
}
|
||
|
len_ = b.len_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are equal.
|
||
|
*/
|
||
|
bool operator==(const SStringExpandable<T, S>& o) {
|
||
|
return sstr_eq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are not equal.
|
||
|
*/
|
||
|
bool operator!=(const SStringExpandable<T, S>& o) {
|
||
|
return sstr_neq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than given string.
|
||
|
*/
|
||
|
bool operator<(const SStringExpandable<T, S>& o) {
|
||
|
return sstr_lt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than given string.
|
||
|
*/
|
||
|
bool operator>(const SStringExpandable<T, S>& o) {
|
||
|
return sstr_gt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than or equal to given string.
|
||
|
*/
|
||
|
bool operator<=(const SStringExpandable<T, S>& o) {
|
||
|
return sstr_leq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than or equal to given string.
|
||
|
*/
|
||
|
bool operator>=(const SStringExpandable<T, S>& o) {
|
||
|
return sstr_geq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse the buffer in place.
|
||
|
*/
|
||
|
void reverse() {
|
||
|
for(size_t i = 0; i < (len_ >> 1); i++) {
|
||
|
T tmp = get(i);
|
||
|
set(get(len_-i-1), i);
|
||
|
set(tmp, len_-i-1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse a substring of the buffer in place.
|
||
|
*/
|
||
|
void reverseWindow(size_t off, size_t len) {
|
||
|
assert_leq(off, len_);
|
||
|
assert_leq(off + len, len_);
|
||
|
size_t mid = len >> 1;
|
||
|
for(size_t i = 0; i < mid; i++) {
|
||
|
T tmp = get(off+i);
|
||
|
set(get(off+len-i-1), off+i);
|
||
|
set(tmp, off+len-i-1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Simply resize the buffer. If the buffer is resized to be
|
||
|
* longer, the newly-added elements will contain garbage and should
|
||
|
* be initialized immediately.
|
||
|
*/
|
||
|
void resize(size_t len) {
|
||
|
if(sz_ < len) expandCopy((len + S) * M);
|
||
|
len_ = len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Simply resize the buffer. If the buffer is resized to be
|
||
|
* longer, new elements will be initialized with 'el'.
|
||
|
*/
|
||
|
void resize(size_t len, const T& el) {
|
||
|
if(sz_ < len) expandCopy((len + S) * M);
|
||
|
if(len > len_) {
|
||
|
for(size_t i = len_; i < len; i++) {
|
||
|
cs_[i] = el;
|
||
|
}
|
||
|
}
|
||
|
len_ = len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set the first len elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(size_t len, const T& el) {
|
||
|
assert_leq(len, len_);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
cs_[i] = el;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set all elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(const T& el) {
|
||
|
fill(len_, el);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Trim len characters from the beginning of the string.
|
||
|
*/
|
||
|
void trimBegin(size_t len) {
|
||
|
assert_leq(len, len_);
|
||
|
if(len == len_) {
|
||
|
len_ = 0; return;
|
||
|
}
|
||
|
for(size_t i = 0; i < len_-len; i++) {
|
||
|
cs_[i] = cs_[i+len];
|
||
|
}
|
||
|
len_ -= len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Trim len characters from the end of the string.
|
||
|
*/
|
||
|
void trimEnd(size_t len) {
|
||
|
if(len >= len_) len_ = 0;
|
||
|
else len_ -= len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
void append(const T* b, size_t sz) {
|
||
|
if(sz_ < len_ + sz) expandCopy((len_ + sz + S) * M);
|
||
|
memcpy(cs_ + len_, b, sz * sizeof(T));
|
||
|
len_ += sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy bytes from zero-terminated buffer 'b' into this string.
|
||
|
*/
|
||
|
void append(const T* b) {
|
||
|
append(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the length of the string.
|
||
|
*/
|
||
|
size_t length() const { return len_; }
|
||
|
|
||
|
/**
|
||
|
* Clear the buffer.
|
||
|
*/
|
||
|
void clear() { len_ = 0; }
|
||
|
|
||
|
/**
|
||
|
* Return true iff the buffer is empty.
|
||
|
*/
|
||
|
bool empty() const { return len_ == 0; }
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
const char* toZBufXForm(const char *xform) const {
|
||
|
ASSERT_ONLY(size_t xformElts = strlen(xform));
|
||
|
if(empty()) {
|
||
|
const_cast<char&>(zero_) = 0;
|
||
|
return &zero_;
|
||
|
}
|
||
|
char* printcs = const_cast<char*>(printcs_);
|
||
|
// Lazily allocate space for print buffer
|
||
|
for(size_t i = 0; i < len_; i++) {
|
||
|
assert_lt(cs_[i], (int)xformElts);
|
||
|
printcs[i] = xform[(int)cs_[i]];
|
||
|
}
|
||
|
printcs[len_] = 0;
|
||
|
return printcs_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
virtual const T* toZBuf() const {
|
||
|
if(empty()) {
|
||
|
const_cast<T&>(zeroT_) = 0;
|
||
|
return &zeroT_;
|
||
|
}
|
||
|
assert_leq(len_, sz_);
|
||
|
const_cast<T*>(cs_)[len_] = 0;
|
||
|
return cs_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this DNA string matches the given nucleotide
|
||
|
* character string.
|
||
|
*/
|
||
|
bool eq(const char *str) const {
|
||
|
const char *self = toZBuf();
|
||
|
return strcmp(str, self) == 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return a const version of the raw buffer.
|
||
|
*/
|
||
|
const T* buf() const { return cs_; }
|
||
|
|
||
|
/**
|
||
|
* Return a writeable version of the raw buffer.
|
||
|
*/
|
||
|
T* wbuf() { return cs_; }
|
||
|
|
||
|
protected:
|
||
|
/**
|
||
|
* Allocate new, bigger buffer and copy old contents into it. If
|
||
|
* requested size can be accommodated by current buffer, do nothing.
|
||
|
*/
|
||
|
void expandCopy(size_t sz) {
|
||
|
if(sz_ >= sz) return; // done!
|
||
|
T *tmp = new T[sz + 1];
|
||
|
char *ptmp = new char[sz + 1];
|
||
|
if(cs_ != NULL) {
|
||
|
memcpy(tmp, cs_, sizeof(T)*len_);
|
||
|
delete[] cs_;
|
||
|
}
|
||
|
if(printcs_ != NULL) {
|
||
|
memcpy(ptmp, printcs_, sizeof(char)*len_);
|
||
|
delete[] printcs_;
|
||
|
}
|
||
|
cs_ = tmp;
|
||
|
printcs_ = ptmp;
|
||
|
sz_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Allocate new, bigger buffer. If requested size can be
|
||
|
* accommodated by current buffer, do nothing.
|
||
|
*/
|
||
|
void expandNoCopy(size_t sz) {
|
||
|
if(sz_ >= sz) return; // done!
|
||
|
if(cs_ != NULL) delete[] cs_;
|
||
|
if(printcs_ != NULL) delete[] printcs_;
|
||
|
cs_ = new T[sz + 1];
|
||
|
printcs_ = new char[sz + 1];
|
||
|
sz_ = sz;
|
||
|
}
|
||
|
|
||
|
T *cs_; // +1 so that we have the option of dropping in a terminating "\0"
|
||
|
char *printcs_; // +1 so that we have the option of dropping in a terminating "\0"
|
||
|
char zero_; // 0 terminator for empty string
|
||
|
T zeroT_; // 0 terminator for empty string
|
||
|
size_t len_; // # filled-in elements
|
||
|
size_t sz_; // size capacity of cs_
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Simple string class with in-object storage.
|
||
|
*
|
||
|
* All copies induced by, e.g., operator=, the copy constructor,
|
||
|
* install() and append(), are shallow (using memcpy/sizeof). If deep
|
||
|
* copies are needed, use a different class.
|
||
|
*
|
||
|
* Reading from an uninitialized element results in an assert as long
|
||
|
* as NDEBUG is not defined. If NDEBUG is defined, the result is
|
||
|
* undefined.
|
||
|
*/
|
||
|
template<typename T, int S>
|
||
|
class SStringFixed {
|
||
|
public:
|
||
|
explicit SStringFixed() : len_(0) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from another SStringFixed.
|
||
|
*/
|
||
|
SStringFixed(const SStringFixed<T, S>& o) {
|
||
|
*this = o;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from another SStringFixed.
|
||
|
*/
|
||
|
explicit SStringFixed(const std::basic_string<T>& str) {
|
||
|
install(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from an array and size.
|
||
|
*/
|
||
|
explicit SStringFixed(const T* b, size_t sz) {
|
||
|
install(b, sz);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a zero-terminated string.
|
||
|
*/
|
||
|
explicit SStringFixed(const T* b) {
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
virtual ~SStringFixed() { } // C++ needs this
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
inline const T& operator[](size_t i) const {
|
||
|
return get(i);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve mutable version of element i.
|
||
|
*/
|
||
|
inline T& operator[](size_t i) {
|
||
|
return get(i);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
inline const T& get(size_t i) const {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve mutable version of element i.
|
||
|
*/
|
||
|
inline T& get(size_t i) {
|
||
|
assert_lt(i, len_);
|
||
|
return cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse-complement version of the read.
|
||
|
*/
|
||
|
T windowGet(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, len_ - depth);
|
||
|
return fw ? cs_[depth+i] : cs_[depth+len-i-1];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return ith character from the left of either the forward or the
|
||
|
* reverse-complement version of the read.
|
||
|
*/
|
||
|
void windowGet(
|
||
|
T& ret,
|
||
|
bool fw,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = len_;
|
||
|
assert_leq(len, len_ - depth);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment to other SStringFixed.
|
||
|
*/
|
||
|
SStringFixed<T,S>& operator=(const SStringFixed<T,S>& o) {
|
||
|
install(o.cs_, o.len_);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Assignment from a std::basic_string
|
||
|
*/
|
||
|
SStringFixed<T,S>& operator=(const std::basic_string<T>& o) {
|
||
|
install(o);
|
||
|
return *this;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Insert char c before position 'idx'; slide subsequent chars down.
|
||
|
*/
|
||
|
void insert(const T& c, size_t idx) {
|
||
|
assert_lt(len_, S);
|
||
|
assert_lt(idx, len_);
|
||
|
// Move everyone down by 1
|
||
|
for(int i = len_; i > idx; i--) {
|
||
|
cs_[i] = cs_[i-1];
|
||
|
}
|
||
|
cs_[idx] = c;
|
||
|
len_++;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void set(int c, size_t idx) {
|
||
|
assert_lt(idx, len_);
|
||
|
cs_[idx] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append char c.
|
||
|
*/
|
||
|
void append(const T& c) {
|
||
|
assert_lt(len_, S);
|
||
|
cs_[len_++] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Delete char at position 'idx'; slide subsequent chars up.
|
||
|
*/
|
||
|
void remove(size_t idx) {
|
||
|
assert_lt(idx, len_);
|
||
|
assert_gt(len_, 0);
|
||
|
for(size_t i = idx; i < len_-1; i++) {
|
||
|
cs_[i] = cs_[i+1];
|
||
|
}
|
||
|
len_--;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
virtual void install(const T* b, size_t sz) {
|
||
|
assert_leq(sz, S);
|
||
|
memcpy(cs_, b, sz * sizeof(T));
|
||
|
len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy all bytes from zero-terminated buffer 'b' into this string.
|
||
|
*/
|
||
|
void install(const T* b) { install(b, strlen(b)); }
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const char* b, size_t sz) {
|
||
|
assert_leq(sz, S);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
cs_[i] = b[sz-i-1];
|
||
|
}
|
||
|
len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
|
||
|
* in the process.
|
||
|
*/
|
||
|
void installReverse(const SStringFixed<T, S>& b) {
|
||
|
assert_leq(b.len_, S);
|
||
|
for(size_t i = 0; i < b.len_; i++) {
|
||
|
cs_[i] = b.cs_[b.len_ - i - 1];
|
||
|
}
|
||
|
len_ = b.len_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are equal.
|
||
|
*/
|
||
|
bool operator==(const SStringFixed<T, S>& o) {
|
||
|
return sstr_eq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff the two strings are not equal.
|
||
|
*/
|
||
|
bool operator!=(const SStringFixed<T, S>& o) {
|
||
|
return sstr_neq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than given string.
|
||
|
*/
|
||
|
bool operator<(const SStringFixed<T, S>& o) {
|
||
|
return sstr_lt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than given string.
|
||
|
*/
|
||
|
bool operator>(const SStringFixed<T, S>& o) {
|
||
|
return sstr_gt(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is less than or equal to given string.
|
||
|
*/
|
||
|
bool operator<=(const SStringFixed<T, S>& o) {
|
||
|
return sstr_leq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this string is greater than or equal to given string.
|
||
|
*/
|
||
|
bool operator>=(const SStringFixed<T, S>& o) {
|
||
|
return sstr_geq(*this, o);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse the buffer in place.
|
||
|
*/
|
||
|
void reverse() {
|
||
|
for(size_t i = 0; i < (len_ >> 1); i++) {
|
||
|
T tmp = get(i);
|
||
|
set(get(len_-i-1), i);
|
||
|
set(tmp, len_-i-1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Reverse a substring of the buffer in place.
|
||
|
*/
|
||
|
void reverseWindow(size_t off, size_t len) {
|
||
|
assert_leq(off, len_);
|
||
|
assert_leq(off + len, len_);
|
||
|
size_t mid = len >> 1;
|
||
|
for(size_t i = 0; i < mid; i++) {
|
||
|
T tmp = get(off+i);
|
||
|
set(get(off+len-i-1), off+i);
|
||
|
set(tmp, off+len-i-1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Simply resize the buffer. If the buffer is resized to be
|
||
|
* longer, the newly-added elements will contain garbage and should
|
||
|
* be initialized immediately.
|
||
|
*/
|
||
|
void resize(size_t len) {
|
||
|
assert_lt(len, S);
|
||
|
len_ = len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Simply resize the buffer. If the buffer is resized to be
|
||
|
* longer, new elements will be initialized with 'el'.
|
||
|
*/
|
||
|
void resize(size_t len, const T& el) {
|
||
|
assert_lt(len, S);
|
||
|
if(len > len_) {
|
||
|
for(size_t i = len_; i < len; i++) {
|
||
|
cs_[i] = el;
|
||
|
}
|
||
|
}
|
||
|
len_ = len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set the first len elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(size_t len, const T& el) {
|
||
|
assert_leq(len, len_);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
cs_[i] = el;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set all elements of the buffer to el.
|
||
|
*/
|
||
|
void fill(const T& el) {
|
||
|
fill(len_, el);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Trim len characters from the beginning of the string.
|
||
|
*/
|
||
|
void trimBegin(size_t len) {
|
||
|
assert_leq(len, len_);
|
||
|
if(len == len_) {
|
||
|
len_ = 0; return;
|
||
|
}
|
||
|
for(size_t i = 0; i < len_-len; i++) {
|
||
|
cs_[i] = cs_[i+len];
|
||
|
}
|
||
|
len_ -= len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Trim len characters from the end of the string.
|
||
|
*/
|
||
|
void trimEnd(size_t len) {
|
||
|
if(len >= len_) len_ = 0;
|
||
|
else len_ -= len;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
void append(const T* b, size_t sz) {
|
||
|
assert_leq(sz + len_, S);
|
||
|
memcpy(cs_ + len_, b, sz * sizeof(T));
|
||
|
len_ += sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy bytes from zero-terminated buffer 'b' into this string.
|
||
|
*/
|
||
|
void append(const T* b) {
|
||
|
append(b, strlen(b));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the length of the string.
|
||
|
*/
|
||
|
size_t length() const { return len_; }
|
||
|
|
||
|
/**
|
||
|
* Clear the buffer.
|
||
|
*/
|
||
|
void clear() { len_ = 0; }
|
||
|
|
||
|
/**
|
||
|
* Return true iff the buffer is empty.
|
||
|
*/
|
||
|
bool empty() const { return len_ == 0; }
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
virtual const T* toZBuf() const {
|
||
|
const_cast<T*>(cs_)[len_] = 0;
|
||
|
return cs_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return true iff this DNA string matches the given nucleotide
|
||
|
* character string.
|
||
|
*/
|
||
|
bool eq(const char *str) const {
|
||
|
const char *self = toZBuf();
|
||
|
return strcmp(str, self) == 0;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
const char* toZBufXForm(const char *xform) const {
|
||
|
ASSERT_ONLY(size_t xformElts = strlen(xform));
|
||
|
char* printcs = const_cast<char*>(printcs_);
|
||
|
for(size_t i = 0; i < len_; i++) {
|
||
|
assert_lt(cs_[i], (int)xformElts);
|
||
|
printcs[i] = xform[cs_[i]];
|
||
|
}
|
||
|
printcs[len_] = 0;
|
||
|
return printcs_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return a const version of the raw buffer.
|
||
|
*/
|
||
|
const T* buf() const { return cs_; }
|
||
|
|
||
|
/**
|
||
|
* Return a writeable version of the raw buffer.
|
||
|
*/
|
||
|
T* wbuf() { return cs_; }
|
||
|
|
||
|
protected:
|
||
|
T cs_[S+1]; // +1 so that we have the option of dropping in a terminating "\0"
|
||
|
char printcs_[S+1]; // +1 so that we have the option of dropping in a terminating "\0"
|
||
|
size_t len_; // number of elements of cs_ currently in use
|
||
|
};
|
||
|
|
||
|
//
|
||
|
// Stream put operators
|
||
|
//
|
||
|
|
||
|
template <typename T, int S, int M>
|
||
|
std::ostream& operator<< (std::ostream& os, const SStringExpandable<T, S, M>& str) {
|
||
|
os << str.toZBuf();
|
||
|
return os;
|
||
|
}
|
||
|
|
||
|
template <typename T, int S>
|
||
|
std::ostream& operator<< (std::ostream& os, const SStringFixed<T, S>& str) {
|
||
|
os << str.toZBuf();
|
||
|
return os;
|
||
|
}
|
||
|
|
||
|
extern uint8_t asc2dna[];
|
||
|
extern uint8_t asc2col[];
|
||
|
|
||
|
extern uint8_t asc2dna_3N[2][256];
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Encapsulates a fixed-length DNA string with characters encoded as
|
||
|
* chars. Only capable of encoding A, C, G, T and N. The length is
|
||
|
* specified via the template parameter S.
|
||
|
*/
|
||
|
template<int S>
|
||
|
class SDnaStringFixed : public SStringFixed<char, S> {
|
||
|
public:
|
||
|
|
||
|
explicit SDnaStringFixed() : SStringFixed<char, S>() { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from another SStringFixed.
|
||
|
*/
|
||
|
SDnaStringFixed(const SDnaStringFixed<S>& o) :
|
||
|
SStringFixed<char, S>(o) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a C++ basic_string.
|
||
|
*/
|
||
|
explicit SDnaStringFixed(const std::basic_string<char>& str) :
|
||
|
SStringFixed<char, S>(str) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from an array and size.
|
||
|
*/
|
||
|
explicit SDnaStringFixed(const char* b, size_t sz) :
|
||
|
SStringFixed<char, S>(b, sz) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a zero-terminated string.
|
||
|
*/
|
||
|
explicit SDnaStringFixed(
|
||
|
const char* b,
|
||
|
bool chars = false,
|
||
|
bool colors = false) :
|
||
|
SStringFixed<char, S>()
|
||
|
{
|
||
|
if(chars) {
|
||
|
if(colors) {
|
||
|
installColors(b, strlen(b));
|
||
|
} else {
|
||
|
installChars(b, strlen(b));
|
||
|
}
|
||
|
} else {
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
virtual ~SDnaStringFixed() { } // C++ needs this
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
|
||
|
* complementing them in the process, assuming an encoding where
|
||
|
* 0=A, 1=C, 2=G, 3=T, 4=N.
|
||
|
*/
|
||
|
void installReverseComp(const char* b, size_t sz) {
|
||
|
assert_leq(sz, S);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
this->cs_[i] = (b[sz-i-1] == 4 ? 4 : b[sz-i-1] ^ 3);
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
|
||
|
* complementing them in the process, assuming an encoding where
|
||
|
* 0=A, 1=C, 2=G, 3=T, 4=N.
|
||
|
*/
|
||
|
void installReverseComp(const SDnaStringFixed<S>& b) {
|
||
|
assert_leq(b.len_, S);
|
||
|
for(size_t i = 0; i < b.len_; i++) {
|
||
|
this->cs_[i] = (b.cs_[b.len_-i-1] == 4 ? 4 : b.cs_[b.len_-i-1] ^ 3);
|
||
|
}
|
||
|
this->len_ = b.len_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Either reverse or reverse-complement (depending on "color") this
|
||
|
* DNA buffer in-place.
|
||
|
*/
|
||
|
void reverseComp(bool color = false) {
|
||
|
if(color) {
|
||
|
this->reverse();
|
||
|
} else {
|
||
|
for(size_t i = 0; i < (this->len_ >> 1); i++) {
|
||
|
char tmp1 = (this->cs_[i] == 4 ? 4 : this->cs_[i] ^ 3);
|
||
|
char tmp2 = (this->cs_[this->len_-i-1] == 4 ? 4 : this->cs_[this->len_-i-1] ^ 3);
|
||
|
this->cs_[i] = tmp2;
|
||
|
this->cs_[this->len_-i-1] = tmp1;
|
||
|
}
|
||
|
// Do middle element iff there are an odd number
|
||
|
if((this->len_ & 1) != 0) {
|
||
|
char tmp = this->cs_[this->len_ >> 1];
|
||
|
tmp = (tmp == 4 ? 4 : tmp ^ 3);
|
||
|
this->cs_[this->len_ >> 1] = tmp;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
virtual void install(const char* b, size_t sz) {
|
||
|
assert_leq(sz, S);
|
||
|
memcpy(this->cs_, b, sz);
|
||
|
#ifndef NDEBUG
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_leq(this->cs_[i], 4);
|
||
|
assert_geq(this->cs_[i], 0);
|
||
|
}
|
||
|
#endif
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy buffer 'b' of ASCII DNA characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installChars(const char* b, size_t sz) {
|
||
|
assert_leq(sz, S);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_in(toupper(b[i]), "ACGTN-");
|
||
|
this->cs_[i] = asc2dna[(int)b[i]];
|
||
|
assert_geq(this->cs_[i], 0);
|
||
|
assert_leq(this->cs_[i], 4);
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy buffer 'b' of ASCII color characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installColors(const char* b, size_t sz) {
|
||
|
assert_leq(sz, S);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_in(b[i], "0123.");
|
||
|
this->cs_[i] = asc2col[(int)b[i]];
|
||
|
assert_geq(this->cs_[i], 0);
|
||
|
assert_leq(this->cs_[i], 4);
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy C++ string of ASCII DNA characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installChars(const std::basic_string<char>& str) {
|
||
|
installChars(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy C++ string of ASCII color characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installColors(const std::basic_string<char>& str) {
|
||
|
installColors(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set DNA character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void set(int c, size_t idx) {
|
||
|
assert_lt(idx, this->len_);
|
||
|
assert_leq(c, 4);
|
||
|
assert_geq(c, 0);
|
||
|
this->cs_[idx] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append DNA char c.
|
||
|
*/
|
||
|
void append(const char& c) {
|
||
|
assert_lt(this->len_, S);
|
||
|
assert_leq(c, 4);
|
||
|
assert_geq(c, 0);
|
||
|
this->cs_[this->len_++] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set DNA character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void setChar(char c, size_t idx) {
|
||
|
assert_lt(idx, this->len_);
|
||
|
assert_in(toupper(c), "ACGTN");
|
||
|
this->cs_[idx] = asc2dna[(int)c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append DNA character.
|
||
|
*/
|
||
|
void appendChar(char c) {
|
||
|
assert_lt(this->len_, S);
|
||
|
assert_in(toupper(c), "ACGTN");
|
||
|
this->cs_[this->len_++] = asc2dna[(int)c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return DNA character corresponding to element 'idx'.
|
||
|
*/
|
||
|
char toChar(size_t idx) const {
|
||
|
assert_geq((int)this->cs_[idx], 0);
|
||
|
assert_leq((int)this->cs_[idx], 4);
|
||
|
return "ACGTN"[(int)this->cs_[idx]];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
const char& operator[](size_t i) const {
|
||
|
return this->get(i);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
const char& get(size_t i) const {
|
||
|
assert_lt(i, this->len_);
|
||
|
assert_leq(this->cs_[i], 4);
|
||
|
assert_geq(this->cs_[i], 0);
|
||
|
return this->cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the ith character in the window defined by fw, color,
|
||
|
* depth and len.
|
||
|
*/
|
||
|
char windowGetDna(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = this->len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, this->len_ - depth);
|
||
|
if(fw) return this->cs_[depth+i];
|
||
|
else return color ? this->cs_[depth+len-i-1] :
|
||
|
compDna(this->cs_[depth+len-i-1]);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Fill the given DNA buffer with the substring specified by fw,
|
||
|
* color, depth and len.
|
||
|
*/
|
||
|
void windowGetDna(
|
||
|
SDnaStringFixed<S>& buf,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = this->len_;
|
||
|
assert_leq(len, this->len_ - depth);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
buf.append(fw ? this->cs_[depth+i] :
|
||
|
(color ? this->cs_[depth+len-i-1] :
|
||
|
compDna(this->cs_[depth+len-i-1])));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
virtual const char* toZBuf() const { return this->toZBufXForm("ACGTN"); }
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Encapsulates a fixed-length DNA string with characters encoded as
|
||
|
* chars. Only capable of encoding A, C, G, T and N. The length is
|
||
|
* specified via the template parameter S.
|
||
|
*/
|
||
|
|
||
|
template<int S = 1024, int M = 2>
|
||
|
class SDnaStringExpandable : public SStringExpandable<char, S, M> {
|
||
|
public:
|
||
|
|
||
|
explicit SDnaStringExpandable() : SStringExpandable<char, S, M>() { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from another SStringFixed.
|
||
|
*/
|
||
|
SDnaStringExpandable(const SDnaStringExpandable<S, M>& o) :
|
||
|
SStringExpandable<char, S, M>(o) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a C++ basic_string.
|
||
|
*/
|
||
|
explicit SDnaStringExpandable(
|
||
|
const std::basic_string<char>& str,
|
||
|
bool chars = false,
|
||
|
bool colors = false) :
|
||
|
SStringExpandable<char, S, M>()
|
||
|
{
|
||
|
if(chars) {
|
||
|
if(colors) {
|
||
|
installColors(str);
|
||
|
} else {
|
||
|
installChars(str);
|
||
|
}
|
||
|
} else {
|
||
|
install(str);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from an array and size.
|
||
|
*/
|
||
|
explicit SDnaStringExpandable(
|
||
|
const char* b,
|
||
|
size_t sz,
|
||
|
bool chars = false,
|
||
|
bool colors = false) :
|
||
|
SStringExpandable<char, S, M>()
|
||
|
{
|
||
|
if(chars) {
|
||
|
if(colors) {
|
||
|
installColors(b, sz);
|
||
|
} else {
|
||
|
installChars(b, sz);
|
||
|
}
|
||
|
} else {
|
||
|
install(b, sz);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a zero-terminated string.
|
||
|
*/
|
||
|
explicit SDnaStringExpandable(
|
||
|
const char* b,
|
||
|
bool chars = false,
|
||
|
bool colors = false) :
|
||
|
SStringExpandable<char, S, M>()
|
||
|
{
|
||
|
install(b, chars, colors);
|
||
|
}
|
||
|
|
||
|
virtual ~SDnaStringExpandable() { } // C++ needs this
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
|
||
|
* complementing them in the process, assuming an encoding where
|
||
|
* 0=A, 1=C, 2=G, 3=T, 4=N.
|
||
|
*/
|
||
|
void installReverseComp(const char* b, size_t sz) {
|
||
|
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
this->cs_[i] = (b[sz-i-1] == 4 ? 4 : b[sz-i-1] ^ 3);
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
|
||
|
* complementing them in the process, assuming an encoding where
|
||
|
* 0=A, 1=C, 2=G, 3=T, 4=N.
|
||
|
*/
|
||
|
void installReverseComp(const SDnaStringExpandable<S, M>& b) {
|
||
|
if(this->sz_ < b.len_) this->expandCopy((b.len_ + S) * M);
|
||
|
for(size_t i = 0; i < b.len_; i++) {
|
||
|
this->cs_[i] = (b.cs_[b.len_-i-1] == 4 ? 4 : b.cs_[b.len_-i-1] ^ 3);
|
||
|
}
|
||
|
this->len_ = b.len_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Either reverse or reverse-complement (depending on "color") this
|
||
|
* DNA buffer in-place.
|
||
|
*/
|
||
|
void reverseComp(bool color = false) {
|
||
|
if(color) {
|
||
|
this->reverse();
|
||
|
} else {
|
||
|
for(size_t i = 0; i < (this->len_ >> 1); i++) {
|
||
|
char tmp1 = (this->cs_[i] == 4 ? 4 : this->cs_[i] ^ 3);
|
||
|
char tmp2 = (this->cs_[this->len_-i-1] == 4 ? 4 : this->cs_[this->len_-i-1] ^ 3);
|
||
|
this->cs_[i] = tmp2;
|
||
|
this->cs_[this->len_-i-1] = tmp1;
|
||
|
}
|
||
|
// Do middle element iff there are an odd number
|
||
|
if((this->len_ & 1) != 0) {
|
||
|
char tmp = this->cs_[this->len_ >> 1];
|
||
|
tmp = (tmp == 4 ? 4 : tmp ^ 3);
|
||
|
this->cs_[this->len_ >> 1] = tmp;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
virtual void install(
|
||
|
const char* b,
|
||
|
bool chars = false,
|
||
|
bool colors = false)
|
||
|
{
|
||
|
if(chars) {
|
||
|
if(colors) {
|
||
|
installColors(b, strlen(b));
|
||
|
} else {
|
||
|
installChars(b, strlen(b));
|
||
|
}
|
||
|
} else {
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
virtual void install(const char* b, size_t sz) {
|
||
|
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
|
||
|
memcpy(this->cs_, b, sz);
|
||
|
#ifndef NDEBUG
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_range(0, 4, (int)this->cs_[i]);
|
||
|
}
|
||
|
#endif
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy buffer 'b' of ASCII DNA characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installChars(const char* b, size_t sz) {
|
||
|
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_in(toupper(b[i]), "ACGTN-");
|
||
|
this->cs_[i] = asc2dna[(int)b[i]];
|
||
|
assert_range(0, 4, (int)this->cs_[i]);
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy buffer 'b' of ASCII color characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installColors(const char* b, size_t sz) {
|
||
|
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_in(b[i], "0123.");
|
||
|
this->cs_[i] = asc2col[(int)b[i]];
|
||
|
assert_range(0, 4, (int)this->cs_[i]);
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy C++ string of ASCII DNA characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installChars(const std::basic_string<char>& str) {
|
||
|
installChars(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy C++ string of ASCII color characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installColors(const std::basic_string<char>& str) {
|
||
|
installColors(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set DNA character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void set(int c, size_t idx) {
|
||
|
assert_lt(idx, this->len_);
|
||
|
assert_range(0, 4, c);
|
||
|
this->cs_[idx] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append DNA char c.
|
||
|
*/
|
||
|
void append(const char& c) {
|
||
|
if(this->sz_ < this->len_ + 1) {
|
||
|
this->expandCopy((this->len_ + 1 + S) * M);
|
||
|
}
|
||
|
assert_range(0, 4, (int)c);
|
||
|
this->cs_[this->len_++] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set DNA character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void setChar(char c, size_t idx) {
|
||
|
assert_lt(idx, this->len_);
|
||
|
assert_in(toupper(c), "ACGTN");
|
||
|
this->cs_[idx] = asc2dna[(int)c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append DNA character.
|
||
|
*/
|
||
|
void appendChar(char c) {
|
||
|
if(this->sz_ < this->len_ + 1) {
|
||
|
this->expandCopy((this->len_ + 1 + S) * M);
|
||
|
}
|
||
|
assert_in(toupper(c), "ACGTN");
|
||
|
this->cs_[this->len_++] = asc2dna[(int)c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return DNA character corresponding to element 'idx'.
|
||
|
*/
|
||
|
char toChar(size_t idx) const {
|
||
|
assert_range(0, 4, (int)this->cs_[idx]);
|
||
|
return "ACGTN"[(int)this->cs_[idx]];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
inline const char& operator[](size_t i) const {
|
||
|
return this->get(i);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
inline const char& get(size_t i) const {
|
||
|
assert_lt(i, this->len_);
|
||
|
assert_range(0, 4, (int)this->cs_[i]);
|
||
|
return this->cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the ith character in the window defined by fw, color,
|
||
|
* depth and len.
|
||
|
*/
|
||
|
char windowGetDna(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = this->len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, this->len_ - depth);
|
||
|
if(fw) return this->cs_[depth+i];
|
||
|
else return color ? this->cs_[depth+len-i-1] :
|
||
|
compDna(this->cs_[depth+len-i-1]);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Fill the given DNA buffer with the substring specified by fw,
|
||
|
* color, depth and len.
|
||
|
*/
|
||
|
void windowGetDna(
|
||
|
SDnaStringExpandable<S, M>& buf,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = this->len_;
|
||
|
assert_leq(len, this->len_ - depth);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
buf.append(fw ? this->cs_[depth+i] :
|
||
|
(color ? this->cs_[depth+len-i-1] :
|
||
|
compDna(this->cs_[depth+len-i-1])));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
virtual const char* toZBuf() const { return this->toZBufXForm("ACGTN"); }
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Encapsulates an expandable DNA string with characters encoded as
|
||
|
* char-sized masks. Encodes A, C, G, T, and all IUPAC, as well as the
|
||
|
* empty mask indicating "matches nothing."
|
||
|
*/
|
||
|
template<int S = 16, int M = 2>
|
||
|
class SDnaMaskString : public SStringExpandable<char, S, M> {
|
||
|
public:
|
||
|
|
||
|
explicit SDnaMaskString() : SStringExpandable<char, S, M>() { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from another SStringFixed.
|
||
|
*/
|
||
|
SDnaMaskString(const SDnaMaskString<S, M>& o) :
|
||
|
SStringExpandable<char, S, M>(o) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a C++ basic_string.
|
||
|
*/
|
||
|
explicit SDnaMaskString(const std::basic_string<char>& str) :
|
||
|
SStringExpandable<char, S, M>(str) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from an array and size.
|
||
|
*/
|
||
|
explicit SDnaMaskString(const char* b, size_t sz) :
|
||
|
SStringExpandable<char, S, M>(b, sz) { }
|
||
|
|
||
|
/**
|
||
|
* Create an SStringFixed from a zero-terminated string.
|
||
|
*/
|
||
|
explicit SDnaMaskString(const char* b, bool chars = false) :
|
||
|
SStringExpandable<char, S, M>()
|
||
|
{
|
||
|
if(chars) {
|
||
|
installChars(b, strlen(b));
|
||
|
} else {
|
||
|
install(b, strlen(b));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
virtual ~SDnaMaskString() { } // C++ needs this
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
|
||
|
* complementing them in the process, assuming an encoding where
|
||
|
* 0=A, 1=C, 2=G, 3=T, 4=N.
|
||
|
*/
|
||
|
void installReverseComp(const char* b, size_t sz) {
|
||
|
while(this->sz_ < sz) {
|
||
|
this->expandNoCopy((sz + S) * M);
|
||
|
}
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
this->cs_[i] = maskcomp[(int)b[sz-i-1]];
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
|
||
|
* complementing them in the process, assuming an encoding where
|
||
|
* 0=A, 1=C, 2=G, 3=T, 4=N.
|
||
|
*/
|
||
|
void installReverseComp(const SDnaMaskString<S, M>& b) {
|
||
|
while(this->sz_ < b.len_) {
|
||
|
this->expandNoCopy((b.len_ + S) * M);
|
||
|
}
|
||
|
for(size_t i = 0; i < b.len_; i++) {
|
||
|
this->cs_[i] = maskcomp[(int)b.cs_[b.len_-i-1]];
|
||
|
}
|
||
|
this->len_ = b.len_;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Either reverse or reverse-complement (depending on "color") this
|
||
|
* DNA buffer in-place.
|
||
|
*/
|
||
|
void reverseComp(bool color = false) {
|
||
|
if(color) {
|
||
|
this->reverse();
|
||
|
} else {
|
||
|
for(size_t i = 0; i < (this->len_ >> 1); i++) {
|
||
|
char tmp1 = maskcomp[(int)this->cs_[i]];
|
||
|
char tmp2 = maskcomp[(int)this->cs_[this->len_-i-1]];
|
||
|
this->cs_[i] = tmp2;
|
||
|
this->cs_[this->len_-i-1] = tmp1;
|
||
|
}
|
||
|
// Do middle element iff there are an odd number
|
||
|
if((this->len_ & 1) != 0) {
|
||
|
char tmp = this->cs_[this->len_ >> 1];
|
||
|
tmp = maskcomp[(int)tmp];
|
||
|
this->cs_[this->len_ >> 1] = tmp;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy 'sz' bytes from buffer 'b' into this string.
|
||
|
*/
|
||
|
virtual void install(const char* b, size_t sz) {
|
||
|
while(this->sz_ < sz) {
|
||
|
this->expandNoCopy((sz + S) * M);
|
||
|
}
|
||
|
memcpy(this->cs_, b, sz);
|
||
|
#ifndef NDEBUG
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_range((int)this->cs_[i], 0, 15);
|
||
|
}
|
||
|
#endif
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy buffer 'b' of ASCII DNA characters into DNA masks.
|
||
|
*/
|
||
|
virtual void installChars(const char* b, size_t sz) {
|
||
|
while(this->sz_ < sz) {
|
||
|
this->expandNoCopy((sz + S) * M);
|
||
|
}
|
||
|
for(size_t i = 0; i < sz; i++) {
|
||
|
assert_in(b[i], iupacs);
|
||
|
this->cs_[i] = asc2dnamask[(int)b[i]];
|
||
|
assert_range((int)this->cs_[i], 0, 15);
|
||
|
}
|
||
|
this->len_ = sz;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Copy C++ string of ASCII DNA characters into normal DNA
|
||
|
* characters.
|
||
|
*/
|
||
|
virtual void installChars(const std::basic_string<char>& str) {
|
||
|
installChars(str.c_str(), str.length());
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set DNA character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void set(int c, size_t idx) {
|
||
|
assert_lt(idx, this->len_);
|
||
|
assert_range(c, 0, 15);
|
||
|
this->cs_[idx] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append DNA char c.
|
||
|
*/
|
||
|
void append(const char& c) {
|
||
|
while(this->sz_ < this->len_+1) {
|
||
|
this->expandNoCopy((this->len_ + 1 + S) * M);
|
||
|
}
|
||
|
assert_range((int)c, 0, 15);
|
||
|
this->cs_[this->len_++] = c;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Set DNA character at index 'idx' to 'c'.
|
||
|
*/
|
||
|
void setChar(char c, size_t idx) {
|
||
|
assert_lt(idx, this->len_);
|
||
|
assert_in(toupper(c), iupacs);
|
||
|
this->cs_[idx] = asc2dnamask[(int)c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Append DNA character.
|
||
|
*/
|
||
|
void appendChar(char c) {
|
||
|
while(this->sz_ < this->len_+1) {
|
||
|
expandNoCopy((this->len_ + 1 + S) * M);
|
||
|
}
|
||
|
assert_in(toupper(c), iupacs);
|
||
|
this->cs_[this->len_++] = asc2dnamask[(int)c];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return DNA character corresponding to element 'idx'.
|
||
|
*/
|
||
|
char toChar(size_t idx) const {
|
||
|
assert_range((int)this->cs_[idx], 0, 15);
|
||
|
return mask2iupac[(int)this->cs_[idx]];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
const char& operator[](size_t i) const {
|
||
|
return this->get(i);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve mutable version of element i.
|
||
|
*/
|
||
|
char& operator[](size_t i) {
|
||
|
return this->get(i);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve constant version of element i.
|
||
|
*/
|
||
|
const char& get(size_t i) const {
|
||
|
assert_lt(i, this->len_);
|
||
|
assert_range((int)this->cs_[i], 0, 15);
|
||
|
return this->cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Retrieve mutable version of element i.
|
||
|
*/
|
||
|
char& get(size_t i) {
|
||
|
assert_lt(i, this->len_);
|
||
|
assert_range((int)this->cs_[i], 0, 15);
|
||
|
return this->cs_[i];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Return the ith character in the window defined by fw, color,
|
||
|
* depth and len.
|
||
|
*/
|
||
|
char windowGetDna(
|
||
|
size_t i,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = this->len_;
|
||
|
assert_lt(i, len);
|
||
|
assert_leq(len, this->len_ - depth);
|
||
|
if(fw) return this->cs_[depth+i];
|
||
|
else return color ? this->cs_[depth+len-i-1] :
|
||
|
maskcomp[this->cs_[depth+len-i-1]];
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Fill the given DNA buffer with the substring specified by fw,
|
||
|
* color, depth and len.
|
||
|
*/
|
||
|
void windowGetDna(
|
||
|
SDnaStringFixed<S>& buf,
|
||
|
bool fw,
|
||
|
bool color,
|
||
|
size_t depth = 0,
|
||
|
size_t len = 0) const
|
||
|
{
|
||
|
if(len == 0) len = this->len_;
|
||
|
assert_leq(len, this->len_ - depth);
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
buf.append(fw ? this->cs_[depth+i] :
|
||
|
(color ? this->cs_[depth+len-i-1] :
|
||
|
maskcomp[this->cs_[depth+len-i-1]]));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Sample a random substring of the given length from this DNA
|
||
|
* string and install the result in 'dst'.
|
||
|
*/
|
||
|
template<typename T>
|
||
|
void randSubstr(
|
||
|
RandomSource& rnd, // pseudo-random generator
|
||
|
T& dst, // put sampled substring here
|
||
|
size_t len, // length of substring to extract
|
||
|
bool watson = true, // true -> possibly extract from Watson strand
|
||
|
bool crick = true) // true -> possibly extract from Crick strand
|
||
|
{
|
||
|
assert(watson || crick);
|
||
|
assert_geq(this->len_, len);
|
||
|
size_t poss = this->len_ - len + 1;
|
||
|
assert_gt(poss, 0);
|
||
|
uint32_t rndoff = (uint32_t)(rnd.nextU32() % poss);
|
||
|
bool fw;
|
||
|
if (watson && !crick) fw = true;
|
||
|
else if(!watson && crick) fw = false;
|
||
|
else {
|
||
|
fw = rnd.nextBool();
|
||
|
}
|
||
|
if(fw) {
|
||
|
// Install Watson substring
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
dst[i] = this->cs_[i + rndoff];
|
||
|
}
|
||
|
} else {
|
||
|
// Install Crick substring
|
||
|
for(size_t i = 0; i < len; i++) {
|
||
|
dst[i] = maskcomp[(int)this->cs_[i + rndoff + (len - i - 1)]];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Put a terminator in the 'len_'th element and then return a
|
||
|
* pointer to the buffer. Useful for printing.
|
||
|
*/
|
||
|
virtual const char* toZBuf() const { return this->toZBufXForm(iupacs); }
|
||
|
};
|
||
|
|
||
|
typedef SStringExpandable<char, 1024, 2> BTString;
|
||
|
typedef SDnaStringExpandable<1024, 2> BTDnaString;
|
||
|
typedef SDnaMaskString<32, 2> BTDnaMask;
|
||
|
|
||
|
#endif /* SSTRING_H_ */
|