/*
 * Copyright 2020, Yun (Leo) Zhang
 *
 * This file is part of HISAT-3N.
 *
 * HISAT-3N is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HISAT-3N is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HISAT-3N.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef UTILITY_3N_TABLE_H
#define UTILITY_3N_TABLE_H

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cctype>
#include <condition_variable>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <mutex>
#include <queue>
#include <string>
#include <utility>
#include <vector>

using namespace std;

/**
 * Lookup table indexed by ASCII code that yields the complement of a
 * nucleotide (IUPAC) code. Both upper- and lower-case inputs map to an
 * upper-case complement, '-' maps to itself, and every other entry is 0.
 *
 * NOTE: index with an unsigned value; a negative `char` would read before
 * the start of the table.
 * NOTE: this is a non-inline definition in a header, so the header must be
 * included from a single translation unit only.
 */
char asc2dnacomp[] = {
    /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  16 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'-', 0, 0,
    /*  48 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*  64 */ 0,'T','V','G','H', 0, 0,'C','D', 0, 0,'M', 0,'K','N', 0,
    /*           A   B   C   D           G   H           K       M   N */
    /*  80 */ 0, 0,'Y','S','A', 0,'B','W', 0,'R', 0, 0, 0, 0, 0, 0,
    /*               R   S   T       V   W       Y */
    /*  96 */ 0,'T','V','G','H', 0, 0,'C','D', 0, 0,'M', 0,'K','N', 0,
    /*           a   b   c   d           g   h           k       m   n */
    /* 112 */ 0, 0,'Y','S','A', 0,'B','W', 0,'R', 0, 0, 0, 0, 0, 0,
    /*               r   s   t       v   w       y */
    /* 128 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 144 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 160 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 176 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 192 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 208 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 224 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/**
 * Simple record that binds a quality score to a read position and a
 * reference position.
 *
 * A freshly constructed record is flagged `remove`; calling setQual()
 * stores the quality/conversion state and clears that flag.
 */
class PosQuality {
public:
    int readPos;            // 0-based
    int refPos;             // 0-based
    char qual = 0;          // defined default instead of an indeterminate value
    bool converted = false; // defined default instead of an indeterminate value
    bool remove;

    /**
     * @param inputPos initial value for both readPos and refPos.
     */
    PosQuality(int& inputPos)
        : readPos(inputPos), refPos(inputPos), remove(true) {
    }

    /**
     * Store the quality character and conversion flag, and clear `remove`.
     */
    void setQual (char& inputQual, bool inputConverted) {
        qual = inputQual;
        converted = inputConverted;
        remove = false;
    }
};

/**
 * Base class for a string that is scanned segment by segment.
 * `start` is the index at which the next scan begins and `stringLen`
 * caches the length of `s`.
 */
class string_search {
public:
    int start = 0;
    string s;
    int stringLen = 0;

    /** Reset to the empty state. */
    void initialize() {
        start = 0;
        stringLen = 0;
        s.clear();
    }

    /** Replace the stored string and rewind the scan position to 0. */
    void loadString(string intputString) {
        s = std::move(intputString);
        stringLen = static_cast<int>(s.size());
        start = 0;
    }
};

/**
 * Stores a CIGAR string and hands out its segments one at a time.
 */
class CIGAR : public string_search {
public:
    /**
     * Extract the next "<length><operation>" segment.
     *
     * @param len    receives the segment length (set to 0 when no segment
     *               is returned).
     * @param symbol receives the operation character.
     * @return true when a segment was extracted; false when the string is
     *         exhausted or only trailing characters without an operation
     *         remain.
     *
     * The length is parsed with stoi, so a segment that has no digits in
     * front of its operation still throws std::invalid_argument.
     */
    bool getNextSegment(int& len, char& symbol) {
        if (start >= stringLen) {
            return false;
        }
        len = 0;
        // Bounded scan: never read past the end of a malformed string.
        for (int currentIndex = start; currentIndex < stringLen; currentIndex++) {
            const char c = s[currentIndex];
            // '=' (sequence match) is a valid CIGAR operation but is not
            // alphabetic. Cast to unsigned char because isalpha is
            // undefined for negative values.
            if (isalpha(static_cast<unsigned char>(c)) || c == '=') {
                len = stoi(s.substr(start, currentIndex - start));
                symbol = c;
                start = currentIndex + 1;
                return true;
            }
        }
        // Trailing characters with no operation: treat as end of input.
        start = stringLen;
        return false;
    }
};

/**
 * Stores an MD tag and hands out its segments one at a time.
 */
class MD_tag : public string_search {
public:
    /**
     * Extract the next segment of the MD tag into `seg`.
     *
     * A segment is one of:
     *   - a run of digits (a leading '0' at the start of a segment is skipped),
     *   - a single letter,
     *   - '^' followed by a run of letters.
     *
     * @return true when `seg` holds a segment, false when nothing is left.
     */
    bool getNextSegment(string& seg) {
        if (start >= stringLen) {
            return false;
        }
        seg.clear();
        int currentIndex = start;
        bool deletion = false;

        while (true) {
            if (currentIndex >= stringLen) {
                // Ran off the end: whatever was collected is the last segment.
                start = currentIndex + 1;
                return !seg.empty();
            }
            const char c = s[currentIndex];
            if (seg.empty() && c == '0') {
                // A zero at the start of a segment carries no information.
                currentIndex++;
                continue;
            }
            // Cast to unsigned char: isalpha is undefined for negative values.
            if (isalpha(static_cast<unsigned char>(c))) {
                if (seg.empty()) {
                    // A lone letter is a complete segment.
                    seg = c;
                    start = currentIndex + 1;
                    return true;
                }
                if (!deletion) {
                    // The letter terminates the preceding number.
                    start = currentIndex;
                    return true;
                }
                seg += c; // still inside the '^' run
            } else if (c == '^') {
                if (!seg.empty()) {
                    // '^' terminates the preceding segment.
                    start = currentIndex;
                    return true;
                }
                seg = c;
                deletion = true;
            } else { // number
                if (seg.empty()) {
                    seg = c;
                } else if (deletion || isalpha(static_cast<unsigned char>(seg.back()))) {
                    // The digit terminates the preceding letter run.
                    start = currentIndex;
                    return true;
                } else {
                    seg += c;
                }
            }
            currentIndex++;
        }
    }
};

/**
 * Unbounded FIFO channel guarded by a mutex and a condition variable.
 *
 * send() enqueues a value and aborts the program if the channel is already
 * closed. recv() blocks until a value is available or the channel is closed.
 * The channel is neither copyable nor movable.
 */
template <typename T>
struct Channel {
    Channel() = default;
    Channel(const Channel &) = delete;
    Channel &operator=(const Channel &) = delete;
    Channel(Channel &&) = delete;
    Channel &operator=(Channel &&) = delete;
    ~Channel() = default;

    /** Enqueue `in`; calls abort() when the channel has been closed. */
    void send(T in) {
        {
            std::unique_lock<std::mutex> lock{mtx_};
            // Checked under the lock (close() also sets the flag under it)
            // so a value can never be queued after close() has completed.
            if (closed()) {
                abort();
            }
            queue_.push(std::move(in));
            ++size_;
        }
        cnd_.notify_one();
    }

    /**
     * Dequeue the oldest value into `out`.
     * @return false when the channel is closed and empty, true otherwise.
     */
    bool recv(T& out) {
        if (closed() && empty()) {
            return false;
        }
        {
            std::unique_lock<std::mutex> lock{mtx_};
            cnd_.wait(lock, [this] { return !empty() || closed(); });
            if (empty()) {
                return false;
            }
            out = std::move(queue_.front());
            queue_.pop();
            --size_;
        }
        if (closed()) {
            cnd_.notify_all();
        } else {
            cnd_.notify_one();
        }
        return true;
    }

    /** Number of queued values. */
    size_t size() const noexcept {
        return size_;
    }

    /** True when no values are queued. */
    bool empty() const noexcept {
        return size_ == 0;
    }

    /** Mark the channel closed and wake every waiting receiver. */
    void close() noexcept {
        {
            std::unique_lock<std::mutex> lock{mtx_};
            is_closed_.store(true);
        }
        cnd_.notify_all();
    }

    /** True once close() has been called. */
    bool closed() const noexcept {
        return is_closed_.load();
    }

private:
    std::queue<T> queue_;
    std::atomic<std::size_t> size_{0};
    std::mutex mtx_;
    std::condition_variable cnd_;
    std::atomic<bool> is_closed_{false};
};

/**
 * Stores one chromosome name and its stream position.
 * Ordered by chromosome name.
 */
class ChromosomeFilePosition {
public:
    string chromosome;
    streampos linePos;

    ChromosomeFilePosition(string inputChromosome, streampos inputPos)
        : chromosome(std::move(inputChromosome)), linePos(inputPos) {
    }

    bool operator < (const ChromosomeFilePosition& in) const {
        return chromosome < in.chromosome;
    }
};

/**
 * Stores all chromosome names and their stream positions.
 */
class ChromosomeFilePositions {
public:
    vector<ChromosomeFilePosition> pos;

    /**
     * Append a chromosome name and its stream position.
     * No duplicate check is performed.
     */
    void append (string &chromosome, streampos& linePos) {
        pos.push_back(ChromosomeFilePosition(chromosome, linePos));
    }

    /**
     * Binary search on pos[start..end] (inclusive) for the target chromosome
     * name. `pos` must already be ordered by name (see sort()).
     *
     * @return index of the matching entry.
     * @throws int (value 1) after printing a message to cerr when the name
     *         is not found.
     */
    int findChromosome(string &targetChromosome, int start, int end) {
        while (start <= end) {
            // Midpoint written this way so that start + end cannot overflow.
            const int middle = start + (end - start) / 2;
            const string& candidate = pos[middle].chromosome;
            if (candidate == targetChromosome) {
                return middle;
            }
            if (candidate > targetChromosome) {
                end = middle - 1;
            } else {
                start = middle + 1;
            }
        }
        // cannot find the chromosome! throw!
        cerr << "Cannot find the chromosome: " << targetChromosome << " in reference file." << endl;
        throw 1;
    }

    /**
     * Given a chromosome name, return its stream position.
     * Throws (see findChromosome) when the name is unknown.
     */
    streampos getChromosomePosInRefFile(string &targetChromosome) {
        // Convert before subtracting so an empty table gives -1 instead of
        // wrapping around as an unsigned value.
        const int last = static_cast<int>(pos.size()) - 1;
        const int index = findChromosome(targetChromosome, 0, last);
        assert(pos[index].chromosome == targetChromosome);
        return pos[index].linePos;
    }

    /**
     * Sort pos by chromosome name.
     */
    void sort() {
        std::sort(pos.begin(), pos.end());
    }
};

#endif //UTILITY_3N_TABLE_H