/* * Copyright 2020, Yun (Leo) Zhang * * This file is part of HISAT-3N. * * HISAT-3N is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HISAT-3N is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HISAT-3N. If not, see . */ #include "alignment_3n.h" #include "aln_sink.h" /** * return true if two location is concordant. * return false, if there are not concordant or too far (>maxPairDistance). */ bool Alignment::isConcordant(long long int location1, bool &forward1, long long int readLength1, long long int location2, bool &forward2, long long int readLength2) { if (forward1 == forward2) // same direction { return false; } // adjust the location of the start of the read if (!forward1) { location1 = location1 + readLength1 - 1; } if (!forward2) { location2 = location2 + readLength2 - 1; } // return false if two reads are too far from each other if (abs(location1-location2) > maxPairDistance) { return false; } if (location1 == location2) { return true; } else if (location1 < location2) { if (forward1 && !forward2) { return true; } } else { if (!forward1 && forward2) { return true; } } return false; } /** * this is the basic function to calculate DNA pair score. * if the distance between 2 alignments is more than penaltyFreeDistance_DNA, we reduce the score by the distance/100. * if two alignment is concordant we add concordantScoreBounce to make sure to select the concordant pair as best pair. */ int Alignment::calculatePairScore_DNA (long long int &location0, int& AS0, bool& forward0, long long int readLength0, long long int &location1, int &AS1, bool &forward1, long long int readLength1, bool& concordant) { int score = ASPenalty*AS0 + ASPenalty*AS1; int distance = abs(location0 - location1); if (distance > maxPairDistance) { return numeric_limits::min(); } if (distance > penaltyFreeDistance_DNA) { score -= distance/distancePenaltyFraction_DNA; } concordant = isConcordant(location0, forward0, readLength0, location1, forward1, readLength1); if (concordant) { score += concordantScoreBounce; } return score; } /** * this is the basic function to calculate RNA pair score. * if the distance between 2 alignments is more than penaltyFreeDistance_RNA, we reduce the score by the distance/1000. * if two alignment is concordant we add concordantScoreBounce to make sure to select the concordant pair as best pair. */ int Alignment::calculatePairScore_RNA (long long int &location0, int& XM0, bool& forward0, long long int readLength0, long long int &location1, int &XM1, bool &forward1, long long int readLength1, bool& concordant) { // this is the basic function to calculate pair score. // if the distance between 2 alignment is more than 100,000, we reduce the score by the distance/1000. // if two alignment is concordant we add 500,000 to make sure to select the concordant pair as best pair. int score = -ASPenalty*XM0 + -ASPenalty*XM1; int distance = abs(location0 - location1); if (distance > maxPairDistance) { return numeric_limits::min(); } if (distance > penaltyFreeDistance_RNA) { score -= distance/distancePenaltyFraction_RNA; } concordant = isConcordant(location0, forward0, readLength0, location1, forward1, readLength1); if (concordant) { score += concordantScoreBounce; } return score; } /** * calculate the pairScore for a pair of alignment result. Output pair Score and number of pair. * Do not update their pairScore. */ int Alignment::calculatePairScore(Alignment *inputAlignment, int &nPair) { int pairScore = numeric_limits::min(); nPair = 0; if (pairSegment == inputAlignment->pairSegment){ // when 2 alignment results are from same pair segment, output the lowest score and number of pair equal zero. pairScore = numeric_limits::min(); } else if (!mapped && !inputAlignment->mapped) { // both unmapped. pairScore = numeric_limits::min()/2 - 1; } else if (!mapped || !inputAlignment->mapped) { // one of the segment unmapped. pairScore = numeric_limits::min()/2; nPair = 1; } else if ((!repeat && !inputAlignment->repeat)){ // both mapped and (both non-repeat or not expand repeat) bool concordant; if (DNA) { pairScore = calculatePairScore_DNA(location, AS, forward, readSequence.length(), inputAlignment->location, inputAlignment->AS, inputAlignment->forward, inputAlignment->readSequence.length(), concordant); } else { pairScore = calculatePairScore_RNA(location, XM, forward, readSequence.length(), inputAlignment->location, inputAlignment->XM, inputAlignment->forward, inputAlignment->readSequence.length(), concordant); } setConcordant(concordant); inputAlignment->setConcordant(concordant); nPair = 1; } return pairScore; } void Alignments::reportStats_single(ReportingMetrics& met) { int nAlignment = alignmentPositions.nBestSingle; if (nAlignment == 0) { met.nunp_0++; } else { met.nunp_uni++; if (nAlignment == 1) { met.nunp_uni1++; } else { met.nunp_uni2++; } } } void Alignments::reportStats_paired(ReportingMetrics& met) { if (!alignmentPositions.concordantExist) { met.nconcord_0++; if (alignmentPositions.nBestPair == 0) { met.nunp_0_0 += 2; return; } if (alignmentPositions.bestPairScore == numeric_limits::min()/2) { // one mate is unmapped, one mate is mapped met.nunp_0_0++; met.nunp_0_uni++; if (alignmentPositions.nBestPair == 1) { met.nunp_0_uni1++; } else { met.nunp_0_uni2++; } } else { //both mate is mapped if (alignmentPositions.nBestPair == 1) { met.ndiscord++; return; } else { met.nunp_0_uni += 2; met.nunp_0_uni2 += 2; } } } else { assert(alignmentPositions.nBestPair > 0); met.nconcord_uni++; if (alignmentPositions.nBestPair == 1) { met.nconcord_uni1++; } else { met.nconcord_uni2++; } } }