194 lines
7.5 KiB
C++
194 lines
7.5 KiB
C++
/*
|
|
* Copyright 2020, Yun (Leo) Zhang <imzhangyun@gmail.com>
|
|
*
|
|
* This file is part of HISAT-3N.
|
|
*
|
|
* HISAT-3N is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* HISAT-3N is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with HISAT-3N. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "alignment_3n.h"
|
|
#include "aln_sink.h"
|
|
|
|
|
|
/**
|
|
* return true if two location is concordant.
|
|
* return false, if there are not concordant or too far (>maxPairDistance).
|
|
*/
|
|
bool Alignment::isConcordant(long long int location1, bool &forward1, long long int readLength1, long long int location2, bool &forward2, long long int readLength2) {
|
|
if (forward1 == forward2) // same direction
|
|
{
|
|
return false;
|
|
}
|
|
// adjust the location of the start of the read
|
|
if (!forward1)
|
|
{
|
|
location1 = location1 + readLength1 - 1;
|
|
}
|
|
if (!forward2)
|
|
{
|
|
location2 = location2 + readLength2 - 1;
|
|
}
|
|
// return false if two reads are too far from each other
|
|
if (abs(location1-location2) > maxPairDistance)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (location1 == location2)
|
|
{
|
|
return true;
|
|
}
|
|
else if (location1 < location2)
|
|
{
|
|
if (forward1 && !forward2)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (!forward1 && forward2)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* this is the basic function to calculate DNA pair score.
|
|
* if the distance between 2 alignments is more than penaltyFreeDistance_DNA, we reduce the score by the distance/100.
|
|
* if two alignment is concordant we add concordantScoreBounce to make sure to select the concordant pair as best pair.
|
|
*/
|
|
int Alignment::calculatePairScore_DNA (long long int &location0, int& AS0, bool& forward0, long long int readLength0, long long int &location1, int &AS1, bool &forward1, long long int readLength1, bool& concordant) {
|
|
|
|
int score = ASPenalty*AS0 + ASPenalty*AS1;
|
|
int distance = abs(location0 - location1);
|
|
if (distance > maxPairDistance) { return numeric_limits<int>::min(); }
|
|
if (distance > penaltyFreeDistance_DNA) { score -= distance/distancePenaltyFraction_DNA; }
|
|
concordant = isConcordant(location0, forward0, readLength0, location1, forward1, readLength1);
|
|
if (concordant) { score += concordantScoreBounce; }
|
|
return score;
|
|
}
|
|
|
|
/**
|
|
* this is the basic function to calculate RNA pair score.
|
|
* if the distance between 2 alignments is more than penaltyFreeDistance_RNA, we reduce the score by the distance/1000.
|
|
* if two alignment is concordant we add concordantScoreBounce to make sure to select the concordant pair as best pair.
|
|
*/
|
|
int Alignment::calculatePairScore_RNA (long long int &location0, int& XM0, bool& forward0, long long int readLength0, long long int &location1, int &XM1, bool &forward1, long long int readLength1, bool& concordant) {
|
|
// this is the basic function to calculate pair score.
|
|
// if the distance between 2 alignment is more than 100,000, we reduce the score by the distance/1000.
|
|
// if two alignment is concordant we add 500,000 to make sure to select the concordant pair as best pair.
|
|
int score = -ASPenalty*XM0 + -ASPenalty*XM1;
|
|
int distance = abs(location0 - location1);
|
|
if (distance > maxPairDistance) { return numeric_limits<int>::min(); }
|
|
if (distance > penaltyFreeDistance_RNA) { score -= distance/distancePenaltyFraction_RNA; }
|
|
concordant = isConcordant(location0, forward0, readLength0, location1, forward1, readLength1);
|
|
if (concordant) { score += concordantScoreBounce; }
|
|
return score;
|
|
}
|
|
|
|
/**
|
|
* calculate the pairScore for a pair of alignment result. Output pair Score and number of pair.
|
|
* Do not update their pairScore.
|
|
*/
|
|
int Alignment::calculatePairScore(Alignment *inputAlignment, int &nPair) {
|
|
int pairScore = numeric_limits<int>::min();
|
|
nPair = 0;
|
|
if (pairSegment == inputAlignment->pairSegment){
|
|
// when 2 alignment results are from same pair segment, output the lowest score and number of pair equal zero.
|
|
pairScore = numeric_limits<int>::min();
|
|
} else if (!mapped && !inputAlignment->mapped) {
|
|
// both unmapped.
|
|
pairScore = numeric_limits<int>::min()/2 - 1;
|
|
} else if (!mapped || !inputAlignment->mapped) {
|
|
// one of the segment unmapped.
|
|
pairScore = numeric_limits<int>::min()/2;
|
|
nPair = 1;
|
|
} else if ((!repeat && !inputAlignment->repeat)){
|
|
// both mapped and (both non-repeat or not expand repeat)
|
|
bool concordant;
|
|
if (DNA) {
|
|
pairScore = calculatePairScore_DNA(location,
|
|
AS,
|
|
forward,
|
|
readSequence.length(),
|
|
inputAlignment->location,
|
|
inputAlignment->AS,
|
|
inputAlignment->forward,
|
|
inputAlignment->readSequence.length(),
|
|
concordant);
|
|
} else {
|
|
pairScore = calculatePairScore_RNA(location,
|
|
XM,
|
|
forward,
|
|
readSequence.length(),
|
|
inputAlignment->location,
|
|
inputAlignment->XM,
|
|
inputAlignment->forward,
|
|
inputAlignment->readSequence.length(),
|
|
concordant);
|
|
}
|
|
setConcordant(concordant);
|
|
inputAlignment->setConcordant(concordant);
|
|
nPair = 1;
|
|
}
|
|
return pairScore;
|
|
}
|
|
|
|
void Alignments::reportStats_single(ReportingMetrics& met) {
|
|
|
|
int nAlignment = alignmentPositions.nBestSingle;
|
|
if (nAlignment == 0) {
|
|
met.nunp_0++;
|
|
} else {
|
|
met.nunp_uni++;
|
|
if (nAlignment == 1) { met.nunp_uni1++; }
|
|
else { met.nunp_uni2++; }
|
|
}
|
|
}
|
|
|
|
void Alignments::reportStats_paired(ReportingMetrics& met) {
|
|
if (!alignmentPositions.concordantExist) {
|
|
met.nconcord_0++;
|
|
if (alignmentPositions.nBestPair == 0) {
|
|
met.nunp_0_0 += 2;
|
|
return;
|
|
}
|
|
if (alignmentPositions.bestPairScore == numeric_limits<int>::min()/2) {
|
|
// one mate is unmapped, one mate is mapped
|
|
met.nunp_0_0++;
|
|
met.nunp_0_uni++;
|
|
if (alignmentPositions.nBestPair == 1) { met.nunp_0_uni1++; }
|
|
else { met.nunp_0_uni2++; }
|
|
} else { //both mate is mapped
|
|
if (alignmentPositions.nBestPair == 1) {
|
|
met.ndiscord++;
|
|
return;
|
|
}
|
|
else {
|
|
met.nunp_0_uni += 2;
|
|
met.nunp_0_uni2 += 2;
|
|
}
|
|
}
|
|
} else {
|
|
assert(alignmentPositions.nBestPair > 0);
|
|
met.nconcord_uni++;
|
|
if (alignmentPositions.nBestPair == 1) { met.nconcord_uni1++; }
|
|
else { met.nconcord_uni2++; }
|
|
}
|
|
}
|
|
|