hisat-3n/pe.h

322 lines
9.5 KiB
C
Raw Permalink Normal View History

2025-01-18 13:09:52 +00:00
/*
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
*
* This file is part of Bowtie 2.
*
* Bowtie 2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Bowtie 2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* pe.h
*
* A class encapsulating a paired-end policy and routines for
* identifying intervals according to the policy. For instance,
* contains a routine that, given a policy and details about a match
* for one mate, returns details about where to search for the other
* mate.
*/
#ifndef PE_H_
#define PE_H_
#include <iostream>
#include <stdint.h>
// In description below "To the left" = "Upstream of w/r/t the Watson strand"
// The 4 possible policies describing how mates 1 and 2 should be
// oriented with respect to the reference genome and each other
enum {
// (fw) Both mates from Watson with 1 to the left, or
// (rc) Both mates from Crick with 2 to the left
PE_POLICY_FF = 1,
// (fw) Both mates from Crick with 1 to the left, or
// (rc) Both mates from Watson with 2 to the left
PE_POLICY_RR,
// (fw) Mate 1 from Watson and mate 2 from Crick with 1 to the left, or
// (rc) Mate 2 from Watson and mate 1 from Crick with 2 to the left
PE_POLICY_FR,
// (fw) Mate 1 from Crick and mate 2 from Watson with 1 to the left, or
// (rc) Mate 2 from Crick and mate 1 from Watson with 2 to the left
PE_POLICY_RF
};
// Various distinct ways that the mates might align with respect to
// each other in a concordant alignment. We distinguish between them
// because in some cases a user may want to consider some of these
// categories to be discordant, even if the alignment otherwise
// conforms to the paired-end policy.
enum {
// Describes a paired-end alignment where the mates
// straightforwardly conform to the paired-end policy without any
// overlap between the mates
PE_ALS_NORMAL = 1,
// Describes a paired-end alignment where the mate overlap, but
// neither contains the other and they do not dovetail, but the
// alignment conforms to the paired-end policy
PE_ALS_OVERLAP,
// Describes a paired-end alignment where the mates conform to the
// paired-end policy, but one mate strictly contains the other but
// they don't dovetail. We distinguish this from a "normal"
// concordant alignment because some users may wish to categorize
// such an alignment as discordant.
PE_ALS_CONTAIN,
// Describes a paired-end alignment where the mates conform to the
// paired-end policy, but mates "fall off" each other. E.g. if the
// policy is FR and any of these happen:
// 1: >>>>> >>>>>
// 2: <<<<<< <<<<<<
// And the overall extent is consistent with the minimum fragment
// length, this is a dovetail alignment. We distinguish this from
// a "normal" concordant alignment because some users may wish to
// categorize such an alignment as discordant.
PE_ALS_DOVETAIL,
// The mates are clearly discordant, owing to their orientations
// and/or implied fragment length
PE_ALS_DISCORD
};
/**
* Return true iff the orientations and relative positions of mates 1
* and 2 are compatible with the given PE_POLICY.
*/
static inline bool pePolicyCompat(
int policy, // PE_POLICY
bool oneLeft, // true iff mate 1 is to the left of mate 2
bool oneWat, // true iff mate 1 aligned to Watson strand
bool twoWat) // true iff mate 2 aligned to Watson strand
{
switch(policy) {
case PE_POLICY_FF:
return oneWat == twoWat && oneWat == oneLeft;
case PE_POLICY_RR:
return oneWat == twoWat && oneWat != oneLeft;
case PE_POLICY_FR:
return oneWat != twoWat && oneWat == oneLeft;
case PE_POLICY_RF:
return oneWat != twoWat && oneWat != oneLeft;
default: {
std::cerr << "Bad PE_POLICY: " << policy << std::endl;
throw 1;
}
}
throw 1;
}
/**
* Given that the given mate aligns in the given orientation, return
* true iff the other mate must appear "to the right" of the given mate
* in order for the alignment to be concordant.
*/
static inline void pePolicyMateDir(
int policy,// in: PE_POLICY
bool is1, // in: true iff mate 1 is the one that already aligned
bool fw, // in: true iff already-aligned mate aligned to Watson
bool& left, // out: set =true iff other mate must be to the left
bool& mfw) // out: set =true iff other mate must align to watson
{
switch(policy) {
case PE_POLICY_FF: {
left = (is1 != fw);
mfw = fw;
break;
}
case PE_POLICY_RR: {
left = (is1 == fw);
mfw = fw;
break;
}
case PE_POLICY_FR: {
left = !fw;
mfw = !fw;
break;
}
case PE_POLICY_RF: {
left = fw;
mfw = !fw;
break;
}
default: {
std::cerr << "Error: No such PE_POLICY: " << policy << std::endl;
throw 1;
}
}
return;
}
/**
* Encapsulates paired-end alignment parameters.
*/
class PairedEndPolicy {
public:
PairedEndPolicy() { reset(); }
PairedEndPolicy(
int pol,
size_t maxfrag,
size_t minfrag,
bool local,
bool flippingOk,
bool dovetailOk,
bool containOk,
bool olapOk,
bool expandToFit)
{
init(
pol,
maxfrag,
minfrag,
local,
flippingOk,
dovetailOk,
containOk,
olapOk,
expandToFit);
}
/**
* Initialize with nonsense values.
*/
void reset() {
init(-1, 0xffffffff, 0xffffffff, false, false, false, false, false, false);
}
/**
* Initialize given policy, maximum & minimum fragment lengths.
*/
void init(
int pol,
size_t maxfrag,
size_t minfrag,
bool local,
bool flippingOk,
bool dovetailOk,
bool containOk,
bool olapOk,
bool expandToFit)
{
pol_ = pol;
maxfrag_ = maxfrag;
minfrag_ = minfrag;
local_ = local;
flippingOk_ = flippingOk;
dovetailOk_ = dovetailOk;
containOk_ = containOk;
olapOk_ = olapOk;
expandToFit_ = expandToFit;
}
/**
* Given details about how one mate aligns, and some details about the
* reference sequence it aligned to, calculate a window and orientation s.t.
* a paired-end alignment is concordant iff the opposite mate aligns in the
* calculated window with the calculated orientation. The calculaton does not
* consider gaps. The dynamic programming framer will take gaps into account.
*
* Returns false if no concordant alignments are possible, true otherwise.
*/
bool otherMate(
bool is1, // true -> mate 1 aligned and we're looking
// for 2, false -> vice versa
bool fw, // orientation of aligned mate
int64_t off, // offset into the reference sequence
int64_t maxalcols, // max # columns spanned by alignment
size_t reflen, // length of reference sequence aligned to
size_t len1, // length of mate 1
size_t len2, // length of mate 2
bool& oleft, // out: true iff opp mate must be to right of anchor
int64_t& oll, // out: leftmost Watson off for LHS of opp alignment
int64_t& olr, // out: rightmost Watson off for LHS of opp alignment
int64_t& orl, // out: leftmost Watson off for RHS of opp alignment
int64_t& orr, // out: rightmost Watson off for RHS of opp alignment
bool& ofw) // out: true iff opp mate must be on Watson strand
const;
/**
* Return a PE_TYPE flag indicating, given a PE_POLICY and coordinates
* for a paired-end alignment, qwhat type of alignment it is, i.e.,
* whether it's:
*
* 1. Straightforwardly concordant
* 2. Mates dovetail (one extends beyond the end of the other)
* 3. One mate contains the other but they don't dovetail
* 4. One mate overlaps the other but neither contains the other and
* they don't dovetail
* 5. Discordant
*/
int peClassifyPair(
int64_t off1, // offset of mate 1
size_t len1, // length of mate 1
bool fw1, // whether mate 1 aligned to Watson
int64_t off2, // offset of mate 2
size_t len2, // length of mate 2
bool fw2) // whether mate 2 aligned to Watson
const;
int policy() const { return pol_; }
size_t maxFragLen() const { return maxfrag_; }
size_t minFragLen() const { return minfrag_; }
protected:
// Use local alignment to search for the opposite mate, rather than
// a type of alignment that requires the read to align end-to-end
bool local_;
// Policy governing how mates should be oriented with respect to
// each other and the reference genome
int pol_;
// true iff settings are such that mates that violate the expected relative
// orientation but are still consistent with maximum fragment length are OK
bool flippingOk_;
// true iff settings are such that dovetailed mates should be
// considered concordant.
bool dovetailOk_;
// true iff paired-end alignments where one mate's alignment is
// strictly contained within the other's should be considered
// concordant
bool containOk_;
// true iff paired-end alignments where one mate's alignment
// overlaps the other's should be considered concordant
bool olapOk_;
// What to do when a mate length is > maxfrag_? If expandToFit_ is
// true, we temporarily increase maxfrag_ to equal the mate length.
// Otherwise we say that any paired-end alignment involving the
// long mate is discordant.
bool expandToFit_;
// Maximum fragment size to consider
size_t maxfrag_;
// Minimum fragment size to consider
size_t minfrag_;
};
#endif /*ndef PE_H_*/