hisat-3n/sam.h

2014 lines
63 KiB
C
Raw Normal View History

2025-01-18 13:09:52 +00:00
/*
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
*
* This file is part of Bowtie 2.
*
* Bowtie 2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Bowtie 2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef SAM_H_
#define SAM_H_
#include <string>
#include <sys/time.h>
#include "ds.h"
#include "read.h"
#include "util.h"
#include "aligner_result.h"
#include "scoring.h"
#include "alt.h"
#include "filebuf.h"
#include "alignment_3n.h"
// Bit values of the SAM FLAG field.  Descriptions use the wording of the
// v1.4-r962 spec.  Each constant occupies its own bit so they can be OR-ed.
enum {
	SAM_FLAG_PAIRED         = 1 << 0,  // templ. having mult. frag.s in sequencing
	SAM_FLAG_MAPPED_PAIRED  = 1 << 1,  // each frag properly aligned
	SAM_FLAG_UNMAPPED       = 1 << 2,  // fragment unmapped
	SAM_FLAG_MATE_UNMAPPED  = 1 << 3,  // next fragment in template unmapped
	SAM_FLAG_QUERY_STRAND   = 1 << 4,  // SEQ is reverse comp'ed from original
	SAM_FLAG_MATE_STRAND    = 1 << 5,  // next fragment SEQ reverse comp'ed
	SAM_FLAG_FIRST_IN_PAIR  = 1 << 6,  // first fragment in template
	SAM_FLAG_SECOND_IN_PAIR = 1 << 7,  // last fragment in template
	SAM_FLAG_NOT_PRIMARY    = 1 << 8,  // secondary alignment
	SAM_FLAG_FAILS_CHECKS   = 1 << 9,  // not passing quality controls
	SAM_FLAG_DUPLICATE      = 1 << 10  // PCR or optical duplicate
};
// Forward declarations of alignment-related types that appear in the
// SamConfig method signatures declared below.
class AlnRes;
class AlnFlags;
class AlnSetSumm;
/**
 * Encapsulates all the various ways that a user may wish to customize SAM
 * output: which header lines are written, how names are sanitized and which
 * optional fields are printed for each record.
 *
 * The reference / repeat name and length lists are stored by reference, not
 * copied, so the lists passed to the constructor must outlive this object.
 */
template<typename index_t>
class SamConfig {

	typedef EList<std::string> StrList;
	typedef EList<size_t>      LenList;

public:

	/**
	 * Store the output options.  The print_* arguments each switch one
	 * optional field (or group of fields) on or off; see the corresponding
	 * member declarations at the bottom of the class for the tag each one
	 * controls.
	 */
	SamConfig(
		const StrList& refnames,  // reference sequence names
		const LenList& reflens,   // reference sequence lengths
		const StrList& repnames,  // repeat sequence names
		const LenList& replens,   // repeat sequence lengths
		bool truncQname,          // truncate read name to 255?
		bool omitsec,             // omit secondary SEQ/QUAL
		bool noUnal,              // omit unaligned reads
		const std::string& pg_id, // id
		const std::string& pg_pn, // name
		const std::string& pg_vn, // version
		const std::string& pg_cl, // command-line
		const std::string& rgs,   // read groups string
		int rna_strandness,
		bool print_as,
		bool print_xs,
		bool print_xss,
		bool print_yn,
		bool print_xn,
		bool print_cs,
		bool print_cq,
		bool print_x0,
		bool print_x1,
		bool print_xm,
		bool print_xo,
		bool print_xg,
		bool print_nm,
		bool print_md,
		bool print_yf,
		bool print_yi,
		bool print_ym,
		bool print_yp,
		bool print_yt,
		bool print_ys,
		bool print_zs,
		bool print_xr,
		bool print_xt,
		bool print_xd,
		bool print_xu,
		bool print_ye,            // streak of failed DPs at end
		bool print_yl,            // longest streak of failed DPs
		bool print_yu,            // index of last succeeded DP
		bool print_xp,            // print seed hit information
		bool print_yr,            // # redundant seed hits
		bool print_zb,            // # Ftab lookups
		bool print_zr,            // # redundant path checks
		bool print_zf,            // # FM Index ops
		bool print_zm,            // FM Index op string for best-first search
		bool print_zi,            // # seed extend loop iters
		bool print_zp,            // best / second-best paired-end scores
		bool print_zu,            // best / second-best unpaired scores
		bool print_xs_a,          // XS:A: strand of spliced alignment
		bool print_nh) :          // NH:i: # alignments
		truncQname_(truncQname),
		omitsec_(omitsec),
		noUnal_(noUnal),
		pg_id_(pg_id),
		pg_pn_(pg_pn),
		pg_vn_(pg_vn),
		pg_cl_(pg_cl),
		rgs_(rgs),
		refnames_(refnames),
		reflens_(reflens),
		repnames_(repnames),
		replens_(replens),
		rna_strandness_(rna_strandness),
		print_as_(print_as),   // alignment score of best alignment
		print_xs_(print_xs),   // alignment score of second-best alignment
		print_xss_(print_xss), // best invalid alignment score
		print_yn_(print_yn),   // minimum valid score and perfect score
		print_xn_(print_xn),
		print_cs_(print_cs),
		print_cq_(print_cq),
		print_x0_(print_x0),
		print_x1_(print_x1),
		print_xm_(print_xm),
		print_xo_(print_xo),
		print_xg_(print_xg),
		print_nm_(print_nm),
		print_md_(print_md),
		print_yf_(print_yf),
		print_yi_(print_yi),
		print_ym_(print_ym),
		print_yp_(print_yp),
		print_yt_(print_yt),
		print_ys_(print_ys),
		print_zs_(print_zs),
		print_xr_(print_xr),
		print_xt_(print_xt),   // time elapsed in microseconds
		print_xd_(print_xd),   // DP extend attempts
		print_xu_(print_xu),   // ungapped extend attempts
		print_ye_(print_ye),   // streak of failed DPs at end
		print_yl_(print_yl),   // longest streak of failed DPs
		print_yu_(print_yu),   // index of last succeeded DP
		print_xp_(print_xp),   // print seed hit information
		print_yr_(print_yr),   // # redundant seed hits
		print_zb_(print_zb),   // # Ftab lookups
		print_zr_(print_zr),   // # redundant path checks
		print_zf_(print_zf),   // # FM Index ops
		print_zm_(print_zm),   // FM Index op string for best-first search
		print_zi_(print_zi),   // # seed extend loop iters
		print_zp_(print_zp),   // best / second-best paired-end scores
		print_zu_(print_zu),   // best / second-best unpaired scores
		print_xs_a_(print_xs_a),
		print_nh_(print_nh)
	{
		// Every reference name needs a matching length for the @SQ lines
		assert_eq(refnames_.size(), reflens_.size());
	}

	/**
	 * Print a reference name in a way that doesn't violate SAM's character
	 * constraints.  \*|[!-()+-<>-~][!-~]*
	 */
	void printRefName(
		BTString& o,
		const std::string& name)
		const;

	/**
	 * Print a :Z optional field where certain characters (anything outside
	 * the printable range '!'..'~', plus colon and percent) are escaped using
	 * % escapes.
	 *
	 * Characters are examined as unsigned bytes so that bytes >= 0x80 are
	 * escaped as %80..%FF rather than producing a negative table index.
	 */
	template<typename T>
	void printOptFieldEscapedZ(BTString& o, const T& s) const {
		const size_t len = s.length();
		for(size_t i = 0; i < len; i++) {
			const unsigned char c = static_cast<unsigned char>(s[i]);
			if(c < 33 || c > 126 || c == ':' || c == '%') {
				appendPercentEscape(o, c);
			} else {
				o.append(s[i]);
			}
		}
	}

	/**
	 * Print a :Z optional field where newline characters (LF, CR) and the
	 * percent sign are escaped using % escapes.  All other characters are
	 * copied through unchanged.
	 */
	template<typename T>
	void printOptFieldNewlineEscapedZ(BTString& o, const T& s) const {
		const size_t len = s.length();
		for(size_t i = 0; i < len; i++) {
			if(s[i] == 10 || s[i] == 13 || s[i] == '%') {
				appendPercentEscape(o, static_cast<unsigned char>(s[i]));
			} else {
				o.append(s[i]);
			}
		}
	}

	/**
	 * Print a read name.  SAM constrains QNAME to [!-?A-~]{1,255}
	 * (i.e. [33, 63], [65, 126]).
	 *
	 * If omitSlashMate is set, a trailing "/1", "/2" or "/3" is dropped.
	 * When QNAME truncation is enabled the name is additionally limited to
	 * 255 characters and cut at the first whitespace character; otherwise
	 * the (possibly mate-stripped) name is copied as is.
	 */
	template<typename TStr>
	void printReadName(
		BTString& o,
		const TStr& name,
		bool omitSlashMate)
		const
	{
		size_t namelen = name.length();
		if(omitSlashMate &&
		   namelen >= 2 &&
		   name[namelen-2] == '/' &&
		   (name[namelen-1] == '1' || name[namelen-1] == '2' || name[namelen-1] == '3'))
		{
			namelen -= 2;
		}
		if(truncQname_ && namelen > 255) {
			namelen = 255;
		}
		for(size_t i = 0; i < namelen; i++) {
			// isspace() is only defined for values representable as
			// unsigned char (or EOF), so convert before classifying
			if(truncQname_ && isspace(static_cast<unsigned char>(name[i]))) {
				return;
			}
			o.append(name[i]);
		}
	}

	/**
	 * Print a reference name given a reference index.  If 'repeat' is true
	 * the index refers to the repeat-sequence list instead of the reference
	 * list.
	 */
	void printRefNameFromIndex(
		BTString& o,
		size_t i,
		bool repeat = false)
		const;

	/**
	 * Print SAM header to given output buffer.
	 */
	void printHeader(
		BTString& o,
		const std::string& rgid,
		const std::string& rgs,
		bool printHd,
		bool printSq,
		bool printPg)
		const;

	/**
	 * Print the @HD header line to the given string.
	 */
	void printHdLine(BTString& o, const char *samver) const;

	/**
	 * Print the @SQ header lines to the given string.
	 */
	void printSqLines(BTString& o) const;

	/**
	 * Print the @PG header line to the given string.
	 */
	void printPgLine(BTString& o) const;

	/**
	 * Print the optional flags for an aligned read to the given string.
	 */
	void printAlignedOptFlags(
		BTString& o,               // output buffer
		bool first,                // first opt flag printed is first overall?
		const Read& rd,            // the read
		AlnRes& res,               // individual alignment result
		StackedAln& staln,         // stacked alignment
		const AlnFlags& flags,     // alignment flags
		const AlnSetSumm& summ,    // summary of alignments for this read
		const SeedAlSumm& ssm,     // seed alignment summary
		const PerReadMetrics& prm, // per-read metrics
		const Scoring& sc,         // scoring scheme
		const char *mapqInp,       // inputs to MAPQ calculation
		const ALTDB<index_t>* altdb)
		const;

	/**
	 * Record the optional flags for an aligned read in an Alignment object.
	 * Some values (e.g. AS, XM, NM, MD, YS) are stored in dedicated fields of
	 * the Alignment; the remaining tags are appended to its unChangedTags
	 * buffer.
	 */
	void printAlignedOptFlags(
		Alignment* newAlignment,   // alignment record receiving the tags
		bool first,                // first opt flag printed is first overall?
		const Read& rd,            // the read
		AlnRes& res,               // individual alignment result
		StackedAln& staln,         // stacked alignment
		const AlnFlags& flags,     // alignment flags
		const AlnSetSumm& summ,    // summary of alignments for this read
		const SeedAlSumm& ssm,     // seed alignment summary
		const PerReadMetrics& prm, // per-read metrics
		const Scoring& sc,         // scoring scheme
		const char *mapqInp,       // inputs to MAPQ calculation
		const ALTDB<index_t>* altdb)
		const;

	/**
	 * Print the optional flags to the given string.  This overload takes no
	 * alignment result.
	 */
	void printEmptyOptFlags(
		BTString& o,               // output buffer
		bool first,                // first opt flag printed is first overall?
		const Read& rd,            // the read
		const AlnFlags& flags,     // alignment flags
		const AlnSetSumm& summ,    // summary of alignments for this read
		const SeedAlSumm& ssm,     // seed alignment summary
		const PerReadMetrics& prm, // per-read metrics
		const Scoring& sc)         // scoring scheme
		const;

	/**
	 * Same as above, but targeting an Alignment object instead of a string.
	 */
	void printEmptyOptFlags(
		Alignment* newAlignment,   // alignment record receiving the tags
		bool first,                // first opt flag printed is first overall?
		const Read& rd,            // the read
		const AlnFlags& flags,     // alignment flags
		const AlnSetSumm& summ,    // summary of alignments for this read
		const SeedAlSumm& ssm,     // seed alignment summary
		const PerReadMetrics& prm, // per-read metrics
		const Scoring& sc)         // scoring scheme
		const;

	/**
	 * Return true iff we should try to obey the SAM spec's recommendations
	 * that:
	 *
	 * SEQ and QUAL of secondary alignments should be set to * to reduce the
	 * file size.
	 */
	bool omitSecondarySeqQual() const {
		return omitsec_;
	}

	/**
	 * Return true iff unaligned reads should be left out of the output.
	 */
	bool omitUnalignedReads() const {
		return noUnal_;
	}

protected:

	/**
	 * Append the percent escape for byte c ('%' followed by two upper-case
	 * hex digits) to o.  Taking the byte as unsigned keeps both nibbles in
	 * the range 0..15.
	 */
	static void appendPercentEscape(BTString& o, unsigned char c) {
		static const char hexDigits[] = "0123456789ABCDEF";
		o.append('%');
		o.append(hexDigits[c >> 4]);
		o.append(hexDigits[c & 15]);
	}

	bool truncQname_;   // truncate QNAME to 255 chars?
	bool omitsec_;      // omit secondary
	bool noUnal_;       // omit unaligned reads

	std::string pg_id_; // @PG ID: Program record identifier
	std::string pg_pn_; // @PG PN: Program name
	std::string pg_vn_; // @PG VN: Program version
	std::string pg_cl_; // @PG CL: Program command-line
	std::string rgs_;   // Read-group string to add to all records

	// The four lists below are references to caller-owned lists
	const StrList& refnames_; // reference sequence names
	const LenList& reflens_;  // reference sequence lengths
	const StrList& repnames_; // repeat sequence names
	const LenList& replens_;  // repeat sequence lengths

	int rna_strandness_;

	// Which alignment flags to print?

	// Following are printed by BWA-SW
	bool print_as_; // AS:i: Alignment score generated by aligner
	bool print_xs_; // XS:i: Suboptimal alignment score (written as ZS:i:)
	bool print_xss_;// Xs:i: Best invalid alignment score found
	bool print_yn_; // YN:i:, Yn:i: minimum valid score and perfect score
	bool print_xn_; // XN:i: Number of ambiguous bases in the reference

	// Other optional flags
	bool print_cs_; // CS:Z: Color read sequence on the original strand
	bool print_cq_; // CQ:Z: Color read quality on the original strand

	// Following are printed by BWA
	bool print_x0_; // X0:i: Number of best hits
	bool print_x1_; // X1:i: Number of sub-optimal best hits
	bool print_xm_; // XM:i: Number of mismatches in the alignment
	bool print_xo_; // XO:i: Number of gap opens
	bool print_xg_; // XG:i: Number of gap extensions (incl. opens)
	bool print_nm_; // NM:i: Edit dist. to the ref, Ns count, clipping doesn't
	bool print_md_; // MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*

	// Following are Bowtie2-specific
	bool print_yf_; // YF:i: Read was filtered out?
	bool print_yi_; // YI:Z: Summary of inputs to MAPQ calculation
	bool print_ym_; // YM:i: Read was repetitive when aligned unpaired?
	bool print_yp_; // YP:i: Read was repetitive when aligned paired?
	bool print_yt_; // YT:Z: String representing alignment type
	bool print_ys_; // YS:i: Score of other mate
	bool print_zs_; // ZS:i: Pseudo-random seed

	bool print_xr_; // XR:Z: Original read string
	bool print_xt_; // XT:i: Time taken to align
	bool print_xd_; // XD:i: DP problems
	bool print_xu_; // XU:i: ungapped alignment
	bool print_ye_; // YE:i: streak of failed DPs at end
	bool print_yl_; // YL:i: longest streak of failed DPs
	bool print_yu_; // YU:i: index of last succeeded DP
	bool print_xp_; // XP:B:I: seed hit information
	bool print_yr_; // YR:i: # redundant seed hits
	bool print_zb_; // ZB:i: # Ftab lookups
	bool print_zr_; // ZR:Z: # redundant path checks
	bool print_zf_; // ZF:i: # FM Index ops
	bool print_zm_; // ZM:Z: FM ops string for best-first search
	bool print_zi_; // ZI:i: # extend loop iters
	bool print_zp_; // ZP:Z:, Zp:Z: Score of best/second-best paired-end alignment
	bool print_zu_; // ZU:i:, Zu:i: Score of best/second-best unpaired alignment

	bool print_xs_a_; // XS:A:[+-] Sense/anti-sense strand splice sites correspond to
	bool print_nh_;   // NH:i: # alignments
};
/**
 * Print a reference name in a way that doesn't violate SAM's character
 * constraints.  \*|[!-()+-<>-~][!-~]*
 *
 * The name is copied up to, but not including, its first whitespace
 * character; no other character filtering is done here.
 */
template<typename index_t>
void SamConfig<index_t>::printRefName(
	BTString& o,
	const std::string& name) const
{
	const size_t namelen = name.length();
	for(size_t i = 0; i < namelen; i++) {
		// isspace() is only defined for values representable as unsigned
		// char (or EOF); a plain char may be negative, so convert first
		if(isspace(static_cast<unsigned char>(name[i]))) {
			return;
		}
		o.append(name[i]);
	}
}
/**
 * Print the name stored at index i of either the repeat-name list (when
 * 'repeat' is true) or the reference-name list (otherwise).
 */
template<typename index_t>
void SamConfig<index_t>::printRefNameFromIndex(BTString& o, size_t i, bool repeat) const {
	// Pick the list first, then index it once
	const StrList& names = repeat ? repnames_ : refnames_;
	printRefName(o, names[i]);
}
/**
 * Print SAM header to given output buffer.  Each of the @HD, @SQ and @PG
 * sections is written only if its flag is set.  An @RG line is written iff
 * rgid is non-empty; rgid and rgs are appended verbatim right after "@RG",
 * with no separator added here.
 */
template<typename index_t>
void SamConfig<index_t>::printHeader(
	BTString& o,
	const std::string& rgid,
	const std::string& rgs,
	bool printHd,
	bool printSq,
	bool printPg) const
{
	if(printHd) {
		printHdLine(o, "1.0");
	}
	if(printSq) {
		printSqLines(o);
	}
	const bool haveReadGroup = !rgid.empty();
	if(haveReadGroup) {
		o.append("@RG");
		o.append(rgid.c_str());
		o.append(rgs.c_str());
		o.append('\n');
	}
	if(printPg) {
		printPgLine(o);
	}
}
/**
 * Print the @HD header line to the given string: the format version given
 * by samver followed by a sort order of "unsorted".
 */
template<typename index_t>
void SamConfig<index_t>::printHdLine(BTString& o, const char *samver) const {
	const char * const versionPrefix   = "@HD\tVN:";
	const char * const sortOrderSuffix = "\tSO:unsorted\n";
	o.append(versionPrefix);
	o.append(samver);
	o.append(sortOrderSuffix);
}
/**
 * Print the @SQ header lines to the given string: one line per reference
 * sequence and, unless the externally defined 'threeN' flag is set, one line
 * per repeat sequence as well.
 */
template<typename index_t>
void SamConfig<index_t>::printSqLines(BTString& o) const {
	char lenBuf[1024];
	// Reference sequences
	const size_t numRefs = refnames_.size();
	for(size_t r = 0; r < numRefs; ++r) {
		o.append("@SQ\tSN:");
		printRefName(o, refnames_[r]);
		o.append("\tLN:");
		itoa10<size_t>(reflens_[r], lenBuf);
		o.append(lenBuf);
		o.append('\n');
	}
	// Repeat sequences are left out when threeN is set
	if(threeN) {
		return;
	}
	const size_t numReps = repnames_.size();
	for(size_t r = 0; r < numReps; ++r) {
		o.append("@SQ\tSN:");
		printRefName(o, repnames_[r]);
		o.append("\tLN:");
		itoa10<size_t>(replens_[r], lenBuf);
		o.append(lenBuf);
		o.append('\n');
	}
}
/**
 * Print the @PG header line to the given string, using the program id,
 * name, version and command line stored at construction.  The command line
 * is wrapped in double quotes.
 */
template<typename index_t>
void SamConfig<index_t>::printPgLine(BTString& o) const {
	const char * const id      = pg_id_.c_str();
	const char * const name    = pg_pn_.c_str();
	const char * const version = pg_vn_.c_str();
	const char * const cmdline = pg_cl_.c_str();
	o.append("@PG\tID:");
	o.append(id);
	o.append("\tPN:");
	o.append(name);
	o.append("\tVN:");
	o.append(version);
	o.append("\tCL:\"");
	o.append(cmdline);
	// closing quote and end of line
	o.append("\"\n");
}
// Emit the separator that precedes an optional field.  Expects two names in
// the scope where it is expanded: 'o', the output buffer, and 'first', a
// modifiable bool.  Appends a tab to 'o' unless 'first' is true, then clears
// 'first' so that every later field is preceded by a tab.
#define WRITE_SEP() { \
if(!first) o.append('\t'); \
first = false; \
}
/**
* Print the optional flags for an aligned read to the given string.
*
* Each field is emitted only if its print_* switch is set (some have further
* conditions, noted inline).  Fields are separated with WRITE_SEP(), which
* writes a tab before every field except the first when 'first' is true on
* entry.  The order of the blocks below is the order of the fields in the
* output.
*
* 'altdb' is dereferenced unconditionally, so it must not be null.  Edits
* whose snpID is a valid index into altdb->alts() are treated as known
* variants: they are excluded from the XM/XO/XG/NM counts and reported in the
* Zs:Z: field instead.  'ssm' is not used in this function.
*/
template<typename index_t>
void SamConfig<index_t>::printAlignedOptFlags(
BTString& o, // output buffer
bool first, // first opt flag printed is first overall?
const Read& rd, // the read
AlnRes& res, // individual alignment result
StackedAln& staln, // stacked alignment buffer
const AlnFlags& flags, // alignment flags
const AlnSetSumm& summ, // summary of alignments for this read
const SeedAlSumm& ssm, // seed alignment summary
const PerReadMetrics& prm, // per-read metrics
const Scoring& sc, // scoring scheme
const char *mapqInp, // inputs to MAPQ calculation
const ALTDB<index_t>* altdb)
const
{
// Scratch buffer for decimal conversions done by itoa10
char buf[1024];
if(print_as_) {
// AS:i: Alignment score generated by aligner
itoa10<TAlScore>(res.score().score(), buf);
WRITE_SEP();
o.append("AS:i:");
o.append(buf);
}
// Do not output suboptimal alignment score, which conflicts with Cufflinks and StringTie
if(print_xs_) {
// Suboptimal alignment score; printed only if a valid one exists.
// Written under the ZS:i: tag rather than XS:i: to avoid conflict with XS:A:
// NOTE(review): the pseudo-random seed further down is also written as
// ZS:i:, so enabling both switches can yield two ZS fields -- confirm.
AlnScore sco = summ.secbestMate(rd.mate < 2);
if(sco.valid()) {
itoa10<TAlScore>(sco.score(), buf);
WRITE_SEP();
o.append("ZS:i:");
o.append(buf);
}
}
if(print_xn_) {
// XN:i: Number of ambiguous bases in the reference
itoa10<size_t>(res.refNs(), buf);
WRITE_SEP();
o.append("XN:i:");
o.append(buf);
}
if(print_x0_) {
// X0:i: Number of best hits (nothing is emitted here)
}
if(print_x1_) {
// X1:i: Number of sub-optimal best hits (nothing is emitted here)
}
// Count mismatches, gap opens and gap extensions over the edit list.  Only
// edits whose snpID is not a valid index into altdb->alts() are counted.
size_t num_mm = 0;
size_t num_go = 0;
size_t num_gx = 0;
for(size_t i = 0; i < res.ned().size(); i++) {
if(res.ned()[i].isMismatch()) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_mm++;
}
} else if(res.ned()[i].isReadGap()) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_go++;
num_gx++;
}
// Following read gaps at the same position extend this gap
while(i < res.ned().size()-1 &&
res.ned()[i+1].pos == res.ned()[i].pos &&
res.ned()[i+1].isReadGap())
{
i++;
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_gx++;
}
}
} else if(res.ned()[i].isRefGap()) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_go++;
num_gx++;
}
// Following reference gaps at consecutive positions extend this gap
while(i < res.ned().size()-1 &&
res.ned()[i+1].pos == res.ned()[i].pos+1 &&
res.ned()[i+1].isRefGap())
{
i++;
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_gx++;
}
}
}
}
if(print_xm_) {
// XM:i: Number of mismatches in the alignment
itoa10<size_t>(num_mm, buf);
WRITE_SEP();
o.append("XM:i:");
o.append(buf);
}
if(print_xo_) {
// XO:i: Number of gap opens
itoa10<size_t>(num_go, buf);
WRITE_SEP();
o.append("XO:i:");
o.append(buf);
}
if(print_xg_) {
// XG:i: Number of gap extensions (incl. opens)
itoa10<size_t>(num_gx, buf);
WRITE_SEP();
o.append("XG:i:");
o.append(buf);
}
if(print_nm_) {
// NM:i: Edit distance to the reference.  Counts every edit that is not
// of type EDIT_TYPE_SPL and whose snpID is not a valid index into
// altdb->alts().
size_t NM = 0;
for(size_t i = 0; i < res.ned().size(); i++) {
if(res.ned()[i].type != EDIT_TYPE_SPL) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
NM++;
}
}
}
itoa10<size_t>(NM, buf);
WRITE_SEP();
o.append("NM:i:");
o.append(buf);
}
if(print_md_) {
// MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*
WRITE_SEP();
o.append("MD:Z:");
staln.buildMdz();
staln.writeMdz(
&o, // output buffer
NULL); // no char buffer
}
if(print_ys_ && summ.paired()) {
// YS:i: Alignment score of opposite mate
assert(res.oscore().valid());
itoa10<TAlScore>(res.oscore().score(), buf);
WRITE_SEP();
o.append("YS:i:");
o.append(buf);
}
if(print_yn_) {
// YN:i: Minimum valid score for this mate
TAlScore mn = sc.scoreMin.f<TAlScore>(rd.length());
itoa10<TAlScore>(mn, buf);
WRITE_SEP();
o.append("YN:i:");
o.append(buf);
// Yn:i: Perfect score for this mate
TAlScore pe = sc.perfectScore(rd.length());
itoa10<TAlScore>(pe, buf);
WRITE_SEP();
o.append("Yn:i:");
o.append(buf);
}
if(print_xss_) {
// Xs:i: Best invalid alignment score of this mate
// 'one' is true unless this read is part of a pair and is not mate 1
bool one = true;
if(flags.partOfPair() && !flags.readMate1()) {
one = false;
}
TAlScore bst = one ? prm.bestLtMinscMate1 : prm.bestLtMinscMate2;
// The minimum TAlScore value is skipped (presumably a "not set" sentinel)
if(bst > std::numeric_limits<TAlScore>::min()) {
itoa10<TAlScore>(bst, buf);
WRITE_SEP();
o.append("Xs:i:");
o.append(buf);
}
if(flags.partOfPair()) {
// Ys:i: Best invalid alignment score of opposite mate
bst = one ? prm.bestLtMinscMate2 : prm.bestLtMinscMate1;
if(bst > std::numeric_limits<TAlScore>::min()) {
itoa10<TAlScore>(bst, buf);
WRITE_SEP();
o.append("Ys:i:");
o.append(buf);
}
}
}
if(print_zs_) {
// ZS:i: Pseudo-random seed for read
itoa10<uint32_t>(rd.seed, buf);
WRITE_SEP();
o.append("ZS:i:");
o.append(buf);
}
if(print_yt_) {
// YT:Z: String representing alignment type
WRITE_SEP();
flags.printYT(o);
}
if(print_yp_ && flags.partOfPair() && flags.canMax()) {
// YP:i: Read was repetitive when aligned paired?
WRITE_SEP();
flags.printYP(o);
}
if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) {
// YM:i: Read was repetitive when aligned unpaired?
WRITE_SEP();
flags.printYM(o);
}
if(print_yf_ && flags.filtered()) {
// YF:i: Read was filtered?
// printYF receives 'first' and handles the separator itself; 'first'
// stays true only if it was true before and printYF returns true.
first = flags.printYF(o, first) && first;
}
if(print_yi_) {
// Print MAPQ calibration info
if(mapqInp[0] != '\0') {
// YI:Z: Inputs to the MAPQ calculation, skipped when the string is empty
WRITE_SEP();
o.append("YI:Z:");
o.append(mapqInp);
}
}
if(flags.partOfPair() && print_zp_) {
// ZP:Z: Score of best concordant paired-end alignment, or "NA"
WRITE_SEP();
o.append("ZP:Z:");
if(summ.bestPaired().valid()) {
itoa10<TAlScore>(summ.bestPaired().score(), buf);
o.append(buf);
} else {
o.append("NA");
}
// Zp:Z: Second-best concordant paired-end alignment score, or "NA"
WRITE_SEP();
o.append("Zp:Z:");
if(summ.secbestPaired().valid()) {
itoa10<TAlScore>(summ.secbestPaired().score(), buf);
o.append(buf);
} else {
o.append("NA");
}
}
if(print_zu_) {
// ZU:i: Score of best unpaired alignment
// NOTE(review): "NA" is written under an integer-typed (":i:") tag when
// the score is invalid, unlike ZP/Zp above which use ":Z:" -- confirm
// that consumers of this field accept it.
AlnScore best = (rd.mate <= 1 ? summ.best1() : summ.best2());
AlnScore secbest = (rd.mate <= 1 ? summ.secbest1() : summ.secbest2());
WRITE_SEP();
o.append("ZU:i:");
if(best.valid()) {
itoa10<TAlScore>(best.score(), buf);
o.append(buf);
} else {
o.append("NA");
}
// Zu:i: Score of second-best unpaired alignment
WRITE_SEP();
o.append("Zu:i:");
if(secbest.valid()) {
itoa10<TAlScore>(secbest.score(), buf);
o.append(buf);
} else {
o.append("NA");
}
}
if(!rgs_.empty()) {
// Read-group string, appended verbatim
WRITE_SEP();
o.append(rgs_.c_str());
}
if(print_xt_) {
// XT:i: Timing -- microseconds elapsed between prm.tv_beg and now
WRITE_SEP();
struct timeval tv_end;
struct timezone tz_end;
gettimeofday(&tv_end, &tz_end);
size_t total_usecs =
(tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 +
(tv_end.tv_usec - prm.tv_beg.tv_usec);
itoa10<size_t>(total_usecs, buf);
o.append("XT:i:");
o.append(buf);
}
if(print_xd_) {
// XD:i: Extend DPs
WRITE_SEP();
itoa10<uint64_t>(prm.nExDps, buf);
o.append("XD:i:");
o.append(buf);
// Xd:i: Mate DPs
WRITE_SEP();
itoa10<uint64_t>(prm.nMateDps, buf);
o.append("Xd:i:");
o.append(buf);
}
if(print_xu_) {
// XU:i: Extend ungapped tries
WRITE_SEP();
itoa10<uint64_t>(prm.nExUgs, buf);
o.append("XU:i:");
o.append(buf);
// Xu:i: Mate ungapped tries
WRITE_SEP();
itoa10<uint64_t>(prm.nMateUgs, buf);
o.append("Xu:i:");
o.append(buf);
}
if(print_ye_) {
// YE:i: Streak of failed DPs at end
WRITE_SEP();
itoa10<uint64_t>(prm.nDpFail, buf);
o.append("YE:i:");
o.append(buf);
// Ye:i: Streak of failed ungaps at end
WRITE_SEP();
itoa10<uint64_t>(prm.nUgFail, buf);
o.append("Ye:i:");
o.append(buf);
}
if(print_yl_) {
// YL:i: Longest streak of failed DPs
WRITE_SEP();
itoa10<uint64_t>(prm.nDpFailStreak, buf);
o.append("YL:i:");
o.append(buf);
// Yl:i: Longest streak of failed ungaps
WRITE_SEP();
itoa10<uint64_t>(prm.nUgFailStreak, buf);
o.append("Yl:i:");
o.append(buf);
}
if(print_yu_) {
// YU:i: Index of last successful DP
WRITE_SEP();
itoa10<uint64_t>(prm.nDpLastSucc, buf);
o.append("YU:i:");
o.append(buf);
// Yu:i: Index of last successful ungapped extension
WRITE_SEP();
itoa10<uint64_t>(prm.nUgLastSucc, buf);
o.append("Yu:i:");
o.append(buf);
}
if(print_xp_) {
// XP:B:I: Seed hit summary as a comma-separated array: nSeedElts,
// nSeedEltsFw, nSeedEltsRc, seedMean, seedMedian
WRITE_SEP();
o.append("XP:B:I,");
itoa10<uint64_t>(prm.nSeedElts, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nSeedEltsFw, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nSeedEltsRc, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.seedMean, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.seedMedian, buf);
o.append(buf);
}
if(print_yr_) {
// YR:i: Redundant seed hits
WRITE_SEP();
itoa10<uint64_t>(prm.nRedundants, buf);
o.append("YR:i:");
o.append(buf);
}
if(print_zb_) {
// ZB:i: Ftab ops for seed alignment
WRITE_SEP();
itoa10<uint64_t>(prm.nFtabs, buf);
o.append("ZB:i:");
o.append(buf);
}
if(print_zr_) {
// ZR:Z: Redundant path skips in seed alignment, written as
// nRedSkip,nRedFail,nRedIns
WRITE_SEP();
o.append("ZR:Z:");
itoa10<uint64_t>(prm.nRedSkip, buf); o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nRedFail, buf); o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nRedIns, buf); o.append(buf);
}
if(print_zf_) {
// ZF:i: FM Index ops for seed alignment
WRITE_SEP();
itoa10<uint64_t>(prm.nSdFmops, buf);
o.append("ZF:i:");
o.append(buf);
// Zf:i: FM Index ops for offset resolution
WRITE_SEP();
itoa10<uint64_t>(prm.nExFmops, buf);
o.append("Zf:i:");
o.append(buf);
}
if(print_zm_) {
// ZM:Z: Print FM index op string for best-first search
WRITE_SEP();
o.append("ZM:Z:");
prm.fmString.print(o, buf);
}
if(print_zi_) {
// ZI:i: Seed extend loop iterations
WRITE_SEP();
itoa10<uint64_t>(prm.nExIters, buf);
o.append("ZI:i:");
o.append(buf);
}
if(print_xs_a_) {
// XS:A: Strand ('+' or '-') associated with the alignment
if(rna_strandness_ == RNA_STRANDNESS_UNKNOWN) {
// No library strandness configured: use the sense reported by the
// alignment result, and print nothing if that is unknown
uint8_t whichsense = res.spliced_whichsense_transcript();
if(whichsense != SPL_UNKNOWN) {
WRITE_SEP();
o.append("XS:A:");
if(whichsense == SPL_FW || whichsense == SPL_SEMI_FW) {
o.append('+');
} else {
assert(whichsense == SPL_RC || whichsense == SPL_SEMI_RC);
o.append('-');
}
}
} else {
// Library strandness configured: choose the strand from the mate
// number, res.orient() and the strandness setting; '+' is the default
WRITE_SEP();
o.append("XS:A:");
char strandness = '+';
if(res.readMate1()) {
if(res.orient()) {
if(rna_strandness_ == RNA_STRANDNESS_R || rna_strandness_ == RNA_STRANDNESS_RF) {
strandness = '-';
}
} else {
if(rna_strandness_ == RNA_STRANDNESS_F || rna_strandness_ == RNA_STRANDNESS_FR) {
strandness = '-';
}
}
} else {
assert(res.readMate2());
assert(rna_strandness_ == RNA_STRANDNESS_FR || rna_strandness_ == RNA_STRANDNESS_RF);
if(res.orient()) {
if(rna_strandness_ == RNA_STRANDNESS_FR) {
strandness = '-';
}
} else {
if(rna_strandness_ == RNA_STRANDNESS_RF) {
strandness = '-';
}
}
}
o.append(strandness);
}
}
if(print_nh_) {
// NH:i: Number of alignments; the paired count for paired alignments,
// otherwise the count for mate 1 or mate 2
if(flags.alignedPaired()) {
WRITE_SEP();
itoa10<uint64_t>(summ.numAlnsPaired(), buf);
o.append("NH:i:");
o.append(buf);
} else if(flags.alignedUnpaired() || flags.alignedUnpairedMate()) {
WRITE_SEP();
itoa10<uint64_t>((flags.alignedUnpaired() || flags.readMate1()) ?
summ.numAlns1() : summ.numAlns2(), buf);
o.append("NH:i:");
o.append(buf);
}
}
// Zs:Z: Comma-separated list of the known variants (edits whose snpID is a
// valid index into altdb->alts()) covered by this alignment.  Each entry is
// written as <pos>|<S, D or I>|<variant name>.  This field is not guarded by
// a print_* switch; it is emitted whenever at least one such edit exists.
bool snp_first = true;
index_t prev_snp_idx = INDEX_MAX;
size_t len_trimmed = rd.length() - res.trimmed5p(true) - res.trimmed3p(true);
if(!res.fw()) {
// The edit list is modified in place through a const_cast for the duration
// of the loop; the identical call after the loop presumably restores it.
Edit::invertPoss(const_cast<EList<Edit>&>(res.ned()), len_trimmed, false);
}
for(size_t i = 0; i < res.ned().size(); i++) {
if(res.ned()[i].snpID >= altdb->alts().size())
continue;
index_t snp_idx = res.ned()[i].snpID;
assert_lt(snp_idx, altdb->alts().size());
const ALT<index_t>& snp = altdb->alts()[snp_idx];
const string& snpID = altdb->altnames()[snp_idx];
// Consecutive edits belonging to the same variant are reported once
if(snp_idx == prev_snp_idx) continue;
if(snp_first) {
WRITE_SEP();
o.append("Zs:Z:");
}
if(!snp_first) o.append(",");
// Make the position relative to the nearest preceding known-variant edit,
// if there is one; the amount subtracted depends on that edit's type.
uint64_t pos = res.ned()[i].pos;
size_t j = i;
while(j > 0) {
if(res.ned()[j-1].snpID < altdb->alts().size()) {
const ALT<index_t>& snp2 = altdb->alts()[res.ned()[j-1].snpID];
if(snp2.type == ALT_SNP_SGL) {
pos -= (res.ned()[j-1].pos + 1);
} else if(snp2.type == ALT_SNP_DEL) {
pos -= res.ned()[j-1].pos;
} else if(snp2.type == ALT_SNP_INS) {
// NOTE(review): this uses the length of the current variant (snp.len)
// although the branch tests the preceding one (snp2) -- confirm that
// snp2.len is not what was intended.
pos -= (res.ned()[j-1].pos + snp.len);
}
break;
}
j--;
}
itoa10<uint64_t>(pos, buf);
o.append(buf);
o.append("|");
if(snp.type == ALT_SNP_SGL) {
o.append("S");
} else if(snp.type == ALT_SNP_DEL) {
o.append("D");
} else {
assert_eq(snp.type, ALT_SNP_INS);
o.append("I");
}
o.append("|");
o.append(snpID.c_str());
if(snp_first) snp_first = false;
prev_snp_idx = snp_idx;
}
if(!res.fw()) {
Edit::invertPoss(const_cast<EList<Edit>&>(res.ned()), len_trimmed, false);
}
if(print_xr_) {
// Original read string.  A newline (not a tab or a tag) is written first,
// then the read's original buffer with LF, CR and '%' percent-escaped.
o.append("\n");
printOptFieldNewlineEscapedZ(o, rd.readOrigBuf);
}
}
template<typename index_t>
void SamConfig<index_t>::printAlignedOptFlags(
Alignment* newAlignment, // output buffer
bool first, // first opt flag printed is first overall?
const Read& rd, // the read
AlnRes& res, // individual alignment result
StackedAln& staln, // stacked alignment buffer
const AlnFlags& flags, // alignment flags
const AlnSetSumm& summ, // summary of alignments for this read
const SeedAlSumm& ssm, // seed alignment summary
const PerReadMetrics& prm, // per-read metrics
const Scoring& sc, // scoring scheme
const char *mapqInp, // inputs to MAPQ calculation
const ALTDB<index_t>* altdb)
const
{
BTString &o = newAlignment->unChangedTags;
char buf[1024];
if(print_as_) {
// AS:i: Alignment score generated by aligner
//itoa10<TAlScore>(res.score().score(), buf);
newAlignment->AS = res.score().score();
}
// Do not output suboptimal alignment score, which conflicts with Cufflinks and StringTie
if(print_xs_) {
// XS:i: Suboptimal alignment score
// Use ZS:i: to avoid conflict with XS:A:
AlnScore sco = summ.secbestMate(rd.mate < 2);
if(sco.valid()) {
itoa10<TAlScore>(sco.score(), buf);
WRITE_SEP();
o.append("ZS:i:");
o.append(buf);
}
}
if(print_xn_) {
// XN:i: Number of ambiguous bases in the referenece
itoa10<size_t>(res.refNs(), buf);
WRITE_SEP();
o.append("XN:i:");
o.append(buf);
}
if(print_x0_) {
// X0:i: Number of best hits
}
if(print_x1_) {
// X1:i: Number of sub-optimal best hits
}
size_t num_mm = 0;
size_t num_go = 0;
size_t num_gx = 0;
for(size_t i = 0; i < res.ned().size(); i++) {
if(res.ned()[i].isMismatch()) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_mm++;
}
} else if(res.ned()[i].isReadGap()) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_go++;
num_gx++;
}
while(i < res.ned().size()-1 &&
res.ned()[i+1].pos == res.ned()[i].pos &&
res.ned()[i+1].isReadGap())
{
i++;
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_gx++;
}
}
} else if(res.ned()[i].isRefGap()) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_go++;
num_gx++;
}
while(i < res.ned().size()-1 &&
res.ned()[i+1].pos == res.ned()[i].pos+1 &&
res.ned()[i+1].isRefGap())
{
i++;
if(res.ned()[i].snpID >= altdb->alts().size()) {
num_gx++;
}
}
}
}
if(print_xm_) {
// XM:i: Number of mismatches in the alignment
//itoa10<size_t>(num_mm, buf);
/*WRITE_SEP();
o.append("XM:i:");
o.append(buf);*/
newAlignment->XM = num_mm;
}
if(print_xo_) {
// XO:i: Number of gap opens
itoa10<size_t>(num_go, buf);
WRITE_SEP();
o.append("XO:i:");
o.append(buf);
}
if(print_xg_) {
// XG:i: Number of gap extensions (incl. opens)
itoa10<size_t>(num_gx, buf);
WRITE_SEP();
o.append("XG:i:");
o.append(buf);
}
if(print_nm_) {
// NM:i: Edit dist. to the ref, Ns count, clipping doesn't
size_t NM = 0;
for(size_t i = 0; i < res.ned().size(); i++) {
if(res.ned()[i].type != EDIT_TYPE_SPL) {
if(res.ned()[i].snpID >= altdb->alts().size()) {
NM++;
}
}
}
newAlignment->NM = NM;
}
if(print_md_) {
// MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*2
/*WRITE_SEP();
o.append("MD:Z:");*/
staln.buildMdz();
staln.writeMdz(
&newAlignment->MD, // output buffer
NULL); // no char buffer
}
if(print_ys_ && summ.paired()) {
// YS:i: Alignment score of opposite mate
assert(res.oscore().valid());
newAlignment->YS = res.oscore().score();
}
if(print_yn_) {
// YN:i: Minimum valid score for this mate
TAlScore mn = sc.scoreMin.f<TAlScore>(rd.length());
itoa10<TAlScore>(mn, buf);
WRITE_SEP();
o.append("YN:i:");
o.append(buf);
// Yn:i: Perfect score for this mate
TAlScore pe = sc.perfectScore(rd.length());
itoa10<TAlScore>(pe, buf);
WRITE_SEP();
o.append("Yn:i:");
o.append(buf);
}
if(print_xss_) {
// Xs:i: Best invalid alignment score of this mate
bool one = true;
if(flags.partOfPair() && !flags.readMate1()) {
one = false;
}
TAlScore bst = one ? prm.bestLtMinscMate1 : prm.bestLtMinscMate2;
if(bst > std::numeric_limits<TAlScore>::min()) {
itoa10<TAlScore>(bst, buf);
WRITE_SEP();
o.append("Xs:i:");
o.append(buf);
}
if(flags.partOfPair()) {
// Ys:i: Best invalid alignment score of opposite mate
bst = one ? prm.bestLtMinscMate2 : prm.bestLtMinscMate1;
if(bst > std::numeric_limits<TAlScore>::min()) {
itoa10<TAlScore>(bst, buf);
WRITE_SEP();
o.append("Ys:i:");
o.append(buf);
}
}
}
if(print_zs_) {
// ZS:i: Pseudo-random seed for read
itoa10<uint32_t>(rd.seed, buf);
WRITE_SEP();
o.append("ZS:i:");
o.append(buf);
}
if(print_yt_ && !threeN) {
// YT:Z: String representing alignment type
WRITE_SEP();
flags.printYT(o);
}
if(print_yp_ && flags.partOfPair() && flags.canMax()) {
// YP:i: Read was repetitive when aligned paired?
WRITE_SEP();
flags.printYP(o);
}
if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) {
// YM:i: Read was repetitive when aligned unpaired?
WRITE_SEP();
flags.printYM(o);
}
if(print_yf_ && flags.filtered()) {
// YF:i: Read was filtered?
first = flags.printYF(o, first) && first;
}
if(print_yi_) {
// Print MAPQ calibration info
if(mapqInp[0] != '\0') {
// YI:i: Suboptimal alignment score
WRITE_SEP();
o.append("YI:Z:");
o.append(mapqInp);
}
}
if(flags.partOfPair() && print_zp_) {
// ZP:i: Score of best concordant paired-end alignment
WRITE_SEP();
o.append("ZP:Z:");
if(summ.bestPaired().valid()) {
itoa10<TAlScore>(summ.bestPaired().score(), buf);
o.append(buf);
} else {
o.append("NA");
}
// Zp:i: Second-best concordant paired-end alignment score
WRITE_SEP();
o.append("Zp:Z:");
if(summ.secbestPaired().valid()) {
itoa10<TAlScore>(summ.secbestPaired().score(), buf);
o.append(buf);
} else {
o.append("NA");
}
}
if(print_zu_) {
// ZU:i: Score of best unpaired alignment
AlnScore best = (rd.mate <= 1 ? summ.best1() : summ.best2());
AlnScore secbest = (rd.mate <= 1 ? summ.secbest1() : summ.secbest2());
WRITE_SEP();
o.append("ZU:i:");
if(best.valid()) {
itoa10<TAlScore>(best.score(), buf);
o.append(buf);
} else {
o.append("NA");
}
// Zu:i: Score of second-best unpaired alignment
WRITE_SEP();
o.append("Zu:i:");
if(secbest.valid()) {
itoa10<TAlScore>(secbest.score(), buf);
o.append(buf);
} else {
o.append("NA");
}
}
if(!rgs_.empty()) {
WRITE_SEP();
o.append(rgs_.c_str());
}
if(print_xt_) {
// XT:i: Timing
WRITE_SEP();
struct timeval tv_end;
struct timezone tz_end;
gettimeofday(&tv_end, &tz_end);
size_t total_usecs =
(tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 +
(tv_end.tv_usec - prm.tv_beg.tv_usec);
itoa10<size_t>(total_usecs, buf);
o.append("XT:i:");
o.append(buf);
}
if(print_xd_) {
// XD:i: Extend DPs
WRITE_SEP();
itoa10<uint64_t>(prm.nExDps, buf);
o.append("XD:i:");
o.append(buf);
// Xd:i: Mate DPs
WRITE_SEP();
itoa10<uint64_t>(prm.nMateDps, buf);
o.append("Xd:i:");
o.append(buf);
}
if(print_xu_) {
// XU:i: Extend ungapped tries
WRITE_SEP();
itoa10<uint64_t>(prm.nExUgs, buf);
o.append("XU:i:");
o.append(buf);
// Xu:i: Mate ungapped tries
WRITE_SEP();
itoa10<uint64_t>(prm.nMateUgs, buf);
o.append("Xu:i:");
o.append(buf);
}
if(print_ye_) {
// YE:i: Streak of failed DPs at end
WRITE_SEP();
itoa10<uint64_t>(prm.nDpFail, buf);
o.append("YE:i:");
o.append(buf);
// Ye:i: Streak of failed ungaps at end
WRITE_SEP();
itoa10<uint64_t>(prm.nUgFail, buf);
o.append("Ye:i:");
o.append(buf);
}
if(print_yl_) {
// YL:i: Longest streak of failed DPs
WRITE_SEP();
itoa10<uint64_t>(prm.nDpFailStreak, buf);
o.append("YL:i:");
o.append(buf);
// Yl:i: Longest streak of failed ungaps
WRITE_SEP();
itoa10<uint64_t>(prm.nUgFailStreak, buf);
o.append("Yl:i:");
o.append(buf);
}
if(print_yu_) {
// YU:i: Index of last succesful DP
WRITE_SEP();
itoa10<uint64_t>(prm.nDpLastSucc, buf);
o.append("YU:i:");
o.append(buf);
// Yu:i: Index of last succesful DP
WRITE_SEP();
itoa10<uint64_t>(prm.nUgLastSucc, buf);
o.append("Yu:i:");
o.append(buf);
}
if(print_xp_) {
// XP:Z: String describing seed hits
WRITE_SEP();
o.append("XP:B:I,");
itoa10<uint64_t>(prm.nSeedElts, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nSeedEltsFw, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nSeedEltsRc, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.seedMean, buf);
o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.seedMedian, buf);
o.append(buf);
}
if(print_yr_) {
// YR:i: Redundant seed hits
WRITE_SEP();
itoa10<uint64_t>(prm.nRedundants, buf);
o.append("YR:i:");
o.append(buf);
}
if(print_zb_) {
// ZB:i: Ftab ops for seed alignment
WRITE_SEP();
itoa10<uint64_t>(prm.nFtabs, buf);
o.append("ZB:i:");
o.append(buf);
}
if(print_zr_) {
// ZR:Z: Redundant path skips in seed alignment
WRITE_SEP();
o.append("ZR:Z:");
itoa10<uint64_t>(prm.nRedSkip, buf); o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nRedFail, buf); o.append(buf);
o.append(',');
itoa10<uint64_t>(prm.nRedIns, buf); o.append(buf);
}
if(print_zf_) {
// ZF:i: FM Index ops for seed alignment
WRITE_SEP();
itoa10<uint64_t>(prm.nSdFmops, buf);
o.append("ZF:i:");
o.append(buf);
// Zf:i: FM Index ops for offset resolution
WRITE_SEP();
itoa10<uint64_t>(prm.nExFmops, buf);
o.append("Zf:i:");
o.append(buf);
}
if(print_zm_) {
// ZM:Z: Print FM index op string for best-first search
WRITE_SEP();
o.append("ZM:Z:");
prm.fmString.print(o, buf);
}
if(print_zi_) {
// ZI:i: Seed extend loop iterations
WRITE_SEP();
itoa10<uint64_t>(prm.nExIters, buf);
o.append("ZI:i:");
o.append(buf);
}
if(print_xs_a_) {
if(rna_strandness_ == RNA_STRANDNESS_UNKNOWN) {
uint8_t whichsense = res.spliced_whichsense_transcript();
if(whichsense != SPL_UNKNOWN) {
WRITE_SEP();
o.append("XS:A:");
if(whichsense == SPL_FW || whichsense == SPL_SEMI_FW) {
o.append('+');
} else {
assert(whichsense == SPL_RC || whichsense == SPL_SEMI_RC);
o.append('-');
}
}
} else {
WRITE_SEP();
o.append("XS:A:");
char strandness = '+';
if(res.readMate1()) {
if(res.orient()) {
if(rna_strandness_ == RNA_STRANDNESS_R || rna_strandness_ == RNA_STRANDNESS_RF) {
strandness = '-';
}
} else {
if(rna_strandness_ == RNA_STRANDNESS_F || rna_strandness_ == RNA_STRANDNESS_FR) {
strandness = '-';
}
}
} else {
assert(res.readMate2());
assert(rna_strandness_ == RNA_STRANDNESS_FR || rna_strandness_ == RNA_STRANDNESS_RF);
if(res.orient()) {
if(rna_strandness_ == RNA_STRANDNESS_FR) {
strandness = '-';
}
} else {
if(rna_strandness_ == RNA_STRANDNESS_RF) {
strandness = '-';
}
}
}
o.append(strandness);
}
}
if(print_nh_) {
if(flags.alignedPaired()) {
/*WRITE_SEP();
itoa10<uint64_t>(summ.numAlnsPaired(), buf);
o.append("NH:i:");
o.append(buf);*/
newAlignment->NH = summ.numAlnsPaired();
} else if(flags.alignedUnpaired() || flags.alignedUnpairedMate()) {
/*WRITE_SEP();
itoa10<uint64_t>((flags.alignedUnpaired() || flags.readMate1()) ?
summ.numAlns1() : summ.numAlns2(), buf);
o.append("NH:i:");
o.append(buf);*/
newAlignment->NH = (flags.alignedUnpaired() || flags.readMate1()) ? summ.numAlns1() : summ.numAlns2();
}
}
bool snp_first = true;
index_t prev_snp_idx = INDEX_MAX;
size_t len_trimmed = rd.length() - res.trimmed5p(true) - res.trimmed3p(true);
if(!res.fw()) {
Edit::invertPoss(const_cast<EList<Edit>&>(res.ned()), len_trimmed, false);
}
for(size_t i = 0; i < res.ned().size(); i++) {
if(res.ned()[i].snpID >= altdb->alts().size())
continue;
index_t snp_idx = res.ned()[i].snpID;
assert_lt(snp_idx, altdb->alts().size());
const ALT<index_t>& snp = altdb->alts()[snp_idx];
const string& snpID = altdb->altnames()[snp_idx];
if(snp_idx == prev_snp_idx) continue;
if(snp_first) {
WRITE_SEP();
o.append("Zs:Z:");
}
if(!snp_first) o.append(",");
uint64_t pos = res.ned()[i].pos;
size_t j = i;
while(j > 0) {
if(res.ned()[j-1].snpID < altdb->alts().size()) {
const ALT<index_t>& snp2 = altdb->alts()[res.ned()[j-1].snpID];
if(snp2.type == ALT_SNP_SGL) {
pos -= (res.ned()[j-1].pos + 1);
} else if(snp2.type == ALT_SNP_DEL) {
pos -= res.ned()[j-1].pos;
} else if(snp2.type == ALT_SNP_INS) {
pos -= (res.ned()[j-1].pos + snp.len);
}
break;
}
j--;
}
itoa10<uint64_t>(pos, buf);
o.append(buf);
o.append("|");
if(snp.type == ALT_SNP_SGL) {
o.append("S");
} else if(snp.type == ALT_SNP_DEL) {
o.append("D");
} else {
assert_eq(snp.type, ALT_SNP_INS);
o.append("I");
}
o.append("|");
o.append(snpID.c_str());
if(snp_first) snp_first = false;
prev_snp_idx = snp_idx;
}
if(!res.fw()) {
Edit::invertPoss(const_cast<EList<Edit>&>(res.ned()), len_trimmed, false);
}
if(print_xr_) {
// Original read string
newAlignment->passThroughLine.append("\n");
printOptFieldNewlineEscapedZ(newAlignment->passThroughLine, rd.readOrigBuf);
}
}
/**
 * Print the optional flags to the given string.
 *
 * This overload takes no AlnRes, so only fields that can be derived from the
 * read, the alignment flags, the per-read metrics and the scoring scheme are
 * emitted.  Each field is gated by its print_xx_ configuration switch and the
 * fields are appended in a fixed order.
 *
 * WRITE_SEP() is a macro defined elsewhere in this file; it relies on the
 * local names `o` and `first` (presumably it emits the field separator and
 * clears `first` -- confirm against the macro definition).
 *
 * @param o      output buffer that the tags are appended to
 * @param first  true if the next tag printed is the first optional field
 * @param rd     read (length, seed and original read buffer are used)
 * @param flags  alignment flags (YT/YP/YM/YF printing and pairing queries)
 * @param summ   summary of alignments for this read; not read here
 * @param ssm    seed alignment summary; not read here
 * @param prm    per-read metrics providing the counters and start time
 * @param sc     scoring scheme used for the YN/Yn fields
 */
template<typename index_t>
void SamConfig<index_t>::printEmptyOptFlags(
    BTString& o,               // output buffer
    bool first,                // first opt flag printed is first overall?
    const Read& rd,            // read
    const AlnFlags& flags,     // alignment flags
    const AlnSetSumm& summ,    // summary of alignments for this read
    const SeedAlSumm& ssm,     // seed alignment summary
    const PerReadMetrics& prm, // per-read metrics
    const Scoring& sc)         // scoring scheme
    const
{
    // Scratch buffer for decimal conversions done by itoa10
    char buf[1024];
    if(print_yn_) {
        // YN:i: Minimum valid score for this mate
        TAlScore mn = sc.scoreMin.f<TAlScore>(rd.length());
        itoa10<TAlScore>(mn, buf);
        WRITE_SEP();
        o.append("YN:i:");
        o.append(buf);
        // Yn:i: Perfect score for this mate
        TAlScore pe = sc.perfectScore(rd.length());
        itoa10<TAlScore>(pe, buf);
        WRITE_SEP();
        o.append("Yn:i:");
        o.append(buf);
    }
    if(print_zs_) {
        // ZS:i: Pseudo-random seed for read
        itoa10<uint32_t>(rd.seed, buf);
        WRITE_SEP();
        o.append("ZS:i:");
        o.append(buf);
    }
    if(print_yt_ && !threeN) {
        // YT:Z: String representing alignment type (suppressed when threeN
        // is set)
        WRITE_SEP();
        flags.printYT(o);
    }
    if(print_yp_ && flags.partOfPair() && flags.canMax()) {
        // YP:i: Read was repetitive when aligned paired?
        WRITE_SEP();
        flags.printYP(o);
    }
    if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) {
        // YM:i: Read was repetitive when aligned unpaired?
        WRITE_SEP();
        flags.printYM(o);
    }
    if(print_yf_ && flags.filtered()) {
        // YF:i: Read was filtered?
        // printYF receives `first` itself instead of going through
        // WRITE_SEP(); its return value is folded back into `first`.
        first = flags.printYF(o, first) && first;
    }
    if(!rgs_.empty()) {
        // Pre-formatted string held in rgs_
        WRITE_SEP();
        o.append(rgs_.c_str());
    }
    if(print_xt_) {
        // XT:i: Timing, microseconds elapsed since prm.tv_beg
        WRITE_SEP();
        struct timeval tv_end;
        // The timezone argument is obsolete and was never read here; POSIX
        // leaves behaviour unspecified for a non-null pointer, so pass NULL.
        gettimeofday(&tv_end, NULL);
        size_t total_usecs =
            (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 +
            (tv_end.tv_usec - prm.tv_beg.tv_usec);
        itoa10<size_t>(total_usecs, buf);
        o.append("XT:i:");
        o.append(buf);
    }
    if(print_xd_) {
        // XD:i: Extend DPs
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExDps, buf);
        o.append("XD:i:");
        o.append(buf);
        // Xd:i: Mate DPs
        WRITE_SEP();
        itoa10<uint64_t>(prm.nMateDps, buf);
        o.append("Xd:i:");
        o.append(buf);
    }
    if(print_xu_) {
        // XU:i: Extend ungapped tries
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExUgs, buf);
        o.append("XU:i:");
        o.append(buf);
        // Xu:i: Mate ungapped tries
        WRITE_SEP();
        itoa10<uint64_t>(prm.nMateUgs, buf);
        o.append("Xu:i:");
        o.append(buf);
    }
    if(print_ye_) {
        // YE:i: Streak of failed DPs at end
        WRITE_SEP();
        itoa10<uint64_t>(prm.nDpFail, buf);
        o.append("YE:i:");
        o.append(buf);
        // Ye:i: Streak of failed ungaps at end
        WRITE_SEP();
        itoa10<uint64_t>(prm.nUgFail, buf);
        o.append("Ye:i:");
        o.append(buf);
    }
    if(print_yl_) {
        // YL:i: Longest streak of failed DPs
        WRITE_SEP();
        itoa10<uint64_t>(prm.nDpFailStreak, buf);
        o.append("YL:i:");
        o.append(buf);
        // Yl:i: Longest streak of failed ungaps
        WRITE_SEP();
        itoa10<uint64_t>(prm.nUgFailStreak, buf);
        o.append("Yl:i:");
        o.append(buf);
    }
    if(print_yu_) {
        // YU:i: Index of last successful DP
        WRITE_SEP();
        itoa10<uint64_t>(prm.nDpLastSucc, buf);
        o.append("YU:i:");
        o.append(buf);
        // Yu:i: Index of last successful ungap (prm.nUgLastSucc)
        WRITE_SEP();
        itoa10<uint64_t>(prm.nUgLastSucc, buf);
        o.append("Yu:i:");
        o.append(buf);
    }
    if(print_xp_) {
        // XP:B:I: Comma-separated list describing seed hits: total elements,
        // fw elements, rc elements, mean, median
        WRITE_SEP();
        o.append("XP:B:I,");
        itoa10<uint64_t>(prm.nSeedElts, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nSeedEltsFw, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nSeedEltsRc, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.seedMean, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.seedMedian, buf);
        o.append(buf);
    }
    if(print_yr_) {
        // YR:i: Redundant seed hits
        WRITE_SEP();
        itoa10<uint64_t>(prm.nRedundants, buf);
        o.append("YR:i:");
        o.append(buf);
    }
    if(print_zb_) {
        // ZB:i: Ftab ops for seed alignment
        WRITE_SEP();
        itoa10<uint64_t>(prm.nFtabs, buf);
        o.append("ZB:i:");
        o.append(buf);
    }
    if(print_zr_) {
        // ZR:Z: Redundant path skips in seed alignment, printed as
        // nRedSkip,nRedFail,nRedIns
        WRITE_SEP();
        o.append("ZR:Z:");
        itoa10<uint64_t>(prm.nRedSkip, buf); o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nRedFail, buf); o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nRedIns, buf); o.append(buf);
    }
    if(print_zf_) {
        // ZF:i: FM Index ops for seed alignment
        WRITE_SEP();
        itoa10<uint64_t>(prm.nSdFmops, buf);
        o.append("ZF:i:");
        o.append(buf);
        // Zf:i: FM Index ops for offset resolution
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExFmops, buf);
        o.append("Zf:i:");
        o.append(buf);
    }
    if(print_zm_) {
        // ZM:Z: Print FM index op string for best-first search
        WRITE_SEP();
        o.append("ZM:Z:");
        prm.fmString.print(o, buf);
    }
    if(print_zi_) {
        // ZI:i: Seed extend loop iterations
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExIters, buf);
        o.append("ZI:i:");
        o.append(buf);
    }
    if(print_xr_) {
        // Original read string; a newline is appended first (no WRITE_SEP),
        // so the escaped original read follows on its own line
        o.append("\n");
        printOptFieldNewlineEscapedZ(o, rd.readOrigBuf);
    }
}
/**
 * Print the optional flags to the given string.  This function is for HISAT-3N.
 *
 * Tags are appended to newAlignment->unChangedTags; the original read
 * string (XR) is instead appended to newAlignment->passThroughLine.  This
 * overload takes no AlnRes, so only fields that can be derived from the
 * read, the alignment flags, the per-read metrics and the scoring scheme are
 * emitted.  Each field is gated by its print_xx_ configuration switch and the
 * fields are appended in a fixed order.
 *
 * WRITE_SEP() is a macro defined elsewhere in this file; it relies on the
 * local names `o` and `first` (presumably it emits the field separator and
 * clears `first` -- confirm against the macro definition).
 *
 * @param newAlignment  alignment record whose tag buffers are written
 * @param first         true if the next tag printed is the first optional field
 * @param rd            read (length, seed and original read buffer are used)
 * @param flags         alignment flags (YT/YP/YM/YF printing, pairing queries)
 * @param summ          summary of alignments for this read; not read here
 * @param ssm           seed alignment summary; not read here
 * @param prm           per-read metrics providing the counters and start time
 * @param sc            scoring scheme used for the YN/Yn fields
 */
template<typename index_t>
void SamConfig<index_t>::printEmptyOptFlags(
    Alignment* newAlignment,   // output buffer
    bool first,                // first opt flag printed is first overall?
    const Read& rd,            // read
    const AlnFlags& flags,     // alignment flags
    const AlnSetSumm& summ,    // summary of alignments for this read
    const SeedAlSumm& ssm,     // seed alignment summary
    const PerReadMetrics& prm, // per-read metrics
    const Scoring& sc)         // scoring scheme
    const
{
    // Scratch buffer for decimal conversions done by itoa10
    char buf[1024];
    // All tags below go into the alignment's unChangedTags buffer; the name
    // `o` is required by WRITE_SEP()
    BTString &o = newAlignment->unChangedTags;
    if(print_yn_) {
        // YN:i: Minimum valid score for this mate
        TAlScore mn = sc.scoreMin.f<TAlScore>(rd.length());
        itoa10<TAlScore>(mn, buf);
        WRITE_SEP();
        o.append("YN:i:");
        o.append(buf);
        // Yn:i: Perfect score for this mate
        TAlScore pe = sc.perfectScore(rd.length());
        itoa10<TAlScore>(pe, buf);
        WRITE_SEP();
        o.append("Yn:i:");
        o.append(buf);
    }
    if(print_zs_) {
        // ZS:i: Pseudo-random seed for read
        itoa10<uint32_t>(rd.seed, buf);
        WRITE_SEP();
        o.append("ZS:i:");
        o.append(buf);
    }
    if(print_yt_ && !threeN) {
        // YT:Z: String representing alignment type (suppressed when threeN
        // is set)
        WRITE_SEP();
        flags.printYT(o);
    }
    if(print_yp_ && flags.partOfPair() && flags.canMax()) {
        // YP:i: Read was repetitive when aligned paired?
        WRITE_SEP();
        flags.printYP(o);
    }
    if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) {
        // YM:i: Read was repetitive when aligned unpaired?
        WRITE_SEP();
        flags.printYM(o);
    }
    if(print_yf_ && flags.filtered()) {
        // YF:i: Read was filtered?
        // printYF receives `first` itself instead of going through
        // WRITE_SEP(); its return value is folded back into `first`.
        first = flags.printYF(o, first) && first;
    }
    if(!rgs_.empty()) {
        // Pre-formatted string held in rgs_
        WRITE_SEP();
        o.append(rgs_.c_str());
    }
    if(print_xt_) {
        // XT:i: Timing, microseconds elapsed since prm.tv_beg
        WRITE_SEP();
        struct timeval tv_end;
        // The timezone argument is obsolete and was never read here; POSIX
        // leaves behaviour unspecified for a non-null pointer, so pass NULL.
        gettimeofday(&tv_end, NULL);
        size_t total_usecs =
            (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 +
            (tv_end.tv_usec - prm.tv_beg.tv_usec);
        itoa10<size_t>(total_usecs, buf);
        o.append("XT:i:");
        o.append(buf);
    }
    if(print_xd_) {
        // XD:i: Extend DPs
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExDps, buf);
        o.append("XD:i:");
        o.append(buf);
        // Xd:i: Mate DPs
        WRITE_SEP();
        itoa10<uint64_t>(prm.nMateDps, buf);
        o.append("Xd:i:");
        o.append(buf);
    }
    if(print_xu_) {
        // XU:i: Extend ungapped tries
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExUgs, buf);
        o.append("XU:i:");
        o.append(buf);
        // Xu:i: Mate ungapped tries
        WRITE_SEP();
        itoa10<uint64_t>(prm.nMateUgs, buf);
        o.append("Xu:i:");
        o.append(buf);
    }
    if(print_ye_) {
        // YE:i: Streak of failed DPs at end
        WRITE_SEP();
        itoa10<uint64_t>(prm.nDpFail, buf);
        o.append("YE:i:");
        o.append(buf);
        // Ye:i: Streak of failed ungaps at end
        WRITE_SEP();
        itoa10<uint64_t>(prm.nUgFail, buf);
        o.append("Ye:i:");
        o.append(buf);
    }
    if(print_yl_) {
        // YL:i: Longest streak of failed DPs
        WRITE_SEP();
        itoa10<uint64_t>(prm.nDpFailStreak, buf);
        o.append("YL:i:");
        o.append(buf);
        // Yl:i: Longest streak of failed ungaps
        WRITE_SEP();
        itoa10<uint64_t>(prm.nUgFailStreak, buf);
        o.append("Yl:i:");
        o.append(buf);
    }
    if(print_yu_) {
        // YU:i: Index of last successful DP
        WRITE_SEP();
        itoa10<uint64_t>(prm.nDpLastSucc, buf);
        o.append("YU:i:");
        o.append(buf);
        // Yu:i: Index of last successful ungap (prm.nUgLastSucc)
        WRITE_SEP();
        itoa10<uint64_t>(prm.nUgLastSucc, buf);
        o.append("Yu:i:");
        o.append(buf);
    }
    if(print_xp_) {
        // XP:B:I: Comma-separated list describing seed hits: total elements,
        // fw elements, rc elements, mean, median
        WRITE_SEP();
        o.append("XP:B:I,");
        itoa10<uint64_t>(prm.nSeedElts, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nSeedEltsFw, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nSeedEltsRc, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.seedMean, buf);
        o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.seedMedian, buf);
        o.append(buf);
    }
    if(print_yr_) {
        // YR:i: Redundant seed hits
        WRITE_SEP();
        itoa10<uint64_t>(prm.nRedundants, buf);
        o.append("YR:i:");
        o.append(buf);
    }
    if(print_zb_) {
        // ZB:i: Ftab ops for seed alignment
        WRITE_SEP();
        itoa10<uint64_t>(prm.nFtabs, buf);
        o.append("ZB:i:");
        o.append(buf);
    }
    if(print_zr_) {
        // ZR:Z: Redundant path skips in seed alignment, printed as
        // nRedSkip,nRedFail,nRedIns
        WRITE_SEP();
        o.append("ZR:Z:");
        itoa10<uint64_t>(prm.nRedSkip, buf); o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nRedFail, buf); o.append(buf);
        o.append(',');
        itoa10<uint64_t>(prm.nRedIns, buf); o.append(buf);
    }
    if(print_zf_) {
        // ZF:i: FM Index ops for seed alignment
        WRITE_SEP();
        itoa10<uint64_t>(prm.nSdFmops, buf);
        o.append("ZF:i:");
        o.append(buf);
        // Zf:i: FM Index ops for offset resolution
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExFmops, buf);
        o.append("Zf:i:");
        o.append(buf);
    }
    if(print_zm_) {
        // ZM:Z: Print FM index op string for best-first search
        WRITE_SEP();
        o.append("ZM:Z:");
        prm.fmString.print(o, buf);
    }
    if(print_zi_) {
        // ZI:i: Seed extend loop iterations
        WRITE_SEP();
        itoa10<uint64_t>(prm.nExIters, buf);
        o.append("ZI:i:");
        o.append(buf);
    }
    if(print_xr_) {
        // Original read string; written to passThroughLine rather than
        // unChangedTags, preceded by a newline (no WRITE_SEP)
        newAlignment->passThroughLine.append("\n");
        printOptFieldNewlineEscapedZ(newAlignment->passThroughLine, rd.readOrigBuf);
    }
}
#endif /* SAM_H_ */