/* * Copyright 2011, Ben Langmead * * This file is part of Bowtie 2. * * Bowtie 2 is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Bowtie 2 is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Bowtie 2. If not, see . */ #ifndef SAM_H_ #define SAM_H_ #include #include #include "ds.h" #include "read.h" #include "util.h" #include "aligner_result.h" #include "scoring.h" #include "alt.h" #include "filebuf.h" #include "alignment_3n.h" enum { // Comments use language from v1.4-r962 spec SAM_FLAG_PAIRED = 1, // templ. having mult. frag.s in sequencing SAM_FLAG_MAPPED_PAIRED = 2, // each frag properly aligned SAM_FLAG_UNMAPPED = 4, // fragment unmapped SAM_FLAG_MATE_UNMAPPED = 8, // next fragment in template unmapped SAM_FLAG_QUERY_STRAND = 16, // SEQ is reverse comp'ed from original SAM_FLAG_MATE_STRAND = 32, // next fragment SEQ reverse comp'ed SAM_FLAG_FIRST_IN_PAIR = 64, // first fragment in template SAM_FLAG_SECOND_IN_PAIR = 128, // last fragment in template SAM_FLAG_NOT_PRIMARY = 256, // secondary alignment SAM_FLAG_FAILS_CHECKS = 512, // not passing quality controls SAM_FLAG_DUPLICATE = 1024 // PCR or optical duplicate }; class AlnRes; class AlnFlags; class AlnSetSumm; /** * Encapsulates all the various ways that a user may wish to customize SAM * output. */ template class SamConfig { typedef EList StrList; typedef EList LenList; public: SamConfig( const StrList& refnames, // reference sequence names const LenList& reflens, // reference sequence lengths const StrList& repnames, // repeat sequence names const LenList& replens, // repeat sequence lengths bool truncQname, // truncate read name to 255? bool omitsec, // omit secondary SEQ/QUAL bool noUnal, // omit unaligned reads const std::string& pg_id, // id const std::string& pg_pn, // name const std::string& pg_vn, // version const std::string& pg_cl, // command-line const std::string& rgs, // read groups string int rna_strandness, bool print_as, bool print_xs, bool print_xss, bool print_yn, bool print_xn, bool print_cs, bool print_cq, bool print_x0, bool print_x1, bool print_xm, bool print_xo, bool print_xg, bool print_nm, bool print_md, bool print_yf, bool print_yi, bool print_ym, bool print_yp, bool print_yt, bool print_ys, bool print_zs, bool print_xr, bool print_xt, bool print_xd, bool print_xu, bool print_ye, // streak of failed DPs at end bool print_yl, // longest streak of failed DPs bool print_yu, // index of last succeeded DP bool print_xp, // print seed hit information bool print_yr, // # redundant seed hits bool print_zb, // # Ftab lookups bool print_zr, // # redundant path checks bool print_zf, // # FM Index ops bool print_zm, // FM Index op string for best-first search bool print_zi, // # seed extend loop iters bool print_zp, bool print_zu, bool print_xs_a, bool print_nh) : truncQname_(truncQname), omitsec_(omitsec), noUnal_(noUnal), pg_id_(pg_id), pg_pn_(pg_pn), pg_vn_(pg_vn), pg_cl_(pg_cl), rgs_(rgs), refnames_(refnames), reflens_(reflens), repnames_(repnames), replens_(replens), rna_strandness_(rna_strandness), print_as_(print_as), // alignment score of best alignment print_xs_(print_xs), // alignment score of second-best alignment print_xss_(print_xss), print_yn_(print_yn), // minimum valid score and perfect score print_xn_(print_xn), print_cs_(print_cs), print_cq_(print_cq), print_x0_(print_x0), print_x1_(print_x1), print_xm_(print_xm), print_xo_(print_xo), print_xg_(print_xg), print_nm_(print_nm), print_md_(print_md), print_yf_(print_yf), print_yi_(print_yi), print_ym_(print_ym), print_yp_(print_yp), print_yt_(print_yt), print_ys_(print_ys), print_zs_(print_zs), print_xr_(print_xr), print_xt_(print_xt), // time elapsed in microseconds print_xd_(print_xd), // DP extend attempts print_xu_(print_xu), // ungapped extend attempts print_ye_(print_ye), // streak of failed DPs at end print_yl_(print_yl), // longest streak of failed DPs print_yu_(print_yu), // index of last succeeded DP print_xp_(print_xp), // print seed hit information print_yr_(print_yr), // index of last succeeded DP print_zb_(print_zb), // # Ftab lookups print_zr_(print_zr), // # redundant path checks print_zf_(print_zf), // # FM Index ops print_zm_(print_zm), // FM Index op string for best-first search print_zi_(print_zi), // # seed extend loop iters print_zp_(print_zp), // # seed extend loop iters print_zu_(print_zu), // # seed extend loop iters print_xs_a_(print_xs_a), print_nh_(print_nh) { assert_eq(refnames_.size(), reflens_.size()); } /** * Print a reference name in a way that doesn't violate SAM's character * constraints. \*|[!-()+-<>-~][!-~]* */ void printRefName( BTString& o, const std::string& name) const; /** * Print a :Z optional field where certain characters (whitespace, colon * and percent) are escaped using % escapes. */ template void printOptFieldEscapedZ(BTString& o, const T& s) const { size_t len = s.length(); for(size_t i = 0; i < len; i++) { if(s[i] < 33 || s[i] > 126 || s[i] == ':' || s[i] == '%') { // percent-encode it o.append('%'); int ms = s[i] >> 4; int ls = s[i] & 15; assert_range(0, 15, ms); assert_range(0, 15, ls); o.append("0123456789ABCDEF"[ms]); o.append("0123456789ABCDEF"[ls]); } else { o.append(s[i]); } } } /** * Print a :Z optional field where newline characters are escaped using % * escapes. */ template void printOptFieldNewlineEscapedZ(BTString& o, const T& s) const { size_t len = s.length(); for(size_t i = 0; i < len; i++) { if(s[i] == 10 || s[i] == 13 || s[i] == '%') { // percent-encode it o.append('%'); int ms = s[i] >> 4; int ls = s[i] & 15; assert_range(0, 15, ms); assert_range(0, 15, ls); o.append("0123456789ABCDEF"[ms]); o.append("0123456789ABCDEF"[ls]); } else { o.append(s[i]); } } } /** * Print a read name in a way that doesn't violate SAM's character * constraints. [!-?A-~]{1,255} (i.e. [33, 63], [65, 126]) */ template void printReadName( BTString& o, const TStr& name, bool omitSlashMate) const { size_t namelen = name.length(); if(omitSlashMate && namelen >= 2 && name[namelen-2] == '/' && (name[namelen-1] == '1' || name[namelen-1] == '2' || name[namelen-1] == '3')) { namelen -= 2; } if(truncQname_ && namelen > 255) { namelen = 255; } for(size_t i = 0; i < namelen; i++) { if(truncQname_ && isspace(name[i])) { return; } o.append(name[i]); } } /** * Print a reference name given a reference index. */ void printRefNameFromIndex( BTString& o, size_t i, bool repeat = false) const; /** * Print SAM header to given output buffer. */ void printHeader( BTString& o, const std::string& rgid, const std::string& rgs, bool printHd, bool printSq, bool printPg) const; /** * Print the @HD header line to the given string. */ void printHdLine(BTString& o, const char *samver) const; /** * Print the @SQ header lines to the given string. */ void printSqLines(BTString& o) const; /** * Print the @PG header line to the given string. */ void printPgLine(BTString& o) const; /** * Print the optional flags to the given string. */ void printAlignedOptFlags( BTString& o, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // the read AlnRes& res, // individual alignment result StackedAln& staln, // stacked alignment const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metics const Scoring& sc, // scoring scheme const char *mapqInp, // inputs to MAPQ calculation const ALTDB* altdb) const; /** * Print the optional flags to the given string. */ void printAlignedOptFlags( Alignment* newAlignment, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // the read AlnRes& res, // individual alignment result StackedAln& staln, // stacked alignment const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metics const Scoring& sc, // scoring scheme const char *mapqInp, // inputs to MAPQ calculation const ALTDB* altdb) const; /** * Print the optional flags to the given string. */ void printEmptyOptFlags( BTString& o, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // the read const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metrics const Scoring& sc) // scoring scheme const; void printEmptyOptFlags( Alignment* newAlignment, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // the read const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metrics const Scoring& sc) // scoring scheme const; /** * Return true iff we should try to obey the SAM spec's recommendations * that: * * SEQ and QUAL of secondary alignments should be set to ‘*’ to reduce the * file size. */ bool omitSecondarySeqQual() const { return omitsec_; } bool omitUnalignedReads() const { return noUnal_; } protected: bool truncQname_; // truncate QNAME to 255 chars? bool omitsec_; // omit secondary bool noUnal_; // omit unaligned reads std::string pg_id_; // @PG ID: Program record identifier std::string pg_pn_; // @PG PN: Program name std::string pg_vn_; // @PG VN: Program version std::string pg_cl_; // @PG CL: Program command-line std::string rgs_; // Read-group string to add to all records const StrList& refnames_; // reference sequence names const LenList& reflens_; // reference sequence lengths const StrList& repnames_; // repeat sequence names const LenList& replens_; // repeat sequence lengths int rna_strandness_; // Which alignment flags to print? // Following are printed by BWA-SW bool print_as_; // AS:i: Alignment score generated by aligner bool print_xs_; // XS:i: Suboptimal alignment score bool print_xss_;// Xs:i: Best invalid alignment score found bool print_yn_; // YN:i:, Yn:i: minimum valid score and perfect score bool print_xn_; // XN:i: Number of ambiguous bases in the referenece // Other optional flags bool print_cs_; // CS:Z: Color read sequence on the original strand bool print_cq_; // CQ:Z: Color read quality on the original strand // Following are printed by BWA bool print_x0_; // X0:i: Number of best hits bool print_x1_; // X1:i: Number of sub-optimal best hits bool print_xm_; // XM:i: Number of mismatches in the alignment bool print_xo_; // XO:i: Number of gap opens bool print_xg_; // XG:i: Number of gap extensions (incl. opens) bool print_nm_; // NM:i: Edit dist. to the ref, Ns count, clipping doesn't bool print_md_; // MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*2 // Following are Bowtie2-specific bool print_yf_; // YF:i: Read was filtered out? bool print_yi_; // YI:Z: Summary of inputs to MAPQ calculation bool print_ym_; // YM:i: Read was repetitive when aligned unpaired? bool print_yp_; // YP:i: Read was repetitive when aligned paired? bool print_yt_; // YT:Z: String representing alignment type bool print_ys_; // YS:i: Score of other mate bool print_zs_; // ZS:i: Pseudo-random seed bool print_xr_; // XR:Z: Original read string bool print_xt_; // XT:i: Time taken to align bool print_xd_; // XD:i: DP problems bool print_xu_; // XU:i: ungapped alignment bool print_ye_; // YE:i: streak of failed DPs at end bool print_yl_; // YL:i: longest streak of failed DPs bool print_yu_; // YU:i: index of last succeeded DP bool print_xp_; // XP:BI: seed hit information bool print_yr_; // YR:i: # redundant seed hits bool print_zb_; // ZB:i: # Ftab lookups bool print_zr_; // ZR:i: # redundant path checks bool print_zf_; // ZF:i: # FM Index ops bool print_zm_; // ZM:i: FM ops string for best-first search bool print_zi_; // ZI:i: # extend loop iters bool print_zp_; // ZP:i: Score of best/second-best paired-end alignment bool print_zu_; // ZU:i: Score of best/second-best unpaired alignment bool print_xs_a_; // XS:A:[+=] Sense/anti-sense strand splice sites correspond to bool print_nh_; // NH:i: # alignments }; /** * Print a reference name in a way that doesn't violate SAM's character * constraints. \*|[!-()+-<>-~][!-~]* (i.e. [33, 63], [65, 126]) */ template void SamConfig::printRefName( BTString& o, const std::string& name) const { size_t namelen = name.length(); for(size_t i = 0; i < namelen; i++) { if(isspace(name[i])) { return; } o.append(name[i]); } } /** * Print a reference name given a reference index. */ template void SamConfig::printRefNameFromIndex(BTString& o, size_t i, bool repeat) const { if(repeat) { printRefName(o, repnames_[i]); } else { printRefName(o, refnames_[i]); } } /** * Print SAM header to given output buffer. */ template void SamConfig::printHeader( BTString& o, const string& rgid, const string& rgs, bool printHd, bool printSq, bool printPg) const { if(printHd) printHdLine(o, "1.0"); if(printSq) printSqLines(o); if(!rgid.empty()) { o.append("@RG"); o.append(rgid.c_str()); o.append(rgs.c_str()); o.append('\n'); } if(printPg) printPgLine(o); } /** * Print the @HD header line to the given string. */ template void SamConfig::printHdLine(BTString& o, const char *samver) const { o.append("@HD\tVN:"); o.append(samver); o.append("\tSO:unsorted\n"); } /** * Print the @SQ header lines to the given string. */ template void SamConfig::printSqLines(BTString& o) const { char buf[1024]; for(size_t i = 0; i < refnames_.size(); i++) { o.append("@SQ\tSN:"); printRefName(o, refnames_[i]); o.append("\tLN:"); itoa10(reflens_[i], buf); o.append(buf); o.append('\n'); } if (!threeN) { for(size_t i = 0; i < repnames_.size(); i++) { o.append("@SQ\tSN:"); printRefName(o, repnames_[i]); o.append("\tLN:"); itoa10(replens_[i], buf); o.append(buf); o.append('\n'); } } } /** * Print the @PG header line to the given string. */ template void SamConfig::printPgLine(BTString& o) const { o.append("@PG\tID:"); o.append(pg_id_.c_str()); o.append("\tPN:"); o.append(pg_pn_.c_str()); o.append("\tVN:"); o.append(pg_vn_.c_str()); o.append("\tCL:\""); o.append(pg_cl_.c_str()); o.append('"'); o.append('\n'); } #define WRITE_SEP() { \ if(!first) o.append('\t'); \ first = false; \ } /** * Print the optional flags to the given string. */ template void SamConfig::printAlignedOptFlags( BTString& o, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // the read AlnRes& res, // individual alignment result StackedAln& staln, // stacked alignment buffer const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metrics const Scoring& sc, // scoring scheme const char *mapqInp, // inputs to MAPQ calculation const ALTDB* altdb) const { char buf[1024]; if(print_as_) { // AS:i: Alignment score generated by aligner itoa10(res.score().score(), buf); WRITE_SEP(); o.append("AS:i:"); o.append(buf); } // Do not output suboptimal alignment score, which conflicts with Cufflinks and StringTie if(print_xs_) { // XS:i: Suboptimal alignment score // Use ZS:i: to avoid conflict with XS:A: AlnScore sco = summ.secbestMate(rd.mate < 2); if(sco.valid()) { itoa10(sco.score(), buf); WRITE_SEP(); o.append("ZS:i:"); o.append(buf); } } if(print_xn_) { // XN:i: Number of ambiguous bases in the referenece itoa10(res.refNs(), buf); WRITE_SEP(); o.append("XN:i:"); o.append(buf); } if(print_x0_) { // X0:i: Number of best hits } if(print_x1_) { // X1:i: Number of sub-optimal best hits } size_t num_mm = 0; size_t num_go = 0; size_t num_gx = 0; for(size_t i = 0; i < res.ned().size(); i++) { if(res.ned()[i].isMismatch()) { if(res.ned()[i].snpID >= altdb->alts().size()) { num_mm++; } } else if(res.ned()[i].isReadGap()) { if(res.ned()[i].snpID >= altdb->alts().size()) { num_go++; num_gx++; } while(i < res.ned().size()-1 && res.ned()[i+1].pos == res.ned()[i].pos && res.ned()[i+1].isReadGap()) { i++; if(res.ned()[i].snpID >= altdb->alts().size()) { num_gx++; } } } else if(res.ned()[i].isRefGap()) { if(res.ned()[i].snpID >= altdb->alts().size()) { num_go++; num_gx++; } while(i < res.ned().size()-1 && res.ned()[i+1].pos == res.ned()[i].pos+1 && res.ned()[i+1].isRefGap()) { i++; if(res.ned()[i].snpID >= altdb->alts().size()) { num_gx++; } } } } if(print_xm_) { // XM:i: Number of mismatches in the alignment itoa10(num_mm, buf); WRITE_SEP(); o.append("XM:i:"); o.append(buf); } if(print_xo_) { // XO:i: Number of gap opens itoa10(num_go, buf); WRITE_SEP(); o.append("XO:i:"); o.append(buf); } if(print_xg_) { // XG:i: Number of gap extensions (incl. opens) itoa10(num_gx, buf); WRITE_SEP(); o.append("XG:i:"); o.append(buf); } if(print_nm_) { // NM:i: Edit dist. to the ref, Ns count, clipping doesn't size_t NM = 0; for(size_t i = 0; i < res.ned().size(); i++) { if(res.ned()[i].type != EDIT_TYPE_SPL) { if(res.ned()[i].snpID >= altdb->alts().size()) { NM++; } } } itoa10(NM, buf); WRITE_SEP(); o.append("NM:i:"); o.append(buf); } if(print_md_) { // MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*2 WRITE_SEP(); o.append("MD:Z:"); staln.buildMdz(); staln.writeMdz( &o, // output buffer NULL); // no char buffer } if(print_ys_ && summ.paired()) { // YS:i: Alignment score of opposite mate assert(res.oscore().valid()); itoa10(res.oscore().score(), buf); WRITE_SEP(); o.append("YS:i:"); o.append(buf); } if(print_yn_) { // YN:i: Minimum valid score for this mate TAlScore mn = sc.scoreMin.f(rd.length()); itoa10(mn, buf); WRITE_SEP(); o.append("YN:i:"); o.append(buf); // Yn:i: Perfect score for this mate TAlScore pe = sc.perfectScore(rd.length()); itoa10(pe, buf); WRITE_SEP(); o.append("Yn:i:"); o.append(buf); } if(print_xss_) { // Xs:i: Best invalid alignment score of this mate bool one = true; if(flags.partOfPair() && !flags.readMate1()) { one = false; } TAlScore bst = one ? prm.bestLtMinscMate1 : prm.bestLtMinscMate2; if(bst > std::numeric_limits::min()) { itoa10(bst, buf); WRITE_SEP(); o.append("Xs:i:"); o.append(buf); } if(flags.partOfPair()) { // Ys:i: Best invalid alignment score of opposite mate bst = one ? prm.bestLtMinscMate2 : prm.bestLtMinscMate1; if(bst > std::numeric_limits::min()) { itoa10(bst, buf); WRITE_SEP(); o.append("Ys:i:"); o.append(buf); } } } if(print_zs_) { // ZS:i: Pseudo-random seed for read itoa10(rd.seed, buf); WRITE_SEP(); o.append("ZS:i:"); o.append(buf); } if(print_yt_) { // YT:Z: String representing alignment type WRITE_SEP(); flags.printYT(o); } if(print_yp_ && flags.partOfPair() && flags.canMax()) { // YP:i: Read was repetitive when aligned paired? WRITE_SEP(); flags.printYP(o); } if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { // YM:i: Read was repetitive when aligned unpaired? WRITE_SEP(); flags.printYM(o); } if(print_yf_ && flags.filtered()) { // YF:i: Read was filtered? first = flags.printYF(o, first) && first; } if(print_yi_) { // Print MAPQ calibration info if(mapqInp[0] != '\0') { // YI:i: Suboptimal alignment score WRITE_SEP(); o.append("YI:Z:"); o.append(mapqInp); } } if(flags.partOfPair() && print_zp_) { // ZP:i: Score of best concordant paired-end alignment WRITE_SEP(); o.append("ZP:Z:"); if(summ.bestPaired().valid()) { itoa10(summ.bestPaired().score(), buf); o.append(buf); } else { o.append("NA"); } // Zp:i: Second-best concordant paired-end alignment score WRITE_SEP(); o.append("Zp:Z:"); if(summ.secbestPaired().valid()) { itoa10(summ.secbestPaired().score(), buf); o.append(buf); } else { o.append("NA"); } } if(print_zu_) { // ZU:i: Score of best unpaired alignment AlnScore best = (rd.mate <= 1 ? summ.best1() : summ.best2()); AlnScore secbest = (rd.mate <= 1 ? summ.secbest1() : summ.secbest2()); WRITE_SEP(); o.append("ZU:i:"); if(best.valid()) { itoa10(best.score(), buf); o.append(buf); } else { o.append("NA"); } // Zu:i: Score of second-best unpaired alignment WRITE_SEP(); o.append("Zu:i:"); if(secbest.valid()) { itoa10(secbest.score(), buf); o.append(buf); } else { o.append("NA"); } } if(!rgs_.empty()) { WRITE_SEP(); o.append(rgs_.c_str()); } if(print_xt_) { // XT:i: Timing WRITE_SEP(); struct timeval tv_end; struct timezone tz_end; gettimeofday(&tv_end, &tz_end); size_t total_usecs = (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + (tv_end.tv_usec - prm.tv_beg.tv_usec); itoa10(total_usecs, buf); o.append("XT:i:"); o.append(buf); } if(print_xd_) { // XD:i: Extend DPs WRITE_SEP(); itoa10(prm.nExDps, buf); o.append("XD:i:"); o.append(buf); // Xd:i: Mate DPs WRITE_SEP(); itoa10(prm.nMateDps, buf); o.append("Xd:i:"); o.append(buf); } if(print_xu_) { // XU:i: Extend ungapped tries WRITE_SEP(); itoa10(prm.nExUgs, buf); o.append("XU:i:"); o.append(buf); // Xu:i: Mate ungapped tries WRITE_SEP(); itoa10(prm.nMateUgs, buf); o.append("Xu:i:"); o.append(buf); } if(print_ye_) { // YE:i: Streak of failed DPs at end WRITE_SEP(); itoa10(prm.nDpFail, buf); o.append("YE:i:"); o.append(buf); // Ye:i: Streak of failed ungaps at end WRITE_SEP(); itoa10(prm.nUgFail, buf); o.append("Ye:i:"); o.append(buf); } if(print_yl_) { // YL:i: Longest streak of failed DPs WRITE_SEP(); itoa10(prm.nDpFailStreak, buf); o.append("YL:i:"); o.append(buf); // Yl:i: Longest streak of failed ungaps WRITE_SEP(); itoa10(prm.nUgFailStreak, buf); o.append("Yl:i:"); o.append(buf); } if(print_yu_) { // YU:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nDpLastSucc, buf); o.append("YU:i:"); o.append(buf); // Yu:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nUgLastSucc, buf); o.append("Yu:i:"); o.append(buf); } if(print_xp_) { // XP:Z: String describing seed hits WRITE_SEP(); o.append("XP:B:I,"); itoa10(prm.nSeedElts, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsFw, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsRc, buf); o.append(buf); o.append(','); itoa10(prm.seedMean, buf); o.append(buf); o.append(','); itoa10(prm.seedMedian, buf); o.append(buf); } if(print_yr_) { // YR:i: Redundant seed hits WRITE_SEP(); itoa10(prm.nRedundants, buf); o.append("YR:i:"); o.append(buf); } if(print_zb_) { // ZB:i: Ftab ops for seed alignment WRITE_SEP(); itoa10(prm.nFtabs, buf); o.append("ZB:i:"); o.append(buf); } if(print_zr_) { // ZR:Z: Redundant path skips in seed alignment WRITE_SEP(); o.append("ZR:Z:"); itoa10(prm.nRedSkip, buf); o.append(buf); o.append(','); itoa10(prm.nRedFail, buf); o.append(buf); o.append(','); itoa10(prm.nRedIns, buf); o.append(buf); } if(print_zf_) { // ZF:i: FM Index ops for seed alignment WRITE_SEP(); itoa10(prm.nSdFmops, buf); o.append("ZF:i:"); o.append(buf); // Zf:i: FM Index ops for offset resolution WRITE_SEP(); itoa10(prm.nExFmops, buf); o.append("Zf:i:"); o.append(buf); } if(print_zm_) { // ZM:Z: Print FM index op string for best-first search WRITE_SEP(); o.append("ZM:Z:"); prm.fmString.print(o, buf); } if(print_zi_) { // ZI:i: Seed extend loop iterations WRITE_SEP(); itoa10(prm.nExIters, buf); o.append("ZI:i:"); o.append(buf); } if(print_xs_a_) { if(rna_strandness_ == RNA_STRANDNESS_UNKNOWN) { uint8_t whichsense = res.spliced_whichsense_transcript(); if(whichsense != SPL_UNKNOWN) { WRITE_SEP(); o.append("XS:A:"); if(whichsense == SPL_FW || whichsense == SPL_SEMI_FW) { o.append('+'); } else { assert(whichsense == SPL_RC || whichsense == SPL_SEMI_RC); o.append('-'); } } } else { WRITE_SEP(); o.append("XS:A:"); char strandness = '+'; if(res.readMate1()) { if(res.orient()) { if(rna_strandness_ == RNA_STRANDNESS_R || rna_strandness_ == RNA_STRANDNESS_RF) { strandness = '-'; } } else { if(rna_strandness_ == RNA_STRANDNESS_F || rna_strandness_ == RNA_STRANDNESS_FR) { strandness = '-'; } } } else { assert(res.readMate2()); assert(rna_strandness_ == RNA_STRANDNESS_FR || rna_strandness_ == RNA_STRANDNESS_RF); if(res.orient()) { if(rna_strandness_ == RNA_STRANDNESS_FR) { strandness = '-'; } } else { if(rna_strandness_ == RNA_STRANDNESS_RF) { strandness = '-'; } } } o.append(strandness); } } if(print_nh_) { if(flags.alignedPaired()) { WRITE_SEP(); itoa10(summ.numAlnsPaired(), buf); o.append("NH:i:"); o.append(buf); } else if(flags.alignedUnpaired() || flags.alignedUnpairedMate()) { WRITE_SEP(); itoa10((flags.alignedUnpaired() || flags.readMate1()) ? summ.numAlns1() : summ.numAlns2(), buf); o.append("NH:i:"); o.append(buf); } } bool snp_first = true; index_t prev_snp_idx = INDEX_MAX; size_t len_trimmed = rd.length() - res.trimmed5p(true) - res.trimmed3p(true); if(!res.fw()) { Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); } for(size_t i = 0; i < res.ned().size(); i++) { if(res.ned()[i].snpID >= altdb->alts().size()) continue; index_t snp_idx = res.ned()[i].snpID; assert_lt(snp_idx, altdb->alts().size()); const ALT& snp = altdb->alts()[snp_idx]; const string& snpID = altdb->altnames()[snp_idx]; if(snp_idx == prev_snp_idx) continue; if(snp_first) { WRITE_SEP(); o.append("Zs:Z:"); } if(!snp_first) o.append(","); uint64_t pos = res.ned()[i].pos; size_t j = i; while(j > 0) { if(res.ned()[j-1].snpID < altdb->alts().size()) { const ALT& snp2 = altdb->alts()[res.ned()[j-1].snpID]; if(snp2.type == ALT_SNP_SGL) { pos -= (res.ned()[j-1].pos + 1); } else if(snp2.type == ALT_SNP_DEL) { pos -= res.ned()[j-1].pos; } else if(snp2.type == ALT_SNP_INS) { pos -= (res.ned()[j-1].pos + snp.len); } break; } j--; } itoa10(pos, buf); o.append(buf); o.append("|"); if(snp.type == ALT_SNP_SGL) { o.append("S"); } else if(snp.type == ALT_SNP_DEL) { o.append("D"); } else { assert_eq(snp.type, ALT_SNP_INS); o.append("I"); } o.append("|"); o.append(snpID.c_str()); if(snp_first) snp_first = false; prev_snp_idx = snp_idx; } if(!res.fw()) { Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); } if(print_xr_) { // Original read string o.append("\n"); printOptFieldNewlineEscapedZ(o, rd.readOrigBuf); } } template void SamConfig::printAlignedOptFlags( Alignment* newAlignment, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // the read AlnRes& res, // individual alignment result StackedAln& staln, // stacked alignment buffer const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metrics const Scoring& sc, // scoring scheme const char *mapqInp, // inputs to MAPQ calculation const ALTDB* altdb) const { BTString &o = newAlignment->unChangedTags; char buf[1024]; if(print_as_) { // AS:i: Alignment score generated by aligner //itoa10(res.score().score(), buf); newAlignment->AS = res.score().score(); } // Do not output suboptimal alignment score, which conflicts with Cufflinks and StringTie if(print_xs_) { // XS:i: Suboptimal alignment score // Use ZS:i: to avoid conflict with XS:A: AlnScore sco = summ.secbestMate(rd.mate < 2); if(sco.valid()) { itoa10(sco.score(), buf); WRITE_SEP(); o.append("ZS:i:"); o.append(buf); } } if(print_xn_) { // XN:i: Number of ambiguous bases in the referenece itoa10(res.refNs(), buf); WRITE_SEP(); o.append("XN:i:"); o.append(buf); } if(print_x0_) { // X0:i: Number of best hits } if(print_x1_) { // X1:i: Number of sub-optimal best hits } size_t num_mm = 0; size_t num_go = 0; size_t num_gx = 0; for(size_t i = 0; i < res.ned().size(); i++) { if(res.ned()[i].isMismatch()) { if(res.ned()[i].snpID >= altdb->alts().size()) { num_mm++; } } else if(res.ned()[i].isReadGap()) { if(res.ned()[i].snpID >= altdb->alts().size()) { num_go++; num_gx++; } while(i < res.ned().size()-1 && res.ned()[i+1].pos == res.ned()[i].pos && res.ned()[i+1].isReadGap()) { i++; if(res.ned()[i].snpID >= altdb->alts().size()) { num_gx++; } } } else if(res.ned()[i].isRefGap()) { if(res.ned()[i].snpID >= altdb->alts().size()) { num_go++; num_gx++; } while(i < res.ned().size()-1 && res.ned()[i+1].pos == res.ned()[i].pos+1 && res.ned()[i+1].isRefGap()) { i++; if(res.ned()[i].snpID >= altdb->alts().size()) { num_gx++; } } } } if(print_xm_) { // XM:i: Number of mismatches in the alignment //itoa10(num_mm, buf); /*WRITE_SEP(); o.append("XM:i:"); o.append(buf);*/ newAlignment->XM = num_mm; } if(print_xo_) { // XO:i: Number of gap opens itoa10(num_go, buf); WRITE_SEP(); o.append("XO:i:"); o.append(buf); } if(print_xg_) { // XG:i: Number of gap extensions (incl. opens) itoa10(num_gx, buf); WRITE_SEP(); o.append("XG:i:"); o.append(buf); } if(print_nm_) { // NM:i: Edit dist. to the ref, Ns count, clipping doesn't size_t NM = 0; for(size_t i = 0; i < res.ned().size(); i++) { if(res.ned()[i].type != EDIT_TYPE_SPL) { if(res.ned()[i].snpID >= altdb->alts().size()) { NM++; } } } newAlignment->NM = NM; } if(print_md_) { // MD:Z: String for mms. [0-9]+(([A-Z]|\^[A-Z]+)[0-9]+)*2 /*WRITE_SEP(); o.append("MD:Z:");*/ staln.buildMdz(); staln.writeMdz( &newAlignment->MD, // output buffer NULL); // no char buffer } if(print_ys_ && summ.paired()) { // YS:i: Alignment score of opposite mate assert(res.oscore().valid()); newAlignment->YS = res.oscore().score(); } if(print_yn_) { // YN:i: Minimum valid score for this mate TAlScore mn = sc.scoreMin.f(rd.length()); itoa10(mn, buf); WRITE_SEP(); o.append("YN:i:"); o.append(buf); // Yn:i: Perfect score for this mate TAlScore pe = sc.perfectScore(rd.length()); itoa10(pe, buf); WRITE_SEP(); o.append("Yn:i:"); o.append(buf); } if(print_xss_) { // Xs:i: Best invalid alignment score of this mate bool one = true; if(flags.partOfPair() && !flags.readMate1()) { one = false; } TAlScore bst = one ? prm.bestLtMinscMate1 : prm.bestLtMinscMate2; if(bst > std::numeric_limits::min()) { itoa10(bst, buf); WRITE_SEP(); o.append("Xs:i:"); o.append(buf); } if(flags.partOfPair()) { // Ys:i: Best invalid alignment score of opposite mate bst = one ? prm.bestLtMinscMate2 : prm.bestLtMinscMate1; if(bst > std::numeric_limits::min()) { itoa10(bst, buf); WRITE_SEP(); o.append("Ys:i:"); o.append(buf); } } } if(print_zs_) { // ZS:i: Pseudo-random seed for read itoa10(rd.seed, buf); WRITE_SEP(); o.append("ZS:i:"); o.append(buf); } if(print_yt_ && !threeN) { // YT:Z: String representing alignment type WRITE_SEP(); flags.printYT(o); } if(print_yp_ && flags.partOfPair() && flags.canMax()) { // YP:i: Read was repetitive when aligned paired? WRITE_SEP(); flags.printYP(o); } if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { // YM:i: Read was repetitive when aligned unpaired? WRITE_SEP(); flags.printYM(o); } if(print_yf_ && flags.filtered()) { // YF:i: Read was filtered? first = flags.printYF(o, first) && first; } if(print_yi_) { // Print MAPQ calibration info if(mapqInp[0] != '\0') { // YI:i: Suboptimal alignment score WRITE_SEP(); o.append("YI:Z:"); o.append(mapqInp); } } if(flags.partOfPair() && print_zp_) { // ZP:i: Score of best concordant paired-end alignment WRITE_SEP(); o.append("ZP:Z:"); if(summ.bestPaired().valid()) { itoa10(summ.bestPaired().score(), buf); o.append(buf); } else { o.append("NA"); } // Zp:i: Second-best concordant paired-end alignment score WRITE_SEP(); o.append("Zp:Z:"); if(summ.secbestPaired().valid()) { itoa10(summ.secbestPaired().score(), buf); o.append(buf); } else { o.append("NA"); } } if(print_zu_) { // ZU:i: Score of best unpaired alignment AlnScore best = (rd.mate <= 1 ? summ.best1() : summ.best2()); AlnScore secbest = (rd.mate <= 1 ? summ.secbest1() : summ.secbest2()); WRITE_SEP(); o.append("ZU:i:"); if(best.valid()) { itoa10(best.score(), buf); o.append(buf); } else { o.append("NA"); } // Zu:i: Score of second-best unpaired alignment WRITE_SEP(); o.append("Zu:i:"); if(secbest.valid()) { itoa10(secbest.score(), buf); o.append(buf); } else { o.append("NA"); } } if(!rgs_.empty()) { WRITE_SEP(); o.append(rgs_.c_str()); } if(print_xt_) { // XT:i: Timing WRITE_SEP(); struct timeval tv_end; struct timezone tz_end; gettimeofday(&tv_end, &tz_end); size_t total_usecs = (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + (tv_end.tv_usec - prm.tv_beg.tv_usec); itoa10(total_usecs, buf); o.append("XT:i:"); o.append(buf); } if(print_xd_) { // XD:i: Extend DPs WRITE_SEP(); itoa10(prm.nExDps, buf); o.append("XD:i:"); o.append(buf); // Xd:i: Mate DPs WRITE_SEP(); itoa10(prm.nMateDps, buf); o.append("Xd:i:"); o.append(buf); } if(print_xu_) { // XU:i: Extend ungapped tries WRITE_SEP(); itoa10(prm.nExUgs, buf); o.append("XU:i:"); o.append(buf); // Xu:i: Mate ungapped tries WRITE_SEP(); itoa10(prm.nMateUgs, buf); o.append("Xu:i:"); o.append(buf); } if(print_ye_) { // YE:i: Streak of failed DPs at end WRITE_SEP(); itoa10(prm.nDpFail, buf); o.append("YE:i:"); o.append(buf); // Ye:i: Streak of failed ungaps at end WRITE_SEP(); itoa10(prm.nUgFail, buf); o.append("Ye:i:"); o.append(buf); } if(print_yl_) { // YL:i: Longest streak of failed DPs WRITE_SEP(); itoa10(prm.nDpFailStreak, buf); o.append("YL:i:"); o.append(buf); // Yl:i: Longest streak of failed ungaps WRITE_SEP(); itoa10(prm.nUgFailStreak, buf); o.append("Yl:i:"); o.append(buf); } if(print_yu_) { // YU:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nDpLastSucc, buf); o.append("YU:i:"); o.append(buf); // Yu:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nUgLastSucc, buf); o.append("Yu:i:"); o.append(buf); } if(print_xp_) { // XP:Z: String describing seed hits WRITE_SEP(); o.append("XP:B:I,"); itoa10(prm.nSeedElts, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsFw, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsRc, buf); o.append(buf); o.append(','); itoa10(prm.seedMean, buf); o.append(buf); o.append(','); itoa10(prm.seedMedian, buf); o.append(buf); } if(print_yr_) { // YR:i: Redundant seed hits WRITE_SEP(); itoa10(prm.nRedundants, buf); o.append("YR:i:"); o.append(buf); } if(print_zb_) { // ZB:i: Ftab ops for seed alignment WRITE_SEP(); itoa10(prm.nFtabs, buf); o.append("ZB:i:"); o.append(buf); } if(print_zr_) { // ZR:Z: Redundant path skips in seed alignment WRITE_SEP(); o.append("ZR:Z:"); itoa10(prm.nRedSkip, buf); o.append(buf); o.append(','); itoa10(prm.nRedFail, buf); o.append(buf); o.append(','); itoa10(prm.nRedIns, buf); o.append(buf); } if(print_zf_) { // ZF:i: FM Index ops for seed alignment WRITE_SEP(); itoa10(prm.nSdFmops, buf); o.append("ZF:i:"); o.append(buf); // Zf:i: FM Index ops for offset resolution WRITE_SEP(); itoa10(prm.nExFmops, buf); o.append("Zf:i:"); o.append(buf); } if(print_zm_) { // ZM:Z: Print FM index op string for best-first search WRITE_SEP(); o.append("ZM:Z:"); prm.fmString.print(o, buf); } if(print_zi_) { // ZI:i: Seed extend loop iterations WRITE_SEP(); itoa10(prm.nExIters, buf); o.append("ZI:i:"); o.append(buf); } if(print_xs_a_) { if(rna_strandness_ == RNA_STRANDNESS_UNKNOWN) { uint8_t whichsense = res.spliced_whichsense_transcript(); if(whichsense != SPL_UNKNOWN) { WRITE_SEP(); o.append("XS:A:"); if(whichsense == SPL_FW || whichsense == SPL_SEMI_FW) { o.append('+'); } else { assert(whichsense == SPL_RC || whichsense == SPL_SEMI_RC); o.append('-'); } } } else { WRITE_SEP(); o.append("XS:A:"); char strandness = '+'; if(res.readMate1()) { if(res.orient()) { if(rna_strandness_ == RNA_STRANDNESS_R || rna_strandness_ == RNA_STRANDNESS_RF) { strandness = '-'; } } else { if(rna_strandness_ == RNA_STRANDNESS_F || rna_strandness_ == RNA_STRANDNESS_FR) { strandness = '-'; } } } else { assert(res.readMate2()); assert(rna_strandness_ == RNA_STRANDNESS_FR || rna_strandness_ == RNA_STRANDNESS_RF); if(res.orient()) { if(rna_strandness_ == RNA_STRANDNESS_FR) { strandness = '-'; } } else { if(rna_strandness_ == RNA_STRANDNESS_RF) { strandness = '-'; } } } o.append(strandness); } } if(print_nh_) { if(flags.alignedPaired()) { /*WRITE_SEP(); itoa10(summ.numAlnsPaired(), buf); o.append("NH:i:"); o.append(buf);*/ newAlignment->NH = summ.numAlnsPaired(); } else if(flags.alignedUnpaired() || flags.alignedUnpairedMate()) { /*WRITE_SEP(); itoa10((flags.alignedUnpaired() || flags.readMate1()) ? summ.numAlns1() : summ.numAlns2(), buf); o.append("NH:i:"); o.append(buf);*/ newAlignment->NH = (flags.alignedUnpaired() || flags.readMate1()) ? summ.numAlns1() : summ.numAlns2(); } } bool snp_first = true; index_t prev_snp_idx = INDEX_MAX; size_t len_trimmed = rd.length() - res.trimmed5p(true) - res.trimmed3p(true); if(!res.fw()) { Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); } for(size_t i = 0; i < res.ned().size(); i++) { if(res.ned()[i].snpID >= altdb->alts().size()) continue; index_t snp_idx = res.ned()[i].snpID; assert_lt(snp_idx, altdb->alts().size()); const ALT& snp = altdb->alts()[snp_idx]; const string& snpID = altdb->altnames()[snp_idx]; if(snp_idx == prev_snp_idx) continue; if(snp_first) { WRITE_SEP(); o.append("Zs:Z:"); } if(!snp_first) o.append(","); uint64_t pos = res.ned()[i].pos; size_t j = i; while(j > 0) { if(res.ned()[j-1].snpID < altdb->alts().size()) { const ALT& snp2 = altdb->alts()[res.ned()[j-1].snpID]; if(snp2.type == ALT_SNP_SGL) { pos -= (res.ned()[j-1].pos + 1); } else if(snp2.type == ALT_SNP_DEL) { pos -= res.ned()[j-1].pos; } else if(snp2.type == ALT_SNP_INS) { pos -= (res.ned()[j-1].pos + snp.len); } break; } j--; } itoa10(pos, buf); o.append(buf); o.append("|"); if(snp.type == ALT_SNP_SGL) { o.append("S"); } else if(snp.type == ALT_SNP_DEL) { o.append("D"); } else { assert_eq(snp.type, ALT_SNP_INS); o.append("I"); } o.append("|"); o.append(snpID.c_str()); if(snp_first) snp_first = false; prev_snp_idx = snp_idx; } if(!res.fw()) { Edit::invertPoss(const_cast&>(res.ned()), len_trimmed, false); } if(print_xr_) { // Original read string newAlignment->passThroughLine.append("\n"); printOptFieldNewlineEscapedZ(newAlignment->passThroughLine, rd.readOrigBuf); } } /** * Print the optional flags to the given string. */ template void SamConfig::printEmptyOptFlags( BTString& o, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // read const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metrics const Scoring& sc) // scoring scheme const { char buf[1024]; if(print_yn_) { // YN:i: Minimum valid score for this mate TAlScore mn = sc.scoreMin.f(rd.length()); itoa10(mn, buf); WRITE_SEP(); o.append("YN:i:"); o.append(buf); // Yn:i: Perfect score for this mate TAlScore pe = sc.perfectScore(rd.length()); itoa10(pe, buf); WRITE_SEP(); o.append("Yn:i:"); o.append(buf); } if(print_zs_) { // ZS:i: Pseudo-random seed for read itoa10(rd.seed, buf); WRITE_SEP(); o.append("ZS:i:"); o.append(buf); } if(print_yt_&& !threeN) { // YT:Z: String representing alignment type WRITE_SEP(); flags.printYT(o); } if(print_yp_ && flags.partOfPair() && flags.canMax()) { // YP:i: Read was repetitive when aligned paired? WRITE_SEP(); flags.printYP(o); } if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { // YM:i: Read was repetitive when aligned unpaired? WRITE_SEP(); flags.printYM(o); } if(print_yf_ && flags.filtered()) { // YM:i: Read was repetitive when aligned unpaired? first = flags.printYF(o, first) && first; } if(!rgs_.empty()) { WRITE_SEP(); o.append(rgs_.c_str()); } if(print_xt_) { // XT:i: Timing WRITE_SEP(); struct timeval tv_end; struct timezone tz_end; gettimeofday(&tv_end, &tz_end); size_t total_usecs = (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + (tv_end.tv_usec - prm.tv_beg.tv_usec); itoa10(total_usecs, buf); o.append("XT:i:"); o.append(buf); } if(print_xd_) { // XD:i: Extend DPs WRITE_SEP(); itoa10(prm.nExDps, buf); o.append("XD:i:"); o.append(buf); // Xd:i: Mate DPs WRITE_SEP(); itoa10(prm.nMateDps, buf); o.append("Xd:i:"); o.append(buf); } if(print_xu_) { // XU:i: Extend ungapped tries WRITE_SEP(); itoa10(prm.nExUgs, buf); o.append("XU:i:"); o.append(buf); // Xu:i: Mate ungapped tries WRITE_SEP(); itoa10(prm.nMateUgs, buf); o.append("Xu:i:"); o.append(buf); } if(print_ye_) { // YE:i: Streak of failed DPs at end WRITE_SEP(); itoa10(prm.nDpFail, buf); o.append("YE:i:"); o.append(buf); // Ye:i: Streak of failed ungaps at end WRITE_SEP(); itoa10(prm.nUgFail, buf); o.append("Ye:i:"); o.append(buf); } if(print_yl_) { // YL:i: Longest streak of failed DPs WRITE_SEP(); itoa10(prm.nDpFailStreak, buf); o.append("YL:i:"); o.append(buf); // Yl:i: Longest streak of failed ungaps WRITE_SEP(); itoa10(prm.nUgFailStreak, buf); o.append("Yl:i:"); o.append(buf); } if(print_yu_) { // YU:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nDpLastSucc, buf); o.append("YU:i:"); o.append(buf); // Yu:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nUgLastSucc, buf); o.append("Yu:i:"); o.append(buf); } if(print_xp_) { // XP:Z: String describing seed hits WRITE_SEP(); o.append("XP:B:I,"); itoa10(prm.nSeedElts, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsFw, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsRc, buf); o.append(buf); o.append(','); itoa10(prm.seedMean, buf); o.append(buf); o.append(','); itoa10(prm.seedMedian, buf); o.append(buf); } if(print_yr_) { // YR:i: Redundant seed hits WRITE_SEP(); itoa10(prm.nRedundants, buf); o.append("YR:i:"); o.append(buf); } if(print_zb_) { // ZB:i: Ftab ops for seed alignment WRITE_SEP(); itoa10(prm.nFtabs, buf); o.append("ZB:i:"); o.append(buf); } if(print_zr_) { // ZR:Z: Redundant path skips in seed alignment WRITE_SEP(); o.append("ZR:Z:"); itoa10(prm.nRedSkip, buf); o.append(buf); o.append(','); itoa10(prm.nRedFail, buf); o.append(buf); o.append(','); itoa10(prm.nRedIns, buf); o.append(buf); } if(print_zf_) { // ZF:i: FM Index ops for seed alignment WRITE_SEP(); itoa10(prm.nSdFmops, buf); o.append("ZF:i:"); o.append(buf); // Zf:i: FM Index ops for offset resolution WRITE_SEP(); itoa10(prm.nExFmops, buf); o.append("Zf:i:"); o.append(buf); } if(print_zm_) { // ZM:Z: Print FM index op string for best-first search WRITE_SEP(); o.append("ZM:Z:"); prm.fmString.print(o, buf); } if(print_zi_) { // ZI:i: Seed extend loop iterations WRITE_SEP(); itoa10(prm.nExIters, buf); o.append("ZI:i:"); o.append(buf); } if(print_xr_) { // Original read string o.append("\n"); printOptFieldNewlineEscapedZ(o, rd.readOrigBuf); } } /** * Print the optional flags to the given string. This function is for HISAT-3N. */ template void SamConfig::printEmptyOptFlags( Alignment* newAlignment, // output buffer bool first, // first opt flag printed is first overall? const Read& rd, // read const AlnFlags& flags, // alignment flags const AlnSetSumm& summ, // summary of alignments for this read const SeedAlSumm& ssm, // seed alignment summary const PerReadMetrics& prm, // per-read metrics const Scoring& sc) // scoring scheme const { char buf[1024]; BTString &o = newAlignment->unChangedTags; if(print_yn_) { // YN:i: Minimum valid score for this mate TAlScore mn = sc.scoreMin.f(rd.length()); itoa10(mn, buf); WRITE_SEP(); o.append("YN:i:"); o.append(buf); // Yn:i: Perfect score for this mate TAlScore pe = sc.perfectScore(rd.length()); itoa10(pe, buf); WRITE_SEP(); o.append("Yn:i:"); o.append(buf); } if(print_zs_) { // ZS:i: Pseudo-random seed for read itoa10(rd.seed, buf); WRITE_SEP(); o.append("ZS:i:"); o.append(buf); } if(print_yt_&& !threeN) { // YT:Z: String representing alignment type WRITE_SEP(); flags.printYT(o); } if(print_yp_ && flags.partOfPair() && flags.canMax()) { // YP:i: Read was repetitive when aligned paired? WRITE_SEP(); flags.printYP(o); } if(print_ym_ && flags.canMax() && (flags.isMixedMode() || !flags.partOfPair())) { // YM:i: Read was repetitive when aligned unpaired? WRITE_SEP(); flags.printYM(o); } if(print_yf_ && flags.filtered()) { // YM:i: Read was repetitive when aligned unpaired? first = flags.printYF(o, first) && first; } if(!rgs_.empty()) { WRITE_SEP(); o.append(rgs_.c_str()); } if(print_xt_) { // XT:i: Timing WRITE_SEP(); struct timeval tv_end; struct timezone tz_end; gettimeofday(&tv_end, &tz_end); size_t total_usecs = (tv_end.tv_sec - prm.tv_beg.tv_sec) * 1000000 + (tv_end.tv_usec - prm.tv_beg.tv_usec); itoa10(total_usecs, buf); o.append("XT:i:"); o.append(buf); } if(print_xd_) { // XD:i: Extend DPs WRITE_SEP(); itoa10(prm.nExDps, buf); o.append("XD:i:"); o.append(buf); // Xd:i: Mate DPs WRITE_SEP(); itoa10(prm.nMateDps, buf); o.append("Xd:i:"); o.append(buf); } if(print_xu_) { // XU:i: Extend ungapped tries WRITE_SEP(); itoa10(prm.nExUgs, buf); o.append("XU:i:"); o.append(buf); // Xu:i: Mate ungapped tries WRITE_SEP(); itoa10(prm.nMateUgs, buf); o.append("Xu:i:"); o.append(buf); } if(print_ye_) { // YE:i: Streak of failed DPs at end WRITE_SEP(); itoa10(prm.nDpFail, buf); o.append("YE:i:"); o.append(buf); // Ye:i: Streak of failed ungaps at end WRITE_SEP(); itoa10(prm.nUgFail, buf); o.append("Ye:i:"); o.append(buf); } if(print_yl_) { // YL:i: Longest streak of failed DPs WRITE_SEP(); itoa10(prm.nDpFailStreak, buf); o.append("YL:i:"); o.append(buf); // Yl:i: Longest streak of failed ungaps WRITE_SEP(); itoa10(prm.nUgFailStreak, buf); o.append("Yl:i:"); o.append(buf); } if(print_yu_) { // YU:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nDpLastSucc, buf); o.append("YU:i:"); o.append(buf); // Yu:i: Index of last succesful DP WRITE_SEP(); itoa10(prm.nUgLastSucc, buf); o.append("Yu:i:"); o.append(buf); } if(print_xp_) { // XP:Z: String describing seed hits WRITE_SEP(); o.append("XP:B:I,"); itoa10(prm.nSeedElts, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsFw, buf); o.append(buf); o.append(','); itoa10(prm.nSeedEltsRc, buf); o.append(buf); o.append(','); itoa10(prm.seedMean, buf); o.append(buf); o.append(','); itoa10(prm.seedMedian, buf); o.append(buf); } if(print_yr_) { // YR:i: Redundant seed hits WRITE_SEP(); itoa10(prm.nRedundants, buf); o.append("YR:i:"); o.append(buf); } if(print_zb_) { // ZB:i: Ftab ops for seed alignment WRITE_SEP(); itoa10(prm.nFtabs, buf); o.append("ZB:i:"); o.append(buf); } if(print_zr_) { // ZR:Z: Redundant path skips in seed alignment WRITE_SEP(); o.append("ZR:Z:"); itoa10(prm.nRedSkip, buf); o.append(buf); o.append(','); itoa10(prm.nRedFail, buf); o.append(buf); o.append(','); itoa10(prm.nRedIns, buf); o.append(buf); } if(print_zf_) { // ZF:i: FM Index ops for seed alignment WRITE_SEP(); itoa10(prm.nSdFmops, buf); o.append("ZF:i:"); o.append(buf); // Zf:i: FM Index ops for offset resolution WRITE_SEP(); itoa10(prm.nExFmops, buf); o.append("Zf:i:"); o.append(buf); } if(print_zm_) { // ZM:Z: Print FM index op string for best-first search WRITE_SEP(); o.append("ZM:Z:"); prm.fmString.print(o, buf); } if(print_zi_) { // ZI:i: Seed extend loop iterations WRITE_SEP(); itoa10(prm.nExIters, buf); o.append("ZI:i:"); o.append(buf); } if(print_xr_) { // Original read string newAlignment->passThroughLine.append("\n"); printOptFieldNewlineEscapedZ(newAlignment->passThroughLine, rd.readOrigBuf); } } #endif /* SAM_H_ */