4979 lines
199 KiB
C++
4979 lines
199 KiB
C++
|
/*
|
|||
|
* Copyright 2015, Daehwan Kim <infphilo@gmail.com>
|
|||
|
*
|
|||
|
* This file is part of HISAT 2.
|
|||
|
* This file is edited by Yun (Leo) Zhang for HISAT-3N.
|
|||
|
*
|
|||
|
* HISAT 2 is free software: you can redistribute it and/or modify
|
|||
|
* it under the terms of the GNU General Public License as published by
|
|||
|
* the Free Software Foundation, either version 3 of the License, or
|
|||
|
* (at your option) any later version.
|
|||
|
*
|
|||
|
* HISAT 2 is distributed in the hope that it will be useful,
|
|||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|||
|
* GNU General Public License for more details.
|
|||
|
*
|
|||
|
* You should have received a copy of the GNU General Public License
|
|||
|
* along with HISAT 2. If not, see <http://www.gnu.org/licenses/>.
|
|||
|
*/
|
|||
|
|
|||
|
#include <stdlib.h>
|
|||
|
#include <iostream>
|
|||
|
#include <fstream>
|
|||
|
#include <string>
|
|||
|
#include <cassert>
|
|||
|
#include <stdexcept>
|
|||
|
#include <getopt.h>
|
|||
|
#include <math.h>
|
|||
|
#include <utility>
|
|||
|
#include <limits>
|
|||
|
#include "alphabet.h"
|
|||
|
#include "assert_helpers.h"
|
|||
|
#include "endian_swap.h"
|
|||
|
#include "hgfm.h"
|
|||
|
#include "rfm.h"
|
|||
|
#include "formats.h"
|
|||
|
#include "sequence_io.h"
|
|||
|
#include "tokenize.h"
|
|||
|
#include "aln_sink.h"
|
|||
|
#include "pat.h"
|
|||
|
#include "threading.h"
|
|||
|
#include "ds.h"
|
|||
|
#include "aligner_metrics.h"
|
|||
|
#include "sam.h"
|
|||
|
#include "aligner_seed.h"
|
|||
|
#include "splice_site.h"
|
|||
|
#include "spliced_aligner.h"
|
|||
|
#include "aligner_seed_policy.h"
|
|||
|
#include "aligner_sw.h"
|
|||
|
#include "aligner_sw_driver.h"
|
|||
|
#include "aligner_cache.h"
|
|||
|
#include "util.h"
|
|||
|
#include "pe.h"
|
|||
|
#include "tp.h"
|
|||
|
#include "gp.h"
|
|||
|
#include "simple_func.h"
|
|||
|
#include "presets.h"
|
|||
|
#include "opts.h"
|
|||
|
#include "outq.h"
|
|||
|
#include "repeat_kmer.h"
|
|||
|
#include "hisat2lib/ht2.h"
|
|||
|
//#include "utility_3n.h"
|
|||
|
|
|||
|
|
|||
|
using namespace std;
|
|||
|
|
|||
|
MemoryTally gMemTally;
|
|||
|
|
|||
|
static EList<string> mates1; // mated reads (first mate)
|
|||
|
static EList<string> mates2; // mated reads (second mate)
|
|||
|
static EList<string> mates12; // mated reads (1st/2nd interleaved in 1 file)
|
|||
|
static string adjIdxBase;
|
|||
|
static string adjIdxBases_3N[2];
|
|||
|
bool gColor; // colorspace (not supported)
|
|||
|
int gVerbose; // be talkative
|
|||
|
static bool startVerbose; // be talkative at startup
|
|||
|
int gQuiet; // print nothing but the alignments
|
|||
|
static int sanityCheck; // enable expensive sanity checks
|
|||
|
static int format; // default read format is FASTQ
|
|||
|
static string origString; // reference text, or filename(s)
|
|||
|
static int seed; // srandom() seed
|
|||
|
static int timing; // whether to report basic timing data
|
|||
|
static int metricsIval; // interval between alignment metrics messages (0 = no messages)
|
|||
|
static string metricsFile;// output file to put alignment metrics in
|
|||
|
static bool metricsStderr;// output file to put alignment metrics in
|
|||
|
static bool metricsPerRead; // report a metrics tuple for every read
|
|||
|
static bool allHits; // for multihits, report just one
|
|||
|
static bool showVersion; // just print version and quit?
|
|||
|
static int ipause; // pause before maching?
|
|||
|
static uint32_t qUpto; // max # of queries to read
|
|||
|
int gTrim5; // amount to trim from 5' end
|
|||
|
int gTrim3; // amount to trim from 3' end
|
|||
|
static int offRate; // keep default offRate
|
|||
|
static bool solexaQuals; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
|
|||
|
static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33)
|
|||
|
static bool integerQuals; // quality strings are space-separated strings of integers, not ASCII
|
|||
|
static int nthreads; // number of pthreads operating concurrently
|
|||
|
static int outType; // style of output
|
|||
|
static bool noRefNames; // true -> print reference indexes; not names
|
|||
|
static uint32_t khits; // number of hits per read; >1 is much slower
|
|||
|
static uint32_t mhits; // don't report any hits if there are > mhits
|
|||
|
static int partitionSz; // output a partitioning key in first field
|
|||
|
static bool useSpinlock; // false -> don't use of spinlocks even if they're #defines
|
|||
|
static bool fileParallel; // separate threads read separate input files in parallel
|
|||
|
static bool useShmem; // use shared memory to hold the index
|
|||
|
static bool useMm; // use memory-mapped files to hold the index
|
|||
|
static bool mmSweep; // sweep through memory-mapped files immediately after mapping
|
|||
|
int gMinInsert; // minimum insert size
|
|||
|
int gMaxInsert; // maximum insert size
|
|||
|
bool gMate1fw; // -1 mate aligns in fw orientation on fw strand
|
|||
|
bool gMate2fw; // -2 mate aligns in rc orientation on fw strand
|
|||
|
bool gFlippedMatesOK; // allow mates to be in wrong order
|
|||
|
bool gDovetailMatesOK; // allow one mate to extend off the end of the other
|
|||
|
bool gContainMatesOK; // allow one mate to contain the other in PE alignment
|
|||
|
bool gOlapMatesOK; // allow mates to overlap in PE alignment
|
|||
|
bool gExpandToFrag; // incr max frag length to =larger mate len if necessary
|
|||
|
bool gReportDiscordant; // find and report discordant paired-end alignments
|
|||
|
bool gReportMixed; // find and report unpaired alignments for paired reads
|
|||
|
static uint32_t cacheLimit; // ranges w/ size > limit will be cached
|
|||
|
static uint32_t cacheSize; // # words per range cache
|
|||
|
static uint32_t skipReads; // # reads/read pairs to skip
|
|||
|
bool gNofw; // don't align fw orientation of read
|
|||
|
bool gNorc; // don't align rc orientation of read
|
|||
|
static uint32_t fastaContLen;
|
|||
|
static uint32_t fastaContFreq;
|
|||
|
static bool hadoopOut; // print Hadoop status and summary messages
|
|||
|
static bool fuzzy;
|
|||
|
static bool fullRef;
|
|||
|
// ---- SAM record / header controls ----
static bool samTruncQname;      // whether to truncate QNAME to 255 chars
static bool samOmitSecSeqQual;  // omit SEQ/QUAL for 2ndary alignments?
static bool samNoUnal;          // don't print records for unaligned reads
static bool samNoHead;          // don't print any header lines in SAM output
static bool samNoSQ;            // don't print @SQ header lines

// ---- Per-field SAM switches ----
// NOTE(review): each sam_print_<tag> flag presumably enables the optional SAM
// field named by its suffix (the original comments only spell out XS:i,
// Xs:i/Ys:i and YN:i/Yn:i); confirm against the SAM-printing code.
// Defaults are assigned in resetOptions().
static bool sam_print_as;
static bool sam_print_xs;       // XS:i
static bool sam_print_xss;      // Xs:i and Ys:i
static bool sam_print_yn;       // YN:i and Yn:i
static bool sam_print_xn;
static bool sam_print_cs;
static bool sam_print_cq;
static bool sam_print_x0;
static bool sam_print_x1;
static bool sam_print_xm;
static bool sam_print_xo;
static bool sam_print_xg;
static bool sam_print_nm;
static bool sam_print_md;
static bool sam_print_yf;
static bool sam_print_yi;
static bool sam_print_ym;
static bool sam_print_yp;
static bool sam_print_yt;
static bool sam_print_ys;
static bool sam_print_zs;
static bool sam_print_xr;
static bool sam_print_xt;
static bool sam_print_xd;
static bool sam_print_xu;
static bool sam_print_yl;
static bool sam_print_ye;
static bool sam_print_yu;
static bool sam_print_xp;
static bool sam_print_yr;
static bool sam_print_zb;
static bool sam_print_zr;
static bool sam_print_zf;
static bool sam_print_zm;
static bool sam_print_zi;
static bool sam_print_zp;
static bool sam_print_zu;
static bool sam_print_xs_a;
static bool sam_print_nh;
|
|||
|
static bool bwaSwLike;
|
|||
|
static float bwaSwLikeC;
|
|||
|
static float bwaSwLikeT;
|
|||
|
static bool qcFilter;
|
|||
|
static bool sortByScore; // prioritize alignments to report by score?
|
|||
|
bool gReportOverhangs; // false -> filter out alignments that fall off the end of a reference sequence
|
|||
|
static string rgid; // ID: setting for @RG header line
|
|||
|
static string rgs; // SAM outputs for @RG header line
|
|||
|
static string rgs_optflag; // SAM optional flag to add corresponding to @RG ID
|
|||
|
static bool msample; // whether to report a random alignment when maxed-out via -m/-M
|
|||
|
int gGapBarrier; // # diags on top/bot only to be entered diagonally
|
|||
|
static EList<string> qualities;
|
|||
|
static EList<string> qualities1;
|
|||
|
static EList<string> qualities2;
|
|||
|
static string polstr; // temporary holder for policy string
|
|||
|
static bool msNoCache; // true -> disable local cache
|
|||
|
static int bonusMatchType; // how to reward matches
|
|||
|
static int bonusMatch; // constant reward if bonusMatchType=constant
|
|||
|
static int penMmcType; // how to penalize mismatches
|
|||
|
int penMmcMax; // max mm penalty
|
|||
|
static int penMmcMin; // min mm penalty
|
|||
|
static int penScMax; // max sc penalty
|
|||
|
static int penScMin; // min sc penalty
|
|||
|
static int penNType; // how to penalize Ns in the read
|
|||
|
static int penN; // constant if N pelanty is a constant
|
|||
|
static bool penNCatPair; // concatenate mates before N filtering?
|
|||
|
static bool localAlign; // do local alignment in DP steps
|
|||
|
static bool noisyHpolymer; // set to true if gap penalties should be reduced to be consistent with a sequencer that under- and overcalls homopolymers
|
|||
|
static int penRdGapConst; // constant cost of extending a gap in the read
|
|||
|
static int penRfGapConst; // constant cost of extending a gap in the reference
|
|||
|
static int penRdGapLinear; // coeff of linear term for cost of gap extension in read
|
|||
|
static int penRfGapLinear; // coeff of linear term for cost of gap extension in ref
|
|||
|
SimpleFunc scoreMin; // minimum valid score as function of read len
|
|||
|
static SimpleFunc nCeil; // max # Ns allowed as function of read len
|
|||
|
static SimpleFunc msIval; // interval between seeds as function of read len
|
|||
|
static double descConsExp; // how to adjust score minimum as we descent further into index-assisted alignment
|
|||
|
static size_t descentLanding; // don't place a search root if it's within this many positions of end
|
|||
|
static SimpleFunc descentTotSz; // maximum space a DescentDriver can use in bytes
|
|||
|
static SimpleFunc descentTotFmops; // maximum # FM ops a DescentDriver can perform
|
|||
|
static int multiseedMms; // mismatches permitted in a multiseed seed
|
|||
|
static int multiseedLen; // length of multiseed seeds
|
|||
|
static size_t multiseedOff; // offset to begin extracting seeds
|
|||
|
static uint32_t seedCacheLocalMB; // # MB to use for non-shared seed alignment cacheing
|
|||
|
static uint32_t seedCacheCurrentMB; // # MB to use for current-read seed hit cacheing
|
|||
|
static uint32_t exactCacheCurrentMB; // # MB to use for current-read seed hit cacheing
|
|||
|
static size_t maxhalf; // max width on one side of DP table
|
|||
|
static bool seedSumm; // print summary information about seed hits, not alignments
|
|||
|
static bool doUngapped; // do ungapped alignment
|
|||
|
static size_t maxIters; // stop after this many extend loop iterations
|
|||
|
static size_t maxUg; // stop after this many ungap extends
|
|||
|
static size_t maxDp; // stop after this many DPs
|
|||
|
static size_t maxItersIncr; // amt to add to maxIters for each -k > 1
|
|||
|
static size_t maxEeStreak; // stop after this many end-to-end fails in a row
|
|||
|
static size_t maxUgStreak; // stop after this many ungap fails in a row
|
|||
|
static size_t maxDpStreak; // stop after this many dp fails in a row
|
|||
|
static size_t maxStreakIncr; // amt to add to streak for each -k > 1
|
|||
|
static size_t maxMateStreak; // stop seed range after this many mate-find fails
|
|||
|
static bool doExtend; // extend seed hits
|
|||
|
static bool enable8; // use 8-bit SSE where possible?
|
|||
|
static size_t cminlen; // longer reads use checkpointing
|
|||
|
static size_t cpow2; // checkpoint interval log2
|
|||
|
static bool doTri; // do triangular mini-fills?
|
|||
|
static string defaultPreset; // default preset; applied immediately
|
|||
|
static bool ignoreQuals; // all mms incur same penalty, regardless of qual
|
|||
|
static string wrapper; // type of wrapper script, so we can print correct usage
|
|||
|
static EList<string> queries; // list of query files
|
|||
|
static string outfile; // write SAM output to this file
|
|||
|
static int mapqv; // MAPQ calculation version
|
|||
|
static int tighten; // -M tighten mode (0=none, 1=best, 2=secbest+1)
|
|||
|
static bool doExactUpFront; // do exact search up front if seeds seem good enough
|
|||
|
static bool do1mmUpFront; // do 1mm search up front if seeds seem good enough
|
|||
|
static size_t do1mmMinLen; // length below which we disable 1mm e2e search
|
|||
|
static int seedBoostThresh; // if average non-zero position has more than this many elements
|
|||
|
static size_t maxSeeds; // maximum number of seeds allowed
|
|||
|
static size_t nSeedRounds; // # seed rounds
|
|||
|
static bool reorder; // true -> reorder SAM recs in -p mode
|
|||
|
static float sampleFrac; // only align random fraction of input reads
|
|||
|
static bool arbitraryRandom; // pseudo-randoms no longer a function of read properties
|
|||
|
static bool bowtie2p5;
|
|||
|
static bool useTempSpliceSite;
|
|||
|
static int penCanSplice;
|
|||
|
static int penNoncanSplice;
|
|||
|
static int penConflictSplice;
|
|||
|
static SimpleFunc penCanIntronLen;
|
|||
|
static SimpleFunc penNoncanIntronLen;
|
|||
|
static size_t minIntronLen;
|
|||
|
static size_t maxIntronLen;
|
|||
|
static string knownSpliceSiteInfile; //
|
|||
|
static string novelSpliceSiteInfile; //
|
|||
|
static string novelSpliceSiteOutfile; //
|
|||
|
static bool secondary;
|
|||
|
static bool no_spliced_alignment;
|
|||
|
static int rna_strandness; //
|
|||
|
static bool splicesite_db_only; //
|
|||
|
|
|||
|
static bool anchorStop;
|
|||
|
static bool pseudogeneStop;
|
|||
|
static bool tranMapOnly; // transcriptome mapping only
|
|||
|
static bool tranAssm; // alignments selected for downstream transcript assembly such as StringTie and Cufflinks
|
|||
|
static string tranAssm_program;
|
|||
|
static bool avoid_pseudogene;
|
|||
|
|
|||
|
#ifdef USE_SRA
|
|||
|
static EList<string> sra_accs;
|
|||
|
#endif
|
|||
|
|
|||
|
static string bt2indexs[2]; // read Bowtie 2 index from files with this prefix
|
|||
|
static EList<pair<int, string> > extra_opts;
|
|||
|
static size_t extra_opts_cur;
|
|||
|
|
|||
|
static EList<uint64_t> thread_rids;
|
|||
|
static MUTEX_T thread_rids_mutex;
|
|||
|
static uint64_t thread_rids_mindist;
|
|||
|
|
|||
|
static bool rmChrName; // remove "chr" from reference names (e.g., chr18 to 18)
|
|||
|
static bool addChrName; // add "chr" to reference names (e.g., 18 to chr18)
|
|||
|
|
|||
|
static size_t max_alts_tried;
|
|||
|
static bool use_haplotype;
|
|||
|
static bool enable_codis;
|
|||
|
|
|||
|
static bool templateLenAdjustment;
|
|||
|
static string alignSumFile; // write alignment summary stat. to this file
|
|||
|
static bool newAlignSummary;
|
|||
|
|
|||
|
static int bowtie2_dp; // Bowtie2's dynamic programming alignment (0: no dynamic programming, 1: conditional dynamic programming, and 2: uncoditional dynamic programming)
|
|||
|
static bool fast; // --fast
|
|||
|
static bool sensitive; // --sensitive
|
|||
|
static bool very_sensitive; // --very-sensitive
|
|||
|
|
|||
|
static bool repeat;
|
|||
|
static bool use_repeat_index;
|
|||
|
static EList<size_t> readLens;
|
|||
|
|
|||
|
// 3N variable
|
|||
|
bool threeN = false; // indicator for 3N mode.
|
|||
|
bool base_change_entered; // set true once user used --base-change
|
|||
|
|
|||
|
char usrInput_convertedFrom; // user input converted from. the nucleotide is replaced by others in sample preparation protocol. for sequence comparison step in HISAT-3N.
|
|||
|
char usrInput_convertedTo; // user input converted To. the nucleotide to others in sample preparation protocol. for sequence comparison step in HISAT-3N.
|
|||
|
char usrInput_convertedFromComplement; // the complement of usrInput_convertedFrom. for sequence comparison step in HISAT-3N.
|
|||
|
char usrInput_convertedToComplement; // the complement of usrInput_convertedTo. for sequence comparison step in HISAT-3N.
|
|||
|
|
|||
|
char hs3N_convertedFrom; // the actual converted from by HISAT-3N. use in + strand.
|
|||
|
char hs3N_convertedTo; // the actual converted to by HISAT-3N. use in + strand.
|
|||
|
char hs3N_convertedFromComplement; // the complement of hs3N_convertedFrom. use in - strand.
|
|||
|
char hs3N_convertedToComplement; // the complement of hs3N_convertedTo. use in - strand.
|
|||
|
|
|||
|
string threeN_indexTags[2];
|
|||
|
|
|||
|
vector<ht2_handle_t> repeatHandles; // the 2 repeat handles helps expand the repeat alignment information. 0 for + strand. 1 for - strand.
|
|||
|
struct ht2_index_getrefnames_result *refNameMap; // chromosome names and it's index for repeat alignment.
|
|||
|
int repeatLimit; // expand #repeatLimit of qualified position in repeat alignment.
|
|||
|
bool uniqueOutputOnly; // only output the unique alignment result.
|
|||
|
int nMappingCycle; // =1 for standard HISAT2, =4 for HISAT-3N
|
|||
|
bool mappingCycles[4]; // this array will indicate which mapping cycle will be run
|
|||
|
int directional3NMapping; // =0 for non-directional mapping, =1 for directional mapping and read1/single-end map to fw reference, =2 for reverse directional mapping and read1/single-end map to rc reference.
|
|||
|
|
|||
|
// Largest finite double. Passed as an argument to several SimpleFunc init()
// calls in resetOptions(). NOTE(review): presumably it means "no upper
// bound" there; confirm against SimpleFunc::init.
#define DMAX std::numeric_limits<double>::max()
|
|||
|
|
|||
|
static void resetOptions() {
|
|||
|
mates1.clear();
|
|||
|
mates2.clear();
|
|||
|
mates12.clear();
|
|||
|
adjIdxBase = "";
|
|||
|
adjIdxBases_3N[0] = "";
|
|||
|
adjIdxBases_3N[1] = "";
|
|||
|
gColor = false;
|
|||
|
gVerbose = 0;
|
|||
|
startVerbose = 0;
|
|||
|
gQuiet = false;
|
|||
|
sanityCheck = 0; // enable expensive sanity checks
|
|||
|
format = FASTQ; // default read format is FASTQ
|
|||
|
origString = ""; // reference text, or filename(s)
|
|||
|
seed = 0; // srandom() seed
|
|||
|
timing = 0; // whether to report basic timing data
|
|||
|
metricsIval = 1; // interval between alignment metrics messages (0 = no messages)
|
|||
|
metricsFile = ""; // output file to put alignment metrics in
|
|||
|
metricsStderr = false; // print metrics to stderr (in addition to --metrics-file if it's specified
|
|||
|
metricsPerRead = false; // report a metrics tuple for every read?
|
|||
|
allHits = false; // for multihits, report just one
|
|||
|
showVersion = false; // just print version and quit?
|
|||
|
ipause = 0; // pause before maching?
|
|||
|
qUpto = 0xffffffff; // max # of queries to read
|
|||
|
gTrim5 = 0; // amount to trim from 5' end
|
|||
|
gTrim3 = 0; // amount to trim from 3' end
|
|||
|
offRate = -1; // keep default offRate
|
|||
|
solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
|
|||
|
phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33)
|
|||
|
integerQuals = false; // quality strings are space-separated strings of integers, not ASCII
|
|||
|
nthreads = 1; // number of pthreads operating concurrently
|
|||
|
outType = OUTPUT_SAM; // style of output
|
|||
|
noRefNames = false; // true -> print reference indexes; not names
|
|||
|
khits = 10; // number of hits per read; >1 is much slower
|
|||
|
mhits = 0; // stop after finding this many alignments+1
|
|||
|
partitionSz = 0; // output a partitioning key in first field
|
|||
|
useSpinlock = true; // false -> don't use of spinlocks even if they're #defines
|
|||
|
fileParallel = false; // separate threads read separate input files in parallel
|
|||
|
useShmem = false; // use shared memory to hold the index
|
|||
|
useMm = false; // use memory-mapped files to hold the index
|
|||
|
mmSweep = false; // sweep through memory-mapped files immediately after mapping
|
|||
|
gMinInsert = 0; // minimum insert size
|
|||
|
gMaxInsert = 1000; // maximum insert size
|
|||
|
gMate1fw = true; // -1 mate aligns in fw orientation on fw strand
|
|||
|
gMate2fw = false; // -2 mate aligns in rc orientation on fw strand
|
|||
|
gFlippedMatesOK = false; // allow mates to be in wrong order
|
|||
|
gDovetailMatesOK = false; // allow one mate to extend off the end of the other
|
|||
|
gContainMatesOK = true; // allow one mate to contain the other in PE alignment
|
|||
|
gOlapMatesOK = true; // allow mates to overlap in PE alignment
|
|||
|
gExpandToFrag = true; // incr max frag length to =larger mate len if necessary
|
|||
|
gReportDiscordant = true; // find and report discordant paired-end alignments
|
|||
|
gReportMixed = true; // find and report unpaired alignments for paired reads
|
|||
|
|
|||
|
cacheLimit = 5; // ranges w/ size > limit will be cached
|
|||
|
cacheSize = 0; // # words per range cache
|
|||
|
skipReads = 0; // # reads/read pairs to skip
|
|||
|
gNofw = false; // don't align fw orientation of read
|
|||
|
gNorc = false; // don't align rc orientation of read
|
|||
|
fastaContLen = 0;
|
|||
|
fastaContFreq = 0;
|
|||
|
hadoopOut = false; // print Hadoop status and summary messages
|
|||
|
fuzzy = false; // reads will have alternate basecalls w/ qualities
|
|||
|
fullRef = false; // print entire reference name instead of just up to 1st space
|
|||
|
samTruncQname = true; // whether to truncate QNAME to 255 chars
|
|||
|
samOmitSecSeqQual = false; // omit SEQ/QUAL for 2ndary alignments?
|
|||
|
samNoUnal = false; // omit SAM records for unaligned reads
|
|||
|
samNoHead = false; // don't print any header lines in SAM output
|
|||
|
samNoSQ = false; // don't print @SQ header lines
|
|||
|
sam_print_as = true;
|
|||
|
sam_print_xs = true;
|
|||
|
sam_print_xss = false; // Xs:i and Ys:i
|
|||
|
sam_print_yn = false; // YN:i and Yn:i
|
|||
|
sam_print_xn = true;
|
|||
|
sam_print_cs = false;
|
|||
|
sam_print_cq = false;
|
|||
|
sam_print_x0 = true;
|
|||
|
sam_print_x1 = true;
|
|||
|
sam_print_xm = true;
|
|||
|
sam_print_xo = true;
|
|||
|
sam_print_xg = true;
|
|||
|
sam_print_nm = true;
|
|||
|
sam_print_md = true;
|
|||
|
sam_print_yf = true;
|
|||
|
sam_print_yi = false;
|
|||
|
sam_print_ym = false;
|
|||
|
sam_print_yp = false;
|
|||
|
sam_print_yt = true;
|
|||
|
sam_print_ys = true;
|
|||
|
sam_print_zs = false;
|
|||
|
sam_print_xr = false;
|
|||
|
sam_print_xt = false;
|
|||
|
sam_print_xd = false;
|
|||
|
sam_print_xu = false;
|
|||
|
sam_print_yl = false;
|
|||
|
sam_print_ye = false;
|
|||
|
sam_print_yu = false;
|
|||
|
sam_print_xp = false;
|
|||
|
sam_print_yr = false;
|
|||
|
sam_print_zb = false;
|
|||
|
sam_print_zr = false;
|
|||
|
sam_print_zf = false;
|
|||
|
sam_print_zm = false;
|
|||
|
sam_print_zi = false;
|
|||
|
sam_print_zp = false;
|
|||
|
sam_print_zu = false;
|
|||
|
sam_print_xs_a = true;
|
|||
|
sam_print_nh = true;
|
|||
|
bwaSwLike = false;
|
|||
|
bwaSwLikeC = 5.5f;
|
|||
|
bwaSwLikeT = 20.0f;
|
|||
|
qcFilter = false; // don't believe upstream qc by default
|
|||
|
sortByScore = true; // prioritize alignments to report by score?
|
|||
|
rgid = ""; // SAM outputs for @RG header line
|
|||
|
rgs = ""; // SAM outputs for @RG header line
|
|||
|
rgs_optflag = ""; // SAM optional flag to add corresponding to @RG ID
|
|||
|
msample = true;
|
|||
|
gGapBarrier = 4; // disallow gaps within this many chars of either end of alignment
|
|||
|
qualities.clear();
|
|||
|
qualities1.clear();
|
|||
|
qualities2.clear();
|
|||
|
polstr.clear();
|
|||
|
msNoCache = true; // true -> disable local cache
|
|||
|
bonusMatchType = DEFAULT_MATCH_BONUS_TYPE;
|
|||
|
bonusMatch = DEFAULT_MATCH_BONUS;
|
|||
|
penMmcType = DEFAULT_MM_PENALTY_TYPE;
|
|||
|
penMmcMax = DEFAULT_MM_PENALTY_MAX;
|
|||
|
penMmcMin = DEFAULT_MM_PENALTY_MIN;
|
|||
|
penScMax = DEFAULT_SC_PENALTY_MAX;
|
|||
|
penScMin = DEFAULT_SC_PENALTY_MIN;
|
|||
|
penNType = DEFAULT_N_PENALTY_TYPE;
|
|||
|
penN = DEFAULT_N_PENALTY;
|
|||
|
penNCatPair = DEFAULT_N_CAT_PAIR; // concatenate mates before N filtering?
|
|||
|
localAlign = false; // do local alignment in DP steps
|
|||
|
noisyHpolymer = false;
|
|||
|
penRdGapConst = DEFAULT_READ_GAP_CONST;
|
|||
|
penRfGapConst = DEFAULT_REF_GAP_CONST;
|
|||
|
penRdGapLinear = DEFAULT_READ_GAP_LINEAR;
|
|||
|
penRfGapLinear = DEFAULT_REF_GAP_LINEAR;
|
|||
|
scoreMin.init (SIMPLE_FUNC_LINEAR, 0.0f, -0.2f);
|
|||
|
// scoreMin.init (SIMPLE_FUNC_CONST, -18, 0);
|
|||
|
nCeil.init (SIMPLE_FUNC_LINEAR, 0.0f, DMAX, 2.0f, 0.1f);
|
|||
|
msIval.init (SIMPLE_FUNC_LINEAR, 1.0f, DMAX, DEFAULT_IVAL_B, DEFAULT_IVAL_A);
|
|||
|
descConsExp = 2.0;
|
|||
|
descentLanding = 20;
|
|||
|
descentTotSz.init(SIMPLE_FUNC_LINEAR, 1024.0, DMAX, 0.0, 1024.0);
|
|||
|
descentTotFmops.init(SIMPLE_FUNC_LINEAR, 100.0, DMAX, 0.0, 10.0);
|
|||
|
multiseedMms = DEFAULT_SEEDMMS;
|
|||
|
multiseedLen = DEFAULT_SEEDLEN;
|
|||
|
multiseedOff = 0;
|
|||
|
seedCacheLocalMB = 32; // # MB to use for non-shared seed alignment cacheing
|
|||
|
seedCacheCurrentMB = 20; // # MB to use for current-read seed hit cacheing
|
|||
|
exactCacheCurrentMB = 20; // # MB to use for current-read seed hit cacheing
|
|||
|
maxhalf = 15; // max width on one side of DP table
|
|||
|
seedSumm = false; // print summary information about seed hits, not alignments
|
|||
|
doUngapped = true; // do ungapped alignment
|
|||
|
maxIters = 400; // max iterations of extend loop
|
|||
|
maxUg = 300; // stop after this many ungap extends
|
|||
|
maxDp = 300; // stop after this many dp extends
|
|||
|
maxItersIncr = 20; // amt to add to maxIters for each -k > 1
|
|||
|
maxEeStreak = 15; // stop after this many end-to-end fails in a row
|
|||
|
maxUgStreak = 15; // stop after this many ungap fails in a row
|
|||
|
maxDpStreak = 15; // stop after this many dp fails in a row
|
|||
|
maxStreakIncr = 10; // amt to add to streak for each -k > 1
|
|||
|
maxMateStreak = 10; // in PE: abort seed range after N mate-find fails
|
|||
|
doExtend = true; // do seed extensions
|
|||
|
enable8 = true; // use 8-bit SSE where possible?
|
|||
|
cminlen = 2000; // longer reads use checkpointing
|
|||
|
cpow2 = 4; // checkpoint interval log2
|
|||
|
doTri = false; // do triangular mini-fills?
|
|||
|
defaultPreset = "sensitive%LOCAL%"; // default preset; applied immediately
|
|||
|
extra_opts.clear();
|
|||
|
extra_opts_cur = 0;
|
|||
|
bt2indexs[0].clear(); // read Bowtie 2 index from files with this prefix
|
|||
|
bt2indexs[1].clear();
|
|||
|
ignoreQuals = false; // all mms incur same penalty, regardless of qual
|
|||
|
wrapper.clear(); // type of wrapper script, so we can print correct usage
|
|||
|
queries.clear(); // list of query files
|
|||
|
outfile.clear(); // write SAM output to this file
|
|||
|
mapqv = 2; // MAPQ calculation version
|
|||
|
tighten = 3; // -M tightening mode
|
|||
|
doExactUpFront = true; // do exact search up front if seeds seem good enough
|
|||
|
do1mmUpFront = true; // do 1mm search up front if seeds seem good enough
|
|||
|
seedBoostThresh = 300; // if average non-zero position has more than this many elements
|
|||
|
nSeedRounds = 2; // # rounds of seed searches to do for repetitive reads
|
|||
|
maxSeeds = 0; // maximum number of seeds allowed
|
|||
|
do1mmMinLen = 60; // length below which we disable 1mm search
|
|||
|
reorder = false; // reorder SAM records with -p > 1
|
|||
|
sampleFrac = 1.1f; // align all reads
|
|||
|
arbitraryRandom = false; // let pseudo-random seeds be a function of read properties
|
|||
|
bowtie2p5 = false;
|
|||
|
useTempSpliceSite = true;
|
|||
|
penCanSplice = 0;
|
|||
|
penNoncanSplice = 12;
|
|||
|
penConflictSplice = 1000000;
|
|||
|
penCanIntronLen.init(SIMPLE_FUNC_LOG, -8, 1);
|
|||
|
penNoncanIntronLen.init(SIMPLE_FUNC_LOG, -8, 1);
|
|||
|
minIntronLen = 20;
|
|||
|
maxIntronLen = 500000;
|
|||
|
knownSpliceSiteInfile = "";
|
|||
|
novelSpliceSiteInfile = "";
|
|||
|
novelSpliceSiteOutfile = "";
|
|||
|
secondary = false; // allow secondary alignments
|
|||
|
no_spliced_alignment = false;
|
|||
|
rna_strandness = RNA_STRANDNESS_UNKNOWN;
|
|||
|
splicesite_db_only = false;
|
|||
|
anchorStop = true;
|
|||
|
pseudogeneStop = true;
|
|||
|
tranMapOnly = false;
|
|||
|
tranAssm = false;
|
|||
|
tranAssm_program = "";
|
|||
|
avoid_pseudogene = false;
|
|||
|
|
|||
|
#ifdef USE_SRA
|
|||
|
sra_accs.clear();
|
|||
|
#endif
|
|||
|
|
|||
|
rmChrName = false;
|
|||
|
addChrName = false;
|
|||
|
|
|||
|
max_alts_tried = 16;
|
|||
|
use_haplotype = false;
|
|||
|
enable_codis = false;
|
|||
|
|
|||
|
templateLenAdjustment = true;
|
|||
|
alignSumFile = "";
|
|||
|
newAlignSummary = false;
|
|||
|
|
|||
|
bowtie2_dp = 0; // disable Bowtie2's dynamic programming alignment
|
|||
|
fast = false;
|
|||
|
sensitive = false;
|
|||
|
very_sensitive = false;
|
|||
|
|
|||
|
repeat = false; // true iff alignments to repeat sequences are directly reported.
|
|||
|
use_repeat_index = true;
|
|||
|
readLens.clear();
|
|||
|
|
|||
|
refNameMap = NULL;
|
|||
|
threeN = false;
|
|||
|
repeatLimit = 1000;
|
|||
|
uniqueOutputOnly = false;
|
|||
|
base_change_entered = false;
|
|||
|
threeN_indexTags[0] = ".3n.";
|
|||
|
threeN_indexTags[1] = ".3n.";
|
|||
|
nMappingCycle = 1;
|
|||
|
directional3NMapping = 0;
|
|||
|
for (int i = 0; i < 4; i++){
|
|||
|
mappingCycles[i] = false;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// Short-option specification in getopt syntax: each character is an option
// letter, and a ':' after a letter marks that option as taking an argument.
// NOTE(review): presumably passed to getopt_long() together with
// long_options; the call site is not visible in this part of the file.
static const char *short_options = "fF:qbzhcu:rv:s:aP:t3:5:w:p:k:M:1:2:I:X:CQ:N:i:L:U:x:S:g:O:D:R:";
|
|||
|
|
|||
|
static struct option long_options[] = {
|
|||
|
{(char*)"verbose", no_argument, 0, ARG_VERBOSE},
|
|||
|
{(char*)"startverbose", no_argument, 0, ARG_STARTVERBOSE},
|
|||
|
{(char*)"quiet", no_argument, 0, ARG_QUIET},
|
|||
|
{(char*)"sanity", no_argument, 0, ARG_SANITY},
|
|||
|
{(char*)"pause", no_argument, &ipause, 1},
|
|||
|
{(char*)"orig", required_argument, 0, ARG_ORIG},
|
|||
|
{(char*)"all", no_argument, 0, 'a'},
|
|||
|
{(char*)"solexa-quals", no_argument, 0, ARG_SOLEXA_QUALS},
|
|||
|
{(char*)"integer-quals",no_argument, 0, ARG_INTEGER_QUALS},
|
|||
|
{(char*)"int-quals", no_argument, 0, ARG_INTEGER_QUALS},
|
|||
|
{(char*)"metrics", required_argument, 0, ARG_METRIC_IVAL},
|
|||
|
{(char*)"metrics-file", required_argument, 0, ARG_METRIC_FILE},
|
|||
|
{(char*)"metrics-stderr",no_argument, 0, ARG_METRIC_STDERR},
|
|||
|
{(char*)"metrics-per-read", no_argument, 0, ARG_METRIC_PER_READ},
|
|||
|
{(char*)"met-read", no_argument, 0, ARG_METRIC_PER_READ},
|
|||
|
{(char*)"met", required_argument, 0, ARG_METRIC_IVAL},
|
|||
|
{(char*)"met-file", required_argument, 0, ARG_METRIC_FILE},
|
|||
|
{(char*)"met-stderr", no_argument, 0, ARG_METRIC_STDERR},
|
|||
|
{(char*)"time", no_argument, 0, 't'},
|
|||
|
{(char*)"trim3", required_argument, 0, '3'},
|
|||
|
{(char*)"trim5", required_argument, 0, '5'},
|
|||
|
{(char*)"seed", required_argument, 0, ARG_SEED},
|
|||
|
{(char*)"qupto", required_argument, 0, 'u'},
|
|||
|
{(char*)"upto", required_argument, 0, 'u'},
|
|||
|
{(char*)"version", no_argument, 0, ARG_VERSION},
|
|||
|
{(char*)"filepar", no_argument, 0, ARG_FILEPAR},
|
|||
|
{(char*)"help", no_argument, 0, 'h'},
|
|||
|
{(char*)"threads", required_argument, 0, 'p'},
|
|||
|
{(char*)"khits", required_argument, 0, 'k'},
|
|||
|
{(char*)"minins", required_argument, 0, 'I'},
|
|||
|
{(char*)"maxins", required_argument, 0, 'X'},
|
|||
|
{(char*)"quals", required_argument, 0, 'Q'},
|
|||
|
{(char*)"Q1", required_argument, 0, ARG_QUALS1},
|
|||
|
{(char*)"Q2", required_argument, 0, ARG_QUALS2},
|
|||
|
{(char*)"refidx", no_argument, 0, ARG_REFIDX},
|
|||
|
{(char*)"partition", required_argument, 0, ARG_PARTITION},
|
|||
|
{(char*)"ff", no_argument, 0, ARG_FF},
|
|||
|
{(char*)"fr", no_argument, 0, ARG_FR},
|
|||
|
{(char*)"rf", no_argument, 0, ARG_RF},
|
|||
|
{(char*)"cachelim", required_argument, 0, ARG_CACHE_LIM},
|
|||
|
{(char*)"cachesz", required_argument, 0, ARG_CACHE_SZ},
|
|||
|
{(char*)"nofw", no_argument, 0, ARG_NO_FW},
|
|||
|
{(char*)"norc", no_argument, 0, ARG_NO_RC},
|
|||
|
{(char*)"skip", required_argument, 0, 's'},
|
|||
|
{(char*)"12", required_argument, 0, ARG_ONETWO},
|
|||
|
{(char*)"tab5", required_argument, 0, ARG_TAB5},
|
|||
|
{(char*)"tab6", required_argument, 0, ARG_TAB6},
|
|||
|
{(char*)"phred33-quals", no_argument, 0, ARG_PHRED33},
|
|||
|
{(char*)"phred64-quals", no_argument, 0, ARG_PHRED64},
|
|||
|
{(char*)"phred33", no_argument, 0, ARG_PHRED33},
|
|||
|
{(char*)"phred64", no_argument, 0, ARG_PHRED64},
|
|||
|
{(char*)"solexa1.3-quals", no_argument, 0, ARG_PHRED64},
|
|||
|
{(char*)"mm", no_argument, 0, ARG_MM},
|
|||
|
{(char*)"shmem", no_argument, 0, ARG_SHMEM},
|
|||
|
{(char*)"mmsweep", no_argument, 0, ARG_MMSWEEP},
|
|||
|
{(char*)"hadoopout", no_argument, 0, ARG_HADOOPOUT},
|
|||
|
{(char*)"fuzzy", no_argument, 0, ARG_FUZZY},
|
|||
|
{(char*)"fullref", no_argument, 0, ARG_FULLREF},
|
|||
|
{(char*)"usage", no_argument, 0, ARG_USAGE},
|
|||
|
{(char*)"sam-no-qname-trunc", no_argument, 0, ARG_SAM_NO_QNAME_TRUNC},
|
|||
|
{(char*)"sam-omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ},
|
|||
|
{(char*)"omit-sec-seq", no_argument, 0, ARG_SAM_OMIT_SEC_SEQ},
|
|||
|
{(char*)"sam-no-head", no_argument, 0, ARG_SAM_NOHEAD},
|
|||
|
{(char*)"sam-nohead", no_argument, 0, ARG_SAM_NOHEAD},
|
|||
|
{(char*)"sam-noHD", no_argument, 0, ARG_SAM_NOHEAD},
|
|||
|
{(char*)"sam-no-hd", no_argument, 0, ARG_SAM_NOHEAD},
|
|||
|
{(char*)"sam-nosq", no_argument, 0, ARG_SAM_NOSQ},
|
|||
|
{(char*)"sam-no-sq", no_argument, 0, ARG_SAM_NOSQ},
|
|||
|
{(char*)"sam-noSQ", no_argument, 0, ARG_SAM_NOSQ},
|
|||
|
{(char*)"no-head", no_argument, 0, ARG_SAM_NOHEAD},
|
|||
|
{(char*)"no-hd", no_argument, 0, ARG_SAM_NOHEAD},
|
|||
|
{(char*)"no-sq", no_argument, 0, ARG_SAM_NOSQ},
|
|||
|
{(char*)"no-HD", no_argument, 0, ARG_SAM_NOHEAD},
|
|||
|
{(char*)"no-SQ", no_argument, 0, ARG_SAM_NOSQ},
|
|||
|
{(char*)"no-unal", no_argument, 0, ARG_SAM_NO_UNAL},
|
|||
|
{(char*)"color", no_argument, 0, 'C'},
|
|||
|
{(char*)"sam-RG", required_argument, 0, ARG_SAM_RG},
|
|||
|
{(char*)"sam-rg", required_argument, 0, ARG_SAM_RG},
|
|||
|
{(char*)"sam-rg-id", required_argument, 0, ARG_SAM_RGID},
|
|||
|
{(char*)"RG", required_argument, 0, ARG_SAM_RG},
|
|||
|
{(char*)"rg", required_argument, 0, ARG_SAM_RG},
|
|||
|
{(char*)"rg-id", required_argument, 0, ARG_SAM_RGID},
|
|||
|
{(char*)"snpphred", required_argument, 0, ARG_SNPPHRED},
|
|||
|
{(char*)"snpfrac", required_argument, 0, ARG_SNPFRAC},
|
|||
|
{(char*)"gbar", required_argument, 0, ARG_GAP_BAR},
|
|||
|
{(char*)"qseq", no_argument, 0, ARG_QSEQ},
|
|||
|
{(char*)"policy", required_argument, 0, ARG_ALIGN_POLICY},
|
|||
|
{(char*)"preset", required_argument, 0, 'P'},
|
|||
|
{(char*)"seed-summ", no_argument, 0, ARG_SEED_SUMM},
|
|||
|
{(char*)"seed-summary", no_argument, 0, ARG_SEED_SUMM},
|
|||
|
{(char*)"overhang", no_argument, 0, ARG_OVERHANG},
|
|||
|
{(char*)"no-cache", no_argument, 0, ARG_NO_CACHE},
|
|||
|
{(char*)"cache", no_argument, 0, ARG_USE_CACHE},
|
|||
|
{(char*)"454", no_argument, 0, ARG_NOISY_HPOLY},
|
|||
|
{(char*)"ion-torrent", no_argument, 0, ARG_NOISY_HPOLY},
|
|||
|
{(char*)"no-mixed", no_argument, 0, ARG_NO_MIXED},
|
|||
|
{(char*)"no-discordant",no_argument, 0, ARG_NO_DISCORDANT},
|
|||
|
// {(char*)"local", no_argument, 0, ARG_LOCAL},
|
|||
|
{(char*)"end-to-end", no_argument, 0, ARG_END_TO_END},
|
|||
|
{(char*)"ungapped", no_argument, 0, ARG_UNGAPPED},
|
|||
|
{(char*)"no-ungapped", no_argument, 0, ARG_UNGAPPED_NO},
|
|||
|
{(char*)"sse8", no_argument, 0, ARG_SSE8},
|
|||
|
{(char*)"no-sse8", no_argument, 0, ARG_SSE8_NO},
|
|||
|
{(char*)"scan-narrowed",no_argument, 0, ARG_SCAN_NARROWED},
|
|||
|
{(char*)"qc-filter", no_argument, 0, ARG_QC_FILTER},
|
|||
|
{(char*)"bwa-sw-like", no_argument, 0, ARG_BWA_SW_LIKE},
|
|||
|
{(char*)"multiseed", required_argument, 0, ARG_MULTISEED_IVAL},
|
|||
|
{(char*)"ma", required_argument, 0, ARG_SCORE_MA},
|
|||
|
{(char*)"mp", required_argument, 0, ARG_SCORE_MMP},
|
|||
|
{(char*)"sp", required_argument, 0, ARG_SCORE_SCP},
|
|||
|
{(char*)"no-softclip", no_argument, 0, ARG_NO_SOFTCLIP},
|
|||
|
{(char*)"np", required_argument, 0, ARG_SCORE_NP},
|
|||
|
{(char*)"rdg", required_argument, 0, ARG_SCORE_RDG},
|
|||
|
{(char*)"rfg", required_argument, 0, ARG_SCORE_RFG},
|
|||
|
{(char*)"score-min", required_argument, 0, ARG_SCORE_MIN},
|
|||
|
{(char*)"min-score", required_argument, 0, ARG_SCORE_MIN},
|
|||
|
{(char*)"n-ceil", required_argument, 0, ARG_N_CEIL},
|
|||
|
{(char*)"dpad", required_argument, 0, ARG_DPAD},
|
|||
|
{(char*)"mapq-print-inputs",no_argument, 0, ARG_SAM_PRINT_YI},
|
|||
|
{(char*)"very-fast", no_argument, 0, ARG_PRESET_VERY_FAST},
|
|||
|
{(char*)"fast", no_argument, 0, ARG_PRESET_FAST},
|
|||
|
{(char*)"sensitive", no_argument, 0, ARG_PRESET_SENSITIVE},
|
|||
|
{(char*)"very-sensitive", no_argument, 0, ARG_PRESET_VERY_SENSITIVE},
|
|||
|
// {(char*)"very-fast-local", no_argument, 0, ARG_PRESET_VERY_FAST_LOCAL},
|
|||
|
// {(char*)"fast-local", no_argument, 0, ARG_PRESET_FAST_LOCAL},
|
|||
|
// {(char*)"sensitive-local", no_argument, 0, ARG_PRESET_SENSITIVE_LOCAL},
|
|||
|
// {(char*)"very-sensitive-local", no_argument, 0, ARG_PRESET_VERY_SENSITIVE_LOCAL},
|
|||
|
{(char*)"no-score-priority",no_argument, 0, ARG_NO_SCORE_PRIORITY},
|
|||
|
{(char*)"seedlen", required_argument, 0, 'L'},
|
|||
|
{(char*)"seedmms", required_argument, 0, 'N'},
|
|||
|
{(char*)"seedival", required_argument, 0, 'i'},
|
|||
|
{(char*)"ignore-quals", no_argument, 0, ARG_IGNORE_QUALS},
|
|||
|
{(char*)"index", required_argument, 0, 'x'},
|
|||
|
{(char*)"arg-desc", no_argument, 0, ARG_DESC},
|
|||
|
{(char*)"wrapper", required_argument, 0, ARG_WRAPPER},
|
|||
|
{(char*)"unpaired", required_argument, 0, 'U'},
|
|||
|
{(char*)"output", required_argument, 0, 'S'},
|
|||
|
{(char*)"mapq-v", required_argument, 0, ARG_MAPQ_V},
|
|||
|
{(char*)"dovetail", no_argument, 0, ARG_DOVETAIL},
|
|||
|
{(char*)"no-dovetail", no_argument, 0, ARG_NO_DOVETAIL},
|
|||
|
{(char*)"contain", no_argument, 0, ARG_CONTAIN},
|
|||
|
{(char*)"no-contain", no_argument, 0, ARG_NO_CONTAIN},
|
|||
|
{(char*)"overlap", no_argument, 0, ARG_OVERLAP},
|
|||
|
{(char*)"no-overlap", no_argument, 0, ARG_NO_OVERLAP},
|
|||
|
{(char*)"tighten", required_argument, 0, ARG_TIGHTEN},
|
|||
|
{(char*)"exact-upfront", no_argument, 0, ARG_EXACT_UPFRONT},
|
|||
|
{(char*)"1mm-upfront", no_argument, 0, ARG_1MM_UPFRONT},
|
|||
|
{(char*)"no-exact-upfront", no_argument, 0, ARG_EXACT_UPFRONT_NO},
|
|||
|
{(char*)"no-1mm-upfront", no_argument, 0, ARG_1MM_UPFRONT_NO},
|
|||
|
{(char*)"1mm-minlen", required_argument, 0, ARG_1MM_MINLEN},
|
|||
|
{(char*)"seed-off", required_argument, 0, 'O'},
|
|||
|
{(char*)"seed-boost", required_argument, 0, ARG_SEED_BOOST_THRESH},
|
|||
|
{(char*)"max-seeds", required_argument, 0, ARG_MAX_SEEDS},
|
|||
|
{(char*)"read-times", no_argument, 0, ARG_READ_TIMES},
|
|||
|
{(char*)"show-rand-seed", no_argument, 0, ARG_SHOW_RAND_SEED},
|
|||
|
{(char*)"dp-fail-streak", required_argument, 0, ARG_DP_FAIL_STREAK_THRESH},
|
|||
|
{(char*)"ee-fail-streak", required_argument, 0, ARG_EE_FAIL_STREAK_THRESH},
|
|||
|
{(char*)"ug-fail-streak", required_argument, 0, ARG_UG_FAIL_STREAK_THRESH},
|
|||
|
{(char*)"fail-streak", required_argument, 0, 'D'},
|
|||
|
{(char*)"dp-fails", required_argument, 0, ARG_DP_FAIL_THRESH},
|
|||
|
{(char*)"ug-fails", required_argument, 0, ARG_UG_FAIL_THRESH},
|
|||
|
{(char*)"extends", required_argument, 0, ARG_EXTEND_ITERS},
|
|||
|
{(char*)"no-extend", no_argument, 0, ARG_NO_EXTEND},
|
|||
|
{(char*)"mapq-extra", no_argument, 0, ARG_MAPQ_EX},
|
|||
|
{(char*)"seed-rounds", required_argument, 0, 'R'},
|
|||
|
{(char*)"reorder", no_argument, 0, ARG_REORDER},
|
|||
|
{(char*)"passthrough", no_argument, 0, ARG_READ_PASSTHRU},
|
|||
|
{(char*)"sample", required_argument, 0, ARG_SAMPLE},
|
|||
|
{(char*)"cp-min", required_argument, 0, ARG_CP_MIN},
|
|||
|
{(char*)"cp-ival", required_argument, 0, ARG_CP_IVAL},
|
|||
|
{(char*)"tri", no_argument, 0, ARG_TRI},
|
|||
|
{(char*)"nondeterministic", no_argument, 0, ARG_NON_DETERMINISTIC},
|
|||
|
{(char*)"non-deterministic", no_argument, 0, ARG_NON_DETERMINISTIC},
|
|||
|
// {(char*)"local-seed-cache-sz", required_argument, 0, ARG_LOCAL_SEED_CACHE_SZ},
|
|||
|
{(char*)"seed-cache-sz", required_argument, 0, ARG_CURRENT_SEED_CACHE_SZ},
|
|||
|
{(char*)"no-unal", no_argument, 0, ARG_SAM_NO_UNAL},
|
|||
|
{(char*)"test-25", no_argument, 0, ARG_TEST_25},
|
|||
|
// TODO: following should be a function of read length?
|
|||
|
{(char*)"desc-kb", required_argument, 0, ARG_DESC_KB},
|
|||
|
{(char*)"desc-landing", required_argument, 0, ARG_DESC_LANDING},
|
|||
|
{(char*)"desc-exp", required_argument, 0, ARG_DESC_EXP},
|
|||
|
{(char*)"desc-fmops", required_argument, 0, ARG_DESC_FMOPS},
|
|||
|
{(char*)"no-temp-splicesite", no_argument, 0, ARG_NO_TEMPSPLICESITE},
|
|||
|
{(char*)"pen-cansplice", required_argument, 0, ARG_PEN_CANSPLICE},
|
|||
|
{(char*)"pen-noncansplice", required_argument, 0, ARG_PEN_NONCANSPLICE},
|
|||
|
{(char*)"pen-conflictsplice", required_argument, 0, ARG_PEN_CONFLICTSPLICE},
|
|||
|
{(char*)"pen-intronlen", required_argument, 0, ARG_PEN_CANINTRONLEN},
|
|||
|
{(char*)"pen-canintronlen", required_argument, 0, ARG_PEN_CANINTRONLEN},
|
|||
|
{(char*)"pen-noncanintronlen", required_argument, 0, ARG_PEN_NONCANINTRONLEN},
|
|||
|
{(char*)"min-intronlen", required_argument, 0, ARG_MIN_INTRONLEN},
|
|||
|
{(char*)"max-intronlen", required_argument, 0, ARG_MAX_INTRONLEN},
|
|||
|
{(char*)"known-splicesite-infile", required_argument, 0, ARG_KNOWN_SPLICESITE_INFILE},
|
|||
|
{(char*)"novel-splicesite-infile", required_argument, 0, ARG_NOVEL_SPLICESITE_INFILE},
|
|||
|
{(char*)"novel-splicesite-outfile", required_argument, 0, ARG_NOVEL_SPLICESITE_OUTFILE},
|
|||
|
{(char*)"secondary", no_argument, 0, ARG_SECONDARY},
|
|||
|
{(char*)"no-spliced-alignment", no_argument, 0, ARG_NO_SPLICED_ALIGNMENT},
|
|||
|
{(char*)"rna-strandness", required_argument, 0, ARG_RNA_STRANDNESS},
|
|||
|
{(char*)"splicesite-db-only", no_argument, 0, ARG_SPLICESITE_DB_ONLY},
|
|||
|
{(char*)"no-anchorstop", no_argument, 0, ARG_NO_ANCHORSTOP},
|
|||
|
{(char*)"transcriptome-mapping-only", no_argument, 0, ARG_TRANSCRIPTOME_MAPPING_ONLY},
|
|||
|
{(char*)"tmo", no_argument, 0, ARG_TRANSCRIPTOME_MAPPING_ONLY},
|
|||
|
{(char*)"downstream-transcriptome-assembly", no_argument, 0, ARG_TRANSCRIPTOME_ASSEMBLY},
|
|||
|
{(char*)"dta", no_argument, 0, ARG_TRANSCRIPTOME_ASSEMBLY},
|
|||
|
{(char*)"dta-cufflinks", no_argument, 0, ARG_TRANSCRIPTOME_ASSEMBLY_CUFFLINKS},
|
|||
|
{(char*)"avoid-pseudogene",no_argument, 0, ARG_AVOID_PSEUDOGENE},
|
|||
|
{(char*)"no-templatelen-adjustment", no_argument, 0, ARG_NO_TEMPLATELEN_ADJUSTMENT},
|
|||
|
#ifdef USE_SRA
|
|||
|
{(char*)"sra-acc", required_argument, 0, ARG_SRA_ACC},
|
|||
|
#endif
|
|||
|
{(char*)"remove-chrname", no_argument, 0, ARG_REMOVE_CHRNAME},
|
|||
|
{(char*)"add-chrname", no_argument, 0, ARG_ADD_CHRNAME},
|
|||
|
{(char*)"max-altstried", required_argument, 0, ARG_MAX_ALTSTRIED},
|
|||
|
{(char*)"haplotype", no_argument, 0, ARG_HAPLOTYPE},
|
|||
|
{(char*)"enable-codis", no_argument, 0, ARG_CODIS},
|
|||
|
{(char*)"summary-file", required_argument, 0, ARG_SUMMARY_FILE},
|
|||
|
{(char*)"new-summary", no_argument, 0, ARG_NEW_SUMMARY},
|
|||
|
{(char*)"enable-dp", no_argument, 0, ARG_DP},
|
|||
|
{(char*)"bowtie2-dp", required_argument, 0, ARG_DP},
|
|||
|
{(char*)"repeat", no_argument, 0, ARG_REPEAT},
|
|||
|
{(char*)"no-repeat-index", no_argument, 0, ARG_NO_REPEAT_INDEX},
|
|||
|
{(char*)"read-lengths", required_argument, 0, ARG_READ_LENGTHS},
|
|||
|
{(char*)"base-change", required_argument, 0, ARG_BASE_CHANGE},
|
|||
|
{(char*)"repeat-limit", required_argument, 0, ARG_REPEAT_LIMIT},
|
|||
|
{(char*)"unique-only", no_argument, 0, ARG_UNIQUE_ONLY},
|
|||
|
{(char*)"3N", no_argument, 0, ARG_3N},
|
|||
|
{(char*)"directional-mapping", no_argument, 0, ARG_DIRECTIONAL},
|
|||
|
{(char*)"directional-mapping-reverse", no_argument, 0, ARG_DIRECTIONAL_REVERSE},
|
|||
|
{(char*)0, 0, 0, 0} // terminator
|
|||
|
};
|
|||
|
|
|||
|
/**
|
|||
|
* Print out a concise description of what options are taken and whether they
|
|||
|
* take an argument.
|
|||
|
*/
|
|||
|
static void printArgDesc(ostream& out) {
|
|||
|
// struct option {
|
|||
|
// const char *name;
|
|||
|
// int has_arg;
|
|||
|
// int *flag;
|
|||
|
// int val;
|
|||
|
// };
|
|||
|
size_t i = 0;
|
|||
|
while(long_options[i].name != 0) {
|
|||
|
out << long_options[i].name << "\t"
|
|||
|
<< (long_options[i].has_arg == no_argument ? 0 : 1)
|
|||
|
<< endl;
|
|||
|
i++;
|
|||
|
}
|
|||
|
size_t solen = strlen(short_options);
|
|||
|
for(i = 0; i < solen; i++) {
|
|||
|
// Has an option? Does if next char is :
|
|||
|
if(i == solen-1) {
|
|||
|
assert_neq(':', short_options[i]);
|
|||
|
cout << (char)short_options[i] << "\t" << 0 << endl;
|
|||
|
} else {
|
|||
|
if(short_options[i+1] == ':') {
|
|||
|
// Option with argument
|
|||
|
cout << (char)short_options[i] << "\t" << 1 << endl;
|
|||
|
i++; // skip the ':'
|
|||
|
} else {
|
|||
|
// Option with no argument
|
|||
|
cout << (char)short_options[i] << "\t" << 0 << endl;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Print a summary usage message to the provided output stream.
|
|||
|
*/
|
|||
|
static void printUsage(ostream& out) {
|
|||
|
out << "HISAT2 version " << string(HISAT2_VERSION).c_str() << " by Daehwan Kim (infphilo@gmail.com, www.ccb.jhu.edu/people/infphilo)" << endl;
|
|||
|
string tool_name = "hisat2-align";
|
|||
|
if(wrapper == "basic-0") {
|
|||
|
tool_name = "hisat2";
|
|||
|
}
|
|||
|
out << "Usage: " << endl
|
|||
|
#ifdef USE_SRA
|
|||
|
<< " " << tool_name.c_str() << " [options]* -x <ht2-idx> {-1 <m1> -2 <m2> | -U <r> | --sra-acc <SRA accession number>} [-S <sam>]" << endl
|
|||
|
#else
|
|||
|
<< " " << tool_name.c_str() << " [options]* -x <ht2-idx> {-1 <m1> -2 <m2> | -U <r>} [-S <sam>]" << endl
|
|||
|
#endif
|
|||
|
<< endl
|
|||
|
<< " <ht2-idx> Index filename prefix (minus trailing .X." << gfm_ext << ")." << endl
|
|||
|
<< " <m1> Files with #1 mates, paired with files in <m2>." << endl;
|
|||
|
if(wrapper == "basic-0") {
|
|||
|
out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
|
|||
|
}
|
|||
|
out << " <m2> Files with #2 mates, paired with files in <m1>." << endl;
|
|||
|
if(wrapper == "basic-0") {
|
|||
|
out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
|
|||
|
}
|
|||
|
out << " <r> Files with unpaired reads." << endl;
|
|||
|
if(wrapper == "basic-0") {
|
|||
|
out << " Could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2)." << endl;
|
|||
|
}
|
|||
|
#ifdef USE_SRA
|
|||
|
out << " <SRA accession number> Comma-separated list of SRA accession numbers, e.g. --sra-acc SRR353653,SRR353654." << endl;
|
|||
|
#endif
|
|||
|
out << " <sam> File for SAM output (default: stdout)" << endl
|
|||
|
<< endl
|
|||
|
<< " <m1>, <m2>, <r> can be comma-separated lists (no whitespace) and can be" << endl
|
|||
|
<< " specified many times. E.g. '-U file1.fq,file2.fq -U file3.fq'." << endl
|
|||
|
// Wrapper script should write <bam> line next
|
|||
|
<< endl
|
|||
|
<< "Options (defaults in parentheses):" << endl
|
|||
|
<< endl
|
|||
|
<< " Input:" << endl
|
|||
|
<< " -q query input files are FASTQ .fq/.fastq (default)" << endl
|
|||
|
<< " --qseq query input files are in Illumina's qseq format" << endl
|
|||
|
<< " -f query input files are (multi-)FASTA .fa/.mfa" << endl
|
|||
|
<< " -r query input files are raw one-sequence-per-line" << endl
|
|||
|
<< " -c <m1>, <m2>, <r> are sequences themselves, not files" << endl
|
|||
|
<< " -s/--skip <int> skip the first <int> reads/pairs in the input (none)" << endl
|
|||
|
<< " -u/--upto <int> stop after first <int> reads/pairs (no limit)" << endl
|
|||
|
<< " -5/--trim5 <int> trim <int> bases from 5'/left end of reads (0)" << endl
|
|||
|
<< " -3/--trim3 <int> trim <int> bases from 3'/right end of reads (0)" << endl
|
|||
|
<< " --phred33 qualities are Phred+33 (default)" << endl
|
|||
|
<< " --phred64 qualities are Phred+64" << endl
|
|||
|
<< " --int-quals qualities encoded as space-delimited integers" << endl
|
|||
|
#ifdef USE_SRA
|
|||
|
<< " --sra-acc SRA accession ID" << endl
|
|||
|
#endif
|
|||
|
<< endl
|
|||
|
|
|||
|
<< " Presets: Same as:" << endl
|
|||
|
// << " For --end-to-end:" << endl
|
|||
|
// << " --very-fast -D 5 -R 1 -N 0 -L 22 -i S,0,2.50" << endl
|
|||
|
// << " --fast -D 10 -R 2 -N 0 -L 22 -i S,0,2.50" << endl
|
|||
|
// << " --sensitive -D 15 -R 2 -N 0 -L 22 -i S,1,1.15 (default)" << endl
|
|||
|
// << " --very-sensitive -D 20 -R 3 -N 0 -L 20 -i S,1,0.50" << endl
|
|||
|
<< " --fast --no-repeat-index" << endl
|
|||
|
<< " --sensitive --bowtie2-dp 1 -k 30 --score-min L,0,-0.5" << endl
|
|||
|
<< " --very-sensitive --bowtie2-dp 2 -k 50 --score-min L,0,-1" << endl
|
|||
|
<< endl
|
|||
|
<< " Alignment:" << endl
|
|||
|
//<< " -N <int> max # mismatches in seed alignment; can be 0 or 1 (0)" << endl
|
|||
|
//<< " -L <int> length of seed substrings; must be >3, <32 (22)" << endl
|
|||
|
//<< " -i <func> interval between seed substrings w/r/t read len (S,1,1.15)" << endl
|
|||
|
<< " --bowtie2-dp <int> use Bowtie2's dynamic programming alignment algorithm (0) - 0: no dynamic programming, 1: conditional dynamic programming, and 2: unconditional dynamic programming (slowest)" << endl
|
|||
|
<< " --n-ceil <func> func for max # non-A/C/G/Ts permitted in aln (L,0,0.15)" << endl
|
|||
|
//<< " --dpad <int> include <int> extra ref chars on sides of DP table (15)" << endl
|
|||
|
//<< " --gbar <int> disallow gaps within <int> nucs of read extremes (4)" << endl
|
|||
|
<< " --ignore-quals treat all quality values as 30 on Phred scale (off)" << endl
|
|||
|
<< " --nofw do not align forward (original) version of read (off)" << endl
|
|||
|
<< " --norc do not align reverse-complement version of read (off)" << endl
|
|||
|
<< " --no-repeat-index do not use repeat index" << endl
|
|||
|
<< endl
|
|||
|
<< " 3N-Alignment:" << endl
|
|||
|
<< " --base-change <chr,chr> the converted nucleotide and converted to nucleotide (C,T)" << endl
|
|||
|
<< " --directional-mapping make directional mapping, please use this option only if your reads are prepared with a strand specific library (off)" << endl
|
|||
|
<< " --repeat-limit <int> maximum number of repeat will be expanded for repeat alignment (1000)" << endl
|
|||
|
<< " --unique-only only output the reads have unique alignment (off)" << endl
|
|||
|
<< endl
|
|||
|
<< " Spliced Alignment:" << endl
|
|||
|
<< " --pen-cansplice <int> penalty for a canonical splice site (0)" << endl
|
|||
|
<< " --pen-noncansplice <int> penalty for a non-canonical splice site (12)" << endl
|
|||
|
// << " --pen-conflictsplice <int> penalty for conflicting splice sites (1000000)" << endl
|
|||
|
<< " --pen-canintronlen <func> penalty for long introns (G,-8,1) with canonical splice sites" << endl
|
|||
|
<< " --pen-noncanintronlen <func> penalty for long introns (G,-8,1) with noncanonical splice sites" << endl
|
|||
|
<< " --min-intronlen <int> minimum intron length (20)" << endl
|
|||
|
<< " --max-intronlen <int> maximum intron length (500000)" << endl
|
|||
|
<< " --known-splicesite-infile <path> provide a list of known splice sites" << endl
|
|||
|
<< " --novel-splicesite-outfile <path> report a list of splice sites" << endl
|
|||
|
<< " --novel-splicesite-infile <path> provide a list of novel splice sites" << endl
|
|||
|
<< " --no-temp-splicesite disable the use of splice sites found" << endl
|
|||
|
<< " --no-spliced-alignment disable spliced alignment" << endl
|
|||
|
<< " --rna-strandness <string> specify strand-specific information (unstranded)" << endl
|
|||
|
<< " --tmo reports only those alignments within known transcriptome" << endl
|
|||
|
<< " --dta reports alignments tailored for transcript assemblers" << endl
|
|||
|
<< " --dta-cufflinks reports alignments tailored specifically for cufflinks" << endl
|
|||
|
<< " --avoid-pseudogene tries to avoid aligning reads to pseudogenes (experimental option)" << endl
|
|||
|
<< " --no-templatelen-adjustment disables template length adjustment for RNA-seq reads" << endl
|
|||
|
<< endl
|
|||
|
<< " Scoring:" << endl
|
|||
|
//<< " --ma <int> match bonus (0 for --end-to-end, 2 for --local) " << endl
|
|||
|
<< " --mp <int>,<int> max and min penalties for mismatch; lower qual = lower penalty <6,2>" << endl
|
|||
|
<< " --sp <int>,<int> max and min penalties for soft-clipping; lower qual = lower penalty <2,1>" << endl
|
|||
|
<< " --no-softclip no soft-clipping" << endl
|
|||
|
<< " --np <int> penalty for non-A/C/G/Ts in read/ref (1)" << endl
|
|||
|
<< " --rdg <int>,<int> read gap open, extend penalties (5,3)" << endl
|
|||
|
<< " --rfg <int>,<int> reference gap open, extend penalties (5,3)" << endl
|
|||
|
<< " --score-min <func> min acceptable alignment score w/r/t read length" << endl
|
|||
|
<< " (L,0.0,-0.2)" << endl
|
|||
|
<< endl
|
|||
|
<< " Reporting:" << endl
|
|||
|
<< " -k <int> It searches for at most <int> distinct, primary alignments for each read. Primary alignments mean " << endl
|
|||
|
<< " alignments whose alignment score is equal to or higher than any other alignments. The search terminates " << endl
|
|||
|
<< " when it cannot find more distinct valid alignments, or when it finds <int>, whichever happens first. " << endl
|
|||
|
<< " The alignment score for a paired-end alignment equals the sum of the alignment scores of " << endl
|
|||
|
<< " the individual mates. Each reported read or pair alignment beyond the first has the SAM ‘secondary’ bit " << endl
|
|||
|
<< " (which equals 256) set in its FLAGS field. For reads that have more than <int> distinct, " << endl
|
|||
|
<< " valid alignments, hisat2 does not guarantee that the <int> alignments reported are the best possible " << endl
|
|||
|
<< " in terms of alignment score. Default: 5 (linear index) or 10 (graph index)." << endl
|
|||
|
<< " Note: HISAT2 is not designed with large values for -k in mind, and when aligning reads to long, " << endl
|
|||
|
<< " repetitive genomes, large -k could make alignment much slower." << endl
|
|||
|
<< " --max-seeds <int> HISAT2, like other aligners, uses seed-and-extend approaches. HISAT2 tries to extend seeds to " << endl
|
|||
|
<< " full-length alignments. In HISAT2, --max-seeds is used to control the maximum number of seeds that " << endl
|
|||
|
<< " will be extended. For DNA-read alignment (--no-spliced-alignment), HISAT2 extends up to these many seeds" << endl
|
|||
|
<< " and skips the rest of the seeds. For RNA-read alignment, HISAT2 skips extending seeds and reports " << endl
|
|||
|
<< " no alignments if the number of seeds is larger than the number specified with the option, " << endl
|
|||
|
<< " to be compatible with previous versions of HISAT2. Large values for --max-seeds may improve alignment " << endl
|
|||
|
<< " sensitivity, but HISAT2 is not designed with large values for --max-seeds in mind, and when aligning " << endl
|
|||
|
<< " reads to long, repetitive genomes, large --max-seeds could make alignment much slower. " << endl
|
|||
|
<< " The default value is the maximum of 5 and the value that comes with -k times 2." << endl
|
|||
|
<< " -a/--all HISAT2 reports all alignments it can find. Using the option is equivalent to using both --max-seeds " << endl
|
|||
|
<< " and -k with the maximum value that a 64-bit signed integer can represent (9,223,372,036,854,775,807)." << endl
|
|||
|
<< " --repeat report alignments to repeat sequences directly" << endl
|
|||
|
<< endl
|
|||
|
//<< " Effort:" << endl
|
|||
|
//<< " -D <int> give up extending after <int> failed extends in a row (15)" << endl
|
|||
|
//<< " -R <int> for reads w/ repetitive seeds, try <int> sets of seeds (2)" << endl
|
|||
|
//<< endl
|
|||
|
<< " Paired-end:" << endl
|
|||
|
<< " -I/--minins <int> minimum fragment length (0), only valid with --no-spliced-alignment" << endl
|
|||
|
<< " -X/--maxins <int> maximum fragment length (500), only valid with --no-spliced-alignment" << endl
|
|||
|
<< " --fr/--rf/--ff -1, -2 mates align fw/rev, rev/fw, fw/fw (--fr)" << endl
|
|||
|
<< " --no-mixed suppress unpaired alignments for paired reads" << endl
|
|||
|
<< " --no-discordant suppress discordant alignments for paired reads" << endl
|
|||
|
<< endl
|
|||
|
<< " Output:" << endl;
|
|||
|
//if(wrapper == "basic-0") {
|
|||
|
// out << " --bam output directly to BAM (by piping through 'samtools view')" << endl;
|
|||
|
//}
|
|||
|
out << " -t/--time print wall-clock time taken by search phases" << endl;
|
|||
|
if(wrapper == "basic-0") {
|
|||
|
out << " --un <path> write unpaired reads that didn't align to <path>" << endl
|
|||
|
<< " --al <path> write unpaired reads that aligned at least once to <path>" << endl
|
|||
|
<< " --un-conc <path> write pairs that didn't align concordantly to <path>" << endl
|
|||
|
<< " --al-conc <path> write pairs that aligned concordantly at least once to <path>" << endl
|
|||
|
<< " (Note: for --un, --al, --un-conc, or --al-conc, add '-gz' to the option name, e.g." << endl
|
|||
|
<< " --un-gz <path>, to gzip compress output, or add '-bz2' to bzip2 compress output.)" << endl;
|
|||
|
}
|
|||
|
out << " --summary-file <path> print alignment summary to this file." << endl
|
|||
|
<< " --new-summary print alignment summary in a new style, which is more machine-friendly." << endl
|
|||
|
<< " --quiet print nothing to stderr except serious errors" << endl
|
|||
|
// << " --refidx refer to ref. seqs by 0-based index rather than name" << endl
|
|||
|
<< " --met-file <path> send metrics to file at <path> (off)" << endl
|
|||
|
<< " --met-stderr send metrics to stderr (off)" << endl
|
|||
|
<< " --met <int> report internal counters & metrics every <int> secs (1)" << endl
|
|||
|
// Following is supported in the wrapper instead
|
|||
|
// << " --no-unal suppress SAM records for unaligned reads" << endl
|
|||
|
<< " --no-head suppress header lines, i.e. lines starting with @" << endl
|
|||
|
<< " --no-sq suppress @SQ header lines" << endl
|
|||
|
<< " --rg-id <text> set read group id, reflected in @RG line and RG:Z: opt field" << endl
|
|||
|
<< " --rg <text> add <text> (\"lab:value\") to @RG line of SAM header." << endl
|
|||
|
<< " Note: @RG line only printed when --rg-id is set." << endl
|
|||
|
<< " --omit-sec-seq put '*' in SEQ and QUAL fields for secondary alignments." << endl
|
|||
|
<< endl
|
|||
|
<< " Performance:" << endl
|
|||
|
<< " -o/--offrate <int> override offrate of index; must be >= index's offrate" << endl
|
|||
|
<< " -p/--threads <int> number of alignment threads to launch (1)" << endl
|
|||
|
<< " --reorder force SAM output order to match order of input reads" << endl
|
|||
|
#ifdef BOWTIE_MM
|
|||
|
<< " --mm use memory-mapped I/O for index; many 'hisat2's can share" << endl
|
|||
|
#endif
|
|||
|
#ifdef BOWTIE_SHARED_MEM
|
|||
|
//<< " --shmem use shared mem for index; many 'hisat2's can share" << endl
|
|||
|
#endif
|
|||
|
<< endl
|
|||
|
<< " Other:" << endl
|
|||
|
<< " --qc-filter filter out reads that are bad according to QSEQ filter" << endl
|
|||
|
<< " --seed <int> seed for random number generator (0)" << endl
|
|||
|
<< " --non-deterministic seed rand. gen. arbitrarily instead of using read attributes" << endl
|
|||
|
<< " --remove-chrname remove 'chr' from reference names in alignment" << endl
|
|||
|
<< " --add-chrname add 'chr' to reference names in alignment " << endl
|
|||
|
// << " --verbose verbose output for debugging" << endl
|
|||
|
<< " --version print version information and quit" << endl
|
|||
|
<< " -h/--help print this usage message" << endl
|
|||
|
;
|
|||
|
if(wrapper.empty()) {
|
|||
|
cerr << endl
|
|||
|
<< "*** Warning ***" << endl
|
|||
|
<< "'hisat2-align' was run directly. It is recommended that you run the wrapper script 'hisat2' instead." << endl
|
|||
|
<< endl;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Parse an int out of optarg and enforce that it be at least 'lower';
|
|||
|
* if it is less than 'lower', than output the given error message and
|
|||
|
* exit with an error and a usage message.
|
|||
|
*/
|
|||
|
static int parseInt(int lower, int upper, const char *errmsg, const char *arg) {
|
|||
|
long l;
|
|||
|
char *endPtr= NULL;
|
|||
|
l = strtol(arg, &endPtr, 10);
|
|||
|
if (endPtr != NULL) {
|
|||
|
if (l < lower || l > upper) {
|
|||
|
cerr << errmsg << endl;
|
|||
|
printUsage(cerr);
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
return (int32_t)l;
|
|||
|
}
|
|||
|
cerr << errmsg << endl;
|
|||
|
printUsage(cerr);
|
|||
|
throw 1;
|
|||
|
return -1;
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Upper is maximum int by default.
|
|||
|
*/
|
|||
|
static int parseInt(int lower, const char *errmsg, const char *arg) {
|
|||
|
return parseInt(lower, std::numeric_limits<int>::max(), errmsg, arg);
|
|||
|
}
|
|||
|
|
|||
|
/**
 * Parse a value of type T from the C string 's' using stream extraction.
 *
 * The result starts out value-initialized (0 for arithmetic types, empty for
 * strings), so input from which nothing can be extracted -- e.g. an empty or
 * all-whitespace string -- yields that default rather than an indeterminate
 * value.
 */
template<typename T>
T parse(const char *s) {
    T tmp = T();
    std::stringstream ss(s);
    ss >> tmp;
    return tmp;
}
|
|||
|
|
|||
|
/**
|
|||
|
* Parse a pair of Ts from a string, 'str', delimited with 'delim'.
|
|||
|
*/
|
|||
|
template<typename T>
|
|||
|
pair<T, T> parsePair(const char *str, char delim) {
|
|||
|
string s(str);
|
|||
|
EList<string> ss;
|
|||
|
tokenize(s, delim, ss);
|
|||
|
pair<T, T> ret;
|
|||
|
ret.first = parse<T>(ss[0].c_str());
|
|||
|
ret.second = parse<T>(ss[1].c_str());
|
|||
|
return ret;
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Parse a pair of Ts from a string, 'str', delimited with 'delim'.
|
|||
|
*/
|
|||
|
template<typename T>
|
|||
|
void parseTuple(const char *str, char delim, EList<T>& ret) {
|
|||
|
string s(str);
|
|||
|
EList<string> ss;
|
|||
|
tokenize(s, delim, ss);
|
|||
|
for(size_t i = 0; i < ss.size(); i++) {
|
|||
|
ret.push_back(parse<T>(ss[i].c_str()));
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/**
 * Resolve a preset name and apply it through 'presets'.
 *
 * The first "%LOCAL%" placeholder in 'sorig' (if any) is replaced with
 * "-local" when localAlign is set and with the empty string otherwise.  The
 * resulting name is handed to presets.apply() together with the global
 * extra_opts list, and the policy string it produces is returned.
 */
static string applyPreset(const string& sorig, Presets& presets) {
	const char * const placeholder = "%LOCAL%";
	string presetName(sorig);
	const size_t pos = presetName.find(placeholder);
	if(pos != string::npos) {
		const char *suffix = localAlign ? "-local" : "";
		presetName.replace(pos, strlen(placeholder), suffix);
	}
	if(gVerbose) {
		cerr << "Applying preset: '" << presetName.c_str() << "' using preset menu '"
		     << presets.name() << "'" << endl;
	}
	string policy;
	presets.apply(presetName, policy, extra_opts);
	return policy;
}
|
|||
|
|
|||
|
// Set to true by parseOption() when -M is handled; reset to false at the
// start of parseOptions().
static bool saw_M;
|
|||
|
// Set to true by parseOption() when -a is handled; reset to false at the
// start of parseOptions().
static bool saw_a;
|
|||
|
// Set to true by parseOption() when -k is handled, and by parseOptions()
// when a sensitive preset raises khits; reset at the start of parseOptions().
static bool saw_k;
|
|||
|
// Preset names queued by parseOption(); cleared at the start of
// parseOptions() and applied there, in order, once all options are read.
static EList<string> presetList;
|
|||
|
|
|||
|
/**
|
|||
|
* TODO: Argument parsing is very, very flawed. The biggest problem is that
|
|||
|
* there are two separate worlds of arguments, the ones set via polstr, and
|
|||
|
* the ones set directly in variables. This makes for nasty interactions,
|
|||
|
* e.g., with the -M option being resolved at an awkward time relative to
|
|||
|
* the -k and -a options.
|
|||
|
*/
|
|||
|
static void parseOption(int next_option, const char *arg) {
|
|||
|
switch (next_option) {
|
|||
|
case ARG_TEST_25: bowtie2p5 = true; break;
|
|||
|
case ARG_DESC_KB: descentTotSz = SimpleFunc::parse(arg, 0.0, 1024.0, 1024.0, DMAX); break;
|
|||
|
case ARG_DESC_FMOPS: descentTotFmops = SimpleFunc::parse(arg, 0.0, 10.0, 100.0, DMAX); break;
|
|||
|
case ARG_DESC_LANDING: descentLanding = parse<int>(arg); break;
|
|||
|
case ARG_DESC_EXP: {
|
|||
|
descConsExp = parse<double>(arg);
|
|||
|
if(descConsExp < 0.0) {
|
|||
|
cerr << "Error: --desc-exp must be greater than or equal to 0" << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case '1': tokenize(arg, ",", mates1); break;
|
|||
|
case '2': tokenize(arg, ",", mates2); break;
|
|||
|
case ARG_ONETWO: tokenize(arg, ",", mates12); format = TAB_MATE5; break;
|
|||
|
case ARG_TAB5: tokenize(arg, ",", mates12); format = TAB_MATE5; break;
|
|||
|
case ARG_TAB6: tokenize(arg, ",", mates12); format = TAB_MATE6; break;
|
|||
|
case 'f': format = FASTA; break;
|
|||
|
case 'F': {
|
|||
|
format = FASTA_CONT;
|
|||
|
pair<uint32_t, uint32_t> p = parsePair<uint32_t>(arg, ',');
|
|||
|
fastaContLen = p.first;
|
|||
|
fastaContFreq = p.second;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_BWA_SW_LIKE: {
|
|||
|
bwaSwLikeC = 5.5f;
|
|||
|
bwaSwLikeT = 30;
|
|||
|
bwaSwLike = true;
|
|||
|
localAlign = true;
|
|||
|
// -a INT Score of a match [1]
|
|||
|
// -b INT Mismatch penalty [3]
|
|||
|
// -q INT Gap open penalty [5]
|
|||
|
// -r INT Gap extension penalty. The penalty for a contiguous
|
|||
|
// gap of size k is q+k*r. [2]
|
|||
|
polstr += ";MA=1;MMP=C3;RDG=5,2;RFG=5,2";
|
|||
|
break;
|
|||
|
}
|
|||
|
case 'q': format = FASTQ; break;
|
|||
|
case 'r': format = RAW; break;
|
|||
|
case 'c': format = CMDLINE; break;
|
|||
|
case ARG_QSEQ: format = QSEQ; break;
|
|||
|
case 'C': {
|
|||
|
cerr << "Error: -C specified but Bowtie 2 does not support colorspace input." << endl;
|
|||
|
throw 1;
|
|||
|
break;
|
|||
|
}
|
|||
|
case 'I':
|
|||
|
gMinInsert = parseInt(0, "-I arg must be positive", arg);
|
|||
|
break;
|
|||
|
case 'X':
|
|||
|
gMaxInsert = parseInt(1, "-X arg must be at least 1", arg);
|
|||
|
break;
|
|||
|
case ARG_NO_DISCORDANT: gReportDiscordant = false; break;
|
|||
|
case ARG_NO_MIXED: gReportMixed = false; break;
|
|||
|
case 's':
|
|||
|
skipReads = (uint32_t)parseInt(0, "-s arg must be positive", arg);
|
|||
|
break;
|
|||
|
case ARG_FF: gMate1fw = true; gMate2fw = true; break;
|
|||
|
case ARG_RF: gMate1fw = false; gMate2fw = true; break;
|
|||
|
case ARG_FR: gMate1fw = true; gMate2fw = false; break;
|
|||
|
case ARG_SHMEM: useShmem = true; break;
|
|||
|
case ARG_SEED_SUMM: seedSumm = true; break;
|
|||
|
case ARG_MM: {
|
|||
|
#ifdef BOWTIE_MM
|
|||
|
useMm = true;
|
|||
|
break;
|
|||
|
#else
|
|||
|
cerr << "Memory-mapped I/O mode is disabled because bowtie was not compiled with" << endl
|
|||
|
<< "BOWTIE_MM defined. Memory-mapped I/O is not supported under Windows. If you" << endl
|
|||
|
<< "would like to use memory-mapped I/O on a platform that supports it, please" << endl
|
|||
|
<< "refrain from specifying BOWTIE_MM=0 when compiling Bowtie." << endl;
|
|||
|
throw 1;
|
|||
|
#endif
|
|||
|
}
|
|||
|
case ARG_MMSWEEP: mmSweep = true; break;
|
|||
|
case ARG_HADOOPOUT: hadoopOut = true; break;
|
|||
|
case ARG_SOLEXA_QUALS: solexaQuals = true; break;
|
|||
|
case ARG_INTEGER_QUALS: integerQuals = true; break;
|
|||
|
case ARG_PHRED64: phred64Quals = true; break;
|
|||
|
case ARG_PHRED33: solexaQuals = false; phred64Quals = false; break;
|
|||
|
case ARG_OVERHANG: gReportOverhangs = true; break;
|
|||
|
case ARG_NO_CACHE: msNoCache = true; break;
|
|||
|
case ARG_USE_CACHE: msNoCache = false; break;
|
|||
|
case ARG_LOCAL_SEED_CACHE_SZ:
|
|||
|
seedCacheLocalMB = (uint32_t)parseInt(1, "--local-seed-cache-sz arg must be at least 1", arg);
|
|||
|
break;
|
|||
|
case ARG_CURRENT_SEED_CACHE_SZ:
|
|||
|
seedCacheCurrentMB = (uint32_t)parseInt(1, "--seed-cache-sz arg must be at least 1", arg);
|
|||
|
break;
|
|||
|
case ARG_REFIDX: noRefNames = true; break;
|
|||
|
case ARG_FUZZY: fuzzy = true; break;
|
|||
|
case ARG_FULLREF: fullRef = true; break;
|
|||
|
case ARG_GAP_BAR:
|
|||
|
gGapBarrier = parseInt(1, "--gbar must be no less than 1", arg);
|
|||
|
break;
|
|||
|
case ARG_SEED:
|
|||
|
seed = parseInt(0, "--seed arg must be at least 0", arg);
|
|||
|
break;
|
|||
|
case ARG_NON_DETERMINISTIC:
|
|||
|
arbitraryRandom = true;
|
|||
|
break;
|
|||
|
case 'u':
|
|||
|
qUpto = (uint32_t)parseInt(1, "-u/--qupto arg must be at least 1", arg);
|
|||
|
break;
|
|||
|
case 'Q':
|
|||
|
tokenize(arg, ",", qualities);
|
|||
|
integerQuals = true;
|
|||
|
break;
|
|||
|
case ARG_QUALS1:
|
|||
|
tokenize(arg, ",", qualities1);
|
|||
|
integerQuals = true;
|
|||
|
break;
|
|||
|
case ARG_QUALS2:
|
|||
|
tokenize(arg, ",", qualities2);
|
|||
|
integerQuals = true;
|
|||
|
break;
|
|||
|
case ARG_CACHE_LIM:
|
|||
|
cacheLimit = (uint32_t)parseInt(1, "--cachelim arg must be at least 1", arg);
|
|||
|
break;
|
|||
|
case ARG_CACHE_SZ:
|
|||
|
cacheSize = (uint32_t)parseInt(1, "--cachesz arg must be at least 1", arg);
|
|||
|
cacheSize *= (1024 * 1024); // convert from MB to B
|
|||
|
break;
|
|||
|
case ARG_WRAPPER: wrapper = arg; break;
|
|||
|
case 'p':
|
|||
|
nthreads = parseInt(1, "-p/--threads arg must be at least 1", arg);
|
|||
|
break;
|
|||
|
case ARG_FILEPAR:
|
|||
|
fileParallel = true;
|
|||
|
break;
|
|||
|
case '3': gTrim3 = parseInt(0, "-3/--trim3 arg must be at least 0", arg); break;
|
|||
|
case '5': gTrim5 = parseInt(0, "-5/--trim5 arg must be at least 0", arg); break;
|
|||
|
case 'h': printUsage(cout); throw 0; break;
|
|||
|
case ARG_USAGE: printUsage(cout); throw 0; break;
|
|||
|
//
|
|||
|
// NOTE that unlike in Bowtie 1, -M, -a and -k are mutually
|
|||
|
// exclusive here.
|
|||
|
//
|
|||
|
case 'M': {
|
|||
|
msample = true;
|
|||
|
mhits = parse<uint32_t>(arg);
|
|||
|
if(saw_a || saw_k) {
|
|||
|
cerr << "Warning: -M, -k and -a are mutually exclusive. "
|
|||
|
<< "-M will override" << endl;
|
|||
|
khits = 1;
|
|||
|
}
|
|||
|
assert_eq(1, khits);
|
|||
|
saw_M = true;
|
|||
|
cerr << "Warning: -M is deprecated. Use -D and -R to adjust " <<
|
|||
|
"effort instead." << endl;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_EXTEND_ITERS: {
|
|||
|
maxIters = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_NO_EXTEND: {
|
|||
|
doExtend = false;
|
|||
|
break;
|
|||
|
}
|
|||
|
case 'R': { polstr += ";ROUNDS="; polstr += arg; break; }
|
|||
|
case 'D': { polstr += ";DPS="; polstr += arg; break; }
|
|||
|
case ARG_DP_MATE_STREAK_THRESH: {
|
|||
|
maxMateStreak = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_DP_FAIL_STREAK_THRESH: {
|
|||
|
maxDpStreak = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_EE_FAIL_STREAK_THRESH: {
|
|||
|
maxEeStreak = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_UG_FAIL_STREAK_THRESH: {
|
|||
|
maxUgStreak = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_DP_FAIL_THRESH: {
|
|||
|
maxDp = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_UG_FAIL_THRESH: {
|
|||
|
maxUg = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_MAX_SEEDS: {
|
|||
|
maxSeeds = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SEED_BOOST_THRESH: {
|
|||
|
seedBoostThresh = parse<int>(arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case 'a': {
|
|||
|
msample = false;
|
|||
|
allHits = true;
|
|||
|
mhits = 0; // disable -M
|
|||
|
if(saw_M || saw_k) {
|
|||
|
cerr << "Warning: -M, -k and -a are mutually exclusive. "
|
|||
|
<< "-a will override" << endl;
|
|||
|
}
|
|||
|
saw_a = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case 'k': {
|
|||
|
msample = false;
|
|||
|
khits = (uint32_t)parseInt(1, "-k arg must be at least 1", arg);
|
|||
|
mhits = 0; // disable -M
|
|||
|
if(saw_M || saw_a) {
|
|||
|
cerr << "Warning: -M, -k and -a are mutually exclusive. "
|
|||
|
<< "-k will override" << endl;
|
|||
|
}
|
|||
|
saw_k = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_VERBOSE: gVerbose = 1; break;
|
|||
|
case ARG_STARTVERBOSE: startVerbose = true; break;
|
|||
|
case ARG_QUIET: gQuiet = true; break;
|
|||
|
case ARG_SANITY: sanityCheck = true; break;
|
|||
|
case 't': timing = true; break;
|
|||
|
case ARG_METRIC_IVAL: {
|
|||
|
metricsIval = parseInt(1, "--metrics arg must be at least 1", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_METRIC_FILE: metricsFile = arg; break;
|
|||
|
case ARG_METRIC_STDERR: metricsStderr = true; break;
|
|||
|
case ARG_METRIC_PER_READ: metricsPerRead = true; break;
|
|||
|
case ARG_NO_FW: gNofw = true; break;
|
|||
|
case ARG_NO_RC: gNorc = true; break;
|
|||
|
case ARG_SAM_NO_QNAME_TRUNC: samTruncQname = false; break;
|
|||
|
case ARG_SAM_OMIT_SEC_SEQ: samOmitSecSeqQual = true; break;
|
|||
|
case ARG_SAM_NO_UNAL: samNoUnal = true; break;
|
|||
|
case ARG_SAM_NOHEAD: samNoHead = true; break;
|
|||
|
case ARG_SAM_NOSQ: samNoSQ = true; break;
|
|||
|
case ARG_SAM_PRINT_YI: sam_print_yi = true; break;
|
|||
|
case ARG_REORDER: reorder = true; break;
|
|||
|
case ARG_MAPQ_EX: {
|
|||
|
sam_print_zp = true;
|
|||
|
sam_print_zu = true;
|
|||
|
sam_print_xp = true;
|
|||
|
sam_print_xss = true;
|
|||
|
sam_print_yn = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SHOW_RAND_SEED: {
|
|||
|
sam_print_zs = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SAMPLE:
|
|||
|
sampleFrac = parse<float>(arg);
|
|||
|
break;
|
|||
|
case ARG_CP_MIN:
|
|||
|
cminlen = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
case ARG_CP_IVAL:
|
|||
|
cpow2 = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
case ARG_TRI:
|
|||
|
doTri = true;
|
|||
|
break;
|
|||
|
case ARG_READ_PASSTHRU: {
|
|||
|
sam_print_xr = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_READ_TIMES: {
|
|||
|
sam_print_xt = true;
|
|||
|
sam_print_xd = true;
|
|||
|
sam_print_xu = true;
|
|||
|
sam_print_yl = true;
|
|||
|
sam_print_ye = true;
|
|||
|
sam_print_yu = true;
|
|||
|
sam_print_yr = true;
|
|||
|
sam_print_zb = true;
|
|||
|
sam_print_zr = true;
|
|||
|
sam_print_zf = true;
|
|||
|
sam_print_zm = true;
|
|||
|
sam_print_zi = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SAM_RG: {
|
|||
|
string argstr = arg;
|
|||
|
if(argstr.substr(0, 3) == "ID:") {
|
|||
|
rgid = "\t";
|
|||
|
rgid += argstr;
|
|||
|
rgs_optflag = "RG:Z:" + argstr.substr(3);
|
|||
|
} else {
|
|||
|
rgs += '\t';
|
|||
|
rgs += argstr;
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SAM_RGID: {
|
|||
|
string argstr = arg;
|
|||
|
rgid = "\t";
|
|||
|
rgid = "\tID:" + argstr;
|
|||
|
rgs_optflag = "RG:Z:" + argstr;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_PARTITION: partitionSz = parse<int>(arg); break;
|
|||
|
case ARG_DPAD:
|
|||
|
maxhalf = parseInt(0, "--dpad must be no less than 0", arg);
|
|||
|
break;
|
|||
|
case ARG_ORIG:
|
|||
|
if(arg == NULL || strlen(arg) == 0) {
|
|||
|
cerr << "--orig arg must be followed by a string" << endl;
|
|||
|
printUsage(cerr);
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
origString = arg;
|
|||
|
break;
|
|||
|
case ARG_LOCAL: localAlign = true; break;
|
|||
|
case ARG_END_TO_END: localAlign = false; break;
|
|||
|
case ARG_SSE8: enable8 = true; break;
|
|||
|
case ARG_SSE8_NO: enable8 = false; break;
|
|||
|
case ARG_UNGAPPED: doUngapped = true; break;
|
|||
|
case ARG_UNGAPPED_NO: doUngapped = false; break;
|
|||
|
// case ARG_NO_DOVETAIL: gDovetailMatesOK = false; break;
|
|||
|
// case ARG_NO_CONTAIN: gContainMatesOK = false; break;
|
|||
|
// case ARG_NO_OVERLAP: gOlapMatesOK = false; break;
|
|||
|
// case ARG_DOVETAIL: gDovetailMatesOK = true; break;
|
|||
|
// case ARG_CONTAIN: gContainMatesOK = true; break;
|
|||
|
// case ARG_OVERLAP: gOlapMatesOK = true; break;
|
|||
|
case ARG_QC_FILTER: qcFilter = true; break;
|
|||
|
case ARG_NO_SCORE_PRIORITY: sortByScore = false; break;
|
|||
|
case ARG_IGNORE_QUALS: ignoreQuals = true; break;
|
|||
|
case ARG_MAPQ_V: mapqv = parse<int>(arg); break;
|
|||
|
case ARG_TIGHTEN: tighten = parse<int>(arg); break;
|
|||
|
case ARG_EXACT_UPFRONT: doExactUpFront = true; break;
|
|||
|
case ARG_1MM_UPFRONT: do1mmUpFront = true; break;
|
|||
|
case ARG_EXACT_UPFRONT_NO: doExactUpFront = false; break;
|
|||
|
case ARG_1MM_UPFRONT_NO: do1mmUpFront = false; break;
|
|||
|
case ARG_1MM_MINLEN: do1mmMinLen = parse<size_t>(arg); break;
|
|||
|
case ARG_NOISY_HPOLY: noisyHpolymer = true; break;
|
|||
|
case 'x' : bt2indexs[0] = arg; break;
|
|||
|
case ARG_PRESET_VERY_FAST_LOCAL: localAlign = true;
|
|||
|
case ARG_PRESET_VERY_FAST: {
|
|||
|
presetList.push_back("very-fast%LOCAL%"); break;
|
|||
|
}
|
|||
|
case ARG_PRESET_FAST_LOCAL: localAlign = true;
|
|||
|
case ARG_PRESET_FAST: {
|
|||
|
fast = true;
|
|||
|
presetList.push_back("fast%LOCAL%"); break;
|
|||
|
}
|
|||
|
case ARG_PRESET_SENSITIVE_LOCAL: localAlign = true;
|
|||
|
case ARG_PRESET_SENSITIVE: {
|
|||
|
sensitive = true;
|
|||
|
presetList.push_back("sensitive%LOCAL%"); break;
|
|||
|
}
|
|||
|
case ARG_PRESET_VERY_SENSITIVE_LOCAL: localAlign = true;
|
|||
|
case ARG_PRESET_VERY_SENSITIVE: {
|
|||
|
very_sensitive = true;
|
|||
|
presetList.push_back("very-sensitive%LOCAL%"); break;
|
|||
|
}
|
|||
|
case 'P': { presetList.push_back(arg); break; }
|
|||
|
case ARG_ALIGN_POLICY: {
|
|||
|
if(strlen(arg) > 0) {
|
|||
|
polstr += ";"; polstr += arg;
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case 'N': { polstr += ";SEED="; polstr += arg; break; }
|
|||
|
case 'L': {
|
|||
|
int64_t len = parse<size_t>(arg);
|
|||
|
if(len < 0) {
|
|||
|
cerr << "Error: -L argument must be >= 0; was " << arg << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(len > 32) {
|
|||
|
cerr << "Error: -L argument must be <= 32; was" << arg << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
polstr += ";SEEDLEN="; polstr += arg; break;
|
|||
|
}
|
|||
|
case 'O':
|
|||
|
multiseedOff = parse<size_t>(arg);
|
|||
|
break;
|
|||
|
case 'i': {
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 3 || args.size() == 0) {
|
|||
|
cerr << "Error: expected 3 or fewer comma-separated "
|
|||
|
<< "arguments to -i option, got "
|
|||
|
<< args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
// Interval-settings arguments
|
|||
|
polstr += (";IVAL=" + args[0]); // Function type
|
|||
|
if(args.size() > 1) {
|
|||
|
polstr += ("," + args[1]); // Constant term
|
|||
|
}
|
|||
|
if(args.size() > 2) {
|
|||
|
polstr += ("," + args[2]); // Coefficient
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_MULTISEED_IVAL: {
|
|||
|
polstr += ";";
|
|||
|
// Split argument by comma
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 5 || args.size() == 0) {
|
|||
|
cerr << "Error: expected 5 or fewer comma-separated "
|
|||
|
<< "arguments to --multiseed option, got "
|
|||
|
<< args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
// Seed mm and length arguments
|
|||
|
polstr += "SEED=";
|
|||
|
polstr += (args[0]); // # mismatches
|
|||
|
if(args.size() > 1) polstr += ("," + args[ 1]); // length
|
|||
|
if(args.size() > 2) polstr += (";IVAL=" + args[2]); // Func type
|
|||
|
if(args.size() > 3) polstr += ("," + args[ 3]); // Constant term
|
|||
|
if(args.size() > 4) polstr += ("," + args[ 4]); // Coefficient
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_N_CEIL: {
|
|||
|
// Split argument by comma
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 3) {
|
|||
|
cerr << "Error: expected 3 or fewer comma-separated "
|
|||
|
<< "arguments to --n-ceil option, got "
|
|||
|
<< args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(args.size() == 0) {
|
|||
|
cerr << "Error: expected at least one argument to --n-ceil option" << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
polstr += ";NCEIL=";
|
|||
|
if(args.size() == 3) {
|
|||
|
polstr += (args[0] + "," + args[1] + "," + args[2]);
|
|||
|
} else {
|
|||
|
if(args.size() == 1) {
|
|||
|
polstr += ("C," + args[0]);
|
|||
|
} else {
|
|||
|
polstr += (args[0] + "," + args[1]);
|
|||
|
}
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SCORE_MA: polstr += ";MA="; polstr += arg; break;
|
|||
|
case ARG_SCORE_MMP: {
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 2 || args.size() == 0) {
|
|||
|
cerr << "Error: expected 1 or 2 comma-separated "
|
|||
|
<< "arguments to --mp option, got " << args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(args.size() >= 1) {
|
|||
|
polstr += ";MMP=Q,";
|
|||
|
polstr += args[0];
|
|||
|
if(args.size() >= 2) {
|
|||
|
polstr += ",";
|
|||
|
polstr += args[1];
|
|||
|
}
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SCORE_SCP: {
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 2 || args.size() == 0) {
|
|||
|
cerr << "Error: expected 1 or 2 comma-separated "
|
|||
|
<< "arguments to --sp option, got " << args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(args.size() >= 1) {
|
|||
|
polstr += ";SCP=Q,";
|
|||
|
polstr += args[0];
|
|||
|
if(args.size() >= 2) {
|
|||
|
polstr += ",";
|
|||
|
polstr += args[1];
|
|||
|
}
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_NO_SOFTCLIP: {
|
|||
|
ostringstream convert;
|
|||
|
convert << std::numeric_limits<int>::max();
|
|||
|
polstr += ";SCP=Q,";
|
|||
|
polstr += convert.str();
|
|||
|
polstr += ",";
|
|||
|
polstr += convert.str();
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SCORE_NP: polstr += ";NP=C"; polstr += arg; break;
|
|||
|
case ARG_SCORE_RDG: polstr += ";RDG="; polstr += arg; break;
|
|||
|
case ARG_SCORE_RFG: polstr += ";RFG="; polstr += arg; break;
|
|||
|
case ARG_SCORE_MIN: {
|
|||
|
polstr += ";";
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 3 && args.size() == 0) {
|
|||
|
cerr << "Error: expected 3 or fewer comma-separated "
|
|||
|
<< "arguments to --n-ceil option, got "
|
|||
|
<< args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
polstr += ("MIN=" + args[0]);
|
|||
|
if(args.size() > 1) {
|
|||
|
polstr += ("," + args[1]);
|
|||
|
}
|
|||
|
if(args.size() > 2) {
|
|||
|
polstr += ("," + args[2]);
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_DESC: printArgDesc(cout); throw 0;
|
|||
|
case 'S': outfile = arg; break;
|
|||
|
case 'U': {
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
for(size_t i = 0; i < args.size(); i++) {
|
|||
|
queries.push_back(args[i]);
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_VERSION: showVersion = 1; break;
|
|||
|
case ARG_NO_TEMPSPLICESITE: useTempSpliceSite = false; break;
|
|||
|
case ARG_PEN_CANSPLICE: {
|
|||
|
penCanSplice = parseInt(0, "--pen-cansplice arg must be at least 0", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_PEN_NONCANSPLICE: {
|
|||
|
penNoncanSplice = parseInt(0, "--pen-noncansplice arg must be at least 0", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_PEN_CONFLICTSPLICE: {
|
|||
|
penConflictSplice = parseInt(0, "--pen-conflictsplice arg must be at least 0", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_PEN_CANINTRONLEN: {
|
|||
|
polstr += ";";
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 3 && args.size() == 0) {
|
|||
|
cerr << "Error: expected 3 or fewer comma-separated "
|
|||
|
<< "arguments to --n-ceil option, got "
|
|||
|
<< args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
polstr += ("CANINTRONLEN=" + args[0]);
|
|||
|
if(args.size() > 1) {
|
|||
|
polstr += ("," + args[1]);
|
|||
|
}
|
|||
|
if(args.size() > 2) {
|
|||
|
polstr += ("," + args[2]);
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_PEN_NONCANINTRONLEN: {
|
|||
|
polstr += ";";
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() > 3 && args.size() == 0) {
|
|||
|
cerr << "Error: expected 3 or fewer comma-separated "
|
|||
|
<< "arguments to --n-ceil option, got "
|
|||
|
<< args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
polstr += ("NONCANINTRONLEN=" + args[0]);
|
|||
|
if(args.size() > 1) {
|
|||
|
polstr += ("," + args[1]);
|
|||
|
}
|
|||
|
if(args.size() > 2) {
|
|||
|
polstr += ("," + args[2]);
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_MIN_INTRONLEN: {
|
|||
|
minIntronLen = parseInt(20, "--min-intronlen arg must be at least 20", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_MAX_INTRONLEN: {
|
|||
|
maxIntronLen = parseInt(20, "--max-intronlen arg must be at least 20", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_KNOWN_SPLICESITE_INFILE: knownSpliceSiteInfile = arg; break;
|
|||
|
case ARG_NOVEL_SPLICESITE_INFILE: novelSpliceSiteInfile = arg; break;
|
|||
|
case ARG_NOVEL_SPLICESITE_OUTFILE: novelSpliceSiteOutfile = arg; break;
|
|||
|
case ARG_SECONDARY: secondary = true; break;
|
|||
|
case ARG_NO_SPLICED_ALIGNMENT: no_spliced_alignment = true; break;
|
|||
|
case ARG_RNA_STRANDNESS: {
|
|||
|
string strandness = arg;
|
|||
|
if(strandness == "F") rna_strandness = RNA_STRANDNESS_F;
|
|||
|
else if(strandness == "R") rna_strandness = RNA_STRANDNESS_R;
|
|||
|
else if(strandness == "FR") rna_strandness = RNA_STRANDNESS_FR;
|
|||
|
else if(strandness == "RF") rna_strandness = RNA_STRANDNESS_RF;
|
|||
|
else {
|
|||
|
cerr << "Error: should be one of F, R, FR, or RF " << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SPLICESITE_DB_ONLY: {
|
|||
|
splicesite_db_only = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_NO_ANCHORSTOP: {
|
|||
|
anchorStop = false;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_TRANSCRIPTOME_MAPPING_ONLY: {
|
|||
|
tranMapOnly = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_TRANSCRIPTOME_ASSEMBLY: {
|
|||
|
tranAssm = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_TRANSCRIPTOME_ASSEMBLY_CUFFLINKS: {
|
|||
|
tranAssm = true;
|
|||
|
tranAssm_program = "cufflinks";
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_AVOID_PSEUDOGENE: {
|
|||
|
avoid_pseudogene = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
#ifdef USE_SRA
|
|||
|
case ARG_SRA_ACC: {
|
|||
|
tokenize(arg, ",", sra_accs); format = SRA_FASTA;
|
|||
|
break;
|
|||
|
}
|
|||
|
#endif
|
|||
|
case ARG_REMOVE_CHRNAME: {
|
|||
|
rmChrName = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_ADD_CHRNAME: {
|
|||
|
addChrName = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_MAX_ALTSTRIED: {
|
|||
|
max_alts_tried = parseInt(8, "--max-altstried arg must be at least 8", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_HAPLOTYPE: {
|
|||
|
use_haplotype = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_CODIS: {
|
|||
|
enable_codis = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_NO_TEMPLATELEN_ADJUSTMENT: {
|
|||
|
templateLenAdjustment = false;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_SUMMARY_FILE: {
|
|||
|
alignSumFile = arg;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_NEW_SUMMARY: {
|
|||
|
newAlignSummary = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_DP: {
|
|||
|
bowtie2_dp = parseInt(0, "--bowtie2-dp arg must be 0, 1, or 2", arg);
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_REPEAT: {
|
|||
|
repeat = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_NO_REPEAT_INDEX: {
|
|||
|
use_repeat_index = false;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_READ_LENGTHS: {
|
|||
|
EList<string> str_readLens;
|
|||
|
tokenize(arg, ",", str_readLens);
|
|||
|
for(size_t i = 0; i < str_readLens.size(); i++) {
|
|||
|
int readLen = parseInt(0, "--read-lengths arg must be at least 0", str_readLens[i].c_str());
|
|||
|
readLens.push_back(readLen);
|
|||
|
}
|
|||
|
readLens.sort();
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_BASE_CHANGE: {
|
|||
|
// Split argument by comma
|
|||
|
EList<string> args;
|
|||
|
tokenize(arg, ",", args);
|
|||
|
if(args.size() != 2) {
|
|||
|
cerr << "Error: expected 2 comma-separated "
|
|||
|
<< "arguments to --base-change option, got " << args.size() << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
base_change_entered = true;
|
|||
|
usrInput_convertedFrom = toupper(args[0][0]);
|
|||
|
usrInput_convertedTo = toupper(args[1][0]);
|
|||
|
|
|||
|
string s = "ACGT";
|
|||
|
if ((s.find(usrInput_convertedFrom) == std::string::npos) || (s.find(usrInput_convertedTo) == std::string::npos)) {
|
|||
|
cerr << "Please enter the nucleotide in 'ACGT' for --base-change option." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
|
|||
|
if (usrInput_convertedFrom == usrInput_convertedTo) {
|
|||
|
cerr << "Please enter two different base for --base-change option. If you wish to align normal reads without nucleotide conversion, please use hisat2." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_3N: {
|
|||
|
threeN = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_REPEAT_LIMIT: {
|
|||
|
repeatLimit = parseInt(1, "--repeat-limit arg must be at least 1", arg);;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_UNIQUE_ONLY: {
|
|||
|
uniqueOutputOnly = true;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_DIRECTIONAL: {
|
|||
|
directional3NMapping = 1;
|
|||
|
break;
|
|||
|
}
|
|||
|
case ARG_DIRECTIONAL_REVERSE: {
|
|||
|
directional3NMapping = 2;
|
|||
|
break;
|
|||
|
}
|
|||
|
default:
|
|||
|
printUsage(cerr);
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Read command-line arguments
|
|||
|
*/
|
|||
|
static void parseOptions(int argc, const char **argv) {
|
|||
|
int option_index = 0;
|
|||
|
int next_option;
|
|||
|
saw_M = false;
|
|||
|
saw_a = false;
|
|||
|
saw_k = false;
|
|||
|
presetList.clear();
|
|||
|
if(startVerbose) { cerr << "Parsing options: "; logTime(cerr, true); }
|
|||
|
while(true) {
|
|||
|
next_option = getopt_long(
|
|||
|
argc, const_cast<char**>(argv),
|
|||
|
short_options, long_options, &option_index);
|
|||
|
const char * arg = optarg;
|
|||
|
if(next_option == EOF) {
|
|||
|
if(extra_opts_cur < extra_opts.size()) {
|
|||
|
next_option = extra_opts[extra_opts_cur].first;
|
|||
|
arg = extra_opts[extra_opts_cur].second.c_str();
|
|||
|
extra_opts_cur++;
|
|||
|
} else {
|
|||
|
break;
|
|||
|
}
|
|||
|
}
|
|||
|
parseOption(next_option, arg);
|
|||
|
}
|
|||
|
// Now parse all the presets. Might want to pick which presets version to
|
|||
|
// use according to other parameters.
|
|||
|
auto_ptr<Presets> presets(new PresetsV0());
|
|||
|
// Apply default preset
|
|||
|
if(!defaultPreset.empty()) {
|
|||
|
polstr = applyPreset(defaultPreset, *presets.get()) + polstr;
|
|||
|
}
|
|||
|
// Apply specified presets
|
|||
|
for(size_t i = 0; i < presetList.size(); i++) {
|
|||
|
polstr += applyPreset(presetList[i], *presets.get());
|
|||
|
}
|
|||
|
for(size_t i = 0; i < extra_opts.size(); i++) {
|
|||
|
next_option = extra_opts[extra_opts_cur].first;
|
|||
|
const char *arg = extra_opts[extra_opts_cur].second.c_str();
|
|||
|
parseOption(next_option, arg);
|
|||
|
}
|
|||
|
|
|||
|
if (showVersion) {
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
// Remove initial semicolons
|
|||
|
while(!polstr.empty() && polstr[0] == ';') {
|
|||
|
polstr = polstr.substr(1);
|
|||
|
}
|
|||
|
if(gVerbose) {
|
|||
|
cerr << "Final policy string: '" << polstr.c_str() << "'" << endl;
|
|||
|
}
|
|||
|
|
|||
|
if (threeN && !base_change_entered) {
|
|||
|
cerr << "--base-change must be set for HISAT-3N" << endl;
|
|||
|
printUsage(cerr);
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if (!threeN && base_change_entered) {
|
|||
|
cerr << "Please do not use --base-change for HISAT2. To align nucleotide conversion reads, please use HISAT-3N" << endl;
|
|||
|
printUsage(cerr);
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
|
|||
|
if (threeN) {
|
|||
|
usrInput_convertedFromComplement = asc2dnacomp[usrInput_convertedFrom];
|
|||
|
usrInput_convertedToComplement = asc2dnacomp[usrInput_convertedTo];
|
|||
|
|
|||
|
getConversion(usrInput_convertedFrom, usrInput_convertedTo, hs3N_convertedFrom, hs3N_convertedTo);
|
|||
|
hs3N_convertedFromComplement = asc2dnacomp[hs3N_convertedFrom];
|
|||
|
hs3N_convertedToComplement = asc2dnacomp[hs3N_convertedTo];
|
|||
|
|
|||
|
asc2dna_3N[0][hs3N_convertedFrom] = asc2dna[hs3N_convertedTo];
|
|||
|
asc2dna_3N[0][tolower(hs3N_convertedFrom)] = asc2dna[hs3N_convertedTo];
|
|||
|
asc2dna_3N[1][hs3N_convertedFromComplement] = asc2dna[hs3N_convertedToComplement];
|
|||
|
asc2dna_3N[1][tolower(hs3N_convertedFromComplement)] = asc2dna[hs3N_convertedToComplement];
|
|||
|
|
|||
|
threeN_indexTags[0] += hs3N_convertedFrom;
|
|||
|
threeN_indexTags[0] += hs3N_convertedTo;
|
|||
|
threeN_indexTags[1] += hs3N_convertedFromComplement;
|
|||
|
threeN_indexTags[1] += hs3N_convertedToComplement;
|
|||
|
|
|||
|
nMappingCycle = 4;
|
|||
|
|
|||
|
if (hs3N_convertedFrom == hs3N_convertedToComplement || directional3NMapping == 1) {
|
|||
|
mappingCycles[0] = true;
|
|||
|
mappingCycles[1] = true;
|
|||
|
}
|
|||
|
else if (directional3NMapping == 2) {
|
|||
|
mappingCycles[2] = true;
|
|||
|
mappingCycles[3] = true;
|
|||
|
}
|
|||
|
else {
|
|||
|
for (int i = 0; i < 4; i++){
|
|||
|
mappingCycles[i] = true;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
else
|
|||
|
{
|
|||
|
nMappingCycle = 1;
|
|||
|
mappingCycles[0] = true;
|
|||
|
}
|
|||
|
|
|||
|
size_t failStreakTmp = 0;
|
|||
|
SeedAlignmentPolicy::parseString(
|
|||
|
polstr,
|
|||
|
localAlign,
|
|||
|
noisyHpolymer,
|
|||
|
ignoreQuals,
|
|||
|
bonusMatchType,
|
|||
|
bonusMatch,
|
|||
|
penMmcType,
|
|||
|
penMmcMax,
|
|||
|
penMmcMin,
|
|||
|
penScMax,
|
|||
|
penScMin,
|
|||
|
penNType,
|
|||
|
penN,
|
|||
|
penRdGapConst,
|
|||
|
penRfGapConst,
|
|||
|
penRdGapLinear,
|
|||
|
penRfGapLinear,
|
|||
|
scoreMin,
|
|||
|
nCeil,
|
|||
|
penNCatPair,
|
|||
|
multiseedMms,
|
|||
|
multiseedLen,
|
|||
|
msIval,
|
|||
|
failStreakTmp,
|
|||
|
nSeedRounds,
|
|||
|
&penCanIntronLen,
|
|||
|
&penNoncanIntronLen);
|
|||
|
if(failStreakTmp > 0) {
|
|||
|
maxEeStreak = failStreakTmp;
|
|||
|
maxUgStreak = failStreakTmp;
|
|||
|
maxDpStreak = failStreakTmp;
|
|||
|
}
|
|||
|
if(saw_a || saw_k || true) {
|
|||
|
msample = false;
|
|||
|
mhits = 0;
|
|||
|
} else {
|
|||
|
assert_gt(mhits, 0);
|
|||
|
msample = true;
|
|||
|
}
|
|||
|
|
|||
|
if(fast) {
|
|||
|
use_repeat_index = false;
|
|||
|
} else if(sensitive) {
|
|||
|
if(bowtie2_dp == 0) {
|
|||
|
bowtie2_dp = 1;
|
|||
|
}
|
|||
|
|
|||
|
if(khits < 10) {
|
|||
|
khits = 10;
|
|||
|
saw_k = true;
|
|||
|
}
|
|||
|
scoreMin.init(SIMPLE_FUNC_LINEAR, 0.0f, -0.5f);
|
|||
|
} else if(very_sensitive) {
|
|||
|
bowtie2_dp = 2;
|
|||
|
if(khits < 30) {
|
|||
|
khits = 30;
|
|||
|
saw_k = true;
|
|||
|
}
|
|||
|
scoreMin.init(SIMPLE_FUNC_LINEAR, 0.0f, -1.0f);
|
|||
|
}
|
|||
|
|
|||
|
if(mates1.size() != mates2.size()) {
|
|||
|
cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << mates2.size() << endl
|
|||
|
<< "mate files/sequences were specified with -2. The same number of mate files/" << endl
|
|||
|
<< "sequences must be specified with -1 and -2." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(qualities.size() && format != FASTA) {
|
|||
|
cerr << "Error: one or more quality files were specified with -Q but -f was not" << endl
|
|||
|
<< "enabled. -Q works only in combination with -f and -C." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(qualities1.size() && format != FASTA) {
|
|||
|
cerr << "Error: one or more quality files were specified with --Q1 but -f was not" << endl
|
|||
|
<< "enabled. --Q1 works only in combination with -f and -C." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(qualities2.size() && format != FASTA) {
|
|||
|
cerr << "Error: one or more quality files were specified with --Q2 but -f was not" << endl
|
|||
|
<< "enabled. --Q2 works only in combination with -f and -C." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(qualities1.size() > 0 && mates1.size() != qualities1.size()) {
|
|||
|
cerr << "Error: " << mates1.size() << " mate files/sequences were specified with -1, but " << qualities1.size() << endl
|
|||
|
<< "quality files were specified with --Q1. The same number of mate and quality" << endl
|
|||
|
<< "files must sequences must be specified with -1 and --Q1." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(qualities2.size() > 0 && mates2.size() != qualities2.size()) {
|
|||
|
cerr << "Error: " << mates2.size() << " mate files/sequences were specified with -2, but " << qualities2.size() << endl
|
|||
|
<< "quality files were specified with --Q2. The same number of mate and quality" << endl
|
|||
|
<< "files must sequences must be specified with -2 and --Q2." << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(!rgs.empty() && rgid.empty()) {
|
|||
|
cerr << "Warning: --rg was specified without --rg-id also "
|
|||
|
<< "being specified. @RG line is not printed unless --rg-id "
|
|||
|
<< "is specified." << endl;
|
|||
|
}
|
|||
|
// Check for duplicate mate input files
|
|||
|
if(format != CMDLINE) {
|
|||
|
for(size_t i = 0; i < mates1.size(); i++) {
|
|||
|
for(size_t j = 0; j < mates2.size(); j++) {
|
|||
|
if(mates1[i] == mates2[j] && !gQuiet) {
|
|||
|
cerr << "Warning: Same mate file \"" << mates1[i].c_str() << "\" appears as argument to both -1 and -2" << endl;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
// If both -s and -u are used, we need to adjust qUpto accordingly
|
|||
|
// since it uses rdid to know if we've reached the -u limit (and
|
|||
|
// rdids are all shifted up by skipReads characters)
|
|||
|
if(qUpto + skipReads > qUpto) {
|
|||
|
qUpto += skipReads;
|
|||
|
}
|
|||
|
if(useShmem && useMm && !gQuiet) {
|
|||
|
cerr << "Warning: --shmem overrides --mm..." << endl;
|
|||
|
useMm = false;
|
|||
|
}
|
|||
|
if(gGapBarrier < 1) {
|
|||
|
cerr << "Warning: --gbar was set less than 1 (=" << gGapBarrier
|
|||
|
<< "); setting to 1 instead" << endl;
|
|||
|
gGapBarrier = 1;
|
|||
|
}
|
|||
|
if(multiseedMms >= multiseedLen) {
|
|||
|
assert_gt(multiseedLen, 0);
|
|||
|
cerr << "Warning: seed mismatches (" << multiseedMms
|
|||
|
<< ") is less than seed length (" << multiseedLen
|
|||
|
<< "); setting mismatches to " << (multiseedMms-1)
|
|||
|
<< " instead" << endl;
|
|||
|
multiseedMms = multiseedLen-1;
|
|||
|
}
|
|||
|
sam_print_zm = sam_print_zm && bowtie2p5;
|
|||
|
#ifndef NDEBUG
|
|||
|
if(!gQuiet) {
|
|||
|
cerr << "Warning: Running in debug mode. Please use debug mode only "
|
|||
|
<< "for diagnosing errors, and not for typical use of HISAT2."
|
|||
|
<< endl;
|
|||
|
}
|
|||
|
#endif
|
|||
|
}
|
|||
|
|
|||
|
// File-local pointer to a C string; it is initialised to NULL here.
// NOTE(review): by its name this presumably holds the program's argv[0]
// once startup code assigns it -- confirm against the entry point.
static const char *argv0 = NULL;
|
|||
|
|
|||
|
/// Create a PatternSourcePerThread for the current thread according
|
|||
|
/// to the global params and return a pointer to it
|
|||
|
static PatternSourcePerThreadFactory*
|
|||
|
createPatsrcFactory(PairedPatternSource& _patsrc, int tid) {
|
|||
|
PatternSourcePerThreadFactory *patsrcFact;
|
|||
|
patsrcFact = new WrappedPatternSourcePerThreadFactory(_patsrc);
|
|||
|
assert(patsrcFact != NULL);
|
|||
|
return patsrcFact;
|
|||
|
}
|
|||
|
|
|||
|
// Bitwise OR of the two pthread detach-state constants.
// NOTE(review): POSIX defines joinable and detached as mutually exclusive
// detach states, so combining them with | looks suspicious -- confirm
// whether this macro is still used anywhere before relying on its value.
#define PTHREAD_ATTRS (PTHREAD_CREATE_JOINABLE | PTHREAD_CREATE_DETACHED)
|
|||
|
|
|||
|
typedef TIndexOffU index_t;
|
|||
|
typedef uint16_t local_index_t;
|
|||
|
static PairedPatternSource* multiseed_patsrc;
|
|||
|
static HGFM<index_t>* multiseed_gfm;
|
|||
|
static RFM<index_t>* multiseed_rgfm;
|
|||
|
//static HGFM<index_t>* multiseed_gfms[2];
|
|||
|
//static RFM<index_t>* multiseed_rgfms[2];
|
|||
|
static Scoring* multiseed_sc;
|
|||
|
static BitPairReference* multiseed_refs;
|
|||
|
static BitPairReference* multiseed_rrefs;
|
|||
|
//static BitPairReference* multiseed_refss[2];
|
|||
|
//static BitPairReference* multiseed_rrefss[2];
|
|||
|
static AlnSink<index_t>* multiseed_msink;
|
|||
|
static OutFileBuf* multiseed_metricsOfb;
|
|||
|
static SpliceSiteDB* ssdb;
|
|||
|
static ALTDB<index_t>* altdb;
|
|||
|
static RepeatDB<index_t>* repeatdb;
|
|||
|
static ALTDB<index_t>* raltdb;
|
|||
|
|
|||
|
static ALTDB<index_t> *altdbs_3N[2];
|
|||
|
static RepeatDB<index_t> *repeatdbs_3N[2];
|
|||
|
static ALTDB<index_t> *raltdbs_3N[2];
|
|||
|
static TranscriptomePolicy* multiseed_tpol;
|
|||
|
static GraphPolicy* gpol;
|
|||
|
|
|||
|
|
|||
|
class reference3N {
|
|||
|
public:
|
|||
|
const HGFM<index_t>* multiseed_gfm[2];
|
|||
|
const RFM<index_t>* multiseed_rgfm[2];
|
|||
|
const BitPairReference* multiseed_rrefs[2];
|
|||
|
|
|||
|
reference3N() {
|
|||
|
|
|||
|
}
|
|||
|
|
|||
|
void load(EList<HGFM<index_t>* >& gfms_3N,
|
|||
|
RFM<index_t>* rgfms_3N[2],
|
|||
|
BitPairReference* rrefss[2]) {
|
|||
|
for (int i = 0; i < 2; i++) {
|
|||
|
multiseed_gfm[i] = gfms_3N[i];
|
|||
|
multiseed_rgfm[i] = rgfms_3N[i];
|
|||
|
multiseed_rrefs[i] = rrefss[i];
|
|||
|
}
|
|||
|
}
|
|||
|
};
|
|||
|
|
|||
|
// Global reference3N instance; its slots are filled by reference3N::load().
// It is not declared static, so the name has external linkage.
reference3N ref3N;
|
|||
|
|
|||
|
/**
|
|||
|
* Metrics for measuring the work done by the outer read alignment
|
|||
|
* loop.
|
|||
|
*/
|
|||
|
struct OuterLoopMetrics {
|
|||
|
|
|||
|
OuterLoopMetrics() {
|
|||
|
reset();
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Set all counters to 0.
|
|||
|
*/
|
|||
|
void reset() {
|
|||
|
reads = bases = srreads = srbases =
|
|||
|
freads = fbases = ureads = ubases = 0;
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Sum the counters in m in with the conters in this object. This
|
|||
|
* is the only safe way to update an OuterLoopMetrics that's shared
|
|||
|
* by multiple threads.
|
|||
|
*/
|
|||
|
void merge(
|
|||
|
const OuterLoopMetrics& m,
|
|||
|
bool getLock = false)
|
|||
|
{
|
|||
|
ThreadSafe ts(&mutex_m, getLock);
|
|||
|
reads += m.reads;
|
|||
|
bases += m.bases;
|
|||
|
srreads += m.srreads;
|
|||
|
srbases += m.srbases;
|
|||
|
freads += m.freads;
|
|||
|
fbases += m.fbases;
|
|||
|
ureads += m.ureads;
|
|||
|
ubases += m.ubases;
|
|||
|
}
|
|||
|
|
|||
|
uint64_t reads; // total reads
|
|||
|
uint64_t bases; // total bases
|
|||
|
uint64_t srreads; // same-read reads
|
|||
|
uint64_t srbases; // same-read bases
|
|||
|
uint64_t freads; // filtered reads
|
|||
|
uint64_t fbases; // filtered bases
|
|||
|
uint64_t ureads; // unfiltered reads
|
|||
|
uint64_t ubases; // unfiltered bases
|
|||
|
MUTEX_T mutex_m;
|
|||
|
};
|
|||
|
|
|||
|
/**
|
|||
|
* Collection of all relevant performance metrics when aligning in
|
|||
|
* multiseed mode.
|
|||
|
*/
|
|||
|
struct PerfMetrics {
|
|||
|
|
|||
|
/// Set first to true (reportInterval() emits the column header while it
/// is true) and clear every metric.
PerfMetrics() : first(true) {
    reset();
}
|
|||
|
|
|||
|
/**
|
|||
|
* Set all counters to 0.
|
|||
|
*/
|
|||
|
void reset() {
|
|||
|
olm.reset();
|
|||
|
sdm.reset();
|
|||
|
wlm.reset();
|
|||
|
swmSeed.reset();
|
|||
|
swmMate.reset();
|
|||
|
rpm.reset();
|
|||
|
dpSse8Seed.reset(); // 8-bit SSE seed extensions
|
|||
|
dpSse8Mate.reset(); // 8-bit SSE mate finds
|
|||
|
dpSse16Seed.reset(); // 16-bit SSE seed extensions
|
|||
|
dpSse16Mate.reset(); // 16-bit SSE mate finds
|
|||
|
nbtfiltst = 0;
|
|||
|
nbtfiltsc = 0;
|
|||
|
nbtfiltdo = 0;
|
|||
|
|
|||
|
olmu.reset();
|
|||
|
sdmu.reset();
|
|||
|
wlmu.reset();
|
|||
|
swmuSeed.reset();
|
|||
|
swmuMate.reset();
|
|||
|
rpmu.reset();
|
|||
|
dpSse8uSeed.reset(); // 8-bit SSE seed extensions
|
|||
|
dpSse8uMate.reset(); // 8-bit SSE mate finds
|
|||
|
dpSse16uSeed.reset(); // 16-bit SSE seed extensions
|
|||
|
dpSse16uMate.reset(); // 16-bit SSE mate finds
|
|||
|
nbtfiltst_u = 0;
|
|||
|
nbtfiltsc_u = 0;
|
|||
|
nbtfiltdo_u = 0;
|
|||
|
|
|||
|
him.reset();
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Merge a set of specific metrics into this object.
|
|||
|
*/
|
|||
|
void merge(
|
|||
|
const OuterLoopMetrics *ol,
|
|||
|
const SeedSearchMetrics *sd,
|
|||
|
const WalkMetrics *wl,
|
|||
|
const SwMetrics *swSeed,
|
|||
|
const SwMetrics *swMate,
|
|||
|
const ReportingMetrics *rm,
|
|||
|
const SSEMetrics *dpSse8Ex,
|
|||
|
const SSEMetrics *dpSse8Ma,
|
|||
|
const SSEMetrics *dpSse16Ex,
|
|||
|
const SSEMetrics *dpSse16Ma,
|
|||
|
uint64_t nbtfiltst_,
|
|||
|
uint64_t nbtfiltsc_,
|
|||
|
uint64_t nbtfiltdo_,
|
|||
|
const HIMetrics *hi,
|
|||
|
bool getLock)
|
|||
|
{
|
|||
|
ThreadSafe ts(&mutex_m, getLock);
|
|||
|
if(ol != NULL) {
|
|||
|
olmu.merge(*ol, false);
|
|||
|
}
|
|||
|
if(sd != NULL) {
|
|||
|
sdmu.merge(*sd, false);
|
|||
|
}
|
|||
|
if(wl != NULL) {
|
|||
|
wlmu.merge(*wl, false);
|
|||
|
}
|
|||
|
if(swSeed != NULL) {
|
|||
|
swmuSeed.merge(*swSeed, false);
|
|||
|
}
|
|||
|
if(swMate != NULL) {
|
|||
|
swmuMate.merge(*swMate, false);
|
|||
|
}
|
|||
|
if(rm != NULL) {
|
|||
|
rpmu.merge(*rm, false);
|
|||
|
}
|
|||
|
if(dpSse8Ex != NULL) {
|
|||
|
dpSse8uSeed.merge(*dpSse8Ex, false);
|
|||
|
}
|
|||
|
if(dpSse8Ma != NULL) {
|
|||
|
dpSse8uMate.merge(*dpSse8Ma, false);
|
|||
|
}
|
|||
|
if(dpSse16Ex != NULL) {
|
|||
|
dpSse16uSeed.merge(*dpSse16Ex, false);
|
|||
|
}
|
|||
|
if(dpSse16Ma != NULL) {
|
|||
|
dpSse16uMate.merge(*dpSse16Ma, false);
|
|||
|
}
|
|||
|
nbtfiltst_u += nbtfiltst_;
|
|||
|
nbtfiltsc_u += nbtfiltsc_;
|
|||
|
nbtfiltdo_u += nbtfiltdo_;
|
|||
|
if(hi != NULL) {
|
|||
|
him.merge(*hi, false);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Reports a matrix of results, incl. column labels, to an OutFileBuf.
|
|||
|
* Optionally also sends results to stderr (unbuffered). Can optionally
|
|||
|
* print a per-read record with the read name at the beginning.
|
|||
|
*/
|
|||
|
void reportInterval(
|
|||
|
OutFileBuf* o, // file to send output to
|
|||
|
bool metricsStderr, // additionally output to stderr?
|
|||
|
bool total, // true -> report total, otherwise incremental
|
|||
|
bool sync, // synchronize output
|
|||
|
const BTString *name) // non-NULL name pointer if is per-read record
|
|||
|
{
|
|||
|
ThreadSafe ts(&mutex_m, sync);
|
|||
|
ostringstream stderrSs;
|
|||
|
time_t curtime = time(0);
|
|||
|
char buf[1024];
|
|||
|
if(first) {
|
|||
|
const char *str =
|
|||
|
/* 1 */ "Time" "\t"
|
|||
|
/* 2 */ "Read" "\t"
|
|||
|
/* 3 */ "Base" "\t"
|
|||
|
/* 4 */ "SameRead" "\t"
|
|||
|
/* 5 */ "SameReadBase" "\t"
|
|||
|
/* 6 */ "UnfilteredRead" "\t"
|
|||
|
/* 7 */ "UnfilteredBase" "\t"
|
|||
|
|
|||
|
/* 8 */ "Paired" "\t"
|
|||
|
/* 9 */ "Unpaired" "\t"
|
|||
|
/* 10 */ "AlConUni" "\t"
|
|||
|
/* 11 */ "AlConRep" "\t"
|
|||
|
/* 12 */ "AlConFail" "\t"
|
|||
|
/* 13 */ "AlDis" "\t"
|
|||
|
/* 14 */ "AlConFailUni" "\t"
|
|||
|
/* 15 */ "AlConFailRep" "\t"
|
|||
|
/* 16 */ "AlConFailFail" "\t"
|
|||
|
/* 17 */ "AlConRepUni" "\t"
|
|||
|
/* 18 */ "AlConRepRep" "\t"
|
|||
|
/* 19 */ "AlConRepFail" "\t"
|
|||
|
/* 20 */ "AlUnpUni" "\t"
|
|||
|
/* 21 */ "AlUnpRep" "\t"
|
|||
|
/* 22 */ "AlUnpFail" "\t"
|
|||
|
|
|||
|
/* 23 */ "SeedSearch" "\t"
|
|||
|
/* 24 */ "IntraSCacheHit" "\t"
|
|||
|
/* 25 */ "InterSCacheHit" "\t"
|
|||
|
/* 26 */ "OutOfMemory" "\t"
|
|||
|
/* 27 */ "AlBWOp" "\t"
|
|||
|
/* 28 */ "AlBWBranch" "\t"
|
|||
|
/* 29 */ "ResBWOp" "\t"
|
|||
|
/* 30 */ "ResBWBranch" "\t"
|
|||
|
/* 31 */ "ResResolve" "\t"
|
|||
|
/* 34 */ "ResReport" "\t"
|
|||
|
/* 35 */ "RedundantSHit" "\t"
|
|||
|
|
|||
|
/* 36 */ "BestMinEdit0" "\t"
|
|||
|
/* 37 */ "BestMinEdit1" "\t"
|
|||
|
/* 38 */ "BestMinEdit2" "\t"
|
|||
|
|
|||
|
/* 39 */ "ExactAttempts" "\t"
|
|||
|
/* 40 */ "ExactSucc" "\t"
|
|||
|
/* 41 */ "ExactRanges" "\t"
|
|||
|
/* 42 */ "ExactRows" "\t"
|
|||
|
/* 43 */ "ExactOOMs" "\t"
|
|||
|
|
|||
|
/* 44 */ "1mmAttempts" "\t"
|
|||
|
/* 45 */ "1mmSucc" "\t"
|
|||
|
/* 46 */ "1mmRanges" "\t"
|
|||
|
/* 47 */ "1mmRows" "\t"
|
|||
|
/* 48 */ "1mmOOMs" "\t"
|
|||
|
|
|||
|
/* 49 */ "UngappedSucc" "\t"
|
|||
|
/* 50 */ "UngappedFail" "\t"
|
|||
|
/* 51 */ "UngappedNoDec" "\t"
|
|||
|
|
|||
|
/* 52 */ "DPExLt10Gaps" "\t"
|
|||
|
/* 53 */ "DPExLt5Gaps" "\t"
|
|||
|
/* 54 */ "DPExLt3Gaps" "\t"
|
|||
|
|
|||
|
/* 55 */ "DPMateLt10Gaps" "\t"
|
|||
|
/* 56 */ "DPMateLt5Gaps" "\t"
|
|||
|
/* 57 */ "DPMateLt3Gaps" "\t"
|
|||
|
|
|||
|
/* 58 */ "DP16ExDps" "\t"
|
|||
|
/* 59 */ "DP16ExDpSat" "\t"
|
|||
|
/* 60 */ "DP16ExDpFail" "\t"
|
|||
|
/* 61 */ "DP16ExDpSucc" "\t"
|
|||
|
/* 62 */ "DP16ExCol" "\t"
|
|||
|
/* 63 */ "DP16ExCell" "\t"
|
|||
|
/* 64 */ "DP16ExInner" "\t"
|
|||
|
/* 65 */ "DP16ExFixup" "\t"
|
|||
|
/* 66 */ "DP16ExGathSol" "\t"
|
|||
|
/* 67 */ "DP16ExBt" "\t"
|
|||
|
/* 68 */ "DP16ExBtFail" "\t"
|
|||
|
/* 69 */ "DP16ExBtSucc" "\t"
|
|||
|
/* 70 */ "DP16ExBtCell" "\t"
|
|||
|
/* 71 */ "DP16ExCoreRej" "\t"
|
|||
|
/* 72 */ "DP16ExNRej" "\t"
|
|||
|
|
|||
|
/* 73 */ "DP8ExDps" "\t"
|
|||
|
/* 74 */ "DP8ExDpSat" "\t"
|
|||
|
/* 75 */ "DP8ExDpFail" "\t"
|
|||
|
/* 76 */ "DP8ExDpSucc" "\t"
|
|||
|
/* 77 */ "DP8ExCol" "\t"
|
|||
|
/* 78 */ "DP8ExCell" "\t"
|
|||
|
/* 79 */ "DP8ExInner" "\t"
|
|||
|
/* 80 */ "DP8ExFixup" "\t"
|
|||
|
/* 81 */ "DP8ExGathSol" "\t"
|
|||
|
/* 82 */ "DP8ExBt" "\t"
|
|||
|
/* 83 */ "DP8ExBtFail" "\t"
|
|||
|
/* 84 */ "DP8ExBtSucc" "\t"
|
|||
|
/* 85 */ "DP8ExBtCell" "\t"
|
|||
|
/* 86 */ "DP8ExCoreRej" "\t"
|
|||
|
/* 87 */ "DP8ExNRej" "\t"
|
|||
|
|
|||
|
/* 88 */ "DP16MateDps" "\t"
|
|||
|
/* 89 */ "DP16MateDpSat" "\t"
|
|||
|
/* 90 */ "DP16MateDpFail" "\t"
|
|||
|
/* 91 */ "DP16MateDpSucc" "\t"
|
|||
|
/* 92 */ "DP16MateCol" "\t"
|
|||
|
/* 93 */ "DP16MateCell" "\t"
|
|||
|
/* 94 */ "DP16MateInner" "\t"
|
|||
|
/* 95 */ "DP16MateFixup" "\t"
|
|||
|
/* 96 */ "DP16MateGathSol" "\t"
|
|||
|
/* 97 */ "DP16MateBt" "\t"
|
|||
|
/* 98 */ "DP16MateBtFail" "\t"
|
|||
|
/* 99 */ "DP16MateBtSucc" "\t"
|
|||
|
/* 100 */ "DP16MateBtCell" "\t"
|
|||
|
/* 101 */ "DP16MateCoreRej" "\t"
|
|||
|
/* 102 */ "DP16MateNRej" "\t"
|
|||
|
|
|||
|
/* 103 */ "DP8MateDps" "\t"
|
|||
|
/* 104 */ "DP8MateDpSat" "\t"
|
|||
|
/* 105 */ "DP8MateDpFail" "\t"
|
|||
|
/* 106 */ "DP8MateDpSucc" "\t"
|
|||
|
/* 107 */ "DP8MateCol" "\t"
|
|||
|
/* 108 */ "DP8MateCell" "\t"
|
|||
|
/* 109 */ "DP8MateInner" "\t"
|
|||
|
/* 110 */ "DP8MateFixup" "\t"
|
|||
|
/* 111 */ "DP8MateGathSol" "\t"
|
|||
|
/* 112 */ "DP8MateBt" "\t"
|
|||
|
/* 113 */ "DP8MateBtFail" "\t"
|
|||
|
/* 114 */ "DP8MateBtSucc" "\t"
|
|||
|
/* 115 */ "DP8MateBtCell" "\t"
|
|||
|
/* 116 */ "DP8MateCoreRej" "\t"
|
|||
|
/* 117 */ "DP8MateNRej" "\t"
|
|||
|
|
|||
|
/* 118 */ "DPBtFiltStart" "\t"
|
|||
|
/* 119 */ "DPBtFiltScore" "\t"
|
|||
|
/* 120 */ "DpBtFiltDom" "\t"
|
|||
|
|
|||
|
/* 121 */ "MemPeak" "\t"
|
|||
|
/* 122 */ "UncatMemPeak" "\t" // 0
|
|||
|
/* 123 */ "EbwtMemPeak" "\t" // EBWT_CAT
|
|||
|
/* 124 */ "CacheMemPeak" "\t" // CA_CAT
|
|||
|
/* 125 */ "ResolveMemPeak" "\t" // GW_CAT
|
|||
|
/* 126 */ "AlignMemPeak" "\t" // AL_CAT
|
|||
|
/* 127 */ "DPMemPeak" "\t" // DP_CAT
|
|||
|
/* 128 */ "MiscMemPeak" "\t" // MISC_CAT
|
|||
|
/* 129 */ "DebugMemPeak" "\t" // DEBUG_CAT
|
|||
|
|
|||
|
/* 130 */ "LocalSearch" "\t"
|
|||
|
/* 131 */ "AnchorSearch" "\t"
|
|||
|
/* 132 */ "LocalIndexSearch" "\t"
|
|||
|
/* 133 */ "LocalExtSearch" "\t"
|
|||
|
/* 134 */ "LocalSearchRecur" "\t"
|
|||
|
/* 135 */ "GlobalGenomeCoords" "\t"
|
|||
|
/* 136 */ "LocalGenomeCoords" "\t"
|
|||
|
|
|||
|
|
|||
|
"\n";
|
|||
|
|
|||
|
if(name != NULL) {
|
|||
|
if(o != NULL) o->writeChars("Name\t");
|
|||
|
if(metricsStderr) stderrSs << "Name\t";
|
|||
|
}
|
|||
|
|
|||
|
if(o != NULL) o->writeChars(str);
|
|||
|
if(metricsStderr) stderrSs << str;
|
|||
|
first = false;
|
|||
|
}
|
|||
|
|
|||
|
if(total) mergeIncrementals();
|
|||
|
|
|||
|
// 0. Read name, if needed
|
|||
|
if(name != NULL) {
|
|||
|
if(o != NULL) {
|
|||
|
o->writeChars(name->toZBuf());
|
|||
|
o->write('\t');
|
|||
|
}
|
|||
|
if(metricsStderr) {
|
|||
|
stderrSs << (*name) << '\t';
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// 1. Current time in secs
|
|||
|
itoa10<time_t>(curtime, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const OuterLoopMetrics& ol = total ? olm : olmu;
|
|||
|
|
|||
|
// 2. Reads
|
|||
|
itoa10<uint64_t>(ol.reads, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 3. Bases
|
|||
|
itoa10<uint64_t>(ol.bases, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 4. Same-read reads
|
|||
|
itoa10<uint64_t>(ol.srreads, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 5. Same-read bases
|
|||
|
itoa10<uint64_t>(ol.srbases, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 6. Unfiltered reads
|
|||
|
itoa10<uint64_t>(ol.ureads, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 7. Unfiltered bases
|
|||
|
itoa10<uint64_t>(ol.ubases, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const ReportingMetrics& rp = total ? rpm : rpmu;
|
|||
|
|
|||
|
// 8. Paired reads
|
|||
|
itoa10<uint64_t>(rp.npaired, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 9. Unpaired reads
|
|||
|
itoa10<uint64_t>(rp.nunpaired, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 10. Pairs with unique concordant alignments
|
|||
|
itoa10<uint64_t>(rp.nconcord_uni, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 11. Pairs with repetitive concordant alignments
|
|||
|
itoa10<uint64_t>(rp.nconcord_rep, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 12. Pairs with 0 concordant alignments
|
|||
|
itoa10<uint64_t>(rp.nconcord_0, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 13. Pairs with 1 discordant alignment
|
|||
|
itoa10<uint64_t>(rp.ndiscord, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 14. Mates from unaligned pairs that align uniquely
|
|||
|
itoa10<uint64_t>(rp.nunp_0_uni, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 15. Mates from unaligned pairs that align repetitively
|
|||
|
itoa10<uint64_t>(rp.nunp_0_rep, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 16. Mates from unaligned pairs that fail to align
|
|||
|
itoa10<uint64_t>(rp.nunp_0_0, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 17. Mates from repetitive pairs that align uniquely
|
|||
|
itoa10<uint64_t>(rp.nunp_rep_uni, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 18. Mates from repetitive pairs that align repetitively
|
|||
|
itoa10<uint64_t>(rp.nunp_rep_rep, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 19. Mates from repetitive pairs that fail to align
|
|||
|
itoa10<uint64_t>(rp.nunp_rep_0, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 20. Unpaired reads that align uniquely
|
|||
|
itoa10<uint64_t>(rp.nunp_uni, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 21. Unpaired reads that align repetitively
|
|||
|
itoa10<uint64_t>(rp.nunp_rep, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 22. Unpaired reads that fail to align
|
|||
|
itoa10<uint64_t>(rp.nunp_0, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const SeedSearchMetrics& sd = total ? sdm : sdmu;
|
|||
|
|
|||
|
// 23. Seed searches
|
|||
|
itoa10<uint64_t>(sd.seedsearch, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 24. Hits in 'current' cache
|
|||
|
itoa10<uint64_t>(sd.intrahit, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 25. Hits in 'local' cache
|
|||
|
itoa10<uint64_t>(sd.interhit, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 26. Out of memory
|
|||
|
itoa10<uint64_t>(sd.ooms, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 27. Burrows-Wheeler ops in aligner
|
|||
|
itoa10<uint64_t>(sd.bwops, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 28. Burrows-Wheeler branches (edits) in aligner
|
|||
|
itoa10<uint64_t>(sd.bweds, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const WalkMetrics& wl = total ? wlm : wlmu;
|
|||
|
|
|||
|
// 29. Burrows-Wheeler ops in resolver
|
|||
|
itoa10<uint64_t>(wl.bwops, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 30. Burrows-Wheeler branches in resolver
|
|||
|
itoa10<uint64_t>(wl.branches, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 31. Burrows-Wheeler offset resolutions
|
|||
|
itoa10<uint64_t>(wl.resolves, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 34. Offset reports
|
|||
|
itoa10<uint64_t>(wl.reports, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 35. Redundant seed hit
|
|||
|
itoa10<uint64_t>(total ? swmSeed.rshit : swmuSeed.rshit, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 36. # times the best (out of fw/rc) minimum # edits was 0
|
|||
|
itoa10<uint64_t>(total ? sdm.bestmin0 : sdmu.bestmin0, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 37. # times the best (out of fw/rc) minimum # edits was 1
|
|||
|
itoa10<uint64_t>(total ? sdm.bestmin1 : sdmu.bestmin1, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 38. # times the best (out of fw/rc) minimum # edits was 2
|
|||
|
itoa10<uint64_t>(total ? sdm.bestmin2 : sdmu.bestmin2, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 39. Exact aligner attempts
|
|||
|
itoa10<uint64_t>(total ? swmSeed.exatts : swmuSeed.exatts, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 40. Exact aligner successes
|
|||
|
itoa10<uint64_t>(total ? swmSeed.exsucc : swmuSeed.exsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 41. Exact aligner ranges
|
|||
|
itoa10<uint64_t>(total ? swmSeed.exranges : swmuSeed.exranges, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 42. Exact aligner rows
|
|||
|
itoa10<uint64_t>(total ? swmSeed.exrows : swmuSeed.exrows, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 43. Exact aligner OOMs
|
|||
|
itoa10<uint64_t>(total ? swmSeed.exooms : swmuSeed.exooms, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 44. 1mm aligner attempts
|
|||
|
itoa10<uint64_t>(total ? swmSeed.mm1atts : swmuSeed.mm1atts, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 45. 1mm aligner successes
|
|||
|
itoa10<uint64_t>(total ? swmSeed.mm1succ : swmuSeed.mm1succ, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 46. 1mm aligner ranges
|
|||
|
itoa10<uint64_t>(total ? swmSeed.mm1ranges : swmuSeed.mm1ranges, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 47. 1mm aligner rows
|
|||
|
itoa10<uint64_t>(total ? swmSeed.mm1rows : swmuSeed.mm1rows, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 48. 1mm aligner OOMs
|
|||
|
itoa10<uint64_t>(total ? swmSeed.mm1ooms : swmuSeed.mm1ooms, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 49 Ungapped aligner success
|
|||
|
itoa10<uint64_t>(total ? swmSeed.ungapsucc : swmuSeed.ungapsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 50. Ungapped aligner fail
|
|||
|
itoa10<uint64_t>(total ? swmSeed.ungapfail : swmuSeed.ungapfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 51. Ungapped aligner no decision
|
|||
|
itoa10<uint64_t>(total ? swmSeed.ungapnodec : swmuSeed.ungapnodec, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 52. # seed-extend DPs with < 10 gaps
|
|||
|
itoa10<uint64_t>(total ? swmSeed.sws10 : swmuSeed.sws10, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 53. # seed-extend DPs with < 5 gaps
|
|||
|
itoa10<uint64_t>(total ? swmSeed.sws5 : swmuSeed.sws5, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 54. # seed-extend DPs with < 3 gaps
|
|||
|
itoa10<uint64_t>(total ? swmSeed.sws3 : swmuSeed.sws3, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 55. # seed-extend DPs with < 10 gaps
|
|||
|
itoa10<uint64_t>(total ? swmMate.sws10 : swmuMate.sws10, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 56. # seed-extend DPs with < 5 gaps
|
|||
|
itoa10<uint64_t>(total ? swmMate.sws5 : swmuMate.sws5, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 57. # seed-extend DPs with < 3 gaps
|
|||
|
itoa10<uint64_t>(total ? swmMate.sws3 : swmuMate.sws3, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const SSEMetrics& dpSse16s = total ? dpSse16Seed : dpSse16uSeed;
|
|||
|
|
|||
|
// 58. 16-bit SSE seed-extend DPs tried
|
|||
|
itoa10<uint64_t>(dpSse16s.dp, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 59. 16-bit SSE seed-extend DPs saturated
|
|||
|
itoa10<uint64_t>(dpSse16s.dpsat, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 60. 16-bit SSE seed-extend DPs failed
|
|||
|
itoa10<uint64_t>(dpSse16s.dpfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 61. 16-bit SSE seed-extend DPs succeeded
|
|||
|
itoa10<uint64_t>(dpSse16s.dpsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 62. 16-bit SSE seed-extend DP columns completed
|
|||
|
itoa10<uint64_t>(dpSse16s.col, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 63. 16-bit SSE seed-extend DP cells completed
|
|||
|
itoa10<uint64_t>(dpSse16s.cell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 64. 16-bit SSE seed-extend DP inner loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse16s.inner, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 65. 16-bit SSE seed-extend DP fixup loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse16s.fixup, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 66. 16-bit SSE seed-extend DP gather, cells with potential solutions
|
|||
|
itoa10<uint64_t>(dpSse16s.gathsol, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 67. 16-bit SSE seed-extend DP backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse16s.bt, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 68. 16-bit SSE seed-extend DP failed backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse16s.btfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 69. 16-bit SSE seed-extend DP succesful backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse16s.btsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 70. 16-bit SSE seed-extend DP backtrace cells
|
|||
|
itoa10<uint64_t>(dpSse16s.btcell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 71. 16-bit SSE seed-extend DP core-diag rejections
|
|||
|
itoa10<uint64_t>(dpSse16s.corerej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 72. 16-bit SSE seed-extend DP N rejections
|
|||
|
itoa10<uint64_t>(dpSse16s.nrej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const SSEMetrics& dpSse8s = total ? dpSse8Seed : dpSse8uSeed;
|
|||
|
|
|||
|
// 73. 8-bit SSE seed-extend DPs tried
|
|||
|
itoa10<uint64_t>(dpSse8s.dp, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 74. 8-bit SSE seed-extend DPs saturated
|
|||
|
itoa10<uint64_t>(dpSse8s.dpsat, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 75. 8-bit SSE seed-extend DPs failed
|
|||
|
itoa10<uint64_t>(dpSse8s.dpfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 76. 8-bit SSE seed-extend DPs succeeded
|
|||
|
itoa10<uint64_t>(dpSse8s.dpsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 77. 8-bit SSE seed-extend DP columns completed
|
|||
|
itoa10<uint64_t>(dpSse8s.col, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 78. 8-bit SSE seed-extend DP cells completed
|
|||
|
itoa10<uint64_t>(dpSse8s.cell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 79. 8-bit SSE seed-extend DP inner loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse8s.inner, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 80. 8-bit SSE seed-extend DP fixup loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse8s.fixup, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 81. 16-bit SSE seed-extend DP gather, cells with potential solutions
|
|||
|
itoa10<uint64_t>(dpSse8s.gathsol, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 82. 8-bit SSE seed-extend DP backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse8s.bt, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 83. 8-bit SSE seed-extend DP failed backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse8s.btfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 84. 8-bit SSE seed-extend DP successful backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse8s.btsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 85. 8-bit SSE seed-extend DP backtrace cells
|
|||
|
itoa10<uint64_t>(dpSse8s.btcell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 86. 8-bit SSE seed-extend DP core-diag rejections
|
|||
|
itoa10<uint64_t>(dpSse8s.corerej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 87. 8-bit SSE seed-extend DP N rejections
|
|||
|
itoa10<uint64_t>(dpSse8s.nrej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const SSEMetrics& dpSse16m = total ? dpSse16Mate : dpSse16uMate;
|
|||
|
|
|||
|
// 88. 16-bit SSE mate-finding DPs tried
|
|||
|
itoa10<uint64_t>(dpSse16m.dp, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 89. 16-bit SSE mate-finding DPs saturated
|
|||
|
itoa10<uint64_t>(dpSse16m.dpsat, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 90. 16-bit SSE mate-finding DPs failed
|
|||
|
itoa10<uint64_t>(dpSse16m.dpfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 91. 16-bit SSE mate-finding DPs succeeded
|
|||
|
itoa10<uint64_t>(dpSse16m.dpsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 92. 16-bit SSE mate-finding DP columns completed
|
|||
|
itoa10<uint64_t>(dpSse16m.col, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 93. 16-bit SSE mate-finding DP cells completed
|
|||
|
itoa10<uint64_t>(dpSse16m.cell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 94. 16-bit SSE mate-finding DP inner loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse16m.inner, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 95. 16-bit SSE mate-finding DP fixup loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse16m.fixup, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 96. 16-bit SSE mate-finding DP gather, cells with potential solutions
|
|||
|
itoa10<uint64_t>(dpSse16m.gathsol, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 97. 16-bit SSE mate-finding DP backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse16m.bt, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 98. 16-bit SSE mate-finding DP failed backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse16m.btfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 99. 16-bit SSE mate-finding DP successful backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse16m.btsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 100. 16-bit SSE mate-finding DP backtrace cells
|
|||
|
itoa10<uint64_t>(dpSse16m.btcell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 101. 16-bit SSE mate-finding DP core-diag rejections
|
|||
|
itoa10<uint64_t>(dpSse16m.corerej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 102. 16-bit SSE mate-finding DP N rejections
|
|||
|
itoa10<uint64_t>(dpSse16m.nrej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
const SSEMetrics& dpSse8m = total ? dpSse8Mate : dpSse8uMate;
|
|||
|
|
|||
|
// 103. 8-bit SSE mate-finding DPs tried
|
|||
|
itoa10<uint64_t>(dpSse8m.dp, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 104. 8-bit SSE mate-finding DPs saturated
|
|||
|
itoa10<uint64_t>(dpSse8m.dpsat, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 105. 8-bit SSE mate-finding DPs failed
|
|||
|
itoa10<uint64_t>(dpSse8m.dpfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 106. 8-bit SSE mate-finding DPs succeeded
|
|||
|
itoa10<uint64_t>(dpSse8m.dpsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 107. 8-bit SSE mate-finding DP columns completed
|
|||
|
itoa10<uint64_t>(dpSse8m.col, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 108. 8-bit SSE mate-finding DP cells completed
|
|||
|
itoa10<uint64_t>(dpSse8m.cell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 109. 8-bit SSE mate-finding DP inner loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse8m.inner, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 110. 8-bit SSE mate-finding DP fixup loop iters completed
|
|||
|
itoa10<uint64_t>(dpSse8m.fixup, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 111. 8-bit SSE mate-finding DP gather, cells with potential solutions
|
|||
|
itoa10<uint64_t>(dpSse8m.gathsol, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 112. 8-bit SSE mate-finding DP backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse8m.bt, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 113. 8-bit SSE mate-finding DP failed backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse8m.btfail, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 114. 8-bit SSE mate-finding DP successful backtrace attempts
|
|||
|
itoa10<uint64_t>(dpSse8m.btsucc, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 115. 8-bit SSE mate-finding DP backtrace cells
|
|||
|
itoa10<uint64_t>(dpSse8m.btcell, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 116. 8-bit SSE mate-finding DP core-diag rejections
|
|||
|
itoa10<uint64_t>(dpSse8m.corerej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 117. 8-bit SSE mate-finding DP N rejections
|
|||
|
itoa10<uint64_t>(dpSse8m.nrej, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 118. Backtrace candidates filtered due to starting cell
|
|||
|
itoa10<uint64_t>(total ? nbtfiltst : nbtfiltst_u, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 119. Backtrace candidates filtered due to low score
|
|||
|
itoa10<uint64_t>(total ? nbtfiltsc : nbtfiltsc_u, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 120. Backtrace candidates filtered due to domination
|
|||
|
itoa10<uint64_t>(total ? nbtfiltdo : nbtfiltdo_u, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 121. Overall memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak() >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 122. Uncategorized memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(0) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 123. Ebwt memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(EBWT_CAT) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 124. Cache memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(CA_CAT) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 125. Resolver memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(GW_CAT) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 126. Seed aligner memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(AL_CAT) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 127. Dynamic programming aligner memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(DP_CAT) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 128. Miscellaneous memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(MISC_CAT) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 129. Debug memory peak
|
|||
|
itoa10<size_t>(gMemTally.peak(DEBUG_CAT) >> 20, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
|
|||
|
// 130
|
|||
|
itoa10<size_t>(him.localatts, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 131
|
|||
|
itoa10<size_t>(him.anchoratts, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 132
|
|||
|
itoa10<size_t>(him.localindexatts, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 133
|
|||
|
itoa10<size_t>(him.localextatts, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 134
|
|||
|
itoa10<size_t>(him.localsearchrecur, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 135
|
|||
|
itoa10<size_t>(him.globalgenomecoords, buf);
|
|||
|
if(metricsStderr) stderrSs << buf << '\t';
|
|||
|
if(o != NULL) { o->writeChars(buf); o->write('\t'); }
|
|||
|
// 136
|
|||
|
itoa10<size_t>(him.localgenomecoords, buf);
|
|||
|
if(metricsStderr) stderrSs << buf;
|
|||
|
if(o != NULL) { o->writeChars(buf); }
|
|||
|
|
|||
|
if(o != NULL) { o->write('\n'); }
|
|||
|
if(metricsStderr) cerr << stderrSs.str().c_str() << endl;
|
|||
|
if(!total) mergeIncrementals();
|
|||
|
}
|
|||
|
|
|||
|
void mergeIncrementals() {
|
|||
|
olm.merge(olmu, false);
|
|||
|
sdm.merge(sdmu, false);
|
|||
|
wlm.merge(wlmu, false);
|
|||
|
swmSeed.merge(swmuSeed, false);
|
|||
|
swmMate.merge(swmuMate, false);
|
|||
|
dpSse8Seed.merge(dpSse8uSeed, false);
|
|||
|
dpSse8Mate.merge(dpSse8uMate, false);
|
|||
|
dpSse16Seed.merge(dpSse16uSeed, false);
|
|||
|
dpSse16Mate.merge(dpSse16uMate, false);
|
|||
|
nbtfiltst_u += nbtfiltst;
|
|||
|
nbtfiltsc_u += nbtfiltsc;
|
|||
|
nbtfiltdo_u += nbtfiltdo;
|
|||
|
|
|||
|
olmu.reset();
|
|||
|
sdmu.reset();
|
|||
|
wlmu.reset();
|
|||
|
swmuSeed.reset();
|
|||
|
swmuMate.reset();
|
|||
|
rpmu.reset();
|
|||
|
dpSse8uSeed.reset();
|
|||
|
dpSse8uMate.reset();
|
|||
|
dpSse16uSeed.reset();
|
|||
|
dpSse16uMate.reset();
|
|||
|
nbtfiltst_u = 0;
|
|||
|
nbtfiltsc_u = 0;
|
|||
|
nbtfiltdo_u = 0;
|
|||
|
}
|
|||
|
|
|||
|
// Total over the whole job
|
|||
|
OuterLoopMetrics olm; // overall metrics
|
|||
|
SeedSearchMetrics sdm; // metrics related to seed alignment
|
|||
|
WalkMetrics wlm; // metrics related to walking left (i.e. resolving reference offsets)
|
|||
|
SwMetrics swmSeed; // metrics related to DP seed-extend alignment
|
|||
|
SwMetrics swmMate; // metrics related to DP mate-finding alignment
|
|||
|
ReportingMetrics rpm; // metrics related to reporting
|
|||
|
SSEMetrics dpSse8Seed; // 8-bit SSE seed extensions
|
|||
|
SSEMetrics dpSse8Mate; // 8-bit SSE mate finds
|
|||
|
SSEMetrics dpSse16Seed; // 16-bit SSE seed extensions
|
|||
|
SSEMetrics dpSse16Mate; // 16-bit SSE mate finds
|
|||
|
uint64_t nbtfiltst;
|
|||
|
uint64_t nbtfiltsc;
|
|||
|
uint64_t nbtfiltdo;
|
|||
|
|
|||
|
// Just since the last update
|
|||
|
OuterLoopMetrics olmu; // overall metrics
|
|||
|
SeedSearchMetrics sdmu; // metrics related to seed alignment
|
|||
|
WalkMetrics wlmu; // metrics related to walking left (i.e. resolving reference offsets)
|
|||
|
SwMetrics swmuSeed; // metrics related to DP seed-extend alignment
|
|||
|
SwMetrics swmuMate; // metrics related to DP mate-finding alignment
|
|||
|
ReportingMetrics rpmu; // metrics related to reporting
|
|||
|
SSEMetrics dpSse8uSeed; // 8-bit SSE seed extensions
|
|||
|
SSEMetrics dpSse8uMate; // 8-bit SSE mate finds
|
|||
|
SSEMetrics dpSse16uSeed; // 16-bit SSE seed extensions
|
|||
|
SSEMetrics dpSse16uMate; // 16-bit SSE mate finds
|
|||
|
uint64_t nbtfiltst_u;
|
|||
|
uint64_t nbtfiltsc_u;
|
|||
|
uint64_t nbtfiltdo_u;
|
|||
|
|
|||
|
//
|
|||
|
HIMetrics him;
|
|||
|
|
|||
|
MUTEX_T mutex_m; // lock member (original comment was truncated: "lock for when one ob"); presumably serializes updates to this object from several threads -- TODO confirm
|
|||
|
bool first; // yet to print first line?
|
|||
|
time_t lastElapsed; // used in reportInterval to measure time since last call
|
|||
|
};
|
|||
|
|
|||
|
// File-scope metrics object: worker threads merge their thread-local counters
// into it through MERGE_METRICS, and interval reports are printed from it.
static PerfMetrics metrics;
|
|||
|
|
|||
|
// Cyclic rotations of a 32-bit value.
//
// Argument order is (n, x): 'n' is the rotation count and 'x' is the value
// being rotated.  Both arguments are fully parenthesized so that expressions
// such as ROTL(a + b, v) expand correctly, and the shift counts are reduced
// modulo 32 so that n == 0 or n >= 32 no longer produces a shift by an
// out-of-range amount (undefined behaviour).
#define ROTL(n, x) (((x) << ((n) & 31)) | ((x) >> ((32 - ((n) & 31)) & 31)))
#define ROTR(n, x) (((x) >> ((n) & 31)) | ((x) << ((32 - ((n) & 31)) & 31)))
|
|||
|
|
|||
|
static inline void printMmsSkipMsg(
|
|||
|
const PatternSourcePerThread& ps,
|
|||
|
bool paired,
|
|||
|
bool mate1,
|
|||
|
int seedmms)
|
|||
|
{
|
|||
|
ostringstream os;
|
|||
|
if(paired) {
|
|||
|
os << "Warning: skipping mate #" << (mate1 ? '1' : '2')
|
|||
|
<< " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length())
|
|||
|
<< ") <= # seed mismatches (" << seedmms << ")" << endl;
|
|||
|
} else {
|
|||
|
os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "' because length (" << (mate1 ? ps.bufa().patFw.length() : ps.bufb().patFw.length())
|
|||
|
<< ") <= # seed mismatches (" << seedmms << ")" << endl;
|
|||
|
}
|
|||
|
cerr << os.str().c_str();
|
|||
|
}
|
|||
|
|
|||
|
static inline void printLenSkipMsg(
|
|||
|
const PatternSourcePerThread& ps,
|
|||
|
bool paired,
|
|||
|
bool mate1)
|
|||
|
{
|
|||
|
ostringstream os;
|
|||
|
if(paired) {
|
|||
|
os << "Warning: skipping mate #" << (mate1 ? '1' : '2')
|
|||
|
<< " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "' because it was < 2 characters long" << endl;
|
|||
|
} else {
|
|||
|
os << "Warning: skipping read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "' because it was < 2 characters long" << endl;
|
|||
|
}
|
|||
|
cerr << os.str().c_str();
|
|||
|
}
|
|||
|
|
|||
|
static inline void printLocalScoreMsg(
|
|||
|
const PatternSourcePerThread& ps,
|
|||
|
bool paired,
|
|||
|
bool mate1)
|
|||
|
{
|
|||
|
ostringstream os;
|
|||
|
if(paired) {
|
|||
|
os << "Warning: minimum score function gave negative number in "
|
|||
|
<< "--local mode for mate #" << (mate1 ? '1' : '2')
|
|||
|
<< " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "; setting to 0 instead" << endl;
|
|||
|
} else {
|
|||
|
os << "Warning: minimum score function gave negative number in "
|
|||
|
<< "--local mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "; setting to 0 instead" << endl;
|
|||
|
}
|
|||
|
cerr << os.str().c_str();
|
|||
|
}
|
|||
|
|
|||
|
static inline void printEEScoreMsg(
|
|||
|
const PatternSourcePerThread& ps,
|
|||
|
bool paired,
|
|||
|
bool mate1)
|
|||
|
{
|
|||
|
ostringstream os;
|
|||
|
if(paired) {
|
|||
|
os << "Warning: minimum score function gave positive number in "
|
|||
|
<< "--end-to-end mode for mate #" << (mate1 ? '1' : '2')
|
|||
|
<< " of read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "; setting to 0 instead" << endl;
|
|||
|
} else {
|
|||
|
os << "Warning: minimum score function gave positive number in "
|
|||
|
<< "--end-to-end mode for read '" << (mate1 ? ps.bufa().name : ps.bufb().name)
|
|||
|
<< "; setting to 0 instead" << endl;
|
|||
|
}
|
|||
|
cerr << os.str().c_str();
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
/*
 * MERGE_METRICS(met, sync)
 *
 * Expands inside the worker function and relies on the worker's local
 * variables (msink, olm, sdm, wlm, swmSeed, swmMate, rpm, the four SSE
 * metric objects, nbtfiltst/nbtfiltsc/nbtfiltdo and him).  It first hands the
 * thread's reporting metrics to the sink, then merges every local metric
 * into 'met', and finally resets the local metric objects.
 *
 * 'met' is parenthesized so that any expression designating a metrics
 * object (e.g. a dereferenced pointer) can be passed.  The brace-block form
 * of the expansion is kept unchanged for compatibility with existing call
 * sites.
 *
 * NOTE(review): nbtfiltst/nbtfiltsc/nbtfiltdo are passed to merge() but,
 * unlike the metric objects, are not reset here -- confirm that this is
 * intended.
 */
#define MERGE_METRICS(met, sync) { \
	msink.mergeMetrics(rpm); \
	(met).merge( \
		&olm, \
		&sdm, \
		&wlm, \
		&swmSeed, \
		&swmMate, \
		&rpm, \
		&sseU8ExtendMet, \
		&sseU8MateMet, \
		&sseI16ExtendMet, \
		&sseI16MateMet, \
		nbtfiltst, \
		nbtfiltsc, \
		nbtfiltdo, \
		&him, \
		(sync)); \
	olm.reset(); \
	sdm.reset(); \
	wlm.reset(); \
	swmSeed.reset(); \
	swmMate.reset(); \
	rpm.reset(); \
	sseU8ExtendMet.reset(); \
	sseU8MateMet.reset(); \
	sseI16ExtendMet.reset(); \
	sseI16MateMet.reset(); \
	him.reset(); \
}
|
|||
|
|
|||
|
/*
 * MERGE_SW(x)
 *
 * Expands inside the worker function: calls x.merge() with the worker's
 * local SSE metric objects and backtrace-filter counters, then calls
 * x.resetCounters().
 *
 * 'x' is parenthesized so that any expression designating the object
 * (e.g. a dereferenced pointer) can be passed.  The brace-block form of the
 * expansion is kept unchanged for compatibility with existing call sites.
 */
#define MERGE_SW(x) { \
	(x).merge( \
		sseU8ExtendMet, \
		sseU8MateMet, \
		sseI16ExtendMet, \
		sseI16MateMet, \
		nbtfiltst, \
		nbtfiltsc, \
		nbtfiltdo); \
	(x).resetCounters(); \
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
/**
|
|||
|
* Called once per thread. Sets up per-thread pointers to the shared global
|
|||
|
* data structures, creates per-thread structures, then enters the alignment
|
|||
|
* loop. The general flow of the alignment loop is:
|
|||
|
*
|
|||
|
* - If it's been a while and we're the master thread, report some alignment
|
|||
|
* metrics
|
|||
|
* - Get the next read/pair
|
|||
|
* - Check if this read/pair is identical to the previous
|
|||
|
* + If identical, check whether we can skip any or all alignment stages. If
|
|||
|
* we can skip all stages, report the result immediately and move to next
|
|||
|
* read/pair
|
|||
|
* + If not identical, continue
|
|||
|
* -
|
|||
|
*/
|
|||
|
static void multiseedSearchWorker_hisat2(void *vp) {
|
|||
|
int tid = *((int*)vp);
|
|||
|
|
|||
|
if (threeN) {
|
|||
|
assert(ref3N.multiseed_gfm[0] != NULL);
|
|||
|
assert(ref3N.multiseed_gfm[1] != NULL);
|
|||
|
} else {
|
|||
|
assert(multiseed_gfm != NULL);
|
|||
|
}
|
|||
|
|
|||
|
assert(multiseedMms == 0);
|
|||
|
|
|||
|
// for regular Hisat2
|
|||
|
PairedPatternSource& patsrc = *multiseed_patsrc;
|
|||
|
const HGFM<index_t>& gfm = *multiseed_gfm;
|
|||
|
const RFM<index_t>* rgfm = multiseed_rgfm;
|
|||
|
const Scoring& sc = *multiseed_sc;
|
|||
|
const BitPairReference& ref = *multiseed_refs;
|
|||
|
const BitPairReference* rref = multiseed_rrefs;
|
|||
|
AlnSink<index_t>& msink = *multiseed_msink;
|
|||
|
OutFileBuf* metricsOfb = multiseed_metricsOfb;
|
|||
|
|
|||
|
// for Hisat-3N
|
|||
|
const HGFM<index_t>* gfm_3N[2];
|
|||
|
const RFM<index_t>* rgfm_3N[2];
|
|||
|
const BitPairReference* rref_3N[2];
|
|||
|
|
|||
|
for (int i = 0; i < 2; i++) {
|
|||
|
gfm_3N[i] = ref3N.multiseed_gfm[i];
|
|||
|
rgfm_3N[i] = ref3N.multiseed_rgfm[i];
|
|||
|
rref_3N[i] = ref3N.multiseed_rrefs[i];
|
|||
|
}
|
|||
|
|
|||
|
// Sinks: these are so that we can print tables encoding counts for
|
|||
|
// events of interest on a per-read, per-seed, per-join, or per-SW
|
|||
|
// level. These in turn can be used to diagnose performance
|
|||
|
// problems, or generally characterize performance.
|
|||
|
|
|||
|
//const BitPairReference& refs = *multiseed_refs;
|
|||
|
auto_ptr<PatternSourcePerThreadFactory> patsrcFact(createPatsrcFactory(patsrc, tid));
|
|||
|
auto_ptr<PatternSourcePerThread> ps(patsrcFact->create());
|
|||
|
|
|||
|
// Instantiate an object for holding reporting-related parameters.
|
|||
|
if(maxSeeds == 0) {
|
|||
|
maxSeeds = max<size_t>(5, khits * 2);
|
|||
|
}
|
|||
|
ReportingParams rp(
|
|||
|
(allHits ? std::numeric_limits<THitInt>::max() : khits), // -k
|
|||
|
(allHits ? std::numeric_limits<THitInt>::max() : maxSeeds), // --max-seeds
|
|||
|
mhits, // -m/-M
|
|||
|
0, // penalty gap (not used now)
|
|||
|
msample, // true -> -M was specified, otherwise assume -m
|
|||
|
gReportDiscordant, // report discordang paired-end alignments?
|
|||
|
gReportMixed, // report unpaired alignments for paired reads?
|
|||
|
secondary,
|
|||
|
localAlign,
|
|||
|
bowtie2_dp,
|
|||
|
sensitive | very_sensitive,
|
|||
|
repeat);
|
|||
|
|
|||
|
// Instantiate a mapping quality calculator
|
|||
|
auto_ptr<Mapq> bmapq(new_mapq(mapqv, scoreMin, sc));
|
|||
|
|
|||
|
|
|||
|
// Make a per-thread wrapper for the global MHitSink object.
|
|||
|
|
|||
|
AlnSinkWrap<index_t>* msinkwrap;
|
|||
|
if (threeN) {
|
|||
|
msinkwrap = new AlnSinkWrap3N<index_t>(
|
|||
|
msink, // global sink
|
|||
|
rp, // reporting parameters
|
|||
|
*bmapq.get(), // MAPQ calculator
|
|||
|
(size_t)tid, // thread id
|
|||
|
mappingCycles,
|
|||
|
secondary, // secondary alignments
|
|||
|
no_spliced_alignment ? NULL : ssdb,
|
|||
|
thread_rids_mindist);
|
|||
|
} else {
|
|||
|
msinkwrap = new AlnSinkWrap<index_t>(
|
|||
|
msink, // global sink
|
|||
|
rp, // reporting parameters
|
|||
|
*bmapq.get(), // MAPQ calculator
|
|||
|
(size_t)tid, // thread id
|
|||
|
secondary, // secondary alignments
|
|||
|
no_spliced_alignment ? NULL : ssdb,
|
|||
|
thread_rids_mindist);
|
|||
|
}
|
|||
|
|
|||
|
SplicedAligner<index_t, local_index_t> splicedAligner(threeN? *gfm_3N[0]: gfm,
|
|||
|
anchorStop,
|
|||
|
thread_rids_mindist);
|
|||
|
SwAligner sw;
|
|||
|
OuterLoopMetrics olm;
|
|||
|
SeedSearchMetrics sdm;
|
|||
|
WalkMetrics wlm;
|
|||
|
SwMetrics swmSeed, swmMate;
|
|||
|
ReportingMetrics rpm;
|
|||
|
RandomSource rnd, rndArb;
|
|||
|
SSEMetrics sseU8ExtendMet;
|
|||
|
SSEMetrics sseU8MateMet;
|
|||
|
SSEMetrics sseI16ExtendMet;
|
|||
|
SSEMetrics sseI16MateMet;
|
|||
|
DescentMetrics descm;
|
|||
|
uint64_t nbtfiltst = 0; // TODO: find a new home for these
|
|||
|
uint64_t nbtfiltsc = 0; // TODO: find a new home for these
|
|||
|
uint64_t nbtfiltdo = 0; // TODO: find a new home for these
|
|||
|
HIMetrics him;
|
|||
|
|
|||
|
ASSERT_ONLY(BTDnaString tmp);
|
|||
|
|
|||
|
int pepolFlag;
|
|||
|
if(gMate1fw && gMate2fw) {
|
|||
|
pepolFlag = PE_POLICY_FF;
|
|||
|
} else if(gMate1fw && !gMate2fw) {
|
|||
|
pepolFlag = PE_POLICY_FR;
|
|||
|
} else if(!gMate1fw && gMate2fw) {
|
|||
|
pepolFlag = PE_POLICY_RF;
|
|||
|
} else {
|
|||
|
pepolFlag = PE_POLICY_RR;
|
|||
|
}
|
|||
|
assert_geq(gMaxInsert, gMinInsert);
|
|||
|
assert_geq(gMinInsert, 0);
|
|||
|
PairedEndPolicy pepol(
|
|||
|
pepolFlag,
|
|||
|
gMaxInsert,
|
|||
|
gMinInsert,
|
|||
|
localAlign,
|
|||
|
gFlippedMatesOK,
|
|||
|
gDovetailMatesOK,
|
|||
|
gContainMatesOK,
|
|||
|
gOlapMatesOK,
|
|||
|
gExpandToFrag);
|
|||
|
|
|||
|
PerfMetrics metricsPt; // per-thread metrics object; for read-level metrics
|
|||
|
BTString nametmp;
|
|||
|
|
|||
|
PerReadMetrics prm;
|
|||
|
|
|||
|
// Used by thread with threadid == 1 to measure time elapsed
|
|||
|
time_t iTime = time(0);
|
|||
|
|
|||
|
// Keep track of whether last search was exhaustive for mates 1 and 2
|
|||
|
bool exhaustive[2] = { false, false };
|
|||
|
// Keep track of whether mates 1/2 were filtered out last time through
|
|||
|
bool filt[2] = { true, true };
|
|||
|
// Keep track of whether mates 1/2 were filtered out due Ns last time
|
|||
|
bool nfilt[2] = { true, true };
|
|||
|
// Keep track of whether mates 1/2 were filtered out due to not having
|
|||
|
// enough characters to rise above the score threshold.
|
|||
|
bool scfilt[2] = { true, true };
|
|||
|
// Keep track of whether mates 1/2 were filtered out due to not having
|
|||
|
// more characters than the number of mismatches permitted in a seed.
|
|||
|
bool lenfilt[2] = { true, true };
|
|||
|
// Keep track of whether mates 1/2 were filtered out by upstream qc
|
|||
|
bool qcfilt[2] = { true, true };
|
|||
|
|
|||
|
rndArb.init((uint32_t)time(0));
|
|||
|
int mergei = 0;
|
|||
|
int mergeival = 16;
|
|||
|
while(true) {
|
|||
|
bool success = false, done = false, paired = false;
|
|||
|
ps->nextReadPair(success, done, paired, outType != OUTPUT_SAM);
|
|||
|
if(!success && done) {
|
|||
|
break;
|
|||
|
} else if(!success) {
|
|||
|
continue;
|
|||
|
}
|
|||
|
TReadId rdid = ps->rdid();
|
|||
|
if(nthreads > 1 && useTempSpliceSite) {
|
|||
|
assert_gt(tid, 0);
|
|||
|
assert_leq(tid, thread_rids.size());
|
|||
|
assert(thread_rids[tid - 1] == 0 || rdid > thread_rids[tid - 1]);
|
|||
|
thread_rids[tid - 1] = (rdid > 0 ? rdid - 1 : 0);
|
|||
|
while(true) {
|
|||
|
uint64_t min_rdid = thread_rids[0];
|
|||
|
{
|
|||
|
for(size_t i = 1; i < thread_rids.size(); i++) {
|
|||
|
if(thread_rids[i] < min_rdid) {
|
|||
|
min_rdid = thread_rids[i];
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if(min_rdid + thread_rids_mindist < rdid) {
|
|||
|
#if defined(_TTHREAD_WIN32_)
|
|||
|
Sleep(0);
|
|||
|
#elif defined(_TTHREAD_POSIX_)
|
|||
|
sched_yield();
|
|||
|
#endif
|
|||
|
} else break;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
bool sample = true;
|
|||
|
if(arbitraryRandom) {
|
|||
|
ps->bufa().seed = rndArb.nextU32();
|
|||
|
ps->bufb().seed = rndArb.nextU32();
|
|||
|
}
|
|||
|
if(sampleFrac < 1.0f) {
|
|||
|
rnd.init(ROTL(ps->bufa().seed, 2));
|
|||
|
sample = rnd.nextFloat() < sampleFrac;
|
|||
|
}
|
|||
|
if(rdid >= skipReads && rdid < qUpto && sample) {
|
|||
|
// Align this read/pair
|
|||
|
bool retry = true;
|
|||
|
//
|
|||
|
// Check if there is metrics reporting for us to do.
|
|||
|
//
|
|||
|
if(metricsIval > 0 &&
|
|||
|
(metricsOfb != NULL || metricsStderr) &&
|
|||
|
!metricsPerRead &&
|
|||
|
++mergei == mergeival)
|
|||
|
{
|
|||
|
// Do a periodic merge. Update global metrics, in a
|
|||
|
// synchronized manner if needed.
|
|||
|
MERGE_METRICS(metrics, nthreads > 1);
|
|||
|
mergei = 0;
|
|||
|
// Check if a progress message should be printed
|
|||
|
if(tid == 0) {
|
|||
|
// Only thread 1 prints progress messages
|
|||
|
time_t curTime = time(0);
|
|||
|
if(curTime - iTime >= metricsIval) {
|
|||
|
metrics.reportInterval(metricsOfb, metricsStderr, false, true, NULL);
|
|||
|
iTime = curTime;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
prm.reset(); // per-read metrics
|
|||
|
prm.doFmString = false;
|
|||
|
if(sam_print_xt) {
|
|||
|
gettimeofday(&prm.tv_beg, &prm.tz_beg);
|
|||
|
}
|
|||
|
// Try to align this read
|
|||
|
int mappingCycle = 0;
|
|||
|
bool gNofw3N = false;
|
|||
|
bool gNorc3N = false;
|
|||
|
// for threeN (3N) mode, we need to map the read 4 times. for regular mode, only 1 time.
|
|||
|
while(retry || mappingCycle < nMappingCycle) {
|
|||
|
|
|||
|
msinkwrap->resetInit_();
|
|||
|
if (threeN) {
|
|||
|
ps->changePlan3N(mappingCycle);
|
|||
|
gNorc3N = (mappingCycle == threeN_type1conversion_FW || mappingCycle == threeN_type2conversion_FW);
|
|||
|
gNofw3N = !gNorc3N;
|
|||
|
}
|
|||
|
retry = false;
|
|||
|
assert_eq(ps->bufa().color, false);
|
|||
|
if (!mappingCycles[mappingCycle])
|
|||
|
{
|
|||
|
mappingCycle++;
|
|||
|
continue;
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
olm.reads++;
|
|||
|
bool pair = paired;
|
|||
|
const size_t rdlen1 = ps->bufa().length();
|
|||
|
const size_t rdlen2 = pair ? ps->bufb().length() : 0;
|
|||
|
olm.bases += (rdlen1 + rdlen2);
|
|||
|
msinkwrap->nextRead(
|
|||
|
&ps->bufa(),
|
|||
|
pair ? &ps->bufb() : NULL,
|
|||
|
rdid,
|
|||
|
sc.qualitiesMatter());
|
|||
|
assert(msinkwrap->inited());
|
|||
|
size_t rdlens[2] = { rdlen1, rdlen2 };
|
|||
|
// Calculate the minimum valid score threshold for the read
|
|||
|
TAlScore minsc[2], maxpen[2];
|
|||
|
maxpen[0] = maxpen[1] = 0;
|
|||
|
minsc[0] = minsc[1] = std::numeric_limits<TAlScore>::max();
|
|||
|
if(bwaSwLike) {
|
|||
|
// From BWA-SW manual: "Given an l-long query, the
|
|||
|
// threshold for a hit to be retained is
|
|||
|
// a*max{T,c*log(l)}." We try to recreate that here.
|
|||
|
float a = (float)sc.match(30);
|
|||
|
float T = bwaSwLikeT, c = bwaSwLikeC;
|
|||
|
minsc[0] = (TAlScore)max<float>(a*T, a*c*log(rdlens[0]));
|
|||
|
if(paired) {
|
|||
|
minsc[1] = (TAlScore)max<float>(a*T, a*c*log(rdlens[1]));
|
|||
|
}
|
|||
|
} else {
|
|||
|
minsc[0] = scoreMin.f<TAlScore>(rdlens[0]);
|
|||
|
if(paired) minsc[1] = scoreMin.f<TAlScore>(rdlens[1]);
|
|||
|
if(localAlign) {
|
|||
|
if(minsc[0] < 0) {
|
|||
|
if(!gQuiet) printLocalScoreMsg(*ps, paired, true);
|
|||
|
minsc[0] = 0;
|
|||
|
}
|
|||
|
if(paired && minsc[1] < 0) {
|
|||
|
if(!gQuiet) printLocalScoreMsg(*ps, paired, false);
|
|||
|
minsc[1] = 0;
|
|||
|
}
|
|||
|
} else {
|
|||
|
if(minsc[0] > 0) {
|
|||
|
if(!gQuiet) printEEScoreMsg(*ps, paired, true);
|
|||
|
minsc[0] = 0;
|
|||
|
}
|
|||
|
if(paired && minsc[1] > 0) {
|
|||
|
if(!gQuiet) printEEScoreMsg(*ps, paired, false);
|
|||
|
minsc[1] = 0;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// N filter; does the read have too many Ns?
|
|||
|
size_t readns[2] = {0, 0};
|
|||
|
sc.nFilterPair(
|
|||
|
&ps->bufa().patFw,
|
|||
|
pair ? &ps->bufb().patFw : NULL,
|
|||
|
readns[0],
|
|||
|
readns[1],
|
|||
|
nfilt[0],
|
|||
|
nfilt[1]);
|
|||
|
// Score filter; does the read have enough characters to rise above
|
|||
|
// the score threshold?
|
|||
|
scfilt[0] = sc.scoreFilter(minsc[0], rdlens[0]);
|
|||
|
scfilt[1] = sc.scoreFilter(minsc[1], rdlens[1]);
|
|||
|
lenfilt[0] = lenfilt[1] = true;
|
|||
|
if(rdlens[0] <= (size_t)multiseedMms || rdlens[0] < 2) {
|
|||
|
if(!gQuiet) printMmsSkipMsg(*ps, paired, true, multiseedMms);
|
|||
|
lenfilt[0] = false;
|
|||
|
}
|
|||
|
if((rdlens[1] <= (size_t)multiseedMms || rdlens[1] < 2) && paired) {
|
|||
|
if(!gQuiet) printMmsSkipMsg(*ps, paired, false, multiseedMms);
|
|||
|
lenfilt[1] = false;
|
|||
|
}
|
|||
|
if(rdlens[0] < 2) {
|
|||
|
if(!gQuiet) printLenSkipMsg(*ps, paired, true);
|
|||
|
lenfilt[0] = false;
|
|||
|
}
|
|||
|
if(rdlens[1] < 2 && paired) {
|
|||
|
if(!gQuiet) printLenSkipMsg(*ps, paired, false);
|
|||
|
lenfilt[1] = false;
|
|||
|
}
|
|||
|
qcfilt[0] = qcfilt[1] = true;
|
|||
|
if(qcFilter) {
|
|||
|
qcfilt[0] = (ps->bufa().filter != '0');
|
|||
|
qcfilt[1] = (ps->bufb().filter != '0');
|
|||
|
}
|
|||
|
filt[0] = (nfilt[0] && scfilt[0] && lenfilt[0] && qcfilt[0]);
|
|||
|
filt[1] = (nfilt[1] && scfilt[1] && lenfilt[1] && qcfilt[1]);
|
|||
|
prm.nFilt += (filt[0] ? 0 : 1) + (filt[1] ? 0 : 1);
|
|||
|
Read* rds[2] = { &ps->bufa(), &ps->bufb() };
|
|||
|
// For each mate...
|
|||
|
assert(msinkwrap->empty());
|
|||
|
//size_t minedfw[2] = { 0, 0 };
|
|||
|
//size_t minedrc[2] = { 0, 0 };
|
|||
|
// Calculate nofw / norc
|
|||
|
bool nofw[2] = { false, false };
|
|||
|
bool norc[2] = { false, false };
|
|||
|
if (threeN) {
|
|||
|
nofw[0] = paired ? (gMate1fw ? gNofw3N : gNorc3N) : gNofw3N;
|
|||
|
norc[0] = paired ? (gMate1fw ? gNorc3N : gNofw3N) : gNorc3N;
|
|||
|
nofw[1] = paired ? (gMate2fw ? gNofw3N : gNorc3N) : gNofw3N;
|
|||
|
norc[1] = paired ? (gMate2fw ? gNorc3N : gNofw3N) : gNorc3N;
|
|||
|
} else {
|
|||
|
nofw[0] = paired ? (gMate1fw ? gNofw : gNorc) : gNofw;
|
|||
|
norc[0] = paired ? (gMate1fw ? gNorc : gNofw) : gNorc;
|
|||
|
nofw[1] = paired ? (gMate2fw ? gNofw : gNorc) : gNofw;
|
|||
|
norc[1] = paired ? (gMate2fw ? gNorc : gNofw) : gNorc;
|
|||
|
}
|
|||
|
|
|||
|
// Calculate nceil
|
|||
|
int nceil[2] = { 0, 0 };
|
|||
|
nceil[0] = nCeil.f<int>((double)rdlens[0]);
|
|||
|
nceil[0] = min(nceil[0], (int)rdlens[0]);
|
|||
|
if(paired) {
|
|||
|
nceil[1] = nCeil.f<int>((double)rdlens[1]);
|
|||
|
nceil[1] = min(nceil[1], (int)rdlens[1]);
|
|||
|
}
|
|||
|
exhaustive[0] = exhaustive[1] = false;
|
|||
|
//size_t matemap[2] = { 0, 1 };
|
|||
|
bool pairPostFilt = filt[0] && filt[1];
|
|||
|
if(pairPostFilt) {
|
|||
|
rnd.init(ps->bufa().seed ^ ps->bufb().seed);
|
|||
|
} else {
|
|||
|
rnd.init(ps->bufa().seed);
|
|||
|
}
|
|||
|
// Calculate interval length for both mates
|
|||
|
int interval[2] = { 0, 0 };
|
|||
|
for(size_t mate = 0; mate < (pair ? 2:1); mate++) {
|
|||
|
interval[mate] = msIval.f<int>((double)rdlens[mate]);
|
|||
|
if(filt[0] && filt[1]) {
|
|||
|
// Boost interval length by 20% for paired-end reads
|
|||
|
interval[mate] = (int)(interval[mate] * 1.2 + 0.5);
|
|||
|
}
|
|||
|
interval[mate] = max(interval[mate], 1);
|
|||
|
}
|
|||
|
// Calculate streak length
|
|||
|
size_t streak[2] = { maxDpStreak, maxDpStreak };
|
|||
|
size_t mtStreak[2] = { maxMateStreak, maxMateStreak };
|
|||
|
size_t mxDp[2] = { maxDp, maxDp };
|
|||
|
size_t mxUg[2] = { maxUg, maxUg };
|
|||
|
size_t mxIter[2] = { maxIters, maxIters };
|
|||
|
if(allHits) {
|
|||
|
streak[0] = streak[1] = std::numeric_limits<size_t>::max();
|
|||
|
mtStreak[0] = mtStreak[1] = std::numeric_limits<size_t>::max();
|
|||
|
mxDp[0] = mxDp[1] = std::numeric_limits<size_t>::max();
|
|||
|
mxUg[0] = mxUg[1] = std::numeric_limits<size_t>::max();
|
|||
|
mxIter[0] = mxIter[1] = std::numeric_limits<size_t>::max();
|
|||
|
} else if(khits > 1) {
|
|||
|
for(size_t mate = 0; mate < 2; mate++) {
|
|||
|
streak[mate] += (khits-1) * maxStreakIncr;
|
|||
|
mtStreak[mate] += (khits-1) * maxStreakIncr;
|
|||
|
mxDp[mate] += (khits-1) * maxItersIncr;
|
|||
|
mxUg[mate] += (khits-1) * maxItersIncr;
|
|||
|
mxIter[mate] += (khits-1) * maxItersIncr;
|
|||
|
}
|
|||
|
}
|
|||
|
if(filt[0] && filt[1]) {
|
|||
|
streak[0] = (size_t)ceil((double)streak[0] / 2.0);
|
|||
|
streak[1] = (size_t)ceil((double)streak[1] / 2.0);
|
|||
|
assert_gt(streak[1], 0);
|
|||
|
}
|
|||
|
assert_gt(streak[0], 0);
|
|||
|
// Calculate # seed rounds for each mate
|
|||
|
size_t nrounds[2] = { nSeedRounds, nSeedRounds };
|
|||
|
if(filt[0] && filt[1]) {
|
|||
|
nrounds[0] = (size_t)ceil((double)nrounds[0] / 2.0);
|
|||
|
nrounds[1] = (size_t)ceil((double)nrounds[1] / 2.0);
|
|||
|
assert_gt(nrounds[1], 0);
|
|||
|
}
|
|||
|
assert_gt(nrounds[0], 0);
|
|||
|
// Increment counters according to what got filtered
|
|||
|
for(size_t mate = 0; mate < (pair ? 2:1); mate++) {
|
|||
|
if(!filt[mate]) {
|
|||
|
// Mate was rejected by at least one filter (N, score, length or QC)
|
|||
|
olm.freads++; // reads filtered out
|
|||
|
olm.fbases += rdlens[mate]; // bases filtered out
|
|||
|
} else {
|
|||
|
//shs[mate].clear();
|
|||
|
//shs[mate].nextRead(mate == 0 ? ps->bufa() : ps->bufb());
|
|||
|
//assert(shs[mate].empty());
|
|||
|
olm.ureads++; // reads passing filter
|
|||
|
olm.ubases += rdlens[mate]; // bases passing filter
|
|||
|
}
|
|||
|
}
|
|||
|
//size_t eePeEeltLimit = std::numeric_limits<size_t>::max();
|
|||
|
// Whether we're done with mate1 / mate2
|
|||
|
bool done[2] = { !filt[0], !filt[1] };
|
|||
|
// size_t nelt[2] = {0, 0};
|
|||
|
if(filt[0] && filt[1]) {
|
|||
|
splicedAligner.initReads(rds, nofw, norc, minsc, maxpen);
|
|||
|
} else if(filt[0]) {
|
|||
|
splicedAligner.initRead(rds[0], nofw[0], norc[0], minsc[0], maxpen[0], false);
|
|||
|
} else if(filt[1]) {
|
|||
|
splicedAligner.initRead(rds[1], nofw[1], norc[1], minsc[1], maxpen[1], true);
|
|||
|
}
|
|||
|
if(filt[0] || filt[1]) {
|
|||
|
int ret;
|
|||
|
int threeN_index;
|
|||
|
bool useRepeat;
|
|||
|
|
|||
|
if (threeN) {
|
|||
|
threeN_index = (mappingCycle == threeN_type1conversion_FW || mappingCycle == threeN_type2conversion_RC) ? 0 : 1;
|
|||
|
useRepeat = paired ? (ps->bufa().length() >= 100) && (ps->bufb().length() >= 100) :
|
|||
|
ps->bufa().length() >= 80;
|
|||
|
}
|
|||
|
|
|||
|
ret = splicedAligner.go(
|
|||
|
sc,
|
|||
|
pepol,
|
|||
|
*multiseed_tpol,
|
|||
|
*gpol,
|
|||
|
threeN ? *gfm_3N[threeN_index] : gfm,
|
|||
|
threeN ?(useRepeat ? rgfm_3N[threeN_index] : NULL) : rgfm,
|
|||
|
threeN ? *altdbs_3N[threeN_index] : *altdb,
|
|||
|
threeN ? *repeatdbs_3N[threeN_index] : *repeatdb,
|
|||
|
threeN ? *raltdbs_3N[threeN_index] : *raltdb,
|
|||
|
ref,
|
|||
|
threeN ? rref_3N[threeN_index] : rref,
|
|||
|
sw,
|
|||
|
*ssdb,
|
|||
|
wlm,
|
|||
|
prm,
|
|||
|
swmSeed,
|
|||
|
him,
|
|||
|
rnd,
|
|||
|
*msinkwrap);
|
|||
|
|
|||
|
MERGE_SW(sw);
|
|||
|
// daehwan
|
|||
|
size_t mate = 0;
|
|||
|
|
|||
|
assert_gt(ret, 0);
|
|||
|
// Clear out the exact hits so that we don't try to
|
|||
|
// extend them again later!
|
|||
|
if(ret == EXTEND_EXHAUSTED_CANDIDATES) {
|
|||
|
// Not done yet
|
|||
|
} else if(ret == EXTEND_POLICY_FULFILLED) {
|
|||
|
// Policy is satisfied for this mate at least
|
|||
|
if(msinkwrap->state().doneWithMate(mate == 0)) {
|
|||
|
done[mate] = true;
|
|||
|
}
|
|||
|
if(msinkwrap->state().doneWithMate(mate == 1)) {
|
|||
|
done[mate^1] = true;
|
|||
|
}
|
|||
|
} else if(ret == EXTEND_PERFECT_SCORE) {
|
|||
|
// We exhausted this mode at least
|
|||
|
done[mate] = true;
|
|||
|
} else if(ret == EXTEND_EXCEEDED_HARD_LIMIT) {
|
|||
|
// We exceeded a per-read limit
|
|||
|
done[mate] = true;
|
|||
|
} else if(ret == EXTEND_EXCEEDED_SOFT_LIMIT) {
|
|||
|
// Not done yet
|
|||
|
} else {
|
|||
|
//
|
|||
|
cerr << "Bad return value: " << ret << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(!done[mate]) {
|
|||
|
TAlScore perfectScore = sc.perfectScore(rdlens[mate]);
|
|||
|
if(!done[mate] && minsc[mate] == perfectScore) {
|
|||
|
done[mate] = true;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
for(size_t i = 0; i < 2; i++) {
|
|||
|
assert_leq(prm.nExIters, mxIter[i]);
|
|||
|
assert_leq(prm.nExDps, mxDp[i]);
|
|||
|
assert_leq(prm.nMateDps, mxDp[i]);
|
|||
|
assert_leq(prm.nExUgs, mxUg[i]);
|
|||
|
assert_leq(prm.nMateUgs, mxUg[i]);
|
|||
|
assert_leq(prm.nDpFail, streak[i]);
|
|||
|
assert_leq(prm.nUgFail, streak[i]);
|
|||
|
assert_leq(prm.nEeFail, streak[i]);
|
|||
|
}
|
|||
|
|
|||
|
msinkwrap->finishRead(
|
|||
|
NULL,
|
|||
|
NULL,
|
|||
|
exhaustive[0], // exhausted seed hits for mate 1?
|
|||
|
exhaustive[1], // exhausted seed hits for mate 2?
|
|||
|
nfilt[0],
|
|||
|
nfilt[1],
|
|||
|
scfilt[0],
|
|||
|
scfilt[1],
|
|||
|
lenfilt[0],
|
|||
|
lenfilt[1],
|
|||
|
qcfilt[0],
|
|||
|
qcfilt[1],
|
|||
|
sortByScore, // prioritize by alignment score
|
|||
|
rnd, // pseudo-random generator
|
|||
|
rpm, // reporting metrics
|
|||
|
prm, // per-read metrics
|
|||
|
sc, // scoring scheme
|
|||
|
!seedSumm, // suppress seed summaries?
|
|||
|
seedSumm, //rdid suppress alignments?
|
|||
|
templateLenAdjustment);
|
|||
|
mappingCycle++;
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
} // if(rdid >= skipReads && rdid < qUpto)
|
|||
|
else if(rdid >= qUpto) {
|
|||
|
break;
|
|||
|
}
|
|||
|
if(metricsPerRead) {
|
|||
|
MERGE_METRICS(metricsPt, nthreads > 1);
|
|||
|
nametmp = ps->bufa().name;
|
|||
|
metricsPt.reportInterval(
|
|||
|
metricsOfb, metricsStderr, true, true, &nametmp);
|
|||
|
metricsPt.reset();
|
|||
|
}
|
|||
|
} // while(true)
|
|||
|
|
|||
|
// One last metrics merge
|
|||
|
MERGE_METRICS(metrics, nthreads > 1);
|
|||
|
delete msinkwrap;
|
|||
|
return;
|
|||
|
}
|
|||
|
|
|||
|
/**
|
|||
|
* Called once per alignment job. Sets up global pointers to the
|
|||
|
* shared global data structures, creates per-thread structures, then
|
|||
|
* enters the search loop.
|
|||
|
*/
|
|||
|
static void multiseedSearch(
|
|||
|
Scoring& sc,
|
|||
|
TranscriptomePolicy& tpol,
|
|||
|
GraphPolicy& gp,
|
|||
|
PairedPatternSource& patsrc, // pattern source
|
|||
|
AlnSink<index_t>& msink, // hit sink
|
|||
|
EList<HGFM<index_t>* > gfms_3N, // 3N index of original text
|
|||
|
RFM<index_t>* rgfms_3N[2], // 3N index of repeat sequences
|
|||
|
BitPairReference* rrefss[2], // 3N repeat reference
|
|||
|
HGFM<index_t>* gfm, // index of original text
|
|||
|
RFM<index_t>* rgfm, // index of repeat sequences
|
|||
|
BitPairReference* refs, // base reference
|
|||
|
BitPairReference* rrefs, // repeat reference
|
|||
|
OutFileBuf *metricsOfb)
|
|||
|
{
|
|||
|
multiseed_patsrc = &patsrc;
|
|||
|
multiseed_msink = &msink;
|
|||
|
multiseed_sc = ≻
|
|||
|
multiseed_tpol = &tpol;
|
|||
|
gpol = &gp;
|
|||
|
multiseed_metricsOfb = metricsOfb;
|
|||
|
multiseed_refs = refs;
|
|||
|
if (threeN) {
|
|||
|
ref3N.load(gfms_3N, rgfms_3N, rrefss);
|
|||
|
} else {
|
|||
|
multiseed_gfm = gfm;
|
|||
|
multiseed_rgfm = rgfm;
|
|||
|
multiseed_rrefs = rrefs;
|
|||
|
}
|
|||
|
|
|||
|
AutoArray<tthread::thread*> threads(nthreads);
|
|||
|
AutoArray<int> tids(nthreads);
|
|||
|
// Start the metrics thread
|
|||
|
{
|
|||
|
Timer _t(cerr, "Multiseed full-index search: ", timing);
|
|||
|
|
|||
|
thread_rids.resize(nthreads);
|
|||
|
thread_rids.fill(0);
|
|||
|
thread_rids_mindist = (nthreads == 1 || !useTempSpliceSite ? 0 : 1000 * nthreads);
|
|||
|
for(int i = 0; i < nthreads; i++) {
|
|||
|
// Thread IDs start at 1
|
|||
|
tids[i] = i+1;
|
|||
|
threads[i] = new tthread::thread(multiseedSearchWorker_hisat2, (void*)&tids[i]);
|
|||
|
}
|
|||
|
|
|||
|
for (int i = 0; i < nthreads; i++)
|
|||
|
threads[i]->join();
|
|||
|
|
|||
|
}
|
|||
|
if(!metricsPerRead && (metricsOfb != NULL || metricsStderr)) {
|
|||
|
metrics.reportInterval(metricsOfb, metricsStderr, true, false, NULL);
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// Command-line string for this run; driver() passes it to SamConfig as the
// command line.  NOTE(review): where it is populated is not visible here.
static string argstr;
|
|||
|
|
|||
|
// Defined in another translation unit.  driver() calls this once on entry,
// before any index object is constructed.
// Presumably initializes a count lookup table -- TODO confirm.
extern void initializeCntLut();
|
|||
|
// Defined in another translation unit.  driver() calls this once on entry,
// before any index object is constructed.
// Presumably initializes bit-count tables -- TODO confirm.
extern void initializeCntBit();
|
|||
|
|
|||
|
template<typename TStr>
|
|||
|
static void driver(
|
|||
|
const char * type,
|
|||
|
const string bt2indexBases[2],
|
|||
|
const string& outfile)
|
|||
|
{
|
|||
|
if(gVerbose || startVerbose) {
|
|||
|
cerr << "Entered driver(): "; logTime(cerr, true);
|
|||
|
}
|
|||
|
|
|||
|
if (gVerbose || startVerbose) {
|
|||
|
cerr << "Running in " << ((threeN) ? "3N" : "Regular") << " Mode" << endl;
|
|||
|
}
|
|||
|
|
|||
|
initializeCntLut();
|
|||
|
initializeCntBit();
|
|||
|
|
|||
|
// Vector of the reference sequences; used for sanity-checking
|
|||
|
EList<SString<char> > names, os;
|
|||
|
EList<size_t> nameLens, seqLens;
|
|||
|
// Read reference sequences from the command-line or from a FASTA file
|
|||
|
if(!origString.empty()) {
|
|||
|
// Read fasta file(s)
|
|||
|
EList<string> origFiles;
|
|||
|
tokenize(origString, ",", origFiles);
|
|||
|
parseFastas(origFiles, names, nameLens, os, seqLens);
|
|||
|
}
|
|||
|
PatternParams pp(
|
|||
|
format, // file format
|
|||
|
fileParallel, // true -> wrap files with separate PairedPatternSources
|
|||
|
seed, // pseudo-random seed
|
|||
|
useSpinlock, // use spin locks instead of pthreads
|
|||
|
solexaQuals, // true -> qualities are on solexa64 scale
|
|||
|
phred64Quals, // true -> qualities are on phred64 scale
|
|||
|
integerQuals, // true -> qualities are space-separated numbers
|
|||
|
fuzzy, // true -> try to parse fuzzy fastq
|
|||
|
fastaContLen, // length of sampled reads for FastaContinuous...
|
|||
|
fastaContFreq, // frequency of sampled reads for FastaContinuous...
|
|||
|
skipReads // skip the first 'skip' patterns
|
|||
|
);
|
|||
|
if(gVerbose || startVerbose) {
|
|||
|
cerr << "Creating PatternSource: "; logTime(cerr, true);
|
|||
|
}
|
|||
|
PairedPatternSource *patsrc = PairedPatternSource::setupPatternSources(
|
|||
|
queries, // singles, from argv
|
|||
|
mates1, // mate1's, from -1 arg
|
|||
|
mates2, // mate2's, from -2 arg
|
|||
|
mates12, // both mates on each line, from --12 arg
|
|||
|
#ifdef USE_SRA
|
|||
|
sra_accs, // SRA accessions
|
|||
|
#endif
|
|||
|
qualities, // qualities associated with singles
|
|||
|
qualities1, // qualities associated with m1
|
|||
|
qualities2, // qualities associated with m2
|
|||
|
pp, // read read-in parameters
|
|||
|
nthreads,
|
|||
|
gVerbose || startVerbose); // be talkative
|
|||
|
// Open hit output file
|
|||
|
if(gVerbose || startVerbose) {
|
|||
|
cerr << "Opening hit output file: "; logTime(cerr, true);
|
|||
|
}
|
|||
|
OutFileBuf *fout;
|
|||
|
if(!outfile.empty()) {
|
|||
|
fout = new OutFileBuf(outfile.c_str(), false);
|
|||
|
} else {
|
|||
|
fout = new OutFileBuf();
|
|||
|
}
|
|||
|
|
|||
|
// Initialize GFM object and read in header
|
|||
|
if(gVerbose || startVerbose) {
|
|||
|
cerr << "About to initialize fw GFM: "; logTime(cerr, true);
|
|||
|
}
|
|||
|
|
|||
|
// for 3N
|
|||
|
if (threeN) {
|
|||
|
for (int i = 0; i < 2; i++) {
|
|||
|
altdbs_3N[i] = new ALTDB<index_t>();
|
|||
|
repeatdbs_3N[i] = new RepeatDB<index_t>();
|
|||
|
raltdbs_3N[i] = new ALTDB<index_t>();
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
EList<HGFM<index_t>* >gfms_3N;
|
|||
|
RFM<index_t>* rgfms_3N[2];
|
|||
|
for (int i = 0; i < 2; i++) {
|
|||
|
rgfms_3N[i] = NULL;
|
|||
|
}
|
|||
|
bool rep_index_exists_3N[2]{false};
|
|||
|
bool rep_index_exists = false;
|
|||
|
string rep_adjIdxBase_3N[2];
|
|||
|
string rep_adjIdxBase;
|
|||
|
|
|||
|
HGFM<index_t>* gfm;
|
|||
|
RFM<index_t>* rgfm = NULL;
|
|||
|
|
|||
|
if (threeN) {
|
|||
|
for (int j = 0; j < 2; j++) {
|
|||
|
adjIdxBases_3N[j] = adjustEbwtBase(argv0, bt2indexBases[j], gVerbose);
|
|||
|
HGFM<index_t, local_index_t> *tmp_gfm = new HGFM<index_t, local_index_t>(
|
|||
|
adjIdxBases_3N[j],
|
|||
|
altdbs_3N[j],
|
|||
|
NULL,
|
|||
|
NULL,
|
|||
|
-1, // fw index
|
|||
|
true, // index is for the forward direction
|
|||
|
/* overriding: */ offRate,
|
|||
|
0, // amount to add to index offrate or <= 0 to do nothing
|
|||
|
useMm, // whether to use memory-mapped files
|
|||
|
useShmem, // whether to use shared memory
|
|||
|
mmSweep, // sweep memory-mapped files
|
|||
|
!noRefNames, // load names?
|
|||
|
true, // load SA sample?
|
|||
|
true, // load ftab?
|
|||
|
true, // load rstarts?
|
|||
|
!no_spliced_alignment, // load splice sites?
|
|||
|
gVerbose, // whether to be talkative
|
|||
|
startVerbose, // talkative during initialization
|
|||
|
false /*passMemExc*/,
|
|||
|
sanityCheck,
|
|||
|
use_haplotype); //use haplotypes?
|
|||
|
|
|||
|
gfms_3N.push_back(tmp_gfm);
|
|||
|
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
// Sanity check number of patterns and pattern lengths in GFM
|
|||
|
// against original strings
|
|||
|
assert_eq(os.size(), gfms_3N[j]->nPat());
|
|||
|
|
|||
|
for(size_t i = 0; i < os.size(); i++) {
|
|||
|
assert_eq(os[i].length(), gfms_3N[j]->plen()[i]);
|
|||
|
}
|
|||
|
}
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
gfms_3N[j]->loadIntoMemory(
|
|||
|
-1, // fw index
|
|||
|
true, // load SA sample
|
|||
|
true, // load ftab
|
|||
|
true, // load rstarts
|
|||
|
!noRefNames,
|
|||
|
startVerbose);
|
|||
|
gfms_3N[j]->checkOrigs(os, false);
|
|||
|
gfms_3N[j]->evictFromMemory();
|
|||
|
}
|
|||
|
{
|
|||
|
// Load the other half of the index into memory
|
|||
|
assert(!gfms_3N[j]->isInMemory());
|
|||
|
Timer _t(cerr, "Time loading forward index: ", timing);
|
|||
|
gfms_3N[j]->loadIntoMemory(
|
|||
|
-1, // not the reverse index
|
|||
|
true, // load SA samp? (yes, need forward index's SA samp)
|
|||
|
true, // load ftab (in forward index)
|
|||
|
true, // load rstarts (in forward index)
|
|||
|
!noRefNames, // load names?
|
|||
|
startVerbose);
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
rep_adjIdxBase_3N[j] = adjIdxBases_3N[j] + ".rep";
|
|||
|
{
|
|||
|
std::ifstream infile((rep_adjIdxBase_3N[j] + ".1." + gfm_ext.c_str()).c_str());
|
|||
|
rep_index_exists_3N[j] = infile.good();
|
|||
|
}
|
|||
|
|
|||
|
if(rep_index_exists_3N[j] && use_repeat_index) {
|
|||
|
rgfms_3N[j] = new RFM<index_t>(
|
|||
|
rep_adjIdxBase_3N[j],
|
|||
|
raltdbs_3N[j],
|
|||
|
repeatdbs_3N[j],
|
|||
|
&readLens,
|
|||
|
-1, // fw index
|
|||
|
true, // index is for the forward direction
|
|||
|
/* overriding: */ offRate,
|
|||
|
0, // amount to add to index offrate or <= 0 to do nothing
|
|||
|
useMm, // whether to use memory-mapped files
|
|||
|
useShmem, // whether to use shared memory
|
|||
|
mmSweep, // sweep memory-mapped files
|
|||
|
!noRefNames, // load names?
|
|||
|
true, // load SA sample?
|
|||
|
true, // load ftab?
|
|||
|
true, // load rstarts?
|
|||
|
!no_spliced_alignment, // load splice sites?
|
|||
|
gVerbose, // whether to be talkative
|
|||
|
startVerbose, // talkative during initialization
|
|||
|
false /*passMemExc*/,
|
|||
|
sanityCheck,
|
|||
|
false); //use haplotypes?
|
|||
|
|
|||
|
// CP to do
|
|||
|
#if 0
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
// Sanity check number of patterns and pattern lengths in GFM
|
|||
|
// against original strings
|
|||
|
assert_eq(os.size(), gfm.nPat());
|
|||
|
for(size_t i = 0; i < os.size(); i++) {
|
|||
|
assert_eq(os[i].length(), rgfm->plen()[i]);
|
|||
|
}
|
|||
|
}
|
|||
|
// Sanity-check the restored version of the GFM
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
rgfm->loadIntoMemory(
|
|||
|
-1, // fw index
|
|||
|
true, // load SA sample
|
|||
|
true, // load ftab
|
|||
|
true, // load rstarts
|
|||
|
!noRefNames,
|
|||
|
startVerbose);
|
|||
|
rgfm->checkOrigs(os, false);
|
|||
|
rgfm->evictFromMemory();
|
|||
|
}
|
|||
|
#endif
|
|||
|
{
|
|||
|
// Load the other half of the index into memory
|
|||
|
assert(!rgfms_3N[j]->isInMemory());
|
|||
|
Timer _t(cerr, "Time loading forward index: ", timing);
|
|||
|
rgfms_3N[j]->loadIntoMemory(
|
|||
|
-1, // not the reverse index
|
|||
|
true, // load SA samp? (yes, need forward index's SA samp)
|
|||
|
true, // load ftab (in forward index)
|
|||
|
true, // load rstarts (in forward index)
|
|||
|
!noRefNames, // load names?
|
|||
|
startVerbose);
|
|||
|
|
|||
|
repeatdbs_3N[j]->construct(gfms_3N[j]->rstarts(), gfms_3N[j]->nFrag());
|
|||
|
}
|
|||
|
|
|||
|
if (threeN) {
|
|||
|
ht2_option_t option;
|
|||
|
ht2_init_options(&option);
|
|||
|
|
|||
|
option.altdb = altdbs_3N[j];
|
|||
|
option.raltdb = raltdbs_3N[j];
|
|||
|
option.repeatdb = repeatdbs_3N[j];
|
|||
|
option.gfm = gfms_3N[j];
|
|||
|
option.rgfm = rgfms_3N[j];
|
|||
|
|
|||
|
ht2_handle_t handle = ht2_init(adjIdxBases_3N[j].c_str(), &option);
|
|||
|
|
|||
|
repeatHandles.push_back(handle);
|
|||
|
if (refNameMap == NULL) {
|
|||
|
ht2_index_getrefnames(repeatHandles[0], &refNameMap);
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
if(!saw_k) {
|
|||
|
if(gfms_3N[j]->gh().linearFM()) khits = 5;
|
|||
|
else khits = 10;
|
|||
|
}
|
|||
|
}
|
|||
|
} else {
|
|||
|
altdb = new ALTDB<index_t>();
|
|||
|
repeatdb = new RepeatDB<index_t>();
|
|||
|
raltdb = new ALTDB<index_t>();
|
|||
|
adjIdxBase = adjustEbwtBase(argv0, bt2indexBases[0], gVerbose);
|
|||
|
gfm = new HGFM<index_t, local_index_t>(
|
|||
|
adjIdxBase,
|
|||
|
altdb,
|
|||
|
NULL,
|
|||
|
NULL,
|
|||
|
-1, // fw index
|
|||
|
true, // index is for the forward direction
|
|||
|
/* overriding: */ offRate,
|
|||
|
0, // amount to add to index offrate or <= 0 to do nothing
|
|||
|
useMm, // whether to use memory-mapped files
|
|||
|
useShmem, // whether to use shared memory
|
|||
|
mmSweep, // sweep memory-mapped files
|
|||
|
!noRefNames, // load names?
|
|||
|
true, // load SA sample?
|
|||
|
true, // load ftab?
|
|||
|
true, // load rstarts?
|
|||
|
!no_spliced_alignment, // load splice sites?
|
|||
|
gVerbose, // whether to be talkative
|
|||
|
startVerbose, // talkative during initialization
|
|||
|
false /*passMemExc*/,
|
|||
|
sanityCheck,
|
|||
|
use_haplotype); //use haplotypes?
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
// Sanity check number of patterns and pattern lengths in GFM
|
|||
|
// against original strings
|
|||
|
assert_eq(os.size(), gfm->nPat());
|
|||
|
for(size_t i = 0; i < os.size(); i++) {
|
|||
|
assert_eq(os[i].length(), gfm->plen()[i]);
|
|||
|
}
|
|||
|
}
|
|||
|
// Sanity-check the restored version of the GFM
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
gfm->loadIntoMemory(
|
|||
|
-1, // fw index
|
|||
|
true, // load SA sample
|
|||
|
true, // load ftab
|
|||
|
true, // load rstarts
|
|||
|
!noRefNames,
|
|||
|
startVerbose);
|
|||
|
gfm->checkOrigs(os, false);
|
|||
|
gfm->evictFromMemory();
|
|||
|
}
|
|||
|
{
|
|||
|
// Load the other half of the index into memory
|
|||
|
assert(!gfm->isInMemory());
|
|||
|
Timer _t(cerr, "Time loading forward index: ", timing);
|
|||
|
gfm->loadIntoMemory(
|
|||
|
-1, // not the reverse index
|
|||
|
true, // load SA samp? (yes, need forward index's SA samp)
|
|||
|
true, // load ftab (in forward index)
|
|||
|
true, // load rstarts (in forward index)
|
|||
|
!noRefNames, // load names?
|
|||
|
startVerbose);
|
|||
|
}
|
|||
|
rep_adjIdxBase = adjIdxBase + ".rep";
|
|||
|
|
|||
|
{
|
|||
|
std::ifstream infile((rep_adjIdxBase + ".1." + gfm_ext.c_str()).c_str());
|
|||
|
rep_index_exists = infile.good();
|
|||
|
}
|
|||
|
if(rep_index_exists && use_repeat_index) {
|
|||
|
rgfm = new RFM<index_t>(
|
|||
|
rep_adjIdxBase,
|
|||
|
raltdb,
|
|||
|
repeatdb,
|
|||
|
&readLens,
|
|||
|
-1, // fw index
|
|||
|
true, // index is for the forward direction
|
|||
|
/* overriding: */ offRate,
|
|||
|
0, // amount to add to index offrate or <= 0 to do nothing
|
|||
|
useMm, // whether to use memory-mapped files
|
|||
|
useShmem, // whether to use shared memory
|
|||
|
mmSweep, // sweep memory-mapped files
|
|||
|
!noRefNames, // load names?
|
|||
|
true, // load SA sample?
|
|||
|
true, // load ftab?
|
|||
|
true, // load rstarts?
|
|||
|
!no_spliced_alignment, // load splice sites?
|
|||
|
gVerbose, // whether to be talkative
|
|||
|
startVerbose, // talkative during initialization
|
|||
|
false /*passMemExc*/,
|
|||
|
sanityCheck,
|
|||
|
false); //use haplotypes?
|
|||
|
|
|||
|
// CP to do
|
|||
|
#if 0
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
// Sanity check number of patterns and pattern lengths in GFM
|
|||
|
// against original strings
|
|||
|
assert_eq(os.size(), gfm.nPat());
|
|||
|
for(size_t i = 0; i < os.size(); i++) {
|
|||
|
assert_eq(os[i].length(), rgfm->plen()[i]);
|
|||
|
}
|
|||
|
}
|
|||
|
// Sanity-check the restored version of the GFM
|
|||
|
if(sanityCheck && !os.empty()) {
|
|||
|
rgfm->loadIntoMemory(
|
|||
|
-1, // fw index
|
|||
|
true, // load SA sample
|
|||
|
true, // load ftab
|
|||
|
true, // load rstarts
|
|||
|
!noRefNames,
|
|||
|
startVerbose);
|
|||
|
rgfm->checkOrigs(os, false);
|
|||
|
rgfm->evictFromMemory();
|
|||
|
}
|
|||
|
#endif
|
|||
|
{
|
|||
|
// Load the other half of the index into memory
|
|||
|
assert(!rgfm->isInMemory());
|
|||
|
Timer _t(cerr, "Time loading forward index: ", timing);
|
|||
|
rgfm->loadIntoMemory(
|
|||
|
-1, // not the reverse index
|
|||
|
true, // load SA samp? (yes, need forward index's SA samp)
|
|||
|
true, // load ftab (in forward index)
|
|||
|
true, // load rstarts (in forward index)
|
|||
|
!noRefNames, // load names?
|
|||
|
startVerbose);
|
|||
|
|
|||
|
repeatdb->construct(gfm->rstarts(), gfm->nFrag());
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if(!saw_k) {
|
|||
|
if(gfm->gh().linearFM()) khits = 5;
|
|||
|
else khits = 10;
|
|||
|
}
|
|||
|
} // else threeN
|
|||
|
|
|||
|
OutputQueue oq(
|
|||
|
*fout, // out file buffer
|
|||
|
reorder && nthreads > 1, // whether to reorder when there's >1 thread
|
|||
|
nthreads, // # threads
|
|||
|
nthreads > 1, // whether to be thread-safe
|
|||
|
skipReads); // first read will have this rdid
|
|||
|
{
|
|||
|
Timer _t(cerr, "Time searching: ", timing);
|
|||
|
// Set up penalties
|
|||
|
if(bonusMatch > 0 && !localAlign) {
|
|||
|
cerr << "Warning: Match bonus always = 0 in --end-to-end mode; ignoring user setting" << endl;
|
|||
|
bonusMatch = 0;
|
|||
|
}
|
|||
|
if(tranAssm) {
|
|||
|
penNoncanIntronLen.init(SIMPLE_FUNC_LOG, -8, 2);
|
|||
|
}
|
|||
|
Scoring sc(
|
|||
|
bonusMatch, // constant reward for match
|
|||
|
penMmcType, // how to penalize mismatches
|
|||
|
penMmcMax, // max mm penalty
|
|||
|
penMmcMin, // min mm penalty
|
|||
|
penScMax, // max sc penalty
|
|||
|
penScMin, // min sc penalty
|
|||
|
scoreMin, // min score as function of read len
|
|||
|
nCeil, // max # Ns as function of read len
|
|||
|
penNType, // how to penalize Ns in the read
|
|||
|
penN, // constant if N pelanty is a constant
|
|||
|
penNCatPair, // whether to concat mates before N filtering
|
|||
|
penRdGapConst, // constant coeff for read gap cost
|
|||
|
penRfGapConst, // constant coeff for ref gap cost
|
|||
|
penRdGapLinear, // linear coeff for read gap cost
|
|||
|
penRfGapLinear, // linear coeff for ref gap cost
|
|||
|
gGapBarrier, // # rows at top/bot only entered diagonally
|
|||
|
penCanSplice, // canonical splicing penalty
|
|||
|
penNoncanSplice,// non-canonical splicing penalty
|
|||
|
penConflictSplice, // conflicting splice site penalty
|
|||
|
&penCanIntronLen, // penalty as to intron length
|
|||
|
&penNoncanIntronLen); // penalty as to intron length
|
|||
|
|
|||
|
EList<size_t> reflens;
|
|||
|
// for HISAT-3N
|
|||
|
EList<string> refnames_3N[2];
|
|||
|
EList<size_t> replens_3N[2];
|
|||
|
EList<string> repnames_3N[2];
|
|||
|
EList<size_t> empty_replens_3N[2];
|
|||
|
EList<string> empty_repnames_3N[2];
|
|||
|
|
|||
|
//for regular hisat2
|
|||
|
EList<string> refnames;
|
|||
|
//readEbwtRefnames<index_t>(adjIdxBase, refnames);
|
|||
|
EList<size_t> replens;
|
|||
|
EList<string> repnames;
|
|||
|
EList<size_t> empty_replens;
|
|||
|
EList<string> empty_repnames;
|
|||
|
|
|||
|
|
|||
|
if (threeN) {
|
|||
|
for(size_t i = 0; i < gfms_3N[0]->nPat(); i++) {
|
|||
|
reflens.push_back(gfms_3N[0]->plen()[i]);
|
|||
|
}
|
|||
|
for (int j = 0; j < 2; j++) {
|
|||
|
readEbwtRefnames<index_t>(adjIdxBases_3N[j], refnames_3N[j]);
|
|||
|
if (rep_index_exists_3N[j] && use_repeat_index) {
|
|||
|
rgfms_3N[j]->getReferenceNames(repnames_3N[j]);
|
|||
|
rgfms_3N[j]->getReferenceLens(replens_3N[j]);
|
|||
|
}
|
|||
|
if(rmChrName && addChrName) {
|
|||
|
cerr << "Error: --remove-chrname and --add-chrname cannot be used at the same time" << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(rmChrName) {
|
|||
|
for(size_t i = 0; i < refnames_3N[j].size(); i++) {
|
|||
|
string& refname = refnames_3N[j][i];
|
|||
|
if(refname.find("chr") == 0) {
|
|||
|
refname = refname.substr(3);
|
|||
|
}
|
|||
|
}
|
|||
|
} else if(addChrName) {
|
|||
|
for(size_t i = 0; i < refnames_3N[j].size(); i++) {
|
|||
|
string& refname = refnames_3N[j][i];
|
|||
|
if(refname.find("chr") != 0) {
|
|||
|
refname = string("chr") + refname;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
} else {
|
|||
|
readEbwtRefnames<index_t>(adjIdxBase, refnames);
|
|||
|
for(size_t i = 0; i < gfm->nPat(); i++) {
|
|||
|
reflens.push_back(gfm->plen()[i]);
|
|||
|
}
|
|||
|
if(rep_index_exists && use_repeat_index) {
|
|||
|
rgfm->getReferenceNames(repnames);
|
|||
|
rgfm->getReferenceLens(replens);
|
|||
|
}
|
|||
|
if(rmChrName && addChrName) {
|
|||
|
cerr << "Error: --remove-chrname and --add-chrname cannot be used at the same time" << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(rmChrName) {
|
|||
|
for(size_t i = 0; i < refnames.size(); i++) {
|
|||
|
string& refname = refnames[i];
|
|||
|
if(refname.find("chr") == 0) {
|
|||
|
refname = refname.substr(3);
|
|||
|
}
|
|||
|
}
|
|||
|
} else if(addChrName) {
|
|||
|
for(size_t i = 0; i < refnames.size(); i++) {
|
|||
|
string& refname = refnames[i];
|
|||
|
if(refname.find("chr") != 0) {
|
|||
|
refname = string("chr") + refname;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
SamConfig<index_t> samc(
|
|||
|
threeN ? refnames_3N[0]: refnames, // reference sequence names
|
|||
|
reflens, // reference sequence lengths
|
|||
|
threeN?(repeat ? repnames_3N[0] : empty_repnames_3N[0]): (repeat ? repnames : empty_repnames), // repeat sequence names
|
|||
|
threeN? (repeat ? replens_3N[0] : empty_replens_3N[0]): (repeat ? replens : empty_replens), // repeat sequence lengths
|
|||
|
samTruncQname, // whether to truncate QNAME to 255 chars
|
|||
|
samOmitSecSeqQual, // omit SEQ/QUAL for 2ndary alignments?
|
|||
|
samNoUnal, // omit unaligned-read records?
|
|||
|
string("hisat2"), // program id
|
|||
|
string("hisat2"), // program name
|
|||
|
string(HISAT2_VERSION), // program version
|
|||
|
argstr, // command-line
|
|||
|
rgs_optflag, // read-group string
|
|||
|
rna_strandness,
|
|||
|
sam_print_as,
|
|||
|
sam_print_xs,
|
|||
|
sam_print_xss,
|
|||
|
sam_print_yn,
|
|||
|
sam_print_xn,
|
|||
|
sam_print_cs,
|
|||
|
sam_print_cq,
|
|||
|
sam_print_x0,
|
|||
|
sam_print_x1,
|
|||
|
sam_print_xm,
|
|||
|
sam_print_xo,
|
|||
|
sam_print_xg,
|
|||
|
sam_print_nm,
|
|||
|
sam_print_md,
|
|||
|
sam_print_yf,
|
|||
|
sam_print_yi,
|
|||
|
sam_print_ym,
|
|||
|
sam_print_yp,
|
|||
|
sam_print_yt,
|
|||
|
sam_print_ys,
|
|||
|
sam_print_zs,
|
|||
|
sam_print_xr,
|
|||
|
sam_print_xt,
|
|||
|
sam_print_xd,
|
|||
|
sam_print_xu,
|
|||
|
sam_print_yl,
|
|||
|
sam_print_ye,
|
|||
|
sam_print_yu,
|
|||
|
sam_print_xp,
|
|||
|
sam_print_yr,
|
|||
|
sam_print_zb,
|
|||
|
sam_print_zr,
|
|||
|
sam_print_zf,
|
|||
|
sam_print_zm,
|
|||
|
sam_print_zi,
|
|||
|
sam_print_zp,
|
|||
|
sam_print_zu,
|
|||
|
sam_print_xs_a,
|
|||
|
sam_print_nh);
|
|||
|
// Set up hit sink; if sanityCheck && !os.empty() is true,
|
|||
|
// then instruct the sink to "retain" hits in a vector in
|
|||
|
// memory so that we can easily sanity check them later on
|
|||
|
|
|||
|
|
|||
|
AlnSink<index_t> *mssink = NULL;
|
|||
|
|
|||
|
//auto_ptr<BitPairReference> refss[2];
|
|||
|
auto_ptr<BitPairReference> refs;
|
|||
|
|
|||
|
Timer *_tRef = new Timer(cerr, "Time loading reference: ", timing);
|
|||
|
refs = auto_ptr<BitPairReference>(
|
|||
|
new BitPairReference(
|
|||
|
threeN ? adjIdxBases_3N[0] : adjIdxBase,
|
|||
|
NULL,
|
|||
|
false,
|
|||
|
sanityCheck,
|
|||
|
NULL,
|
|||
|
NULL,
|
|||
|
false,
|
|||
|
useMm,
|
|||
|
useShmem,
|
|||
|
mmSweep,
|
|||
|
gVerbose,
|
|||
|
startVerbose)
|
|||
|
);
|
|||
|
delete _tRef;
|
|||
|
if(!refs->loaded()) throw 1;
|
|||
|
|
|||
|
|
|||
|
|
|||
|
|
|||
|
BitPairReference* rrefss[2] = {NULL, };
|
|||
|
BitPairReference* rrefs = NULL;
|
|||
|
|
|||
|
if (threeN) {
|
|||
|
for (int j = 0; j < 2; j++) {
|
|||
|
if (rep_index_exists_3N[j] && use_repeat_index) {
|
|||
|
const EList<uint8_t> &included = rgfms_3N[j]->getRepeatIncluded();
|
|||
|
rrefss[j] = new BitPairReference(
|
|||
|
rep_adjIdxBase_3N[j],
|
|||
|
&included,
|
|||
|
false,
|
|||
|
sanityCheck,
|
|||
|
NULL,
|
|||
|
NULL,
|
|||
|
false,
|
|||
|
useMm,
|
|||
|
useShmem,
|
|||
|
mmSweep,
|
|||
|
gVerbose,
|
|||
|
startVerbose);
|
|||
|
if (!rrefss[j]->loaded()) throw 1;
|
|||
|
}
|
|||
|
}
|
|||
|
} else {
|
|||
|
if(rep_index_exists && use_repeat_index) {
|
|||
|
const EList<uint8_t>& included = rgfm->getRepeatIncluded();
|
|||
|
rrefs = new BitPairReference(
|
|||
|
rep_adjIdxBase,
|
|||
|
&included,
|
|||
|
false,
|
|||
|
sanityCheck,
|
|||
|
NULL,
|
|||
|
NULL,
|
|||
|
false,
|
|||
|
useMm,
|
|||
|
useShmem,
|
|||
|
mmSweep,
|
|||
|
gVerbose,
|
|||
|
startVerbose);
|
|||
|
if(!rrefs->loaded()) throw 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
bool xsOnly = (tranAssm_program == "cufflinks");
|
|||
|
TranscriptomePolicy tpol(minIntronLen,
|
|||
|
maxIntronLen,
|
|||
|
tranAssm ? 15 : 7,
|
|||
|
tranAssm ? 20 : 14,
|
|||
|
no_spliced_alignment,
|
|||
|
tranMapOnly,
|
|||
|
tranAssm,
|
|||
|
xsOnly,
|
|||
|
avoid_pseudogene);
|
|||
|
|
|||
|
GraphPolicy gpol(max_alts_tried,
|
|||
|
use_haplotype,
|
|||
|
(threeN ? altdbs_3N[0]->haplotypes().size() : altdb->haplotypes().size()) > 0 && use_haplotype,
|
|||
|
enable_codis);
|
|||
|
|
|||
|
init_junction_prob();
|
|||
|
bool write = novelSpliceSiteOutfile != "" || useTempSpliceSite;
|
|||
|
bool read = knownSpliceSiteInfile != "" || novelSpliceSiteInfile != "" || useTempSpliceSite || altdbs_3N[0]->hasSpliceSites();
|
|||
|
ssdb = new SpliceSiteDB(
|
|||
|
*(refs.get()),
|
|||
|
threeN ? refnames_3N[0] : refnames,
|
|||
|
nthreads > 1, // thread-safe
|
|||
|
write, // write?
|
|||
|
read); // read?
|
|||
|
ssdb->read(threeN ? *gfms_3N[0] : *gfm, threeN ? altdbs_3N[0]->alts() : altdb->alts());
|
|||
|
if(knownSpliceSiteInfile != "") {
|
|||
|
ifstream ssdb_file(knownSpliceSiteInfile.c_str(), ios::in);
|
|||
|
if(ssdb_file.is_open()) {
|
|||
|
ssdb->read(ssdb_file,
|
|||
|
true); // known splice sites
|
|||
|
ssdb_file.close();
|
|||
|
}
|
|||
|
}
|
|||
|
if(novelSpliceSiteInfile != "") {
|
|||
|
ifstream ssdb_file(novelSpliceSiteInfile.c_str(), ios::in);
|
|||
|
if(ssdb_file.is_open()) {
|
|||
|
ssdb->read(ssdb_file,
|
|||
|
false); // novel splice sites
|
|||
|
ssdb_file.close();
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
switch(outType) {
|
|||
|
case OUTPUT_SAM: {
|
|||
|
if (threeN) {
|
|||
|
mssink = new AlnSink3NSam<index_t>(
|
|||
|
oq, // output queue
|
|||
|
samc, // settings & routines for SAM output
|
|||
|
refnames_3N[0], // reference names
|
|||
|
repnames_3N[0], // repeat names
|
|||
|
gQuiet, // don't print alignment summary at end
|
|||
|
nthreads,
|
|||
|
refs.get(),
|
|||
|
no_spliced_alignment,
|
|||
|
altdbs_3N[0],
|
|||
|
ssdb);
|
|||
|
} else {
|
|||
|
mssink = new AlnSinkSam<index_t>(
|
|||
|
oq, // output queue
|
|||
|
samc, // settings & routines for SAM output
|
|||
|
refnames, // reference names
|
|||
|
repnames, // repeat names
|
|||
|
gQuiet, // don't print alignment summary at end
|
|||
|
altdb,
|
|||
|
ssdb);
|
|||
|
};
|
|||
|
|
|||
|
if(!samNoHead) {
|
|||
|
bool printHd = true, printSq = true;
|
|||
|
BTString buf;
|
|||
|
samc.printHeader(buf, rgid, rgs, printHd, !samNoSQ, printSq);
|
|||
|
fout->writeString(buf);
|
|||
|
}
|
|||
|
break;
|
|||
|
}
|
|||
|
default:
|
|||
|
cerr << "Invalid output type: " << outType << endl;
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
if(gVerbose || startVerbose) {
|
|||
|
cerr << "Dispatching to search driver: "; logTime(cerr, true);
|
|||
|
}
|
|||
|
// Set up global constraint
|
|||
|
OutFileBuf *metricsOfb = NULL;
|
|||
|
if(!metricsFile.empty() && metricsIval > 0) {
|
|||
|
metricsOfb = new OutFileBuf(metricsFile);
|
|||
|
}
|
|||
|
// Do the search for all input reads
|
|||
|
assert(patsrc != NULL);
|
|||
|
assert(mssink != NULL);
|
|||
|
multiseedSearch(
|
|||
|
sc, // scoring scheme
|
|||
|
tpol,
|
|||
|
gpol,
|
|||
|
*patsrc, // pattern source
|
|||
|
*mssink, // hit sink
|
|||
|
gfms_3N, // 3N BWT
|
|||
|
rgfms_3N, // 3N
|
|||
|
rrefss, // 3N
|
|||
|
gfm, // BWT
|
|||
|
rgfm,
|
|||
|
refs.get(),
|
|||
|
rrefs,
|
|||
|
metricsOfb);
|
|||
|
// Evict any loaded indexes from memory
|
|||
|
if (threeN) {
|
|||
|
for (int j = 0; j < 2; j++) {
|
|||
|
if(gfms_3N[j]->isInMemory()) {
|
|||
|
gfms_3N[j]->evictFromMemory();
|
|||
|
}
|
|||
|
}
|
|||
|
} else {
|
|||
|
if(gfm->isInMemory()) {
|
|||
|
gfm->evictFromMemory();
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
if(!gQuiet && !seedSumm) {
|
|||
|
size_t repThresh = mhits;
|
|||
|
if(repThresh == 0) {
|
|||
|
repThresh = std::numeric_limits<size_t>::max();
|
|||
|
}
|
|||
|
mssink->finish(cerr,
|
|||
|
repThresh,
|
|||
|
gReportDiscordant,
|
|||
|
gReportMixed,
|
|||
|
newAlignSummary,
|
|||
|
hadoopOut);
|
|||
|
if(alignSumFile != "") {
|
|||
|
ofstream sumfile(alignSumFile.c_str(), ios::out);
|
|||
|
if(sumfile.is_open()) {
|
|||
|
mssink->finish(sumfile,
|
|||
|
repThresh,
|
|||
|
gReportDiscordant,
|
|||
|
gReportMixed,
|
|||
|
newAlignSummary,
|
|||
|
false); // hadoopOut
|
|||
|
sumfile.close();
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
if(ssdb != NULL) {
|
|||
|
if(novelSpliceSiteOutfile != "") {
|
|||
|
ofstream ssdb_file(novelSpliceSiteOutfile.c_str(), ios::out);
|
|||
|
if(ssdb_file.is_open()) {
|
|||
|
ssdb->print(ssdb_file);
|
|||
|
ssdb_file.close();
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
oq.flush(true);
|
|||
|
assert_eq(oq.numStarted(), oq.numFinished());
|
|||
|
assert_eq(oq.numStarted(), oq.numFlushed());
|
|||
|
delete patsrc;
|
|||
|
delete mssink;
|
|||
|
delete ssdb;
|
|||
|
delete metricsOfb;
|
|||
|
if (threeN) {
|
|||
|
for (int i = 0; i < 2; i++) {
|
|||
|
if(rep_index_exists_3N[i] && use_repeat_index) {
|
|||
|
delete rgfms_3N[i];
|
|||
|
delete rrefss[i];
|
|||
|
delete repeatdbs_3N[i];
|
|||
|
delete raltdbs_3N[i];
|
|||
|
}
|
|||
|
delete gfms_3N[i];
|
|||
|
delete altdbs_3N[i];
|
|||
|
}
|
|||
|
if(rep_index_exists_3N[0] && use_repeat_index){
|
|||
|
for (int k = 0; k < 2; k++) {
|
|||
|
ht2_close(repeatHandles[k]);
|
|||
|
}
|
|||
|
}
|
|||
|
} else {
|
|||
|
delete altdb;
|
|||
|
delete repeatdb;
|
|||
|
delete raltdb;
|
|||
|
delete rgfm;
|
|||
|
delete rrefs;
|
|||
|
delete gfm;
|
|||
|
}
|
|||
|
if (refNameMap != NULL) {
|
|||
|
free(refNameMap);
|
|||
|
}
|
|||
|
if(fout != NULL) {
|
|||
|
delete fout;
|
|||
|
}
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// C++ name mangling is disabled for the hisat2() function to make it
// easier to use HISAT2 as a library.
|
|||
|
extern "C" {
|
|||
|
|
|||
|
/**
 * Main HISAT2 entry function.  Parses argc/argv style command-line
 * options, sets global configuration variables, and calls the driver()
 * function.
 */
|
|||
|
int hisat2(int argc, const char **argv) {
|
|||
|
try {
|
|||
|
// Reset all global state, including getopt state
|
|||
|
opterr = optind = 1;
|
|||
|
resetOptions();
|
|||
|
for(int i = 0; i < argc; i++) {
|
|||
|
argstr += argv[i];
|
|||
|
if(i < argc-1) argstr += " ";
|
|||
|
}
|
|||
|
if(startVerbose) { cerr << "Entered main(): "; logTime(cerr, true); }
|
|||
|
parseOptions(argc, argv);
|
|||
|
argv0 = argv[0];
|
|||
|
if(showVersion) {
|
|||
|
cout << argv0 << " version " << HISAT2_VERSION << endl;
|
|||
|
if(sizeof(void*) == 4) {
|
|||
|
cout << "32-bit" << endl;
|
|||
|
} else if(sizeof(void*) == 8) {
|
|||
|
cout << "64-bit" << endl;
|
|||
|
} else {
|
|||
|
cout << "Neither 32- nor 64-bit: sizeof(void*) = " << sizeof(void*) << endl;
|
|||
|
}
|
|||
|
cout << "Built on " << BUILD_HOST << endl;
|
|||
|
cout << BUILD_TIME << endl;
|
|||
|
cout << "Compiler: " << COMPILER_VERSION << endl;
|
|||
|
cout << "Options: " << COMPILER_OPTIONS << endl;
|
|||
|
cout << "Sizeof {int, long, long long, void*, size_t, off_t}: {"
|
|||
|
<< sizeof(int)
|
|||
|
<< ", " << sizeof(long) << ", " << sizeof(long long)
|
|||
|
<< ", " << sizeof(void *) << ", " << sizeof(size_t)
|
|||
|
<< ", " << sizeof(off_t) << "}" << endl;
|
|||
|
return 0;
|
|||
|
}
|
|||
|
{
|
|||
|
Timer _t(cerr, "Overall time: ", timing);
|
|||
|
if(startVerbose) {
|
|||
|
cerr << "Parsing index and read arguments: "; logTime(cerr, true);
|
|||
|
}
|
|||
|
|
|||
|
// Get index basename (but only if it wasn't specified via --index)
|
|||
|
if(bt2indexs[0].empty()) {
|
|||
|
if(optind >= argc) {
|
|||
|
cerr << "No index, query, or output file specified!" << endl;
|
|||
|
printUsage(cerr);
|
|||
|
return 1;
|
|||
|
}
|
|||
|
bt2indexs[0] = argv[optind++];
|
|||
|
}
|
|||
|
if (threeN) {
|
|||
|
bt2indexs[1] = bt2indexs[0];
|
|||
|
if (fileExist(bt2indexs[0] + threeN_indexTags[0] + ".1." + gfm_ext)) {
|
|||
|
bt2indexs[0] += threeN_indexTags[0];
|
|||
|
bt2indexs[1] += threeN_indexTags[1];
|
|||
|
} else if (fileExist(bt2indexs[0] + ".3n.1.1." + gfm_ext)) {
|
|||
|
bt2indexs[0] += ".3n.1";
|
|||
|
bt2indexs[1] += ".3n.2";
|
|||
|
if (!((usrInput_convertedFrom == 'C' && usrInput_convertedTo == 'T') ||
|
|||
|
(usrInput_convertedFrom == 'T' && usrInput_convertedTo == 'C'))) {
|
|||
|
cerr << "Your current hisat-3n index only support C-to-T or T-to-C base change. Please build new hisat-3n index to support "
|
|||
|
<< usrInput_convertedFrom << " to " << usrInput_convertedTo << "change." << endl;
|
|||
|
printUsage(cerr);
|
|||
|
return 1;
|
|||
|
}
|
|||
|
} else {
|
|||
|
cerr << "Index is not exist, please use hisat-3n-build to build index first. Please use the same --base-change argument for both hisat-3n-build and hisat-3n." << endl;
|
|||
|
printUsage(cerr);
|
|||
|
return 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// Get query filename
|
|||
|
bool got_reads = !queries.empty() || !mates1.empty() || !mates12.empty();
|
|||
|
#ifdef USE_SRA
|
|||
|
got_reads = got_reads || !sra_accs.empty();
|
|||
|
#endif
|
|||
|
if(minIntronLen > maxIntronLen) {
|
|||
|
cerr << "--min-intronlen(" << minIntronLen << ") should not be greater than --max-intronlen("
|
|||
|
<< maxIntronLen << ")" << endl;
|
|||
|
printUsage(cerr);
|
|||
|
return 1;
|
|||
|
}
|
|||
|
if(optind >= argc) {
|
|||
|
if(!got_reads) {
|
|||
|
printUsage(cerr);
|
|||
|
cerr << "***" << endl
|
|||
|
#ifdef USE_SRA
|
|||
|
<< "Error: Must specify at least one read input with -U/-1/-2/--sra-acc" << endl;
|
|||
|
#else
|
|||
|
<< "Error: Must specify at least one read input with -U/-1/-2" << endl;
|
|||
|
|
|||
|
#endif
|
|||
|
return 1;
|
|||
|
}
|
|||
|
} else if(!got_reads) {
|
|||
|
// Tokenize the list of query files
|
|||
|
tokenize(argv[optind++], ",", queries);
|
|||
|
if(queries.empty()) {
|
|||
|
cerr << "Tokenized query file list was empty!" << endl;
|
|||
|
printUsage(cerr);
|
|||
|
return 1;
|
|||
|
}
|
|||
|
}
|
|||
|
|
|||
|
// Get output filename
|
|||
|
if(optind < argc && outfile.empty()) {
|
|||
|
outfile = argv[optind++];
|
|||
|
cerr << "Warning: Output file '" << outfile.c_str()
|
|||
|
<< "' was specified without -S. This will not work in "
|
|||
|
<< "future HISAT 2 versions. Please use -S instead."
|
|||
|
<< endl;
|
|||
|
}
|
|||
|
|
|||
|
// Extra parametesr?
|
|||
|
if(optind < argc) {
|
|||
|
cerr << "Extra parameter(s) specified: ";
|
|||
|
for(int i = optind; i < argc; i++) {
|
|||
|
cerr << "\"" << argv[i] << "\"";
|
|||
|
if(i < argc-1) cerr << ", ";
|
|||
|
}
|
|||
|
cerr << endl;
|
|||
|
if(mates1.size() > 0) {
|
|||
|
cerr << "Note that if <mates> files are specified using -1/-2, a <singles> file cannot" << endl
|
|||
|
<< "also be specified. Please run HISAT2 separately for mates and singles." << endl;
|
|||
|
}
|
|||
|
throw 1;
|
|||
|
}
|
|||
|
|
|||
|
// Optionally summarize
|
|||
|
if(gVerbose) {
|
|||
|
cout << "Input bt2 file: \"" << bt2indexs[0].c_str() << "\"" << endl;
|
|||
|
cout << "Input bt2 file: \"" << bt2indexs[1].c_str() << "\"" << endl;
|
|||
|
cout << "Query inputs (DNA, " << file_format_names[format].c_str() << "):" << endl;
|
|||
|
for(size_t i = 0; i < queries.size(); i++) {
|
|||
|
cout << " " << queries[i].c_str() << endl;
|
|||
|
}
|
|||
|
cout << "Quality inputs:" << endl;
|
|||
|
for(size_t i = 0; i < qualities.size(); i++) {
|
|||
|
cout << " " << qualities[i].c_str() << endl;
|
|||
|
}
|
|||
|
cout << "Output file: \"" << outfile.c_str() << "\"" << endl;
|
|||
|
cout << "Local endianness: " << (currentlyBigEndian()? "big":"little") << endl;
|
|||
|
cout << "Sanity checking: " << (sanityCheck? "enabled":"disabled") << endl;
|
|||
|
#ifdef NDEBUG
|
|||
|
cout << "Assertions: disabled" << endl;
|
|||
|
#else
|
|||
|
cout << "Assertions: enabled" << endl;
|
|||
|
#endif
|
|||
|
}
|
|||
|
if(ipause) {
|
|||
|
cout << "Press key to continue..." << endl;
|
|||
|
getchar();
|
|||
|
}
|
|||
|
driver<SString<char> >("DNA", bt2indexs, outfile);
|
|||
|
}
|
|||
|
return 0;
|
|||
|
} catch(std::exception& e) {
|
|||
|
cerr << "Error: Encountered exception: '" << e.what() << "'" << endl;
|
|||
|
cerr << "Command: ";
|
|||
|
for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
|
|||
|
cerr << endl;
|
|||
|
return 1;
|
|||
|
} catch(int e) {
|
|||
|
if(e != 0) {
|
|||
|
cerr << "Error: Encountered internal HISAT2 exception (#" << e << ")" << endl;
|
|||
|
cerr << "Command: ";
|
|||
|
for(int i = 0; i < argc; i++) cerr << argv[i] << " ";
|
|||
|
cerr << endl;
|
|||
|
}
|
|||
|
return e;
|
|||
|
}
|
|||
|
} // bowtie()
|
|||
|
} // extern "C"
|