hisat-3n/aligner_seed_policy.cpp

917 lines
30 KiB
C++
Raw Normal View History

2025-01-18 13:09:52 +00:00
/*
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
*
* This file is part of Bowtie 2.
*
* Bowtie 2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Bowtie 2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
*/
#include <string>
#include <iostream>
#include <sstream>
#include <limits>
#include "ds.h"
#include "aligner_seed_policy.h"
#include "mem_ids.h"
using namespace std;
/**
 * Map the function-type token of a policy setting onto the corresponding
 * SIMPLE_FUNC_* constant.  Both the one-letter code and the spelled-out
 * name are accepted (case-sensitive):
 *
 *   "C" / "Constant" -> SIMPLE_FUNC_CONST
 *   "L" / "Linear"   -> SIMPLE_FUNC_LINEAR
 *   "S" / "Sqrt"     -> SIMPLE_FUNC_SQRT
 *   "G" / "Log"      -> SIMPLE_FUNC_LOG
 *
 * Any other token is reported on stderr and the integer 1 is thrown.
 */
static int parseFuncType(const std::string& otype) {
	struct FuncAlias {
		const char *abbrev;   // one-letter code
		const char *longName; // spelled-out name
		int         funcType; // SIMPLE_FUNC_* value to return
	};
	static const FuncAlias kAliases[] = {
		{ "C", "Constant", SIMPLE_FUNC_CONST  },
		{ "L", "Linear",   SIMPLE_FUNC_LINEAR },
		{ "S", "Sqrt",     SIMPLE_FUNC_SQRT   },
		{ "G", "Log",      SIMPLE_FUNC_LOG    }
	};
	const size_t nAliases = sizeof(kAliases) / sizeof(kAliases[0]);
	for(size_t idx = 0; idx < nAliases; ++idx) {
		const FuncAlias& cand = kAliases[idx];
		if(otype == cand.abbrev || otype == cand.longName) {
			return cand.funcType;
		}
	}
	// No alias matched: complain and bail out.
	std::cerr << "Error: Bad function type '" << otype.c_str()
	          << "'. Should be C (constant), L (linear), "
	          << "S (square root) or G (natural log)." << std::endl;
	throw 1;
}
/*
 * PARSE_FUNC(fv): configure the function object `fv` from the
 * comma-separated value tokens of the setting currently being parsed.
 *
 * The macro expects an indexable container of strings named `ctoks` (with a
 * size() member) and the function parseFuncType() to be visible at the point
 * of use.  Tokens are positional and every one of them is optional; a field
 * whose token is absent is left untouched:
 *
 *   ctoks[0]  function type  -> fv.setType(parseFuncType(ctoks[0]))
 *   ctoks[1]  constant term  -> fv.setConst(...)
 *   ctoks[2]  coefficient    -> fv.setCoeff(...)
 *   ctoks[3]  minimum        -> fv.setMin(...)
 *   ctoks[4]  maximum        -> fv.setMax(...)
 *
 * The fifth token used to be forwarded to setMin(), which silently
 * overwrote the minimum and never set the maximum.
 * NOTE(review): assumes the function type offers setMax() next to setMin();
 * confirm against the SimpleFunc declaration.
 *
 * Numeric tokens are read with stream extraction; the temporaries start at
 * 0.0 so an unparsable token never forwards an indeterminate value.
 * The body is wrapped in do { } while(0) so that `PARSE_FUNC(x);` behaves
 * as a single statement.
 */
#define PARSE_FUNC(fv) do { \
	if(ctoks.size() >= 1) { \
		(fv).setType(parseFuncType(ctoks[0])); \
	} \
	if(ctoks.size() >= 2) { \
		double co = 0.0; \
		std::istringstream tmpss(ctoks[1]); \
		tmpss >> co; \
		(fv).setConst(co); \
	} \
	if(ctoks.size() >= 3) { \
		double ce = 0.0; \
		std::istringstream tmpss(ctoks[2]); \
		tmpss >> ce; \
		(fv).setCoeff(ce); \
	} \
	if(ctoks.size() >= 4) { \
		double mn = 0.0; \
		std::istringstream tmpss(ctoks[3]); \
		tmpss >> mn; \
		(fv).setMin(mn); \
	} \
	if(ctoks.size() >= 5) { \
		double mx = 0.0; \
		std::istringstream tmpss(ctoks[4]); \
		tmpss >> mx; \
		(fv).setMax(mx); \
	} \
} while(0)
/**
* Parse alignment policy when provided in this format:
* <lab>=<val>;<lab>=<val>;<lab>=<val>...
*
* And label=value possibilities are:
*
* Bonus for a match
* -----------------
*
* MA=xx (default: MA=0, or MA=2 if --local is set)
*
* xx = Each position where equal read and reference characters match up
* in the alignment contributes this amount to the total score.
*
* Penalty for a mismatch
* ----------------------
*
* MMP={Cxx|Q|RQ} (default: MMP=C6)
*
* Cxx = Each mismatch costs xx. If MMP=Cxx is specified, quality
* values are ignored when assessing penalties for mismatches.
* Q = Each mismatch incurs a penalty equal to the mismatched base's
* value.
* R = Each mismatch incurs a penalty equal to the mismatched base's
* rounded quality value. Qualities are rounded off to the
* nearest 10, and qualities greater than 30 are rounded to 30.
*
* Penalty for position with N (in either read or reference)
* ---------------------------------------------------------
*
* NP={Cxx|Q|RQ} (default: NP=C1)
*
* Cxx = Each alignment position with an N in either the read or the
* reference costs xx. If NP=Cxx is specified, quality values are
* ignored when assessing penalties for Ns.
* Q = Each alignment position with an N in either the read or the
* reference incurs a penalty equal to the read base's quality
* value.
* R = Each alignment position with an N in either the read or the
* reference incurs a penalty equal to the read base's rounded
* quality value. Qualities are rounded off to the nearest 10,
* and qualities greater than 30 are rounded to 30.
*
* Penalty for a read gap
* ----------------------
*
* RDG=xx,yy (default: RDG=5,3)
*
* xx = Read gap open penalty.
* yy = Read gap extension penalty.
*
* Total cost incurred by a read gap = xx + (yy * gap length)
*
* Penalty for a reference gap
* ---------------------------
*
* RFG=xx,yy (default: RFG=5,3)
*
* xx = Reference gap open penalty.
* yy = Reference gap extension penalty.
*
* Total cost incurred by a reference gap = xx + (yy * gap length)
*
* Minimum score for valid alignment
* ---------------------------------
*
* MIN=xx,yy (defaults: MIN=-0.6,-0.6, or MIN=0.0,0.66 if --local is set)
*
* xx,yy = For a read of length N, the total score must be at least
* xx + (read length * yy) for the alignment to be valid. The
* total score is the sum of all negative penalties (from
* mismatches and gaps) and all positive bonuses. The minimum
* can be negative (and is by default in global alignment mode).
*
* Score floor for local alignment
* -------------------------------
*
* FL=xx,yy (defaults: FL=-Infinity,0.0, or FL=0.0,0.0 if --local is set)
*
* xx,yy = If a cell in the dynamic programming table has a score less
* than xx + (read length * yy), then no valid alignment can go
* through it. Defaults are highly recommended.
*
* N ceiling
* ---------
*
* NCEIL=xx,yy (default: NCEIL=0.0,0.15)
*
* xx,yy = For a read of length N, the number of alignment
* positions with an N in either the read or the
* reference cannot exceed
* ceiling = xx + (read length * yy). If the ceiling is
* exceeded, the alignment is considered invalid.
*
* Seeds
* -----
*
* SEED=mm,len,ival (default: SEED=0,22)
*
* mm = Maximum number of mismatches allowed within a seed.
* Must be >= 0 and <= 2. Note that 2-mismatch mode is
* not fully sensitive; i.e. some 2-mismatch seed
* alignments may be missed.
* len = Length of seed.
* ival = Interval between seeds. If not specified, seed
* interval is determined by IVAL.
*
* Seed interval
* -------------
*
* IVAL={L|S|C},xx,yy (default: IVAL=S,1.0,0.0)
*
* L = let interval between seeds be a linear function of the
* read length. xx and yy are the constant and linear
* coefficients respectively. In other words, the interval
* equals a * len + b, where len is the read length.
* Intervals less than 1 are rounded up to 1.
* S = let interval between seeds be a function of the square
* root of the read length. xx and yy are the
* coefficients. In other words, the interval equals
* a * sqrt(len) + b, where len is the read length.
* Intervals less than 1 are rounded up to 1.
* C = Like S but uses cube root of length instead of square
* root.
*
* Example 1:
*
* SEED=1,10,5 and read sequence is TGCTATCGTACGATCGTACA:
*
* The following seeds are extracted from the forward
* representation of the read and aligned to the reference
* allowing up to 1 mismatch:
*
* Read: TGCTATCGTACGATCGTACA
*
* Seed 1+: TGCTATCGTA
* Seed 2+: TCGTACGATC
* Seed 3+: CGATCGTACA
*
* ...and the following are extracted from the reverse-complement
* representation of the read and align to the reference allowing
* up to 1 mismatch:
*
* Seed 1-: TACGATAGCA
* Seed 2-: GATCGTACGA
* Seed 3-: TGTACGATCG
*
* Example 2:
*
* SEED=1,20,20 and read sequence is TGCTATCGTACGATC. The seed
* length is 20 but the read is only 15 characters long. In this
* case, Bowtie2 automatically shrinks the seed length to be equal
* to the read length.
*
* Read: TGCTATCGTACGATC
*
* Seed 1+: TGCTATCGTACGATC
* Seed 1-: GATCGTACGATAGCA
*
* Example 3:
*
* SEED=1,10,10 and read sequence is TGCTATCGTACGATC. Only one seed
* fits on the read; a second seed would overhang the end of the read
* by 5 positions. In this case, Bowtie2 extracts one seed.
*
* Read: TGCTATCGTACGATC
*
* Seed 1+: TGCTATCGTA
* Seed 1-: TACGATAGCA
*/
/**
 * Read the single token `tok` into `dst` with stream extraction.  The
 * stream state is not inspected, so callers get whatever operator>> leaves
 * in `dst` for an unparsable token.
 */
template<typename T>
static inline void parsePolicyToken(const std::string& tok, T& dst) {
	std::istringstream iss(tok);
	iss >> dst;
}

/**
 * Parse the policy string `s` and store the resulting settings in the
 * output parameters.
 *
 * Every output except failStreak, seedRounds, *penCanIntronLen and
 * *penNoncanIntronLen is first reset to its default (the defaults depend on
 * `local`, `noisyHpolymer` and `ignoreQuals`).  Those four are written only
 * when the corresponding tag (DPS, ROUNDS, CANINTRONLEN, NONCANINTRONLEN)
 * appears in `s`.  bonusMatchType is only ever set to its default; no tag
 * changes it.
 *
 * Each ';'-separated setting must have the form <tag>=<v1>[,<v2>...] with
 * exactly one '=' and no empty values.  Recognized tags: MA, MMP, SCP, NP,
 * RDG, RFG, MIN, NCEIL, SEED, SEEDLEN, DPS, ROUNDS, IVAL, CANINTRONLEN,
 * NONCANINTRONLEN.
 *
 * @param s                  policy string to parse (may be empty)
 * @param local              selects the local-alignment defaults
 * @param noisyHpolymer      selects the *_BADHPOLY gap-penalty defaults
 * @param ignoreQuals        selects DEFAULT_MM_PENALTY_TYPE_IGNORE_QUALS
 * @param penCanIntronLen    destination for CANINTRONLEN; must be non-NULL
 *                           if that tag is present
 * @param penNoncanIntronLen destination for NONCANINTRONLEN; must be
 *                           non-NULL if that tag is present
 *
 * On a malformed or unknown setting a message is written to stderr and the
 * integer 1 is thrown (most error paths hit assert(false) first when
 * assertions are enabled).  Numeric tokens are read with stream extraction
 * and are not validated beyond the explicit range checks below.
 */
void SeedAlignmentPolicy::parseString(
	const std::string& s,
	bool local,
	bool noisyHpolymer,
	bool ignoreQuals,
	int& bonusMatchType,
	int& bonusMatch,
	int& penMmcType,
	int& penMmcMax,
	int& penMmcMin,
	int& penScMax,
	int& penScMin,
	int& penNType,
	int& penN,
	int& penRdExConst,
	int& penRfExConst,
	int& penRdExLinear,
	int& penRfExLinear,
	SimpleFunc& costMin,
	SimpleFunc& nCeil,
	bool& nCatPair,
	int& multiseedMms,
	int& multiseedLen,
	SimpleFunc& multiseedIval,
	size_t& failStreak,
	size_t& seedRounds,
	SimpleFunc* penCanIntronLen,
	SimpleFunc* penNoncanIntronLen)
{
	// ---------------------------------------------------------------
	// Step 1: reset the outputs to their defaults
	// ---------------------------------------------------------------
	bonusMatchType = local ? DEFAULT_MATCH_BONUS_TYPE_LOCAL : DEFAULT_MATCH_BONUS_TYPE;
	bonusMatch = local ? DEFAULT_MATCH_BONUS_LOCAL : DEFAULT_MATCH_BONUS;
	penMmcType = ignoreQuals ? DEFAULT_MM_PENALTY_TYPE_IGNORE_QUALS :
	                           DEFAULT_MM_PENALTY_TYPE;
	penMmcMax = DEFAULT_MM_PENALTY_MAX;
	penMmcMin = DEFAULT_MM_PENALTY_MIN;
	penNType = DEFAULT_N_PENALTY_TYPE;
	penN = DEFAULT_N_PENALTY;
	penScMax = DEFAULT_SC_PENALTY_MAX;
	penScMin = DEFAULT_SC_PENALTY_MIN;
	const double DMAX = std::numeric_limits<double>::max();
	costMin.init(
		local ? SIMPLE_FUNC_LOG : SIMPLE_FUNC_LINEAR,
		local ? DEFAULT_MIN_CONST_LOCAL : 0.0f,
		local ? DEFAULT_MIN_LINEAR_LOCAL : -0.2f);
	nCeil.init(
		SIMPLE_FUNC_LINEAR, 0.0f, DMAX,
		DEFAULT_N_CEIL_CONST, DEFAULT_N_CEIL_LINEAR);
	multiseedIval.init(
		DEFAULT_IVAL, 1.0f, DMAX,
		DEFAULT_IVAL_B, DEFAULT_IVAL_A);
	nCatPair = DEFAULT_N_CAT_PAIR;
	if(!noisyHpolymer) {
		penRdExConst = DEFAULT_READ_GAP_CONST;
		penRdExLinear = DEFAULT_READ_GAP_LINEAR;
		penRfExConst = DEFAULT_REF_GAP_CONST;
		penRfExLinear = DEFAULT_REF_GAP_LINEAR;
	} else {
		penRdExConst = DEFAULT_READ_GAP_CONST_BADHPOLY;
		penRdExLinear = DEFAULT_READ_GAP_LINEAR_BADHPOLY;
		penRfExConst = DEFAULT_REF_GAP_CONST_BADHPOLY;
		penRfExLinear = DEFAULT_REF_GAP_LINEAR_BADHPOLY;
	}
	multiseedMms = DEFAULT_SEEDMMS;
	multiseedLen = DEFAULT_SEEDLEN;

	// ---------------------------------------------------------------
	// Step 2: apply each ;-separated setting in order
	// ---------------------------------------------------------------
	string tok;
	istringstream ss(s);
	int setting = 0; // 1-based index of the current setting, for messages
	while(getline(ss, tok, ';')) {
		setting++;
		// Divide into tokens on either side of =
		EList<string> etoks(MISC_CAT);
		string etok;
		istringstream ess(tok);
		while(getline(ess, etok, '=')) {
			etoks.push_back(etok);
		}
		// Must be exactly 1 =
		if(etoks.size() != 2) {
			cerr << "Error parsing alignment policy setting " << setting
			     << "; must be bisected by = sign" << endl
			     << "Policy: " << s.c_str() << endl;
			assert(false); throw 1;
		}
		// LHS is tag, RHS value
		string tag = etoks[0], val = etoks[1];
		// Separate value into comma-separated tokens
		EList<string> ctoks(MISC_CAT);
		string ctok;
		istringstream css(val);
		while(getline(css, ctok, ',')) {
			ctoks.push_back(ctok);
		}
		if(ctoks.size() == 0) {
			cerr << "Error parsing alignment policy setting " << setting
			     << "; RHS must have at least 1 token" << endl
			     << "Policy: " << s.c_str() << endl;
			assert(false); throw 1;
		}
		for(size_t i = 0; i < ctoks.size(); i++) {
			if(ctoks[i].length() == 0) {
				cerr << "Error parsing alignment policy setting " << setting
				     << "; token " << i+1 << " on RHS had length=0" << endl
				     << "Policy: " << s.c_str() << endl;
				assert(false); throw 1;
			}
		}
		// From here on ctoks holds at least one token and none is empty,
		// so ctoks[0] and ctoks[0][0] are always safe to read.

		// Bonus for a match
		// MA=xx (default is DEFAULT_MATCH_BONUS, or
		// DEFAULT_MATCH_BONUS_LOCAL if --local is set)
		if(tag == "MA") {
			if(ctoks.size() != 1) {
				cerr << "Error parsing alignment policy setting " << setting
				     << "; RHS must have 1 token" << endl
				     << "Policy: " << s.c_str() << endl;
				assert(false); throw 1;
			}
			parsePolicyToken(ctoks[0], bonusMatch);
		}
		// Scoring for mismatches
		// MMP=Cxx      constant penalty xx (max and min both set to xx)
		// MMP=Q[,mx[,mn]]  quality-scaled penalty between mn and mx
		// MMP=R        maq-rounded quality value
		else if(tag == "MMP") {
			if(ctoks.size() > 3) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'"
				     << "; RHS must have at most 3 tokens" << endl
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			if(ctoks[0][0] == 'C') {
				// Constant penalty: the number follows the 'C' directly
				parsePolicyToken(ctoks[0].substr(1), penMmcMax);
				penMmcMin = penMmcMax;
				penMmcType = COST_MODEL_CONSTANT;
			} else if(ctoks[0][0] == 'Q') {
				if(ctoks.size() >= 2) {
					parsePolicyToken(ctoks[1], penMmcMax);
				} else {
					penMmcMax = DEFAULT_MM_PENALTY_MAX;
				}
				if(ctoks.size() >= 3) {
					parsePolicyToken(ctoks[2], penMmcMin);
				} else {
					penMmcMin = DEFAULT_MM_PENALTY_MIN;
				}
				if(penMmcMin > penMmcMax) {
					cerr << "Error: Maximum mismatch penalty (" << penMmcMax
					     << ") is less than minimum penalty (" << penMmcMin
					     << ")" << endl;
					throw 1;
				}
				// Set type to =quality
				penMmcType = COST_MODEL_QUAL;
			} else if(ctoks[0][0] == 'R') {
				// Set type to=Maq-quality
				penMmcType = COST_MODEL_ROUNDED_QUAL;
			} else {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'"
				     << "; RHS must start with C, Q or R" << endl
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
		}
		// Soft-clipping penalty range
		// SCP=<t>[,max[,min]]
		// The first token is not consulted here (presumably a cost-model
		// letter as in MMP -- confirm against the code that builds the
		// policy string).
		else if(tag == "SCP") {
			if(ctoks.size() > 3) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'"
				     << "; SCP must have at most 3 tokens" << endl
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			if(ctoks.size() >= 2) {
				parsePolicyToken(ctoks[1], penScMax);
				if(ctoks.size() >= 3) {
					// Explicit minimum.  This used to be re-read from
					// ctoks[1], which made min always equal max.
					parsePolicyToken(ctoks[2], penScMin);
				} else {
					// No explicit minimum: keep the long-standing
					// behaviour of letting it follow the maximum.
					penScMin = penScMax;
				}
			} else {
				// Only the leading token was given; reading ctoks[1]
				// would be out of bounds, so fall back to the defaults.
				penScMax = DEFAULT_SC_PENALTY_MAX;
				penScMin = DEFAULT_SC_PENALTY_MIN;
			}
			if(penScMin > penScMax) {
				cerr << "max (" << penScMax << ") should be >= min (" << penScMin << ")" << endl;
				assert(false); throw 1;
			}
			if(penScMin < 1) {
				cerr << "min (" << penScMin << ") should be greater than 0" << endl;
				assert(false); throw 1;
			}
		}
		// Scoring for positions where read or reference char is N
		// NP=Cxx  constant penalty xx
		// NP=Q    equal to quality
		// NP=R    maq-rounded quality value
		else if(tag == "NP") {
			if(ctoks.size() != 1) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'"
				     << "; RHS must have 1 token" << endl
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			if(ctoks[0][0] == 'C') {
				// Constant penalty: the number follows the 'C' directly
				parsePolicyToken(ctoks[0].substr(1), penN);
				penNType = COST_MODEL_CONSTANT;
			} else if(ctoks[0][0] == 'Q') {
				// Set type to =quality
				penNType = COST_MODEL_QUAL;
			} else if(ctoks[0][0] == 'R') {
				// Set type to=Maq-quality
				penNType = COST_MODEL_ROUNDED_QUAL;
			} else {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'"
				     << "; RHS must start with C, Q or R" << endl
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
		}
		// Scoring for read gaps
		// RDG=xx[,yy]
		//   xx -> penRdExConst
		//   yy -> penRdExLinear (default for the current homopolymer
		//         mode if omitted)
		// Any further tokens are ignored.
		else if(tag == "RDG") {
			parsePolicyToken(ctoks[0], penRdExConst);
			if(ctoks.size() >= 2) {
				parsePolicyToken(ctoks[1], penRdExLinear);
			} else {
				penRdExLinear = noisyHpolymer ?
					DEFAULT_READ_GAP_LINEAR_BADHPOLY :
					DEFAULT_READ_GAP_LINEAR;
			}
		}
		// Scoring for reference gaps
		// RFG=xx[,yy]
		//   xx -> penRfExConst
		//   yy -> penRfExLinear (default for the current homopolymer
		//         mode if omitted)
		// Any further tokens are ignored.
		else if(tag == "RFG") {
			parsePolicyToken(ctoks[0], penRfExConst);
			if(ctoks.size() >= 2) {
				parsePolicyToken(ctoks[1], penRfExLinear);
			} else {
				penRfExLinear = noisyHpolymer ?
					DEFAULT_REF_GAP_LINEAR_BADHPOLY :
					DEFAULT_REF_GAP_LINEAR;
			}
		}
		// Minimum score as a function of read length
		// MIN=<type>[,const[,coeff[,min[,max]]]]  (see PARSE_FUNC)
		else if(tag == "MIN") {
			PARSE_FUNC(costMin);
		}
		// Per-read N ceiling as a function of read length
		// NCEIL=<type>[,const[,coeff[,min[,max]]]]  (see PARSE_FUNC)
		else if(tag == "NCEIL") {
			PARSE_FUNC(nCeil);
		}
		// Seeds
		// SEED=mm[,len]
		//   mm  = maximum number of mismatches allowed within a seed;
		//         values other than 0 and 1 are rejected below
		//   len = length of seed (DEFAULT_SEEDLEN if omitted)
		// A third token is rejected; the seed interval comes from IVAL.
		else if(tag == "SEED") {
			if(ctoks.size() > 2) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'; RHS must have 1 or 2 tokens, "
				     << "had " << ctoks.size() << ". "
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			parsePolicyToken(ctoks[0], multiseedMms);
			if(multiseedMms > 1) {
				cerr << "Error: -N was set to " << multiseedMms << ", but cannot be set greater than 1" << endl;
				throw 1;
			}
			if(multiseedMms < 0) {
				cerr << "Error: -N was set to a number less than 0 (" << multiseedMms << ")" << endl;
				throw 1;
			}
			if(ctoks.size() >= 2) {
				parsePolicyToken(ctoks[1], multiseedLen);
			} else {
				multiseedLen = DEFAULT_SEEDLEN;
			}
		}
		// SEEDLEN=len: seed length on its own
		else if(tag == "SEEDLEN") {
			if(ctoks.size() > 1) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'; RHS must have 1 token, "
				     << "had " << ctoks.size() << ". "
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			parsePolicyToken(ctoks[0], multiseedLen);
		}
		// DPS=n: stored in failStreak
		else if(tag == "DPS") {
			if(ctoks.size() > 1) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'; RHS must have 1 token, "
				     << "had " << ctoks.size() << ". "
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			parsePolicyToken(ctoks[0], failStreak);
		}
		// ROUNDS=n: stored in seedRounds
		else if(tag == "ROUNDS") {
			if(ctoks.size() > 1) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'; RHS must have 1 token, "
				     << "had " << ctoks.size() << ". "
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			parsePolicyToken(ctoks[0], seedRounds);
		}
		// Seed interval as a function of read length
		// IVAL=<type>[,const[,coeff[,min[,max]]]]  (see PARSE_FUNC)
		// <type> is interpreted by parseFuncType(), i.e. one of
		// C (constant), L (linear), S (square root) or G (natural log).
		else if(tag == "IVAL") {
			PARSE_FUNC(multiseedIval);
		}
		// Intron-length penalty functions; same token layout as IVAL.
		// The destinations are optional pointers, so check them at run
		// time instead of relying on assert() alone.
		else if(tag == "CANINTRONLEN") {
			if(penCanIntronLen == NULL) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'"
				     << "; no destination was supplied for this setting" << endl
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			PARSE_FUNC((*penCanIntronLen));
		}
		else if(tag == "NONCANINTRONLEN") {
			if(penNoncanIntronLen == NULL) {
				cerr << "Error parsing alignment policy setting "
				     << "'" << tag.c_str() << "'"
				     << "; no destination was supplied for this setting" << endl
				     << "Policy: '" << s.c_str() << "'" << endl;
				assert(false); throw 1;
			}
			PARSE_FUNC((*penNoncanIntronLen));
		}
		else {
			// Unknown tag
			cerr << "Unexpected alignment policy setting "
			     << "'" << tag.c_str() << "'" << endl
			     << "Policy: '" << s.c_str() << "'" << endl;
			assert(false); throw 1;
		}
	}
}
#ifdef ALIGNER_SEED_POLICY_MAIN
// Stand-alone self-test for SeedAlignmentPolicy::parseString(), compiled
// only when ALIGNER_SEED_POLICY_MAIN is defined.  Each case parses a policy
// string and compares the outputs against expected values with assert_eq.
//
// NOTE(review): this driver appears to predate the current parseString()
// signature and is not expected to compile as written:
//  - the calls pass fewer arguments than the definition in this file
//    declares, and pass costFloor and mhits, which have no counterpart in
//    that parameter list; only Case 1 passes penScMax/penScMin;
//  - penMmcMax and penMmcMin are used in Cases 1 and 3 but never declared
//    here (only penMmc is);
//  - posfrac and rowmult are declared but never used;
//  - Case 4 uses the tag FL, which parseString() in this file does not
//    handle and would reject as an unexpected setting.
// Confirm whether the DEFAULT_MIN_* / DEFAULT_FLOOR_* constants referenced
// below still exist before reviving this test.
int main() {
int bonusMatchType;
int bonusMatch;
int penMmcType;
int penMmc;
int penScMax;
int penScMin;
int penNType;
int penN;
int penRdExConst;
int penRfExConst;
int penRdExLinear;
int penRfExLinear;
SimpleFunc costMin;
SimpleFunc costFloor;
SimpleFunc nCeil;
bool nCatPair;
int multiseedMms;
int multiseedLen;
SimpleFunc msIval;
SimpleFunc posfrac;
SimpleFunc rowmult;
uint32_t mhits;
// Case 1: empty policy, end-to-end mode, default gap penalties.
{
cout << "Case 1: Defaults 1 ... ";
const char *pol = "";
SeedAlignmentPolicy::parseString(
string(pol),
false, // --local?
false, // noisy homopolymers a la 454?
false, // ignore qualities?
bonusMatchType,
bonusMatch,
penMmcType,
penMmc,
penScMax,
penScMin,
penNType,
penN,
penRdExConst,
penRfExConst,
penRdExLinear,
penRfExLinear,
costMin,
costFloor,
nCeil,
nCatPair,
multiseedMms,
multiseedLen,
msIval,
mhits);
assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType);
assert_eq(DEFAULT_MATCH_BONUS, bonusMatch);
assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax);
assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin);
assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
assert_eq(DEFAULT_N_PENALTY, penN);
assert_eq(DEFAULT_MIN_CONST, costMin.getConst());
assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff());
assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst());
assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff());
assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst);
assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst);
assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear);
assert_eq(DEFAULT_SEEDMMS, multiseedMms);
assert_eq(DEFAULT_SEEDLEN, multiseedLen);
assert_eq(DEFAULT_IVAL, msIval.getType());
assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
assert_eq(DEFAULT_IVAL_B, msIval.getConst());
cout << "PASSED" << endl;
}
// Case 2: empty policy with noisy homopolymers; expects the *_BADHPOLY
// gap-penalty defaults.
{
cout << "Case 2: Defaults 2 ... ";
const char *pol = "";
SeedAlignmentPolicy::parseString(
string(pol),
false, // --local?
true, // noisy homopolymers a la 454?
false, // ignore qualities?
bonusMatchType,
bonusMatch,
penMmcType,
penMmc,
penNType,
penN,
penRdExConst,
penRfExConst,
penRdExLinear,
penRfExLinear,
costMin,
costFloor,
nCeil,
nCatPair,
multiseedMms,
multiseedLen,
msIval,
mhits);
assert_eq(DEFAULT_MATCH_BONUS_TYPE, bonusMatchType);
assert_eq(DEFAULT_MATCH_BONUS, bonusMatch);
assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
assert_eq(DEFAULT_MM_PENALTY_MAX, penMmc);
assert_eq(DEFAULT_MM_PENALTY_MIN, penMmc);
assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
assert_eq(DEFAULT_N_PENALTY, penN);
assert_eq(DEFAULT_MIN_CONST, costMin.getConst());
assert_eq(DEFAULT_MIN_LINEAR, costMin.getCoeff());
assert_eq(DEFAULT_FLOOR_CONST, costFloor.getConst());
assert_eq(DEFAULT_FLOOR_LINEAR, costFloor.getCoeff());
assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
assert_eq(DEFAULT_READ_GAP_CONST_BADHPOLY, penRdExConst);
assert_eq(DEFAULT_READ_GAP_LINEAR_BADHPOLY, penRdExLinear);
assert_eq(DEFAULT_REF_GAP_CONST_BADHPOLY, penRfExConst);
assert_eq(DEFAULT_REF_GAP_LINEAR_BADHPOLY, penRfExLinear);
assert_eq(DEFAULT_SEEDMMS, multiseedMms);
assert_eq(DEFAULT_SEEDLEN, multiseedLen);
assert_eq(DEFAULT_IVAL, msIval.getType());
assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
assert_eq(DEFAULT_IVAL_B, msIval.getConst());
cout << "PASSED" << endl;
}
// Case 3: empty policy in local mode; expects the *_LOCAL defaults.
{
cout << "Case 3: Defaults 3 ... ";
const char *pol = "";
SeedAlignmentPolicy::parseString(
string(pol),
true, // --local?
false, // noisy homopolymers a la 454?
false, // ignore qualities?
bonusMatchType,
bonusMatch,
penMmcType,
penMmc,
penNType,
penN,
penRdExConst,
penRfExConst,
penRdExLinear,
penRfExLinear,
costMin,
costFloor,
nCeil,
nCatPair,
multiseedMms,
multiseedLen,
msIval,
mhits);
assert_eq(DEFAULT_MATCH_BONUS_TYPE_LOCAL, bonusMatchType);
assert_eq(DEFAULT_MATCH_BONUS_LOCAL, bonusMatch);
assert_eq(DEFAULT_MM_PENALTY_TYPE, penMmcType);
assert_eq(DEFAULT_MM_PENALTY_MAX, penMmcMax);
assert_eq(DEFAULT_MM_PENALTY_MIN, penMmcMin);
assert_eq(DEFAULT_N_PENALTY_TYPE, penNType);
assert_eq(DEFAULT_N_PENALTY, penN);
assert_eq(DEFAULT_MIN_CONST_LOCAL, costMin.getConst());
assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff());
assert_eq(DEFAULT_FLOOR_CONST_LOCAL, costFloor.getConst());
assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff());
assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
assert_eq(DEFAULT_READ_GAP_CONST, penRdExConst);
assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
assert_eq(DEFAULT_REF_GAP_CONST, penRfExConst);
assert_eq(DEFAULT_REF_GAP_LINEAR, penRfExLinear);
assert_eq(DEFAULT_SEEDMMS, multiseedMms);
assert_eq(DEFAULT_SEEDLEN, multiseedLen);
assert_eq(DEFAULT_IVAL, msIval.getType());
assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
assert_eq(DEFAULT_IVAL_B, msIval.getConst());
cout << "PASSED" << endl;
}
// Case 4: explicit policy string in local mode; settings not named in the
// string are expected to keep their defaults.
{
cout << "Case 4: Simple string 1 ... ";
const char *pol = "MMP=C44;MA=4;RFG=24,12;FL=C,8;RDG=2;NP=C4;MIN=C,7";
SeedAlignmentPolicy::parseString(
string(pol),
true, // --local?
false, // noisy homopolymers a la 454?
false, // ignore qualities?
bonusMatchType,
bonusMatch,
penMmcType,
penMmc,
penNType,
penN,
penRdExConst,
penRfExConst,
penRdExLinear,
penRfExLinear,
costMin,
costFloor,
nCeil,
nCatPair,
multiseedMms,
multiseedLen,
msIval,
mhits);
assert_eq(COST_MODEL_CONSTANT, bonusMatchType);
assert_eq(4, bonusMatch);
assert_eq(COST_MODEL_CONSTANT, penMmcType);
assert_eq(44, penMmc);
assert_eq(COST_MODEL_CONSTANT, penNType);
assert_eq(4.0f, penN);
assert_eq(7, costMin.getConst());
assert_eq(DEFAULT_MIN_LINEAR_LOCAL, costMin.getCoeff());
assert_eq(8, costFloor.getConst());
assert_eq(DEFAULT_FLOOR_LINEAR_LOCAL, costFloor.getCoeff());
assert_eq(DEFAULT_N_CEIL_CONST, nCeil.getConst());
assert_eq(DEFAULT_N_CEIL_LINEAR, nCeil.getCoeff());
assert_eq(DEFAULT_N_CAT_PAIR, nCatPair);
assert_eq(2.0f, penRdExConst);
assert_eq(DEFAULT_READ_GAP_LINEAR, penRdExLinear);
assert_eq(24.0f, penRfExConst);
assert_eq(12.0f, penRfExLinear);
assert_eq(DEFAULT_SEEDMMS, multiseedMms);
assert_eq(DEFAULT_SEEDLEN, multiseedLen);
assert_eq(DEFAULT_IVAL, msIval.getType());
assert_eq(DEFAULT_IVAL_A, msIval.getCoeff());
assert_eq(DEFAULT_IVAL_B, msIval.getConst());
cout << "PASSED" << endl;
}
}
#endif /*def ALIGNER_SEED_POLICY_MAIN*/