/* * Copyright 2011, Ben Langmead * * This file is part of Bowtie 2. * * Bowtie 2 is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Bowtie 2 is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Bowtie 2. If not, see . */ #ifndef SEQUENCE_IO_H_ #define SEQUENCE_IO_H_ #include #include #include #include #include "assert_helpers.h" #include "ds.h" #include "filebuf.h" #include "sstring.h" using namespace std; /** * Parse the fasta file 'infile'. Store */ template static void parseFastaLens( const TFnStr& infile, // filename EList& namelens, // destination for fasta name lengths EList& seqlens) // destination for fasta sequence lengths { FILE *in = fopen(sstr_to_cstr(infile), "r"); if(in == NULL) { cerr << "Could not open sequence file" << endl; throw 1; } FileBuf fb(in); while(!fb.eof()) { namelens.expand(); namelens.back() = 0; seqlens.expand(); seqlens.back() = 0; fb.parseFastaRecordLength(namelens.back(), seqlens.back()); if(seqlens.back() == 0) { // Couldn't read a record. We're probably done with this file. namelens.pop_back(); seqlens.pop_back(); continue; } } fb.close(); } /** * Parse the fasta file 'infile'. Store each name record in 'names', each * sequence record in 'seqs', and the lengths of each */ template static void parseFasta( const TFnStr& infile, // filename EList& names, // destination for fasta names EList& namelens, // destination for fasta name lengths EList& seqs, // destination for fasta sequences EList& seqlens) // destination for fasta sequence lengths { assert_eq(namelens.size(), seqlens.size()); assert_eq(names.size(), namelens.size()); assert_eq(seqs.size(), seqlens.size()); size_t cur = namelens.size(); parseFastaLens(infile, namelens, seqlens); FILE *in = fopen(sstr_to_cstr(infile), "r"); if(in == NULL) { cerr << "Could not open sequence file" << endl; throw 1; } FileBuf fb(in); while(!fb.eof()) { // Add a new empty record to the end names.expand(); seqs.expand(); names.back() = new char[namelens[cur]+1]; seqs.back() = new char[seqlens[cur]+1]; fb.parseFastaRecord(names.back(), seqs.back()); if(seqs.back().empty()) { // Couldn't read a record. We're probably done with this file. names.pop_back(); seqs.pop_back(); continue; } } fb.close(); } /** * Read a set of FASTA sequence files of the given format and alphabet type. * Store all of the extracted sequences in vector ss. */ template static void parseFastas( const EList& infiles, // filenames EList& names, // destination for fasta names EList& namelens, // destination for fasta name lengths EList& seqs, // destination for fasta sequences EList& seqlens) // destination for fasta sequence lengths { for(size_t i = 0; i < infiles.size(); i++) { parseFasta( infiles[i], names, namelens, seqs, seqlens); } } #endif /*SEQUENCE_IO_H_*/